panvk: Prepare per-gen split

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12551>
Boris Brezillon 2021-07-07 16:19:16 +02:00
parent c14c246439
commit 792a0ab0b1
22 changed files with 4864 additions and 4518 deletions
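The split works by compiling the same panvk_vX_*.c sources once per supported GPU generation (v5, v6, v7), with -DPAN_ARCH selecting the architecture at build time and the gen_macros.h helpers mapping generic names onto per-version symbols (panvk_per_arch()) or dispatching on the probed architecture at runtime (panvk_arch_dispatch()). The following is only an illustrative sketch of that macro pattern, assuming the conventional name-pasting approach; the exact definitions added by this commit may differ.

/* Illustrative sketch, not the literal gen_macros.h contents. Each per-arch
 * translation unit is built with -DPAN_ARCH=<5|6|7>, so a generic name such
 * as panvk_per_arch(emit_viewport) expands to panvk_v5_emit_viewport,
 * panvk_v6_emit_viewport or panvk_v7_emit_viewport. */
#define panvk_arch_name(name, version) panvk_##version##_##name

#if PAN_ARCH == 5
#define panvk_per_arch(name) panvk_arch_name(name, v5)
#elif PAN_ARCH == 6
#define panvk_per_arch(name) panvk_arch_name(name, v6)
#elif PAN_ARCH == 7
#define panvk_per_arch(name) panvk_arch_name(name, v7)
#endif

/* Arch-independent code selects the variant at runtime, as done for
 * meta_init/meta_cleanup in panvk_device.c below. */
#define panvk_arch_dispatch(arch, name, ...)              \
do {                                                      \
   switch (arch) {                                        \
   case 5: panvk_v5_##name(__VA_ARGS__); break;           \
   case 6: panvk_v6_##name(__VA_ARGS__); break;           \
   case 7: panvk_v7_##name(__VA_ARGS__); break;           \
   default: unreachable("Unsupported architecture");      \
   }                                                      \
} while (0)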


@ -28,7 +28,9 @@ panvk_entrypoints = custom_target(
command : [
prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak',
'--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'panvk',
'--device-prefix', 'panvk_v5', '--device-prefix', 'panvk_v6', '--device-prefix', 'panvk_v7',
],
depend_files : vk_entrypoints_gen_depend_files,
)
libpanvk_files = files(
@ -39,7 +41,6 @@ libpanvk_files = files(
'panvk_formats.c',
'panvk_image.c',
'panvk_mempool.c',
'panvk_meta.c',
'panvk_pass.c',
'panvk_pipeline.c',
'panvk_pipeline_cache.c',
@ -48,13 +49,46 @@ libpanvk_files = files(
'panvk_shader.c',
'panvk_sync.c',
'panvk_util.c',
'panvk_varyings.c',
'panvk_wsi.c',
'panvk_wsi_display.c',
)
panvk_deps = []
panvk_flags = []
panvk_per_arch_libs = []
foreach arch : ['5', '6', '7']
panvk_per_arch_libs += static_library(
'panvk_v@0@'.format(arch),
[
'panvk_vX_cmd_buffer.c',
'panvk_vX_cs.c',
'panvk_vX_descriptor_set.c',
'panvk_vX_device.c',
'panvk_vX_image.c',
'panvk_vX_meta.c',
'panvk_vX_pipeline.c',
'panvk_vX_shader.c',
],
include_directories : [
inc_include,
inc_src,
inc_compiler,
inc_gallium, # XXX: pipe/p_format.h
inc_gallium_aux, # XXX: renderonly
inc_vulkan_wsi,
inc_panfrost,
],
dependencies : [
idep_nir_headers,
idep_pan_packers,
idep_vulkan_util_headers,
dep_libdrm,
dep_valgrind,
],
c_args : [no_override_init_args, panvk_flags, '-DPAN_ARCH=@0@'.format(arch)],
)
endforeach
if system_has_kms_drm
panvk_flags += '-DVK_USE_PLATFORM_DISPLAY_KHR'
@ -80,6 +114,7 @@ libvulkan_panfrost = shared_library(
inc_vulkan_wsi,
inc_panfrost,
],
link_whole : [panvk_per_arch_libs],
link_with : [
libvulkan_wsi,
libpanfrost_shared,

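Each of the three static libraries above is built from the same panvk_vX_*.c files, differing only in the -DPAN_ARCH value, and link_whole pulls all of them into libvulkan_panfrost so every per-version entrypoint table ends up in the final driver. A hypothetical sketch of what such a source file can then do (the function name is illustrative, not from this commit):

/* Hypothetical per-arch source file; compiled three times, once per arch. */
#include "gen_macros.h"

void
panvk_per_arch(emit_something)(void *desc)
{
#if PAN_ARCH >= 6
   /* Bifrost (v6/v7) path */
#else
   /* Midgard (v5) path */
#endif
}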
File diff suppressed because it is too large


@ -26,443 +26,10 @@
#include "panfrost-quirks.h"
#include "pan_cs.h"
#include "pan_encoder.h"
#include "pan_pool.h"
#include "panvk_cs.h"
#include "panvk_private.h"
#include "panvk_varyings.h"
static mali_pixel_format
panvk_varying_hw_format(const struct panvk_device *dev,
const struct panvk_varyings_info *varyings,
gl_shader_stage stage, unsigned idx)
{
const struct panfrost_device *pdev = &dev->physical_device->pdev;
gl_varying_slot loc = varyings->stage[stage].loc[idx];
bool fs = stage == MESA_SHADER_FRAGMENT;
switch (loc) {
case VARYING_SLOT_PNTC:
case VARYING_SLOT_PSIZ:
return (MALI_R16F << 12) |
(pdev->quirks & HAS_SWIZZLES ?
panfrost_get_default_swizzle(1) : 0);
case VARYING_SLOT_POS:
return ((fs ? MALI_RGBA32F : MALI_SNAP_4) << 12) |
(pdev->quirks & HAS_SWIZZLES ?
panfrost_get_default_swizzle(4) : 0);
default:
assert(!panvk_varying_is_builtin(stage, loc));
return pdev->formats[varyings->varying[loc].format].hw;
}
}
static void
panvk_emit_varying(const struct panvk_device *dev,
const struct panvk_varyings_info *varyings,
gl_shader_stage stage, unsigned idx,
void *attrib)
{
const struct panfrost_device *pdev = &dev->physical_device->pdev;
gl_varying_slot loc = varyings->stage[stage].loc[idx];
bool fs = stage == MESA_SHADER_FRAGMENT;
pan_pack(attrib, ATTRIBUTE, cfg) {
if (!panvk_varying_is_builtin(stage, loc)) {
cfg.buffer_index = varyings->varying[loc].buf;
cfg.offset = varyings->varying[loc].offset;
} else {
cfg.buffer_index =
panvk_varying_buf_index(varyings,
panvk_varying_buf_id(fs, loc));
}
cfg.offset_enable = !pan_is_bifrost(pdev);
cfg.format = panvk_varying_hw_format(dev, varyings, stage, idx);
}
}
void
panvk_emit_varyings(const struct panvk_device *dev,
const struct panvk_varyings_info *varyings,
gl_shader_stage stage,
void *descs)
{
struct mali_attribute_packed *attrib = descs;
for (unsigned i = 0; i < varyings->stage[stage].count; i++)
panvk_emit_varying(dev, varyings, stage, i, attrib++);
}
static void
panvk_emit_varying_buf(const struct panvk_device *dev,
const struct panvk_varyings_info *varyings,
enum panvk_varying_buf_id id, void *buf)
{
unsigned buf_idx = panvk_varying_buf_index(varyings, id);
enum mali_attribute_special special_id = panvk_varying_special_buf_id(id);
pan_pack(buf, ATTRIBUTE_BUFFER, cfg) {
if (special_id) {
cfg.type = 0;
cfg.special = special_id;
} else {
unsigned offset = varyings->buf[buf_idx].address & 63;
cfg.stride = varyings->buf[buf_idx].stride;
cfg.size = varyings->buf[buf_idx].size + offset;
cfg.pointer = varyings->buf[buf_idx].address & ~63ULL;
}
}
}
void
panvk_emit_varying_bufs(const struct panvk_device *dev,
const struct panvk_varyings_info *varyings,
void *descs)
{
const struct panfrost_device *pdev = &dev->physical_device->pdev;
struct mali_attribute_buffer_packed *buf = descs;
for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) {
if (varyings->buf_mask & (1 << i))
panvk_emit_varying_buf(dev, varyings, i, buf++);
}
if (pan_is_bifrost(pdev))
memset(buf, 0, sizeof(*buf));
}
static void
panvk_emit_attrib_buf(const struct panvk_device *dev,
const struct panvk_attribs_info *info,
const struct panvk_draw_info *draw,
const struct panvk_attrib_buf *bufs,
unsigned buf_count,
unsigned idx, void *desc)
{
ASSERTED const struct panfrost_device *pdev = &dev->physical_device->pdev;
const struct panvk_attrib_buf_info *buf_info = &info->buf[idx];
if (buf_info->special) {
assert(!pan_is_bifrost(pdev));
switch (buf_info->special_id) {
case PAN_VERTEX_ID:
panfrost_vertex_id(draw->padded_vertex_count, desc,
draw->instance_count > 1);
return;
case PAN_INSTANCE_ID:
panfrost_instance_id(draw->padded_vertex_count, desc,
draw->instance_count > 1);
return;
default:
unreachable("Invalid attribute ID");
}
}
assert(idx < buf_count);
const struct panvk_attrib_buf *buf = &bufs[idx];
unsigned divisor = buf_info->per_instance ?
draw->padded_vertex_count : 0;
unsigned stride = divisor && draw->instance_count == 1 ?
0 : buf_info->stride;
mali_ptr addr = buf->address & ~63ULL;
unsigned size = buf->size + (buf->address & 63);
/* TODO: support instanced arrays */
pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
if (draw->instance_count > 1 && divisor) {
cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS;
cfg.divisor = divisor;
}
cfg.pointer = addr;
cfg.stride = stride;
cfg.size = size;
}
}
void
panvk_emit_attrib_bufs(const struct panvk_device *dev,
const struct panvk_attribs_info *info,
const struct panvk_attrib_buf *bufs,
unsigned buf_count,
const struct panvk_draw_info *draw,
void *descs)
{
const struct panfrost_device *pdev = &dev->physical_device->pdev;
struct mali_attribute_buffer_packed *buf = descs;
for (unsigned i = 0; i < info->buf_count; i++)
panvk_emit_attrib_buf(dev, info, draw, bufs, buf_count, i, buf++);
/* A NULL entry is needed to stop prefetching on Bifrost */
if (pan_is_bifrost(pdev))
memset(buf, 0, sizeof(*buf));
}
static void
panvk_emit_attrib(const struct panvk_device *dev,
const struct panvk_attribs_info *attribs,
const struct panvk_attrib_buf *bufs,
unsigned buf_count,
unsigned idx, void *attrib)
{
const struct panfrost_device *pdev = &dev->physical_device->pdev;
pan_pack(attrib, ATTRIBUTE, cfg) {
cfg.buffer_index = attribs->attrib[idx].buf;
cfg.offset = attribs->attrib[idx].offset +
(bufs[cfg.buffer_index].address & 63);
cfg.format = pdev->formats[attribs->attrib[idx].format].hw;
}
}
void
panvk_emit_attribs(const struct panvk_device *dev,
const struct panvk_attribs_info *attribs,
const struct panvk_attrib_buf *bufs,
unsigned buf_count,
void *descs)
{
struct mali_attribute_packed *attrib = descs;
for (unsigned i = 0; i < attribs->attrib_count; i++)
panvk_emit_attrib(dev, attribs, bufs, buf_count, i, attrib++);
}
void
panvk_emit_ubos(const struct panvk_pipeline *pipeline,
const struct panvk_descriptor_state *state,
void *descs)
{
struct mali_uniform_buffer_packed *ubos = descs;
for (unsigned i = 0; i < ARRAY_SIZE(state->sets); i++) {
const struct panvk_descriptor_set_layout *set_layout =
pipeline->layout->sets[i].layout;
const struct panvk_descriptor_set *set = state->sets[i].set;
unsigned offset = pipeline->layout->sets[i].ubo_offset;
if (!set_layout)
continue;
if (!set) {
unsigned num_ubos = (set_layout->num_dynoffsets != 0) + set_layout->num_ubos;
memset(&ubos[offset], 0, num_ubos * sizeof(*ubos));
} else {
memcpy(&ubos[offset], set->ubos, set_layout->num_ubos * sizeof(*ubos));
if (set_layout->num_dynoffsets) {
pan_pack(&ubos[offset + set_layout->num_ubos], UNIFORM_BUFFER, cfg) {
cfg.pointer = state->sets[i].dynoffsets.gpu;
cfg.entries = DIV_ROUND_UP(set->layout->num_dynoffsets, 16);
}
}
}
}
for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++) {
if (!pipeline->sysvals[i].ids.sysval_count)
continue;
pan_pack(&ubos[pipeline->sysvals[i].ubo_idx], UNIFORM_BUFFER, cfg) {
cfg.pointer = pipeline->sysvals[i].ubo ? :
state->sysvals[i];
cfg.entries = pipeline->sysvals[i].ids.sysval_count;
}
}
}
void
panvk_emit_vertex_job(const struct panvk_device *dev,
const struct panvk_pipeline *pipeline,
const struct panvk_draw_info *draw,
void *job)
{
const struct panfrost_device *pdev = &dev->physical_device->pdev;
void *section = pan_section_ptr(job, COMPUTE_JOB, INVOCATION);
memcpy(section, &draw->invocation, pan_size(INVOCATION));
pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) {
cfg.job_task_split = 5;
}
pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) {
cfg.draw_descriptor_is_64b = true;
if (!pan_is_bifrost(pdev))
cfg.texture_descriptor_is_64b = true;
cfg.state = pipeline->rsds[MESA_SHADER_VERTEX];
cfg.attributes = draw->stages[MESA_SHADER_VERTEX].attributes;
cfg.attribute_buffers = draw->attribute_bufs;
cfg.varyings = draw->stages[MESA_SHADER_VERTEX].varyings;
cfg.varying_buffers = draw->varying_bufs;
cfg.thread_storage = draw->tls;
cfg.offset_start = draw->offset_start;
cfg.instance_size = draw->instance_count > 1 ?
draw->padded_vertex_count : 1;
cfg.uniform_buffers = draw->ubos;
cfg.push_uniforms = draw->stages[PIPE_SHADER_VERTEX].push_constants;
cfg.textures = draw->textures;
cfg.samplers = draw->samplers;
}
pan_section_pack(job, COMPUTE_JOB, DRAW_PADDING, cfg);
}
void
panvk_emit_tiler_job(const struct panvk_device *dev,
const struct panvk_pipeline *pipeline,
const struct panvk_draw_info *draw,
void *job)
{
const struct panfrost_device *pdev = &dev->physical_device->pdev;
void *section = pan_is_bifrost(pdev) ?
pan_section_ptr(job, BIFROST_TILER_JOB, INVOCATION) :
pan_section_ptr(job, MIDGARD_TILER_JOB, INVOCATION);
memcpy(section, &draw->invocation, pan_size(INVOCATION));
section = pan_is_bifrost(pdev) ?
pan_section_ptr(job, BIFROST_TILER_JOB, PRIMITIVE) :
pan_section_ptr(job, MIDGARD_TILER_JOB, PRIMITIVE);
pan_pack(section, PRIMITIVE, cfg) {
cfg.draw_mode = pipeline->ia.topology;
if (pipeline->ia.writes_point_size)
cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;
cfg.first_provoking_vertex = true;
if (pipeline->ia.primitive_restart)
cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
cfg.job_task_split = 6;
/* TODO: indexed draws */
cfg.index_count = draw->vertex_count;
}
section = pan_is_bifrost(pdev) ?
pan_section_ptr(job, BIFROST_TILER_JOB, PRIMITIVE_SIZE) :
pan_section_ptr(job, MIDGARD_TILER_JOB, PRIMITIVE_SIZE);
pan_pack(section, PRIMITIVE_SIZE, cfg) {
if (pipeline->ia.writes_point_size) {
cfg.size_array = draw->psiz;
} else {
cfg.constant = draw->line_width;
}
}
section = pan_is_bifrost(pdev) ?
pan_section_ptr(job, BIFROST_TILER_JOB, DRAW) :
pan_section_ptr(job, MIDGARD_TILER_JOB, DRAW);
pan_pack(section, DRAW, cfg) {
cfg.four_components_per_vertex = true;
cfg.draw_descriptor_is_64b = true;
if (!pan_is_bifrost(pdev))
cfg.texture_descriptor_is_64b = true;
cfg.front_face_ccw = pipeline->rast.front_ccw;
cfg.cull_front_face = pipeline->rast.cull_front_face;
cfg.cull_back_face = pipeline->rast.cull_back_face;
cfg.position = draw->position;
cfg.state = draw->fs_rsd;
cfg.attributes = draw->stages[MESA_SHADER_FRAGMENT].attributes;
cfg.attribute_buffers = draw->attribute_bufs;
cfg.viewport = draw->viewport;
cfg.varyings = draw->stages[MESA_SHADER_FRAGMENT].varyings;
cfg.varying_buffers = cfg.varyings ? draw->varying_bufs : 0;
if (pan_is_bifrost(pdev))
cfg.thread_storage = draw->tls;
else
cfg.fbd = draw->fb;
/* For all primitives but lines, DRAW.flat_shading_vertex must
* be set to 0, and the provoking vertex is selected with the
* PRIMITIVE.first_provoking_vertex field.
*/
if (pipeline->ia.topology == MALI_DRAW_MODE_LINES ||
pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP ||
pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) {
/* The logic is inverted on bifrost. */
cfg.flat_shading_vertex = pan_is_bifrost(pdev) ?
true : false;
}
cfg.offset_start = draw->offset_start;
cfg.instance_size = draw->instance_count > 1 ?
draw->padded_vertex_count : 1;
cfg.uniform_buffers = draw->ubos;
cfg.push_uniforms = draw->stages[PIPE_SHADER_FRAGMENT].push_constants;
cfg.textures = draw->textures;
cfg.samplers = draw->samplers;
/* TODO: occlusion queries */
}
if (pan_is_bifrost(pdev)) {
pan_section_pack(job, BIFROST_TILER_JOB, TILER, cfg) {
cfg.address = draw->tiler_ctx->bifrost;
}
pan_section_pack(job, BIFROST_TILER_JOB, DRAW_PADDING, padding);
pan_section_pack(job, BIFROST_TILER_JOB, PADDING, padding);
}
}
void
panvk_emit_fragment_job(const struct panvk_device *dev,
const struct panvk_framebuffer *fb,
mali_ptr fbdesc,
void *job)
{
pan_section_pack(job, FRAGMENT_JOB, HEADER, header) {
header.type = MALI_JOB_TYPE_FRAGMENT;
header.index = 1;
}
pan_section_pack(job, FRAGMENT_JOB, PAYLOAD, payload) {
payload.bound_min_x = 0;
payload.bound_min_y = 0;
payload.bound_max_x = (fb->width - 1) >> MALI_TILE_SHIFT;
payload.bound_max_y = (fb->height - 1) >> MALI_TILE_SHIFT;
payload.framebuffer = fbdesc;
}
}
void
panvk_emit_viewport(const VkViewport *viewport, const VkRect2D *scissor,
void *vpd)
{
/* The spec says "width must be greater than 0.0" */
assert(viewport->x >= 0);
int minx = (int)viewport->x;
int maxx = (int)(viewport->x + viewport->width);
/* Viewport height can be negative */
int miny = MIN2((int)viewport->y, (int)(viewport->y + viewport->height));
int maxy = MAX2((int)viewport->y, (int)(viewport->y + viewport->height));
assert(scissor->offset.x >= 0 && scissor->offset.y >= 0);
minx = MAX2(scissor->offset.x, minx);
miny = MAX2(scissor->offset.y, miny);
maxx = MIN2(scissor->offset.x + scissor->extent.width, maxx);
maxy = MIN2(scissor->offset.y + scissor->extent.height, maxy);
/* Make sure we don't end up with a max < min when width/height is 0 */
maxx = maxx > minx ? maxx - 1 : maxx;
maxy = maxy > miny ? maxy - 1 : maxy;
assert(viewport->minDepth >= 0.0f && viewport->minDepth <= 1.0f);
assert(viewport->maxDepth >= 0.0f && viewport->maxDepth <= 1.0f);
pan_pack(vpd, VIEWPORT, cfg) {
cfg.scissor_minimum_x = minx;
cfg.scissor_minimum_y = miny;
cfg.scissor_maximum_x = maxx;
cfg.scissor_maximum_y = maxy;
cfg.minimum_z = MIN2(viewport->minDepth, viewport->maxDepth);
cfg.maximum_z = MAX2(viewport->minDepth, viewport->maxDepth);
}
}
void
panvk_sysval_upload_viewport_scale(const VkViewport *viewport,
@ -481,368 +48,3 @@ panvk_sysval_upload_viewport_offset(const VkViewport *viewport,
data->f32[1] = (0.5f * viewport->height) + viewport->y;
data->f32[2] = (0.5f * (viewport->maxDepth - viewport->minDepth)) + viewport->minDepth;
}
static enum mali_bifrost_register_file_format
bifrost_blend_type_from_nir(nir_alu_type nir_type)
{
switch(nir_type) {
case 0: /* Render target not in use */
return 0;
case nir_type_float16:
return MALI_BIFROST_REGISTER_FILE_FORMAT_F16;
case nir_type_float32:
return MALI_BIFROST_REGISTER_FILE_FORMAT_F32;
case nir_type_int32:
return MALI_BIFROST_REGISTER_FILE_FORMAT_I32;
case nir_type_uint32:
return MALI_BIFROST_REGISTER_FILE_FORMAT_U32;
case nir_type_int16:
return MALI_BIFROST_REGISTER_FILE_FORMAT_I16;
case nir_type_uint16:
return MALI_BIFROST_REGISTER_FILE_FORMAT_U16;
default:
unreachable("Unsupported blend shader type for NIR alu type");
}
}
static void
panvk_emit_bifrost_blend(const struct panvk_device *dev,
const struct panvk_pipeline *pipeline,
unsigned rt, void *bd)
{
const struct pan_blend_state *blend = &pipeline->blend.state;
const struct panfrost_device *pdev = &dev->physical_device->pdev;
const struct pan_blend_rt_state *rts = &blend->rts[rt];
bool dithered = false;
pan_pack(bd, BLEND, cfg) {
if (!blend->rt_count || !rts->equation.color_mask) {
cfg.enable = false;
cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_OFF;
continue;
}
cfg.srgb = util_format_is_srgb(rts->format);
cfg.load_destination = pan_blend_reads_dest(blend->rts[rt].equation);
cfg.round_to_fb_precision = !dithered;
const struct util_format_description *format_desc =
util_format_description(rts->format);
unsigned chan_size = 0;
for (unsigned i = 0; i < format_desc->nr_channels; i++)
chan_size = MAX2(format_desc->channel[i].size, chan_size);
pan_blend_to_fixed_function_equation(blend->rts[rt].equation,
&cfg.bifrost.equation);
/* Fixed point constant */
float fconst =
pan_blend_get_constant(pan_blend_constant_mask(blend->rts[rt].equation),
blend->constants);
uint16_t constant = fconst * ((1 << chan_size) - 1);
constant <<= 16 - chan_size;
cfg.bifrost.constant = constant;
if (pan_blend_is_opaque(blend->rts[rt].equation))
cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_OPAQUE;
else
cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_FIXED_FUNCTION;
/* If we want the conversion to work properly,
* num_comps must be set to 4
*/
cfg.bifrost.internal.fixed_function.num_comps = 4;
cfg.bifrost.internal.fixed_function.conversion.memory_format =
panfrost_format_to_bifrost_blend(pdev, rts->format, dithered);
cfg.bifrost.internal.fixed_function.conversion.register_format =
bifrost_blend_type_from_nir(pipeline->fs.info.bifrost.blend[rt].type);
cfg.bifrost.internal.fixed_function.rt = rt;
}
}
static void
panvk_emit_midgard_blend(const struct panvk_device *dev,
const struct panvk_pipeline *pipeline,
unsigned rt, void *bd)
{
const struct pan_blend_state *blend = &pipeline->blend.state;
const struct pan_blend_rt_state *rts = &blend->rts[rt];
pan_pack(bd, BLEND, cfg) {
if (!blend->rt_count || !rts->equation.color_mask) {
cfg.enable = false;
continue;
}
cfg.srgb = util_format_is_srgb(rts->format);
cfg.load_destination = pan_blend_reads_dest(blend->rts[rt].equation);
cfg.round_to_fb_precision = true;
cfg.midgard.blend_shader = false;
pan_blend_to_fixed_function_equation(blend->rts[rt].equation,
&cfg.midgard.equation);
cfg.midgard.constant =
pan_blend_get_constant(pan_blend_constant_mask(blend->rts[rt].equation),
blend->constants);
}
}
void
panvk_emit_blend(const struct panvk_device *dev,
const struct panvk_pipeline *pipeline,
unsigned rt, void *bd)
{
const struct panfrost_device *pdev = &dev->physical_device->pdev;
if (pan_is_bifrost(pdev))
panvk_emit_bifrost_blend(dev, pipeline, rt, bd);
else
panvk_emit_midgard_blend(dev, pipeline, rt, bd);
}
void
panvk_emit_blend_constant(const struct panvk_device *dev,
const struct panvk_pipeline *pipeline,
unsigned rt, const float *constants, void *bd)
{
const struct panfrost_device *pdev = &dev->physical_device->pdev;
float constant = constants[pipeline->blend.constant[rt].index];
pan_pack(bd, BLEND, cfg) {
cfg.enable = false;
if (pan_is_bifrost(pdev)) {
cfg.bifrost.constant = constant * pipeline->blend.constant[rt].bifrost_factor;
} else {
cfg.midgard.constant = constant;
}
}
}
void
panvk_emit_dyn_fs_rsd(const struct panvk_device *dev,
const struct panvk_pipeline *pipeline,
const struct panvk_cmd_state *state,
void *rsd)
{
pan_pack(rsd, RENDERER_STATE, cfg) {
if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) {
cfg.depth_units = state->rast.depth_bias.constant_factor * 2.0f;
cfg.depth_factor = state->rast.depth_bias.slope_factor;
cfg.depth_bias_clamp = state->rast.depth_bias.clamp;
}
if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
cfg.stencil_front.mask = state->zs.s_front.compare_mask;
cfg.stencil_back.mask = state->zs.s_back.compare_mask;
}
if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
cfg.stencil_mask_misc.stencil_mask_front = state->zs.s_front.write_mask;
cfg.stencil_mask_misc.stencil_mask_back = state->zs.s_back.write_mask;
}
if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
cfg.stencil_front.reference_value = state->zs.s_front.ref;
cfg.stencil_back.reference_value = state->zs.s_back.ref;
}
}
}
void
panvk_emit_base_fs_rsd(const struct panvk_device *dev,
const struct panvk_pipeline *pipeline,
void *rsd)
{
const struct panfrost_device *pdev = &dev->physical_device->pdev;
const struct pan_shader_info *info = &pipeline->fs.info;
pan_pack(rsd, RENDERER_STATE, cfg) {
if (pipeline->fs.required) {
pan_shader_prepare_rsd(pdev, info, pipeline->fs.address, &cfg);
if (pan_is_bifrost(pdev)) {
cfg.properties.bifrost.allow_forward_pixel_to_kill = info->fs.can_fpk;
} else {
/* If either depth or stencil is enabled, discard matters */
bool zs_enabled =
(pipeline->zs.z_test && pipeline->zs.z_compare_func != MALI_FUNC_ALWAYS) ||
pipeline->zs.s_test;
cfg.properties.midgard.work_register_count = info->work_reg_count;
cfg.properties.midgard.force_early_z =
info->fs.can_early_z && !pipeline->ms.alpha_to_coverage &&
pipeline->zs.z_compare_func == MALI_FUNC_ALWAYS;
/* Work around a hardware erratum where early-z cannot be enabled
* when discarding, even when the depth buffer is read-only, by
* lying to the hardware about the discard and setting the
* shader_reads_tilebuffer flag to compensate. */
cfg.properties.midgard.shader_reads_tilebuffer =
info->fs.outputs_read ||
(!zs_enabled && info->fs.can_discard);
cfg.properties.midgard.shader_contains_discard =
zs_enabled && info->fs.can_discard;
}
} else {
if (pan_is_bifrost(pdev)) {
cfg.properties.bifrost.shader_modifies_coverage = true;
cfg.properties.bifrost.allow_forward_pixel_to_kill = true;
cfg.properties.bifrost.allow_forward_pixel_to_be_killed = true;
cfg.properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
} else {
cfg.shader.shader = 0x1;
cfg.properties.midgard.work_register_count = 1;
cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
cfg.properties.midgard.force_early_z = true;
}
}
bool msaa = pipeline->ms.rast_samples > 1;
cfg.multisample_misc.multisample_enable = msaa;
cfg.multisample_misc.sample_mask =
msaa ? pipeline->ms.sample_mask : UINT16_MAX;
cfg.multisample_misc.depth_function =
pipeline->zs.z_test ? pipeline->zs.z_compare_func : MALI_FUNC_ALWAYS;
cfg.multisample_misc.depth_write_mask = pipeline->zs.z_write;
cfg.multisample_misc.fixed_function_near_discard = !pipeline->rast.clamp_depth;
cfg.multisample_misc.fixed_function_far_discard = !pipeline->rast.clamp_depth;
cfg.multisample_misc.shader_depth_range_fixed = true;
cfg.stencil_mask_misc.stencil_enable = pipeline->zs.s_test;
cfg.stencil_mask_misc.alpha_to_coverage = pipeline->ms.alpha_to_coverage;
cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
cfg.stencil_mask_misc.depth_range_1 = pipeline->rast.depth_bias.enable;
cfg.stencil_mask_misc.depth_range_2 = pipeline->rast.depth_bias.enable;
cfg.stencil_mask_misc.single_sampled_lines = pipeline->ms.rast_samples <= 1;
if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS))) {
cfg.depth_units = pipeline->rast.depth_bias.constant_factor * 2.0f;
cfg.depth_factor = pipeline->rast.depth_bias.slope_factor;
cfg.depth_bias_clamp = pipeline->rast.depth_bias.clamp;
}
if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK))) {
cfg.stencil_front.mask = pipeline->zs.s_front.compare_mask;
cfg.stencil_back.mask = pipeline->zs.s_back.compare_mask;
}
if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK))) {
cfg.stencil_mask_misc.stencil_mask_front = pipeline->zs.s_front.write_mask;
cfg.stencil_mask_misc.stencil_mask_back = pipeline->zs.s_back.write_mask;
}
if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))) {
cfg.stencil_front.reference_value = pipeline->zs.s_front.ref;
cfg.stencil_back.reference_value = pipeline->zs.s_back.ref;
}
cfg.stencil_front.compare_function = pipeline->zs.s_front.compare_func;
cfg.stencil_front.stencil_fail = pipeline->zs.s_front.fail_op;
cfg.stencil_front.depth_fail = pipeline->zs.s_front.z_fail_op;
cfg.stencil_front.depth_pass = pipeline->zs.s_front.pass_op;
cfg.stencil_back.compare_function = pipeline->zs.s_back.compare_func;
cfg.stencil_back.stencil_fail = pipeline->zs.s_back.fail_op;
cfg.stencil_back.depth_fail = pipeline->zs.s_back.z_fail_op;
cfg.stencil_back.depth_pass = pipeline->zs.s_back.pass_op;
}
}
void
panvk_emit_non_fs_rsd(const struct panvk_device *dev,
const struct pan_shader_info *shader_info,
mali_ptr shader_ptr,
void *rsd)
{
const struct panfrost_device *pdev = &dev->physical_device->pdev;
assert(shader_info->stage != MESA_SHADER_FRAGMENT);
pan_pack(rsd, RENDERER_STATE, cfg) {
pan_shader_prepare_rsd(pdev, shader_info, shader_ptr, &cfg);
}
}
void
panvk_emit_bifrost_tiler_context(const struct panvk_device *dev,
unsigned width, unsigned height,
const struct panfrost_ptr *descs)
{
const struct panfrost_device *pdev = &dev->physical_device->pdev;
pan_pack(descs->cpu + pan_size(BIFROST_TILER), BIFROST_TILER_HEAP, cfg) {
cfg.size = pdev->tiler_heap->size;
cfg.base = pdev->tiler_heap->ptr.gpu;
cfg.bottom = pdev->tiler_heap->ptr.gpu;
cfg.top = pdev->tiler_heap->ptr.gpu + pdev->tiler_heap->size;
}
pan_pack(descs->cpu, BIFROST_TILER, cfg) {
cfg.hierarchy_mask = 0x28;
cfg.fb_width = width;
cfg.fb_height = height;
cfg.heap = descs->gpu + pan_size(BIFROST_TILER);
}
}
unsigned
panvk_emit_fb(const struct panvk_device *dev,
const struct panvk_batch *batch,
const struct panvk_subpass *subpass,
const struct panvk_framebuffer *fb,
const struct panvk_clear_value *clears,
const struct pan_tls_info *tlsinfo,
const struct pan_tiler_context *tilerctx,
void *desc)
{
const struct panfrost_device *pdev = &dev->physical_device->pdev;
struct panvk_image_view *view;
bool crc_valid[8] = { false };
struct pan_fb_info fbinfo = {
.width = fb->width,
.height = fb->height,
.extent.maxx = fb->width - 1,
.extent.maxy = fb->height - 1,
.nr_samples = 1,
};
for (unsigned cb = 0; cb < subpass->color_count; cb++) {
int idx = subpass->color_attachments[cb].idx;
view = idx != VK_ATTACHMENT_UNUSED ?
fb->attachments[idx].iview : NULL;
if (!view)
continue;
fbinfo.rts[cb].view = &view->pview;
fbinfo.rts[cb].clear = subpass->color_attachments[cb].clear;
fbinfo.rts[cb].crc_valid = &crc_valid[cb];
memcpy(fbinfo.rts[cb].clear_value, clears[idx].color,
sizeof(fbinfo.rts[cb].clear_value));
fbinfo.nr_samples =
MAX2(fbinfo.nr_samples, view->pview.image->layout.nr_samples);
}
if (subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED) {
view = fb->attachments[subpass->zs_attachment.idx].iview;
const struct util_format_description *fdesc =
util_format_description(view->pview.format);
fbinfo.nr_samples =
MAX2(fbinfo.nr_samples, view->pview.image->layout.nr_samples);
if (util_format_has_depth(fdesc)) {
fbinfo.zs.clear.z = subpass->zs_attachment.clear;
fbinfo.zs.clear_value.depth = clears[subpass->zs_attachment.idx].depth;
fbinfo.zs.view.zs = &view->pview;
}
if (util_format_has_stencil(fdesc)) {
fbinfo.zs.clear.s = subpass->zs_attachment.clear;
fbinfo.zs.clear_value.stencil = clears[subpass->zs_attachment.idx].stencil;
if (!fbinfo.zs.view.zs)
fbinfo.zs.view.s = &view->pview;
}
}
return pan_emit_fbd(pdev, &fbinfo, tlsinfo, tilerctx, desc);
}


@ -24,12 +24,18 @@
#ifndef PANVK_CS_H
#define PANVK_CS_H
#include "pan_encoder.h"
#include <vulkan/vulkan.h>
#include "compiler/shader_enums.h"
#include "panfrost-job.h"
#include "pan_cs.h"
#include "vk_util.h"
#include "panvk_private.h"
struct pan_blend_state;
struct pan_shader_info;
struct panfrost_ptr;
@ -50,107 +56,32 @@ struct panvk_descriptor_state;
struct panvk_subpass;
struct panvk_clear_value;
void
panvk_emit_varyings(const struct panvk_device *dev,
const struct panvk_varyings_info *varyings,
gl_shader_stage stage,
void *descs);
#ifdef PAN_ARCH
static inline enum mali_func
panvk_per_arch(translate_compare_func)(VkCompareOp comp)
{
STATIC_ASSERT(VK_COMPARE_OP_NEVER == (VkCompareOp)MALI_FUNC_NEVER);
STATIC_ASSERT(VK_COMPARE_OP_LESS == (VkCompareOp)MALI_FUNC_LESS);
STATIC_ASSERT(VK_COMPARE_OP_EQUAL == (VkCompareOp)MALI_FUNC_EQUAL);
STATIC_ASSERT(VK_COMPARE_OP_LESS_OR_EQUAL == (VkCompareOp)MALI_FUNC_LEQUAL);
STATIC_ASSERT(VK_COMPARE_OP_GREATER == (VkCompareOp)MALI_FUNC_GREATER);
STATIC_ASSERT(VK_COMPARE_OP_NOT_EQUAL == (VkCompareOp)MALI_FUNC_NOT_EQUAL);
STATIC_ASSERT(VK_COMPARE_OP_GREATER_OR_EQUAL == (VkCompareOp)MALI_FUNC_GEQUAL);
STATIC_ASSERT(VK_COMPARE_OP_ALWAYS == (VkCompareOp)MALI_FUNC_ALWAYS);
void
panvk_emit_varying_bufs(const struct panvk_device *dev,
const struct panvk_varyings_info *varyings,
void *descs);
return (enum mali_func)comp;
}
void
panvk_emit_attrib_bufs(const struct panvk_device *dev,
const struct panvk_attribs_info *info,
const struct panvk_attrib_buf *bufs,
unsigned buf_count,
const struct panvk_draw_info *draw,
void *descs);
static inline enum mali_func
panvk_per_arch(translate_sampler_compare_func)(const VkSamplerCreateInfo *pCreateInfo)
{
if (!pCreateInfo->compareEnable)
return MALI_FUNC_NEVER;
void
panvk_emit_attribs(const struct panvk_device *dev,
const struct panvk_attribs_info *attribs,
const struct panvk_attrib_buf *bufs,
unsigned buf_count,
void *descs);
void
panvk_emit_ubos(const struct panvk_pipeline *pipeline,
const struct panvk_descriptor_state *state,
void *descs);
void
panvk_emit_vertex_job(const struct panvk_device *dev,
const struct panvk_pipeline *pipeline,
const struct panvk_draw_info *draw,
void *job);
void
panvk_emit_tiler_job(const struct panvk_device *dev,
const struct panvk_pipeline *pipeline,
const struct panvk_draw_info *draw,
void *job);
void
panvk_emit_fragment_job(const struct panvk_device *dev,
const struct panvk_framebuffer *fb,
mali_ptr fbdesc,
void *job);
void
panvk_emit_viewport(const VkViewport *viewport, const VkRect2D *scissor,
void *vpd);
void
panvk_emit_blend(const struct panvk_device *dev,
const struct panvk_pipeline *pipeline,
unsigned rt, void *bd);
void
panvk_emit_blend_constant(const struct panvk_device *dev,
const struct panvk_pipeline *pipeline,
unsigned rt, const float *constants, void *bd);
void
panvk_emit_dyn_fs_rsd(const struct panvk_device *dev,
const struct panvk_pipeline *pipeline,
const struct panvk_cmd_state *state,
void *rsd);
void
panvk_emit_base_fs_rsd(const struct panvk_device *dev,
const struct panvk_pipeline *pipeline,
void *rsd);
void
panvk_emit_non_fs_rsd(const struct panvk_device *dev,
const struct pan_shader_info *shader_info,
mali_ptr shader_ptr,
void *rsd);
void
panvk_emit_bifrost_tiler_context(const struct panvk_device *dev,
unsigned width, unsigned height,
const struct panfrost_ptr *descs);
unsigned
panvk_emit_fb(const struct panvk_device *dev,
const struct panvk_batch *batch,
const struct panvk_subpass *subpass,
const struct panvk_framebuffer *fb,
const struct panvk_clear_value *clears,
const struct pan_tls_info *tlsinfo,
const struct pan_tiler_context *tilerctx,
void *desc);
void
panvk_emit_tls(const struct panvk_device *dev,
const struct panvk_pipeline *pipeline,
const struct pan_compute_dim *wg_count,
struct pan_pool *tls_pool,
void *desc);
enum mali_func f = panvk_per_arch(translate_compare_func)(pCreateInfo->compareOp);
return panfrost_flip_compare_func(f);
}
#endif
void
panvk_sysval_upload_viewport_scale(const VkViewport *viewport,


@ -37,7 +37,6 @@
#include "vk_util.h"
#include "pan_bo.h"
#include "gen_macros.h"
VkResult
panvk_CreateDescriptorSetLayout(VkDevice _device,
@ -418,128 +417,18 @@ panvk_ResetDescriptorPool(VkDevice _device,
return VK_SUCCESS;
}
static VkResult
panvk_descriptor_set_create(struct panvk_device *device,
struct panvk_descriptor_pool *pool,
const struct panvk_descriptor_set_layout *layout,
struct panvk_descriptor_set **out_set)
{
const struct panfrost_device *pdev = &device->physical_device->pdev;
struct panvk_descriptor_set *set;
/* TODO: Allocate from the pool! */
set = vk_object_zalloc(&device->vk, NULL,
sizeof(struct panvk_descriptor_set),
VK_OBJECT_TYPE_DESCRIPTOR_SET);
if (!set)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
set->layout = layout;
set->descs = vk_alloc(&device->vk.alloc,
sizeof(*set->descs) * layout->num_descs, 8,
VK_OBJECT_TYPE_DESCRIPTOR_SET);
if (!set->descs)
goto err_free_set;
if (layout->num_ubos) {
set->ubos = vk_zalloc(&device->vk.alloc,
sizeof(*set->ubos) * layout->num_ubos, 8,
VK_OBJECT_TYPE_DESCRIPTOR_SET);
if (!set->ubos)
goto err_free_set;
}
if (layout->num_samplers) {
set->samplers = vk_zalloc(&device->vk.alloc,
sizeof(*set->samplers) * layout->num_samplers, 8,
VK_OBJECT_TYPE_DESCRIPTOR_SET);
if (!set->samplers)
goto err_free_set;
}
if (layout->num_textures) {
if (pan_is_bifrost(pdev)) {
set->textures.bifrost = vk_zalloc(&device->vk.alloc,
sizeof(*set->textures.bifrost) *
layout->num_textures,
8, VK_OBJECT_TYPE_DESCRIPTOR_SET);
} else {
set->textures.midgard = vk_zalloc(&device->vk.alloc,
sizeof(*set->textures.midgard) *
layout->num_textures,
8, VK_OBJECT_TYPE_DESCRIPTOR_SET);
}
if (!set->textures.midgard)
goto err_free_set;
}
for (unsigned i = 0; i < layout->binding_count; i++) {
if (!layout->bindings[i].immutable_samplers)
continue;
for (unsigned j = 0; j < layout->bindings[i].array_size; j++) {
set->descs[layout->bindings[i].desc_idx].image.sampler =
layout->bindings[i].immutable_samplers[j];
}
}
*out_set = set;
return VK_SUCCESS;
err_free_set:
vk_free(&device->vk.alloc, set->textures.midgard);
vk_free(&device->vk.alloc, set->samplers);
vk_free(&device->vk.alloc, set->ubos);
vk_free(&device->vk.alloc, set->descs);
vk_object_free(&device->vk, NULL, set);
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
}
static void
panvk_descriptor_set_destroy(struct panvk_device *device,
struct panvk_descriptor_pool *pool,
struct panvk_descriptor_set *set)
{
vk_free(&device->vk.alloc, set->textures.midgard);
vk_free(&device->vk.alloc, set->textures);
vk_free(&device->vk.alloc, set->samplers);
vk_free(&device->vk.alloc, set->ubos);
vk_free(&device->vk.alloc, set->descs);
vk_object_free(&device->vk, NULL, set);
}
VkResult
panvk_AllocateDescriptorSets(VkDevice _device,
const VkDescriptorSetAllocateInfo *pAllocateInfo,
VkDescriptorSet *pDescriptorSets)
{
VK_FROM_HANDLE(panvk_device, device, _device);
VK_FROM_HANDLE(panvk_descriptor_pool, pool, pAllocateInfo->descriptorPool);
VkResult result;
unsigned i;
for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
VK_FROM_HANDLE(panvk_descriptor_set_layout, layout,
pAllocateInfo->pSetLayouts[i]);
struct panvk_descriptor_set *set = NULL;
result = panvk_descriptor_set_create(device, pool, layout, &set);
if (result != VK_SUCCESS)
goto err_free_sets;
pDescriptorSets[i] = panvk_descriptor_set_to_handle(set);
}
return VK_SUCCESS;
err_free_sets:
panvk_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool, i, pDescriptorSets);
for (i = 0; i < pAllocateInfo->descriptorSetCount; i++)
pDescriptorSets[i] = VK_NULL_HANDLE;
return result;
}
VkResult
panvk_FreeDescriptorSets(VkDevice _device,
VkDescriptorPool descriptorPool,
@ -558,245 +447,6 @@ panvk_FreeDescriptorSets(VkDevice _device,
return VK_SUCCESS;
}
static void
panvk_set_image_desc(struct panvk_descriptor *desc,
const VkDescriptorImageInfo *pImageInfo)
{
VK_FROM_HANDLE(panvk_sampler, sampler, pImageInfo->sampler);
VK_FROM_HANDLE(panvk_image_view, image_view, pImageInfo->imageView);
desc->image.sampler = sampler;
desc->image.view = image_view;
desc->image.layout = pImageInfo->imageLayout;
}
static void
panvk_set_texel_buffer_view_desc(struct panvk_descriptor *desc,
const VkBufferView *pTexelBufferView)
{
VK_FROM_HANDLE(panvk_buffer_view, buffer_view, *pTexelBufferView);
desc->buffer_view = buffer_view;
}
static void
panvk_set_buffer_info_desc(struct panvk_descriptor *desc,
const VkDescriptorBufferInfo *pBufferInfo)
{
VK_FROM_HANDLE(panvk_buffer, buffer, pBufferInfo->buffer);
desc->buffer_info.buffer = buffer;
desc->buffer_info.offset = pBufferInfo->offset;
desc->buffer_info.range = pBufferInfo->range;
}
static void
panvk_set_ubo_desc(void *ubo,
const VkDescriptorBufferInfo *pBufferInfo)
{
VK_FROM_HANDLE(panvk_buffer, buffer, pBufferInfo->buffer);
size_t size = pBufferInfo->range == VK_WHOLE_SIZE ?
(buffer->bo->size - pBufferInfo->offset) :
pBufferInfo->range;
pan_pack(ubo, UNIFORM_BUFFER, cfg) {
cfg.pointer = buffer->bo->ptr.gpu + pBufferInfo->offset;
cfg.entries = DIV_ROUND_UP(size, 16);
}
}
static void
panvk_set_sampler_desc(void *desc,
const VkDescriptorImageInfo *pImageInfo)
{
VK_FROM_HANDLE(panvk_sampler, sampler, pImageInfo->sampler);
memcpy(desc, &sampler->desc, sizeof(sampler->desc));
}
static void
panvk_set_bifrost_texture_desc(struct mali_bifrost_texture_packed *desc,
const VkDescriptorImageInfo *pImageInfo)
{
VK_FROM_HANDLE(panvk_image_view, view, pImageInfo->imageView);
*desc = view->bifrost.tex_desc;
}
static void
panvk_set_midgard_texture_desc(mali_ptr *desc,
const VkDescriptorImageInfo *pImageInfo)
{
VK_FROM_HANDLE(panvk_image_view, view, pImageInfo->imageView);
*desc = view->bo->ptr.gpu;
}
static void
panvk_write_descriptor_set(struct panvk_device *dev,
const VkWriteDescriptorSet *pDescriptorWrite)
{
const struct panfrost_device *pdev = &dev->physical_device->pdev;
VK_FROM_HANDLE(panvk_descriptor_set, set, pDescriptorWrite->dstSet);
const struct panvk_descriptor_set_layout *layout = set->layout;
unsigned dest_offset = pDescriptorWrite->dstArrayElement;
unsigned binding = pDescriptorWrite->dstBinding;
unsigned src_offset = 0;
while (src_offset < pDescriptorWrite->descriptorCount &&
binding < layout->binding_count) {
const struct panvk_descriptor_set_binding_layout *binding_layout =
&layout->bindings[binding];
if (!binding_layout->array_size) {
binding++;
dest_offset = 0;
continue;
}
assert(pDescriptorWrite->descriptorType == binding_layout->type);
unsigned ndescs = MIN2(pDescriptorWrite->descriptorCount - src_offset,
binding_layout->array_size - dest_offset);
struct panvk_descriptor *descs = &set->descs[binding_layout->desc_idx + dest_offset];
assert(binding_layout->desc_idx + dest_offset + ndescs <= set->layout->num_descs);
switch (pDescriptorWrite->descriptorType) {
case VK_DESCRIPTOR_TYPE_SAMPLER:
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
for (unsigned i = 0; i < ndescs; i++) {
const VkDescriptorImageInfo *info = &pDescriptorWrite->pImageInfo[src_offset + i];
if (pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER ||
pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
if (binding_layout->immutable_samplers == NULL) {
unsigned sampler = binding_layout->sampler_idx + dest_offset + i;
panvk_set_sampler_desc(&set->samplers[sampler], info);
}
}
if (pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE ||
pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
unsigned tex = binding_layout->tex_idx + dest_offset + i;
if (pan_is_bifrost(pdev))
panvk_set_bifrost_texture_desc(&set->textures.bifrost[tex], info);
else
panvk_set_midgard_texture_desc(&set->textures.midgard[tex], info);
}
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
for (unsigned i = 0; i < ndescs; i++)
panvk_set_image_desc(&descs[i], &pDescriptorWrite->pImageInfo[src_offset + i]);
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
for (unsigned i = 0; i < ndescs; i++)
panvk_set_texel_buffer_view_desc(&descs[i], &pDescriptorWrite->pTexelBufferView[src_offset + i]);
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
for (unsigned i = 0; i < ndescs; i++) {
unsigned ubo = binding_layout->ubo_idx + dest_offset + i;
panvk_set_ubo_desc(&set->ubos[ubo],
&pDescriptorWrite->pBufferInfo[src_offset + i]);
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
for (unsigned i = 0; i < ndescs; i++)
panvk_set_buffer_info_desc(&descs[i], &pDescriptorWrite->pBufferInfo[src_offset + i]);
break;
default:
unreachable("Invalid type");
}
src_offset += ndescs;
binding++;
dest_offset = 0;
}
}
static void
panvk_copy_descriptor_set(struct panvk_device *dev,
const VkCopyDescriptorSet *pDescriptorCopy)
{
VK_FROM_HANDLE(panvk_descriptor_set, dest_set, pDescriptorCopy->dstSet);
VK_FROM_HANDLE(panvk_descriptor_set, src_set, pDescriptorCopy->srcSet);
const struct panvk_descriptor_set_layout *dest_layout = dest_set->layout;
const struct panvk_descriptor_set_layout *src_layout = src_set->layout;
unsigned dest_offset = pDescriptorCopy->dstArrayElement;
unsigned src_offset = pDescriptorCopy->srcArrayElement;
unsigned dest_binding = pDescriptorCopy->dstBinding;
unsigned src_binding = pDescriptorCopy->srcBinding;
unsigned desc_count = pDescriptorCopy->descriptorCount;
while (desc_count && src_binding < src_layout->binding_count &&
dest_binding < dest_layout->binding_count) {
const struct panvk_descriptor_set_binding_layout *dest_binding_layout =
&src_layout->bindings[dest_binding];
if (!dest_binding_layout->array_size) {
dest_binding++;
dest_offset = 0;
continue;
}
const struct panvk_descriptor_set_binding_layout *src_binding_layout =
&src_layout->bindings[src_binding];
if (!src_binding_layout->array_size) {
src_binding++;
src_offset = 0;
continue;
}
assert(dest_binding_layout->type == src_binding_layout->type);
unsigned ndescs = MIN3(desc_count,
dest_binding_layout->array_size - dest_offset,
src_binding_layout->array_size - src_offset);
struct panvk_descriptor *dest_descs = dest_set->descs + dest_binding_layout->desc_idx + dest_offset;
struct panvk_descriptor *src_descs = src_set->descs + src_binding_layout->desc_idx + src_offset;
memcpy(dest_descs, src_descs, ndescs * sizeof(*dest_descs));
desc_count -= ndescs;
dest_offset += ndescs;
if (dest_offset == dest_binding_layout->array_size) {
dest_binding++;
dest_offset = 0;
continue;
}
src_offset += ndescs;
if (src_offset == src_binding_layout->array_size) {
src_binding++;
src_offset = 0;
continue;
}
}
assert(!desc_count);
}
void
panvk_UpdateDescriptorSets(VkDevice _device,
uint32_t descriptorWriteCount,
const VkWriteDescriptorSet *pDescriptorWrites,
uint32_t descriptorCopyCount,
const VkCopyDescriptorSet *pDescriptorCopies)
{
VK_FROM_HANDLE(panvk_device, dev, _device);
for (unsigned i = 0; i < descriptorWriteCount; i++)
panvk_write_descriptor_set(dev, &pDescriptorWrites[i]);
for (unsigned i = 0; i < descriptorCopyCount; i++)
panvk_copy_descriptor_set(dev, &pDescriptorCopies[i]);
}
VkResult
panvk_CreateDescriptorUpdateTemplate(VkDevice _device,
const VkDescriptorUpdateTemplateCreateInfo *pCreateInfo,


@ -32,7 +32,6 @@
#include "pan_bo.h"
#include "pan_encoder.h"
#include "pan_util.h"
#include "decode.h"
#include <fcntl.h>
#include <libsync.h>
@ -199,7 +198,7 @@ panvk_physical_device_finish(struct panvk_physical_device *device)
{
panvk_wsi_finish(device);
panvk_meta_cleanup(device);
panvk_arch_dispatch(device->pdev.arch, meta_cleanup, device);
panfrost_close_device(&device->pdev);
if (device->master_fd != -1)
close(device->master_fd);
@ -306,7 +305,7 @@ panvk_physical_device_init(struct panvk_physical_device *device,
goto fail;
}
panvk_meta_init(device);
panvk_arch_dispatch(device->pdev.arch, meta_init, device);
memset(device->name, 0, sizeof(device->name));
sprintf(device->name, "%s", panfrost_model_name(device->pdev.gpu_id));
@ -955,10 +954,29 @@ panvk_CreateDevice(VkPhysicalDevice physicalDevice,
if (!device)
return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
const struct vk_device_entrypoint_table *dev_entrypoints;
struct vk_device_dispatch_table dispatch_table;
switch (physical_device->pdev.arch) {
case 5:
dev_entrypoints = &panvk_v5_device_entrypoints;
break;
case 6:
dev_entrypoints = &panvk_v6_device_entrypoints;
break;
case 7:
dev_entrypoints = &panvk_v7_device_entrypoints;
break;
default:
unreachable("Unsupported architecture");
}
vk_device_dispatch_table_from_entrypoints(&dispatch_table,
dev_entrypoints,
true);
vk_device_dispatch_table_from_entrypoints(&dispatch_table,
&panvk_device_entrypoints,
true);
false);
result = vk_device_init(&device->vk, &physical_device->vk, &dispatch_table,
pCreateInfo, pAllocator);
if (result != VK_SUCCESS) {
@ -1076,262 +1094,6 @@ panvk_GetDeviceQueue(VkDevice _device,
panvk_GetDeviceQueue2(_device, &info, pQueue);
}
static void
panvk_queue_submit_batch(struct panvk_queue *queue,
struct panvk_batch *batch,
uint32_t *bos, unsigned nr_bos,
uint32_t *in_fences,
unsigned nr_in_fences)
{
const struct panvk_device *dev = queue->device;
unsigned debug = dev->physical_device->instance->debug_flags;
const struct panfrost_device *pdev = &dev->physical_device->pdev;
int ret;
/* Reset the batch if it's already been issued */
if (batch->issued) {
util_dynarray_foreach(&batch->jobs, void *, job)
memset((*job), 0, 4 * 4);
/* Reset the tiler before re-issuing the batch */
if (pan_is_bifrost(pdev) && batch->tiler.bifrost_descs.cpu) {
memcpy(batch->tiler.bifrost_descs.cpu, &batch->tiler.templ.bifrost,
sizeof(batch->tiler.templ.bifrost));
} else if (!pan_is_bifrost(pdev) && batch->fb.desc.cpu) {
void *tiler = pan_section_ptr(batch->fb.desc.cpu, MULTI_TARGET_FRAMEBUFFER, TILER);
memcpy(tiler, &batch->tiler.templ.midgard, sizeof(batch->tiler.templ.midgard));
/* All weights set to 0, nothing to do here */
pan_section_pack(batch->fb.desc.cpu, MULTI_TARGET_FRAMEBUFFER, TILER_WEIGHTS, w);
}
}
if (batch->scoreboard.first_job) {
struct drm_panfrost_submit submit = {
.bo_handles = (uintptr_t)bos,
.bo_handle_count = nr_bos,
.in_syncs = (uintptr_t)in_fences,
.in_sync_count = nr_in_fences,
.out_sync = queue->sync,
.jc = batch->scoreboard.first_job,
};
ret = drmIoctl(pdev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
assert(!ret);
if (debug & (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC)) {
ret = drmSyncobjWait(pdev->fd, &submit.out_sync, 1, INT64_MAX, 0, NULL);
assert(!ret);
}
if (debug & PANVK_DEBUG_TRACE)
pandecode_jc(batch->scoreboard.first_job, pan_is_bifrost(pdev), pdev->gpu_id);
}
if (batch->fragment_job) {
struct drm_panfrost_submit submit = {
.bo_handles = (uintptr_t)bos,
.bo_handle_count = nr_bos,
.out_sync = queue->sync,
.jc = batch->fragment_job,
.requirements = PANFROST_JD_REQ_FS,
};
if (batch->scoreboard.first_job) {
submit.in_syncs = (uintptr_t)(&queue->sync);
submit.in_sync_count = 1;
} else {
submit.in_syncs = (uintptr_t)in_fences;
submit.in_sync_count = nr_in_fences;
}
ret = drmIoctl(pdev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
assert(!ret);
if (debug & (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC)) {
ret = drmSyncobjWait(pdev->fd, &submit.out_sync, 1, INT64_MAX, 0, NULL);
assert(!ret);
}
if (debug & PANVK_DEBUG_TRACE)
pandecode_jc(batch->fragment_job, pan_is_bifrost(pdev), pdev->gpu_id);
}
if (debug & PANVK_DEBUG_TRACE)
pandecode_next_frame();
batch->issued = true;
}
static void
panvk_queue_transfer_sync(struct panvk_queue *queue, uint32_t syncobj)
{
const struct panfrost_device *pdev = &queue->device->physical_device->pdev;
int ret;
struct drm_syncobj_handle handle = {
.handle = queue->sync,
.flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE,
.fd = -1,
};
ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &handle);
assert(!ret);
assert(handle.fd >= 0);
handle.handle = syncobj;
ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &handle);
assert(!ret);
close(handle.fd);
}
static void
panvk_add_wait_event_syncobjs(struct panvk_batch *batch, uint32_t *in_fences, unsigned *nr_in_fences)
{
util_dynarray_foreach(&batch->event_ops, struct panvk_event_op, op) {
switch (op->type) {
case PANVK_EVENT_OP_SET:
/* Nothing to do yet */
break;
case PANVK_EVENT_OP_RESET:
/* Nothing to do yet */
break;
case PANVK_EVENT_OP_WAIT:
in_fences[(*nr_in_fences)++] = op->event->syncobj;
break;
default:
unreachable("bad panvk_event_op type\n");
}
}
}
static void
panvk_signal_event_syncobjs(struct panvk_queue *queue, struct panvk_batch *batch)
{
const struct panfrost_device *pdev = &queue->device->physical_device->pdev;
util_dynarray_foreach(&batch->event_ops, struct panvk_event_op, op) {
switch (op->type) {
case PANVK_EVENT_OP_SET: {
panvk_queue_transfer_sync(queue, op->event->syncobj);
break;
}
case PANVK_EVENT_OP_RESET: {
struct panvk_event *event = op->event;
struct drm_syncobj_array objs = {
.handles = (uint64_t) (uintptr_t) &event->syncobj,
.count_handles = 1
};
int ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_RESET, &objs);
assert(!ret);
break;
}
case PANVK_EVENT_OP_WAIT:
/* Nothing left to do */
break;
default:
unreachable("bad panvk_event_op type\n");
}
}
}
VkResult
panvk_QueueSubmit(VkQueue _queue,
uint32_t submitCount,
const VkSubmitInfo *pSubmits,
VkFence _fence)
{
VK_FROM_HANDLE(panvk_queue, queue, _queue);
VK_FROM_HANDLE(panvk_fence, fence, _fence);
const struct panfrost_device *pdev = &queue->device->physical_device->pdev;
for (uint32_t i = 0; i < submitCount; ++i) {
const VkSubmitInfo *submit = pSubmits + i;
unsigned nr_semaphores = submit->waitSemaphoreCount + 1;
uint32_t semaphores[nr_semaphores];
semaphores[0] = queue->sync;
for (unsigned i = 0; i < submit->waitSemaphoreCount; i++) {
VK_FROM_HANDLE(panvk_semaphore, sem, submit->pWaitSemaphores[i]);
semaphores[i + 1] = sem->syncobj.temporary ? : sem->syncobj.permanent;
}
for (uint32_t j = 0; j < submit->commandBufferCount; ++j) {
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, (submit->pCommandBuffers[j]));
list_for_each_entry(struct panvk_batch, batch, &cmdbuf->batches, node) {
/* FIXME: should be done at the batch level */
unsigned nr_bos =
panvk_pool_num_bos(&cmdbuf->desc_pool) +
panvk_pool_num_bos(&cmdbuf->varying_pool) +
panvk_pool_num_bos(&cmdbuf->tls_pool) +
(batch->fb.info ? batch->fb.info->attachment_count : 0) +
(batch->blit.src ? 1 : 0) +
(batch->blit.dst ? 1 : 0) +
(batch->scoreboard.first_tiler ? 1 : 0) + 1;
unsigned bo_idx = 0;
uint32_t bos[nr_bos];
panvk_pool_get_bo_handles(&cmdbuf->desc_pool, &bos[bo_idx]);
bo_idx += panvk_pool_num_bos(&cmdbuf->desc_pool);
panvk_pool_get_bo_handles(&cmdbuf->varying_pool, &bos[bo_idx]);
bo_idx += panvk_pool_num_bos(&cmdbuf->varying_pool);
panvk_pool_get_bo_handles(&cmdbuf->tls_pool, &bos[bo_idx]);
bo_idx += panvk_pool_num_bos(&cmdbuf->tls_pool);
if (batch->fb.info) {
for (unsigned i = 0; i < batch->fb.info->attachment_count; i++) {
bos[bo_idx++] = batch->fb.info->attachments[i].iview->pview.image->data.bo->gem_handle;
}
}
if (batch->blit.src)
bos[bo_idx++] = batch->blit.src->gem_handle;
if (batch->blit.dst)
bos[bo_idx++] = batch->blit.dst->gem_handle;
if (batch->scoreboard.first_tiler)
bos[bo_idx++] = pdev->tiler_heap->gem_handle;
bos[bo_idx++] = pdev->sample_positions->gem_handle;
assert(bo_idx == nr_bos);
unsigned nr_in_fences = 0;
unsigned max_wait_event_syncobjs =
util_dynarray_num_elements(&batch->event_ops,
struct panvk_event_op);
uint32_t in_fences[nr_semaphores + max_wait_event_syncobjs];
memcpy(in_fences, semaphores, nr_semaphores * sizeof(*in_fences));
nr_in_fences += nr_semaphores;
panvk_add_wait_event_syncobjs(batch, in_fences, &nr_in_fences);
panvk_queue_submit_batch(queue, batch, bos, nr_bos, in_fences, nr_in_fences);
panvk_signal_event_syncobjs(queue, batch);
}
}
/* Transfer the out fence to signal semaphores */
for (unsigned i = 0; i < submit->signalSemaphoreCount; i++) {
VK_FROM_HANDLE(panvk_semaphore, sem, submit->pSignalSemaphores[i]);
panvk_queue_transfer_sync(queue, sem->syncobj.temporary ? : sem->syncobj.permanent);
}
}
if (fence) {
/* Transfer the last out fence to the fence object */
panvk_queue_transfer_sync(queue, fence->syncobj.temporary ? : fence->syncobj.permanent);
}
return VK_SUCCESS;
}
VkResult
panvk_QueueWaitIdle(VkQueue _queue)
{
@ -1891,202 +1653,6 @@ panvk_DestroyFramebuffer(VkDevice _device,
vk_object_free(&device->vk, pAllocator, fb);
}
static enum mali_mipmap_mode
panvk_translate_sampler_mipmap_mode(VkSamplerMipmapMode mode)
{
switch (mode) {
case VK_SAMPLER_MIPMAP_MODE_NEAREST: return MALI_MIPMAP_MODE_NEAREST;
case VK_SAMPLER_MIPMAP_MODE_LINEAR: return MALI_MIPMAP_MODE_TRILINEAR;
default: unreachable("Invalid mipmap mode");
}
}
static unsigned
panvk_translate_sampler_address_mode(VkSamplerAddressMode mode)
{
switch (mode) {
case VK_SAMPLER_ADDRESS_MODE_REPEAT: return MALI_WRAP_MODE_REPEAT;
case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return MALI_WRAP_MODE_MIRRORED_REPEAT;
case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE;
case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return MALI_WRAP_MODE_CLAMP_TO_BORDER;
case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE;
default: unreachable("Invalid wrap");
}
}
static enum mali_func
panvk_translate_sampler_compare_func(const VkSamplerCreateInfo *pCreateInfo)
{
if (!pCreateInfo->compareEnable)
return MALI_FUNC_NEVER;
enum mali_func f = panvk_translate_compare_func(pCreateInfo->compareOp);
return panfrost_flip_compare_func(f);
}
static void
panvk_init_midgard_sampler(struct panvk_sampler *sampler,
const VkSamplerCreateInfo *pCreateInfo)
{
const VkSamplerCustomBorderColorCreateInfoEXT *pBorderColor =
vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
pan_pack(&sampler->desc, MIDGARD_SAMPLER, cfg) {
cfg.magnify_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST;
cfg.minify_nearest = pCreateInfo->minFilter == VK_FILTER_NEAREST;
cfg.mipmap_mode = panvk_translate_sampler_mipmap_mode(pCreateInfo->mipmapMode);
cfg.normalized_coordinates = !pCreateInfo->unnormalizedCoordinates;
cfg.lod_bias = FIXED_16(pCreateInfo->mipLodBias, true);
cfg.minimum_lod = FIXED_16(pCreateInfo->minLod, false);
cfg.maximum_lod = FIXED_16(pCreateInfo->maxLod, false);
cfg.wrap_mode_s = panvk_translate_sampler_address_mode(pCreateInfo->addressModeU);
cfg.wrap_mode_t = panvk_translate_sampler_address_mode(pCreateInfo->addressModeV);
cfg.wrap_mode_r = panvk_translate_sampler_address_mode(pCreateInfo->addressModeW);
cfg.compare_function = panvk_translate_sampler_compare_func(pCreateInfo);
switch (pCreateInfo->borderColor) {
case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
cfg.border_color_r = fui(0.0);
cfg.border_color_g = fui(0.0);
cfg.border_color_b = fui(0.0);
cfg.border_color_a =
pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK ?
fui(1.0) : fui(0.0);
break;
case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
cfg.border_color_r = 0;
cfg.border_color_g = 0;
cfg.border_color_b = 0;
cfg.border_color_a =
pCreateInfo->borderColor == VK_BORDER_COLOR_INT_OPAQUE_BLACK ?
UINT_MAX : 0;
break;
case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
cfg.border_color_r = fui(1.0);
cfg.border_color_g = fui(1.0);
cfg.border_color_b = fui(1.0);
cfg.border_color_a = fui(1.0);
break;
case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
cfg.border_color_r = UINT_MAX;
cfg.border_color_g = UINT_MAX;
cfg.border_color_b = UINT_MAX;
cfg.border_color_a = UINT_MAX;
break;
case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
case VK_BORDER_COLOR_INT_CUSTOM_EXT:
cfg.border_color_r = pBorderColor->customBorderColor.int32[0];
cfg.border_color_g = pBorderColor->customBorderColor.int32[1];
cfg.border_color_b = pBorderColor->customBorderColor.int32[2];
cfg.border_color_a = pBorderColor->customBorderColor.int32[3];
break;
default:
unreachable("Invalid border color");
}
}
}
static void
panvk_init_bifrost_sampler(struct panvk_sampler *sampler,
const VkSamplerCreateInfo *pCreateInfo)
{
const VkSamplerCustomBorderColorCreateInfoEXT *pBorderColor =
vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
pan_pack(&sampler->desc, BIFROST_SAMPLER, cfg) {
cfg.magnify_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST;
cfg.minify_nearest = pCreateInfo->minFilter == VK_FILTER_NEAREST;
cfg.mipmap_mode = panvk_translate_sampler_mipmap_mode(pCreateInfo->mipmapMode);
cfg.normalized_coordinates = !pCreateInfo->unnormalizedCoordinates;
cfg.lod_bias = FIXED_16(pCreateInfo->mipLodBias, true);
cfg.minimum_lod = FIXED_16(pCreateInfo->minLod, false);
cfg.maximum_lod = FIXED_16(pCreateInfo->maxLod, false);
cfg.wrap_mode_s = panvk_translate_sampler_address_mode(pCreateInfo->addressModeU);
cfg.wrap_mode_t = panvk_translate_sampler_address_mode(pCreateInfo->addressModeV);
cfg.wrap_mode_r = panvk_translate_sampler_address_mode(pCreateInfo->addressModeW);
cfg.compare_function = panvk_translate_sampler_compare_func(pCreateInfo);
switch (pCreateInfo->borderColor) {
case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
cfg.border_color_r = fui(0.0);
cfg.border_color_g = fui(0.0);
cfg.border_color_b = fui(0.0);
cfg.border_color_a =
pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK ?
fui(1.0) : fui(0.0);
break;
case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
cfg.border_color_r = 0;
cfg.border_color_g = 0;
cfg.border_color_b = 0;
cfg.border_color_a =
pCreateInfo->borderColor == VK_BORDER_COLOR_INT_OPAQUE_BLACK ?
UINT_MAX : 0;
break;
case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
cfg.border_color_r = fui(1.0);
cfg.border_color_g = fui(1.0);
cfg.border_color_b = fui(1.0);
cfg.border_color_a = fui(1.0);
break;
case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
cfg.border_color_r = UINT_MAX;
cfg.border_color_g = UINT_MAX;
cfg.border_color_b = UINT_MAX;
cfg.border_color_a = UINT_MAX;
break;
case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
case VK_BORDER_COLOR_INT_CUSTOM_EXT:
cfg.border_color_r = pBorderColor->customBorderColor.int32[0];
cfg.border_color_g = pBorderColor->customBorderColor.int32[1];
cfg.border_color_b = pBorderColor->customBorderColor.int32[2];
cfg.border_color_a = pBorderColor->customBorderColor.int32[3];
break;
default:
unreachable("Invalid border color");
}
}
}
static void
panvk_init_sampler(struct panvk_device *device,
struct panvk_sampler *sampler,
const VkSamplerCreateInfo *pCreateInfo)
{
if (pan_is_bifrost(&device->physical_device->pdev))
panvk_init_bifrost_sampler(sampler, pCreateInfo);
else
panvk_init_midgard_sampler(sampler, pCreateInfo);
}
VkResult
panvk_CreateSampler(VkDevice _device,
const VkSamplerCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkSampler *pSampler)
{
VK_FROM_HANDLE(panvk_device, device, _device);
struct panvk_sampler *sampler;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
sampler = vk_object_alloc(&device->vk, pAllocator, sizeof(*sampler),
VK_OBJECT_TYPE_SAMPLER);
if (!sampler)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
panvk_init_sampler(device, sampler, pCreateInfo);
*pSampler = panvk_sampler_to_handle(sampler);
return VK_SUCCESS;
}
void
panvk_DestroySampler(VkDevice _device,
VkSampler _sampler,


@ -276,121 +276,6 @@ panvk_GetImageSubresourceLayout(VkDevice _device,
pLayout->depthPitch = slice_layout->surface_stride;
}
static enum mali_texture_dimension
panvk_view_type_to_mali_tex_dim(VkImageViewType type)
{
switch (type) {
case VK_IMAGE_VIEW_TYPE_1D:
case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
return MALI_TEXTURE_DIMENSION_1D;
case VK_IMAGE_VIEW_TYPE_2D:
case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
return MALI_TEXTURE_DIMENSION_2D;
case VK_IMAGE_VIEW_TYPE_3D:
return MALI_TEXTURE_DIMENSION_3D;
case VK_IMAGE_VIEW_TYPE_CUBE:
case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
return MALI_TEXTURE_DIMENSION_CUBE;
default:
unreachable("Invalid view type");
}
}
static void
panvk_convert_swizzle(const VkComponentMapping *in,
unsigned char *out)
{
const VkComponentSwizzle *comp = &in->r;
for (unsigned i = 0; i < 4; i++) {
switch (comp[i]) {
case VK_COMPONENT_SWIZZLE_IDENTITY:
out[i] = PIPE_SWIZZLE_X + i;
break;
case VK_COMPONENT_SWIZZLE_ZERO:
out[i] = PIPE_SWIZZLE_0;
break;
case VK_COMPONENT_SWIZZLE_ONE:
out[i] = PIPE_SWIZZLE_1;
break;
case VK_COMPONENT_SWIZZLE_R:
out[i] = PIPE_SWIZZLE_X;
break;
case VK_COMPONENT_SWIZZLE_G:
out[i] = PIPE_SWIZZLE_Y;
break;
case VK_COMPONENT_SWIZZLE_B:
out[i] = PIPE_SWIZZLE_Z;
break;
case VK_COMPONENT_SWIZZLE_A:
out[i] = PIPE_SWIZZLE_W;
break;
default:
unreachable("Invalid swizzle");
}
}
}
VkResult
panvk_CreateImageView(VkDevice _device,
const VkImageViewCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkImageView *pView)
{
VK_FROM_HANDLE(panvk_device, device, _device);
VK_FROM_HANDLE(panvk_image, image, pCreateInfo->image);
struct panvk_image_view *view;
view = vk_object_zalloc(&device->vk, pAllocator, sizeof(*view),
VK_OBJECT_TYPE_IMAGE_VIEW);
if (view == NULL)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
view->pview.format = vk_format_to_pipe_format(pCreateInfo->format);
if (pCreateInfo->subresourceRange.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT)
view->pview.format = util_format_get_depth_only(view->pview.format);
else if (pCreateInfo->subresourceRange.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
view->pview.format = util_format_stencil_only(view->pview.format);
view->pview.dim = panvk_view_type_to_mali_tex_dim(pCreateInfo->viewType);
view->pview.first_level = pCreateInfo->subresourceRange.baseMipLevel;
view->pview.last_level = pCreateInfo->subresourceRange.baseMipLevel +
pCreateInfo->subresourceRange.levelCount - 1;
view->pview.first_layer = pCreateInfo->subresourceRange.baseArrayLayer;
view->pview.last_layer = pCreateInfo->subresourceRange.baseArrayLayer +
pCreateInfo->subresourceRange.layerCount - 1;
panvk_convert_swizzle(&pCreateInfo->components, view->pview.swizzle);
view->pview.image = &image->pimage;
view->pview.nr_samples = image->pimage.layout.nr_samples;
view->vk_format = pCreateInfo->format;
struct panfrost_device *pdev = &device->physical_device->pdev;
unsigned bo_size =
panfrost_estimate_texture_payload_size(pdev, &view->pview);
unsigned surf_descs_offset = 0;
if (!pan_is_bifrost(pdev)) {
bo_size += pan_size(MIDGARD_TEXTURE);
surf_descs_offset = pan_size(MIDGARD_TEXTURE);
}
view->bo = panfrost_bo_create(pdev, bo_size, 0, "Texture descriptor");
struct panfrost_ptr surf_descs = {
.cpu = view->bo->ptr.cpu + surf_descs_offset,
.gpu = view->bo->ptr.gpu + surf_descs_offset,
};
void *tex_desc = pan_is_bifrost(pdev) ?
&view->bifrost.tex_desc : view->bo->ptr.cpu;
panfrost_new_texture(pdev, &view->pview, tex_desc, &surf_descs);
*pView = panvk_image_view_to_handle(view);
return VK_SUCCESS;
}
void
panvk_DestroyImageView(VkDevice _device,
VkImageView _view,


@ -40,956 +40,8 @@
#include "vk_format.h"
#include "vk_util.h"
#include "panfrost/util/pan_lower_framebuffer.h"
#include "panfrost-quirks.h"
struct panvk_pipeline_builder
{
struct panvk_device *device;
struct panvk_pipeline_cache *cache;
const VkAllocationCallbacks *alloc;
const VkGraphicsPipelineCreateInfo *create_info;
const struct panvk_pipeline_layout *layout;
struct panvk_shader *shaders[MESA_SHADER_STAGES];
struct {
uint32_t shader_offset;
uint32_t rsd_offset;
uint32_t sysvals_offset;
} stages[MESA_SHADER_STAGES];
uint32_t blend_shader_offsets[MAX_RTS];
uint32_t shader_total_size;
uint32_t static_state_size;
uint32_t vpd_offset;
bool rasterizer_discard;
/* these states are affected by rasterizer_discard */
VkSampleCountFlagBits samples;
bool use_depth_stencil_attachment;
uint8_t active_color_attachments;
enum pipe_format color_attachment_formats[MAX_RTS];
};
static VkResult
panvk_pipeline_builder_create_pipeline(struct panvk_pipeline_builder *builder,
struct panvk_pipeline **out_pipeline)
{
struct panvk_device *dev = builder->device;
struct panvk_pipeline *pipeline =
vk_object_zalloc(&dev->vk, builder->alloc,
sizeof(*pipeline), VK_OBJECT_TYPE_PIPELINE);
if (!pipeline)
return VK_ERROR_OUT_OF_HOST_MEMORY;
pipeline->layout = builder->layout;
*out_pipeline = pipeline;
return VK_SUCCESS;
}
static void
panvk_pipeline_builder_finish(struct panvk_pipeline_builder *builder)
{
for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
if (!builder->shaders[i])
continue;
panvk_shader_destroy(builder->device, builder->shaders[i], builder->alloc);
}
}
static bool
panvk_pipeline_static_state(struct panvk_pipeline *pipeline, uint32_t id)
{
return !(pipeline->dynamic_state_mask & (1 << id));
}
static VkResult
panvk_pipeline_builder_compile_shaders(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = {
NULL
};
for (uint32_t i = 0; i < builder->create_info->stageCount; i++) {
gl_shader_stage stage = vk_to_mesa_shader_stage(builder->create_info->pStages[i].stage);
stage_infos[stage] = &builder->create_info->pStages[i];
}
/* compile shaders in reverse order */
unsigned sysval_ubo = builder->layout->num_ubos;
for (gl_shader_stage stage = MESA_SHADER_STAGES - 1;
stage > MESA_SHADER_NONE; stage--) {
const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage];
if (!stage_info)
continue;
struct panvk_shader *shader;
shader = panvk_shader_create(builder->device, stage, stage_info,
builder->layout, sysval_ubo,
&pipeline->blend.state,
panvk_pipeline_static_state(pipeline,
VK_DYNAMIC_STATE_BLEND_CONSTANTS),
builder->alloc);
if (!shader)
return VK_ERROR_OUT_OF_HOST_MEMORY;
if (shader->info.sysvals.sysval_count)
sysval_ubo++;
builder->shaders[stage] = shader;
builder->shader_total_size = ALIGN_POT(builder->shader_total_size, 128);
builder->stages[stage].shader_offset = builder->shader_total_size;
builder->shader_total_size +=
util_dynarray_num_elements(&shader->binary, uint8_t);
}
return VK_SUCCESS;
}
static VkResult
panvk_pipeline_builder_upload_shaders(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
struct panfrost_bo *bin_bo =
panfrost_bo_create(&builder->device->physical_device->pdev,
builder->shader_total_size, PAN_BO_EXECUTE,
"Shader");
pipeline->binary_bo = bin_bo;
panfrost_bo_mmap(bin_bo);
for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
const struct panvk_shader *shader = builder->shaders[i];
if (!shader)
continue;
memcpy(pipeline->binary_bo->ptr.cpu + builder->stages[i].shader_offset,
util_dynarray_element(&shader->binary, uint8_t, 0),
util_dynarray_num_elements(&shader->binary, uint8_t));
}
return VK_SUCCESS;
}
static bool
panvk_pipeline_static_sysval(struct panvk_pipeline *pipeline,
unsigned id)
{
switch (id) {
case PAN_SYSVAL_VIEWPORT_SCALE:
case PAN_SYSVAL_VIEWPORT_OFFSET:
return panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT);
default:
return false;
}
}
static void
panvk_pipeline_builder_alloc_static_state_bo(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
struct panfrost_device *pdev =
&builder->device->physical_device->pdev;
unsigned bo_size = 0;
for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
const struct panvk_shader *shader = builder->shaders[i];
if (!shader)
continue;
if (pipeline->fs.dynamic_rsd && i == MESA_SHADER_FRAGMENT)
continue;
bo_size = ALIGN_POT(bo_size, pan_alignment(RENDERER_STATE));
builder->stages[i].rsd_offset = bo_size;
bo_size += pan_size(RENDERER_STATE);
if (i == MESA_SHADER_FRAGMENT)
bo_size += pan_size(BLEND) * pipeline->blend.state.rt_count;
}
if (panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT) &&
panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_SCISSOR)) {
bo_size = ALIGN_POT(bo_size, pan_alignment(VIEWPORT));
builder->vpd_offset = bo_size;
bo_size += pan_size(VIEWPORT);
}
for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
const struct panvk_shader *shader = builder->shaders[i];
if (!shader || !shader->info.sysvals.sysval_count)
continue;
bool static_sysvals = true;
for (unsigned s = 0; s < shader->info.sysvals.sysval_count; s++) {
unsigned id = shader->info.sysvals.sysvals[s];
static_sysvals &= panvk_pipeline_static_sysval(pipeline, id);
switch (PAN_SYSVAL_TYPE(id)) {
case PAN_SYSVAL_VIEWPORT_SCALE:
case PAN_SYSVAL_VIEWPORT_OFFSET:
pipeline->sysvals[i].dirty_mask |= PANVK_DYNAMIC_VIEWPORT;
break;
default:
break;
}
}
if (!static_sysvals) {
builder->stages[i].sysvals_offset = ~0;
continue;
}
bo_size = ALIGN_POT(bo_size, 16);
builder->stages[i].sysvals_offset = bo_size;
bo_size += shader->info.sysvals.sysval_count * 16;
}
if (bo_size) {
pipeline->state_bo =
panfrost_bo_create(pdev, bo_size, 0, "Pipeline descriptors");
panfrost_bo_mmap(pipeline->state_bo);
}
}
static void
panvk_pipeline_builder_upload_sysval(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline,
unsigned id, union panvk_sysval_data *data)
{
switch (PAN_SYSVAL_TYPE(id)) {
case PAN_SYSVAL_VIEWPORT_SCALE:
panvk_sysval_upload_viewport_scale(builder->create_info->pViewportState->pViewports,
data);
break;
case PAN_SYSVAL_VIEWPORT_OFFSET:
panvk_sysval_upload_viewport_offset(builder->create_info->pViewportState->pViewports,
data);
break;
default:
unreachable("Invalid static sysval");
}
}
static void
panvk_pipeline_builder_init_sysvals(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline,
gl_shader_stage stage)
{
const struct panvk_shader *shader = builder->shaders[stage];
pipeline->sysvals[stage].ids = shader->info.sysvals;
pipeline->sysvals[stage].ubo_idx = shader->sysval_ubo;
if (!shader->info.sysvals.sysval_count ||
builder->stages[stage].sysvals_offset == ~0)
return;
union panvk_sysval_data *static_data =
pipeline->state_bo->ptr.cpu + builder->stages[stage].sysvals_offset;
pipeline->sysvals[stage].ubo =
pipeline->state_bo->ptr.gpu + builder->stages[stage].sysvals_offset;
for (unsigned i = 0; i < shader->info.sysvals.sysval_count; i++) {
unsigned id = shader->info.sysvals.sysvals[i];
panvk_pipeline_builder_upload_sysval(builder,
pipeline,
id, &static_data[i]);
}
}
static void
panvk_pipeline_builder_init_shaders(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
const struct panvk_shader *shader = builder->shaders[i];
if (!shader)
continue;
pipeline->tls_size = MAX2(pipeline->tls_size, shader->info.tls_size);
pipeline->wls_size = MAX2(pipeline->wls_size, shader->info.wls_size);
if (i == MESA_SHADER_VERTEX && shader->info.vs.writes_point_size)
pipeline->ia.writes_point_size = true;
mali_ptr shader_ptr = pipeline->binary_bo->ptr.gpu +
builder->stages[i].shader_offset;
void *rsd = pipeline->state_bo->ptr.cpu + builder->stages[i].rsd_offset;
mali_ptr gpu_rsd = pipeline->state_bo->ptr.gpu + builder->stages[i].rsd_offset;
if (i != MESA_SHADER_FRAGMENT) {
panvk_emit_non_fs_rsd(builder->device, &shader->info, shader_ptr, rsd);
} else if (!pipeline->fs.dynamic_rsd) {
void *bd = rsd + pan_size(RENDERER_STATE);
panvk_emit_base_fs_rsd(builder->device, pipeline, rsd);
for (unsigned rt = 0; rt < MAX2(pipeline->blend.state.rt_count, 1); rt++) {
panvk_emit_blend(builder->device, pipeline, rt, bd);
bd += pan_size(BLEND);
}
} else {
gpu_rsd = 0;
panvk_emit_base_fs_rsd(builder->device, pipeline, &pipeline->fs.rsd_template);
for (unsigned rt = 0; rt < MAX2(pipeline->blend.state.rt_count, 1); rt++) {
panvk_emit_blend(builder->device, pipeline, rt,
&pipeline->blend.bd_template[rt]);
}
}
pipeline->rsds[i] = gpu_rsd;
panvk_pipeline_builder_init_sysvals(builder, pipeline, i);
}
pipeline->num_ubos = builder->layout->num_ubos;
for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++) {
if (pipeline->sysvals[i].ids.sysval_count)
pipeline->num_ubos = MAX2(pipeline->num_ubos, pipeline->sysvals[i].ubo_idx + 1);
}
pipeline->num_sysvals = 0;
for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++)
pipeline->num_sysvals += pipeline->sysvals[i].ids.sysval_count;
}
static void
panvk_pipeline_builder_parse_viewport(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
/* The spec says:
*
* pViewportState is a pointer to an instance of the
* VkPipelineViewportStateCreateInfo structure, and is ignored if the
* pipeline has rasterization disabled.
*/
if (!builder->rasterizer_discard &&
panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT) &&
panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_SCISSOR)) {
void *vpd = pipeline->state_bo->ptr.cpu + builder->vpd_offset;
panvk_emit_viewport(builder->create_info->pViewportState->pViewports,
builder->create_info->pViewportState->pScissors,
vpd);
pipeline->vpd = pipeline->state_bo->ptr.gpu +
builder->vpd_offset;
} else {
if (builder->create_info->pViewportState->pViewports)
pipeline->viewport = builder->create_info->pViewportState->pViewports[0];
if (builder->create_info->pViewportState->pScissors)
pipeline->scissor = builder->create_info->pViewportState->pScissors[0];
}
}
static void
panvk_pipeline_builder_parse_dynamic(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
const VkPipelineDynamicStateCreateInfo *dynamic_info =
builder->create_info->pDynamicState;
if (!dynamic_info)
return;
for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) {
VkDynamicState state = dynamic_info->pDynamicStates[i];
switch (state) {
case VK_DYNAMIC_STATE_VIEWPORT ... VK_DYNAMIC_STATE_STENCIL_REFERENCE:
pipeline->dynamic_state_mask |= 1 << state;
break;
default:
unreachable("unsupported dynamic state");
}
}
}
static enum mali_draw_mode
translate_prim_topology(VkPrimitiveTopology in)
{
switch (in) {
case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
return MALI_DRAW_MODE_POINTS;
case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
return MALI_DRAW_MODE_LINES;
case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
return MALI_DRAW_MODE_LINE_STRIP;
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
return MALI_DRAW_MODE_TRIANGLES;
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
return MALI_DRAW_MODE_TRIANGLE_STRIP;
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
return MALI_DRAW_MODE_TRIANGLE_FAN;
case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
default:
unreachable("Invalid primitive type");
}
}
static void
panvk_pipeline_builder_parse_input_assembly(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
pipeline->ia.primitive_restart =
builder->create_info->pInputAssemblyState->primitiveRestartEnable;
pipeline->ia.topology =
translate_prim_topology(builder->create_info->pInputAssemblyState->topology);
}
static enum pipe_logicop
translate_logicop(VkLogicOp in)
{
switch (in) {
case VK_LOGIC_OP_CLEAR: return PIPE_LOGICOP_CLEAR;
case VK_LOGIC_OP_AND: return PIPE_LOGICOP_AND;
case VK_LOGIC_OP_AND_REVERSE: return PIPE_LOGICOP_AND_REVERSE;
case VK_LOGIC_OP_COPY: return PIPE_LOGICOP_COPY;
case VK_LOGIC_OP_AND_INVERTED: return PIPE_LOGICOP_AND_INVERTED;
case VK_LOGIC_OP_NO_OP: return PIPE_LOGICOP_NOOP;
case VK_LOGIC_OP_XOR: return PIPE_LOGICOP_XOR;
case VK_LOGIC_OP_OR: return PIPE_LOGICOP_OR;
case VK_LOGIC_OP_NOR: return PIPE_LOGICOP_NOR;
case VK_LOGIC_OP_EQUIVALENT: return PIPE_LOGICOP_EQUIV;
case VK_LOGIC_OP_INVERT: return PIPE_LOGICOP_INVERT;
case VK_LOGIC_OP_OR_REVERSE: return PIPE_LOGICOP_OR_REVERSE;
case VK_LOGIC_OP_COPY_INVERTED: return PIPE_LOGICOP_COPY_INVERTED;
case VK_LOGIC_OP_OR_INVERTED: return PIPE_LOGICOP_OR_INVERTED;
case VK_LOGIC_OP_NAND: return PIPE_LOGICOP_NAND;
case VK_LOGIC_OP_SET: return PIPE_LOGICOP_SET;
default: unreachable("Invalid logicop");
}
}
static enum blend_func
translate_blend_op(VkBlendOp in)
{
switch (in) {
case VK_BLEND_OP_ADD: return BLEND_FUNC_ADD;
case VK_BLEND_OP_SUBTRACT: return BLEND_FUNC_SUBTRACT;
case VK_BLEND_OP_REVERSE_SUBTRACT: return BLEND_FUNC_REVERSE_SUBTRACT;
case VK_BLEND_OP_MIN: return BLEND_FUNC_MIN;
case VK_BLEND_OP_MAX: return BLEND_FUNC_MAX;
default: unreachable("Invalid blend op");
}
}
static enum blend_factor
translate_blend_factor(VkBlendFactor in, bool dest_has_alpha)
{
switch (in) {
case VK_BLEND_FACTOR_ZERO:
case VK_BLEND_FACTOR_ONE:
return BLEND_FACTOR_ZERO;
case VK_BLEND_FACTOR_SRC_COLOR:
case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
return BLEND_FACTOR_SRC_COLOR;
case VK_BLEND_FACTOR_DST_COLOR:
case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
return BLEND_FACTOR_DST_COLOR;
case VK_BLEND_FACTOR_SRC_ALPHA:
case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
return BLEND_FACTOR_SRC_ALPHA;
case VK_BLEND_FACTOR_DST_ALPHA:
case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
return dest_has_alpha ? BLEND_FACTOR_DST_ALPHA : BLEND_FACTOR_ZERO;
case VK_BLEND_FACTOR_CONSTANT_COLOR:
case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
return BLEND_FACTOR_CONSTANT_COLOR;
case VK_BLEND_FACTOR_CONSTANT_ALPHA:
case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
return BLEND_FACTOR_CONSTANT_ALPHA;
case VK_BLEND_FACTOR_SRC1_COLOR:
case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
return BLEND_FACTOR_SRC1_COLOR;
case VK_BLEND_FACTOR_SRC1_ALPHA:
case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
return BLEND_FACTOR_SRC1_ALPHA;
case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
return BLEND_FACTOR_SRC_ALPHA_SATURATE;
default: unreachable("Invalid blend factor");
}
}
static bool
inverted_blend_factor(VkBlendFactor in, bool dest_has_alpha)
{
switch (in) {
case VK_BLEND_FACTOR_ONE:
case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
return true;
case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
return dest_has_alpha;
case VK_BLEND_FACTOR_DST_ALPHA:
return !dest_has_alpha;
default:
return false;
}
}
bool
panvk_blend_needs_lowering(const struct panfrost_device *dev,
const struct pan_blend_state *state,
unsigned rt)
{
/* LogicOp requires a blend shader */
if (state->logicop_enable)
return true;
/* Not all formats can be blended by fixed-function hardware */
if (!panfrost_blendable_formats_v7[state->rts[rt].format].internal)
return true;
unsigned constant_mask = pan_blend_constant_mask(state->rts[rt].equation);
/* v6 doesn't support blend constants in FF blend equations.
* v7 only uses the constant from RT 0 (TODO: what if it's the same
* constant? or a constant is shared?)
*/
if (constant_mask && (dev->arch == 6 || (dev->arch == 7 && rt > 0)))
return true;
if (!pan_blend_is_homogenous_constant(constant_mask, state->constants))
return true;
bool supports_2src = pan_blend_supports_2src(dev->arch);
return !pan_blend_can_fixed_function(state->rts[rt].equation, supports_2src);
}
static void
panvk_pipeline_builder_parse_color_blend(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
struct panfrost_device *pdev = &builder->device->physical_device->pdev;
pipeline->blend.state.logicop_enable =
builder->create_info->pColorBlendState->logicOpEnable;
pipeline->blend.state.logicop_func =
translate_logicop(builder->create_info->pColorBlendState->logicOp);
pipeline->blend.state.rt_count = util_last_bit(builder->active_color_attachments);
memcpy(pipeline->blend.state.constants,
builder->create_info->pColorBlendState->blendConstants,
sizeof(pipeline->blend.state.constants));
for (unsigned i = 0; i < pipeline->blend.state.rt_count; i++) {
const VkPipelineColorBlendAttachmentState *in =
&builder->create_info->pColorBlendState->pAttachments[i];
struct pan_blend_rt_state *out = &pipeline->blend.state.rts[i];
out->format = builder->color_attachment_formats[i];
bool dest_has_alpha = util_format_has_alpha(out->format);
out->nr_samples = builder->create_info->pMultisampleState->rasterizationSamples;
out->equation.blend_enable = in->blendEnable;
out->equation.color_mask = in->colorWriteMask;
out->equation.rgb_func = translate_blend_op(in->colorBlendOp);
out->equation.rgb_src_factor = translate_blend_factor(in->srcColorBlendFactor, dest_has_alpha);
out->equation.rgb_invert_src_factor = inverted_blend_factor(in->srcColorBlendFactor, dest_has_alpha);
out->equation.rgb_dst_factor = translate_blend_factor(in->dstColorBlendFactor, dest_has_alpha);
out->equation.rgb_invert_dst_factor = inverted_blend_factor(in->dstColorBlendFactor, dest_has_alpha);
out->equation.alpha_func = translate_blend_op(in->alphaBlendOp);
out->equation.alpha_src_factor = translate_blend_factor(in->srcAlphaBlendFactor, dest_has_alpha);
out->equation.alpha_invert_src_factor = inverted_blend_factor(in->srcAlphaBlendFactor, dest_has_alpha);
out->equation.alpha_dst_factor = translate_blend_factor(in->dstAlphaBlendFactor, dest_has_alpha);
out->equation.alpha_invert_dst_factor = inverted_blend_factor(in->dstAlphaBlendFactor, dest_has_alpha);
unsigned constant_mask =
panvk_blend_needs_lowering(pdev, &pipeline->blend.state, i) ?
0 : pan_blend_constant_mask(out->equation);
pipeline->blend.constant[i].index = ffs(constant_mask) - 1;
if (constant_mask && pan_is_bifrost(pdev)) {
/* On Bifrost, the blend constant is expressed with a UNORM of the
* size of the target format. The value is then shifted such that
* used bits are in the MSB. Here we calculate the factor at pipeline
* creation time so we only have to do a
* hw_constant = float_constant * factor;
* at descriptor emission time.
*/
const struct util_format_description *format_desc =
util_format_description(out->format);
unsigned chan_size = 0;
for (unsigned c = 0; c < format_desc->nr_channels; c++)
chan_size = MAX2(format_desc->channel[c].size, chan_size);
pipeline->blend.constant[i].bifrost_factor =
((1 << chan_size) - 1) << (16 - chan_size);
}
}
}
static void
panvk_pipeline_builder_parse_multisample(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
unsigned nr_samples =
MAX2(builder->create_info->pMultisampleState->rasterizationSamples, 1);
pipeline->ms.rast_samples =
builder->create_info->pMultisampleState->rasterizationSamples;
pipeline->ms.sample_mask =
builder->create_info->pMultisampleState->pSampleMask ?
builder->create_info->pMultisampleState->pSampleMask[0] : UINT16_MAX;
pipeline->ms.min_samples =
MAX2(builder->create_info->pMultisampleState->minSampleShading * nr_samples, 1);
}
static enum mali_stencil_op
translate_stencil_op(VkStencilOp in)
{
switch (in) {
case VK_STENCIL_OP_KEEP: return MALI_STENCIL_OP_KEEP;
case VK_STENCIL_OP_ZERO: return MALI_STENCIL_OP_ZERO;
case VK_STENCIL_OP_REPLACE: return MALI_STENCIL_OP_REPLACE;
case VK_STENCIL_OP_INCREMENT_AND_CLAMP: return MALI_STENCIL_OP_INCR_SAT;
case VK_STENCIL_OP_DECREMENT_AND_CLAMP: return MALI_STENCIL_OP_DECR_SAT;
case VK_STENCIL_OP_INCREMENT_AND_WRAP: return MALI_STENCIL_OP_INCR_WRAP;
case VK_STENCIL_OP_DECREMENT_AND_WRAP: return MALI_STENCIL_OP_DECR_WRAP;
case VK_STENCIL_OP_INVERT: return MALI_STENCIL_OP_INVERT;
default: unreachable("Invalid stencil op");
}
}
static void
panvk_pipeline_builder_parse_zs(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
pipeline->zs.z_test = builder->create_info->pDepthStencilState->depthTestEnable;
pipeline->zs.z_write = builder->create_info->pDepthStencilState->depthWriteEnable;
pipeline->zs.z_compare_func =
panvk_translate_compare_func(builder->create_info->pDepthStencilState->depthCompareOp);
pipeline->zs.s_test = builder->create_info->pDepthStencilState->stencilTestEnable;
pipeline->zs.s_front.fail_op =
translate_stencil_op(builder->create_info->pDepthStencilState->front.failOp);
pipeline->zs.s_front.pass_op =
translate_stencil_op(builder->create_info->pDepthStencilState->front.passOp);
pipeline->zs.s_front.z_fail_op =
translate_stencil_op(builder->create_info->pDepthStencilState->front.depthFailOp);
pipeline->zs.s_front.compare_func =
panvk_translate_compare_func(builder->create_info->pDepthStencilState->front.compareOp);
pipeline->zs.s_front.compare_mask =
builder->create_info->pDepthStencilState->front.compareMask;
pipeline->zs.s_front.write_mask =
builder->create_info->pDepthStencilState->front.writeMask;
pipeline->zs.s_front.ref =
builder->create_info->pDepthStencilState->front.reference;
pipeline->zs.s_back.fail_op =
translate_stencil_op(builder->create_info->pDepthStencilState->back.failOp);
pipeline->zs.s_back.pass_op =
translate_stencil_op(builder->create_info->pDepthStencilState->back.passOp);
pipeline->zs.s_back.z_fail_op =
translate_stencil_op(builder->create_info->pDepthStencilState->back.depthFailOp);
pipeline->zs.s_back.compare_func =
panvk_translate_compare_func(builder->create_info->pDepthStencilState->back.compareOp);
pipeline->zs.s_back.compare_mask =
builder->create_info->pDepthStencilState->back.compareMask;
pipeline->zs.s_back.write_mask =
builder->create_info->pDepthStencilState->back.writeMask;
pipeline->zs.s_back.ref =
builder->create_info->pDepthStencilState->back.reference;
}
static void
panvk_pipeline_builder_parse_rast(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
pipeline->rast.clamp_depth = builder->create_info->pRasterizationState->depthClampEnable;
pipeline->rast.depth_bias.enable = builder->create_info->pRasterizationState->depthBiasEnable;
pipeline->rast.depth_bias.constant_factor =
builder->create_info->pRasterizationState->depthBiasConstantFactor;
pipeline->rast.depth_bias.clamp = builder->create_info->pRasterizationState->depthBiasClamp;
pipeline->rast.depth_bias.slope_factor = builder->create_info->pRasterizationState->depthBiasSlopeFactor;
pipeline->rast.front_ccw = builder->create_info->pRasterizationState->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE;
pipeline->rast.cull_front_face = builder->create_info->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT;
pipeline->rast.cull_back_face = builder->create_info->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT;
}
static bool
panvk_fs_required(struct panvk_pipeline *pipeline)
{
const struct pan_shader_info *info = &pipeline->fs.info;
/* If we generally have side effects */
if (info->fs.sidefx)
return true;
/* If colour is written we need to execute */
const struct pan_blend_state *blend = &pipeline->blend.state;
for (unsigned i = 0; i < blend->rt_count; ++i) {
if (blend->rts[i].equation.color_mask)
return true;
}
/* If depth is written and not implied we need to execute.
* TODO: Predicate on Z/S writes being enabled */
return (info->fs.writes_depth || info->fs.writes_stencil);
}
#define PANVK_DYNAMIC_FS_RSD_MASK \
((1 << VK_DYNAMIC_STATE_DEPTH_BIAS) | \
(1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS) | \
(1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK) | \
(1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK) | \
(1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))
static void
panvk_pipeline_builder_init_fs_state(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
if (!builder->shaders[MESA_SHADER_FRAGMENT])
return;
pipeline->fs.dynamic_rsd =
pipeline->dynamic_state_mask & PANVK_DYNAMIC_FS_RSD_MASK;
pipeline->fs.address = pipeline->binary_bo->ptr.gpu +
builder->stages[MESA_SHADER_FRAGMENT].shader_offset;
pipeline->fs.info = builder->shaders[MESA_SHADER_FRAGMENT]->info;
pipeline->fs.required = panvk_fs_required(pipeline);
}
static void
panvk_pipeline_update_varying_slot(struct panvk_varyings_info *varyings,
gl_shader_stage stage,
const struct pan_shader_varying *varying,
bool input)
{
bool fs = stage == MESA_SHADER_FRAGMENT;
gl_varying_slot loc = varying->location;
enum panvk_varying_buf_id buf_id =
panvk_varying_buf_id(fs, loc);
varyings->stage[stage].loc[varyings->stage[stage].count++] = loc;
if (panvk_varying_is_builtin(stage, loc)) {
varyings->buf_mask |= 1 << buf_id;
return;
}
assert(loc < ARRAY_SIZE(varyings->varying));
enum pipe_format new_fmt = varying->format;
enum pipe_format old_fmt = varyings->varying[loc].format;
BITSET_SET(varyings->active, loc);
/* We expect inputs to either be set by a previous stage or be built
* in, skip the entry if that's not the case, we'll emit a const
* varying returning zero for those entries.
*/
if (input && old_fmt == PIPE_FORMAT_NONE)
return;
unsigned new_size = util_format_get_blocksize(new_fmt);
unsigned old_size = util_format_get_blocksize(old_fmt);
if (old_size < new_size)
varyings->varying[loc].format = new_fmt;
varyings->buf_mask |= 1 << buf_id;
}
static void
panvk_pipeline_builder_collect_varyings(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
for (uint32_t s = 0; s < MESA_SHADER_STAGES; s++) {
if (!builder->shaders[s])
continue;
const struct pan_shader_info *info = &builder->shaders[s]->info;
for (unsigned i = 0; i < info->varyings.input_count; i++) {
panvk_pipeline_update_varying_slot(&pipeline->varyings, s,
&info->varyings.input[i],
true);
}
for (unsigned i = 0; i < info->varyings.output_count; i++) {
panvk_pipeline_update_varying_slot(&pipeline->varyings, s,
&info->varyings.output[i],
false);
}
}
/* TODO: Xfb */
gl_varying_slot loc;
BITSET_FOREACH_SET(loc, pipeline->varyings.active, VARYING_SLOT_MAX) {
enum panvk_varying_buf_id buf_id =
panvk_varying_buf_id(false, loc);
unsigned buf_idx = panvk_varying_buf_index(&pipeline->varyings, buf_id);
unsigned varying_sz = panvk_varying_size(&pipeline->varyings, loc);
pipeline->varyings.varying[loc].buf = buf_idx;
pipeline->varyings.varying[loc].offset =
pipeline->varyings.buf[buf_idx].stride;
pipeline->varyings.buf[buf_idx].stride += varying_sz;
}
}
static void
panvk_pipeline_builder_parse_vertex_input(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
struct panvk_attribs_info *attribs = &pipeline->attribs;
const VkPipelineVertexInputStateCreateInfo *info =
builder->create_info->pVertexInputState;
for (unsigned i = 0; i < info->vertexBindingDescriptionCount; i++) {
const VkVertexInputBindingDescription *desc =
&info->pVertexBindingDescriptions[i];
attribs->buf_count = MAX2(desc->binding + 1, attribs->buf_count);
attribs->buf[desc->binding].stride = desc->stride;
attribs->buf[desc->binding].special = false;
}
for (unsigned i = 0; i < info->vertexAttributeDescriptionCount; i++) {
const VkVertexInputAttributeDescription *desc =
&info->pVertexAttributeDescriptions[i];
attribs->attrib[desc->location].buf = desc->binding;
attribs->attrib[desc->location].format =
vk_format_to_pipe_format(desc->format);
attribs->attrib[desc->location].offset = desc->offset;
}
const struct pan_shader_info *vs =
&builder->shaders[MESA_SHADER_VERTEX]->info;
if (vs->attribute_count >= PAN_VERTEX_ID) {
attribs->buf[attribs->buf_count].special = true;
attribs->buf[attribs->buf_count].special_id = PAN_VERTEX_ID;
attribs->attrib[PAN_VERTEX_ID].buf = attribs->buf_count++;
attribs->attrib[PAN_VERTEX_ID].format = PIPE_FORMAT_R32_UINT;
}
if (vs->attribute_count >= PAN_INSTANCE_ID) {
attribs->buf[attribs->buf_count].special = true;
attribs->buf[attribs->buf_count].special_id = PAN_INSTANCE_ID;
attribs->attrib[PAN_INSTANCE_ID].buf = attribs->buf_count++;
attribs->attrib[PAN_INSTANCE_ID].format = PIPE_FORMAT_R32_UINT;
}
attribs->attrib_count = MAX2(attribs->attrib_count, vs->attribute_count);
}
static VkResult
panvk_pipeline_builder_build(struct panvk_pipeline_builder *builder,
struct panvk_pipeline **pipeline)
{
VkResult result = panvk_pipeline_builder_create_pipeline(builder, pipeline);
if (result != VK_SUCCESS)
return result;
/* TODO: make those functions return a result and handle errors */
panvk_pipeline_builder_parse_dynamic(builder, *pipeline);
panvk_pipeline_builder_parse_color_blend(builder, *pipeline);
panvk_pipeline_builder_compile_shaders(builder, *pipeline);
panvk_pipeline_builder_collect_varyings(builder, *pipeline);
panvk_pipeline_builder_parse_input_assembly(builder, *pipeline);
panvk_pipeline_builder_parse_multisample(builder, *pipeline);
panvk_pipeline_builder_parse_zs(builder, *pipeline);
panvk_pipeline_builder_parse_rast(builder, *pipeline);
panvk_pipeline_builder_parse_vertex_input(builder, *pipeline);
panvk_pipeline_builder_upload_shaders(builder, *pipeline);
panvk_pipeline_builder_init_fs_state(builder, *pipeline);
panvk_pipeline_builder_alloc_static_state_bo(builder, *pipeline);
panvk_pipeline_builder_init_shaders(builder, *pipeline);
panvk_pipeline_builder_parse_viewport(builder, *pipeline);
return VK_SUCCESS;
}
static void
panvk_pipeline_builder_init_graphics(struct panvk_pipeline_builder *builder,
struct panvk_device *dev,
struct panvk_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *create_info,
const VkAllocationCallbacks *alloc)
{
VK_FROM_HANDLE(panvk_pipeline_layout, layout, create_info->layout);
assert(layout);
*builder = (struct panvk_pipeline_builder) {
.device = dev,
.cache = cache,
.layout = layout,
.create_info = create_info,
.alloc = alloc,
};
builder->rasterizer_discard =
create_info->pRasterizationState->rasterizerDiscardEnable;
if (builder->rasterizer_discard) {
builder->samples = VK_SAMPLE_COUNT_1_BIT;
} else {
builder->samples = create_info->pMultisampleState->rasterizationSamples;
const struct panvk_render_pass *pass = panvk_render_pass_from_handle(create_info->renderPass);
const struct panvk_subpass *subpass = &pass->subpasses[create_info->subpass];
builder->use_depth_stencil_attachment =
subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED;
assert(subpass->color_count == create_info->pColorBlendState->attachmentCount);
builder->active_color_attachments = 0;
for (uint32_t i = 0; i < subpass->color_count; i++) {
uint32_t idx = subpass->color_attachments[i].idx;
if (idx == VK_ATTACHMENT_UNUSED)
continue;
builder->active_color_attachments |= 1 << i;
builder->color_attachment_formats[i] = pass->attachments[idx].format;
}
}
}
VkResult
panvk_CreateGraphicsPipelines(VkDevice device,
VkPipelineCache pipelineCache,
uint32_t count,
const VkGraphicsPipelineCreateInfo *pCreateInfos,
const VkAllocationCallbacks *pAllocator,
VkPipeline *pPipelines)
{
VK_FROM_HANDLE(panvk_device, dev, device);
VK_FROM_HANDLE(panvk_pipeline_cache, cache, pipelineCache);
for (uint32_t i = 0; i < count; i++) {
struct panvk_pipeline_builder builder;
panvk_pipeline_builder_init_graphics(&builder, dev, cache,
&pCreateInfos[i], pAllocator);
struct panvk_pipeline *pipeline;
VkResult result = panvk_pipeline_builder_build(&builder, &pipeline);
panvk_pipeline_builder_finish(&builder);
if (result != VK_SUCCESS) {
for (uint32_t j = 0; j < i; j++) {
panvk_DestroyPipeline(device, pPipelines[j], pAllocator);
pPipelines[j] = VK_NULL_HANDLE;
}
return result;
}
pPipelines[i] = panvk_pipeline_to_handle(pipeline);
}
return VK_SUCCESS;
}
VkResult
panvk_CreateComputePipelines(VkDevice _device,
VkPipelineCache pipelineCache,


@ -57,16 +57,12 @@
#include "drm-uapi/panfrost_drm.h"
#include "midgard/midgard_compile.h"
#include "pan_blend.h"
#include "pan_blitter.h"
#include "pan_cs.h"
#include "pan_device.h"
#include "panvk_mempool.h"
#include "pan_texture.h"
#include "pan_scoreboard.h"
#include "pan_shader.h"
#include "vk_extensions.h"
#include "panvk_varyings.h"
@ -172,13 +168,6 @@ struct panvk_physical_device {
int master_fd;
};
void
panvk_meta_init(struct panvk_physical_device *dev);
void
panvk_meta_cleanup(struct panvk_physical_device *dev);
enum panvk_debug_flags {
PANVK_DEBUG_STARTUP = 1 << 0,
PANVK_DEBUG_NIR = 1 << 1,
@ -253,6 +242,8 @@ panvk_device_is_lost(struct panvk_device *device)
return unlikely(p_atomic_read(&device->_lost));
}
#define TILER_DESC_WORDS 56
struct panvk_batch {
struct list_head node;
struct util_dynarray jobs;
@ -269,14 +260,8 @@ struct panvk_batch {
mali_ptr fragment_job;
struct {
struct pan_tiler_context ctx;
struct panfrost_ptr bifrost_descs;
union {
struct {
struct mali_bifrost_tiler_heap_packed heap;
struct mali_bifrost_tiler_packed tiler;
} bifrost;
struct mali_midgard_tiler_packed midgard;
} templ;
struct panfrost_ptr descs;
uint32_t templ[TILER_DESC_WORDS];
} tiler;
bool issued;
};
@ -343,12 +328,9 @@ struct panvk_descriptor_set {
struct panvk_descriptor_pool *pool;
const struct panvk_descriptor_set_layout *layout;
struct panvk_descriptor *descs;
struct mali_uniform_buffer_packed *ubos;
struct mali_midgard_sampler_packed *samplers;
union {
struct mali_bifrost_texture_packed *bifrost;
mali_ptr *midgard;
} textures;
void *ubos;
void *samplers;
void *textures;
};
#define MAX_SETS 4
@ -483,6 +465,8 @@ struct panvk_descriptor_state {
mali_ptr samplers;
};
#define INVOCATION_DESC_WORDS 2
struct panvk_draw_info {
unsigned first_index;
unsigned index_count;
@ -493,7 +477,7 @@ struct panvk_draw_info {
unsigned instance_count;
int vertex_offset;
unsigned offset_start;
struct mali_invocation_packed invocation;
uint32_t invocation[INVOCATION_DESC_WORDS];
struct {
mali_ptr varyings;
mali_ptr attributes;
@ -665,24 +649,6 @@ struct panvk_cmd_buffer {
void
panvk_cmd_open_batch(struct panvk_cmd_buffer *cmdbuf);
void
panvk_cmd_close_batch(struct panvk_cmd_buffer *cmdbuf);
void
panvk_cmd_get_midgard_polygon_list(struct panvk_cmd_buffer *cmdbuf,
unsigned width, unsigned height,
bool has_draws);
void
panvk_cmd_get_bifrost_tiler_context(struct panvk_cmd_buffer *cmdbuf,
unsigned width, unsigned height);
void
panvk_cmd_alloc_fb_desc(struct panvk_cmd_buffer *cmdbuf);
void
panvk_cmd_alloc_tls_desc(struct panvk_cmd_buffer *cmdbuf);
void
panvk_pack_color(struct panvk_clear_value *out,
const VkClearColorValue *in,
@ -729,6 +695,9 @@ union panvk_sysval_data {
uint64_t u64[2];
};
#define RSD_WORDS 16
#define BLEND_DESC_WORDS 4
struct panvk_pipeline {
struct vk_object_base base;
@ -763,13 +732,13 @@ struct panvk_pipeline {
struct {
mali_ptr address;
struct pan_shader_info info;
struct mali_renderer_state_packed rsd_template;
uint32_t rsd_template[RSD_WORDS];
bool required;
bool dynamic_rsd;
} fs;
struct {
enum mali_draw_mode topology;
unsigned topology;
bool writes_point_size;
bool primitive_restart;
} ia;
@ -791,13 +760,13 @@ struct panvk_pipeline {
struct {
bool z_test;
bool z_write;
enum mali_func z_compare_func;
unsigned z_compare_func;
bool s_test;
struct {
enum mali_stencil_op fail_op;
enum mali_stencil_op pass_op;
enum mali_stencil_op z_fail_op;
enum mali_func compare_func;
unsigned fail_op;
unsigned pass_op;
unsigned z_fail_op;
unsigned compare_func;
uint8_t compare_mask;
uint8_t write_mask;
uint8_t ref;
@ -814,7 +783,7 @@ struct panvk_pipeline {
struct {
struct pan_blend_state state;
struct mali_blend_packed bd_template[8];
uint32_t bd_template[8][BLEND_DESC_WORDS];
struct {
uint8_t index;
uint16_t bifrost_factor;
@ -825,11 +794,6 @@ struct panvk_pipeline {
VkRect2D scissor;
};
bool
panvk_blend_needs_lowering(const struct panfrost_device *dev,
const struct pan_blend_state *state,
unsigned rt);
struct panvk_image_level {
VkDeviceSize offset;
VkDeviceSize size;
@ -901,20 +865,22 @@ panvk_image_get_plane_size(const struct panvk_image *image, unsigned plane);
unsigned
panvk_image_get_total_size(const struct panvk_image *image);
#define TEXTURE_DESC_WORDS 8
struct panvk_image_view {
struct vk_object_base base;
struct pan_image_view pview;
VkFormat vk_format;
struct panfrost_bo *bo;
struct {
struct mali_bifrost_texture_packed tex_desc;
} bifrost;
uint32_t desc[TEXTURE_DESC_WORDS];
};
#define SAMPLER_DESC_WORDS 8
struct panvk_sampler {
struct vk_object_base base;
struct mali_midgard_sampler_packed desc;
uint32_t desc[SAMPLER_DESC_WORDS];
};
struct panvk_buffer_view {
@ -988,21 +954,6 @@ struct panvk_render_pass {
struct panvk_subpass subpasses[0];
};
static inline enum mali_func
panvk_translate_compare_func(VkCompareOp comp)
{
STATIC_ASSERT(VK_COMPARE_OP_NEVER == (VkCompareOp)MALI_FUNC_NEVER);
STATIC_ASSERT(VK_COMPARE_OP_LESS == (VkCompareOp)MALI_FUNC_LESS);
STATIC_ASSERT(VK_COMPARE_OP_EQUAL == (VkCompareOp)MALI_FUNC_EQUAL);
STATIC_ASSERT(VK_COMPARE_OP_LESS_OR_EQUAL == (VkCompareOp)MALI_FUNC_LEQUAL);
STATIC_ASSERT(VK_COMPARE_OP_GREATER == (VkCompareOp)MALI_FUNC_GREATER);
STATIC_ASSERT(VK_COMPARE_OP_NOT_EQUAL == (VkCompareOp)MALI_FUNC_NOT_EQUAL);
STATIC_ASSERT(VK_COMPARE_OP_GREATER_OR_EQUAL == (VkCompareOp)MALI_FUNC_GEQUAL);
STATIC_ASSERT(VK_COMPARE_OP_ALWAYS == (VkCompareOp)MALI_FUNC_ALWAYS);
return (enum mali_func)comp;
}
VK_DEFINE_HANDLE_CASTS(panvk_cmd_buffer, base, VkCommandBuffer, VK_OBJECT_TYPE_COMMAND_BUFFER)
VK_DEFINE_HANDLE_CASTS(panvk_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
VK_DEFINE_HANDLE_CASTS(panvk_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
@ -1030,4 +981,68 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_sampler, base, VkSampler, VK_OBJECT_TYPE_SA
VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_shader_module, base, VkShaderModule, VK_OBJECT_TYPE_SHADER_MODULE)
VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_semaphore, base, VkSemaphore, VK_OBJECT_TYPE_SEMAPHORE)
#define panvk_arch_name(name, version) panvk_## version ## _ ## name
#define panvk_arch_dispatch(arch, name, ...) \
do { \
switch (arch) { \
case 5: panvk_arch_name(name, v5)(__VA_ARGS__); break; \
case 6: panvk_arch_name(name, v6)(__VA_ARGS__); break; \
case 7: panvk_arch_name(name, v7)(__VA_ARGS__); break; \
default: unreachable("Invalid arch"); \
} \
} while (0)
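/* Illustrative expansion (foo is a placeholder entrypoint name):
 *    panvk_arch_dispatch(6, foo, dev);
 * calls panvk_arch_name(foo, v6)(dev), i.e. panvk_v6_foo(dev).
 */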
#ifdef PAN_ARCH
#if PAN_ARCH == 5
#define panvk_per_arch(name) panvk_arch_name(name, v5)
#elif PAN_ARCH == 6
#define panvk_per_arch(name) panvk_arch_name(name, v6)
#elif PAN_ARCH == 7
#define panvk_per_arch(name) panvk_arch_name(name, v7)
#endif
#include "panvk_vX_cmd_buffer.h"
#include "panvk_vX_cs.h"
#include "panvk_vX_meta.h"
#else
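/* PAN_ARCH is not defined: this translation unit is arch-independent, so the
 * per-arch headers are pulled in once per supported arch (v5, v6, v7) to make
 * all the panvk_vN_* prototypes visible.
 */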
#define PAN_ARCH 5
#define panvk_per_arch(name) panvk_arch_name(name, v5)
#include "panvk_vX_cmd_buffer.h"
#include "panvk_vX_cs.h"
#include "panvk_vX_meta.h"
#undef PAN_ARCH
#undef panvk_per_arch
#define PAN_ARCH 6
#define panvk_per_arch(name) panvk_arch_name(name, v6)
#include "panvk_vX_cmd_buffer.h"
#include "panvk_vX_cs.h"
#include "panvk_vX_meta.h"
#undef PAN_ARCH
#undef panvk_per_arch
#define PAN_ARCH 7
#define panvk_per_arch(name) panvk_arch_name(name, v7)
#include "panvk_vX_cmd_buffer.h"
#include "panvk_vX_cs.h"
#include "panvk_vX_meta.h"
#undef PAN_ARCH
#undef panvk_per_arch
#endif
#ifdef PAN_ARCH
bool
panvk_per_arch(blend_needs_lowering)(const struct panfrost_device *dev,
const struct pan_blend_state *state,
unsigned rt);
struct panvk_shader *
panvk_per_arch(shader_create)(struct panvk_device *dev,
gl_shader_stage stage,
const VkPipelineShaderStageCreateInfo *stage_info,
const struct panvk_pipeline_layout *layout,
unsigned sysval_ubo,
struct pan_blend_state *blend_state,
bool static_blend_constants,
const VkAllocationCallbacks *alloc);
#endif
#endif /* PANVK_PRIVATE_H */


@ -36,353 +36,6 @@
#include "vk_util.h"
static nir_shader *
panvk_spirv_to_nir(const void *code,
size_t codesize,
gl_shader_stage stage,
const char *entry_point_name,
const VkSpecializationInfo *spec_info,
const nir_shader_compiler_options *nir_options)
{
/* TODO these are made-up */
const struct spirv_to_nir_options spirv_options = {
.caps = { false },
.ubo_addr_format = nir_address_format_32bit_index_offset,
.ssbo_addr_format = nir_address_format_32bit_index_offset,
};
/* convert VkSpecializationInfo */
uint32_t num_spec = 0;
struct nir_spirv_specialization *spec =
vk_spec_info_to_nir_spirv(spec_info, &num_spec);
nir_shader *nir = spirv_to_nir(code, codesize / sizeof(uint32_t), spec,
num_spec, stage, entry_point_name,
&spirv_options, nir_options);
free(spec);
assert(nir->info.stage == stage);
nir_validate_shader(nir, "after spirv_to_nir");
return nir;
}
struct panvk_lower_misc_ctx {
struct panvk_shader *shader;
const struct panvk_pipeline_layout *layout;
};
static unsigned
get_fixed_sampler_index(nir_deref_instr *deref,
const struct panvk_lower_misc_ctx *ctx)
{
nir_variable *var = nir_deref_instr_get_variable(deref);
unsigned set = var->data.descriptor_set;
unsigned binding = var->data.binding;
const struct panvk_descriptor_set_binding_layout *bind_layout =
&ctx->layout->sets[set].layout->bindings[binding];
return bind_layout->sampler_idx + ctx->layout->sets[set].sampler_offset;
}
static unsigned
get_fixed_texture_index(nir_deref_instr *deref,
const struct panvk_lower_misc_ctx *ctx)
{
nir_variable *var = nir_deref_instr_get_variable(deref);
unsigned set = var->data.descriptor_set;
unsigned binding = var->data.binding;
const struct panvk_descriptor_set_binding_layout *bind_layout =
&ctx->layout->sets[set].layout->bindings[binding];
return bind_layout->tex_idx + ctx->layout->sets[set].tex_offset;
}
static bool
lower_tex(nir_builder *b, nir_tex_instr *tex,
const struct panvk_lower_misc_ctx *ctx)
{
bool progress = false;
int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
b->cursor = nir_before_instr(&tex->instr);
if (sampler_src_idx >= 0) {
nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
tex->sampler_index = get_fixed_sampler_index(deref, ctx);
nir_tex_instr_remove_src(tex, sampler_src_idx);
progress = true;
}
int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
if (tex_src_idx >= 0) {
nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src);
tex->texture_index = get_fixed_texture_index(deref, ctx);
nir_tex_instr_remove_src(tex, tex_src_idx);
progress = true;
}
return progress;
}
static void
lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *intr,
const struct panvk_lower_misc_ctx *ctx)
{
nir_ssa_def *vulkan_idx = intr->src[0].ssa;
unsigned set = nir_intrinsic_desc_set(intr);
unsigned binding = nir_intrinsic_binding(intr);
struct panvk_descriptor_set_layout *set_layout = ctx->layout->sets[set].layout;
struct panvk_descriptor_set_binding_layout *binding_layout =
&set_layout->bindings[binding];
unsigned base;
switch (binding_layout->type) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
base = binding_layout->ubo_idx + ctx->layout->sets[set].ubo_offset;
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
base = binding_layout->ssbo_idx + ctx->layout->sets[set].ssbo_offset;
break;
default:
unreachable("Invalid descriptor type");
break;
}
b->cursor = nir_before_instr(&intr->instr);
nir_ssa_def *idx = nir_iadd(b, nir_imm_int(b, base), vulkan_idx);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, idx);
nir_instr_remove(&intr->instr);
}
static void
lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin)
{
/* Loading the descriptor happens as part of the load/store instruction so
* this is a no-op.
*/
b->cursor = nir_before_instr(&intrin->instr);
nir_ssa_def *val = nir_vec2(b, intrin->src[0].ssa, nir_imm_int(b, 0));
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, val);
nir_instr_remove(&intrin->instr);
}
static bool
lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
const struct panvk_lower_misc_ctx *ctx)
{
switch (intr->intrinsic) {
case nir_intrinsic_vulkan_resource_index:
lower_vulkan_resource_index(b, intr, ctx);
return true;
case nir_intrinsic_load_vulkan_descriptor:
lower_load_vulkan_descriptor(b, intr);
return true;
default:
return false;
}
}
static bool
panvk_lower_misc_instr(nir_builder *b,
nir_instr *instr,
void *data)
{
const struct panvk_lower_misc_ctx *ctx = data;
switch (instr->type) {
case nir_instr_type_tex:
return lower_tex(b, nir_instr_as_tex(instr), ctx);
case nir_instr_type_intrinsic:
return lower_intrinsic(b, nir_instr_as_intrinsic(instr), ctx);
default:
return false;
}
}
static bool
panvk_lower_misc(nir_shader *nir, const struct panvk_lower_misc_ctx *ctx)
{
return nir_shader_instructions_pass(nir, panvk_lower_misc_instr,
nir_metadata_block_index |
nir_metadata_dominance,
(void *)ctx);
}
static void
panvk_lower_blend(struct panfrost_device *pdev,
nir_shader *nir,
struct pan_blend_state *blend_state,
bool static_blend_constants)
{
nir_lower_blend_options options = {
.logicop_enable = blend_state->logicop_enable,
.logicop_func = blend_state->logicop_func,
};
bool lower_blend = false;
for (unsigned rt = 0; rt < blend_state->rt_count; rt++) {
if (!panvk_blend_needs_lowering(pdev, blend_state, rt))
continue;
const struct pan_blend_rt_state *rt_state = &blend_state->rts[rt];
options.rt[rt].colormask = rt_state->equation.color_mask;
options.format[rt] = rt_state->format;
if (!rt_state->equation.blend_enable) {
static const nir_lower_blend_channel replace = {
.func = BLEND_FUNC_ADD,
.src_factor = BLEND_FACTOR_ZERO,
.invert_src_factor = true,
.dst_factor = BLEND_FACTOR_ZERO,
.invert_dst_factor = false,
};
options.rt[rt].rgb = replace;
options.rt[rt].alpha = replace;
} else {
options.rt[rt].rgb.func = rt_state->equation.rgb_func;
options.rt[rt].rgb.src_factor = rt_state->equation.rgb_src_factor;
options.rt[rt].rgb.invert_src_factor = rt_state->equation.rgb_invert_src_factor;
options.rt[rt].rgb.dst_factor = rt_state->equation.rgb_dst_factor;
options.rt[rt].rgb.invert_dst_factor = rt_state->equation.rgb_invert_dst_factor;
options.rt[rt].alpha.func = rt_state->equation.alpha_func;
options.rt[rt].alpha.src_factor = rt_state->equation.alpha_src_factor;
options.rt[rt].alpha.invert_src_factor = rt_state->equation.alpha_invert_src_factor;
options.rt[rt].alpha.dst_factor = rt_state->equation.alpha_dst_factor;
options.rt[rt].alpha.invert_dst_factor = rt_state->equation.alpha_invert_dst_factor;
}
lower_blend = true;
}
/* FIXME: currently untested */
assert(!lower_blend);
if (lower_blend)
NIR_PASS_V(nir, nir_lower_blend, options);
}
struct panvk_shader *
panvk_shader_create(struct panvk_device *dev,
gl_shader_stage stage,
const VkPipelineShaderStageCreateInfo *stage_info,
const struct panvk_pipeline_layout *layout,
unsigned sysval_ubo,
struct pan_blend_state *blend_state,
bool static_blend_constants,
const VkAllocationCallbacks *alloc)
{
const struct panvk_shader_module *module = panvk_shader_module_from_handle(stage_info->module);
struct panfrost_device *pdev = &dev->physical_device->pdev;
struct panvk_shader *shader;
shader = vk_zalloc2(&dev->vk.alloc, alloc, sizeof(*shader), 8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!shader)
return NULL;
util_dynarray_init(&shader->binary, NULL);
/* translate SPIR-V to NIR */
assert(module->code_size % 4 == 0);
nir_shader *nir = panvk_spirv_to_nir(module->code,
module->code_size,
stage, stage_info->pName,
stage_info->pSpecializationInfo,
pan_shader_get_compiler_options(pdev));
if (!nir) {
vk_free2(&dev->vk.alloc, alloc, shader);
return NULL;
}
if (stage == MESA_SHADER_FRAGMENT)
panvk_lower_blend(pdev, nir, blend_state, static_blend_constants);
/* multi step inlining procedure */
NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
NIR_PASS_V(nir, nir_lower_returns);
NIR_PASS_V(nir, nir_inline_functions);
NIR_PASS_V(nir, nir_copy_prop);
NIR_PASS_V(nir, nir_opt_deref);
foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
if (!func->is_entrypoint)
exec_node_remove(&func->node);
}
assert(exec_list_length(&nir->functions) == 1);
NIR_PASS_V(nir, nir_lower_variable_initializers, ~nir_var_function_temp);
/* Split member structs. We do this before lower_io_to_temporaries so that
* it doesn't lower system values to temporaries by accident.
*/
NIR_PASS_V(nir, nir_split_var_copies);
NIR_PASS_V(nir, nir_split_per_member_structs);
NIR_PASS_V(nir, nir_remove_dead_variables,
nir_var_shader_in | nir_var_shader_out |
nir_var_system_value | nir_var_mem_shared,
NULL);
NIR_PASS_V(nir, nir_lower_io_to_temporaries,
nir_shader_get_entrypoint(nir), true, true);
NIR_PASS_V(nir, nir_lower_indirect_derefs,
nir_var_shader_in | nir_var_shader_out,
UINT32_MAX);
NIR_PASS_V(nir, nir_opt_copy_prop_vars);
NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all);
NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, true, false);
NIR_PASS_V(nir, nir_lower_explicit_io,
nir_var_mem_ubo | nir_var_mem_ssbo,
nir_address_format_32bit_index_offset);
nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, stage);
nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, stage);
NIR_PASS_V(nir, nir_lower_system_values);
NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
NIR_PASS_V(nir, nir_lower_var_copies);
struct panvk_lower_misc_ctx ctx = {
.shader = shader,
.layout = layout,
};
NIR_PASS_V(nir, panvk_lower_misc, &ctx);
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
if (unlikely(dev->physical_device->instance->debug_flags & PANVK_DEBUG_NIR)) {
fprintf(stderr, "translated nir:\n");
nir_print_shader(nir, stderr);
}
struct panfrost_compile_inputs inputs = {
.gpu_id = pdev->gpu_id,
.no_ubo_to_push = true,
.sysval_ubo = sysval_ubo,
};
pan_shader_compile(pdev, nir, &inputs, &shader->binary, &shader->info);
/* Patch the descriptor count */
shader->info.ubo_count =
shader->info.sysvals.sysval_count ? sysval_ubo + 1 : layout->num_ubos;
shader->info.sampler_count = layout->num_samplers;
shader->info.texture_count = layout->num_textures;
shader->sysval_ubo = sysval_ubo;
ralloc_free(nir);
return shader;
}
void
panvk_shader_destroy(struct panvk_device *dev,
struct panvk_shader *shader,

File diff suppressed because it is too large


@ -21,38 +21,34 @@
* DEALINGS IN THE SOFTWARE.
*/
#include "panvk_private.h"
#include "panvk_varyings.h"
#ifndef PANVK_PRIVATE_H
#error "Must be included from panvk_private.h"
#endif
#include "pan_pool.h"
#ifndef PAN_ARCH
#error "no arch"
#endif
unsigned
panvk_varyings_buf_count(const struct panvk_device *dev,
struct panvk_varyings_info *varyings)
{
const struct panfrost_device *pdev = &dev->physical_device->pdev;
return util_bitcount(varyings->buf_mask) + (pan_is_bifrost(pdev) ? 1 : 0);
}
#include <vulkan/vulkan.h>
#include "compiler/shader_enums.h"
void
panvk_varyings_alloc(struct panvk_varyings_info *varyings,
struct pan_pool *varying_mem_pool,
unsigned vertex_count)
{
for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) {
if (!(varyings->buf_mask & (1 << i))) continue;
panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf);
unsigned buf_idx = panvk_varying_buf_index(varyings, i);
unsigned size = varyings->buf[buf_idx].stride * vertex_count;
if (!size)
continue;
struct panfrost_ptr ptr =
pan_pool_alloc_aligned(varying_mem_pool, size, 64);
#if PAN_ARCH <= 5
void
panvk_per_arch(cmd_get_polygon_list)(struct panvk_cmd_buffer *cmdbuf,
unsigned width, unsigned height,
bool has_draws);
#else
void
panvk_per_arch(cmd_get_tiler_context)(struct panvk_cmd_buffer *cmdbuf,
unsigned width, unsigned height);
#endif
varyings->buf[buf_idx].size = size;
varyings->buf[buf_idx].address = ptr.gpu;
varyings->buf[buf_idx].cpu = ptr.cpu;
}
}
void
panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf);
void
panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf);


@ -0,0 +1,919 @@
/*
* Copyright (C) 2021 Collabora Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "gen_macros.h"
#include "util/macros.h"
#include "compiler/shader_enums.h"
#include "vk_util.h"
#include "panfrost-quirks.h"
#include "pan_cs.h"
#include "pan_encoder.h"
#include "pan_pool.h"
#include "pan_shader.h"
#include "panvk_cs.h"
#include "panvk_private.h"
#include "panvk_varyings.h"
static enum mali_mipmap_mode
panvk_translate_sampler_mipmap_mode(VkSamplerMipmapMode mode)
{
switch (mode) {
case VK_SAMPLER_MIPMAP_MODE_NEAREST: return MALI_MIPMAP_MODE_NEAREST;
case VK_SAMPLER_MIPMAP_MODE_LINEAR: return MALI_MIPMAP_MODE_TRILINEAR;
default: unreachable("Invalid mipmap mode");
}
}
static unsigned
panvk_translate_sampler_address_mode(VkSamplerAddressMode mode)
{
switch (mode) {
case VK_SAMPLER_ADDRESS_MODE_REPEAT: return MALI_WRAP_MODE_REPEAT;
case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return MALI_WRAP_MODE_MIRRORED_REPEAT;
case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE;
case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return MALI_WRAP_MODE_CLAMP_TO_BORDER;
case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE;
default: unreachable("Invalid wrap");
}
}
static void
panvk_translate_sampler_border_color(const VkSamplerCreateInfo *pCreateInfo,
uint32_t border_color[4])
{
const VkSamplerCustomBorderColorCreateInfoEXT *pBorderColor =
vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
switch (pCreateInfo->borderColor) {
case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
border_color[0] = border_color[1] = border_color[2] = fui(0.0);
border_color[3] =
pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK ?
fui(1.0) : fui(0.0);
break;
case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
border_color[0] = border_color[1] = border_color[2] = 0;
border_color[3] =
pCreateInfo->borderColor == VK_BORDER_COLOR_INT_OPAQUE_BLACK ?
UINT_MAX : 0;
break;
case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
border_color[0] = border_color[1] = border_color[2] = border_color[3] = fui(1.0);
break;
case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
border_color[0] = border_color[1] = border_color[2] = border_color[3] = UINT_MAX;
break;
case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
case VK_BORDER_COLOR_INT_CUSTOM_EXT:
memcpy(border_color, pBorderColor->customBorderColor.int32, sizeof(uint32_t) * 4);
break;
default:
unreachable("Invalid border color");
}
}
static mali_pixel_format
panvk_varying_hw_format(const struct panvk_device *dev,
const struct panvk_varyings_info *varyings,
gl_shader_stage stage, unsigned idx)
{
const struct panfrost_device *pdev = &dev->physical_device->pdev;
gl_varying_slot loc = varyings->stage[stage].loc[idx];
bool fs = stage == MESA_SHADER_FRAGMENT;
switch (loc) {
case VARYING_SLOT_PNTC:
case VARYING_SLOT_PSIZ:
return (MALI_R16F << 12) | panfrost_get_default_swizzle(1);
case VARYING_SLOT_POS:
return ((fs ? MALI_RGBA32F : MALI_SNAP_4) << 12) |
panfrost_get_default_swizzle(4);
default:
assert(!panvk_varying_is_builtin(stage, loc));
return pdev->formats[varyings->varying[loc].format].hw;
}
}
static void
panvk_emit_varying(const struct panvk_device *dev,
const struct panvk_varyings_info *varyings,
gl_shader_stage stage, unsigned idx,
void *attrib)
{
gl_varying_slot loc = varyings->stage[stage].loc[idx];
bool fs = stage == MESA_SHADER_FRAGMENT;
pan_pack(attrib, ATTRIBUTE, cfg) {
if (!panvk_varying_is_builtin(stage, loc)) {
cfg.buffer_index = varyings->varying[loc].buf;
cfg.offset = varyings->varying[loc].offset;
} else {
cfg.buffer_index =
panvk_varying_buf_index(varyings,
panvk_varying_buf_id(fs, loc));
}
cfg.offset_enable = PAN_ARCH == 5;
cfg.format = panvk_varying_hw_format(dev, varyings, stage, idx);
}
}
void
panvk_per_arch(emit_varyings)(const struct panvk_device *dev,
const struct panvk_varyings_info *varyings,
gl_shader_stage stage,
void *descs)
{
struct mali_attribute_packed *attrib = descs;
for (unsigned i = 0; i < varyings->stage[stage].count; i++)
panvk_emit_varying(dev, varyings, stage, i, attrib++);
}
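/* Varying buffer pointers are kept 64-byte aligned: the low bits of the
 * address are folded into the descriptor offset and added to the size. */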
static void
panvk_emit_varying_buf(const struct panvk_varyings_info *varyings,
enum panvk_varying_buf_id id, void *buf)
{
unsigned buf_idx = panvk_varying_buf_index(varyings, id);
pan_pack(buf, ATTRIBUTE_BUFFER, cfg) {
#if PAN_ARCH == 5
enum mali_attribute_special special_id = panvk_varying_special_buf_id(id);
if (special_id) {
cfg.type = 0;
cfg.special = special_id;
continue;
}
#endif
unsigned offset = varyings->buf[buf_idx].address & 63;
cfg.stride = varyings->buf[buf_idx].stride;
cfg.size = varyings->buf[buf_idx].size + offset;
cfg.pointer = varyings->buf[buf_idx].address & ~63ULL;
}
}
void
panvk_per_arch(emit_varying_bufs)(const struct panvk_varyings_info *varyings,
void *descs)
{
struct mali_attribute_buffer_packed *buf = descs;
for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) {
if (varyings->buf_mask & (1 << i))
panvk_emit_varying_buf(varyings, i, buf++);
}
/* We need an empty entry to stop prefetching on Bifrost */
#if PAN_ARCH >= 6
memset(buf, 0, sizeof(*buf));
#endif
}
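/* Vertex attribute buffers: per-instance buffers use the padded vertex
 * count as divisor and switch to 1D_MODULUS addressing when more than one
 * instance is drawn. */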
static void
panvk_emit_attrib_buf(const struct panvk_attribs_info *info,
const struct panvk_draw_info *draw,
const struct panvk_attrib_buf *bufs,
unsigned buf_count,
unsigned idx, void *desc)
{
const struct panvk_attrib_buf_info *buf_info = &info->buf[idx];
#if PAN_ARCH == 5
if (buf_info->special) {
switch (buf_info->special_id) {
case PAN_VERTEX_ID:
panfrost_vertex_id(draw->padded_vertex_count, desc,
draw->instance_count > 1);
return;
case PAN_INSTANCE_ID:
panfrost_instance_id(draw->padded_vertex_count, desc,
draw->instance_count > 1);
return;
default:
unreachable("Invalid attribute ID");
}
}
#endif
assert(idx < buf_count);
const struct panvk_attrib_buf *buf = &bufs[idx];
unsigned divisor = buf_info->per_instance ?
draw->padded_vertex_count : 0;
unsigned stride = divisor && draw->instance_count == 1 ?
0 : buf_info->stride;
mali_ptr addr = buf->address & ~63ULL;
unsigned size = buf->size + (buf->address & 63);
/* TODO: support instanced arrays */
pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
if (draw->instance_count > 1 && divisor) {
cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS;
cfg.divisor = divisor;
}
cfg.pointer = addr;
cfg.stride = stride;
cfg.size = size;
}
}
void
panvk_per_arch(emit_attrib_bufs)(const struct panvk_attribs_info *info,
const struct panvk_attrib_buf *bufs,
unsigned buf_count,
const struct panvk_draw_info *draw,
void *descs)
{
struct mali_attribute_buffer_packed *buf = descs;
for (unsigned i = 0; i < info->buf_count; i++)
panvk_emit_attrib_buf(info, draw, bufs, buf_count, i, buf++);
/* A NULL entry is needed to stop prefetching on Bifrost */
#if PAN_ARCH >= 6
memset(buf, 0, sizeof(*buf));
#endif
}
void
panvk_per_arch(emit_sampler)(const VkSamplerCreateInfo *pCreateInfo,
void *desc)
{
uint32_t border_color[4];
panvk_translate_sampler_border_color(pCreateInfo, border_color);
pan_pack(desc, SAMPLER, cfg) {
cfg.magnify_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST;
cfg.minify_nearest = pCreateInfo->minFilter == VK_FILTER_NEAREST;
cfg.mipmap_mode = panvk_translate_sampler_mipmap_mode(pCreateInfo->mipmapMode);
cfg.normalized_coordinates = !pCreateInfo->unnormalizedCoordinates;
cfg.lod_bias = FIXED_16(pCreateInfo->mipLodBias, true);
cfg.minimum_lod = FIXED_16(pCreateInfo->minLod, false);
cfg.maximum_lod = FIXED_16(pCreateInfo->maxLod, false);
cfg.wrap_mode_s = panvk_translate_sampler_address_mode(pCreateInfo->addressModeU);
cfg.wrap_mode_t = panvk_translate_sampler_address_mode(pCreateInfo->addressModeV);
cfg.wrap_mode_r = panvk_translate_sampler_address_mode(pCreateInfo->addressModeW);
cfg.compare_function = panvk_per_arch(translate_sampler_compare_func)(pCreateInfo);
cfg.border_color_r = border_color[0];
cfg.border_color_g = border_color[1];
cfg.border_color_b = border_color[2];
cfg.border_color_a = border_color[3];
}
}
static void
panvk_emit_attrib(const struct panvk_device *dev,
const struct panvk_attribs_info *attribs,
const struct panvk_attrib_buf *bufs,
unsigned buf_count,
unsigned idx, void *attrib)
{
const struct panfrost_device *pdev = &dev->physical_device->pdev;
pan_pack(attrib, ATTRIBUTE, cfg) {
cfg.buffer_index = attribs->attrib[idx].buf;
cfg.offset = attribs->attrib[idx].offset +
(bufs[cfg.buffer_index].address & 63);
cfg.format = pdev->formats[attribs->attrib[idx].format].hw;
}
}
void
panvk_per_arch(emit_attribs)(const struct panvk_device *dev,
const struct panvk_attribs_info *attribs,
const struct panvk_attrib_buf *bufs,
unsigned buf_count,
void *descs)
{
struct mali_attribute_packed *attrib = descs;
for (unsigned i = 0; i < attribs->attrib_count; i++)
panvk_emit_attrib(dev, attribs, bufs, buf_count, i, attrib++);
}
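/* UNIFORM_BUFFER.entries is expressed in 16-byte (vec4) units, hence the
 * DIV_ROUND_UP(size, 16) below. */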
void
panvk_per_arch(emit_ubo)(mali_ptr address, size_t size, void *desc)
{
pan_pack(desc, UNIFORM_BUFFER, cfg) {
cfg.pointer = address;
cfg.entries = DIV_ROUND_UP(size, 16);
}
}
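/* UBO table layout: per-set UBOs first (plus one extra UBO per set holding
 * the dynamic offsets, if any), followed by the sysval UBOs declared by the
 * pipeline. */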
void
panvk_per_arch(emit_ubos)(const struct panvk_pipeline *pipeline,
const struct panvk_descriptor_state *state,
void *descs)
{
struct mali_uniform_buffer_packed *ubos = descs;
for (unsigned i = 0; i < ARRAY_SIZE(state->sets); i++) {
const struct panvk_descriptor_set_layout *set_layout =
pipeline->layout->sets[i].layout;
const struct panvk_descriptor_set *set = state->sets[i].set;
unsigned offset = pipeline->layout->sets[i].ubo_offset;
if (!set_layout)
continue;
if (!set) {
unsigned num_ubos = (set_layout->num_dynoffsets != 0) + set_layout->num_ubos;
memset(&ubos[offset], 0, num_ubos * sizeof(*ubos));
} else {
memcpy(&ubos[offset], set->ubos, set_layout->num_ubos * sizeof(*ubos));
if (set_layout->num_dynoffsets) {
panvk_per_arch(emit_ubo)(state->sets[i].dynoffsets.gpu,
set->layout->num_dynoffsets * sizeof(uint32_t),
&ubos[offset + set_layout->num_ubos]);
}
}
}
for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++) {
if (!pipeline->sysvals[i].ids.sysval_count)
continue;
panvk_per_arch(emit_ubo)(pipeline->sysvals[i].ubo ? : state->sysvals[i],
pipeline->sysvals[i].ids.sysval_count * 16,
&ubos[pipeline->sysvals[i].ubo_idx]);
}
}
void
panvk_per_arch(emit_vertex_job)(const struct panvk_pipeline *pipeline,
const struct panvk_draw_info *draw,
void *job)
{
void *section = pan_section_ptr(job, COMPUTE_JOB, INVOCATION);
memcpy(section, &draw->invocation, pan_size(INVOCATION));
pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) {
cfg.job_task_split = 5;
}
pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) {
cfg.draw_descriptor_is_64b = true;
#if PAN_ARCH == 5
cfg.texture_descriptor_is_64b = true;
#endif
cfg.state = pipeline->rsds[MESA_SHADER_VERTEX];
cfg.attributes = draw->stages[MESA_SHADER_VERTEX].attributes;
cfg.attribute_buffers = draw->attribute_bufs;
cfg.varyings = draw->stages[MESA_SHADER_VERTEX].varyings;
cfg.varying_buffers = draw->varying_bufs;
cfg.thread_storage = draw->tls;
cfg.offset_start = draw->offset_start;
cfg.instance_size = draw->instance_count > 1 ?
draw->padded_vertex_count : 1;
cfg.uniform_buffers = draw->ubos;
cfg.push_uniforms = draw->stages[MESA_SHADER_VERTEX].push_constants;
cfg.textures = draw->textures;
cfg.samplers = draw->samplers;
}
pan_section_pack(job, COMPUTE_JOB, DRAW_PADDING, cfg);
}
static void
panvk_emit_tiler_primitive(const struct panvk_pipeline *pipeline,
const struct panvk_draw_info *draw,
void *prim)
{
pan_pack(prim, PRIMITIVE, cfg) {
cfg.draw_mode = pipeline->ia.topology;
if (pipeline->ia.writes_point_size)
cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;
cfg.first_provoking_vertex = true;
if (pipeline->ia.primitive_restart)
cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
cfg.job_task_split = 6;
/* TODO: indexed draws */
cfg.index_count = draw->vertex_count;
}
}
static void
panvk_emit_tiler_primitive_size(const struct panvk_pipeline *pipeline,
const struct panvk_draw_info *draw,
void *primsz)
{
pan_pack(primsz, PRIMITIVE_SIZE, cfg) {
if (pipeline->ia.writes_point_size) {
cfg.size_array = draw->psiz;
} else {
cfg.constant = draw->line_width;
}
}
}
static void
panvk_emit_tiler_dcd(const struct panvk_pipeline *pipeline,
const struct panvk_draw_info *draw,
void *dcd)
{
pan_pack(dcd, DRAW, cfg) {
cfg.four_components_per_vertex = true;
cfg.draw_descriptor_is_64b = true;
#if PAN_ARCH == 5
cfg.texture_descriptor_is_64b = true;
#endif
cfg.front_face_ccw = pipeline->rast.front_ccw;
cfg.cull_front_face = pipeline->rast.cull_front_face;
cfg.cull_back_face = pipeline->rast.cull_back_face;
cfg.position = draw->position;
cfg.state = draw->fs_rsd;
cfg.attributes = draw->stages[MESA_SHADER_FRAGMENT].attributes;
cfg.attribute_buffers = draw->attribute_bufs;
cfg.viewport = draw->viewport;
cfg.varyings = draw->stages[MESA_SHADER_FRAGMENT].varyings;
cfg.varying_buffers = cfg.varyings ? draw->varying_bufs : 0;
#if PAN_ARCH == 5
cfg.fbd = draw->fb;
#else
cfg.thread_storage = draw->tls;
#endif
/* For all primitives but lines DRAW.flat_shading_vertex must
* be set to 0 and the provoking vertex is selected with the
* PRIMITIVE.first_provoking_vertex field.
*/
if (pipeline->ia.topology == MALI_DRAW_MODE_LINES ||
pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP ||
pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) {
/* The logic is inverted on bifrost. */
#if PAN_ARCH == 5
cfg.flat_shading_vertex = false;
#else
cfg.flat_shading_vertex = true;
#endif
}
cfg.offset_start = draw->offset_start;
cfg.instance_size = draw->instance_count > 1 ?
draw->padded_vertex_count : 1;
cfg.uniform_buffers = draw->ubos;
cfg.push_uniforms = draw->stages[MESA_SHADER_FRAGMENT].push_constants;
cfg.textures = draw->textures;
cfg.samplers = draw->samplers;
/* TODO: occlusion queries */
}
}
void
panvk_per_arch(emit_tiler_job)(const struct panvk_pipeline *pipeline,
const struct panvk_draw_info *draw,
void *job)
{
void *section;
section = pan_section_ptr(job, TILER_JOB, INVOCATION);
memcpy(section, &draw->invocation, pan_size(INVOCATION));
section = pan_section_ptr(job, TILER_JOB, PRIMITIVE);
panvk_emit_tiler_primitive(pipeline, draw, section);
section = pan_section_ptr(job, TILER_JOB, PRIMITIVE_SIZE);
panvk_emit_tiler_primitive_size(pipeline, draw, section);
section = pan_section_ptr(job, TILER_JOB, DRAW);
panvk_emit_tiler_dcd(pipeline, draw, section);
#if PAN_ARCH >= 6
pan_section_pack(job, TILER_JOB, TILER, cfg) {
cfg.address = draw->tiler_ctx->bifrost;
}
pan_section_pack(job, TILER_JOB, DRAW_PADDING, padding);
pan_section_pack(job, TILER_JOB, PADDING, padding);
#endif
}
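/* The fragment job covers the whole framebuffer; bounds are expressed in
 * tile units (1 << MALI_TILE_SHIFT pixels) and are inclusive. */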
void
panvk_per_arch(emit_fragment_job)(const struct panvk_framebuffer *fb,
mali_ptr fbdesc,
void *job)
{
pan_section_pack(job, FRAGMENT_JOB, HEADER, header) {
header.type = MALI_JOB_TYPE_FRAGMENT;
header.index = 1;
}
pan_section_pack(job, FRAGMENT_JOB, PAYLOAD, payload) {
payload.bound_min_x = 0;
payload.bound_min_y = 0;
payload.bound_max_x = (fb->width - 1) >> MALI_TILE_SHIFT;
payload.bound_max_y = (fb->height - 1) >> MALI_TILE_SHIFT;
payload.framebuffer = fbdesc;
}
}
void
panvk_per_arch(emit_viewport)(const VkViewport *viewport,
const VkRect2D *scissor,
void *vpd)
{
/* The spec says "width must be greater than 0.0" */
assert(viewport->x >= 0);
int minx = (int)viewport->x;
int maxx = (int)(viewport->x + viewport->width);
/* Viewport height can be negative */
int miny = MIN2((int)viewport->y, (int)(viewport->y + viewport->height));
int maxy = MAX2((int)viewport->y, (int)(viewport->y + viewport->height));
assert(scissor->offset.x >= 0 && scissor->offset.y >= 0);
minx = MAX2(scissor->offset.x, minx);
miny = MAX2(scissor->offset.y, miny);
maxx = MIN2(scissor->offset.x + scissor->extent.width, maxx);
maxy = MIN2(scissor->offset.y + scissor->extent.height, maxy);
/* Make sure we don't end up with a max < min when width/height is 0 */
maxx = maxx > minx ? maxx - 1 : maxx;
maxy = maxy > miny ? maxy - 1 : maxy;
assert(viewport->minDepth >= 0.0f && viewport->minDepth <= 1.0f);
assert(viewport->maxDepth >= 0.0f && viewport->maxDepth <= 1.0f);
pan_pack(vpd, VIEWPORT, cfg) {
cfg.scissor_minimum_x = minx;
cfg.scissor_minimum_y = miny;
cfg.scissor_maximum_x = maxx;
cfg.scissor_maximum_y = maxy;
cfg.minimum_z = MIN2(viewport->minDepth, viewport->maxDepth);
cfg.maximum_z = MAX2(viewport->minDepth, viewport->maxDepth);
}
}
#if PAN_ARCH >= 6
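/* Maps the fragment shader output type reported by the compiler to the
 * Bifrost blend register file format used by fixed-function blending. */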
static enum mali_bifrost_register_file_format
bifrost_blend_type_from_nir(nir_alu_type nir_type)
{
switch(nir_type) {
case 0: /* Render target not in use */
return 0;
case nir_type_float16:
return MALI_BIFROST_REGISTER_FILE_FORMAT_F16;
case nir_type_float32:
return MALI_BIFROST_REGISTER_FILE_FORMAT_F32;
case nir_type_int32:
return MALI_BIFROST_REGISTER_FILE_FORMAT_I32;
case nir_type_uint32:
return MALI_BIFROST_REGISTER_FILE_FORMAT_U32;
case nir_type_int16:
return MALI_BIFROST_REGISTER_FILE_FORMAT_I16;
case nir_type_uint16:
return MALI_BIFROST_REGISTER_FILE_FORMAT_U16;
default:
unreachable("Unsupported blend shader type for NIR alu type");
}
}
#endif
void
panvk_per_arch(emit_blend)(const struct panvk_device *dev,
const struct panvk_pipeline *pipeline,
unsigned rt, void *bd)
{
const struct pan_blend_state *blend = &pipeline->blend.state;
const struct pan_blend_rt_state *rts = &blend->rts[rt];
bool dithered = false;
pan_pack(bd, BLEND, cfg) {
if (!blend->rt_count || !rts->equation.color_mask) {
cfg.enable = false;
#if PAN_ARCH >= 6
cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_OFF;
#endif
continue;
}
cfg.srgb = util_format_is_srgb(rts->format);
cfg.load_destination = pan_blend_reads_dest(blend->rts[rt].equation);
cfg.round_to_fb_precision = dithered;
#if PAN_ARCH <= 5
cfg.midgard.blend_shader = false;
pan_blend_to_fixed_function_equation(blend->rts[rt].equation,
&cfg.midgard.equation);
cfg.midgard.constant =
pan_blend_get_constant(pan_blend_constant_mask(blend->rts[rt].equation),
blend->constants);
#else
const struct panfrost_device *pdev = &dev->physical_device->pdev;
const struct util_format_description *format_desc =
util_format_description(rts->format);
unsigned chan_size = 0;
for (unsigned i = 0; i < format_desc->nr_channels; i++)
chan_size = MAX2(format_desc->channel[i].size, chan_size);
pan_blend_to_fixed_function_equation(blend->rts[rt].equation,
&cfg.bifrost.equation);
/* The blend constant is encoded as an unsigned 16-bit fixed-point value
 * whose precision matches the widest channel of the render-target format.
 */
float fconst =
pan_blend_get_constant(pan_blend_constant_mask(blend->rts[rt].equation),
blend->constants);
uint16_t constant = fconst * ((1 << chan_size) - 1);
constant <<= 16 - chan_size;
cfg.bifrost.constant = constant;
if (pan_blend_is_opaque(blend->rts[rt].equation))
cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_OPAQUE;
else
cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_FIXED_FUNCTION;
/* If we want the conversion to work properly,
* num_comps must be set to 4
*/
cfg.bifrost.internal.fixed_function.num_comps = 4;
cfg.bifrost.internal.fixed_function.conversion.memory_format =
panfrost_format_to_bifrost_blend(pdev, rts->format, dithered);
cfg.bifrost.internal.fixed_function.conversion.register_format =
bifrost_blend_type_from_nir(pipeline->fs.info.bifrost.blend[rt].type);
cfg.bifrost.internal.fixed_function.rt = rt;
#endif
}
}
void
panvk_per_arch(emit_blend_constant)(const struct panvk_device *dev,
const struct panvk_pipeline *pipeline,
unsigned rt, const float *constants,
void *bd)
{
float constant = constants[pipeline->blend.constant[rt].index];
pan_pack(bd, BLEND, cfg) {
cfg.enable = false;
#if PAN_ARCH == 5
cfg.midgard.constant = constant;
#else
cfg.bifrost.constant = constant * pipeline->blend.constant[rt].bifrost_factor;
#endif
}
}
void
panvk_per_arch(emit_dyn_fs_rsd)(const struct panvk_pipeline *pipeline,
const struct panvk_cmd_state *state,
void *rsd)
{
pan_pack(rsd, RENDERER_STATE, cfg) {
if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) {
cfg.depth_units = state->rast.depth_bias.constant_factor * 2.0f;
cfg.depth_factor = state->rast.depth_bias.slope_factor;
cfg.depth_bias_clamp = state->rast.depth_bias.clamp;
}
if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
cfg.stencil_front.mask = state->zs.s_front.compare_mask;
cfg.stencil_back.mask = state->zs.s_back.compare_mask;
}
if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
cfg.stencil_mask_misc.stencil_mask_front = state->zs.s_front.write_mask;
cfg.stencil_mask_misc.stencil_mask_back = state->zs.s_back.write_mask;
}
if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
cfg.stencil_front.reference_value = state->zs.s_front.ref;
cfg.stencil_back.reference_value = state->zs.s_back.ref;
}
}
}
void
panvk_per_arch(emit_base_fs_rsd)(const struct panvk_device *dev,
const struct panvk_pipeline *pipeline,
void *rsd)
{
const struct panfrost_device *pdev = &dev->physical_device->pdev;
const struct pan_shader_info *info = &pipeline->fs.info;
pan_pack(rsd, RENDERER_STATE, cfg) {
if (pipeline->fs.required) {
pan_shader_prepare_rsd(pdev, info, pipeline->fs.address, &cfg);
#if PAN_ARCH == 5
/* If either depth or stencil is enabled, discard matters */
bool zs_enabled =
(pipeline->zs.z_test && pipeline->zs.z_compare_func != MALI_FUNC_ALWAYS) ||
pipeline->zs.s_test;
cfg.properties.midgard.work_register_count = info->work_reg_count;
cfg.properties.midgard.force_early_z =
info->fs.can_early_z && !pipeline->ms.alpha_to_coverage &&
pipeline->zs.z_compare_func == MALI_FUNC_ALWAYS;
/* Workaround a hardware errata where early-z cannot be enabled
* when discarding even when the depth buffer is read-only, by
* lying to the hardware about the discard and setting the
* reads tilebuffer? flag to compensate */
cfg.properties.midgard.shader_reads_tilebuffer =
info->fs.outputs_read ||
(!zs_enabled && info->fs.can_discard);
cfg.properties.midgard.shader_contains_discard =
zs_enabled && info->fs.can_discard;
#else
cfg.properties.bifrost.allow_forward_pixel_to_kill = info->fs.can_fpk;
#endif
} else {
#if PAN_ARCH == 5
cfg.shader.shader = 0x1;
cfg.properties.midgard.work_register_count = 1;
cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
cfg.properties.midgard.force_early_z = true;
#else
cfg.properties.bifrost.shader_modifies_coverage = true;
cfg.properties.bifrost.allow_forward_pixel_to_kill = true;
cfg.properties.bifrost.allow_forward_pixel_to_be_killed = true;
cfg.properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
#endif
}
bool msaa = pipeline->ms.rast_samples > 1;
cfg.multisample_misc.multisample_enable = msaa;
cfg.multisample_misc.sample_mask =
msaa ? pipeline->ms.sample_mask : UINT16_MAX;
cfg.multisample_misc.depth_function =
pipeline->zs.z_test ? pipeline->zs.z_compare_func : MALI_FUNC_ALWAYS;
cfg.multisample_misc.depth_write_mask = pipeline->zs.z_write;
cfg.multisample_misc.fixed_function_near_discard = !pipeline->rast.clamp_depth;
cfg.multisample_misc.fixed_function_far_discard = !pipeline->rast.clamp_depth;
cfg.multisample_misc.shader_depth_range_fixed = true;
cfg.stencil_mask_misc.stencil_enable = pipeline->zs.s_test;
cfg.stencil_mask_misc.alpha_to_coverage = pipeline->ms.alpha_to_coverage;
cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
cfg.stencil_mask_misc.depth_range_1 = pipeline->rast.depth_bias.enable;
cfg.stencil_mask_misc.depth_range_2 = pipeline->rast.depth_bias.enable;
cfg.stencil_mask_misc.single_sampled_lines = pipeline->ms.rast_samples <= 1;
if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS))) {
cfg.depth_units = pipeline->rast.depth_bias.constant_factor * 2.0f;
cfg.depth_factor = pipeline->rast.depth_bias.slope_factor;
cfg.depth_bias_clamp = pipeline->rast.depth_bias.clamp;
}
if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK))) {
cfg.stencil_front.mask = pipeline->zs.s_front.compare_mask;
cfg.stencil_back.mask = pipeline->zs.s_back.compare_mask;
}
if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK))) {
cfg.stencil_mask_misc.stencil_mask_front = pipeline->zs.s_front.write_mask;
cfg.stencil_mask_misc.stencil_mask_back = pipeline->zs.s_back.write_mask;
}
if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))) {
cfg.stencil_front.reference_value = pipeline->zs.s_front.ref;
cfg.stencil_back.reference_value = pipeline->zs.s_back.ref;
}
cfg.stencil_front.compare_function = pipeline->zs.s_front.compare_func;
cfg.stencil_front.stencil_fail = pipeline->zs.s_front.fail_op;
cfg.stencil_front.depth_fail = pipeline->zs.s_front.z_fail_op;
cfg.stencil_front.depth_pass = pipeline->zs.s_front.pass_op;
cfg.stencil_back.compare_function = pipeline->zs.s_back.compare_func;
cfg.stencil_back.stencil_fail = pipeline->zs.s_back.fail_op;
cfg.stencil_back.depth_fail = pipeline->zs.s_back.z_fail_op;
cfg.stencil_back.depth_pass = pipeline->zs.s_back.pass_op;
}
}
void
panvk_per_arch(emit_non_fs_rsd)(const struct panvk_device *dev,
const struct pan_shader_info *shader_info,
mali_ptr shader_ptr,
void *rsd)
{
const struct panfrost_device *pdev = &dev->physical_device->pdev;
assert(shader_info->stage != MESA_SHADER_FRAGMENT);
pan_pack(rsd, RENDERER_STATE, cfg) {
pan_shader_prepare_rsd(pdev, shader_info, shader_ptr, &cfg);
}
}
void
panvk_per_arch(emit_tiler_context)(const struct panvk_device *dev,
unsigned width, unsigned height,
const struct panfrost_ptr *descs)
{
#if PAN_ARCH == 5
unreachable("Not supported on v5");
#else
const struct panfrost_device *pdev = &dev->physical_device->pdev;
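/* descs holds a TILER_CONTEXT descriptor immediately followed by the
 * TILER_HEAP descriptor it points to. */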
pan_pack(descs->cpu + pan_size(TILER_CONTEXT), TILER_HEAP, cfg) {
cfg.size = pdev->tiler_heap->size;
cfg.base = pdev->tiler_heap->ptr.gpu;
cfg.bottom = pdev->tiler_heap->ptr.gpu;
cfg.top = pdev->tiler_heap->ptr.gpu + pdev->tiler_heap->size;
}
pan_pack(descs->cpu, TILER_CONTEXT, cfg) {
cfg.hierarchy_mask = 0x28;
cfg.fb_width = width;
cfg.fb_height = height;
cfg.heap = descs->gpu + pan_size(TILER_CONTEXT);
}
#endif
}
unsigned
panvk_per_arch(emit_fb)(const struct panvk_device *dev,
const struct panvk_batch *batch,
const struct panvk_subpass *subpass,
const struct panvk_framebuffer *fb,
const struct panvk_clear_value *clears,
const struct pan_tls_info *tlsinfo,
const struct pan_tiler_context *tilerctx,
void *desc)
{
const struct panfrost_device *pdev = &dev->physical_device->pdev;
struct panvk_image_view *view;
bool crc_valid[8] = { false };
struct pan_fb_info fbinfo = {
.width = fb->width,
.height = fb->height,
.extent.maxx = fb->width - 1,
.extent.maxy = fb->height - 1,
.nr_samples = 1,
};
for (unsigned cb = 0; cb < subpass->color_count; cb++) {
int idx = subpass->color_attachments[cb].idx;
view = idx != VK_ATTACHMENT_UNUSED ?
fb->attachments[idx].iview : NULL;
if (!view)
continue;
fbinfo.rts[cb].view = &view->pview;
fbinfo.rts[cb].clear = subpass->color_attachments[cb].clear;
fbinfo.rts[cb].crc_valid = &crc_valid[cb];
memcpy(fbinfo.rts[cb].clear_value, clears[idx].color,
sizeof(fbinfo.rts[cb].clear_value));
fbinfo.nr_samples =
MAX2(fbinfo.nr_samples, view->pview.image->layout.nr_samples);
}
if (subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED) {
view = fb->attachments[subpass->zs_attachment.idx].iview;
const struct util_format_description *fdesc =
util_format_description(view->pview.format);
fbinfo.nr_samples =
MAX2(fbinfo.nr_samples, view->pview.image->layout.nr_samples);
if (util_format_has_depth(fdesc)) {
fbinfo.zs.clear.z = subpass->zs_attachment.clear;
fbinfo.zs.clear_value.depth = clears[subpass->zs_attachment.idx].depth;
fbinfo.zs.view.zs = &view->pview;
}
if (util_format_has_stencil(fdesc)) {
fbinfo.zs.clear.s = subpass->zs_attachment.clear;
fbinfo.zs.clear_value.stencil = clears[subpass->zs_attachment.idx].stencil;
if (!fbinfo.zs.view.zs)
fbinfo.zs.view.s = &view->pview;
}
}
return pan_emit_fbd(pdev, &fbinfo, tlsinfo, tilerctx, desc);
}

View File

@ -0,0 +1,137 @@
/*
* Copyright (C) 2021 Collabora Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef PANVK_PRIVATE_H
#error "Must be included from panvk_private.h"
#endif
#ifndef PAN_ARCH
#error "no arch"
#endif
#include <vulkan/vulkan.h>
#include "compiler/shader_enums.h"
void
panvk_per_arch(emit_varying)(const struct panvk_device *dev,
const struct panvk_varyings_info *varyings,
gl_shader_stage stage, unsigned idx,
void *attrib);
void
panvk_per_arch(emit_varyings)(const struct panvk_device *dev,
const struct panvk_varyings_info *varyings,
gl_shader_stage stage,
void *descs);
void
panvk_per_arch(emit_varying_bufs)(const struct panvk_varyings_info *varyings,
void *descs);
void
panvk_per_arch(emit_attrib_bufs)(const struct panvk_attribs_info *info,
const struct panvk_attrib_buf *bufs,
unsigned buf_count,
const struct panvk_draw_info *draw,
void *descs);
void
panvk_per_arch(emit_attribs)(const struct panvk_device *dev,
const struct panvk_attribs_info *attribs,
const struct panvk_attrib_buf *bufs,
unsigned buf_count,
void *descs);
void
panvk_per_arch(emit_ubo)(mali_ptr address, size_t size, void *desc);
void
panvk_per_arch(emit_ubos)(const struct panvk_pipeline *pipeline,
const struct panvk_descriptor_state *state,
void *descs);
void
panvk_per_arch(emit_sampler)(const VkSamplerCreateInfo *pCreateInfo,
void *desc);
void
panvk_per_arch(emit_vertex_job)(const struct panvk_pipeline *pipeline,
const struct panvk_draw_info *draw,
void *job);
void
panvk_per_arch(emit_tiler_job)(const struct panvk_pipeline *pipeline,
const struct panvk_draw_info *draw,
void *job);
void
panvk_per_arch(emit_fragment_job)(const struct panvk_framebuffer *fb,
mali_ptr fbdesc,
void *job);
void
panvk_per_arch(emit_viewport)(const VkViewport *viewport,
const VkRect2D *scissor,
void *vpd);
void
panvk_per_arch(emit_blend)(const struct panvk_device *dev,
const struct panvk_pipeline *pipeline,
unsigned rt, void *bd);
void
panvk_per_arch(emit_blend_constant)(const struct panvk_device *dev,
const struct panvk_pipeline *pipeline,
unsigned rt, const float *constants,
void *bd);
void
panvk_per_arch(emit_dyn_fs_rsd)(const struct panvk_pipeline *pipeline,
const struct panvk_cmd_state *state,
void *rsd);
void
panvk_per_arch(emit_base_fs_rsd)(const struct panvk_device *dev,
const struct panvk_pipeline *pipeline,
void *rsd);
void
panvk_per_arch(emit_non_fs_rsd)(const struct panvk_device *dev,
const struct pan_shader_info *shader_info,
mali_ptr shader_ptr,
void *rsd);
void
panvk_per_arch(emit_tiler_context)(const struct panvk_device *dev,
unsigned width, unsigned height,
const struct panfrost_ptr *descs);
unsigned
panvk_per_arch(emit_fb)(const struct panvk_device *dev,
const struct panvk_batch *batch,
const struct panvk_subpass *subpass,
const struct panvk_framebuffer *fb,
const struct panvk_clear_value *clears,
const struct pan_tls_info *tlsinfo,
const struct pan_tiler_context *tilerctx,
void *desc);

View File

@ -0,0 +1,375 @@
/*
* Copyright © 2021 Collabora Ltd.
*
* Derived from:
* Copyright © 2016 Red Hat.
* Copyright © 2016 Bas Nieuwenhuizen
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "gen_macros.h"
#include "panvk_private.h"
#include <assert.h>
#include <fcntl.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include "util/mesa-sha1.h"
#include "vk_descriptors.h"
#include "vk_util.h"
#include "pan_bo.h"
#include "panvk_cs.h"
static VkResult
panvk_per_arch(descriptor_set_create)(struct panvk_device *device,
struct panvk_descriptor_pool *pool,
const struct panvk_descriptor_set_layout *layout,
struct panvk_descriptor_set **out_set)
{
struct panvk_descriptor_set *set;
/* TODO: Allocate from the pool! */
set = vk_object_zalloc(&device->vk, NULL,
sizeof(struct panvk_descriptor_set),
VK_OBJECT_TYPE_DESCRIPTOR_SET);
if (!set)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
set->layout = layout;
set->descs = vk_alloc(&device->vk.alloc,
sizeof(*set->descs) * layout->num_descs, 8,
VK_OBJECT_TYPE_DESCRIPTOR_SET);
if (!set->descs)
goto err_free_set;
if (layout->num_ubos) {
set->ubos = vk_zalloc(&device->vk.alloc,
pan_size(UNIFORM_BUFFER) * layout->num_ubos, 8,
VK_OBJECT_TYPE_DESCRIPTOR_SET);
if (!set->ubos)
goto err_free_set;
}
if (layout->num_samplers) {
set->samplers = vk_zalloc(&device->vk.alloc,
pan_size(SAMPLER) * layout->num_samplers, 8,
VK_OBJECT_TYPE_DESCRIPTOR_SET);
if (!set->samplers)
goto err_free_set;
}
if (layout->num_textures) {
set->textures =
vk_zalloc(&device->vk.alloc,
(PAN_ARCH >= 6 ? pan_size(TEXTURE) : sizeof(mali_ptr)) *
layout->num_textures,
8, VK_OBJECT_TYPE_DESCRIPTOR_SET);
if (!set->textures)
goto err_free_set;
}
for (unsigned i = 0; i < layout->binding_count; i++) {
if (!layout->bindings[i].immutable_samplers)
continue;
for (unsigned j = 0; j < layout->bindings[i].array_size; j++) {
set->descs[layout->bindings[i].desc_idx + j].image.sampler =
layout->bindings[i].immutable_samplers[j];
}
}
*out_set = set;
return VK_SUCCESS;
err_free_set:
vk_free(&device->vk.alloc, set->textures);
vk_free(&device->vk.alloc, set->samplers);
vk_free(&device->vk.alloc, set->ubos);
vk_free(&device->vk.alloc, set->descs);
vk_object_free(&device->vk, NULL, set);
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
}
VkResult
panvk_per_arch(AllocateDescriptorSets)(VkDevice _device,
const VkDescriptorSetAllocateInfo *pAllocateInfo,
VkDescriptorSet *pDescriptorSets)
{
VK_FROM_HANDLE(panvk_device, device, _device);
VK_FROM_HANDLE(panvk_descriptor_pool, pool, pAllocateInfo->descriptorPool);
VkResult result;
unsigned i;
for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
VK_FROM_HANDLE(panvk_descriptor_set_layout, layout,
pAllocateInfo->pSetLayouts[i]);
struct panvk_descriptor_set *set = NULL;
result = panvk_per_arch(descriptor_set_create)(device, pool, layout, &set);
if (result != VK_SUCCESS)
goto err_free_sets;
pDescriptorSets[i] = panvk_descriptor_set_to_handle(set);
}
return VK_SUCCESS;
err_free_sets:
panvk_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool, i, pDescriptorSets);
for (i = 0; i < pAllocateInfo->descriptorSetCount; i++)
pDescriptorSets[i] = VK_NULL_HANDLE;
return result;
}
static void
panvk_set_image_desc(struct panvk_descriptor *desc,
const VkDescriptorImageInfo *pImageInfo)
{
VK_FROM_HANDLE(panvk_sampler, sampler, pImageInfo->sampler);
VK_FROM_HANDLE(panvk_image_view, image_view, pImageInfo->imageView);
desc->image.sampler = sampler;
desc->image.view = image_view;
desc->image.layout = pImageInfo->imageLayout;
}
static void
panvk_set_texel_buffer_view_desc(struct panvk_descriptor *desc,
const VkBufferView *pTexelBufferView)
{
VK_FROM_HANDLE(panvk_buffer_view, buffer_view, *pTexelBufferView);
desc->buffer_view = buffer_view;
}
static void
panvk_set_buffer_info_desc(struct panvk_descriptor *desc,
const VkDescriptorBufferInfo *pBufferInfo)
{
VK_FROM_HANDLE(panvk_buffer, buffer, pBufferInfo->buffer);
desc->buffer_info.buffer = buffer;
desc->buffer_info.offset = pBufferInfo->offset;
desc->buffer_info.range = pBufferInfo->range;
}
static void
panvk_per_arch(set_ubo_desc)(void *ubo,
const VkDescriptorBufferInfo *pBufferInfo)
{
VK_FROM_HANDLE(panvk_buffer, buffer, pBufferInfo->buffer);
size_t size = pBufferInfo->range == VK_WHOLE_SIZE ?
(buffer->bo->size - pBufferInfo->offset) :
pBufferInfo->range;
panvk_per_arch(emit_ubo)(buffer->bo->ptr.gpu + pBufferInfo->offset, size, ubo);
}
static void
panvk_set_sampler_desc(void *desc,
const VkDescriptorImageInfo *pImageInfo)
{
VK_FROM_HANDLE(panvk_sampler, sampler, pImageInfo->sampler);
memcpy(desc, &sampler->desc, sizeof(sampler->desc));
}
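/* On Bifrost (v6+) texture descriptors are stored directly in the set;
 * on Midgard (v5) the set only stores a pointer to the descriptor living
 * in the image view BO. */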
static void
panvk_per_arch(set_texture_desc)(struct panvk_descriptor_set *set,
unsigned idx,
const VkDescriptorImageInfo *pImageInfo)
{
VK_FROM_HANDLE(panvk_image_view, view, pImageInfo->imageView);
#if PAN_ARCH > 5
memcpy(&((struct mali_bifrost_texture_packed *)set->textures)[idx],
view->desc, pan_size(TEXTURE));
#else
((mali_ptr *)set->textures)[idx] = view->bo->ptr.gpu;
#endif
}
static void
panvk_per_arch(write_descriptor_set)(struct panvk_device *dev,
const VkWriteDescriptorSet *pDescriptorWrite)
{
VK_FROM_HANDLE(panvk_descriptor_set, set, pDescriptorWrite->dstSet);
const struct panvk_descriptor_set_layout *layout = set->layout;
unsigned dest_offset = pDescriptorWrite->dstArrayElement;
unsigned binding = pDescriptorWrite->dstBinding;
struct mali_uniform_buffer_packed *ubos = set->ubos;
struct mali_midgard_sampler_packed *samplers = set->samplers;
unsigned src_offset = 0;
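/* A single write can span several consecutive bindings, so walk the
 * bindings and split the descriptor range accordingly. */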
while (src_offset < pDescriptorWrite->descriptorCount &&
binding < layout->binding_count) {
const struct panvk_descriptor_set_binding_layout *binding_layout =
&layout->bindings[binding];
if (!binding_layout->array_size) {
binding++;
dest_offset = 0;
continue;
}
assert(pDescriptorWrite->descriptorType == binding_layout->type);
unsigned ndescs = MIN2(pDescriptorWrite->descriptorCount - src_offset,
binding_layout->array_size - dest_offset);
struct panvk_descriptor *descs = &set->descs[binding_layout->desc_idx + dest_offset];
assert(binding_layout->desc_idx + dest_offset + ndescs <= set->layout->num_descs);
switch (pDescriptorWrite->descriptorType) {
case VK_DESCRIPTOR_TYPE_SAMPLER:
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
for (unsigned i = 0; i < ndescs; i++) {
const VkDescriptorImageInfo *info = &pDescriptorWrite->pImageInfo[src_offset + i];
if ((pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER ||
pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) &&
!binding_layout->immutable_samplers) {
unsigned sampler = binding_layout->sampler_idx + dest_offset + i;
panvk_set_sampler_desc(&samplers[sampler], info);
}
if (pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE ||
pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
unsigned tex = binding_layout->tex_idx + dest_offset + i;
panvk_per_arch(set_texture_desc)(set, tex, info);
}
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
for (unsigned i = 0; i < ndescs; i++)
panvk_set_image_desc(&descs[i], &pDescriptorWrite->pImageInfo[src_offset + i]);
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
for (unsigned i = 0; i < ndescs; i++)
panvk_set_texel_buffer_view_desc(&descs[i], &pDescriptorWrite->pTexelBufferView[src_offset + i]);
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
for (unsigned i = 0; i < ndescs; i++) {
unsigned ubo = binding_layout->ubo_idx + dest_offset + i;
panvk_per_arch(set_ubo_desc)(&ubos[ubo],
&pDescriptorWrite->pBufferInfo[src_offset + i]);
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
for (unsigned i = 0; i < ndescs; i++)
panvk_set_buffer_info_desc(&descs[i], &pDescriptorWrite->pBufferInfo[src_offset + i]);
break;
default:
unreachable("Invalid type");
}
src_offset += ndescs;
binding++;
dest_offset = 0;
}
}
static void
panvk_copy_descriptor_set(struct panvk_device *dev,
const VkCopyDescriptorSet *pDescriptorCopy)
{
VK_FROM_HANDLE(panvk_descriptor_set, dest_set, pDescriptorCopy->dstSet);
VK_FROM_HANDLE(panvk_descriptor_set, src_set, pDescriptorCopy->srcSet);
const struct panvk_descriptor_set_layout *dest_layout = dest_set->layout;
const struct panvk_descriptor_set_layout *src_layout = src_set->layout;
unsigned dest_offset = pDescriptorCopy->dstArrayElement;
unsigned src_offset = pDescriptorCopy->srcArrayElement;
unsigned dest_binding = pDescriptorCopy->dstBinding;
unsigned src_binding = pDescriptorCopy->srcBinding;
unsigned desc_count = pDescriptorCopy->descriptorCount;
while (desc_count && src_binding < src_layout->binding_count &&
dest_binding < dest_layout->binding_count) {
const struct panvk_descriptor_set_binding_layout *dest_binding_layout =
&dest_layout->bindings[dest_binding];
if (!dest_binding_layout->array_size) {
dest_binding++;
dest_offset = 0;
continue;
}
const struct panvk_descriptor_set_binding_layout *src_binding_layout =
&src_layout->bindings[src_binding];
if (!src_binding_layout->array_size) {
src_binding++;
src_offset = 0;
continue;
}
assert(dest_binding_layout->type == src_binding_layout->type);
unsigned ndescs = MIN3(desc_count,
dest_binding_layout->array_size - dest_offset,
src_binding_layout->array_size - src_offset);
struct panvk_descriptor *dest_descs = dest_set->descs + dest_binding_layout->desc_idx + dest_offset;
struct panvk_descriptor *src_descs = src_set->descs + src_binding_layout->desc_idx + src_offset;
memcpy(dest_descs, src_descs, ndescs * sizeof(*dest_descs));
desc_count -= ndescs;
dest_offset += ndescs;
src_offset += ndescs;
if (dest_offset == dest_binding_layout->array_size) {
dest_binding++;
dest_offset = 0;
}
if (src_offset == src_binding_layout->array_size) {
src_binding++;
src_offset = 0;
}
}
assert(!desc_count);
}
void
panvk_per_arch(UpdateDescriptorSets)(VkDevice _device,
uint32_t descriptorWriteCount,
const VkWriteDescriptorSet *pDescriptorWrites,
uint32_t descriptorCopyCount,
const VkCopyDescriptorSet *pDescriptorCopies)
{
VK_FROM_HANDLE(panvk_device, dev, _device);
for (unsigned i = 0; i < descriptorWriteCount; i++)
panvk_per_arch(write_descriptor_set)(dev, &pDescriptorWrites[i]);
for (unsigned i = 0; i < descriptorCopyCount; i++)
panvk_copy_descriptor_set(dev, &pDescriptorCopies[i]);
}

View File

@ -0,0 +1,317 @@
/*
* Copyright © 2021 Collabora Ltd.
*
* Derived from tu_device.c which is:
* Copyright © 2016 Red Hat.
* Copyright © 2016 Bas Nieuwenhuizen
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "gen_macros.h"
#include "decode.h"
#include "panvk_private.h"
#include "panvk_cs.h"
static void
panvk_queue_submit_batch(struct panvk_queue *queue,
struct panvk_batch *batch,
uint32_t *bos, unsigned nr_bos,
uint32_t *in_fences,
unsigned nr_in_fences)
{
const struct panvk_device *dev = queue->device;
unsigned debug = dev->physical_device->instance->debug_flags;
const struct panfrost_device *pdev = &dev->physical_device->pdev;
int ret;
/* Reset the batch if it's already been issued */
if (batch->issued) {
util_dynarray_foreach(&batch->jobs, void *, job)
memset((*job), 0, 4 * 4);
/* Reset the tiler before re-issuing the batch */
#if PAN_ARCH >= 6
if (batch->tiler.descs.cpu) {
memcpy(batch->tiler.descs.cpu, batch->tiler.templ,
pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP));
}
#else
if (batch->fb.desc.cpu) {
void *tiler = pan_section_ptr(batch->fb.desc.cpu, MULTI_TARGET_FRAMEBUFFER, TILER);
memcpy(tiler, batch->tiler.templ, pan_size(TILER_CONTEXT));
/* All weights set to 0, nothing to do here */
pan_section_pack(batch->fb.desc.cpu, MULTI_TARGET_FRAMEBUFFER, TILER_WEIGHTS, w);
}
#endif
}
if (batch->scoreboard.first_job) {
struct drm_panfrost_submit submit = {
.bo_handles = (uintptr_t)bos,
.bo_handle_count = nr_bos,
.in_syncs = (uintptr_t)in_fences,
.in_sync_count = nr_in_fences,
.out_sync = queue->sync,
.jc = batch->scoreboard.first_job,
};
ret = drmIoctl(pdev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
assert(!ret);
if (debug & (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC)) {
ret = drmSyncobjWait(pdev->fd, &submit.out_sync, 1, INT64_MAX, 0, NULL);
assert(!ret);
}
if (debug & PANVK_DEBUG_TRACE)
pandecode_jc(batch->scoreboard.first_job, PAN_ARCH >= 6, pdev->gpu_id);
}
if (batch->fragment_job) {
struct drm_panfrost_submit submit = {
.bo_handles = (uintptr_t)bos,
.bo_handle_count = nr_bos,
.out_sync = queue->sync,
.jc = batch->fragment_job,
.requirements = PANFROST_JD_REQ_FS,
};
if (batch->scoreboard.first_job) {
submit.in_syncs = (uintptr_t)(&queue->sync);
submit.in_sync_count = 1;
} else {
submit.in_syncs = (uintptr_t)in_fences;
submit.in_sync_count = nr_in_fences;
}
ret = drmIoctl(pdev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
assert(!ret);
if (debug & (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC)) {
ret = drmSyncobjWait(pdev->fd, &submit.out_sync, 1, INT64_MAX, 0, NULL);
assert(!ret);
}
if (debug & PANVK_DEBUG_TRACE)
pandecode_jc(batch->fragment_job, PAN_ARCH >= 6, pdev->gpu_id);
}
if (debug & PANVK_DEBUG_TRACE)
pandecode_next_frame();
batch->issued = true;
}
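/* Copy the current payload of the queue syncobj into another syncobj by
 * exporting it as a sync file and re-importing it. */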
static void
panvk_queue_transfer_sync(struct panvk_queue *queue, uint32_t syncobj)
{
const struct panfrost_device *pdev = &queue->device->physical_device->pdev;
int ret;
struct drm_syncobj_handle handle = {
.handle = queue->sync,
.flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE,
.fd = -1,
};
ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &handle);
assert(!ret);
assert(handle.fd >= 0);
handle.handle = syncobj;
ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &handle);
assert(!ret);
close(handle.fd);
}
static void
panvk_add_wait_event_syncobjs(struct panvk_batch *batch, uint32_t *in_fences, unsigned *nr_in_fences)
{
util_dynarray_foreach(&batch->event_ops, struct panvk_event_op, op) {
switch (op->type) {
case PANVK_EVENT_OP_SET:
/* Nothing to do yet */
break;
case PANVK_EVENT_OP_RESET:
/* Nothing to do yet */
break;
case PANVK_EVENT_OP_WAIT:
in_fences[(*nr_in_fences)++] = op->event->syncobj;
break;
default:
unreachable("bad panvk_event_op type\n");
}
}
}
static void
panvk_signal_event_syncobjs(struct panvk_queue *queue, struct panvk_batch *batch)
{
const struct panfrost_device *pdev = &queue->device->physical_device->pdev;
util_dynarray_foreach(&batch->event_ops, struct panvk_event_op, op) {
switch (op->type) {
case PANVK_EVENT_OP_SET: {
panvk_queue_transfer_sync(queue, op->event->syncobj);
break;
}
case PANVK_EVENT_OP_RESET: {
struct panvk_event *event = op->event;
struct drm_syncobj_array objs = {
.handles = (uint64_t) (uintptr_t) &event->syncobj,
.count_handles = 1
};
int ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_RESET, &objs);
assert(!ret);
break;
}
case PANVK_EVENT_OP_WAIT:
/* Nothing left to do */
break;
default:
unreachable("bad panvk_event_op type\n");
}
}
}
VkResult
panvk_per_arch(QueueSubmit)(VkQueue _queue,
uint32_t submitCount,
const VkSubmitInfo *pSubmits,
VkFence _fence)
{
VK_FROM_HANDLE(panvk_queue, queue, _queue);
VK_FROM_HANDLE(panvk_fence, fence, _fence);
const struct panfrost_device *pdev = &queue->device->physical_device->pdev;
for (uint32_t i = 0; i < submitCount; ++i) {
const VkSubmitInfo *submit = pSubmits + i;
unsigned nr_semaphores = submit->waitSemaphoreCount + 1;
uint32_t semaphores[nr_semaphores];
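/* The queue syncobj is also used as the out fence of each batch, so
 * waiting on it here serializes batches submitted to this queue. */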
semaphores[0] = queue->sync;
for (unsigned i = 0; i < submit->waitSemaphoreCount; i++) {
VK_FROM_HANDLE(panvk_semaphore, sem, submit->pWaitSemaphores[i]);
semaphores[i + 1] = sem->syncobj.temporary ? : sem->syncobj.permanent;
}
for (uint32_t j = 0; j < submit->commandBufferCount; ++j) {
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, (submit->pCommandBuffers[j]));
list_for_each_entry(struct panvk_batch, batch, &cmdbuf->batches, node) {
/* FIXME: should be done at the batch level */
unsigned nr_bos =
panvk_pool_num_bos(&cmdbuf->desc_pool) +
panvk_pool_num_bos(&cmdbuf->varying_pool) +
panvk_pool_num_bos(&cmdbuf->tls_pool) +
(batch->fb.info ? batch->fb.info->attachment_count : 0) +
(batch->blit.src ? 1 : 0) +
(batch->blit.dst ? 1 : 0) +
(batch->scoreboard.first_tiler ? 1 : 0) + 1;
unsigned bo_idx = 0;
uint32_t bos[nr_bos];
panvk_pool_get_bo_handles(&cmdbuf->desc_pool, &bos[bo_idx]);
bo_idx += panvk_pool_num_bos(&cmdbuf->desc_pool);
panvk_pool_get_bo_handles(&cmdbuf->varying_pool, &bos[bo_idx]);
bo_idx += panvk_pool_num_bos(&cmdbuf->varying_pool);
panvk_pool_get_bo_handles(&cmdbuf->tls_pool, &bos[bo_idx]);
bo_idx += panvk_pool_num_bos(&cmdbuf->tls_pool);
if (batch->fb.info) {
for (unsigned i = 0; i < batch->fb.info->attachment_count; i++) {
bos[bo_idx++] = batch->fb.info->attachments[i].iview->pview.image->data.bo->gem_handle;
}
}
if (batch->blit.src)
bos[bo_idx++] = batch->blit.src->gem_handle;
if (batch->blit.dst)
bos[bo_idx++] = batch->blit.dst->gem_handle;
if (batch->scoreboard.first_tiler)
bos[bo_idx++] = pdev->tiler_heap->gem_handle;
bos[bo_idx++] = pdev->sample_positions->gem_handle;
assert(bo_idx == nr_bos);
unsigned nr_in_fences = 0;
unsigned max_wait_event_syncobjs =
util_dynarray_num_elements(&batch->event_ops,
struct panvk_event_op);
uint32_t in_fences[nr_semaphores + max_wait_event_syncobjs];
memcpy(in_fences, semaphores, nr_semaphores * sizeof(*in_fences));
nr_in_fences += nr_semaphores;
panvk_add_wait_event_syncobjs(batch, in_fences, &nr_in_fences);
panvk_queue_submit_batch(queue, batch, bos, nr_bos, in_fences, nr_in_fences);
panvk_signal_event_syncobjs(queue, batch);
}
}
/* Transfer the out fence to signal semaphores */
for (unsigned i = 0; i < submit->signalSemaphoreCount; i++) {
VK_FROM_HANDLE(panvk_semaphore, sem, submit->pSignalSemaphores[i]);
panvk_queue_transfer_sync(queue, sem->syncobj.temporary ? : sem->syncobj.permanent);
}
}
if (fence) {
/* Transfer the last out fence to the fence object */
panvk_queue_transfer_sync(queue, fence->syncobj.temporary ? : fence->syncobj.permanent);
}
return VK_SUCCESS;
}
VkResult
panvk_per_arch(CreateSampler)(VkDevice _device,
const VkSamplerCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkSampler *pSampler)
{
VK_FROM_HANDLE(panvk_device, device, _device);
struct panvk_sampler *sampler;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
sampler = vk_object_alloc(&device->vk, pAllocator, sizeof(*sampler),
VK_OBJECT_TYPE_SAMPLER);
if (!sampler)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
STATIC_ASSERT(sizeof(sampler->desc) >= pan_size(SAMPLER));
panvk_per_arch(emit_sampler)(pCreateInfo, &sampler->desc);
*pSampler = panvk_sampler_to_handle(sampler);
return VK_SUCCESS;
}

View File

@ -0,0 +1,149 @@
/*
* Copyright © 2021 Collabora Ltd.
*
* Derived from tu_image.c which is:
* Copyright © 2016 Red Hat.
* Copyright © 2016 Bas Nieuwenhuizen
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "gen_macros.h"
#include "panvk_private.h"
#include "panfrost-quirks.h"
#include "util/debug.h"
#include "util/u_atomic.h"
#include "vk_format.h"
#include "vk_object.h"
#include "vk_util.h"
#include "drm-uapi/drm_fourcc.h"
static enum mali_texture_dimension
panvk_view_type_to_mali_tex_dim(VkImageViewType type)
{
switch (type) {
case VK_IMAGE_VIEW_TYPE_1D:
case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
return MALI_TEXTURE_DIMENSION_1D;
case VK_IMAGE_VIEW_TYPE_2D:
case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
return MALI_TEXTURE_DIMENSION_2D;
case VK_IMAGE_VIEW_TYPE_3D:
return MALI_TEXTURE_DIMENSION_3D;
case VK_IMAGE_VIEW_TYPE_CUBE:
case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
return MALI_TEXTURE_DIMENSION_CUBE;
default:
unreachable("Invalid view type");
}
}
static void
panvk_convert_swizzle(const VkComponentMapping *in,
unsigned char *out)
{
const VkComponentSwizzle *comp = &in->r;
for (unsigned i = 0; i < 4; i++) {
switch (comp[i]) {
case VK_COMPONENT_SWIZZLE_IDENTITY:
out[i] = PIPE_SWIZZLE_X + i;
break;
case VK_COMPONENT_SWIZZLE_ZERO:
out[i] = PIPE_SWIZZLE_0;
break;
case VK_COMPONENT_SWIZZLE_ONE:
out[i] = PIPE_SWIZZLE_1;
break;
case VK_COMPONENT_SWIZZLE_R:
out[i] = PIPE_SWIZZLE_X;
break;
case VK_COMPONENT_SWIZZLE_G:
out[i] = PIPE_SWIZZLE_Y;
break;
case VK_COMPONENT_SWIZZLE_B:
out[i] = PIPE_SWIZZLE_Z;
break;
case VK_COMPONENT_SWIZZLE_A:
out[i] = PIPE_SWIZZLE_W;
break;
default:
unreachable("Invalid swizzle");
}
}
}
VkResult
panvk_per_arch(CreateImageView)(VkDevice _device,
const VkImageViewCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkImageView *pView)
{
VK_FROM_HANDLE(panvk_device, device, _device);
VK_FROM_HANDLE(panvk_image, image, pCreateInfo->image);
struct panvk_image_view *view;
view = vk_object_zalloc(&device->vk, pAllocator, sizeof(*view),
VK_OBJECT_TYPE_IMAGE_VIEW);
if (view == NULL)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
view->pview.format = vk_format_to_pipe_format(pCreateInfo->format);
if (pCreateInfo->subresourceRange.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT)
view->pview.format = util_format_get_depth_only(view->pview.format);
else if (pCreateInfo->subresourceRange.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
view->pview.format = util_format_stencil_only(view->pview.format);
view->pview.dim = panvk_view_type_to_mali_tex_dim(pCreateInfo->viewType);
view->pview.first_level = pCreateInfo->subresourceRange.baseMipLevel;
view->pview.last_level = pCreateInfo->subresourceRange.baseMipLevel +
pCreateInfo->subresourceRange.levelCount - 1;
view->pview.first_layer = pCreateInfo->subresourceRange.baseArrayLayer;
view->pview.last_layer = pCreateInfo->subresourceRange.baseArrayLayer +
pCreateInfo->subresourceRange.layerCount - 1;
panvk_convert_swizzle(&pCreateInfo->components, view->pview.swizzle);
view->pview.image = &image->pimage;
view->pview.nr_samples = image->pimage.layout.nr_samples;
view->vk_format = pCreateInfo->format;
struct panfrost_device *pdev = &device->physical_device->pdev;
unsigned bo_size =
panfrost_estimate_texture_payload_size(pdev, &view->pview) +
pan_size(TEXTURE);
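/* On v5 and earlier the texture descriptor is written at the start of the BO,
 * followed by the surface descriptors; on v6+ the texture descriptor is packed
 * into view->desc and the BO only holds the surface descriptors.
 */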
unsigned surf_descs_offset = PAN_ARCH <= 5 ? pan_size(TEXTURE) : 0;
view->bo = panfrost_bo_create(pdev, bo_size, 0, "Texture descriptor");
struct panfrost_ptr surf_descs = {
.cpu = view->bo->ptr.cpu + surf_descs_offset,
.gpu = view->bo->ptr.gpu + surf_descs_offset,
};
void *tex_desc = PAN_ARCH >= 6 ?
&view->desc : view->bo->ptr.cpu;
STATIC_ASSERT(sizeof(view->desc) >= pan_size(TEXTURE));
panfrost_new_texture(pdev, &view->pview, tex_desc, &surf_descs);
*pView = panvk_image_view_to_handle(view);
return VK_SUCCESS;
}

View File

@ -21,122 +21,125 @@
* DEALINGS IN THE SOFTWARE.
*/
#include "gen_macros.h"
#include "nir/nir_builder.h"
#include "pan_blitter.h"
#include "pan_encoder.h"
#include "pan_shader.h"
#include "panvk_private.h"
#include "vk_format.h"
void
panvk_CmdBlitImage(VkCommandBuffer commandBuffer,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkImage destImage,
VkImageLayout destImageLayout,
uint32_t regionCount,
const VkImageBlit *pRegions,
VkFilter filter)
panvk_per_arch(CmdBlitImage)(VkCommandBuffer commandBuffer,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkImage destImage,
VkImageLayout destImageLayout,
uint32_t regionCount,
const VkImageBlit *pRegions,
VkFilter filter)
{
panvk_stub();
}
void
panvk_CmdCopyImage(VkCommandBuffer commandBuffer,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkImage destImage,
VkImageLayout destImageLayout,
uint32_t regionCount,
const VkImageCopy *pRegions)
panvk_per_arch(CmdCopyImage)(VkCommandBuffer commandBuffer,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkImage destImage,
VkImageLayout destImageLayout,
uint32_t regionCount,
const VkImageCopy *pRegions)
{
panvk_stub();
}
void
panvk_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
VkBuffer srcBuffer,
VkImage destImage,
VkImageLayout destImageLayout,
uint32_t regionCount,
const VkBufferImageCopy *pRegions)
panvk_per_arch(CmdCopyBufferToImage)(VkCommandBuffer commandBuffer,
VkBuffer srcBuffer,
VkImage destImage,
VkImageLayout destImageLayout,
uint32_t regionCount,
const VkBufferImageCopy *pRegions)
{
panvk_stub();
}
void
panvk_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkBuffer destBuffer,
uint32_t regionCount,
const VkBufferImageCopy *pRegions)
panvk_per_arch(CmdCopyImageToBuffer)(VkCommandBuffer commandBuffer,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkBuffer destBuffer,
uint32_t regionCount,
const VkBufferImageCopy *pRegions)
{
panvk_stub();
}
void
panvk_CmdCopyBuffer(VkCommandBuffer commandBuffer,
VkBuffer srcBuffer,
VkBuffer destBuffer,
uint32_t regionCount,
const VkBufferCopy *pRegions)
panvk_per_arch(CmdCopyBuffer)(VkCommandBuffer commandBuffer,
VkBuffer srcBuffer,
VkBuffer destBuffer,
uint32_t regionCount,
const VkBufferCopy *pRegions)
{
panvk_stub();
}
void
panvk_CmdResolveImage(VkCommandBuffer cmd_buffer_h,
VkImage src_image_h,
VkImageLayout src_image_layout,
VkImage dest_image_h,
VkImageLayout dest_image_layout,
uint32_t region_count,
const VkImageResolve *regions)
panvk_per_arch(CmdResolveImage)(VkCommandBuffer cmd_buffer_h,
VkImage src_image_h,
VkImageLayout src_image_layout,
VkImage dest_image_h,
VkImageLayout dest_image_layout,
uint32_t region_count,
const VkImageResolve *regions)
{
panvk_stub();
}
void
panvk_CmdFillBuffer(VkCommandBuffer commandBuffer,
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize fillSize,
uint32_t data)
panvk_per_arch(CmdFillBuffer)(VkCommandBuffer commandBuffer,
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize fillSize,
uint32_t data)
{
panvk_stub();
}
void
panvk_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize dataSize,
const void *pData)
panvk_per_arch(CmdUpdateBuffer)(VkCommandBuffer commandBuffer,
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize dataSize,
const void *pData)
{
panvk_stub();
}
void
panvk_CmdClearColorImage(VkCommandBuffer commandBuffer,
VkImage image,
VkImageLayout imageLayout,
const VkClearColorValue *pColor,
uint32_t rangeCount,
const VkImageSubresourceRange *pRanges)
panvk_per_arch(CmdClearColorImage)(VkCommandBuffer commandBuffer,
VkImage image,
VkImageLayout imageLayout,
const VkClearColorValue *pColor,
uint32_t rangeCount,
const VkImageSubresourceRange *pRanges)
{
panvk_stub();
}
void
panvk_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
VkImage image_h,
VkImageLayout imageLayout,
const VkClearDepthStencilValue *pDepthStencil,
uint32_t rangeCount,
const VkImageSubresourceRange *pRanges)
panvk_per_arch(CmdClearDepthStencilImage)(VkCommandBuffer commandBuffer,
VkImage image_h,
VkImageLayout imageLayout,
const VkClearDepthStencilValue *pDepthStencil,
uint32_t rangeCount,
const VkImageSubresourceRange *pRanges)
{
panvk_stub();
}
@ -203,7 +206,7 @@ panvk_meta_clear_attachments_shader(struct panfrost_device *pdev,
mali_ptr shader =
pan_pool_upload_aligned(bin_pool, binary.data, binary.size,
pan_is_bifrost(pdev) ? 128 : 64);
PAN_ARCH >= 6 ? 128 : 64);
util_dynarray_fini(&binary);
ralloc_free(b.shader);
@ -224,8 +227,6 @@ panvk_meta_clear_attachments_emit_rsd(struct panfrost_device *pdev,
PAN_DESC(RENDERER_STATE),
PAN_DESC(BLEND));
unsigned fullmask = (1 << util_format_get_nr_components(format)) - 1;
/* TODO: Support multiple render targets */
assert(rt == 0);
@ -243,47 +244,48 @@ panvk_meta_clear_attachments_emit_rsd(struct panfrost_device *pdev,
cfg.stencil_front.mask = 0xFF;
cfg.stencil_back = cfg.stencil_front;
if (pan_is_bifrost(pdev)) {
cfg.properties.bifrost.allow_forward_pixel_to_be_killed = true;
cfg.properties.bifrost.allow_forward_pixel_to_kill = true;
cfg.properties.bifrost.zs_update_operation =
MALI_PIXEL_KILL_STRONG_EARLY;
cfg.properties.bifrost.pixel_kill_operation =
MALI_PIXEL_KILL_FORCE_EARLY;
} else {
cfg.properties.midgard.shader_reads_tilebuffer = false;
cfg.properties.midgard.work_register_count = shader_info->work_reg_count;
cfg.properties.midgard.force_early_z = true;
cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
}
#if PAN_ARCH >= 6
cfg.properties.bifrost.allow_forward_pixel_to_be_killed = true;
cfg.properties.bifrost.allow_forward_pixel_to_kill = true;
cfg.properties.bifrost.zs_update_operation =
MALI_PIXEL_KILL_STRONG_EARLY;
cfg.properties.bifrost.pixel_kill_operation =
MALI_PIXEL_KILL_FORCE_EARLY;
#else
cfg.properties.midgard.shader_reads_tilebuffer = false;
cfg.properties.midgard.work_register_count = shader_info->work_reg_count;
cfg.properties.midgard.force_early_z = true;
cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
#endif
}
pan_pack(rsd_ptr.cpu + pan_size(RENDERER_STATE), BLEND, cfg) {
cfg.round_to_fb_precision = true;
cfg.load_destination = false;
if (pan_is_bifrost(pdev)) {
cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_OPAQUE;
cfg.bifrost.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
cfg.bifrost.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
cfg.bifrost.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
cfg.bifrost.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
cfg.bifrost.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
cfg.bifrost.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
cfg.bifrost.equation.color_mask = 0xf;
cfg.bifrost.internal.fixed_function.num_comps = 4;
cfg.bifrost.internal.fixed_function.conversion.memory_format =
panfrost_format_to_bifrost_blend(pdev, format, false);
cfg.bifrost.internal.fixed_function.conversion.register_format =
shader_info->bifrost.blend[rt].format;
} else {
cfg.midgard.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
cfg.midgard.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
cfg.midgard.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
cfg.midgard.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
cfg.midgard.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
cfg.midgard.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
cfg.midgard.equation.color_mask = fullmask;
}
#if PAN_ARCH >= 6
cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_OPAQUE;
cfg.bifrost.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
cfg.bifrost.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
cfg.bifrost.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
cfg.bifrost.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
cfg.bifrost.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
cfg.bifrost.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
cfg.bifrost.equation.color_mask = 0xf;
cfg.bifrost.internal.fixed_function.num_comps = 4;
cfg.bifrost.internal.fixed_function.conversion.memory_format =
panfrost_format_to_bifrost_blend(pdev, format, false);
cfg.bifrost.internal.fixed_function.conversion.register_format =
shader_info->bifrost.blend[rt].format;
#else
cfg.midgard.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
cfg.midgard.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
cfg.midgard.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
cfg.midgard.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
cfg.midgard.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
cfg.midgard.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
cfg.midgard.equation.color_mask =
(1 << util_format_get_nr_components(format)) - 1;
#endif
}
return rsd_ptr.gpu;
@ -342,85 +344,49 @@ panvk_meta_clear_attachment_emit_dcd(struct pan_pool *pool,
cfg.push_uniforms = push_constants;
cfg.position = coords;
cfg.viewport = vpd;
cfg.texture_descriptor_is_64b = !pan_is_bifrost(pool->dev);
cfg.texture_descriptor_is_64b = PAN_ARCH <= 5;
}
}
static struct panfrost_ptr
panvk_meta_clear_attachment_emit_bifrost_tiler_job(struct pan_pool *desc_pool,
struct pan_scoreboard *scoreboard,
mali_ptr coords,
mali_ptr ubo, mali_ptr push_constants,
mali_ptr vpd, mali_ptr rsd,
mali_ptr tsd, mali_ptr tiler)
panvk_meta_clear_attachment_emit_tiler_job(struct pan_pool *desc_pool,
struct pan_scoreboard *scoreboard,
mali_ptr coords,
mali_ptr ubo, mali_ptr push_constants,
mali_ptr vpd, mali_ptr rsd,
mali_ptr tsd, mali_ptr tiler)
{
struct panfrost_ptr job =
pan_pool_alloc_desc(desc_pool, BIFROST_TILER_JOB);
pan_pool_alloc_desc(desc_pool, TILER_JOB);
panvk_meta_clear_attachment_emit_dcd(desc_pool,
coords,
ubo, push_constants,
vpd, tsd, rsd,
pan_section_ptr(job.cpu, BIFROST_TILER_JOB, DRAW));
pan_section_ptr(job.cpu, TILER_JOB, DRAW));
pan_section_pack(job.cpu, BIFROST_TILER_JOB, PRIMITIVE, cfg) {
pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
cfg.index_count = 4;
cfg.job_task_split = 6;
}
pan_section_pack(job.cpu, BIFROST_TILER_JOB, PRIMITIVE_SIZE, cfg) {
pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
cfg.constant = 1.0f;
}
void *invoc = pan_section_ptr(job.cpu,
BIFROST_TILER_JOB,
TILER_JOB,
INVOCATION);
panfrost_pack_work_groups_compute(invoc, 1, 4,
1, 1, 1, 1, true, false);
pan_section_pack(job.cpu, BIFROST_TILER_JOB, PADDING, cfg);
pan_section_pack(job.cpu, BIFROST_TILER_JOB, TILER, cfg) {
#if PAN_ARCH >= 6
pan_section_pack(job.cpu, TILER_JOB, PADDING, cfg);
pan_section_pack(job.cpu, TILER_JOB, TILER, cfg) {
cfg.address = tiler;
}
panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,
false, false, 0, 0, &job, false);
return job;
}
static struct panfrost_ptr
panvk_meta_clear_attachment_emit_midgard_tiler_job(struct pan_pool *desc_pool,
struct pan_scoreboard *scoreboard,
mali_ptr coords,
mali_ptr ubo, mali_ptr push_constants,
mali_ptr vpd, mali_ptr rsd,
mali_ptr tsd)
{
struct panfrost_ptr job =
pan_pool_alloc_desc(desc_pool, MIDGARD_TILER_JOB);
panvk_meta_clear_attachment_emit_dcd(desc_pool,
coords,
ubo, push_constants,
vpd, tsd, rsd,
pan_section_ptr(job.cpu, MIDGARD_TILER_JOB, DRAW));
pan_section_pack(job.cpu, MIDGARD_TILER_JOB, PRIMITIVE, cfg) {
cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
cfg.index_count = 4;
cfg.job_task_split = 6;
}
pan_section_pack(job.cpu, MIDGARD_TILER_JOB, PRIMITIVE_SIZE, cfg) {
cfg.constant = 1.0f;
}
void *invoc = pan_section_ptr(job.cpu,
MIDGARD_TILER_JOB,
INVOCATION);
panfrost_pack_work_groups_compute(invoc, 1, 4,
1, 1, 1, 1, true, false);
#endif
panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,
false, false, 0, 0, &job, false);
@ -477,19 +443,19 @@ panvk_meta_clear_attachment(struct panvk_cmd_buffer *cmdbuf,
/* TODO: Support depth/stencil */
assert(mask == VK_IMAGE_ASPECT_COLOR_BIT);
panvk_cmd_alloc_fb_desc(cmdbuf);
panvk_cmd_alloc_tls_desc(cmdbuf);
panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf);
if (pan_is_bifrost(pdev)) {
panvk_cmd_get_bifrost_tiler_context(cmdbuf,
batch->fb.info->width,
batch->fb.info->height);
} else {
panvk_cmd_get_midgard_polygon_list(cmdbuf,
#if PAN_ARCH <= 5
panvk_per_arch(cmd_get_polygon_list)(cmdbuf,
batch->fb.info->width,
batch->fb.info->height,
true);
#else
panvk_per_arch(cmd_get_tiler_context)(cmdbuf,
batch->fb.info->width,
batch->fb.info->height,
true);
}
batch->fb.info->height);
#endif
mali_ptr vpd = panvk_meta_emit_viewport(&cmdbuf->desc_pool.base,
minx, miny, maxx, maxy);
@ -525,31 +491,16 @@ panvk_meta_clear_attachment(struct panvk_cmd_buffer *cmdbuf,
&cmdbuf->desc_pool.base,
clear_value);
mali_ptr tsd, tiler;
if (pan_is_bifrost(pdev)) {
tsd = batch->tls.gpu;
tiler = batch->tiler.bifrost_descs.gpu;
} else {
tsd = batch->fb.desc.gpu;
tiler = 0;
}
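/* In the unified path below, v6+ uses the separate thread-storage descriptor
 * and needs a tiler context, while older GPUs take the thread storage from the
 * framebuffer descriptor and have no tiler descriptor to point to.
 */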
mali_ptr tsd = PAN_ARCH >= 6 ? batch->tls.gpu : batch->fb.desc.gpu;
mali_ptr tiler = PAN_ARCH >= 6 ? batch->tiler.descs.gpu : 0;
struct panfrost_ptr job;
if (pan_is_bifrost(pdev)) {
job = panvk_meta_clear_attachment_emit_bifrost_tiler_job(&cmdbuf->desc_pool.base,
&batch->scoreboard,
coordinates,
ubo, pushconsts,
vpd, rsd, tsd, tiler);
} else {
job = panvk_meta_clear_attachment_emit_midgard_tiler_job(&cmdbuf->desc_pool.base,
&batch->scoreboard,
coordinates,
ubo, pushconsts,
vpd, rsd, tsd);
}
job = panvk_meta_clear_attachment_emit_tiler_job(&cmdbuf->desc_pool.base,
&batch->scoreboard,
coordinates,
ubo, pushconsts,
vpd, rsd, tsd, tiler);
util_dynarray_append(&batch->jobs, void *, job.cpu);
}
@ -585,11 +536,11 @@ panvk_meta_clear_attachment_init(struct panvk_physical_device *dev)
}
void
panvk_CmdClearAttachments(VkCommandBuffer commandBuffer,
uint32_t attachmentCount,
const VkClearAttachment *pAttachments,
uint32_t rectCount,
const VkClearRect *pRects)
panvk_per_arch(CmdClearAttachments)(VkCommandBuffer commandBuffer,
uint32_t attachmentCount,
const VkClearAttachment *pAttachments,
uint32_t rectCount,
const VkClearRect *pRects)
{
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
const struct panvk_subpass *subpass = cmdbuf->state.subpass;
@ -617,7 +568,7 @@ panvk_CmdClearAttachments(VkCommandBuffer commandBuffer,
}
void
panvk_meta_init(struct panvk_physical_device *dev)
panvk_per_arch(meta_init)(struct panvk_physical_device *dev)
{
panvk_pool_init(&dev->meta.bin_pool, &dev->pdev, NULL, PAN_BO_EXECUTE,
16 * 1024, "panvk_meta binary pool", false);
@ -635,7 +586,7 @@ panvk_meta_init(struct panvk_physical_device *dev)
}
void
panvk_meta_cleanup(struct panvk_physical_device *dev)
panvk_per_arch(meta_cleanup)(struct panvk_physical_device *dev)
{
pan_blitter_cleanup(&dev->pdev);
panvk_pool_cleanup(&dev->meta.blitter.desc_pool);

View File

@ -0,0 +1,36 @@
/*
* Copyright (C) 2021 Collabora Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef PANVK_PRIVATE_H
#error "Must be included from panvk_private.h"
#endif
#ifndef PAN_ARCH
#error "no arch"
#endif
void
panvk_per_arch(meta_init)(struct panvk_physical_device *dev);
void
panvk_per_arch(meta_cleanup)(struct panvk_physical_device *dev);

View File

@ -0,0 +1,991 @@
/*
* Copyright © 2021 Collabora Ltd.
*
* Derived from tu_pipeline.c which is:
* Copyright © 2016 Red Hat.
* Copyright © 2016 Bas Nieuwenhuizen
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "panvk_cs.h"
#include "panvk_private.h"
#include "pan_bo.h"
#include "nir/nir.h"
#include "nir/nir_builder.h"
#include "spirv/nir_spirv.h"
#include "util/debug.h"
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"
#include "vk_format.h"
#include "vk_util.h"
#include "panfrost/util/pan_lower_framebuffer.h"
#include "panfrost-quirks.h"
struct panvk_pipeline_builder
{
struct panvk_device *device;
struct panvk_pipeline_cache *cache;
const VkAllocationCallbacks *alloc;
const VkGraphicsPipelineCreateInfo *create_info;
const struct panvk_pipeline_layout *layout;
struct panvk_shader *shaders[MESA_SHADER_STAGES];
struct {
uint32_t shader_offset;
uint32_t rsd_offset;
uint32_t sysvals_offset;
} stages[MESA_SHADER_STAGES];
uint32_t blend_shader_offsets[MAX_RTS];
uint32_t shader_total_size;
uint32_t static_state_size;
uint32_t vpd_offset;
bool rasterizer_discard;
/* these states are affected by rasterizer_discard */
VkSampleCountFlagBits samples;
bool use_depth_stencil_attachment;
uint8_t active_color_attachments;
enum pipe_format color_attachment_formats[MAX_RTS];
};
static VkResult
panvk_pipeline_builder_create_pipeline(struct panvk_pipeline_builder *builder,
struct panvk_pipeline **out_pipeline)
{
struct panvk_device *dev = builder->device;
struct panvk_pipeline *pipeline =
vk_object_zalloc(&dev->vk, builder->alloc,
sizeof(*pipeline), VK_OBJECT_TYPE_PIPELINE);
if (!pipeline)
return VK_ERROR_OUT_OF_HOST_MEMORY;
pipeline->layout = builder->layout;
*out_pipeline = pipeline;
return VK_SUCCESS;
}
static void
panvk_pipeline_builder_finish(struct panvk_pipeline_builder *builder)
{
for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
if (!builder->shaders[i])
continue;
panvk_shader_destroy(builder->device, builder->shaders[i], builder->alloc);
}
}
static bool
panvk_pipeline_static_state(struct panvk_pipeline *pipeline, uint32_t id)
{
return !(pipeline->dynamic_state_mask & (1 << id));
}
static VkResult
panvk_pipeline_builder_compile_shaders(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = {
NULL
};
for (uint32_t i = 0; i < builder->create_info->stageCount; i++) {
gl_shader_stage stage = vk_to_mesa_shader_stage(builder->create_info->pStages[i].stage);
stage_infos[stage] = &builder->create_info->pStages[i];
}
/* compile shaders in reverse order */
unsigned sysval_ubo = builder->layout->num_ubos;
for (gl_shader_stage stage = MESA_SHADER_STAGES - 1;
stage > MESA_SHADER_NONE; stage--) {
const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage];
if (!stage_info)
continue;
struct panvk_shader *shader;
shader = panvk_per_arch(shader_create)(builder->device, stage, stage_info,
builder->layout, sysval_ubo,
&pipeline->blend.state,
panvk_pipeline_static_state(pipeline,
VK_DYNAMIC_STATE_BLEND_CONSTANTS),
builder->alloc);
if (!shader)
return VK_ERROR_OUT_OF_HOST_MEMORY;
if (shader->info.sysvals.sysval_count)
sysval_ubo++;
builder->shaders[stage] = shader;
builder->shader_total_size = ALIGN_POT(builder->shader_total_size, 128);
builder->stages[stage].shader_offset = builder->shader_total_size;
builder->shader_total_size +=
util_dynarray_num_elements(&shader->binary, uint8_t);
}
return VK_SUCCESS;
}
static VkResult
panvk_pipeline_builder_upload_shaders(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
struct panfrost_bo *bin_bo =
panfrost_bo_create(&builder->device->physical_device->pdev,
builder->shader_total_size, PAN_BO_EXECUTE,
"Shader");
pipeline->binary_bo = bin_bo;
panfrost_bo_mmap(bin_bo);
for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
const struct panvk_shader *shader = builder->shaders[i];
if (!shader)
continue;
memcpy(pipeline->binary_bo->ptr.cpu + builder->stages[i].shader_offset,
util_dynarray_element(&shader->binary, uint8_t, 0),
util_dynarray_num_elements(&shader->binary, uint8_t));
}
return VK_SUCCESS;
}
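/* A sysval can only be baked into the pipeline's static state if the state it
 * is derived from is not flagged as dynamic.
 */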
static bool
panvk_pipeline_static_sysval(struct panvk_pipeline *pipeline,
unsigned id)
{
switch (id) {
case PAN_SYSVAL_VIEWPORT_SCALE:
case PAN_SYSVAL_VIEWPORT_OFFSET:
return panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT);
default:
return false;
}
}
static void
panvk_pipeline_builder_alloc_static_state_bo(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
struct panfrost_device *pdev =
&builder->device->physical_device->pdev;
unsigned bo_size = 0;
for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
const struct panvk_shader *shader = builder->shaders[i];
if (!shader)
continue;
if (pipeline->fs.dynamic_rsd && i == MESA_SHADER_FRAGMENT)
continue;
bo_size = ALIGN_POT(bo_size, pan_alignment(RENDERER_STATE));
builder->stages[i].rsd_offset = bo_size;
bo_size += pan_size(RENDERER_STATE);
if (i == MESA_SHADER_FRAGMENT)
bo_size += pan_size(BLEND) * pipeline->blend.state.rt_count;
}
if (panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT) &&
panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_SCISSOR)) {
bo_size = ALIGN_POT(bo_size, pan_alignment(VIEWPORT));
builder->vpd_offset = bo_size;
bo_size += pan_size(VIEWPORT);
}
for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
const struct panvk_shader *shader = builder->shaders[i];
if (!shader || !shader->info.sysvals.sysval_count)
continue;
bool static_sysvals = true;
for (unsigned s = 0; s < shader->info.sysvals.sysval_count; s++) {
unsigned id = shader->info.sysvals.sysvals[s];
static_sysvals &= panvk_pipeline_static_sysval(pipeline, id);
switch (PAN_SYSVAL_TYPE(id)) {
case PAN_SYSVAL_VIEWPORT_SCALE:
case PAN_SYSVAL_VIEWPORT_OFFSET:
pipeline->sysvals[i].dirty_mask |= PANVK_DYNAMIC_VIEWPORT;
break;
default:
break;
}
}
if (!static_sysvals) {
builder->stages[i].sysvals_offset = ~0;
continue;
}
bo_size = ALIGN_POT(bo_size, 16);
builder->stages[i].sysvals_offset = bo_size;
bo_size += shader->info.sysvals.sysval_count * 16;
}
if (bo_size) {
pipeline->state_bo =
panfrost_bo_create(pdev, bo_size, 0, "Pipeline descriptors");
panfrost_bo_mmap(pipeline->state_bo);
}
}
static void
panvk_pipeline_builder_upload_sysval(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline,
unsigned id, union panvk_sysval_data *data)
{
switch (PAN_SYSVAL_TYPE(id)) {
case PAN_SYSVAL_VIEWPORT_SCALE:
panvk_sysval_upload_viewport_scale(builder->create_info->pViewportState->pViewports,
data);
break;
case PAN_SYSVAL_VIEWPORT_OFFSET:
panvk_sysval_upload_viewport_offset(builder->create_info->pViewportState->pViewports,
                                    data);
break;
default:
unreachable("Invalid static sysval");
}
}
static void
panvk_pipeline_builder_init_sysvals(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline,
gl_shader_stage stage)
{
const struct panvk_shader *shader = builder->shaders[stage];
pipeline->sysvals[stage].ids = shader->info.sysvals;
pipeline->sysvals[stage].ubo_idx = shader->sysval_ubo;
if (!shader->info.sysvals.sysval_count ||
builder->stages[stage].sysvals_offset == ~0)
return;
union panvk_sysval_data *static_data =
pipeline->state_bo->ptr.cpu + builder->stages[stage].sysvals_offset;
pipeline->sysvals[stage].ubo =
pipeline->state_bo->ptr.gpu + builder->stages[stage].sysvals_offset;
for (unsigned i = 0; i < shader->info.sysvals.sysval_count; i++) {
unsigned id = shader->info.sysvals.sysvals[i];
panvk_pipeline_builder_upload_sysval(builder,
pipeline,
id, &static_data[i]);
}
}
static void
panvk_pipeline_builder_init_shaders(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
const struct panvk_shader *shader = builder->shaders[i];
if (!shader)
continue;
pipeline->tls_size = MAX2(pipeline->tls_size, shader->info.tls_size);
pipeline->wls_size = MAX2(pipeline->wls_size, shader->info.wls_size);
if (i == MESA_SHADER_VERTEX && shader->info.vs.writes_point_size)
pipeline->ia.writes_point_size = true;
mali_ptr shader_ptr = pipeline->binary_bo->ptr.gpu +
builder->stages[i].shader_offset;
void *rsd = pipeline->state_bo->ptr.cpu + builder->stages[i].rsd_offset;
mali_ptr gpu_rsd = pipeline->state_bo->ptr.gpu + builder->stages[i].rsd_offset;
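/* Non-fragment stages always get a static RSD. The fragment stage gets one
 * too unless it depends on dynamic state (dynamic_rsd), in which case only
 * templates are filled here and the final RSD/blend descriptors are emitted
 * at draw time.
 */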
if (i != MESA_SHADER_FRAGMENT) {
panvk_per_arch(emit_non_fs_rsd)(builder->device, &shader->info, shader_ptr, rsd);
} else if (!pipeline->fs.dynamic_rsd) {
void *bd = rsd + pan_size(RENDERER_STATE);
panvk_per_arch(emit_base_fs_rsd)(builder->device, pipeline, rsd);
for (unsigned rt = 0; rt < MAX2(pipeline->blend.state.rt_count, 1); rt++) {
panvk_per_arch(emit_blend)(builder->device, pipeline, rt, bd);
bd += pan_size(BLEND);
}
} else {
gpu_rsd = 0;
panvk_per_arch(emit_base_fs_rsd)(builder->device, pipeline, &pipeline->fs.rsd_template);
for (unsigned rt = 0; rt < MAX2(pipeline->blend.state.rt_count, 1); rt++) {
panvk_per_arch(emit_blend)(builder->device, pipeline, rt,
&pipeline->blend.bd_template[rt]);
}
}
pipeline->rsds[i] = gpu_rsd;
panvk_pipeline_builder_init_sysvals(builder, pipeline, i);
}
pipeline->num_ubos = builder->layout->num_ubos;
for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++) {
if (pipeline->sysvals[i].ids.sysval_count)
pipeline->num_ubos = MAX2(pipeline->num_ubos, pipeline->sysvals[i].ubo_idx + 1);
}
pipeline->num_sysvals = 0;
for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++)
pipeline->num_sysvals += pipeline->sysvals[i].ids.sysval_count;
}
static void
panvk_pipeline_builder_parse_viewport(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
/* The spec says:
*
* pViewportState is a pointer to an instance of the
* VkPipelineViewportStateCreateInfo structure, and is ignored if the
* pipeline has rasterization disabled.
*/
if (!builder->rasterizer_discard &&
panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT) &&
panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_SCISSOR)) {
void *vpd = pipeline->state_bo->ptr.cpu + builder->vpd_offset;
panvk_per_arch(emit_viewport)(builder->create_info->pViewportState->pViewports,
builder->create_info->pViewportState->pScissors,
vpd);
pipeline->vpd = pipeline->state_bo->ptr.gpu +
builder->vpd_offset;
} else {
if (builder->create_info->pViewportState->pViewports)
pipeline->viewport = builder->create_info->pViewportState->pViewports[0];
if (builder->create_info->pViewportState->pScissors)
pipeline->scissor = builder->create_info->pViewportState->pScissors[0];
}
}
static void
panvk_pipeline_builder_parse_dynamic(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
const VkPipelineDynamicStateCreateInfo *dynamic_info =
builder->create_info->pDynamicState;
if (!dynamic_info)
return;
for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) {
VkDynamicState state = dynamic_info->pDynamicStates[i];
switch (state) {
case VK_DYNAMIC_STATE_VIEWPORT ... VK_DYNAMIC_STATE_STENCIL_REFERENCE:
pipeline->dynamic_state_mask |= 1 << state;
break;
default:
unreachable("unsupported dynamic state");
}
}
}
static enum mali_draw_mode
translate_prim_topology(VkPrimitiveTopology in)
{
switch (in) {
case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
return MALI_DRAW_MODE_POINTS;
case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
return MALI_DRAW_MODE_LINES;
case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
return MALI_DRAW_MODE_LINE_STRIP;
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
return MALI_DRAW_MODE_TRIANGLES;
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
return MALI_DRAW_MODE_TRIANGLE_STRIP;
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
return MALI_DRAW_MODE_TRIANGLE_FAN;
case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
default:
unreachable("Invalid primitive type");
}
}
static void
panvk_pipeline_builder_parse_input_assembly(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
pipeline->ia.primitive_restart =
builder->create_info->pInputAssemblyState->primitiveRestartEnable;
pipeline->ia.topology =
translate_prim_topology(builder->create_info->pInputAssemblyState->topology);
}
static enum pipe_logicop
translate_logicop(VkLogicOp in)
{
switch (in) {
case VK_LOGIC_OP_CLEAR: return PIPE_LOGICOP_CLEAR;
case VK_LOGIC_OP_AND: return PIPE_LOGICOP_AND;
case VK_LOGIC_OP_AND_REVERSE: return PIPE_LOGICOP_AND_REVERSE;
case VK_LOGIC_OP_COPY: return PIPE_LOGICOP_COPY;
case VK_LOGIC_OP_AND_INVERTED: return PIPE_LOGICOP_AND_INVERTED;
case VK_LOGIC_OP_NO_OP: return PIPE_LOGICOP_NOOP;
case VK_LOGIC_OP_XOR: return PIPE_LOGICOP_XOR;
case VK_LOGIC_OP_OR: return PIPE_LOGICOP_OR;
case VK_LOGIC_OP_NOR: return PIPE_LOGICOP_NOR;
case VK_LOGIC_OP_EQUIVALENT: return PIPE_LOGICOP_EQUIV;
case VK_LOGIC_OP_INVERT: return PIPE_LOGICOP_INVERT;
case VK_LOGIC_OP_OR_REVERSE: return PIPE_LOGICOP_OR_REVERSE;
case VK_LOGIC_OP_COPY_INVERTED: return PIPE_LOGICOP_COPY_INVERTED;
case VK_LOGIC_OP_OR_INVERTED: return PIPE_LOGICOP_OR_INVERTED;
case VK_LOGIC_OP_NAND: return PIPE_LOGICOP_NAND;
case VK_LOGIC_OP_SET: return PIPE_LOGICOP_SET;
default: unreachable("Invalid logicop");
}
}
static enum blend_func
translate_blend_op(VkBlendOp in)
{
switch (in) {
case VK_BLEND_OP_ADD: return BLEND_FUNC_ADD;
case VK_BLEND_OP_SUBTRACT: return BLEND_FUNC_SUBTRACT;
case VK_BLEND_OP_REVERSE_SUBTRACT: return BLEND_FUNC_REVERSE_SUBTRACT;
case VK_BLEND_OP_MIN: return BLEND_FUNC_MIN;
case VK_BLEND_OP_MAX: return BLEND_FUNC_MAX;
default: unreachable("Invalid blend op");
}
}
static enum blend_factor
translate_blend_factor(VkBlendFactor in, bool dest_has_alpha)
{
switch (in) {
case VK_BLEND_FACTOR_ZERO:
case VK_BLEND_FACTOR_ONE:
return BLEND_FACTOR_ZERO;
case VK_BLEND_FACTOR_SRC_COLOR:
case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
return BLEND_FACTOR_SRC_COLOR;
case VK_BLEND_FACTOR_DST_COLOR:
case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
return BLEND_FACTOR_DST_COLOR;
case VK_BLEND_FACTOR_SRC_ALPHA:
case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
return BLEND_FACTOR_SRC_ALPHA;
case VK_BLEND_FACTOR_DST_ALPHA:
case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
return dest_has_alpha ? BLEND_FACTOR_DST_ALPHA : BLEND_FACTOR_ZERO;
case VK_BLEND_FACTOR_CONSTANT_COLOR:
case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
return BLEND_FACTOR_CONSTANT_COLOR;
case VK_BLEND_FACTOR_CONSTANT_ALPHA:
case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
return BLEND_FACTOR_CONSTANT_ALPHA;
case VK_BLEND_FACTOR_SRC1_COLOR:
case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
return BLEND_FACTOR_SRC1_COLOR;
case VK_BLEND_FACTOR_SRC1_ALPHA:
case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
return BLEND_FACTOR_SRC1_ALPHA;
case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
return BLEND_FACTOR_SRC_ALPHA_SATURATE;
default: unreachable("Invalid blend factor");
}
}
static bool
inverted_blend_factor(VkBlendFactor in, bool dest_has_alpha)
{
switch (in) {
case VK_BLEND_FACTOR_ONE:
case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
return true;
case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
return dest_has_alpha ? true : false;
case VK_BLEND_FACTOR_DST_ALPHA:
return !dest_has_alpha ? true : false;
default:
return false;
}
}
bool
panvk_per_arch(blend_needs_lowering)(const struct panfrost_device *dev,
const struct pan_blend_state *state,
unsigned rt)
{
/* LogicOp requires a blend shader */
if (state->logicop_enable)
return true;
/* Not all formats can be blended by fixed-function hardware */
if (!panfrost_blendable_formats_v7[state->rts[rt].format].internal)
return true;
unsigned constant_mask = pan_blend_constant_mask(state->rts[rt].equation);
/* v6 doesn't support blend constants in FF blend equations.
* v7 only uses the constant from RT 0 (TODO: what if it's the same
* constant? or a constant is shared?)
*/
if (constant_mask && (PAN_ARCH == 6 || (PAN_ARCH == 7 && rt > 0)))
return true;
if (!pan_blend_is_homogenous_constant(constant_mask, state->constants))
return true;
bool supports_2src = pan_blend_supports_2src(dev->arch);
return !pan_blend_can_fixed_function(state->rts[rt].equation, supports_2src);
}
static void
panvk_pipeline_builder_parse_color_blend(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
struct panfrost_device *pdev = &builder->device->physical_device->pdev;
pipeline->blend.state.logicop_enable =
builder->create_info->pColorBlendState->logicOpEnable;
pipeline->blend.state.logicop_func =
translate_logicop(builder->create_info->pColorBlendState->logicOp);
pipeline->blend.state.rt_count = util_last_bit(builder->active_color_attachments);
memcpy(pipeline->blend.state.constants,
builder->create_info->pColorBlendState->blendConstants,
sizeof(pipeline->blend.state.constants));
for (unsigned i = 0; i < pipeline->blend.state.rt_count; i++) {
const VkPipelineColorBlendAttachmentState *in =
&builder->create_info->pColorBlendState->pAttachments[i];
struct pan_blend_rt_state *out = &pipeline->blend.state.rts[i];
out->format = builder->color_attachment_formats[i];
bool dest_has_alpha = util_format_has_alpha(out->format);
out->nr_samples = builder->create_info->pMultisampleState->rasterizationSamples;
out->equation.blend_enable = in->blendEnable;
out->equation.color_mask = in->colorWriteMask;
out->equation.rgb_func = translate_blend_op(in->colorBlendOp);
out->equation.rgb_src_factor = translate_blend_factor(in->srcColorBlendFactor, dest_has_alpha);
out->equation.rgb_invert_src_factor = inverted_blend_factor(in->srcColorBlendFactor, dest_has_alpha);
out->equation.rgb_dst_factor = translate_blend_factor(in->dstColorBlendFactor, dest_has_alpha);
out->equation.rgb_invert_dst_factor = inverted_blend_factor(in->dstColorBlendFactor, dest_has_alpha);
out->equation.alpha_func = translate_blend_op(in->alphaBlendOp);
out->equation.alpha_src_factor = translate_blend_factor(in->srcAlphaBlendFactor, dest_has_alpha);
out->equation.alpha_invert_src_factor = inverted_blend_factor(in->srcAlphaBlendFactor, dest_has_alpha);
out->equation.alpha_dst_factor = translate_blend_factor(in->dstAlphaBlendFactor, dest_has_alpha);
out->equation.alpha_invert_dst_factor = inverted_blend_factor(in->dstAlphaBlendFactor, dest_has_alpha);
unsigned constant_mask =
panvk_per_arch(blend_needs_lowering)(pdev, &pipeline->blend.state, i) ?
0 : pan_blend_constant_mask(out->equation);
pipeline->blend.constant[i].index = ffs(constant_mask) - 1;
if (constant_mask && PAN_ARCH >= 6) {
/* On Bifrost, the blend constant is expressed with a UNORM of the
* size of the target format. The value is then shifted such that
* used bits are in the MSB. Here we calculate the factor at pipeline
* creation time so we only have to do a
* hw_constant = float_constant * factor;
* at descriptor emission time.
*/
const struct util_format_description *format_desc =
util_format_description(out->format);
unsigned chan_size = 0;
for (unsigned c = 0; c < format_desc->nr_channels; c++)
chan_size = MAX2(format_desc->channel[c].size, chan_size);
pipeline->blend.constant[i].bifrost_factor =
((1 << chan_size) - 1) << (16 - chan_size);
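/* Illustrative example (not part of the original change): for an 8-bit UNORM
 * channel, chan_size = 8 and factor = 0xff << 8 = 0xff00, so a blend constant
 * of 0.5 becomes hw_constant = 0.5 * 0xff00 = 0x7f80, i.e. the UNORM8 value
 * shifted into the top bits of the 16-bit field.
 */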
}
}
}
static void
panvk_pipeline_builder_parse_multisample(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
unsigned nr_samples =
MAX2(builder->create_info->pMultisampleState->rasterizationSamples, 1);
pipeline->ms.rast_samples =
builder->create_info->pMultisampleState->rasterizationSamples;
pipeline->ms.sample_mask =
builder->create_info->pMultisampleState->pSampleMask ?
builder->create_info->pMultisampleState->pSampleMask[0] : UINT16_MAX;
pipeline->ms.min_samples =
MAX2(builder->create_info->pMultisampleState->minSampleShading * nr_samples, 1);
}
static enum mali_stencil_op
translate_stencil_op(VkStencilOp in)
{
switch (in) {
case VK_STENCIL_OP_KEEP: return MALI_STENCIL_OP_KEEP;
case VK_STENCIL_OP_ZERO: return MALI_STENCIL_OP_ZERO;
case VK_STENCIL_OP_REPLACE: return MALI_STENCIL_OP_REPLACE;
case VK_STENCIL_OP_INCREMENT_AND_CLAMP: return MALI_STENCIL_OP_INCR_SAT;
case VK_STENCIL_OP_DECREMENT_AND_CLAMP: return MALI_STENCIL_OP_DECR_SAT;
case VK_STENCIL_OP_INCREMENT_AND_WRAP: return MALI_STENCIL_OP_INCR_WRAP;
case VK_STENCIL_OP_DECREMENT_AND_WRAP: return MALI_STENCIL_OP_DECR_WRAP;
case VK_STENCIL_OP_INVERT: return MALI_STENCIL_OP_INVERT;
default: unreachable("Invalid stencil op");
}
}
static void
panvk_pipeline_builder_parse_zs(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
pipeline->zs.z_test = builder->create_info->pDepthStencilState->depthTestEnable;
pipeline->zs.z_write = builder->create_info->pDepthStencilState->depthWriteEnable;
pipeline->zs.z_compare_func =
panvk_per_arch(translate_compare_func)(builder->create_info->pDepthStencilState->depthCompareOp);
pipeline->zs.s_test = builder->create_info->pDepthStencilState->stencilTestEnable;
pipeline->zs.s_front.fail_op =
translate_stencil_op(builder->create_info->pDepthStencilState->front.failOp);
pipeline->zs.s_front.pass_op =
translate_stencil_op(builder->create_info->pDepthStencilState->front.passOp);
pipeline->zs.s_front.z_fail_op =
translate_stencil_op(builder->create_info->pDepthStencilState->front.depthFailOp);
pipeline->zs.s_front.compare_func =
panvk_per_arch(translate_compare_func)(builder->create_info->pDepthStencilState->front.compareOp);
pipeline->zs.s_front.compare_mask =
builder->create_info->pDepthStencilState->front.compareMask;
pipeline->zs.s_front.write_mask =
builder->create_info->pDepthStencilState->front.writeMask;
pipeline->zs.s_front.ref =
builder->create_info->pDepthStencilState->front.reference;
pipeline->zs.s_back.fail_op =
translate_stencil_op(builder->create_info->pDepthStencilState->back.failOp);
pipeline->zs.s_back.pass_op =
translate_stencil_op(builder->create_info->pDepthStencilState->back.passOp);
pipeline->zs.s_back.z_fail_op =
translate_stencil_op(builder->create_info->pDepthStencilState->back.depthFailOp);
pipeline->zs.s_back.compare_func =
panvk_per_arch(translate_compare_func)(builder->create_info->pDepthStencilState->back.compareOp);
pipeline->zs.s_back.compare_mask =
builder->create_info->pDepthStencilState->back.compareMask;
pipeline->zs.s_back.write_mask =
builder->create_info->pDepthStencilState->back.writeMask;
pipeline->zs.s_back.ref =
builder->create_info->pDepthStencilState->back.reference;
}
static void
panvk_pipeline_builder_parse_rast(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
pipeline->rast.clamp_depth = builder->create_info->pRasterizationState->depthClampEnable;
pipeline->rast.depth_bias.enable = builder->create_info->pRasterizationState->depthBiasEnable;
pipeline->rast.depth_bias.constant_factor =
builder->create_info->pRasterizationState->depthBiasConstantFactor;
pipeline->rast.depth_bias.clamp = builder->create_info->pRasterizationState->depthBiasClamp;
pipeline->rast.depth_bias.slope_factor = builder->create_info->pRasterizationState->depthBiasSlopeFactor;
pipeline->rast.front_ccw = builder->create_info->pRasterizationState->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE;
pipeline->rast.cull_front_face = builder->create_info->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT;
pipeline->rast.cull_back_face = builder->create_info->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT;
}
static bool
panvk_fs_required(struct panvk_pipeline *pipeline)
{
const struct pan_shader_info *info = &pipeline->fs.info;
/* If we generally have side effects */
if (info->fs.sidefx)
return true;
/* If colour is written we need to execute */
const struct pan_blend_state *blend = &pipeline->blend.state;
for (unsigned i = 0; i < blend->rt_count; ++i) {
if (blend->rts[i].equation.color_mask)
return true;
}
/* If depth is written and not implied we need to execute.
* TODO: Predicate on Z/S writes being enabled */
return (info->fs.writes_depth || info->fs.writes_stencil);
}
#define PANVK_DYNAMIC_FS_RSD_MASK \
((1 << VK_DYNAMIC_STATE_DEPTH_BIAS) | \
(1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS) | \
(1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK) | \
(1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK) | \
(1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))
static void
panvk_pipeline_builder_init_fs_state(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
if (!builder->shaders[MESA_SHADER_FRAGMENT])
return;
pipeline->fs.dynamic_rsd =
pipeline->dynamic_state_mask & PANVK_DYNAMIC_FS_RSD_MASK;
pipeline->fs.address = pipeline->binary_bo->ptr.gpu +
builder->stages[MESA_SHADER_FRAGMENT].shader_offset;
pipeline->fs.info = builder->shaders[MESA_SHADER_FRAGMENT]->info;
pipeline->fs.required = panvk_fs_required(pipeline);
}
static void
panvk_pipeline_update_varying_slot(struct panvk_varyings_info *varyings,
gl_shader_stage stage,
const struct pan_shader_varying *varying,
bool input)
{
bool fs = stage == MESA_SHADER_FRAGMENT;
gl_varying_slot loc = varying->location;
enum panvk_varying_buf_id buf_id =
panvk_varying_buf_id(fs, loc);
varyings->stage[stage].loc[varyings->stage[stage].count++] = loc;
if (panvk_varying_is_builtin(stage, loc)) {
varyings->buf_mask |= 1 << buf_id;
return;
}
assert(loc < ARRAY_SIZE(varyings->varying));
enum pipe_format new_fmt = varying->format;
enum pipe_format old_fmt = varyings->varying[loc].format;
BITSET_SET(varyings->active, loc);
/* We expect inputs to either be set by a previous stage or be built
* in, skip the entry if that's not the case, we'll emit a const
* varying returning zero for those entries.
*/
if (input && old_fmt == PIPE_FORMAT_NONE)
return;
unsigned new_size = util_format_get_blocksize(new_fmt);
unsigned old_size = util_format_get_blocksize(old_fmt);
if (old_size < new_size)
varyings->varying[loc].format = new_fmt;
varyings->buf_mask |= 1 << buf_id;
}
static void
panvk_pipeline_builder_collect_varyings(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
for (uint32_t s = 0; s < MESA_SHADER_STAGES; s++) {
if (!builder->shaders[s])
continue;
const struct pan_shader_info *info = &builder->shaders[s]->info;
for (unsigned i = 0; i < info->varyings.input_count; i++) {
panvk_pipeline_update_varying_slot(&pipeline->varyings, s,
&info->varyings.input[i],
true);
}
for (unsigned i = 0; i < info->varyings.output_count; i++) {
panvk_pipeline_update_varying_slot(&pipeline->varyings, s,
&info->varyings.output[i],
false);
}
}
/* TODO: Xfb */
gl_varying_slot loc;
BITSET_FOREACH_SET(loc, pipeline->varyings.active, VARYING_SLOT_MAX) {
enum panvk_varying_buf_id buf_id =
panvk_varying_buf_id(false, loc);
unsigned buf_idx = panvk_varying_buf_index(&pipeline->varyings, buf_id);
unsigned varying_sz = panvk_varying_size(&pipeline->varyings, loc);
pipeline->varyings.varying[loc].buf = buf_idx;
pipeline->varyings.varying[loc].offset =
pipeline->varyings.buf[buf_idx].stride;
pipeline->varyings.buf[buf_idx].stride += varying_sz;
}
}
static void
panvk_pipeline_builder_parse_vertex_input(struct panvk_pipeline_builder *builder,
struct panvk_pipeline *pipeline)
{
struct panvk_attribs_info *attribs = &pipeline->attribs;
const VkPipelineVertexInputStateCreateInfo *info =
builder->create_info->pVertexInputState;
for (unsigned i = 0; i < info->vertexBindingDescriptionCount; i++) {
const VkVertexInputBindingDescription *desc =
&info->pVertexBindingDescriptions[i];
attribs->buf_count = MAX2(desc->binding + 1, attribs->buf_count);
attribs->buf[desc->binding].stride = desc->stride;
attribs->buf[desc->binding].special = false;
}
for (unsigned i = 0; i < info->vertexAttributeDescriptionCount; i++) {
const VkVertexInputAttributeDescription *desc =
&info->pVertexAttributeDescriptions[i];
attribs->attrib[desc->location].buf = desc->binding;
attribs->attrib[desc->location].format =
vk_format_to_pipe_format(desc->format);
attribs->attrib[desc->location].offset = desc->offset;
}
const struct pan_shader_info *vs =
&builder->shaders[MESA_SHADER_VERTEX]->info;
if (vs->attribute_count >= PAN_VERTEX_ID) {
attribs->buf[attribs->buf_count].special = true;
attribs->buf[attribs->buf_count].special_id = PAN_VERTEX_ID;
attribs->attrib[PAN_VERTEX_ID].buf = attribs->buf_count++;
attribs->attrib[PAN_VERTEX_ID].format = PIPE_FORMAT_R32_UINT;
}
if (vs->attribute_count >= PAN_INSTANCE_ID) {
attribs->buf[attribs->buf_count].special = true;
attribs->buf[attribs->buf_count].special_id = PAN_INSTANCE_ID;
attribs->attrib[PAN_INSTANCE_ID].buf = attribs->buf_count++;
attribs->attrib[PAN_INSTANCE_ID].format = PIPE_FORMAT_R32_UINT;
}
attribs->attrib_count = MAX2(attribs->attrib_count, vs->attribute_count);
}
static VkResult
panvk_pipeline_builder_build(struct panvk_pipeline_builder *builder,
struct panvk_pipeline **pipeline)
{
VkResult result = panvk_pipeline_builder_create_pipeline(builder, pipeline);
if (result != VK_SUCCESS)
return result;
/* TODO: make those functions return a result and handle errors */
panvk_pipeline_builder_parse_dynamic(builder, *pipeline);
panvk_pipeline_builder_parse_color_blend(builder, *pipeline);
panvk_pipeline_builder_compile_shaders(builder, *pipeline);
panvk_pipeline_builder_collect_varyings(builder, *pipeline);
panvk_pipeline_builder_parse_input_assembly(builder, *pipeline);
panvk_pipeline_builder_parse_multisample(builder, *pipeline);
panvk_pipeline_builder_parse_zs(builder, *pipeline);
panvk_pipeline_builder_parse_rast(builder, *pipeline);
panvk_pipeline_builder_parse_vertex_input(builder, *pipeline);
panvk_pipeline_builder_upload_shaders(builder, *pipeline);
panvk_pipeline_builder_init_fs_state(builder, *pipeline);
panvk_pipeline_builder_alloc_static_state_bo(builder, *pipeline);
panvk_pipeline_builder_init_shaders(builder, *pipeline);
panvk_pipeline_builder_parse_viewport(builder, *pipeline);
return VK_SUCCESS;
}
static void
panvk_pipeline_builder_init_graphics(struct panvk_pipeline_builder *builder,
struct panvk_device *dev,
struct panvk_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *create_info,
const VkAllocationCallbacks *alloc)
{
VK_FROM_HANDLE(panvk_pipeline_layout, layout, create_info->layout);
assert(layout);
*builder = (struct panvk_pipeline_builder) {
.device = dev,
.cache = cache,
.layout = layout,
.create_info = create_info,
.alloc = alloc,
};
builder->rasterizer_discard =
create_info->pRasterizationState->rasterizerDiscardEnable;
if (builder->rasterizer_discard) {
builder->samples = VK_SAMPLE_COUNT_1_BIT;
} else {
builder->samples = create_info->pMultisampleState->rasterizationSamples;
const struct panvk_render_pass *pass = panvk_render_pass_from_handle(create_info->renderPass);
const struct panvk_subpass *subpass = &pass->subpasses[create_info->subpass];
builder->use_depth_stencil_attachment =
subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED;
assert(subpass->color_count == create_info->pColorBlendState->attachmentCount);
builder->active_color_attachments = 0;
for (uint32_t i = 0; i < subpass->color_count; i++) {
uint32_t idx = subpass->color_attachments[i].idx;
if (idx == VK_ATTACHMENT_UNUSED)
continue;
builder->active_color_attachments |= 1 << i;
builder->color_attachment_formats[i] = pass->attachments[idx].format;
}
}
}
VkResult
panvk_per_arch(CreateGraphicsPipelines)(VkDevice device,
VkPipelineCache pipelineCache,
uint32_t count,
const VkGraphicsPipelineCreateInfo *pCreateInfos,
const VkAllocationCallbacks *pAllocator,
VkPipeline *pPipelines)
{
VK_FROM_HANDLE(panvk_device, dev, device);
VK_FROM_HANDLE(panvk_pipeline_cache, cache, pipelineCache);
for (uint32_t i = 0; i < count; i++) {
struct panvk_pipeline_builder builder;
panvk_pipeline_builder_init_graphics(&builder, dev, cache,
&pCreateInfos[i], pAllocator);
struct panvk_pipeline *pipeline;
VkResult result = panvk_pipeline_builder_build(&builder, &pipeline);
panvk_pipeline_builder_finish(&builder);
if (result != VK_SUCCESS) {
for (uint32_t j = 0; j < i; j++) {
panvk_DestroyPipeline(device, pPipelines[j], pAllocator);
pPipelines[j] = VK_NULL_HANDLE;
}
return result;
}
pPipelines[i] = panvk_pipeline_to_handle(pipeline);
}
return VK_SUCCESS;
}

View File

@ -0,0 +1,386 @@
/*
* Copyright © 2021 Collabora Ltd.
*
* Derived from tu_shader.c which is:
* Copyright © 2019 Google LLC
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "gen_macros.h"
#include "panvk_private.h"
#include "nir_builder.h"
#include "nir_lower_blend.h"
#include "spirv/nir_spirv.h"
#include "util/mesa-sha1.h"
#include "panfrost-quirks.h"
#include "pan_shader.h"
#include "vk_util.h"
static nir_shader *
panvk_spirv_to_nir(const void *code,
size_t codesize,
gl_shader_stage stage,
const char *entry_point_name,
const VkSpecializationInfo *spec_info,
const nir_shader_compiler_options *nir_options)
{
/* TODO these are made-up */
const struct spirv_to_nir_options spirv_options = {
.caps = { false },
.ubo_addr_format = nir_address_format_32bit_index_offset,
.ssbo_addr_format = nir_address_format_32bit_index_offset,
};
/* convert VkSpecializationInfo */
uint32_t num_spec = 0;
struct nir_spirv_specialization *spec =
vk_spec_info_to_nir_spirv(spec_info, &num_spec);
nir_shader *nir = spirv_to_nir(code, codesize / sizeof(uint32_t), spec,
num_spec, stage, entry_point_name,
&spirv_options, nir_options);
free(spec);
assert(nir->info.stage == stage);
nir_validate_shader(nir, "after spirv_to_nir");
return nir;
}
struct panvk_lower_misc_ctx {
struct panvk_shader *shader;
const struct panvk_pipeline_layout *layout;
};
static unsigned
get_fixed_sampler_index(nir_deref_instr *deref,
const struct panvk_lower_misc_ctx *ctx)
{
nir_variable *var = nir_deref_instr_get_variable(deref);
unsigned set = var->data.descriptor_set;
unsigned binding = var->data.binding;
const struct panvk_descriptor_set_binding_layout *bind_layout =
&ctx->layout->sets[set].layout->bindings[binding];
return bind_layout->sampler_idx + ctx->layout->sets[set].sampler_offset;
}
static unsigned
get_fixed_texture_index(nir_deref_instr *deref,
const struct panvk_lower_misc_ctx *ctx)
{
nir_variable *var = nir_deref_instr_get_variable(deref);
unsigned set = var->data.descriptor_set;
unsigned binding = var->data.binding;
const struct panvk_descriptor_set_binding_layout *bind_layout =
&ctx->layout->sets[set].layout->bindings[binding];
return bind_layout->tex_idx + ctx->layout->sets[set].tex_offset;
}
static bool
lower_tex(nir_builder *b, nir_tex_instr *tex,
const struct panvk_lower_misc_ctx *ctx)
{
bool progress = false;
int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
b->cursor = nir_before_instr(&tex->instr);
if (sampler_src_idx >= 0) {
nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
tex->sampler_index = get_fixed_sampler_index(deref, ctx);
nir_tex_instr_remove_src(tex, sampler_src_idx);
progress = true;
}
int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
if (tex_src_idx >= 0) {
nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src);
tex->texture_index = get_fixed_texture_index(deref, ctx);
nir_tex_instr_remove_src(tex, tex_src_idx);
progress = true;
}
return progress;
}
static void
lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *intr,
const struct panvk_lower_misc_ctx *ctx)
{
nir_ssa_def *vulkan_idx = intr->src[0].ssa;
unsigned set = nir_intrinsic_desc_set(intr);
unsigned binding = nir_intrinsic_binding(intr);
struct panvk_descriptor_set_layout *set_layout = ctx->layout->sets[set].layout;
struct panvk_descriptor_set_binding_layout *binding_layout =
&set_layout->bindings[binding];
unsigned base;
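/* Flatten the (set, binding) pair into an index in the global UBO/SSBO table
 * by adding the binding's index within its set to the set's base offset.
 */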
switch (binding_layout->type) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
base = binding_layout->ubo_idx + ctx->layout->sets[set].ubo_offset;
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
base = binding_layout->ssbo_idx + ctx->layout->sets[set].ssbo_offset;
break;
default:
unreachable("Invalid descriptor type");
break;
}
b->cursor = nir_before_instr(&intr->instr);
nir_ssa_def *idx = nir_iadd(b, nir_imm_int(b, base), vulkan_idx);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, idx);
nir_instr_remove(&intr->instr);
}
static void
lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin)
{
/* Loading the descriptor happens as part of the load/store instruction so
* this is a no-op.
*/
b->cursor = nir_before_instr(&intrin->instr);
nir_ssa_def *val = nir_vec2(b, intrin->src[0].ssa, nir_imm_int(b, 0));
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, val);
nir_instr_remove(&intrin->instr);
}
static bool
lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
const struct panvk_lower_misc_ctx *ctx)
{
switch (intr->intrinsic) {
case nir_intrinsic_vulkan_resource_index:
lower_vulkan_resource_index(b, intr, ctx);
return true;
case nir_intrinsic_load_vulkan_descriptor:
lower_load_vulkan_descriptor(b, intr);
return true;
default:
return false;
}
}
static bool
panvk_lower_misc_instr(nir_builder *b,
nir_instr *instr,
void *data)
{
const struct panvk_lower_misc_ctx *ctx = data;
switch (instr->type) {
case nir_instr_type_tex:
return lower_tex(b, nir_instr_as_tex(instr), ctx);
case nir_instr_type_intrinsic:
return lower_intrinsic(b, nir_instr_as_intrinsic(instr), ctx);
default:
return false;
}
}
static bool
panvk_lower_misc(nir_shader *nir, const struct panvk_lower_misc_ctx *ctx)
{
return nir_shader_instructions_pass(nir, panvk_lower_misc_instr,
nir_metadata_block_index |
nir_metadata_dominance,
(void *)ctx);
}
static void
panvk_lower_blend(struct panfrost_device *pdev,
nir_shader *nir,
struct pan_blend_state *blend_state,
bool static_blend_constants)
{
nir_lower_blend_options options = {
.logicop_enable = blend_state->logicop_enable,
.logicop_func = blend_state->logicop_func,
};
bool lower_blend = false;
for (unsigned rt = 0; rt < blend_state->rt_count; rt++) {
if (!panvk_per_arch(blend_needs_lowering)(pdev, blend_state, rt))
continue;
const struct pan_blend_rt_state *rt_state = &blend_state->rts[rt];
options.rt[rt].colormask = rt_state->equation.color_mask;
options.format[rt] = rt_state->format;
if (!rt_state->equation.blend_enable) {
static const nir_lower_blend_channel replace = {
.func = BLEND_FUNC_ADD,
.src_factor = BLEND_FACTOR_ZERO,
.invert_src_factor = true,
.dst_factor = BLEND_FACTOR_ZERO,
.invert_dst_factor = false,
};
options.rt[rt].rgb = replace;
options.rt[rt].alpha = replace;
} else {
options.rt[rt].rgb.func = rt_state->equation.rgb_func;
options.rt[rt].rgb.src_factor = rt_state->equation.rgb_src_factor;
options.rt[rt].rgb.invert_src_factor = rt_state->equation.rgb_invert_src_factor;
options.rt[rt].rgb.dst_factor = rt_state->equation.rgb_dst_factor;
options.rt[rt].rgb.invert_dst_factor = rt_state->equation.rgb_invert_dst_factor;
options.rt[rt].alpha.func = rt_state->equation.alpha_func;
options.rt[rt].alpha.src_factor = rt_state->equation.alpha_src_factor;
options.rt[rt].alpha.invert_src_factor = rt_state->equation.alpha_invert_src_factor;
options.rt[rt].alpha.dst_factor = rt_state->equation.alpha_dst_factor;
options.rt[rt].alpha.invert_dst_factor = rt_state->equation.alpha_invert_dst_factor;
}
lower_blend = true;
}
/* FIXME: currently untested */
assert(!lower_blend);
if (lower_blend)
NIR_PASS_V(nir, nir_lower_blend, options);
}
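
/*
 * Build a panvk_shader for one pipeline stage: translate the module's SPIR-V
 * to NIR, run the Vulkan-specific and generic lowering passes above, then
 * hand the result to the common Panfrost compiler. Sketch of a call site
 * (local names are illustrative, not taken from this patch):
 *
 *    struct panvk_shader *fs =
 *       panvk_per_arch(shader_create)(dev, MESA_SHADER_FRAGMENT, stage_info,
 *                                     pipeline_layout, sysval_ubo,
 *                                     &blend_state, false, alloc);
 */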
struct panvk_shader *
panvk_per_arch(shader_create)(struct panvk_device *dev,
gl_shader_stage stage,
const VkPipelineShaderStageCreateInfo *stage_info,
const struct panvk_pipeline_layout *layout,
unsigned sysval_ubo,
struct pan_blend_state *blend_state,
bool static_blend_constants,
const VkAllocationCallbacks *alloc)
{
const struct panvk_shader_module *module = panvk_shader_module_from_handle(stage_info->module);
struct panfrost_device *pdev = &dev->physical_device->pdev;
struct panvk_shader *shader;
shader = vk_zalloc2(&dev->vk.alloc, alloc, sizeof(*shader), 8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!shader)
return NULL;
util_dynarray_init(&shader->binary, NULL);
/* translate SPIR-V to NIR */
assert(module->code_size % 4 == 0);
nir_shader *nir = panvk_spirv_to_nir(module->code,
module->code_size,
stage, stage_info->pName,
stage_info->pSpecializationInfo,
pan_shader_get_compiler_options(pdev));
if (!nir) {
vk_free2(&dev->vk.alloc, alloc, shader);
return NULL;
}
if (stage == MESA_SHADER_FRAGMENT)
panvk_lower_blend(pdev, nir, blend_state, static_blend_constants);
/* Multi-step inlining procedure: inline everything into the entrypoint, then
 * drop the now-unused helper functions.
 */
NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
NIR_PASS_V(nir, nir_lower_returns);
NIR_PASS_V(nir, nir_inline_functions);
NIR_PASS_V(nir, nir_copy_prop);
NIR_PASS_V(nir, nir_opt_deref);
foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
if (!func->is_entrypoint)
exec_node_remove(&func->node);
}
assert(exec_list_length(&nir->functions) == 1);
NIR_PASS_V(nir, nir_lower_variable_initializers, ~nir_var_function_temp);
/* Split member structs. We do this before lower_io_to_temporaries so that
* it doesn't lower system values to temporaries by accident.
*/
NIR_PASS_V(nir, nir_split_var_copies);
NIR_PASS_V(nir, nir_split_per_member_structs);
NIR_PASS_V(nir, nir_remove_dead_variables,
nir_var_shader_in | nir_var_shader_out |
nir_var_system_value | nir_var_mem_shared,
NULL);
NIR_PASS_V(nir, nir_lower_io_to_temporaries,
nir_shader_get_entrypoint(nir), true, true);
NIR_PASS_V(nir, nir_lower_indirect_derefs,
nir_var_shader_in | nir_var_shader_out,
UINT32_MAX);
NIR_PASS_V(nir, nir_opt_copy_prop_vars);
NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all);
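/* Lower UBO/SSBO derefs to explicit (index, offset) addressing; the
 * vulkan_resource_index/load_vulkan_descriptor intrinsics feeding the index
 * are resolved afterwards by panvk_lower_misc().
 */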
NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, true, false);
NIR_PASS_V(nir, nir_lower_explicit_io,
nir_var_mem_ubo | nir_var_mem_ssbo,
nir_address_format_32bit_index_offset);
nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, stage);
nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, stage);
NIR_PASS_V(nir, nir_lower_system_values);
NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
NIR_PASS_V(nir, nir_lower_var_copies);
struct panvk_lower_misc_ctx ctx = {
.shader = shader,
.layout = layout,
};
NIR_PASS_V(nir, panvk_lower_misc, &ctx);
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
if (unlikely(dev->physical_device->instance->debug_flags & PANVK_DEBUG_NIR)) {
fprintf(stderr, "translated nir:\n");
nir_print_shader(nir, stderr);
}
struct panfrost_compile_inputs inputs = {
.gpu_id = pdev->gpu_id,
.no_ubo_to_push = true,
.sysval_ubo = sysval_ubo,
};
pan_shader_compile(pdev, nir, &inputs, &shader->binary, &shader->info);
/* Patch the descriptor counts to account for the pipeline layout (and the
 * sysval UBO, if any).
 */
shader->info.ubo_count =
shader->info.sysvals.sysval_count ? sysval_ubo + 1 : layout->num_ubos;
shader->info.sampler_count = layout->num_samplers;
shader->info.texture_count = layout->num_textures;
shader->sysval_ubo = sysval_ubo;
ralloc_free(nir);
return shader;
}

View File

@ -28,9 +28,10 @@
#include "util/format/u_format.h"
#include "compiler/shader_enums.h"
#include "gen_macros.h"
#include "panfrost-job.h"
#include "pan_pool.h"
struct pan_pool;
struct panvk_device;
@ -69,15 +70,6 @@ struct panvk_varyings_info {
unsigned buf_mask;
};
void
panvk_varyings_alloc(struct panvk_varyings_info *varyings,
struct pan_pool *varying_mem_pool,
unsigned vertex_count);
unsigned
panvk_varyings_buf_count(const struct panvk_device *dev,
struct panvk_varyings_info *varyings);
static inline unsigned
panvk_varying_buf_index(const struct panvk_varyings_info *varyings,
enum panvk_varying_buf_id b)
@ -114,6 +106,7 @@ panvk_varying_is_builtin(gl_shader_stage stage, gl_varying_slot loc)
}
}
#if defined(PAN_ARCH) && PAN_ARCH <= 5
static inline enum mali_attribute_special
panvk_varying_special_buf_id(enum panvk_varying_buf_id buf_id)
{
@ -126,6 +119,7 @@ panvk_varying_special_buf_id(enum panvk_varying_buf_id buf_id)
return 0;
}
}
#endif
static inline unsigned
panvk_varying_size(const struct panvk_varyings_info *varyings,
@ -141,4 +135,34 @@ panvk_varying_size(const struct panvk_varyings_info *varyings,
}
}
#ifdef PAN_ARCH
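/*
 * Number of varying buffer descriptors needed: one per bit set in buf_mask,
 * plus one extra slot reserved on v6+.
 */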
static inline unsigned
panvk_varyings_buf_count(struct panvk_varyings_info *varyings)
{
return util_bitcount(varyings->buf_mask) + (PAN_ARCH >= 6 ? 1 : 0);
}
#endif
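
/*
 * Allocate GPU memory for every varying buffer marked in buf_mask: each
 * buffer gets stride * vertex_count bytes, carved out of the caller-provided
 * varying memory pool with 64-byte alignment.
 */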
static inline void
panvk_varyings_alloc(struct panvk_varyings_info *varyings,
struct pan_pool *varying_mem_pool,
unsigned vertex_count)
{
for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) {
if (!(varyings->buf_mask & (1 << i))) continue;
unsigned buf_idx = panvk_varying_buf_index(varyings, i);
unsigned size = varyings->buf[buf_idx].stride * vertex_count;
if (!size)
continue;
struct panfrost_ptr ptr =
pan_pool_alloc_aligned(varying_mem_pool, size, 64);
varyings->buf[buf_idx].size = size;
varyings->buf[buf_idx].address = ptr.gpu;
varyings->buf[buf_idx].cpu = ptr.cpu;
}
}
#endif