mesa/src/asahi/lib/agx_ppp.h

165 lines
4.9 KiB
C

/*
* Copyright 2022 Alyssa Rosenzweig
* SPDX-License-Identifier: MIT
*/
#pragma once
#include <string.h>

#include "asahi/genxml/agx_pack.h"
#include "agx_bo.h"
/*
 * Cursor over a PPP update that is being built. Treat it as opaque and go
 * through the agx_ppp_* helpers rather than touching the fields directly.
 */
struct agx_ppp_update {
   /* Next byte to be written; every push advances it */
   uint8_t *head;

   /* GPU address of the update, as supplied when the update was created */
   uint64_t gpu_base;

   /* Number of bytes the finished update is expected to occupy */
   size_t total_size;

#if !defined(NDEBUG)
   /* CPU address of the first byte of the update. Only present when
    * assertions are compiled in, where it backs the size/bounds checks.
    */
   uint8_t *cpu_base;
#endif
};
/*
 * Compute how many bytes a PPP update occupies, given which records its
 * header flags as present.
 *
 * The header is always counted. Each flagged fixed-length record adds its
 * packed length; region clips and viewports additionally scale with
 * present->viewport_count. The total is asserted to be a multiple of 4.
 */
ALWAYS_INLINE static size_t
agx_ppp_update_size(struct AGX_PPP_HEADER *present)
{
   size_t size = AGX_PPP_HEADER_LENGTH;

/* Count one fixed-length record when its presence flag is set */
#define PPP_ADD_IF_SET(flag, record)                                           \
   size += (present->flag) ? AGX_##record##_LENGTH : 0

   PPP_ADD_IF_SET(fragment_control, FRAGMENT_CONTROL);
   PPP_ADD_IF_SET(fragment_control_2, FRAGMENT_CONTROL);
   PPP_ADD_IF_SET(fragment_front_face, FRAGMENT_FACE);
   PPP_ADD_IF_SET(fragment_front_face_2, FRAGMENT_FACE_2);
   PPP_ADD_IF_SET(fragment_front_stencil, FRAGMENT_STENCIL);
   PPP_ADD_IF_SET(fragment_back_face, FRAGMENT_FACE);
   PPP_ADD_IF_SET(fragment_back_face_2, FRAGMENT_FACE_2);
   PPP_ADD_IF_SET(fragment_back_stencil, FRAGMENT_STENCIL);
   PPP_ADD_IF_SET(depth_bias_scissor, DEPTH_BIAS_SCISSOR);

   /* One region clip per viewport */
   if (present->region_clip) {
      size += present->viewport_count * AGX_REGION_CLIP_LENGTH;
   }

   /* A single control record followed by one viewport record per viewport */
   if (present->viewport) {
      size += AGX_VIEWPORT_CONTROL_LENGTH +
              (present->viewport_count * AGX_VIEWPORT_LENGTH);
   }

   PPP_ADD_IF_SET(w_clamp, W_CLAMP);
   PPP_ADD_IF_SET(output_select, OUTPUT_SELECT);
   PPP_ADD_IF_SET(varying_counts_32, VARYING_COUNTS);
   PPP_ADD_IF_SET(varying_counts_16, VARYING_COUNTS);
   PPP_ADD_IF_SET(cull, CULL);
   PPP_ADD_IF_SET(cull_2, CULL_2);

   /* The fragment shader is described by four consecutive words */
   if (present->fragment_shader) {
      size += AGX_FRAGMENT_SHADER_WORD_0_LENGTH +
              AGX_FRAGMENT_SHADER_WORD_1_LENGTH +
              AGX_FRAGMENT_SHADER_WORD_2_LENGTH +
              AGX_FRAGMENT_SHADER_WORD_3_LENGTH;
   }

   PPP_ADD_IF_SET(occlusion_query, FRAGMENT_OCCLUSION_QUERY);
   PPP_ADD_IF_SET(occlusion_query_2, FRAGMENT_OCCLUSION_QUERY_2);
   PPP_ADD_IF_SET(output_unknown, OUTPUT_UNKNOWN);
   PPP_ADD_IF_SET(output_size, OUTPUT_SIZE);
   PPP_ADD_IF_SET(varying_word_2, VARYING_2);

#undef PPP_ADD_IF_SET

   assert((size % 4) == 0 && "PPP updates are aligned");
   return size;
}
/*
 * Debug-only bounds check run before `size` more bytes are pushed to `ppp`.
 *
 * Always returns true so that it can seed the loop variable of the
 * agx_ppp_push for-loop macro.
 */
static inline bool
agx_ppp_validate(struct agx_ppp_update *ppp, size_t size)
{
#ifdef NDEBUG
   /* cpu_base does not exist in this configuration, so nothing is checked */
   (void)ppp;
   (void)size;
#else
   /* The bytes written so far plus the pending push must fit in the update.
    * Checking that the pushed type matches what the header promised would be
    * nicer still, but is harder to do at the moment.
    */
   assert(((ppp->head - ppp->cpu_base) + size) <= ppp->total_size);
#endif

   return true;
}
/*
 * Pack a T record in place at the head of the PPP stream.
 *
 * Used as a statement prefix: the block that follows the macro fills in the
 * fields of `name`, e.g.
 *
 *    agx_ppp_push(&ppp, PPP_HEADER, cfg) {
 *       cfg = *present;
 *    }
 *
 * The for-loop executes exactly once. `it` starts out as the result of
 * agx_ppp_validate (always true; in builds with assertions it first checks
 * that AGX_<T>_LENGTH more bytes fit), and the update clause clears `it` and
 * advances head by AGX_<T>_LENGTH once the body has run.
 *
 * agx_pack is defined elsewhere; presumably it writes the packed `name` to
 * the given address when its own body completes -- confirm against
 * agx_pack.h.
 *
 * `ppp` is expanded several times, so pass an expression without side
 * effects. The loop variable `it` is visible inside the caller's block.
 */
#define agx_ppp_push(ppp, T, name) \
for (bool it = agx_ppp_validate((ppp), AGX_##T##_LENGTH); it; \
it = false, (ppp)->head += AGX_##T##_LENGTH) \
agx_pack((ppp)->head, T, name)
/*
 * Append an already-packed T record to the PPP stream.
 *
 * ppp: pointer to the struct agx_ppp_update being built
 * src: address of AGX_<T>_LENGTH bytes of packed data
 * T:   record type; only used to look up AGX_<T>_LENGTH
 *
 * Runs the debug bounds check, copies the bytes to head and advances head by
 * the record length. `ppp` is captured in a local so that the argument
 * expression is evaluated exactly once.
 */
#define agx_ppp_push_packed(ppp, src, T)                                       \
   do {                                                                        \
      struct agx_ppp_update *agx_ppp_dst_ = (ppp);                             \
      agx_ppp_validate(agx_ppp_dst_, AGX_##T##_LENGTH);                        \
      memcpy(agx_ppp_dst_->head, (src), AGX_##T##_LENGTH);                     \
      agx_ppp_dst_->head += AGX_##T##_LENGTH;                                  \
   } while (0)
/*
 * Pack a T record, OR it with an already-packed record, and push the result
 * to the PPP stream.
 *
 * Used as a statement prefix like agx_pack: the block that follows fills in
 * the fields of `name`. The record is packed into the scratch buffer `_tmp`
 * rather than into the stream. After the body has run once, the update
 * clause clears `it` and hands `_tmp` and the address of `merge` to
 * agx_ppp_push_merged_blobs, which combines them at head and advances head
 * by AGX_<T>_LENGTH.
 *
 * ppp:   pointer to the struct agx_ppp_update being built
 * merge: lvalue holding AGX_<T>_LENGTH bytes of packed data to OR in
 *
 * Both `ppp` and `merge` are parenthesised so that arbitrary lvalue
 * expressions bind correctly under the cast and the address-of operator.
 */
#define agx_ppp_push_merged(ppp, T, name, merge)                               \
   for (uint8_t _tmp[AGX_##T##_LENGTH], it = 1; it;                            \
        it = 0, agx_ppp_push_merged_blobs((ppp), AGX_##T##_LENGTH,             \
                                          (uint32_t *)_tmp,                    \
                                          (uint32_t *)&(merge)))               \
      agx_pack(_tmp, T, name)
ALWAYS_INLINE static struct agx_ppp_update
agx_new_ppp_update(struct agx_ptr out, size_t size,
struct AGX_PPP_HEADER *present)
{
struct agx_ppp_update ppp = {
.head = out.cpu,
.gpu_base = out.gpu,
.total_size = size,
#ifndef NDEBUG
.cpu_base = out.cpu,
#endif
};
agx_ppp_push(&ppp, PPP_HEADER, cfg) {
cfg = *present;
}
return ppp;
}
/*
 * Finish a PPP update by emitting the PPP_STATE record that references it.
 *
 * out: in/out write pointer; the record is packed at *out and *out is then
 *      advanced by AGX_PPP_STATE_LENGTH
 * ppp: the completed update
 *
 * Asserts that the total size is a whole number of 32-bit words, that (in
 * builds with assertions) exactly total_size bytes were pushed, that the GPU
 * address is below 2^40 and that the word count is below 2^24.
 */
static inline void
agx_ppp_fini(uint8_t **out, struct agx_ppp_update *ppp)
{
   /* The record carries the size in 32-bit words */
   size_t size = ppp->total_size;
   assert((size % 4) == 0);
   size_t size_words = size / 4;

#ifndef NDEBUG
   /* Everything that was sized for must have been pushed, no more, no less */
   assert(size == (ppp->head - ppp->cpu_base) && "mismatched ppp size");
#endif

   assert(ppp->gpu_base < (1ull << 40));
   assert(size_words < (1ull << 24));

   const uint64_t address = ppp->gpu_base;
   uint8_t *record = *out;

   /* The address is stored split into its upper bits and its low 32 bits */
   agx_pack(record, PPP_STATE, cfg) {
      cfg.pointer_hi = (address >> 32);
      cfg.pointer_lo = (uint32_t)address;
      cfg.size_words = size_words;
   }

   *out = record + AGX_PPP_STATE_LENGTH;
}
/*
 * OR two packed blobs together and push the result to the PPP stream.
 *
 * ppp:    update being built; the result is written at ppp->head, which is
 *         then advanced by `length`
 * length: size of each blob in bytes; must be a multiple of 4 (asserted)
 * src1_,
 * src2_:  the two blobs to combine; neither is modified
 *
 * The blobs are combined one 32-bit word at a time. Words are moved with
 * memcpy rather than by dereferencing uint32_t pointers: agx_ppp_push_merged
 * passes a uint8_t scratch array as src1_, which carries no 4-byte alignment
 * guarantee and may not be read through a uint32_t lvalue. As a result no
 * alignment is required of either source or of head.
 *
 * Like the other push helpers, the write is bounds-checked against the
 * update's total size in builds with assertions enabled.
 *
 * Declared inline, matching the other helpers in this header, so that
 * including the header without merging anything does not leave an unused
 * static function behind.
 */
static inline void
agx_ppp_push_merged_blobs(struct agx_ppp_update *ppp, size_t length,
                          const void *src1_, const void *src2_)
{
   assert((length & 3) == 0);
   agx_ppp_validate(ppp, length);

   uint8_t *dst = ppp->head;
   const uint8_t *src1 = (const uint8_t *)src1_;
   const uint8_t *src2 = (const uint8_t *)src2_;

   for (size_t i = 0; i < (length / 4); ++i) {
      const size_t offset = i * sizeof(uint32_t);
      uint32_t a, b;

      memcpy(&a, src1 + offset, sizeof(a));
      memcpy(&b, src2 + offset, sizeof(b));

      const uint32_t merged = a | b;
      memcpy(dst + offset, &merged, sizeof(merged));
   }

   ppp->head += length;
}