mesa/src/freedreno/vulkan/tu_cs.h

416 lines
13 KiB
C

/*
* Copyright © 2019 Google LLC
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef TU_CS_H
#define TU_CS_H
#include "tu_private.h"
#include "adreno_pm4.xml.h"
#include "freedreno_pm4.h"
/* For breadcrumbs we may open a network socket based on the envvar,
* it's not something that should be enabled by default.
*/
#define TU_BREADCRUMBS_ENABLED 0
void
tu_cs_init(struct tu_cs *cs,
struct tu_device *device,
enum tu_cs_mode mode,
uint32_t initial_size);
void
tu_cs_init_external(struct tu_cs *cs, struct tu_device *device,
uint32_t *start, uint32_t *end);
void
tu_cs_init_suballoc(struct tu_cs *cs, struct tu_device *device,
struct tu_suballoc_bo *bo);
void
tu_cs_finish(struct tu_cs *cs);
void
tu_cs_begin(struct tu_cs *cs);
void
tu_cs_end(struct tu_cs *cs);
VkResult
tu_cs_begin_sub_stream(struct tu_cs *cs, uint32_t size, struct tu_cs *sub_cs);
VkResult
tu_cs_alloc(struct tu_cs *cs,
uint32_t count,
uint32_t size,
struct tu_cs_memory *memory);
struct tu_cs_entry
tu_cs_end_sub_stream(struct tu_cs *cs, struct tu_cs *sub_cs);
static inline struct tu_draw_state
tu_cs_end_draw_state(struct tu_cs *cs, struct tu_cs *sub_cs)
{
struct tu_cs_entry entry = tu_cs_end_sub_stream(cs, sub_cs);
return (struct tu_draw_state) {
.iova = entry.bo->iova + entry.offset,
.size = entry.size / sizeof(uint32_t),
};
}
VkResult
tu_cs_reserve_space(struct tu_cs *cs, uint32_t reserved_size);
static inline struct tu_draw_state
tu_cs_draw_state(struct tu_cs *sub_cs, struct tu_cs *cs, uint32_t size)
{
struct tu_cs_memory memory;
/* TODO: clean this up */
tu_cs_alloc(sub_cs, size, 1, &memory);
tu_cs_init_external(cs, sub_cs->device, memory.map, memory.map + size);
tu_cs_begin(cs);
tu_cs_reserve_space(cs, size);
return (struct tu_draw_state) {
.iova = memory.iova,
.size = size,
};
}
void
tu_cs_reset(struct tu_cs *cs);
VkResult
tu_cs_add_entries(struct tu_cs *cs, struct tu_cs *target);
/**
* Get the size of the command packets emitted since the last call to
* tu_cs_add_entry.
*/
static inline uint32_t
tu_cs_get_size(const struct tu_cs *cs)
{
return cs->cur - cs->start;
}
/**
* Return true if there is no command packet emitted since the last call to
* tu_cs_add_entry.
*/
static inline uint32_t
tu_cs_is_empty(const struct tu_cs *cs)
{
return tu_cs_get_size(cs) == 0;
}
/**
* Discard all entries. This allows \a cs to be reused while keeping the
* existing BOs and command packets intact.
*/
static inline void
tu_cs_discard_entries(struct tu_cs *cs)
{
assert(cs->mode == TU_CS_MODE_GROW);
cs->entry_count = 0;
}
/**
* Get the size needed for tu_cs_emit_call.
*/
static inline uint32_t
tu_cs_get_call_size(const struct tu_cs *cs)
{
assert(cs->mode == TU_CS_MODE_GROW);
/* each CP_INDIRECT_BUFFER needs 4 dwords */
return cs->entry_count * 4;
}
/**
* Assert that we did not exceed the reserved space.
*/
static inline void
tu_cs_sanity_check(const struct tu_cs *cs)
{
assert(cs->start <= cs->cur);
assert(cs->cur <= cs->reserved_end);
assert(cs->reserved_end <= cs->end);
}
void
tu_cs_emit_sync_breadcrumb(struct tu_cs *cs, uint8_t opcode, uint16_t cnt);
/**
* Emit a uint32_t value into a command stream, without boundary checking.
*/
static inline void
tu_cs_emit(struct tu_cs *cs, uint32_t value)
{
assert(cs->cur < cs->reserved_end);
*cs->cur = value;
++cs->cur;
#if TU_BREADCRUMBS_ENABLED
cs->breadcrumb_emit_after--;
if (cs->breadcrumb_emit_after == 0)
tu_cs_emit_sync_breadcrumb(cs, -1, 0);
#endif
}
/**
* Emit an array of uint32_t into a command stream, without boundary checking.
*/
static inline void
tu_cs_emit_array(struct tu_cs *cs, const uint32_t *values, uint32_t length)
{
assert(cs->cur + length <= cs->reserved_end);
memcpy(cs->cur, values, sizeof(uint32_t) * length);
cs->cur += length;
}
/**
* Get the size of the remaining space in the current BO.
*/
static inline uint32_t
tu_cs_get_space(const struct tu_cs *cs)
{
return cs->end - cs->cur;
}
static inline void
tu_cs_reserve(struct tu_cs *cs, uint32_t reserved_size)
{
if (cs->mode != TU_CS_MODE_GROW) {
assert(tu_cs_get_space(cs) >= reserved_size);
assert(cs->reserved_end == cs->end);
return;
}
if (tu_cs_get_space(cs) >= reserved_size &&
cs->entry_count < cs->entry_capacity) {
cs->reserved_end = cs->cur + reserved_size;
return;
}
ASSERTED VkResult result = tu_cs_reserve_space(cs, reserved_size);
/* TODO: set this error in tu_cs and use it */
assert(result == VK_SUCCESS);
}
/**
* Emit a type-4 command packet header into a command stream.
*/
static inline void
tu_cs_emit_pkt4(struct tu_cs *cs, uint16_t regindx, uint16_t cnt)
{
tu_cs_reserve(cs, cnt + 1);
tu_cs_emit(cs, pm4_pkt4_hdr(regindx, cnt));
}
/**
* Emit a type-7 command packet header into a command stream.
*/
static inline void
tu_cs_emit_pkt7(struct tu_cs *cs, uint8_t opcode, uint16_t cnt)
{
#if TU_BREADCRUMBS_ENABLED
tu_cs_emit_sync_breadcrumb(cs, opcode, cnt + 1);
#endif
tu_cs_reserve(cs, cnt + 1);
tu_cs_emit(cs, pm4_pkt7_hdr(opcode, cnt));
}
static inline void
tu_cs_emit_wfi(struct tu_cs *cs)
{
tu_cs_emit_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
}
static inline void
tu_cs_emit_qw(struct tu_cs *cs, uint64_t value)
{
tu_cs_emit(cs, (uint32_t) value);
tu_cs_emit(cs, (uint32_t) (value >> 32));
}
static inline void
tu_cs_emit_write_reg(struct tu_cs *cs, uint16_t reg, uint32_t value)
{
tu_cs_emit_pkt4(cs, reg, 1);
tu_cs_emit(cs, value);
}
/**
* Emit a CP_INDIRECT_BUFFER command packet.
*/
static inline void
tu_cs_emit_ib(struct tu_cs *cs, const struct tu_cs_entry *entry)
{
assert(entry->bo);
assert(entry->size && entry->offset + entry->size <= entry->bo->size);
assert(entry->size % sizeof(uint32_t) == 0);
assert(entry->offset % sizeof(uint32_t) == 0);
tu_cs_emit_pkt7(cs, CP_INDIRECT_BUFFER, 3);
tu_cs_emit_qw(cs, entry->bo->iova + entry->offset);
tu_cs_emit(cs, entry->size / sizeof(uint32_t));
}
/* for compute which isn't using SET_DRAW_STATE */
static inline void
tu_cs_emit_state_ib(struct tu_cs *cs, struct tu_draw_state state)
{
if (state.size) {
tu_cs_emit_pkt7(cs, CP_INDIRECT_BUFFER, 3);
tu_cs_emit_qw(cs, state.iova);
tu_cs_emit(cs, state.size);
}
}
/**
* Emit a CP_INDIRECT_BUFFER command packet for each entry in the target
* command stream.
*/
static inline void
tu_cs_emit_call(struct tu_cs *cs, const struct tu_cs *target)
{
assert(target->mode == TU_CS_MODE_GROW);
for (uint32_t i = 0; i < target->entry_count; i++)
tu_cs_emit_ib(cs, target->entries + i);
}
/* Helpers for bracketing a large sequence of commands of unknown size inside
* a CP_COND_REG_EXEC packet.
*/
static inline void
tu_cond_exec_start(struct tu_cs *cs, uint32_t cond_flags)
{
assert(cs->mode == TU_CS_MODE_GROW);
assert(cs->cond_stack_depth < TU_COND_EXEC_STACK_SIZE);
tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
tu_cs_emit(cs, cond_flags);
cs->cond_flags[cs->cond_stack_depth] = cond_flags;
cs->cond_dwords[cs->cond_stack_depth] = cs->cur;
/* Emit dummy DWORD field here */
tu_cs_emit(cs, CP_COND_REG_EXEC_1_DWORDS(0));
cs->cond_stack_depth++;
}
#define CP_COND_EXEC_0_RENDER_MODE_GMEM \
(CP_COND_REG_EXEC_0_MODE(RENDER_MODE) | CP_COND_REG_EXEC_0_GMEM)
#define CP_COND_EXEC_0_RENDER_MODE_SYSMEM \
(CP_COND_REG_EXEC_0_MODE(RENDER_MODE) | CP_COND_REG_EXEC_0_SYSMEM)
static inline void
tu_cond_exec_end(struct tu_cs *cs)
{
assert(cs->cond_stack_depth > 0);
cs->cond_stack_depth--;
cs->cond_flags[cs->cond_stack_depth] = 0;
/* Subtract one here to account for the DWORD field itself. */
*cs->cond_dwords[cs->cond_stack_depth] =
cs->cur - cs->cond_dwords[cs->cond_stack_depth] - 1;
}
#define fd_reg_pair tu_reg_value
#define __bo_type struct tu_bo *
#include "a6xx.xml.h"
#include "a6xx-pack.xml.h"
#define __assert_eq(a, b) \
do { \
if ((a) != (b)) { \
fprintf(stderr, "assert failed: " #a " (0x%x) != " #b " (0x%x)\n", a, b); \
assert((a) == (b)); \
} \
} while (0)
#define __ONE_REG(i, regs) \
do { \
if (i < ARRAY_SIZE(regs) && regs[i].reg > 0) { \
__assert_eq(regs[0].reg + i, regs[i].reg); \
if (regs[i].bo) { \
uint64_t v = regs[i].bo->iova + regs[i].bo_offset; \
v >>= regs[i].bo_shift; \
v |= regs[i].value; \
\
*p++ = v; \
*p++ = v >> 32; \
} else { \
*p++ = regs[i].value; \
if (regs[i].is_address) \
*p++ = regs[i].value >> 32; \
} \
} \
} while (0)
/* Emits a sequence of register writes in order using a pkt4. This will check
* (at runtime on a !NDEBUG build) that the registers were actually set up in
* order in the code.
*
* Note that references to buffers aren't automatically added to the CS,
* unlike in freedreno. We are clever in various places to avoid duplicating
* the reference add work.
*
* Also, 64-bit address registers don't have a way (currently) to set a 64-bit
* address without having a reference to a BO, since the .dword field in the
* register's struct is only 32-bit wide. We should fix this in the pack
* codegen later.
*/
#define tu_cs_emit_regs(cs, ...) do { \
const struct fd_reg_pair regs[] = { __VA_ARGS__ }; \
unsigned count = ARRAY_SIZE(regs); \
\
STATIC_ASSERT(ARRAY_SIZE(regs) > 0); \
STATIC_ASSERT(ARRAY_SIZE(regs) <= 16); \
\
tu_cs_emit_pkt4((cs), regs[0].reg, count); \
uint32_t *p = (cs)->cur; \
__ONE_REG( 0, regs); \
__ONE_REG( 1, regs); \
__ONE_REG( 2, regs); \
__ONE_REG( 3, regs); \
__ONE_REG( 4, regs); \
__ONE_REG( 5, regs); \
__ONE_REG( 6, regs); \
__ONE_REG( 7, regs); \
__ONE_REG( 8, regs); \
__ONE_REG( 9, regs); \
__ONE_REG(10, regs); \
__ONE_REG(11, regs); \
__ONE_REG(12, regs); \
__ONE_REG(13, regs); \
__ONE_REG(14, regs); \
__ONE_REG(15, regs); \
(cs)->cur = p; \
} while (0)
#endif /* TU_CS_H */