v3dv: implement vkCmdFillBuffer
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>
This commit is contained in:
parent
b3f9a3beaf
commit
5ed78d91fe
|
@ -767,3 +767,250 @@ v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
|
|||
*/
|
||||
v3dv_job_add_extra_bo(copy_job, src_bo);
|
||||
}
|
||||
|
||||
static void
|
||||
emit_fill_buffer_per_tile_list(struct v3dv_job *job,
|
||||
struct v3dv_bo *bo,
|
||||
uint32_t offset,
|
||||
uint32_t stride)
|
||||
{
|
||||
struct v3dv_cl *cl = &job->indirect;
|
||||
v3dv_cl_ensure_space(cl, 200, 1);
|
||||
struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
|
||||
|
||||
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
|
||||
|
||||
cl_emit(cl, END_OF_LOADS, end);
|
||||
|
||||
cl_emit(cl, PRIM_LIST_FORMAT, fmt) {
|
||||
fmt.primitive_type = LIST_TRIANGLES;
|
||||
}
|
||||
|
||||
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
|
||||
|
||||
cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
|
||||
store.buffer_to_store = RENDER_TARGET_0;
|
||||
store.address = v3dv_cl_address(bo, offset);
|
||||
store.clear_buffer_being_stored = false;
|
||||
store.output_image_format = V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
|
||||
store.memory_format = VC5_TILING_RASTER;
|
||||
store.height_in_ub_or_stride = stride;
|
||||
store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
|
||||
}
|
||||
|
||||
cl_emit(cl, END_OF_TILE_MARKER, end);
|
||||
|
||||
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
|
||||
|
||||
cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
|
||||
branch.start = tile_list_start;
|
||||
branch.end = v3dv_cl_get_address(cl);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_fill_buffer(struct v3dv_job *job,
|
||||
uint32_t min_x_supertile,
|
||||
uint32_t min_y_supertile,
|
||||
uint32_t max_x_supertile,
|
||||
uint32_t max_y_supertile,
|
||||
struct v3dv_bo *bo,
|
||||
uint32_t offset,
|
||||
struct v3dv_framebuffer *framebuffer)
|
||||
{
|
||||
struct v3dv_cl *rcl = &job->rcl;
|
||||
|
||||
cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
|
||||
list.address = v3dv_cl_address(job->tile_alloc, 0);
|
||||
}
|
||||
|
||||
cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
|
||||
config.number_of_bin_tile_lists = 1;
|
||||
config.total_frame_width_in_tiles = framebuffer->draw_tiles_x;
|
||||
config.total_frame_height_in_tiles = framebuffer->draw_tiles_y;
|
||||
|
||||
config.supertile_width_in_tiles = framebuffer->supertile_width;
|
||||
config.supertile_height_in_tiles = framebuffer->supertile_height;
|
||||
|
||||
config.total_frame_width_in_supertiles =
|
||||
framebuffer->frame_width_in_supertiles;
|
||||
config.total_frame_height_in_supertiles =
|
||||
framebuffer->frame_height_in_supertiles;
|
||||
}
|
||||
|
||||
/* Implement GFXH-1742 workaround and emit a clear of the tile buffers.
|
||||
* Since we fill by clearing, we need to do the clear here.
|
||||
*/
|
||||
for (int i = 0; i < 2; i++) {
|
||||
cl_emit(rcl, TILE_COORDINATES, coords);
|
||||
cl_emit(rcl, END_OF_LOADS, end);
|
||||
cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
|
||||
store.buffer_to_store = NONE;
|
||||
}
|
||||
if (i == 0) {
|
||||
cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
|
||||
clear.clear_z_stencil_buffer = true;
|
||||
clear.clear_all_render_targets = true;
|
||||
}
|
||||
}
|
||||
cl_emit(rcl, END_OF_TILE_MARKER, end);
|
||||
}
|
||||
|
||||
cl_emit(rcl, FLUSH_VCD_CACHE, flush);
|
||||
|
||||
const uint32_t stride = framebuffer->width * 4;
|
||||
emit_fill_buffer_per_tile_list(job, bo, offset, stride);
|
||||
|
||||
for (int y = min_y_supertile; y <= max_y_supertile; y++) {
|
||||
for (int x = min_x_supertile; x <= max_x_supertile; x++) {
|
||||
cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
|
||||
coords.column_number_in_supertiles = x;
|
||||
coords.row_number_in_supertiles = y;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_fill_buffer_rcl(struct v3dv_job *job,
|
||||
struct v3dv_bo *bo,
|
||||
uint32_t offset,
|
||||
struct v3dv_framebuffer *framebuffer,
|
||||
uint32_t internal_type,
|
||||
uint32_t data)
|
||||
{
|
||||
struct v3dv_cl *rcl = &job->rcl;
|
||||
v3dv_cl_ensure_space_with_branch(rcl, 200 +
|
||||
1 * 256 *
|
||||
cl_packet_length(SUPERTILE_COORDINATES));
|
||||
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
|
||||
config.early_z_disable = true;
|
||||
config.image_width_pixels = framebuffer->width;
|
||||
config.image_height_pixels = framebuffer->height;
|
||||
config.number_of_render_targets = 1;
|
||||
config.multisample_mode_4x = false;
|
||||
config.maximum_bpp_of_all_render_targets = framebuffer->internal_bpp;
|
||||
}
|
||||
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
|
||||
clear.clear_color_low_32_bits = data;
|
||||
clear.clear_color_next_24_bits = 0;
|
||||
clear.render_target_number = 0;
|
||||
};
|
||||
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
|
||||
rt.render_target_0_internal_bpp = framebuffer->internal_bpp;
|
||||
rt.render_target_0_internal_type = internal_type;
|
||||
rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
|
||||
}
|
||||
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
|
||||
clear.z_clear_value = 0;
|
||||
clear.stencil_clear_value = 0;
|
||||
};
|
||||
|
||||
cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
|
||||
init.use_auto_chained_tile_lists = true;
|
||||
init.size_of_first_block_in_chained_tile_lists =
|
||||
TILE_ALLOCATION_BLOCK_SIZE_64B;
|
||||
}
|
||||
|
||||
uint32_t supertile_w_in_pixels =
|
||||
framebuffer->tile_width * framebuffer->supertile_width;
|
||||
uint32_t supertile_h_in_pixels =
|
||||
framebuffer->tile_height * framebuffer->supertile_height;
|
||||
|
||||
const uint32_t min_x_supertile = 0;
|
||||
const uint32_t min_y_supertile = 0;
|
||||
|
||||
const uint32_t max_x_supertile =
|
||||
(framebuffer->width - 1) / supertile_w_in_pixels;
|
||||
const uint32_t max_y_supertile =
|
||||
(framebuffer->height - 1) / supertile_h_in_pixels;
|
||||
|
||||
emit_fill_buffer(job,
|
||||
min_x_supertile, min_y_supertile,
|
||||
max_x_supertile, max_y_supertile,
|
||||
bo, offset, framebuffer);
|
||||
|
||||
cl_emit(rcl, END_OF_RENDERING, end);
|
||||
}
|
||||
|
||||
static void
|
||||
fill_buffer(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
struct v3dv_bo *bo,
|
||||
uint32_t offset,
|
||||
uint32_t size,
|
||||
uint32_t data)
|
||||
{
|
||||
assert(size > 0 && size % 4 == 0);
|
||||
assert(offset + size <= bo->size);
|
||||
|
||||
const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
|
||||
const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
|
||||
uint32_t num_items = size / 4;
|
||||
|
||||
while (num_items > 0) {
|
||||
const uint32_t max_dim_items = 4096;
|
||||
const uint32_t max_items = max_dim_items * max_dim_items;
|
||||
uint32_t width, height;
|
||||
if (num_items > max_items) {
|
||||
width = max_dim_items;
|
||||
height = max_dim_items;
|
||||
} else {
|
||||
width = num_items;
|
||||
height = 1;
|
||||
while (width > max_dim_items ||
|
||||
((width % 2) == 0 && width > 2 * height)) {
|
||||
width >>= 1;
|
||||
height <<= 1;
|
||||
}
|
||||
}
|
||||
assert(width <= max_dim_items && height <= max_dim_items);
|
||||
assert(width * height <= num_items);
|
||||
|
||||
struct v3dv_framebuffer framebuffer;
|
||||
setup_framebuffer_params(&framebuffer, width, height, 1, internal_bpp);
|
||||
|
||||
struct v3dv_job *job = v3dv_cmd_buffer_start_job(cmd_buffer, false);
|
||||
v3dv_cmd_buffer_start_frame(cmd_buffer, &framebuffer);
|
||||
|
||||
v3dv_job_emit_binning_flush(job);
|
||||
|
||||
emit_fill_buffer_rcl(job, bo, offset,
|
||||
&framebuffer, internal_type, data);
|
||||
|
||||
v3dv_cmd_buffer_finish_job(cmd_buffer);
|
||||
|
||||
const uint32_t items_copied = width * height;
|
||||
const uint32_t bytes_copied = items_copied * 4;
|
||||
num_items -= items_copied;
|
||||
offset += bytes_copied;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,
|
||||
VkBuffer dstBuffer,
|
||||
VkDeviceSize dstOffset,
|
||||
VkDeviceSize size,
|
||||
uint32_t data)
|
||||
{
|
||||
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
|
||||
|
||||
struct v3dv_bo *bo = dst_buffer->mem->bo;
|
||||
|
||||
/* From the Vulkan spec:
|
||||
*
|
||||
* "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not
|
||||
* a multiple of 4, then the nearest smaller multiple is used."
|
||||
*/
|
||||
if (size == VK_WHOLE_SIZE) {
|
||||
size = dst_buffer->mem->bo->size;
|
||||
size -= size % 4;
|
||||
}
|
||||
|
||||
fill_buffer(cmd_buffer, bo, dstOffset, size, data);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue