radv/winsys: Add support for a fixed virtual address (VA) for replay.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10570>
This commit is contained in:
Bas Nieuwenhuizen 2020-11-23 03:13:18 +01:00 committed by Marge Bot
parent 8025b4120f
commit f9cc94af7b
13 changed files with 37 additions and 33 deletions

View File

@ -522,7 +522,7 @@ radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer, uint64_t m
device->ws->buffer_create(device->ws, new_size, 4096, device->ws->cs_domain(device->ws),
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC,
RADV_BO_PRIORITY_UPLOAD_BUFFER, &bo);
RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &bo);
if (result != VK_SUCCESS) {
cmd_buffer->record_result = result;

View File

@ -70,7 +70,7 @@ radv_init_trace(struct radv_device *device)
result = ws->buffer_create(
ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM,
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
RADV_BO_PRIORITY_UPLOAD_BUFFER, &device->trace_bo);
RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->trace_bo);
if (result != VK_SUCCESS)
return false;
@ -839,7 +839,7 @@ radv_trap_handler_init(struct radv_device *device)
result = ws->buffer_create(ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM,
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT,
RADV_BO_PRIORITY_SCRATCH, &device->tma_bo);
RADV_BO_PRIORITY_SCRATCH, 0, &device->tma_bo);
if (result != VK_SUCCESS)
return false;

View File

@ -804,7 +804,7 @@ radv_CreateDescriptorPool(VkDevice _device, const VkDescriptorPoolCreateInfo *pC
VkResult result = device->ws->buffer_create(
device->ws, bo_size, 32, RADEON_DOMAIN_VRAM,
RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT,
RADV_BO_PRIORITY_DESCRIPTOR, &pool->bo);
RADV_BO_PRIORITY_DESCRIPTOR, 0, &pool->bo);
if (result != VK_SUCCESS) {
radv_destroy_descriptor_pool(device, pAllocator, pool);
return vk_error(device->instance, result);

View File

@ -2761,7 +2761,7 @@ radv_device_init_border_color(struct radv_device *device)
result = device->ws->buffer_create(
device->ws, RADV_BORDER_COLOR_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM,
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_NO_INTERPROCESS_SHARING,
RADV_BO_PRIORITY_SHADER, &device->border_color_data.bo);
RADV_BO_PRIORITY_SHADER, 0, &device->border_color_data.bo);
if (result != VK_SUCCESS)
return vk_error(device->physical_device->instance, result);
@ -3824,7 +3824,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
if (scratch_size > queue_scratch_size) {
result =
queue->device->ws->buffer_create(queue->device->ws, scratch_size, 4096, RADEON_DOMAIN_VRAM,
ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, &scratch_bo);
ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &scratch_bo);
if (result != VK_SUCCESS)
goto fail;
} else
@ -3836,7 +3836,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
if (compute_scratch_size > compute_queue_scratch_size) {
result = queue->device->ws->buffer_create(queue->device->ws, compute_scratch_size, 4096,
RADEON_DOMAIN_VRAM, ring_bo_flags,
RADV_BO_PRIORITY_SCRATCH, &compute_scratch_bo);
RADV_BO_PRIORITY_SCRATCH, 0, &compute_scratch_bo);
if (result != VK_SUCCESS)
goto fail;
@ -3846,7 +3846,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
if (esgs_ring_size > queue->esgs_ring_size) {
result = queue->device->ws->buffer_create(queue->device->ws, esgs_ring_size, 4096,
RADEON_DOMAIN_VRAM, ring_bo_flags,
RADV_BO_PRIORITY_SCRATCH, &esgs_ring_bo);
RADV_BO_PRIORITY_SCRATCH, 0, &esgs_ring_bo);
if (result != VK_SUCCESS)
goto fail;
} else {
@ -3857,7 +3857,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
if (gsvs_ring_size > queue->gsvs_ring_size) {
result = queue->device->ws->buffer_create(queue->device->ws, gsvs_ring_size, 4096,
RADEON_DOMAIN_VRAM, ring_bo_flags,
RADV_BO_PRIORITY_SCRATCH, &gsvs_ring_bo);
RADV_BO_PRIORITY_SCRATCH, 0, &gsvs_ring_bo);
if (result != VK_SUCCESS)
goto fail;
} else {
@ -3868,7 +3868,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
if (add_tess_rings) {
result = queue->device->ws->buffer_create(
queue->device->ws, tess_offchip_ring_offset + tess_offchip_ring_size, 256,
RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, &tess_rings_bo);
RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &tess_rings_bo);
if (result != VK_SUCCESS)
goto fail;
} else {
@ -3881,8 +3881,9 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
/* 4 streamout GDS counters.
* We need 256B (64 dw) of GDS, otherwise streamout hangs.
*/
result = queue->device->ws->buffer_create(queue->device->ws, 256, 4, RADEON_DOMAIN_GDS,
ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, &gds_bo);
result =
queue->device->ws->buffer_create(queue->device->ws, 256, 4, RADEON_DOMAIN_GDS,
ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &gds_bo);
if (result != VK_SUCCESS)
goto fail;
} else {
@ -3894,7 +3895,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
result =
queue->device->ws->buffer_create(queue->device->ws, 4, 1, RADEON_DOMAIN_OA, ring_bo_flags,
RADV_BO_PRIORITY_SCRATCH, &gds_oa_bo);
RADV_BO_PRIORITY_SCRATCH, 0, &gds_oa_bo);
if (result != VK_SUCCESS)
goto fail;
} else {
@ -3915,7 +3916,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
result = queue->device->ws->buffer_create(
queue->device->ws, size, 4096, RADEON_DOMAIN_VRAM,
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY,
RADV_BO_PRIORITY_DESCRIPTOR, &descriptor_bo);
RADV_BO_PRIORITY_DESCRIPTOR, 0, &descriptor_bo);
if (result != VK_SUCCESS)
goto fail;
} else
@ -5384,7 +5385,7 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc
result = device->ws->buffer_create(device->ws, alloc_size,
device->physical_device->rad_info.max_alignment, domain,
flags, priority, &mem->bo);
flags, priority, 0, &mem->bo);
if (result != VK_SUCCESS) {
if (device->overallocation_disallowed) {
@ -6283,7 +6284,7 @@ radv_CreateEvent(VkDevice _device, const VkEventCreateInfo *pCreateInfo,
VkResult result = device->ws->buffer_create(
device->ws, 8, 8, RADEON_DOMAIN_GTT,
RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
RADV_BO_PRIORITY_FENCE, &event->bo);
RADV_BO_PRIORITY_FENCE, 0, &event->bo);
if (result != VK_SUCCESS) {
radv_destroy_event(device, pAllocator, event);
return vk_error(device->instance, result);
@ -6386,7 +6387,7 @@ radv_CreateBuffer(VkDevice _device, const VkBufferCreateInfo *pCreateInfo,
if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
VkResult result =
device->ws->buffer_create(device->ws, align64(buffer->size, 4096), 4096, 0,
RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, &buffer->bo);
RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, 0, &buffer->bo);
if (result != VK_SUCCESS) {
radv_destroy_buffer(device, pAllocator, buffer);
return vk_error(device->instance, result);

View File

@ -1700,8 +1700,9 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_
image->size = align64(image->size, image->alignment);
image->offset = 0;
result = device->ws->buffer_create(device->ws, image->size, image->alignment, 0,
RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, &image->bo);
result =
device->ws->buffer_create(device->ws, image->size, image->alignment, 0,
RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, 0, &image->bo);
if (result != VK_SUCCESS) {
radv_destroy_image(device, alloc, image);
return vk_error(device->instance, result);

View File

@ -978,7 +978,7 @@ radv_CreateQueryPool(VkDevice _device, const VkQueryPoolCreateInfo *pCreateInfo,
VkResult result = device->ws->buffer_create(device->ws, pool->size, 64, RADEON_DOMAIN_GTT,
RADEON_FLAG_NO_INTERPROCESS_SHARING,
RADV_BO_PRIORITY_QUERY_POOL, &pool->bo);
RADV_BO_PRIORITY_QUERY_POOL, 0, &pool->bo);
if (result != VK_SUCCESS) {
radv_destroy_query_pool(device, pAllocator, pool);
return vk_error(device->instance, result);

View File

@ -224,7 +224,7 @@ struct radeon_winsys {
VkResult (*buffer_create)(struct radeon_winsys *ws, uint64_t size, unsigned alignment,
enum radeon_bo_domain domain, enum radeon_bo_flag flags,
unsigned priority, struct radeon_winsys_bo **out_bo);
unsigned priority, uint64_t address, struct radeon_winsys_bo **out_bo);
void (*buffer_destroy)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo);
void *(*buffer_map)(struct radeon_winsys_bo *bo);

View File

@ -923,7 +923,7 @@ radv_alloc_shader_memory(struct radv_device *device, struct radv_shader_variant
RADEON_FLAG_NO_INTERPROCESS_SHARING |
(device->physical_device->rad_info.cpdma_prefetch_writes_memory ? 0
: RADEON_FLAG_READ_ONLY),
RADV_BO_PRIORITY_SHADER, &slab->bo);
RADV_BO_PRIORITY_SHADER, 0, &slab->bo);
if (result != VK_SUCCESS) {
free(slab);
return NULL;

View File

@ -385,7 +385,7 @@ radv_thread_trace_init_bo(struct radv_device *device)
VkResult result = ws->buffer_create(
ws, size, 4096, RADEON_DOMAIN_VRAM,
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
RADV_BO_PRIORITY_SCRATCH, &bo);
RADV_BO_PRIORITY_SCRATCH, 0, &bo);
device->thread_trace.bo = bo;
if (result != VK_SUCCESS)
return false;

View File

@ -632,7 +632,7 @@ cik_create_gfx_config(struct radv_device *device)
device->ws->buffer_create(device->ws, cs->cdw * 4, 4096, device->ws->cs_domain(device->ws),
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC,
RADV_BO_PRIORITY_CS, &device->gfx_init);
RADV_BO_PRIORITY_CS, 0, &device->gfx_init);
if (result != VK_SUCCESS)
goto fail;

View File

@ -394,7 +394,8 @@ radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo
static VkResult
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned alignment,
enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags,
unsigned priority, struct radeon_winsys_bo **out_bo)
unsigned priority, uint64_t replay_address,
struct radeon_winsys_bo **out_bo)
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
struct radv_amdgpu_winsys_bo *bo;
@ -420,10 +421,11 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned
virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);
r = amdgpu_va_range_alloc(
ws->dev, amdgpu_gpu_va_range_general, size, virt_alignment, 0, &va, &va_handle,
ws->dev, amdgpu_gpu_va_range_general, size, virt_alignment, replay_address, &va, &va_handle,
(flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) | AMDGPU_VA_RANGE_HIGH);
if (r) {
result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
result =
replay_address ? VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
goto error_va_alloc;
}

View File

@ -213,7 +213,7 @@ radv_amdgpu_cs_create(struct radeon_winsys *ws, enum ring_type ring_type)
ws->buffer_create(ws, ib_size, 0, radv_amdgpu_cs_domain(ws),
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC,
RADV_BO_PRIORITY_CS, &cs->ib_buffer);
RADV_BO_PRIORITY_CS, 0, &cs->ib_buffer);
if (result != VK_SUCCESS) {
free(cs);
return NULL;
@ -334,7 +334,7 @@ radv_amdgpu_cs_grow(struct radeon_cmdbuf *_cs, size_t min_size)
cs->ws->base.buffer_create(&cs->ws->base, ib_size, 0, radv_amdgpu_cs_domain(&cs->ws->base),
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC,
RADV_BO_PRIORITY_CS, &cs->ib_buffer);
RADV_BO_PRIORITY_CS, 0, &cs->ib_buffer);
if (result != VK_SUCCESS) {
cs->base.cdw = 0;
@ -1036,7 +1036,7 @@ radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, int queue_id
ws->buffer_create(
ws, 4 * size, 4096, radv_amdgpu_cs_domain(ws),
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY,
RADV_BO_PRIORITY_CS, &bos[j]);
RADV_BO_PRIORITY_CS, 0, &bos[j]);
ptr = ws->buffer_map(bos[j]);
if (needs_preamble) {
@ -1079,7 +1079,7 @@ radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, int queue_id
ws->buffer_create(
ws, 4 * size, 4096, radv_amdgpu_cs_domain(ws),
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY,
RADV_BO_PRIORITY_CS, &bos[0]);
RADV_BO_PRIORITY_CS, 0, &bos[0]);
ptr = ws->buffer_map(bos[0]);
if (preamble_cs) {
@ -1264,7 +1264,7 @@ radv_amdgpu_ctx_create(struct radeon_winsys *_ws, enum radeon_ctx_priority prior
assert(AMDGPU_HW_IP_NUM * MAX_RINGS_PER_TYPE * sizeof(uint64_t) <= 4096);
result = ws->base.buffer_create(&ws->base, 4096, 8, RADEON_DOMAIN_GTT,
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
RADV_BO_PRIORITY_CS, &ctx->fence_bo);
RADV_BO_PRIORITY_CS, 0, &ctx->fence_bo);
if (result != VK_SUCCESS) {
goto fail_alloc;
}

View File

@ -31,7 +31,7 @@
static VkResult
radv_null_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned alignment,
enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags,
unsigned priority, struct radeon_winsys_bo **out_bo)
unsigned priority, uint64_t address, struct radeon_winsys_bo **out_bo)
{
struct radv_null_winsys_bo *bo;