tu: Add a "scratch bo" allocation mechanism
This is simpler than a full-blown memory-reuse mechanism, but it is good enough to ensure that repeatedly performing a copy that requires the linear staging-buffer workaround neither uses excessive memory nor is slowed down by repeated allocations.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5007>
This commit is contained in:
parent
7ce527a4fe
commit
ed79f805fa
|
@ -39,6 +39,7 @@
|
|||
#include "compiler/glsl_types.h"
|
||||
#include "util/debug.h"
|
||||
#include "util/disk_cache.h"
|
||||
#include "util/u_atomic.h"
|
||||
#include "vk_format.h"
|
||||
#include "vk_util.h"
|
||||
|
||||
|
@ -1256,6 +1257,9 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
|
||||
device->mem_cache = tu_pipeline_cache_from_handle(pc);
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++)
|
||||
mtx_init(&device->scratch_bos[i].construct_mtx, mtx_plain);
|
||||
|
||||
*pDevice = tu_device_to_handle(device);
|
||||
return VK_SUCCESS;
|
||||
|
||||
|
@ -1302,6 +1306,11 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
|
|||
vk_free(&device->alloc, device->queues[i]);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) {
|
||||
if (device->scratch_bos[i].initialized)
|
||||
tu_bo_finish(device, &device->scratch_bos[i].bo);
|
||||
}
|
||||
|
||||
/* the compiler does not use pAllocator */
|
||||
ralloc_free(device->compiler);
|
||||
|
||||
|
@ -1311,6 +1320,51 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
|
|||
vk_free(&device->alloc, device);
|
||||
}
|
||||
|
||||
/**
 * Return a device-lifetime scratch BO that is at least \p size bytes.
 *
 * Requests are bucketed by power-of-two size: \p size is rounded up to the
 * next power of two (clamped to MIN_SCRATCH_BO_SIZE_LOG2) and mapped to a
 * slot in dev->scratch_bos. Each slot is allocated at most once and lives
 * until tu_DestroyDevice, so callers must not free the returned BO.
 *
 * Thread-safety: classic double-checked locking. The lock-free fast path
 * reads the per-slot "initialized" flag with p_atomic_read; the slow path
 * serializes construction on the slot's construct_mtx and re-checks the
 * flag under the lock so a racing thread never allocates the same slot
 * twice. NOTE(review): correctness of the fast path also relies on
 * p_atomic_set/p_atomic_read providing release/acquire ordering so the BO
 * contents are visible before "initialized" reads true — confirm against
 * util/u_atomic.h.
 *
 * \param dev   the device owning the scratch BO cache
 * \param size  minimum required BO size in bytes
 * \param bo    out: pointer to the cached BO (owned by \p dev)
 * \return VK_SUCCESS, or the error from tu_bo_init_new on allocation failure
 */
VkResult
tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo)
{
   /* Bucket the request: round size up to a power of two, no smaller than
    * the minimum scratch BO size, and turn that into a slot index.
    */
   unsigned size_log2 = MAX2(util_logbase2_ceil64(size), MIN_SCRATCH_BO_SIZE_LOG2);
   unsigned index = size_log2 - MIN_SCRATCH_BO_SIZE_LOG2;

   assert(index < ARRAY_SIZE(dev->scratch_bos));

   /* Scan from the exact-fit slot upward: any already-initialized BO of this
    * size or larger satisfies the request without taking a lock.
    */
   for (unsigned i = index; i < ARRAY_SIZE(dev->scratch_bos); i++) {
      if (p_atomic_read(&dev->scratch_bos[i].initialized)) {
         /* Fast path: just return the already-allocated BO. */
         *bo = &dev->scratch_bos[i].bo;
         return VK_SUCCESS;
      }
   }

   /* Slow path: actually allocate the BO. Take the per-slot construction
    * mutex so that only one thread allocates a given slot; a per-slot (rather
    * than per-device) lock keeps threads constructing different sizes from
    * blocking each other.
    */
   mtx_lock(&dev->scratch_bos[index].construct_mtx);

   /* Another thread may have allocated it already while we were waiting on
    * the lock. We need to check this in order to avoid double-allocating.
    * (Under the mutex a plain read of "initialized" is sufficient.)
    */
   if (dev->scratch_bos[index].initialized) {
      mtx_unlock(&dev->scratch_bos[index].construct_mtx);
      *bo = &dev->scratch_bos[index].bo;
      return VK_SUCCESS;
   }

   unsigned bo_size = 1ull << size_log2;
   VkResult result = tu_bo_init_new(dev, &dev->scratch_bos[index].bo, bo_size);
   if (result != VK_SUCCESS) {
      /* Leave "initialized" false so a later caller can retry this slot. */
      mtx_unlock(&dev->scratch_bos[index].construct_mtx);
      return result;
   }

   /* Publish the BO: the atomic set is what makes the slot visible to the
    * lock-free fast path above, so it must happen only after tu_bo_init_new
    * has fully succeeded.
    */
   p_atomic_set(&dev->scratch_bos[index].initialized, true);

   mtx_unlock(&dev->scratch_bos[index].construct_mtx);

   *bo = &dev->scratch_bos[index].bo;
   return VK_SUCCESS;
}
|
||||
|
||||
VkResult
|
||||
tu_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
|
||||
VkLayerProperties *pProperties)
|
||||
|
|
|
@ -509,6 +509,17 @@ struct tu_device
|
|||
uint32_t vsc_draw_strm_pitch;
|
||||
uint32_t vsc_prim_strm_pitch;
|
||||
|
||||
#define MIN_SCRATCH_BO_SIZE_LOG2 12 /* A page */
|
||||
|
||||
/* Currently the kernel driver uses a 32-bit GPU address space, but it
|
||||
* should be impossible to go beyond 48 bits.
|
||||
*/
|
||||
struct {
|
||||
struct tu_bo bo;
|
||||
mtx_t construct_mtx;
|
||||
bool initialized;
|
||||
} scratch_bos[48 - MIN_SCRATCH_BO_SIZE_LOG2];
|
||||
|
||||
struct tu_bo border_color;
|
||||
|
||||
struct list_head shader_slabs;
|
||||
|
@ -531,6 +542,15 @@ tu_bo_finish(struct tu_device *dev, struct tu_bo *bo);
|
|||
VkResult
|
||||
tu_bo_map(struct tu_device *dev, struct tu_bo *bo);
|
||||
|
||||
/* Get a scratch bo for use inside a command buffer. This will always return
|
||||
* the same bo given the same size or similar sizes, so only one scratch bo
|
||||
* can be used at the same time. It's meant for short-lived things where we
|
||||
* need to write to some piece of memory, read from it, and then immediately
|
||||
* discard it.
|
||||
*/
|
||||
VkResult
|
||||
tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo);
|
||||
|
||||
struct tu_cs_entry
|
||||
{
|
||||
/* No ownership */
|
||||
|
|
Loading…
Reference in New Issue