anv: make use of the new smallbar uAPI
Instead of having 2 VkMemoryType pointing to the same VkMemoryHeap, we have each VkMemoryType with VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT (one host visible, the other not) point to its own VkMemoryHeap. For the local heap that is host visible, we'll use the I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS flag at GEM BO creation. When the smallbar uAPI is not available we fall back to a single heap and do not use I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS. v2: Handle probed_cpu_visible_size == probed_size (Matthew) v3: * Jordan: Use region info from devinfo v4: Also make the vram host visible heap as local (Ken) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Signed-off-by: Jordan Justen <jordan.l.justen@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16739>
This commit is contained in:
parent
33bf0d7437
commit
fae88d8791
|
@ -1696,23 +1696,27 @@ anv_device_alloc_bo(struct anv_device *device,
|
|||
/* If we have vram size, we have multiple memory regions and should choose
|
||||
* one of them.
|
||||
*/
|
||||
if (device->physical->vram.size > 0) {
|
||||
if (anv_physical_device_has_vram(device->physical)) {
|
||||
struct drm_i915_gem_memory_class_instance regions[2];
|
||||
uint32_t nregions = 0;
|
||||
|
||||
if (alloc_flags & ANV_BO_ALLOC_LOCAL_MEM) {
|
||||
regions[nregions++] = device->physical->vram.region;
|
||||
/* vram_non_mappable & vram_mappable actually are the same region. */
|
||||
regions[nregions++] = device->physical->vram_non_mappable.region;
|
||||
} else {
|
||||
regions[nregions++] = device->physical->sys.region;
|
||||
}
|
||||
|
||||
/* TODO: Add I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS to flags for
|
||||
* after small BAR uapi is stabilized.
|
||||
*/
|
||||
assert(intel_vram_all_mappable(&device->info));
|
||||
uint32_t flags = 0;
|
||||
if (alloc_flags & ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE) {
|
||||
assert(alloc_flags & ANV_BO_ALLOC_LOCAL_MEM);
|
||||
/* We're required to add smem as a region when using mappable vram. */
|
||||
regions[nregions++] = device->physical->sys.region;
|
||||
flags |= I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS;
|
||||
}
|
||||
|
||||
gem_handle = anv_gem_create_regions(device, size + ccs_size,
|
||||
nregions, regions);
|
||||
flags, nregions, regions);
|
||||
} else {
|
||||
gem_handle = anv_gem_create(device, size + ccs_size);
|
||||
}
|
||||
|
|
|
@ -378,11 +378,18 @@ anv_init_meminfo(struct anv_physical_device *device, int fd)
|
|||
anv_compute_sys_heap_size(device, devinfo->mem.sram.mappable.size);
|
||||
device->sys.available = devinfo->mem.sram.mappable.free;
|
||||
|
||||
device->vram.region.memory_class = devinfo->mem.vram.mem_class;
|
||||
device->vram.region.memory_instance =
|
||||
device->vram_mappable.region.memory_class = devinfo->mem.vram.mem_class;
|
||||
device->vram_mappable.region.memory_instance =
|
||||
devinfo->mem.vram.mem_instance;
|
||||
device->vram.size = devinfo->mem.vram.mappable.size;
|
||||
device->vram.available = devinfo->mem.vram.mappable.free;
|
||||
device->vram_mappable.size = devinfo->mem.vram.mappable.size;
|
||||
device->vram_mappable.available = devinfo->mem.vram.mappable.free;
|
||||
|
||||
device->vram_non_mappable.region.memory_class =
|
||||
devinfo->mem.vram.mem_class;
|
||||
device->vram_non_mappable.region.memory_instance =
|
||||
devinfo->mem.vram.mem_instance;
|
||||
device->vram_non_mappable.size = devinfo->mem.vram.unmappable.size;
|
||||
device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
@ -395,7 +402,8 @@ anv_update_meminfo(struct anv_physical_device *device, int fd)
|
|||
|
||||
const struct intel_device_info *devinfo = &device->info;
|
||||
device->sys.available = devinfo->mem.sram.mappable.free;
|
||||
device->vram.available = devinfo->mem.vram.mappable.free;
|
||||
device->vram_mappable.available = devinfo->mem.vram.mappable.free;
|
||||
device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
|
||||
}
|
||||
|
||||
|
||||
|
@ -408,13 +416,19 @@ anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
|
|||
|
||||
assert(device->sys.size != 0);
|
||||
|
||||
if (device->vram.size > 0) {
|
||||
/* We can create 2 different heaps when we have local memory support,
|
||||
* first heap with local memory size and second with system memory size.
|
||||
if (anv_physical_device_has_vram(device)) {
|
||||
/* We can create 2 or 3 different heaps when we have local memory
|
||||
* support, first heap with local memory size and second with system
|
||||
* memory size and the third is added only if part of the vram is
|
||||
* not mappable to the host.
|
||||
*/
|
||||
device->memory.heap_count = 2;
|
||||
device->memory.heaps[0] = (struct anv_memory_heap) {
|
||||
.size = device->vram.size,
|
||||
/* If there is a vram_non_mappable, use that for the device only
|
||||
* heap. Otherwise use the vram_mappable.
|
||||
*/
|
||||
.size = device->vram_non_mappable.size != 0 ?
|
||||
device->vram_non_mappable.size : device->vram_mappable.size,
|
||||
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
|
||||
.is_local_mem = true,
|
||||
};
|
||||
|
@ -423,6 +437,17 @@ anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
|
|||
.flags = 0,
|
||||
.is_local_mem = false,
|
||||
};
|
||||
/* Add an additional smaller vram mappable heap if we can't map all the
|
||||
* vram to the host.
|
||||
*/
|
||||
if (device->vram_non_mappable.size > 0) {
|
||||
device->memory.heap_count++;
|
||||
device->memory.heaps[2] = (struct anv_memory_heap) {
|
||||
.size = device->vram_mappable.size,
|
||||
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
|
||||
.is_local_mem = true,
|
||||
};
|
||||
}
|
||||
|
||||
device->memory.type_count = 3;
|
||||
device->memory.types[0] = (struct anv_memory_type) {
|
||||
|
@ -439,7 +464,11 @@ anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
|
|||
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
||||
.heapIndex = 0,
|
||||
/* This memory type either comes from heaps[0] if there is only
|
||||
* mappable vram region, or from heaps[2] if there is both mappable &
|
||||
* non-mappable vram regions.
|
||||
*/
|
||||
.heapIndex = device->vram_non_mappable.size > 0 ? 2 : 0,
|
||||
};
|
||||
} else if (device->info.has_llc) {
|
||||
device->memory.heap_count = 1;
|
||||
|
@ -843,7 +872,8 @@ anv_physical_device_try_create(struct anv_instance *instance,
|
|||
device->gtt_size > (4ULL << 30 /* GiB */);
|
||||
|
||||
/* Initialize memory regions struct to 0. */
|
||||
memset(&device->vram, 0, sizeof(device->vram));
|
||||
memset(&device->vram_non_mappable, 0, sizeof(device->vram_non_mappable));
|
||||
memset(&device->vram_mappable, 0, sizeof(device->vram_mappable));
|
||||
memset(&device->sys, 0, sizeof(device->sys));
|
||||
|
||||
result = anv_physical_device_init_heaps(device, fd);
|
||||
|
@ -2741,7 +2771,7 @@ anv_get_memory_budget(VkPhysicalDevice physicalDevice,
|
|||
|
||||
if (device->memory.heaps[i].is_local_mem) {
|
||||
total_heaps_size = total_vram_heaps_size;
|
||||
mem_available = device->vram.available;
|
||||
mem_available = device->vram_non_mappable.available;
|
||||
} else {
|
||||
total_heaps_size = total_sys_heaps_size;
|
||||
mem_available = device->sys.available;
|
||||
|
@ -3832,6 +3862,16 @@ VkResult anv_AllocateMemory(
|
|||
if (device->physical->has_implicit_ccs && device->info.has_aux_map)
|
||||
alloc_flags |= ANV_BO_ALLOC_IMPLICIT_CCS;
|
||||
|
||||
/* If i915 reported both mappable and non-mappable vram regions and the
|
||||
* application wants mappable lmem, then we need to use the
|
||||
* I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS flag to create our BO.
|
||||
*/
|
||||
if (pdevice->vram_mappable.size > 0 &&
|
||||
pdevice->vram_non_mappable.size > 0 &&
|
||||
(mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
|
||||
(mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
|
||||
alloc_flags |= ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE;
|
||||
|
||||
if (vk_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)
|
||||
alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;
|
||||
|
||||
|
|
|
@ -66,9 +66,12 @@ anv_gem_close(struct anv_device *device, uint32_t gem_handle)
|
|||
|
||||
uint32_t
|
||||
anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
|
||||
uint32_t num_regions,
|
||||
uint32_t flags, uint32_t num_regions,
|
||||
struct drm_i915_gem_memory_class_instance *regions)
|
||||
{
|
||||
/* Check for invalid flags */
|
||||
assert((flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) == 0);
|
||||
|
||||
struct drm_i915_gem_create_ext_memory_regions ext_regions = {
|
||||
.base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
|
||||
.num_regions = num_regions,
|
||||
|
@ -78,6 +81,7 @@ anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
|
|||
struct drm_i915_gem_create_ext gem_create = {
|
||||
.size = anv_bo_size,
|
||||
.extensions = (uintptr_t) &ext_regions,
|
||||
.flags = flags,
|
||||
};
|
||||
|
||||
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE_EXT,
|
||||
|
|
|
@ -47,7 +47,7 @@ anv_gem_close(struct anv_device *device, uint32_t gem_handle)
|
|||
|
||||
uint32_t
|
||||
anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
|
||||
uint32_t num_regions,
|
||||
uint32_t flags, uint32_t num_regions,
|
||||
struct drm_i915_gem_memory_class_instance *regions)
|
||||
{
|
||||
return 0;
|
||||
|
|
|
@ -1040,7 +1040,11 @@ struct anv_physical_device {
|
|||
bool need_clflush;
|
||||
} memory;
|
||||
|
||||
struct anv_memregion vram;
|
||||
/* Either we have a single vram region and it's all mappable, or we have
|
||||
* both mappable & non-mappable parts. System memory is always available.
|
||||
*/
|
||||
struct anv_memregion vram_mappable;
|
||||
struct anv_memregion vram_non_mappable;
|
||||
struct anv_memregion sys;
|
||||
uint8_t driver_build_sha1[20];
|
||||
uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
|
||||
|
@ -1066,6 +1070,12 @@ struct anv_physical_device {
|
|||
struct intel_measure_device measure_device;
|
||||
};
|
||||
|
||||
/**
 * Returns true when device->vram_mappable.size is greater than zero.
 *
 * NOTE(review): the comment on the vram_mappable/vram_non_mappable fields
 * states that vram is either a single all-mappable region or split into
 * mappable and non-mappable parts, so a non-zero mappable size presumably
 * means "the device has vram at all" — confirm against anv_init_meminfo().
 */
static inline bool
|
||||
anv_physical_device_has_vram(const struct anv_physical_device *device)
|
||||
{
|
||||
return device->vram_mappable.size > 0;
|
||||
}
|
||||
|
||||
struct anv_app_info {
|
||||
const char* app_name;
|
||||
uint32_t app_version;
|
||||
|
@ -1368,6 +1378,9 @@ enum anv_bo_alloc_flags {
|
|||
|
||||
/** This buffer is allocated from local memory */
|
||||
ANV_BO_ALLOC_LOCAL_MEM = (1 << 10),
|
||||
|
||||
/** This buffer is allocated from local memory and should be cpu visible */
|
||||
ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE = (1 << 11),
|
||||
};
|
||||
|
||||
VkResult anv_device_alloc_bo(struct anv_device *device,
|
||||
|
@ -1431,7 +1444,7 @@ void anv_gem_munmap(struct anv_device *device, void *p, uint64_t size);
|
|||
uint32_t anv_gem_create(struct anv_device *device, uint64_t size);
|
||||
void anv_gem_close(struct anv_device *device, uint32_t gem_handle);
|
||||
uint32_t anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
|
||||
uint32_t num_regions,
|
||||
uint32_t flags, uint32_t num_regions,
|
||||
struct drm_i915_gem_memory_class_instance *regions);
|
||||
uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size);
|
||||
int anv_gem_busy(struct anv_device *device, uint32_t gem_handle);
|
||||
|
|
Loading…
Reference in New Issue