anv: make use of the new small BAR uAPI

Instead of having two VkMemoryTypes pointing to the same VkMemoryHeap, we
give each VkMemoryType with VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT (one
host visible, the other not) its own VkMemoryHeap. For allocations from
the host-visible local heap, we use the
I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS flag at GEM BO creation.
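
For reference, a stripped-down sketch of what such a host-visible lmem
allocation looks like through the i915 create_ext uAPI (struct and flag
names match the anv_gem_create_regions() change below; the helper name,
ioctl wrapper and include paths are illustrative, not the exact anv code):

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include "drm-uapi/i915_drm.h"

    static uint32_t
    create_mappable_lmem_bo(int fd, uint64_t size,
                            struct drm_i915_gem_memory_class_instance *regions,
                            uint32_t num_regions)
    {
       struct drm_i915_gem_create_ext_memory_regions ext_regions = {
          .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
          .num_regions = num_regions,
          .regions = (uintptr_t)regions,
       };
       struct drm_i915_gem_create_ext gem_create = {
          .size = size,
          .extensions = (uintptr_t)&ext_regions,
          /* Ask the kernel to place the BO where the CPU can map it. */
          .flags = I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS,
       };

       if (ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &gem_create) != 0)
          return 0; /* creation failed */
       return gem_create.handle;
    }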

When the small BAR uAPI is not available, we fall back to a single heap
and do not use I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS.
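
The mappable/non-mappable split comes from the i915 memory region query:
with the small BAR uAPI each lmem region additionally reports how much of
it is CPU-visible. A hedged sketch of that detection (the helper and
struct vram_split names are made up for illustration; in Mesa this
happens when filling intel_device_info, not in anv):

    /* Split an lmem region based on the small BAR fields of
     * drm_i915_memory_region_info.
     */
    struct vram_split {
       uint64_t mappable_size;
       uint64_t non_mappable_size;
    };

    static struct vram_split
    split_vram_region(const struct drm_i915_memory_region_info *info)
    {
       struct vram_split split = { 0, 0 };

       if (info->probed_cpu_visible_size == 0) {
          /* Kernels without the small BAR uAPI are expected to report zero
           * here (the field lives in previously reserved space), so treat
           * the whole region as mappable and expose a single vram heap.
           */
          split.mappable_size = info->probed_size;
       } else {
          /* probed_cpu_visible_size == probed_size means all of vram is
           * mappable and no second vram heap is needed (the v2 case below).
           */
          split.mappable_size = info->probed_cpu_visible_size;
          split.non_mappable_size =
             info->probed_size - info->probed_cpu_visible_size;
       }
       return split;
    }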

v2: Handle probed_cpu_visible_size == probed_size (Matthew)

v3:
 * Jordan: Use region info from devinfo

v4: Also mark the host-visible vram heap as device local (Ken)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16739>
Lionel Landwerlin 2022-05-02 12:38:16 +03:00 committed by Marge Bot
parent 33bf0d7437
commit fae88d8791
5 changed files with 84 additions and 23 deletions


@@ -1696,23 +1696,27 @@ anv_device_alloc_bo(struct anv_device *device,
/* If we have vram size, we have multiple memory regions and should choose
* one of them.
*/
if (device->physical->vram.size > 0) {
if (anv_physical_device_has_vram(device->physical)) {
struct drm_i915_gem_memory_class_instance regions[2];
uint32_t nregions = 0;
if (alloc_flags & ANV_BO_ALLOC_LOCAL_MEM) {
regions[nregions++] = device->physical->vram.region;
/* vram_non_mappable & vram_mappable actually are the same region. */
regions[nregions++] = device->physical->vram_non_mappable.region;
} else {
regions[nregions++] = device->physical->sys.region;
}
/* TODO: Add I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS to flags for
* after small BAR uapi is stabilized.
*/
assert(intel_vram_all_mappable(&device->info));
uint32_t flags = 0;
if (alloc_flags & ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE) {
assert(alloc_flags & ANV_BO_ALLOC_LOCAL_MEM);
/* We're required to add smem as a region when using mappable vram. */
regions[nregions++] = device->physical->sys.region;
flags |= I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS;
}
gem_handle = anv_gem_create_regions(device, size + ccs_size,
nregions, regions);
flags, nregions, regions);
} else {
gem_handle = anv_gem_create(device, size + ccs_size);
}


@@ -378,11 +378,18 @@ anv_init_meminfo(struct anv_physical_device *device, int fd)
anv_compute_sys_heap_size(device, devinfo->mem.sram.mappable.size);
device->sys.available = devinfo->mem.sram.mappable.free;
device->vram.region.memory_class = devinfo->mem.vram.mem_class;
device->vram.region.memory_instance =
device->vram_mappable.region.memory_class = devinfo->mem.vram.mem_class;
device->vram_mappable.region.memory_instance =
devinfo->mem.vram.mem_instance;
device->vram.size = devinfo->mem.vram.mappable.size;
device->vram.available = devinfo->mem.vram.mappable.free;
device->vram_mappable.size = devinfo->mem.vram.mappable.size;
device->vram_mappable.available = devinfo->mem.vram.mappable.free;
device->vram_non_mappable.region.memory_class =
devinfo->mem.vram.mem_class;
device->vram_non_mappable.region.memory_instance =
devinfo->mem.vram.mem_instance;
device->vram_non_mappable.size = devinfo->mem.vram.unmappable.size;
device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
return VK_SUCCESS;
}
@@ -395,7 +402,8 @@ anv_update_meminfo(struct anv_physical_device *device, int fd)
const struct intel_device_info *devinfo = &device->info;
device->sys.available = devinfo->mem.sram.mappable.free;
device->vram.available = devinfo->mem.vram.mappable.free;
device->vram_mappable.available = devinfo->mem.vram.mappable.free;
device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
}
@@ -408,13 +416,19 @@ anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
assert(device->sys.size != 0);
if (device->vram.size > 0) {
/* We can create 2 different heaps when we have local memory support,
* first heap with local memory size and second with system memory size.
if (anv_physical_device_has_vram(device)) {
/* We can create 2 or 3 different heaps when we have local memory
* support, first heap with local memory size and second with system
* memory size and the third is added only if part of the vram is
* mappable to the host.
*/
device->memory.heap_count = 2;
device->memory.heaps[0] = (struct anv_memory_heap) {
.size = device->vram.size,
/* If there is a vram_non_mappable, use that for the device only
* heap. Otherwise use the vram_mappable.
*/
.size = device->vram_non_mappable.size != 0 ?
device->vram_non_mappable.size : device->vram_mappable.size,
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
.is_local_mem = true,
};
@@ -423,6 +437,17 @@ anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
.flags = 0,
.is_local_mem = false,
};
/* Add an additional smaller vram mappable heap if we can't map all the
* vram to the host.
*/
if (device->vram_non_mappable.size > 0) {
device->memory.heap_count++;
device->memory.heaps[2] = (struct anv_memory_heap) {
.size = device->vram_mappable.size,
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
.is_local_mem = true,
};
}
device->memory.type_count = 3;
device->memory.types[0] = (struct anv_memory_type) {
@@ -439,7 +464,11 @@ anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
.heapIndex = 0,
/* This memory type either comes from heaps[0] if there is only
* mappable vram region, or from heaps[2] if there is both mappable &
* non-mappable vram regions.
*/
.heapIndex = device->vram_non_mappable.size > 0 ? 2 : 0,
};
} else if (device->info.has_llc) {
device->memory.heap_count = 1;
@@ -843,7 +872,8 @@ anv_physical_device_try_create(struct anv_instance *instance,
device->gtt_size > (4ULL << 30 /* GiB */);
/* Initialize memory regions struct to 0. */
memset(&device->vram, 0, sizeof(device->vram));
memset(&device->vram_non_mappable, 0, sizeof(device->vram_non_mappable));
memset(&device->vram_mappable, 0, sizeof(device->vram_mappable));
memset(&device->sys, 0, sizeof(device->sys));
result = anv_physical_device_init_heaps(device, fd);
@@ -2741,7 +2771,7 @@ anv_get_memory_budget(VkPhysicalDevice physicalDevice,
if (device->memory.heaps[i].is_local_mem) {
total_heaps_size = total_vram_heaps_size;
mem_available = device->vram.available;
mem_available = device->vram_non_mappable.available;
} else {
total_heaps_size = total_sys_heaps_size;
mem_available = device->sys.available;
@@ -3832,6 +3862,16 @@ VkResult anv_AllocateMemory(
if (device->physical->has_implicit_ccs && device->info.has_aux_map)
alloc_flags |= ANV_BO_ALLOC_IMPLICIT_CCS;
/* If i915 reported a mappable/non_mappable vram regions and the
* application want lmem mappable, then we need to use the
* I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS flag to create our BO.
*/
if (pdevice->vram_mappable.size > 0 &&
pdevice->vram_non_mappable.size > 0 &&
(mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
(mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
alloc_flags |= ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE;
if (vk_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)
alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;


@@ -66,9 +66,12 @@ anv_gem_close(struct anv_device *device, uint32_t gem_handle)
uint32_t
anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
uint32_t num_regions,
uint32_t flags, uint32_t num_regions,
struct drm_i915_gem_memory_class_instance *regions)
{
/* Check for invalid flags */
assert((flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) == 0);
struct drm_i915_gem_create_ext_memory_regions ext_regions = {
.base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
.num_regions = num_regions,
@@ -78,6 +81,7 @@ anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
struct drm_i915_gem_create_ext gem_create = {
.size = anv_bo_size,
.extensions = (uintptr_t) &ext_regions,
.flags = flags,
};
int ret = intel_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE_EXT,


@@ -47,7 +47,7 @@ anv_gem_close(struct anv_device *device, uint32_t gem_handle)
uint32_t
anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
uint32_t num_regions,
uint32_t flags, uint32_t num_regions,
struct drm_i915_gem_memory_class_instance *regions)
{
return 0;


@@ -1040,7 +1040,11 @@ struct anv_physical_device {
bool need_clflush;
} memory;
struct anv_memregion vram;
/* Either we have a single vram region and it's all mappable, or we have
* both mappable & non-mappable parts. System memory is always available.
*/
struct anv_memregion vram_mappable;
struct anv_memregion vram_non_mappable;
struct anv_memregion sys;
uint8_t driver_build_sha1[20];
uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
@@ -1066,6 +1070,12 @@ struct anv_physical_device {
struct intel_measure_device measure_device;
};
static inline bool
anv_physical_device_has_vram(const struct anv_physical_device *device)
{
return device->vram_mappable.size > 0;
}
struct anv_app_info {
const char* app_name;
uint32_t app_version;
@@ -1368,6 +1378,9 @@ enum anv_bo_alloc_flags {
/** This buffer is allocated from local memory */
ANV_BO_ALLOC_LOCAL_MEM = (1 << 10),
/** This buffer is allocated from local memory and should be cpu visible */
ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE = (1 << 11),
};
VkResult anv_device_alloc_bo(struct anv_device *device,
@@ -1431,7 +1444,7 @@ void anv_gem_munmap(struct anv_device *device, void *p, uint64_t size);
uint32_t anv_gem_create(struct anv_device *device, uint64_t size);
void anv_gem_close(struct anv_device *device, uint32_t gem_handle);
uint32_t anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
uint32_t num_regions,
uint32_t flags, uint32_t num_regions,
struct drm_i915_gem_memory_class_instance *regions);
uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size);
int anv_gem_busy(struct anv_device *device, uint32_t gem_handle);
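
From the application side, the net effect of this change is one more
VkMemoryHeap plus a memory type that is both DEVICE_LOCAL and HOST_VISIBLE
pointing at it. A minimal, driver-agnostic usage sketch (the helper name
and the fallback policy are illustrative only, not part of this patch):

    #include <stdint.h>
    #include <vulkan/vulkan.h>

    /* Return the index of a memory type that is both device-local and
     * host-visible (i.e. backed by the mappable vram heap exposed above),
     * or UINT32_MAX if the device does not advertise one.
     */
    static uint32_t
    find_mappable_local_type(VkPhysicalDevice pdev, uint32_t type_bits)
    {
       VkPhysicalDeviceMemoryProperties props;
       vkGetPhysicalDeviceMemoryProperties(pdev, &props);

       const VkMemoryPropertyFlags wanted =
          VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;

       for (uint32_t i = 0; i < props.memoryTypeCount; i++) {
          if (!(type_bits & (1u << i)))
             continue;
          if ((props.memoryTypes[i].propertyFlags & wanted) == wanted)
             return i;
       }
       return UINT32_MAX;
    }

Callers would pass VkMemoryRequirements::memoryTypeBits as type_bits and
fall back to a plain HOST_VISIBLE type (with a separate staging copy) when
UINT32_MAX is returned.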