intel: Add SUPPORT_INTEL_INTEGRATED_GPUS build argument

This is meant to compile out any integrated-GPU-only code paths that
cannot be built on CPU architectures other than x86.

Discrete GPUs never have need_clflush set to true, so it was just a
matter of compiling out some code blocks around need_clflush, but a
check was left in anv_physical_device_init_heaps() to fail physical
device initialization if that assumption ever becomes false.

Signed-off-by: Philippe Lecluse <philippe.lecluse@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Marcin Ślusarz <marcin.slusarz@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19812>
Philippe Lecluse 2022-11-22 07:26:58 -08:00 committed by Marge Bot
parent f2e535e4fe
commit a821dfbda5
7 changed files with 32 additions and 3 deletions
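For context, the pattern this build argument compiles out looks roughly like the sketch below. This is not code from the patch: copy_to_batch_map() is a hypothetical helper, and it assumes the anv_device type and the intel_flush_range() helper that already exist in the tree.

#include <stddef.h>
#include <string.h>

/* Hypothetical helper, not part of this commit: the CPU writes through a BO
 * mapping, then flushes its caches only on builds that support integrated
 * GPUs and only on devices that actually need it (need_clflush). */
static void
copy_to_batch_map(struct anv_device *device, void *map,
                  const void *data, size_t size)
{
   memcpy(map, data, size);
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
   if (device->physical->memory.need_clflush)
      intel_flush_range(map, size);   /* clflush the written cachelines */
#endif
}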


@@ -1572,6 +1572,11 @@ elif with_intel_vk or with_intel_hasvk
error('Intel "Anvil" Vulkan driver requires the dl_iterate_phdr function')
endif
# only used in Iris and ANV
if with_any_intel and ['x86', 'x86_64'].contains(host_machine.cpu_family())
pre_args += '-DSUPPORT_INTEL_INTEGRATED_GPUS'
endif
# Determine whether or not the rt library is needed for time functions
if host_machine.system() == 'windows' or cc.has_function('clock_gettime')
dep_clock = null_dep


@@ -27,6 +27,7 @@
#define CACHELINE_SIZE 64
#define CACHELINE_MASK 63
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
static inline void
intel_clflush_range(void *start, size_t size)
{
@@ -67,5 +68,6 @@ intel_invalidate_range(void *start, size_t size)
__builtin_ia32_clflush(start + size - 1);
__builtin_ia32_mfence();
}
#endif /* SUPPORT_INTEL_INTEGRATED_GPUS */
#endif
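A rough guide to the helpers guarded above, under the assumption that intel_flush_range() (used later in this diff) lives in the same header: flushing is for the CPU-writes/GPU-reads direction, invalidation for GPU-writes/CPU-reads. The wrappers below are hypothetical and only illustrate that pairing.

#include <stddef.h>

/* Hypothetical wrappers, not in the tree: which helper goes with which
 * direction of traffic over a non-coherent CPU mapping. */
static inline void
cpu_wrote_gpu_will_read(void *map, size_t size)
{
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
   intel_flush_range(map, size);        /* write dirty cachelines back */
#else
   (void)map; (void)size;
#endif
}

static inline void
gpu_wrote_cpu_will_read(void *map, size_t size)
{
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
   intel_invalidate_range(map, size);   /* drop stale cachelines first */
#else
   (void)map; (void)size;
#endif
}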


@@ -185,11 +185,13 @@ padding_is_good(int fd, uint32_t handle)
}
mapped = (uint8_t*) (uintptr_t) mmap_arg.addr_ptr;
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
/* bah-humbug, we need to see the latest contents and
* if the bo is not cache coherent we likely need to
* invalidate the cache lines to get it.
*/
intel_invalidate_range(mapped, PADDING_SIZE);
#endif
expected_value = handle & 0xFF;
for (uint32_t i = 0; i < PADDING_SIZE; ++i) {


@@ -388,8 +388,10 @@ anv_batch_bo_link(struct anv_cmd_buffer *cmd_buffer,
uint64_t *map = prev_bbo->bo->map + bb_start_offset + 4;
*map = intel_canonical_address(next_bbo->bo->offset + next_bbo_offset);
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
if (cmd_buffer->device->physical->memory.need_clflush)
intel_flush_range(map, sizeof(uint64_t));
#endif
}
static void
@@ -1508,6 +1510,7 @@ setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf,
first_batch_bo->bo->exec_obj_index = last_idx;
}
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
if (device->physical->memory.need_clflush) {
__builtin_ia32_mfence();
struct anv_batch_bo **bbo;
@@ -1518,6 +1521,7 @@ setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf,
}
}
}
#endif
execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
.buffers_ptr = (uintptr_t) execbuf->objects,
@@ -1594,8 +1598,10 @@ setup_utrace_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue,
flush->batch_bo->exec_obj_index = last_idx;
}
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
if (device->physical->memory.need_clflush)
intel_flush_range(flush->batch_bo->map, flush->batch_bo->size);
#endif
execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
.buffers_ptr = (uintptr_t) execbuf->objects,
@@ -2064,8 +2070,10 @@ anv_queue_submit_simple_batch(struct anv_queue *queue,
return result;
memcpy(batch_bo->map, batch->start, batch_size);
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
if (device->physical->memory.need_clflush)
intel_flush_range(batch_bo->map, batch_size);
#endif
if (INTEL_DEBUG(DEBUG_BATCH)) {
intel_print_batch(&device->decoder_ctx,


@@ -535,12 +535,16 @@ anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
};
}
device->memory.need_clflush = false;
for (unsigned i = 0; i < device->memory.type_count; i++) {
VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
!(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
device->memory.need_clflush = true;
#else
return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
"Memory configuration requires flushing, but it's not implemented for this architecture");
#endif
}
return VK_SUCCESS;
@@ -3023,8 +3027,10 @@ anv_device_init_trivial_batch(struct anv_device *device)
anv_batch_emit(&batch, GFX7_MI_BATCH_BUFFER_END, bbe);
anv_batch_emit(&batch, GFX7_MI_NOOP, noop);
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
if (device->physical->memory.need_clflush)
intel_clflush_range(batch.start, batch.next - batch.start);
#endif
return VK_SUCCESS;
}
@@ -4392,6 +4398,7 @@ VkResult anv_FlushMappedMemoryRanges(
uint32_t memoryRangeCount,
const VkMappedMemoryRange* pMemoryRanges)
{
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
ANV_FROM_HANDLE(anv_device, device, _device);
if (!device->physical->memory.need_clflush)
@@ -4413,7 +4420,7 @@ VkResult anv_FlushMappedMemoryRanges(
MIN2(pMemoryRanges[i].size,
mem->map_size - map_offset));
}
#endif
return VK_SUCCESS;
}
@@ -4422,6 +4429,7 @@ VkResult anv_InvalidateMappedMemoryRanges(
uint32_t memoryRangeCount,
const VkMappedMemoryRange* pMemoryRanges)
{
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
ANV_FROM_HANDLE(anv_device, device, _device);
if (!device->physical->memory.need_clflush)
@@ -4443,7 +4451,7 @@ VkResult anv_InvalidateMappedMemoryRanges(
/* Make sure no reads get moved up above the invalidate. */
__builtin_ia32_mfence();
#endif
return VK_SUCCESS;
}
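For reference, the application-side call that ends up in anv_FlushMappedMemoryRanges() is plain Vulkan API usage, as in the sketch below (flush_whole_allocation() is a hypothetical helper, not code from this patch); vkInvalidateMappedMemoryRanges() reaches the invalidate path the same way.

#include <vulkan/vulkan.h>

/* Hypothetical helper: flush a HOST_VISIBLE, non-HOST_COHERENT allocation
 * after the CPU has written to its mapping. */
static void
flush_whole_allocation(VkDevice device, VkDeviceMemory mem)
{
   const VkMappedMemoryRange range = {
      .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
      .memory = mem,
      .offset = 0,
      .size   = VK_WHOLE_SIZE,
   };
   vkFlushMappedMemoryRanges(device, 1, &range);
}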


@@ -1019,7 +1019,9 @@ struct anv_physical_device {
struct anv_memory_type types[VK_MAX_MEMORY_TYPES];
uint32_t heap_count;
struct anv_memory_heap heaps[VK_MAX_MEMORY_HEAPS];
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
bool need_clflush;
#endif
} memory;
/* Either we have a single vram region and it's all mappable, or we have


@@ -97,10 +97,12 @@ VkResult anv_QueuePresentKHR(
if (device->debug_frame_desc) {
device->debug_frame_desc->frame_id++;
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
if (device->physical->memory.need_clflush) {
intel_clflush_range(device->debug_frame_desc,
sizeof(*device->debug_frame_desc));
}
#endif
}
result = vk_queue_wait_before_present(&queue->vk, pPresentInfo);