From c24ba6cecbacf2d81345c1112f083006f22b65ea Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 13 Jul 2021 17:06:44 -0500 Subject: [PATCH] intel/dev: Handle CHV CS thread weirdness in get_device_info_from_fd Cherryview is weird in that the actual limits we can expose through GL are dependent on fusing information which is only obtainable at runtime. The same PCI ID may have different configurations with different maximum CS thread counts. We currently handle this in i965 and ANV by doing the calculation in the driver. This dates back to when intel_device_info was computed from the PCI ID. Now that we have get_device_info_from_fd, we can move the CHV stuff there and get it out of the driver. This fixes CHV thread counts on crocus as well. Reviewed-by: Caio Marcelo de Oliveira Filho Part-of: --- src/intel/dev/intel_device_info.c | 26 +++++++++++++++++++++++++ src/intel/vulkan/anv_device.c | 11 ----------- src/intel/vulkan/anv_private.h | 1 - src/mesa/drivers/dri/i965/brw_context.c | 12 ------------ 4 files changed, 26 insertions(+), 24 deletions(-) diff --git a/src/intel/dev/intel_device_info.c b/src/intel/dev/intel_device_info.c index 1bf31494536..c4279d29e74 100644 --- a/src/intel/dev/intel_device_info.c +++ b/src/intel/dev/intel_device_info.c @@ -1410,6 +1410,29 @@ has_get_tiling(int fd) return ret == 0; } +static void +fixup_chv_device_info(struct intel_device_info *devinfo) +{ + assert(devinfo->is_cherryview); + + /* Cherryview is annoying. The number of EUs is depending on fusing and + * isn't determinable from the PCI ID alone. We default to the minimum + * available for that PCI ID and then compute the real value from the + * subslice information we get from the kernel. + */ + const uint32_t subslice_total = intel_device_info_eu_total(devinfo); + const uint32_t eu_total = intel_device_info_eu_total(devinfo); + + /* Logical CS threads = EUs per subslice * num threads per EU */ + uint32_t max_cs_threads = + eu_total / subslice_total * devinfo->num_thread_per_eu; + + /* Fuse configurations may give more threads than expected, never less. */ + assert(max_cs_threads >= devinfo->max_cs_threads); + if (max_cs_threads > devinfo->max_cs_threads) + devinfo->max_cs_threads = max_cs_threads; +} + bool intel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo) { @@ -1482,6 +1505,9 @@ intel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo) getparam_topology(devinfo, fd); } + if (devinfo->is_cherryview) + fixup_chv_device_info(devinfo); + intel_get_aperture_size(fd, &devinfo->aperture_bytes); devinfo->has_tiling_uapi = has_get_tiling(fd); diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index edb1e156013..f39b7f78573 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -911,17 +911,6 @@ anv_physical_device_try_create(struct anv_instance *instance, /* GENs prior to 8 do not support EU/Subslice info */ device->subslice_total = intel_device_info_subslice_total(&device->info); - device->eu_total = intel_device_info_eu_total(&device->info); - - if (device->info.is_cherryview) { - /* Logical CS threads = EUs per subslice * num threads per EU */ - uint32_t max_cs_threads = - device->eu_total / device->subslice_total * device->info.num_thread_per_eu; - - /* Fuse configurations may give more threads than expected, never less. */ - if (max_cs_threads > device->info.max_cs_threads) - device->info.max_cs_threads = max_cs_threads; - } device->compiler = brw_compiler_create(NULL, &device->info); if (device->compiler == NULL) { diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 423d178d35f..c2971de764d 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -961,7 +961,6 @@ struct anv_physical_device { bool always_flush_cache; - uint32_t eu_total; uint32_t subslice_total; struct { diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index bd473915334..392c644a795 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -856,20 +856,8 @@ static void brw_initialize_cs_context_constants(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; - const struct brw_screen *screen = brw->screen; struct intel_device_info *devinfo = &brw->screen->devinfo; - /* FINISHME: Do this for all platforms that the kernel supports */ - if (devinfo->is_cherryview && - screen->subslice_total > 0 && screen->eu_total > 0) { - /* Logical CS threads = EUs per subslice * 7 threads per EU */ - uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7; - - /* Fuse configurations may give more threads than expected, never less. */ - if (max_cs_threads > devinfo->max_cs_threads) - devinfo->max_cs_threads = max_cs_threads; - } - /* Maximum number of scalar compute shader invocations that can be run in * parallel in the same subslice assuming SIMD32 dispatch. *