intel/dev: Handle CHV CS thread weirdness in get_device_info_from_fd

Cherryview is weird in that the actual limits we can expose through GL
are dependent on fusing information which is only obtainable at runtime.
The same PCI ID may have different configurations with different maximum
CS thread counts.  We currently handle this in i965 and ANV by doing the
calculation in the driver.

This dates back to when intel_device_info was computed from the PCI ID.
Now that we have get_device_info_from_fd, we can move the CHV stuff
there and get it out of the driver.  This fixes CHV thread counts on
crocus as well.

Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11861>
This commit is contained in:
Jason Ekstrand 2021-07-13 17:06:44 -05:00 committed by Marge Bot
parent 1744372714
commit c24ba6cecb
4 changed files with 26 additions and 24 deletions

View File

@ -1410,6 +1410,29 @@ has_get_tiling(int fd)
return ret == 0;
}
/**
 * Raise devinfo->max_cs_threads on Cherryview to match the fused topology.
 *
 * The value already stored in devinfo is treated as a lower bound.  The
 * per-subslice EU count is derived from the EU and subslice totals and
 * multiplied by devinfo->num_thread_per_eu; max_cs_threads is only ever
 * increased, never lowered.
 *
 * Must only be called for Cherryview (asserted).  If either total is zero
 * the function leaves devinfo untouched.
 */
static void
fixup_chv_device_info(struct intel_device_info *devinfo)
{
   assert(devinfo->is_cherryview);

   /* Cherryview is annoying.  The number of EUs depends on fusing and
    * isn't determinable from the PCI ID alone.  We default to the minimum
    * available for that PCI ID and then compute the real value from the
    * subslice information we get from the kernel.
    */
   const uint32_t subslice_total = intel_device_info_subslice_total(devinfo);
   const uint32_t eu_total = intel_device_info_eu_total(devinfo);

   /* With no topology information there is nothing to refine, and dividing
    * by a zero subslice count would be undefined.  Keep the defaults.
    */
   if (subslice_total == 0 || eu_total == 0)
      return;

   /* Logical CS threads = EUs per subslice * num threads per EU */
   const uint32_t max_cs_threads =
      eu_total / subslice_total * devinfo->num_thread_per_eu;

   /* Fuse configurations are expected to give more threads than the default,
    * never less.  A smaller computed value is simply ignored rather than
    * asserted on, so unexpected topology data cannot abort debug builds.
    */
   if (max_cs_threads > devinfo->max_cs_threads)
      devinfo->max_cs_threads = max_cs_threads;
}
bool
intel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo)
{
@ -1482,6 +1505,9 @@ intel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo)
getparam_topology(devinfo, fd);
}
if (devinfo->is_cherryview)
fixup_chv_device_info(devinfo);
intel_get_aperture_size(fd, &devinfo->aperture_bytes);
devinfo->has_tiling_uapi = has_get_tiling(fd);

View File

@ -911,17 +911,6 @@ anv_physical_device_try_create(struct anv_instance *instance,
/* GENs prior to 8 do not support EU/Subslice info */
device->subslice_total = intel_device_info_subslice_total(&device->info);
device->eu_total = intel_device_info_eu_total(&device->info);
if (device->info.is_cherryview) {
/* Logical CS threads = EUs per subslice * num threads per EU */
uint32_t max_cs_threads =
device->eu_total / device->subslice_total * device->info.num_thread_per_eu;
/* Fuse configurations may give more threads than expected, never less. */
if (max_cs_threads > device->info.max_cs_threads)
device->info.max_cs_threads = max_cs_threads;
}
device->compiler = brw_compiler_create(NULL, &device->info);
if (device->compiler == NULL) {

View File

@ -961,7 +961,6 @@ struct anv_physical_device {
bool always_flush_cache;
uint32_t eu_total;
uint32_t subslice_total;
struct {

View File

@ -856,20 +856,8 @@ static void
brw_initialize_cs_context_constants(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
const struct brw_screen *screen = brw->screen;
struct intel_device_info *devinfo = &brw->screen->devinfo;
/* FINISHME: Do this for all platforms that the kernel supports */
if (devinfo->is_cherryview &&
screen->subslice_total > 0 && screen->eu_total > 0) {
/* Logical CS threads = EUs per subslice * 7 threads per EU */
uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7;
/* Fuse configurations may give more threads than expected, never less. */
if (max_cs_threads > devinfo->max_cs_threads)
devinfo->max_cs_threads = max_cs_threads;
}
/* Maximum number of scalar compute shader invocations that can be run in
* parallel in the same subslice assuming SIMD32 dispatch.
*