intel/dev: Handle CHV CS thread weirdness in get_device_info_from_fd
Cherryview is weird in that the actual limits we can expose through GL are dependent on fusing information which is only obtainable at runtime. The same PCI ID may have different configurations with different maximum CS thread counts. We currently handle this in i965 and ANV by doing the calculation in the driver. This dates back to when intel_device_info was computed from the PCI ID. Now that we have get_device_info_from_fd, we can move the CHV stuff there and get it out of the driver. This fixes CHV thread counts on crocus as well. Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11861>
This commit is contained in:
parent
1744372714
commit
c24ba6cecb
|
@ -1410,6 +1410,29 @@ has_get_tiling(int fd)
|
|||
return ret == 0;
|
||||
}
|
||||
|
||||
static void
|
||||
fixup_chv_device_info(struct intel_device_info *devinfo)
|
||||
{
|
||||
assert(devinfo->is_cherryview);
|
||||
|
||||
/* Cherryview is annoying. The number of EUs is depending on fusing and
|
||||
* isn't determinable from the PCI ID alone. We default to the minimum
|
||||
* available for that PCI ID and then compute the real value from the
|
||||
* subslice information we get from the kernel.
|
||||
*/
|
||||
const uint32_t subslice_total = intel_device_info_eu_total(devinfo);
|
||||
const uint32_t eu_total = intel_device_info_eu_total(devinfo);
|
||||
|
||||
/* Logical CS threads = EUs per subslice * num threads per EU */
|
||||
uint32_t max_cs_threads =
|
||||
eu_total / subslice_total * devinfo->num_thread_per_eu;
|
||||
|
||||
/* Fuse configurations may give more threads than expected, never less. */
|
||||
assert(max_cs_threads >= devinfo->max_cs_threads);
|
||||
if (max_cs_threads > devinfo->max_cs_threads)
|
||||
devinfo->max_cs_threads = max_cs_threads;
|
||||
}
|
||||
|
||||
bool
|
||||
intel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo)
|
||||
{
|
||||
|
@ -1482,6 +1505,9 @@ intel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo)
|
|||
getparam_topology(devinfo, fd);
|
||||
}
|
||||
|
||||
if (devinfo->is_cherryview)
|
||||
fixup_chv_device_info(devinfo);
|
||||
|
||||
intel_get_aperture_size(fd, &devinfo->aperture_bytes);
|
||||
devinfo->has_tiling_uapi = has_get_tiling(fd);
|
||||
|
||||
|
|
|
@ -911,17 +911,6 @@ anv_physical_device_try_create(struct anv_instance *instance,
|
|||
|
||||
/* GENs prior to 8 do not support EU/Subslice info */
|
||||
device->subslice_total = intel_device_info_subslice_total(&device->info);
|
||||
device->eu_total = intel_device_info_eu_total(&device->info);
|
||||
|
||||
if (device->info.is_cherryview) {
|
||||
/* Logical CS threads = EUs per subslice * num threads per EU */
|
||||
uint32_t max_cs_threads =
|
||||
device->eu_total / device->subslice_total * device->info.num_thread_per_eu;
|
||||
|
||||
/* Fuse configurations may give more threads than expected, never less. */
|
||||
if (max_cs_threads > device->info.max_cs_threads)
|
||||
device->info.max_cs_threads = max_cs_threads;
|
||||
}
|
||||
|
||||
device->compiler = brw_compiler_create(NULL, &device->info);
|
||||
if (device->compiler == NULL) {
|
||||
|
|
|
@ -961,7 +961,6 @@ struct anv_physical_device {
|
|||
|
||||
bool always_flush_cache;
|
||||
|
||||
uint32_t eu_total;
|
||||
uint32_t subslice_total;
|
||||
|
||||
struct {
|
||||
|
|
|
@ -856,20 +856,8 @@ static void
|
|||
brw_initialize_cs_context_constants(struct brw_context *brw)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
const struct brw_screen *screen = brw->screen;
|
||||
struct intel_device_info *devinfo = &brw->screen->devinfo;
|
||||
|
||||
/* FINISHME: Do this for all platforms that the kernel supports */
|
||||
if (devinfo->is_cherryview &&
|
||||
screen->subslice_total > 0 && screen->eu_total > 0) {
|
||||
/* Logical CS threads = EUs per subslice * 7 threads per EU */
|
||||
uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7;
|
||||
|
||||
/* Fuse configurations may give more threads than expected, never less. */
|
||||
if (max_cs_threads > devinfo->max_cs_threads)
|
||||
devinfo->max_cs_threads = max_cs_threads;
|
||||
}
|
||||
|
||||
/* Maximum number of scalar compute shader invocations that can be run in
|
||||
* parallel in the same subslice assuming SIMD32 dispatch.
|
||||
*
|
||||
|
|
Loading…
Reference in New Issue