vc4: Try compiling our FSes in multithreaded mode on new kernels.
Multithreaded fragment shaders let us hide texturing latency with a hyperthreading-style switch to another fragment shader. This yields framerate improvements of up to 20% on glmark2 tests.
This commit is contained in:
parent
45c022f2b0
commit
7f27ad5597
|
@ -42,6 +42,9 @@
|
|||
#ifndef DRM_VC4_PARAM_SUPPORTS_ETC1
|
||||
#define DRM_VC4_PARAM_SUPPORTS_ETC1 4
|
||||
#endif
|
||||
#ifndef DRM_VC4_PARAM_SUPPORTS_THREADED_FS
|
||||
#define DRM_VC4_PARAM_SUPPORTS_THREADED_FS 5
|
||||
#endif
|
||||
|
||||
#ifdef USE_VC4_SIMULATOR
|
||||
#define using_vc4_simulator true
|
||||
|
|
|
@ -2158,7 +2158,7 @@ count_nir_instrs(nir_shader *nir)
|
|||
|
||||
static struct vc4_compile *
|
||||
vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
|
||||
struct vc4_key *key)
|
||||
struct vc4_key *key, bool fs_threaded)
|
||||
{
|
||||
struct vc4_compile *c = qir_compile_init();
|
||||
|
||||
|
@ -2168,6 +2168,7 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
|
|||
c->program_id = key->shader_state->program_id;
|
||||
c->variant_id =
|
||||
p_atomic_inc_return(&key->shader_state->compiled_variant_count);
|
||||
c->fs_threaded = fs_threaded;
|
||||
|
||||
c->key = key;
|
||||
switch (stage) {
|
||||
|
@ -2496,12 +2497,16 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
|
|||
{
|
||||
struct hash_table *ht;
|
||||
uint32_t key_size;
|
||||
bool try_threading;
|
||||
|
||||
if (stage == QSTAGE_FRAG) {
|
||||
ht = vc4->fs_cache;
|
||||
key_size = sizeof(struct vc4_fs_key);
|
||||
try_threading = vc4->screen->has_threaded_fs;
|
||||
} else {
|
||||
ht = vc4->vs_cache;
|
||||
key_size = sizeof(struct vc4_vs_key);
|
||||
try_threading = false;
|
||||
}
|
||||
|
||||
struct vc4_compiled_shader *shader;
|
||||
|
@ -2509,7 +2514,13 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
|
|||
if (entry)
|
||||
return entry->data;
|
||||
|
||||
struct vc4_compile *c = vc4_shader_ntq(vc4, stage, key);
|
||||
struct vc4_compile *c = vc4_shader_ntq(vc4, stage, key, try_threading);
|
||||
/* If the FS failed to compile threaded, fall back to single threaded. */
|
||||
if (try_threading && c->failed) {
|
||||
qir_compile_destroy(c);
|
||||
c = vc4_shader_ntq(vc4, stage, key, false);
|
||||
}
|
||||
|
||||
shader = rzalloc(NULL, struct vc4_compiled_shader);
|
||||
|
||||
shader->program_id = vc4->next_compiled_program_id++;
|
||||
|
|
|
@ -614,6 +614,8 @@ vc4_screen_create(int fd)
|
|||
vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_BRANCHES);
|
||||
screen->has_etc1 =
|
||||
vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_ETC1);
|
||||
screen->has_threaded_fs =
|
||||
vc4_has_feature(screen, DRM_VC4_PARAM_SUPPORTS_THREADED_FS);
|
||||
|
||||
if (!vc4_get_chip_info(screen))
|
||||
goto fail;
|
||||
|
|
|
@ -90,6 +90,7 @@ struct vc4_screen {
|
|||
uint32_t bo_count;
|
||||
bool has_control_flow;
|
||||
bool has_etc1;
|
||||
bool has_threaded_fs;
|
||||
|
||||
struct vc4_simulator_file *sim_file;
|
||||
};
|
||||
|
|
|
@ -613,6 +613,7 @@ vc4_simulator_get_param_ioctl(int fd, struct drm_vc4_get_param *args)
|
|||
switch (args->param) {
|
||||
case DRM_VC4_PARAM_SUPPORTS_BRANCHES:
|
||||
case DRM_VC4_PARAM_SUPPORTS_ETC1:
|
||||
case DRM_VC4_PARAM_SUPPORTS_THREADED_FS:
|
||||
args->value = true;
|
||||
return 0;
|
||||
|
||||
|
|
Loading…
Reference in New Issue