diff --git a/src/freedreno/computerator/a6xx.c b/src/freedreno/computerator/a6xx.c index 90889ae91c4..5679f212111 100644 --- a/src/freedreno/computerator/a6xx.c +++ b/src/freedreno/computerator/a6xx.c @@ -490,7 +490,7 @@ a6xx_init(struct fd_device *dev, uint32_t gpu_id) .read_perfcntrs = a6xx_read_perfcntrs, }; - a6xx_backend->compiler = ir3_compiler_create(dev, gpu_id); + a6xx_backend->compiler = ir3_compiler_create(dev, gpu_id, false); a6xx_backend->dev = dev; a6xx_backend->control_mem = fd_bo_new(dev, 0x1000, diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index ed8b43364c5..41847e1db55 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -63,7 +63,7 @@ ir3_compiler_destroy(struct ir3_compiler *compiler) } struct ir3_compiler * -ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id) +ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id, bool robust_ubo_access) { struct ir3_compiler *compiler = rzalloc(NULL, struct ir3_compiler); @@ -77,6 +77,7 @@ ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id) compiler->dev = dev; compiler->gpu_id = gpu_id; + compiler->robust_ubo_access = robust_ubo_access; compiler->set = ir3_ra_alloc_reg_set(compiler, false); /* All known GPU's have 32k local memory (aka shared) */ diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h index 6f7058f37e5..2366bf6a7ac 100644 --- a/src/freedreno/ir3/ir3_compiler.h +++ b/src/freedreno/ir3/ir3_compiler.h @@ -44,6 +44,11 @@ struct ir3_compiler { struct disk_cache *disk_cache; + /* If true, UBO accesses are assumed to be bounds-checked as defined by + * VK_EXT_robustness2 and optimizations may have to be more conservative. + */ + bool robust_ubo_access; + /* * Configuration options for things that are handled differently on * different generations: @@ -153,7 +158,8 @@ struct ir3_compiler { }; void ir3_compiler_destroy(struct ir3_compiler *compiler); -struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id); +struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id, + bool robust_ubo_access); void ir3_disk_cache_init(struct ir3_compiler *compiler); void ir3_disk_cache_init_shader_key(struct ir3_compiler *compiler, @@ -190,6 +196,9 @@ enum ir3_shader_debug { /* DEBUG-only options: */ IR3_DBG_SCHEDMSGS = BITFIELD_BIT(20), IR3_DBG_RAMSGS = BITFIELD_BIT(21), + + /* Only used for the disk-caching logic: */ + IR3_DBG_ROBUST_UBO_ACCESS = BITFIELD_BIT(30), }; extern enum ir3_shader_debug ir3_shader_debug; diff --git a/src/freedreno/ir3/ir3_disk_cache.c b/src/freedreno/ir3/ir3_disk_cache.c index 29a2c8c2157..7a5f88cf5f8 100644 --- a/src/freedreno/ir3/ir3_disk_cache.c +++ b/src/freedreno/ir3/ir3_disk_cache.c @@ -67,7 +67,9 @@ ir3_disk_cache_init(struct ir3_compiler *compiler) char timestamp[41]; _mesa_sha1_format(timestamp, id_sha1); - const uint64_t driver_flags = ir3_shader_debug; + uint64_t driver_flags = ir3_shader_debug; + if (compiler->robust_ubo_access) + driver_flags |= IR3_DBG_ROBUST_UBO_ACCESS; compiler->disk_cache = disk_cache_create(renderer, timestamp, driver_flags); } diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 759b2ccc18f..e3bf7c9b79c 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -190,7 +190,7 @@ ir3_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset, #define OPT_V(nir, pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__) void -ir3_optimize_loop(nir_shader *s) +ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s) { bool progress; unsigned lower_flrp = @@ -227,7 +227,7 @@ ir3_optimize_loop(nir_shader *s) nir_load_store_vectorize_options vectorize_opts = { .modes = nir_var_mem_ubo, .callback = ir3_nir_should_vectorize_mem, - .robust_modes = 0, + .robust_modes = compiler->robust_ubo_access ? nir_var_mem_ubo : 0, }; progress |= OPT(s, nir_opt_load_store_vectorize, &vectorize_opts); @@ -315,7 +315,7 @@ ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s) if (compiler->gpu_id < 500) OPT_V(s, ir3_nir_lower_tg4_to_tex); - ir3_optimize_loop(s); + ir3_optimize_loop(compiler, s); /* do idiv lowering after first opt loop to get a chance to propagate * constants for divide by immed power-of-two: @@ -327,7 +327,7 @@ ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s) const bool idiv_progress = OPT(s, nir_lower_idiv, &idiv_options); if (idiv_progress) - ir3_optimize_loop(s); + ir3_optimize_loop(compiler, s); OPT_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL); @@ -375,7 +375,7 @@ ir3_nir_post_finalize(struct ir3_compiler *compiler, nir_shader *s) */ OPT_V(s, ir3_nir_apply_trig_workarounds); - ir3_optimize_loop(s); + ir3_optimize_loop(compiler, s); } static bool @@ -523,14 +523,14 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s) OPT_V(s, ir3_nir_lower_io_offsets, so->shader->compiler->gpu_id); if (progress) - ir3_optimize_loop(s); + ir3_optimize_loop(so->shader->compiler, s); /* Fixup indirect load_uniform's which end up with a const base offset * which is too large to encode. Do this late(ish) so we actually * can differentiate indirect vs non-indirect. */ if (OPT(s, ir3_nir_fixup_load_uniform)) - ir3_optimize_loop(s); + ir3_optimize_loop(so->shader->compiler, s); /* Do late algebraic optimization to turn add(a, neg(b)) back into * subs, then the mandatory cleanup after algebraic. Note that it may diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index 17dc4aa155c..76eef3b2646 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -52,7 +52,7 @@ void ir3_nir_lower_tess_eval(nir_shader *shader, struct ir3_shader_variant *v, u void ir3_nir_lower_gs(nir_shader *shader); const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler); -void ir3_optimize_loop(nir_shader *s); +void ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s); void ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s); void ir3_nir_post_finalize(struct ir3_compiler *compiler, nir_shader *s); void ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s); diff --git a/src/freedreno/ir3/tests/delay.c b/src/freedreno/ir3/tests/delay.c index 5ffc688c32b..ef6cd555ef2 100644 --- a/src/freedreno/ir3/tests/delay.c +++ b/src/freedreno/ir3/tests/delay.c @@ -181,7 +181,7 @@ main(int argc, char **argv) struct ir3_compiler *c; int result = 0; - c = ir3_compiler_create(NULL, 630); + c = ir3_compiler_create(NULL, 630, false); for (int i = 0; i < ARRAY_SIZE(tests); i++) { const struct test *test = &tests[i]; diff --git a/src/freedreno/ir3/tests/disasm.c b/src/freedreno/ir3/tests/disasm.c index bd4bff47d46..5d6d052f7fb 100644 --- a/src/freedreno/ir3/tests/disasm.c +++ b/src/freedreno/ir3/tests/disasm.c @@ -393,7 +393,7 @@ main(int argc, char **argv) unsigned gen = test->gpu_id / 100; if (!compilers[gen]) { - compilers[gen] = ir3_compiler_create(NULL, test->gpu_id); + compilers[gen] = ir3_compiler_create(NULL, test->gpu_id, false); } FILE *fasm = fmemopen((void *)test->expected, strlen(test->expected), "r"); diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index 19229294315..aa13ddea071 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -1078,6 +1078,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, struct tu_device *device; bool custom_border_colors = false; bool perf_query_pools = false; + bool robust_buffer_access2 = false; /* Check enabled features */ if (pCreateInfo->pEnabledFeatures) { @@ -1110,6 +1111,11 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, perf_query_pools = feature->performanceCounterQueryPools; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: { + VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext; + robust_buffer_access2 = features->robustBufferAccess2; + break; + } default: break; } @@ -1166,7 +1172,8 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, } } - device->compiler = ir3_compiler_create(NULL, physical_device->gpu_id); + device->compiler = ir3_compiler_create(NULL, physical_device->gpu_id, + robust_buffer_access2); if (!device->compiler) { result = vk_startup_errorf(physical_device->instance, VK_ERROR_INITIALIZATION_FAILED, diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c index 481d4568b1d..6c4eff30ce3 100644 --- a/src/freedreno/vulkan/tu_shader.c +++ b/src/freedreno/vulkan/tu_shader.c @@ -192,7 +192,7 @@ tu_spirv_to_nir(struct tu_device *dev, NIR_PASS_V(nir, nir_lower_frexp); - ir3_optimize_loop(nir); + ir3_optimize_loop(dev->compiler, nir); return nir; } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c index 1989ba2d0b0..ea8a26c19c1 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c @@ -362,7 +362,7 @@ main(int argc, char **argv) nir_shader *nir; - compiler = ir3_compiler_create(NULL, gpu_id); + compiler = ir3_compiler_create(NULL, gpu_id, false); if (from_tgsi) { struct tgsi_token toks[65536]; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c index 18e3a860ef5..65e13efac87 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c @@ -515,7 +515,7 @@ ir3_screen_init(struct pipe_screen *pscreen) { struct fd_screen *screen = fd_screen(pscreen); - screen->compiler = ir3_compiler_create(screen->dev, screen->gpu_id); + screen->compiler = ir3_compiler_create(screen->dev, screen->gpu_id, false); /* TODO do we want to limit things to # of fast cores, or just limit * based on total # of both big and little cores. The little cores