From 7c127ca0188a837b810f7269e7bb648523715c46 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 1 Mar 2021 21:25:11 -0600 Subject: [PATCH] nir/opt_memcpy: Add another case for function_temp Reviewed-by: Kristian H. Kristensen Reviewed-by: Jason Ekstrand (1.5 years later) Part-of: --- src/compiler/nir/nir_opt_memcpy.c | 45 +++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir_opt_memcpy.c b/src/compiler/nir/nir_opt_memcpy.c index d022e5811ab..d984c131e28 100644 --- a/src/compiler/nir/nir_opt_memcpy.c +++ b/src/compiler/nir/nir_opt_memcpy.c @@ -123,7 +123,8 @@ type_is_tightly_packed(const struct glsl_type *type, unsigned *size_out) } static bool -try_lower_memcpy(nir_builder *b, nir_intrinsic_instr *cpy) +try_lower_memcpy(nir_builder *b, nir_intrinsic_instr *cpy, + struct set *complex_vars) { nir_deref_instr *dst = nir_src_as_deref(cpy->src[0]); nir_deref_instr *src = nir_src_as_deref(cpy->src[1]); @@ -192,6 +193,26 @@ try_lower_memcpy(nir_builder *b, nir_intrinsic_instr *cpy) return true; } + /* If we can get at the variable AND the only complex use of that variable + * is as a memcpy destination, then we don't have to care about any empty + * space in the variable. In particular, we know that the variable is never + * cast to any other type and it's never used as a memcpy source so nothing + * can see any padding bytes. This holds even if some other memcpy only + * writes to part of the variable. + */ + if (dst->deref_type == nir_deref_type_var && + dst->modes == nir_var_function_temp && + _mesa_set_search(complex_vars, dst->var) == NULL && + glsl_get_explicit_size(dst->type, false) <= size) { + b->cursor = nir_instr_remove(&cpy->instr); + src = nir_build_deref_cast(b, &src->dest.ssa, + src->modes, dst->type, 0); + nir_copy_deref_with_access(b, dst, src, + nir_intrinsic_dst_access(cpy), + nir_intrinsic_src_access(cpy)); + return true; + } + if (src->modes == nir_var_function_temp && type_is_tightly_packed(src->type, &type_size) && type_size == size) { @@ -215,6 +236,24 @@ opt_memcpy_impl(nir_function_impl *impl) nir_builder b; nir_builder_init(&b, impl); + struct set *complex_vars = _mesa_pointer_set_create(NULL); + + nir_foreach_block(block, impl) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_deref) + continue; + + nir_deref_instr *deref = nir_instr_as_deref(instr); + if (deref->deref_type != nir_deref_type_var) + continue; + + nir_deref_instr_has_complex_use_options opts = + nir_deref_instr_has_complex_use_allow_memcpy_dst; + if (nir_deref_instr_has_complex_use(deref, opts)) + _mesa_set_add(complex_vars, deref->var); + } + } + nir_foreach_block(block, impl) { nir_foreach_instr_safe(instr, block) { if (instr->type != nir_instr_type_intrinsic) @@ -229,13 +268,15 @@ opt_memcpy_impl(nir_function_impl *impl) while (opt_memcpy_deref_cast(cpy, &cpy->src[1])) progress = true; - if (try_lower_memcpy(&b, cpy)) { + if (try_lower_memcpy(&b, cpy, complex_vars)) { progress = true; continue; } } } + _mesa_set_destroy(complex_vars, NULL); + if (progress) { nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance);