nir/opt_load_store_vectorize: loop internally

Vectorizing to vec8/16, or to vec4 when vec3 is not available, cannot be done
by incrementally adding components to a single load/store. This patch repeats
vectorization over each sorted entry list until no further progress is made, so
that two newly created vec2/4/8 operations can be combined into a larger one.

fossil-db (GFX10.3):
Totals from 22 (0.02% of 139391) affected shaders:
SpillVGPRs: 1749 -> 1771 (+1.26%)
CodeSize: 901212 -> 892532 (-0.96%); split: -1.19%, +0.22%
Scratch: 178176 -> 184320 (+3.45%)
Instrs: 159358 -> 158027 (-0.84%); split: -0.99%, +0.16%
Cycles: 37046772 -> 36738544 (-0.83%); split: -1.00%, +0.17%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10384>
This commit is contained in:
Rhys Perry 2020-09-29 17:00:45 +01:00 committed by Marge Bot
parent 447820d003
commit 89b759c4f9
1 changed file with 25 additions and 14 deletions

View File

@@ -1110,23 +1110,12 @@ update_align(struct entry *entry)
}
static bool
vectorize_entries(struct vectorize_ctx *ctx, nir_function_impl *impl, struct hash_table *ht)
vectorize_sorted_entries(struct vectorize_ctx *ctx, nir_function_impl *impl,
struct util_dynarray *arr)
{
if (!ht)
return false;
bool progress = false;
hash_table_foreach(ht, entry) {
struct util_dynarray *arr = entry->data;
if (!arr->size)
continue;
qsort(util_dynarray_begin(arr),
util_dynarray_num_elements(arr, struct entry *),
sizeof(struct entry *), &sort_entries);
unsigned num_entries = util_dynarray_num_elements(arr, struct entry *);
bool progress = false;
for (unsigned first_idx = 0; first_idx < num_entries; first_idx++) {
struct entry *low = *util_dynarray_element(arr, struct entry *, first_idx);
if (!low)
@@ -1154,6 +1143,28 @@ vectorize_entries(struct vectorize_ctx *ctx, nir_function_impl *impl, struct has
*util_dynarray_element(arr, struct entry *, first_idx) = low;
}
return progress;
}
static bool
vectorize_entries(struct vectorize_ctx *ctx, nir_function_impl *impl, struct hash_table *ht)
{
if (!ht)
return false;
bool progress = false;
hash_table_foreach(ht, entry) {
struct util_dynarray *arr = entry->data;
if (!arr->size)
continue;
qsort(util_dynarray_begin(arr),
util_dynarray_num_elements(arr, struct entry *),
sizeof(struct entry *), &sort_entries);
while (vectorize_sorted_entries(ctx, impl, arr))
progress = true;
util_dynarray_foreach(arr, struct entry *, elem) {
if (*elem)
progress |= update_align(*elem);