radv: reorder some NIR passes
Totals from 6171 (4.57% of 134913) affected shaders: (GFX10.3)
CodeSize: 61916968 -> 61916676 (-0.00%); split: -0.01%, +0.01%
Instrs: 11473620 -> 11473797 (+0.00%); split: -0.01%, +0.01%
Latency: 161997216 -> 161997029 (-0.00%); split: -0.00%, +0.00%
InvThroughput: 29075944 -> 29075862 (-0.00%); split: -0.00%, +0.00%
VClause: 199793 -> 199790 (-0.00%); split: -0.01%, +0.00%
SClause: 418180 -> 418013 (-0.04%)
Copies: 786921 -> 786884 (-0.00%); split: -0.06%, +0.06%
Branches: 348058 -> 348106 (+0.01%); split: -0.04%, +0.06%
PreSGPRs: 604400 -> 604396 (-0.00%)
PreVGPRs: 469415 -> 469430 (+0.00%)

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17279>
This commit is contained in:
parent
7750281b43
commit
3ae0c39a58
|
@ -4917,7 +4917,6 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
|
|||
radv_use_llvm_for_stage(device, i));
|
||||
radv_optimize_nir_algebraic(
|
||||
stages[i].nir, io_to_mem || lowered_ngg || i == MESA_SHADER_COMPUTE || i == MESA_SHADER_TASK);
|
||||
NIR_PASS(_, stages[i].nir, nir_lower_alu_width, opt_vectorize_callback, device);
|
||||
|
||||
if (stages[i].nir->info.bit_sizes_int & (8 | 16)) {
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX8) {
|
||||
|
@ -4927,7 +4926,6 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
|
|||
|
||||
if (nir_lower_bit_size(stages[i].nir, lower_bit_size_callback, device)) {
|
||||
NIR_PASS(_, stages[i].nir, nir_opt_constant_folding);
|
||||
NIR_PASS(_, stages[i].nir, nir_opt_dce);
|
||||
}
|
||||
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX8)
|
||||
|
@ -4935,26 +4933,23 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
|
|||
}
|
||||
if (((stages[i].nir->info.bit_sizes_int | stages[i].nir->info.bit_sizes_float) & 16) &&
|
||||
device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
bool copy_prop = false;
|
||||
uint32_t sampler_dims = UINT32_MAX;
|
||||
/* Skip because AMD doesn't support 16-bit types with these. */
|
||||
sampler_dims &= ~BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE);
|
||||
/* TODO: also optimize the tex srcs. See RadeonSI for reference. */
|
||||
/* Skip if there are potentially conflicting rounding modes */
|
||||
if (!nir_has_any_rounding_mode_enabled(stages[i].nir->info.float_controls_execution_mode))
|
||||
NIR_PASS(copy_prop, stages[i].nir, nir_fold_16bit_sampler_conversions, 0, sampler_dims);
|
||||
NIR_PASS(copy_prop, stages[i].nir, nir_fold_16bit_image_load_store_conversions);
|
||||
|
||||
if (copy_prop) {
|
||||
NIR_PASS(_, stages[i].nir, nir_copy_prop);
|
||||
NIR_PASS(_, stages[i].nir, nir_opt_dce);
|
||||
}
|
||||
NIR_PASS(_, stages[i].nir, nir_fold_16bit_sampler_conversions, 0, sampler_dims);
|
||||
|
||||
NIR_PASS(_, stages[i].nir, nir_fold_16bit_image_load_store_conversions);
|
||||
NIR_PASS(_, stages[i].nir, nir_opt_vectorize, opt_vectorize_callback, device);
|
||||
}
|
||||
|
||||
/* cleanup passes */
|
||||
NIR_PASS(_, stages[i].nir, nir_lower_alu_width, opt_vectorize_callback, device);
|
||||
NIR_PASS(_, stages[i].nir, nir_lower_load_const_to_scalar);
|
||||
NIR_PASS(_, stages[i].nir, nir_copy_prop);
|
||||
NIR_PASS(_, stages[i].nir, nir_opt_dce);
|
||||
|
||||
sink_opts |= nir_move_comparisons | nir_move_load_ubo | nir_move_load_ssbo;
|
||||
NIR_PASS(_, stages[i].nir, nir_opt_sink, sink_opts);
|
||||
|
|
Loading…
Reference in New Issue