zink: fix 32bit bo rewriting

The rewriting was correct for 64bit loads and for manually converted 32bit loads (e.g., bindless),
but it was broken when 64bit was not supported, because the offset wasn't
being adjusted correctly for the resulting 2x32 access.

Break out the offset division into its own statement to hopefully make this a little clearer.

Fixes: 150d6ee97e ("zink: move all 64-32bit shader load rewriting to nir pass")

Reviewed-by: Adam Jackson <ajax@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16669>
This commit is contained in:
Mike Blumenkrantz 2022-05-20 11:04:11 -04:00 committed by Marge Bot
parent ea8fc23119
commit bbe5136658
2 changed files with 19 additions and 163 deletions

View File

@ -19,160 +19,13 @@ GTF-GL46.gtf40.GL3Tests.transform_feedback3.transform_feedback3_streams_overflow
GTF-GL46.gtf40.GL3Tests.transform_feedback3.transform_feedback3_streams_queried,Fail
KHR-GL46.buffer_storage.map_persistent_draw,Fail
KHR-GL46.compute_shader.fp64-case1,Crash
KHR-GL46.compute_shader.fp64-case2,Fail
KHR-GL46.compute_shader.fp64-case3,Crash
KHR-GL46.copy_image.functional,Fail
KHR-GL46.direct_state_access.buffers_functional,Fail
KHR-GL46.geometry_shader.api.max_atomic_counters,Fail
KHR-GL46.gpu_shader_fp64.builtin.abs_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.abs_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.abs_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.ceil_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.ceil_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.ceil_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.clamp_against_scalar_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.clamp_against_scalar_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.clamp_against_scalar_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.clamp_double,Fail
KHR-GL46.gpu_shader_fp64.builtin.clamp_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.clamp_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.clamp_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.cross_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.determinant_dmat2,Fail
KHR-GL46.gpu_shader_fp64.builtin.determinant_dmat3,Fail
KHR-GL46.gpu_shader_fp64.builtin.distance_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.distance_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.distance_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.dot_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.dot_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.dot_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.equal_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.equal_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.equal_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.faceforward_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.faceforward_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.faceforward_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.floor_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.floor_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.floor_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.fma_double,Fail
KHR-GL46.gpu_shader_fp64.builtin.fma_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.fma_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.fma_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.fract_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.fract_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.fract_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.frexp_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.frexp_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.frexp_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.greaterthan_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.greaterthan_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.greaterthan_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.greaterthanequal_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.greaterthanequal_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.greaterthanequal_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.inverse_dmat2,Fail
KHR-GL46.gpu_shader_fp64.builtin.inverse_dmat3,Fail
KHR-GL46.gpu_shader_fp64.builtin.inversesqrt_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.inversesqrt_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.inversesqrt_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.ldexp_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.ldexp_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.ldexp_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.length_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.length_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.length_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.lessthan_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.lessthan_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.lessthan_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.lessthanequal_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.lessthanequal_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.lessthanequal_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.matrixcompmult_dmat2,Fail
KHR-GL46.gpu_shader_fp64.builtin.matrixcompmult_dmat2x3,Fail
KHR-GL46.gpu_shader_fp64.builtin.matrixcompmult_dmat2x4,Fail
KHR-GL46.gpu_shader_fp64.builtin.matrixcompmult_dmat3,Fail
KHR-GL46.gpu_shader_fp64.builtin.matrixcompmult_dmat3x2,Fail
KHR-GL46.gpu_shader_fp64.builtin.matrixcompmult_dmat3x4,Fail
KHR-GL46.gpu_shader_fp64.builtin.matrixcompmult_dmat4,Fail
KHR-GL46.gpu_shader_fp64.builtin.matrixcompmult_dmat4x2,Fail
KHR-GL46.gpu_shader_fp64.builtin.matrixcompmult_dmat4x3,Fail
KHR-GL46.gpu_shader_fp64.builtin.max_against_scalar_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.max_against_scalar_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.max_against_scalar_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.max_double,Fail
KHR-GL46.gpu_shader_fp64.builtin.max_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.max_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.max_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.min_against_scalar_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.min_against_scalar_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.min_against_scalar_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.min_double,Fail
KHR-GL46.gpu_shader_fp64.builtin.min_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.min_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.min_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.mix_double,Fail
KHR-GL46.gpu_shader_fp64.builtin.mix_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.mix_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.mix_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.mod_against_scalar_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.mod_against_scalar_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.mod_against_scalar_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.mod_double,Fail
KHR-GL46.gpu_shader_fp64.builtin.mod_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.mod_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.mod_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.modf_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.modf_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.modf_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.normalize_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.normalize_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.normalize_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.notequal_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.notequal_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.notequal_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.outerproduct_dmat2,Fail
KHR-GL46.gpu_shader_fp64.builtin.outerproduct_dmat2x3,Fail
KHR-GL46.gpu_shader_fp64.builtin.outerproduct_dmat2x4,Fail
KHR-GL46.gpu_shader_fp64.builtin.outerproduct_dmat3,Fail
KHR-GL46.gpu_shader_fp64.builtin.outerproduct_dmat3x2,Fail
KHR-GL46.gpu_shader_fp64.builtin.outerproduct_dmat3x4,Fail
KHR-GL46.gpu_shader_fp64.builtin.outerproduct_dmat4,Fail
KHR-GL46.gpu_shader_fp64.builtin.outerproduct_dmat4x2,Fail
KHR-GL46.gpu_shader_fp64.builtin.outerproduct_dmat4x3,Fail
KHR-GL46.gpu_shader_fp64.builtin.reflect_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.reflect_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.reflect_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.refract_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.refract_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.refract_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.round_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.round_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.round_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.roundeven_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.roundeven_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.roundeven_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.smoothstep_against_scalar_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.smoothstep_against_scalar_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.smoothstep_against_scalar_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.smoothstep_double,Fail
KHR-GL46.gpu_shader_fp64.builtin.smoothstep_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.smoothstep_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.smoothstep_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.sqrt_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.sqrt_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.sqrt_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.step_against_scalar_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.step_against_scalar_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.step_against_scalar_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.step_double,Fail
KHR-GL46.gpu_shader_fp64.builtin.step_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.step_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.step_dvec4,Fail
KHR-GL46.gpu_shader_fp64.builtin.trunc_dvec2,Fail
KHR-GL46.gpu_shader_fp64.builtin.trunc_dvec3,Fail
KHR-GL46.gpu_shader_fp64.builtin.trunc_dvec4,Fail
KHR-GL46.gpu_shader_fp64.fp64.max_uniform_components,Fail
KHR-GL46.limits.max_fragment_input_components,Fail
KHR-GL46.packed_pixels.pbo_rectangle.r11f_g11f_b10f,Fail
KHR-GL46.packed_pixels.pbo_rectangle.r16,Fail
@ -235,14 +88,9 @@ KHR-GL46.texture_view.view_classes,Fail
KHR-GL46.vertex_attrib_64bit.vao,Fail
KHR-Single-GL46.arrays_of_arrays_gl.AtomicUsage,Fail
KHR-Single-GL46.arrays_of_arrays_gl.SubroutineFunctionCalls2,Crash
KHR-Single-GL46.enhanced_layouts.ssb_member_offset_and_align,Fail
KHR-Single-GL46.enhanced_layouts.uniform_block_member_offset_and_align,Fail
KHR-Single-GL46.enhanced_layouts.varying_structure_locations,Crash
KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_block_member,Fail
KHR-Single-GL46.enhanced_layouts.xfb_capture_struct,Fail
KHR-Single-GL46.enhanced_layouts.xfb_global_buffer,Fail
KHR-Single-GL46.enhanced_layouts.xfb_override_qualifiers_with_api,Fail
KHR-Single-GL46.enhanced_layouts.xfb_stride,Fail
KHR-Single-GL46.enhanced_layouts.xfb_struct_explicit_location,Crash
KHR-Single-GL46.enhanced_layouts.xfb_vertex_streams,Fail
dEQP-GLES3.functional.occlusion_query.depth_clear,Fail
@ -338,7 +186,6 @@ dEQP-GLES31.functional.texture.border_clamp.formats.depth24_stencil8_sample_dept
dEQP-GLES31.functional.texture.border_clamp.formats.depth24_stencil8_sample_depth.nearest_size_pot,Fail
dEQP-GLES31.functional.texture.border_clamp.formats.depth24_stencil8_sample_stencil.nearest_size_npot,Fail
dEQP-GLES31.functional.texture.border_clamp.formats.depth24_stencil8_sample_stencil.nearest_size_pot,Fail
dEQP-GLES31.functional.texture.border_clamp.formats.depth32f_stencil8_sample_depth.nearest_size_pot,Fail
dEQP-GLES31.functional.texture.border_clamp.formats.depth32f_stencil8_sample_stencil.nearest_size_npot,Fail
dEQP-GLES31.functional.texture.border_clamp.formats.depth32f_stencil8_sample_stencil.nearest_size_pot,Fail
dEQP-GLES31.functional.texture.border_clamp.formats.depth_component24.gather_size_npot,Fail

View File

@ -948,10 +948,11 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
nir_src_as_uint(intr->src[0]) == 0 &&
nir_dest_bit_size(intr->dest) == 64 &&
nir_intrinsic_align_offset(intr) % 8 != 0;
nir_instr_rewrite_src_ssa(instr, &intr->src[1], nir_udiv_imm(b, intr->src[1].ssa,
(force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8));
force_2x32 |= nir_dest_bit_size(intr->dest) == 64 && !has_int64;
nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
/* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
if (force_2x32 || (nir_dest_bit_size(intr->dest) == 64 && !has_int64)) {
if (force_2x32) {
/* this is always scalarized */
assert(intr->dest.ssa.num_components == 1);
/* rewrite as 2x32 */
@ -972,9 +973,11 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
}
case nir_intrinsic_load_shared:
b->cursor = nir_before_instr(instr);
nir_instr_rewrite_src_ssa(instr, &intr->src[0], nir_udiv_imm(b, intr->src[0].ssa, nir_dest_bit_size(intr->dest) / 8));
bool force_2x32 = nir_dest_bit_size(intr->dest) == 64 && !has_int64;
nir_ssa_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
nir_instr_rewrite_src_ssa(instr, &intr->src[0], offset);
/* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
if (nir_dest_bit_size(intr->dest) == 64 && !has_int64) {
if (force_2x32) {
/* this is always scalarized */
assert(intr->dest.ssa.num_components == 1);
/* rewrite as 2x32 */
@ -988,11 +991,13 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
return true;
}
break;
case nir_intrinsic_store_ssbo:
case nir_intrinsic_store_ssbo: {
b->cursor = nir_before_instr(instr);
nir_instr_rewrite_src_ssa(instr, &intr->src[2], nir_udiv_imm(b, intr->src[2].ssa, nir_src_bit_size(intr->src[0]) / 8));
bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
nir_ssa_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
nir_instr_rewrite_src_ssa(instr, &intr->src[2], offset);
/* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) {
if (force_2x32) {
/* this is always scalarized */
assert(intr->src[0].ssa->num_components == 1);
nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
@ -1001,9 +1006,12 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
nir_instr_remove(instr);
}
return true;
case nir_intrinsic_store_shared:
}
case nir_intrinsic_store_shared: {
b->cursor = nir_before_instr(instr);
nir_instr_rewrite_src_ssa(instr, &intr->src[1], nir_udiv_imm(b, intr->src[1].ssa, nir_src_bit_size(intr->src[0]) / 8));
bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
/* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) {
/* this is always scalarized */
@ -1014,6 +1022,7 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
nir_instr_remove(instr);
}
return true;
}
default:
break;
}