ir3: Initial support for spilling non-shared registers
Support for spilling shared registers to normal registers is still TODO. There are also several improvements still to be made, such as rematerialization. Note that there is one behavior change to register pressure accounting: in mergedregs mode we now include half registers directly in the current full pressure, rather than adding the max half pressure to the max full pressure afterwards. This might result in a lower calculated max pressure in some cases with half registers. The change is needed for spilling, since we need to make sure that the total pressure, including half registers, is below the maximum at each instruction. Because the entire pass is rewritten, including the parts that calculate register pressure, it didn't seem worth separating out this change. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12033>
This commit is contained in:
parent
6a5312bf86
commit
613eaac7b5
|
@ -14,11 +14,6 @@ KHR-GL33.transform_feedback.query_vertex_separate_test,Fail
|
|||
# "*** Color comparison failed"
|
||||
KHR-GLES3.packed_depth_stencil.verify_read_pixels.depth24_stencil8,Fail
|
||||
|
||||
# "MESA: error: ir3_ra() failed!"
|
||||
KHR-GLES31.core.arrays_of_arrays.InteractionFunctionCalls2,Fail
|
||||
KHR-GLES31.core.arrays_of_arrays.InteractionArgumentAliasing5,Fail
|
||||
KHR-GLES31.core.arrays_of_arrays.InteractionArgumentAliasing6,Fail
|
||||
|
||||
# "The values of resultStd[i] & 0xFFFFFFFE and resultFma[i] & 0xFFFFFFFE and resultCPU[i] & 0xFFFFFFFE are not bitwise equal for i = 0..99 "
|
||||
KHR-GLES31.core.gpu_shader5.fma_precision_float,Fail
|
||||
KHR-GLES31.core.gpu_shader5.fma_precision_vec2,Fail
|
||||
|
@ -86,11 +81,6 @@ dEQP-VK.api.info.get_physical_device_properties2.properties,Fail
|
|||
dEQP-VK.api.object_management.alloc_callback_fail.device,Fail
|
||||
dEQP-VK.api.object_management.alloc_callback_fail.device_group,Fail
|
||||
|
||||
# "MESA: error: ir3_ra() failed!"
|
||||
# https://gitlab.freedesktop.org/mesa/mesa/-/issues/33
|
||||
dEQP-VK.graphicsfuzz.spv-stable-maze-flatten-copy-composite,Fail
|
||||
dEQP-VK.graphicsfuzz.spv-stable-pillars-volatile-nontemporal-store,Fail
|
||||
|
||||
# https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/3019
|
||||
# should be fixed by https://gerrit.khronos.org/c/vk-gl-cts/+/7745
|
||||
dEQP-VK.renderpass.dedicated_allocation.attachment_allocation.input_output.7,Fail
|
||||
|
@ -98,10 +88,6 @@ dEQP-VK.renderpass.suballocation.attachment_allocation.input_output.7,Fail
|
|||
dEQP-VK.renderpass2.dedicated_allocation.attachment_allocation.input_output.7,Fail
|
||||
dEQP-VK.renderpass2.suballocation.attachment_allocation.input_output.7,Fail
|
||||
|
||||
# "MESA: error: ir3_ra() failed!"
|
||||
# https://gitlab.freedesktop.org/mesa/mesa/-/issues/33
|
||||
dEQP-VK.spirv_assembly.instruction.compute.opcopymemory.array,Fail
|
||||
|
||||
# "deqp-vk: ../src/freedreno/vulkan/tu_cs.h:186: tu_cs_reserve: Assertion `tu_cs_get_space(cs) >= reserved_size' failed."
|
||||
# https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8841
|
||||
dEQP-VK.spirv_assembly.instruction.compute.opphi.wide,Crash
|
||||
|
@ -120,14 +106,6 @@ dEQP-VK.spirv_assembly.instruction.graphics.variable_pointers.graphics.writes_si
|
|||
dEQP-VK.spirv_assembly.instruction.graphics.variable_pointers.graphics.writes_two_buffers_geom,Fail
|
||||
dEQP-VK.spirv_assembly.instruction.graphics.variable_pointers.graphics.writes_two_buffers_vert,Fail
|
||||
|
||||
# "MESA: error: ir3_ra() failed!"
|
||||
# https://gitlab.freedesktop.org/mesa/mesa/-/issues/33
|
||||
# Needs spilling, or maybe some scheduling (though throwing a bit of nir_move/sink
|
||||
# at it didn't help).
|
||||
dEQP-VK.spirv_assembly.instruction.spirv1p4.opcopylogical.nested_arrays_different_inner_stride,Fail
|
||||
dEQP-VK.spirv_assembly.instruction.spirv1p4.opcopylogical.nested_arrays_different_outer_stride,Fail
|
||||
dEQP-VK.spirv_assembly.instruction.spirv1p4.opcopylogical.nested_arrays_different_strides,Fail
|
||||
|
||||
dEQP-VK.texture.filtering.2d.formats.d24_unorm_s8_uint_stencil.nearest,Fail
|
||||
dEQP-VK.texture.filtering.2d_array.formats.d24_unorm_s8_uint_stencil.d24_unorm_s8_uint_stencil_nearest,Fail
|
||||
dEQP-VK.texture.filtering.cube.formats.d24_unorm_s8_uint_stencil.nearest,Fail
|
||||
|
@ -136,205 +114,6 @@ dEQP-VK.texture.filtering.unnormal.formats.d24_unorm_s8_uint_stencil.nearest,Fai
|
|||
# Broken on all drivers: https://gitlab.freedesktop.org/mesa/mesa/-/issues/4582
|
||||
dEQP-VK.wsi.display_control.register_device_event,Fail
|
||||
|
||||
# "MESA: error: ir3_ra() failed!"
|
||||
# https://gitlab.freedesktop.org/mesa/mesa/-/issues/33
|
||||
dEQP-VK.ssbo.layout.2_level_array.scalar.row_major_mat4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.2_level_array.scalar.row_major_mat4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.2_level_array.scalar.row_major_mat4,Fail
|
||||
dEQP-VK.ssbo.layout.2_level_array.scalar.row_major_mat4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.2_level_array.std140.row_major_mat4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.2_level_array.std140.row_major_mat4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.2_level_array.std140.row_major_mat4,Fail
|
||||
dEQP-VK.ssbo.layout.2_level_array.std140.row_major_mat4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.2_level_array.std430.row_major_mat4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.2_level_array.std430.row_major_mat4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.2_level_array.std430.row_major_mat4,Fail
|
||||
dEQP-VK.ssbo.layout.2_level_array.std430.row_major_mat4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat2x4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat2x4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat2x4,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat2x4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat3_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat3_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat3,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat3_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat3x4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat3x4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat3x4,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat3x4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4x2_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4x2_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4x2,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4x2_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4x3_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4x3_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4x3,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.scalar.row_major_mat4x3_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat2x4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat2x4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat2x4,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat2x4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat3_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat3_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat3,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat3_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat3x4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat3x4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat3x4,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat3x4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4x2_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4x2_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4x2,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4x2_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4x3_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4x3_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4x3,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std140.row_major_mat4x3_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat2x4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat2x4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat2x4,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat2x4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat3_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat3_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat3,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat3_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat3x4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat3x4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat3x4,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat3x4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4x2_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4x2_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4x2,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4x2_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4x3_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4x3_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4x3,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_array.std430.row_major_mat4x3_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat2x4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat2x4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat2x4,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat2x4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat3_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat3_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat3,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat3_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat3x4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat3x4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat3x4,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat3x4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4x2_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4x2_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4x2,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4x2_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4x3_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4x3_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4x3,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.scalar.row_major_mat4x3_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat2x4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat2x4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat2x4,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat2x4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat3_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat3_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat3,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat3_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat3x4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat3x4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat3x4,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat3x4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4x2_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4x2_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4x2,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4x2_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4x3_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4x3_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4x3,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std140.row_major_mat4x3_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat2x4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat2x4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat2x4,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat2x4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat3_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat3_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat3,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat3_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat3x4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat3x4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat3x4,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat3x4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4x2_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4x2_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4x2,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4x2_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4x3_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4x3_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4x3,Fail
|
||||
dEQP-VK.ssbo.layout.3_level_unsized_array.std430.row_major_mat4x3_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat3x4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat3x4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat3x4,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat3x4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat4,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat4x3_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat4x3_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat4x3,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.scalar.row_major_mat4x3_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat3x4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat3x4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat3x4,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat3x4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat4,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat4x3_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat4x3_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat4x3,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std140.row_major_mat4x3_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat3x4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat3x4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat3x4,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat3x4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat4_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat4_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat4,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat4_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat4x3_comp_access,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat4x3_comp_access_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat4x3,Fail
|
||||
dEQP-VK.ssbo.layout.basic_unsized_array.std430.row_major_mat4x3_store_cols,Fail
|
||||
dEQP-VK.ssbo.layout.random.all_shared_buffer.5,Fail
|
||||
dEQP-VK.ssbo.layout.random.nested_structs_arrays.0,Fail
|
||||
dEQP-VK.ssbo.layout.random.nested_structs_arrays.17,Fail
|
||||
dEQP-VK.ssbo.layout.random.scalar.19,Fail
|
||||
|
||||
bypass-dEQP-VK.renderpass.dedicated_allocation.attachment_allocation.input_output.7,Fail
|
||||
bypass-dEQP-VK.renderpass.suballocation.attachment_allocation.input_output.7,Fail
|
||||
bypass-dEQP-VK.renderpass2.dedicated_allocation.attachment_allocation.input_output.7,Fail
|
||||
|
|
|
@ -25,3 +25,8 @@ dEQP-VK.ubo.random.all_shared_buffer.48
|
|||
|
||||
# Still running after 3 hours, time is spent in batch_draw_tracking().
|
||||
KHR-GLES31.core.shader_image_load_store.basic-allFormats-store-fs
|
||||
|
||||
# causes a hangcheck timeout on a630:
|
||||
# msm ae00000.mdss: [drm:hangcheck_handler] *ERROR* A630: hangcheck detected gpu lockup rb 0!
|
||||
dEQP-VK.graphicsfuzz.spv-stable-maze-flatten-copy-composite
|
||||
dEQP-VK.graphicsfuzz.spv-stable-pillars-volatile-nontemporal-store
|
||||
|
|
|
@ -348,6 +348,9 @@ static const struct opc_info {
|
|||
OPC(6, OPC_GETSPID, getspid),
|
||||
OPC(6, OPC_GETWID, getwid),
|
||||
|
||||
OPC(6, OPC_SPILL_MACRO, spill.macro),
|
||||
OPC(6, OPC_RELOAD_MACRO, reload.macro),
|
||||
|
||||
OPC(7, OPC_BAR, bar),
|
||||
OPC(7, OPC_FENCE, fence),
|
||||
/* clang-format on */
|
||||
|
|
|
@ -308,6 +308,9 @@ typedef enum {
|
|||
OPC_LDG_A = _OPC(6, 55),
|
||||
OPC_STG_A = _OPC(6, 56),
|
||||
|
||||
OPC_SPILL_MACRO = _OPC(6, 57),
|
||||
OPC_RELOAD_MACRO = _OPC(6, 58),
|
||||
|
||||
/* category 7: */
|
||||
OPC_BAR = _OPC(7, 0),
|
||||
OPC_FENCE = _OPC(7, 1),
|
||||
|
|
|
@ -89,6 +89,7 @@ struct ir3_merge_set {
|
|||
uint16_t alignment;
|
||||
|
||||
unsigned interval_start;
|
||||
unsigned spill_slot;
|
||||
|
||||
unsigned regs_count;
|
||||
struct ir3_register **regs;
|
||||
|
@ -202,6 +203,8 @@ struct ir3_register {
|
|||
*/
|
||||
struct ir3_register *tied;
|
||||
|
||||
unsigned spill_slot, next_use;
|
||||
|
||||
unsigned merge_set_offset;
|
||||
struct ir3_merge_set *merge_set;
|
||||
unsigned interval_start, interval_end;
|
||||
|
@ -711,6 +714,17 @@ ir3_instr_move_after(struct ir3_instruction *instr,
|
|||
list_add(&instr->node, &before->node);
|
||||
}
|
||||
|
||||
/**
|
||||
* Move 'instr' to the beginning of the block:
|
||||
*/
|
||||
static inline void
|
||||
ir3_instr_move_before_block(struct ir3_instruction *instr,
|
||||
struct ir3_block *block)
|
||||
{
|
||||
list_delinit(&instr->node);
|
||||
list_add(&instr->node, &block->instr_list);
|
||||
}
|
||||
|
||||
void ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx, bool falsedeps);
|
||||
|
||||
void ir3_set_dst_type(struct ir3_instruction *instr, bool half);
|
||||
|
|
|
@ -44,6 +44,7 @@ static const struct debug_named_value shader_debug_options[] = {
|
|||
{"nouboopt", IR3_DBG_NOUBOOPT, "Disable lowering UBO to uniform"},
|
||||
{"nofp16", IR3_DBG_NOFP16, "Don't lower mediump to fp16"},
|
||||
{"nocache", IR3_DBG_NOCACHE, "Disable shader cache"},
|
||||
{"spillall", IR3_DBG_SPILLALL, "Spill as much as possible to test the spiller"},
|
||||
#ifdef DEBUG
|
||||
/* DEBUG-only options: */
|
||||
{"schedmsgs", IR3_DBG_SCHEDMSGS, "Enable scheduler debug messages"},
|
||||
|
|
|
@ -194,6 +194,7 @@ enum ir3_shader_debug {
|
|||
IR3_DBG_NOUBOOPT = BITFIELD_BIT(9),
|
||||
IR3_DBG_NOFP16 = BITFIELD_BIT(10),
|
||||
IR3_DBG_NOCACHE = BITFIELD_BIT(11),
|
||||
IR3_DBG_SPILLALL = BITFIELD_BIT(12),
|
||||
|
||||
/* DEBUG-only options: */
|
||||
IR3_DBG_SCHEDMSGS = BITFIELD_BIT(20),
|
||||
|
|
|
@ -0,0 +1,163 @@
|
|||
/*
|
||||
* Copyright (C) 2021 Valve Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "ir3_ra.h"
|
||||
|
||||
/* The spilling pass leaves out a few details required to successfully operate
|
||||
* ldp/stp:
|
||||
*
|
||||
* 1. ldp/stp can only load/store 4 components at a time, but spilling ignores
|
||||
* that and just spills/restores entire values, including arrays and values
|
||||
* created for texture setup which can be more than 4 components.
|
||||
* 2. The spiller doesn't add barrier dependencies needed for post-RA
|
||||
* scheduling.
|
||||
*
|
||||
* The first one, in particular, is much easier to handle after RA because
|
||||
* arrays and normal values can be treated the same way. Therefore this pass
|
||||
* runs after RA, and handles both issues. This keeps the complexity out of the
|
||||
* spiller.
|
||||
*/
|
||||
|
||||
static void
|
||||
split_spill(struct ir3_instruction *spill)
|
||||
{
|
||||
unsigned orig_components = spill->srcs[2]->uim_val;
|
||||
|
||||
/* We don't handle splitting dependencies. */
|
||||
assert(spill->deps_count == 0);
|
||||
|
||||
if (orig_components <= 4) {
|
||||
if (spill->srcs[1]->flags & IR3_REG_ARRAY) {
|
||||
spill->srcs[1]->wrmask = MASK(orig_components);
|
||||
spill->srcs[1]->num = spill->srcs[1]->array.base;
|
||||
spill->srcs[1]->flags &= ~IR3_REG_ARRAY;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
for (unsigned comp = 0; comp < orig_components; comp += 4) {
|
||||
unsigned components = MIN2(orig_components - comp, 4);
|
||||
struct ir3_instruction *clone = ir3_instr_clone(spill);
|
||||
ir3_instr_move_before(clone, spill);
|
||||
|
||||
clone->srcs[1]->wrmask = MASK(components);
|
||||
if (clone->srcs[1]->flags & IR3_REG_ARRAY) {
|
||||
clone->srcs[1]->num = clone->srcs[1]->array.base + comp;
|
||||
clone->srcs[1]->flags &= ~IR3_REG_ARRAY;
|
||||
}
|
||||
|
||||
clone->srcs[2]->uim_val = components;
|
||||
clone->cat6.dst_offset +=
|
||||
comp * ((spill->srcs[1]->flags & IR3_REG_HALF) ? 2 : 4);
|
||||
}
|
||||
|
||||
list_delinit(&spill->node);
|
||||
}
|
||||
|
||||
static void
|
||||
split_reload(struct ir3_instruction *reload)
|
||||
{
|
||||
unsigned orig_components = reload->srcs[2]->uim_val;
|
||||
|
||||
assert(reload->deps_count == 0);
|
||||
|
||||
if (orig_components <= 4) {
|
||||
if (reload->dsts[0]->flags & IR3_REG_ARRAY) {
|
||||
reload->dsts[0]->wrmask = MASK(orig_components);
|
||||
reload->dsts[0]->num = reload->dsts[0]->array.base;
|
||||
reload->dsts[0]->flags &= ~IR3_REG_ARRAY;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
for (unsigned comp = 0; comp < orig_components; comp += 4) {
|
||||
unsigned components = MIN2(orig_components - comp, 4);
|
||||
struct ir3_instruction *clone = ir3_instr_clone(reload);
|
||||
ir3_instr_move_before(clone, reload);
|
||||
|
||||
clone->dsts[0]->wrmask = MASK(components);
|
||||
if (clone->dsts[0]->flags & IR3_REG_ARRAY) {
|
||||
clone->dsts[0]->num = clone->dsts[0]->array.base + comp;
|
||||
clone->dsts[0]->flags &= ~IR3_REG_ARRAY;
|
||||
}
|
||||
|
||||
clone->srcs[2]->uim_val = components;
|
||||
clone->srcs[1]->uim_val +=
|
||||
comp * ((reload->dsts[0]->flags & IR3_REG_HALF) ? 2 : 4);
|
||||
}
|
||||
|
||||
list_delinit(&reload->node);
|
||||
}
|
||||
|
||||
static void
|
||||
add_spill_reload_deps(struct ir3_block *block)
|
||||
{
|
||||
struct ir3_instruction *last_spill = NULL;
|
||||
|
||||
foreach_instr (instr, &block->instr_list) {
|
||||
if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) &&
|
||||
last_spill) {
|
||||
ir3_instr_add_dep(instr, last_spill);
|
||||
}
|
||||
|
||||
if (instr->opc == OPC_SPILL_MACRO)
|
||||
last_spill = instr;
|
||||
}
|
||||
|
||||
|
||||
last_spill = NULL;
|
||||
|
||||
foreach_instr_rev (instr, &block->instr_list) {
|
||||
if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) &&
|
||||
last_spill) {
|
||||
ir3_instr_add_dep(last_spill, instr);
|
||||
}
|
||||
|
||||
if (instr->opc == OPC_SPILL_MACRO)
|
||||
last_spill = instr;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
ir3_lower_spill(struct ir3 *ir)
|
||||
{
|
||||
foreach_block (block, &ir->block_list) {
|
||||
foreach_instr_safe (instr, &block->instr_list) {
|
||||
if (instr->opc == OPC_SPILL_MACRO)
|
||||
split_spill(instr);
|
||||
else if (instr->opc == OPC_RELOAD_MACRO)
|
||||
split_reload(instr);
|
||||
}
|
||||
|
||||
add_spill_reload_deps(block);
|
||||
|
||||
foreach_instr (instr, &block->instr_list) {
|
||||
if (instr->opc == OPC_SPILL_MACRO)
|
||||
instr->opc = OPC_STP;
|
||||
else if (instr->opc == OPC_RELOAD_MACRO)
|
||||
instr->opc = OPC_LDP;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
|
@ -198,6 +198,7 @@ get_merge_set(struct ir3_register *def)
|
|||
struct ir3_merge_set *set = ralloc(def, struct ir3_merge_set);
|
||||
set->preferred_reg = ~0;
|
||||
set->interval_start = ~0;
|
||||
set->spill_slot = ~0;
|
||||
set->size = reg_size(def);
|
||||
set->alignment = (def->flags & IR3_REG_HALF) ? 1 : 2;
|
||||
set->regs_count = 1;
|
||||
|
@ -339,6 +340,19 @@ try_merge_defs(struct ir3_liveness *live, struct ir3_register *a,
|
|||
merge_merge_sets(a_set, b_set, b_set_offset);
|
||||
}
|
||||
|
||||
void
|
||||
ir3_force_merge(struct ir3_register *a, struct ir3_register *b, int b_offset)
|
||||
{
|
||||
struct ir3_merge_set *a_set = get_merge_set(a);
|
||||
struct ir3_merge_set *b_set = get_merge_set(b);
|
||||
|
||||
if (a_set == b_set)
|
||||
return;
|
||||
|
||||
int b_set_offset = a->merge_set_offset + b_offset - b->merge_set_offset;
|
||||
merge_merge_sets(a_set, b_set, b_set_offset);
|
||||
}
|
||||
|
||||
static void
|
||||
coalesce_phi(struct ir3_liveness *live, struct ir3_instruction *phi)
|
||||
{
|
||||
|
@ -462,7 +476,7 @@ ir3_create_parallel_copies(struct ir3 *ir)
|
|||
}
|
||||
|
||||
static void
|
||||
index_merge_sets(struct ir3 *ir)
|
||||
index_merge_sets(struct ir3_liveness *live, struct ir3 *ir)
|
||||
{
|
||||
unsigned offset = 0;
|
||||
foreach_block (block, &ir->block_list) {
|
||||
|
@ -489,6 +503,8 @@ index_merge_sets(struct ir3 *ir)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
live->interval_offset = offset;
|
||||
}
|
||||
|
||||
#define RESET "\x1b[0m"
|
||||
|
@ -559,7 +575,7 @@ ir3_merge_regs(struct ir3_liveness *live, struct ir3 *ir)
|
|||
}
|
||||
}
|
||||
|
||||
index_merge_sets(ir);
|
||||
index_merge_sets(live, ir);
|
||||
|
||||
if (ir3_shader_debug & IR3_DBG_RAMSGS)
|
||||
dump_merge_sets(ir);
|
||||
|
|
|
@ -1990,6 +1990,152 @@ calc_target_full_pressure(struct ir3_shader_variant *v, unsigned pressure)
|
|||
return (target - 1) * 2 * 4;
|
||||
}
|
||||
|
||||
static void
|
||||
add_pressure(struct ir3_pressure *pressure, struct ir3_register *reg,
|
||||
bool merged_regs)
|
||||
{
|
||||
unsigned size = reg_size(reg);
|
||||
if (reg->flags & IR3_REG_HALF)
|
||||
pressure->half += size;
|
||||
if (!(reg->flags & IR3_REG_HALF) || merged_regs)
|
||||
pressure->full += size;
|
||||
}
|
||||
|
||||
/* No-op interval_add callback. calc_min_limit_pressure() installs it on its
 * temporary ir3_reg_ctx, where nothing needs to happen when an interval is
 * added.
 */
static void
dummy_interval_add(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *interval)
{
   (void)ctx;
   (void)interval;
}
|
||||
|
||||
/* No-op interval_delete callback. calc_min_limit_pressure() installs it on
 * its temporary ir3_reg_ctx, where nothing needs to happen when an interval
 * is deleted.
 */
static void
dummy_interval_delete(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *interval)
{
   (void)ctx;
   (void)interval;
}
|
||||
|
||||
/* No-op interval_readd callback. calc_min_limit_pressure() installs it on
 * its temporary ir3_reg_ctx, where nothing needs to happen when a child
 * interval is re-added.
 */
static void
dummy_interval_readd(struct ir3_reg_ctx *ctx, struct ir3_reg_interval *parent,
                     struct ir3_reg_interval *child)
{
   (void)ctx;
   (void)parent;
   (void)child;
}
|
||||
|
||||
/* Calculate the minimum possible limit on register pressure so that spilling
|
||||
* still succeeds. Used to implement IR3_SHADER_DEBUG=spillall.
|
||||
*/
|
||||
|
||||
static void
|
||||
calc_min_limit_pressure(struct ir3_shader_variant *v,
|
||||
struct ir3_liveness *live,
|
||||
struct ir3_pressure *limit)
|
||||
{
|
||||
struct ir3_block *start = ir3_start_block(v->ir);
|
||||
struct ir3_reg_ctx *ctx = ralloc(NULL, struct ir3_reg_ctx);
|
||||
struct ir3_reg_interval *intervals =
|
||||
rzalloc_array(ctx, struct ir3_reg_interval, live->definitions_count);
|
||||
|
||||
ctx->interval_add = dummy_interval_add;
|
||||
ctx->interval_delete = dummy_interval_delete;
|
||||
ctx->interval_readd = dummy_interval_readd;
|
||||
|
||||
limit->full = limit->half = 0;
|
||||
|
||||
struct ir3_pressure cur_pressure = {0};
|
||||
foreach_instr (input, &start->instr_list) {
|
||||
if (input->opc != OPC_META_INPUT &&
|
||||
input->opc != OPC_META_TEX_PREFETCH)
|
||||
break;
|
||||
|
||||
add_pressure(&cur_pressure, input->dsts[0], v->mergedregs);
|
||||
}
|
||||
|
||||
limit->full = MAX2(limit->full, cur_pressure.full);
|
||||
limit->half = MAX2(limit->half, cur_pressure.half);
|
||||
|
||||
foreach_instr (input, &start->instr_list) {
|
||||
if (input->opc != OPC_META_INPUT &&
|
||||
input->opc != OPC_META_TEX_PREFETCH)
|
||||
break;
|
||||
|
||||
/* pre-colored inputs may have holes, which increases the pressure. */
|
||||
struct ir3_register *dst = input->dsts[0];
|
||||
if (dst->num != INVALID_REG) {
|
||||
unsigned physreg = ra_reg_get_physreg(dst) + reg_size(dst);
|
||||
if (dst->flags & IR3_REG_HALF)
|
||||
limit->half = MAX2(limit->half, physreg);
|
||||
if (!(dst->flags & IR3_REG_HALF) || v->mergedregs)
|
||||
limit->full = MAX2(limit->full, physreg);
|
||||
}
|
||||
}
|
||||
|
||||
foreach_block (block, &v->ir->block_list) {
|
||||
rb_tree_init(&ctx->intervals);
|
||||
|
||||
unsigned name;
|
||||
BITSET_FOREACH_SET (name, live->live_in[block->index],
|
||||
live->definitions_count) {
|
||||
struct ir3_register *reg = live->definitions[name];
|
||||
ir3_reg_interval_init(&intervals[reg->name], reg);
|
||||
ir3_reg_interval_insert(ctx, &intervals[reg->name]);
|
||||
}
|
||||
|
||||
foreach_instr (instr, &block->instr_list) {
|
||||
ra_foreach_dst (dst, instr) {
|
||||
ir3_reg_interval_init(&intervals[dst->name], dst);
|
||||
}
|
||||
/* phis and parallel copies can be deleted via spilling */
|
||||
|
||||
if (instr->opc == OPC_META_PHI) {
|
||||
ir3_reg_interval_insert(ctx, &intervals[instr->dsts[0]->name]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (instr->opc == OPC_META_PARALLEL_COPY)
|
||||
continue;
|
||||
|
||||
cur_pressure = (struct ir3_pressure) {0};
|
||||
|
||||
ra_foreach_dst (dst, instr) {
|
||||
if (dst->tied && !(dst->tied->flags & IR3_REG_KILL))
|
||||
add_pressure(&cur_pressure, dst, v->mergedregs);
|
||||
}
|
||||
|
||||
ra_foreach_src_rev (src, instr) {
|
||||
/* We currently don't support spilling the parent of a source when
|
||||
* making space for sources, so we have to keep track of the
|
||||
* intervals and figure out the root of the tree to figure out how
|
||||
* much space we need.
|
||||
*
|
||||
* TODO: We should probably support this in the spiller.
|
||||
*/
|
||||
struct ir3_reg_interval *interval = &intervals[src->def->name];
|
||||
while (interval->parent)
|
||||
interval = interval->parent;
|
||||
add_pressure(&cur_pressure, interval->reg, v->mergedregs);
|
||||
|
||||
if (src->flags & IR3_REG_FIRST_KILL)
|
||||
ir3_reg_interval_remove(ctx, &intervals[src->def->name]);
|
||||
}
|
||||
|
||||
limit->full = MAX2(limit->full, cur_pressure.full);
|
||||
limit->half = MAX2(limit->half, cur_pressure.half);
|
||||
|
||||
cur_pressure = (struct ir3_pressure) {0};
|
||||
|
||||
ra_foreach_dst (dst, instr) {
|
||||
ir3_reg_interval_init(&intervals[dst->name], dst);
|
||||
ir3_reg_interval_insert(ctx, &intervals[dst->name]);
|
||||
add_pressure(&cur_pressure, dst, v->mergedregs);
|
||||
}
|
||||
|
||||
limit->full = MAX2(limit->full, cur_pressure.full);
|
||||
limit->half = MAX2(limit->half, cur_pressure.half);
|
||||
}
|
||||
}
|
||||
|
||||
/* Account for the base register, which needs to be available everywhere. */
|
||||
limit->full += 2;
|
||||
|
||||
ralloc_free(ctx);
|
||||
}
|
||||
|
||||
int
|
||||
ir3_ra(struct ir3_shader_variant *v)
|
||||
{
|
||||
|
@ -2010,15 +2156,35 @@ ir3_ra(struct ir3_shader_variant *v)
|
|||
d("\thalf: %u", max_pressure.half);
|
||||
d("\tshared: %u", max_pressure.shared);
|
||||
|
||||
if (v->mergedregs) {
|
||||
max_pressure.full += max_pressure.half;
|
||||
max_pressure.half = 0;
|
||||
/* TODO: calculate half/full limit correctly for CS with barrier */
|
||||
struct ir3_pressure limit_pressure;
|
||||
limit_pressure.full = RA_FULL_SIZE;
|
||||
limit_pressure.half = RA_HALF_SIZE;
|
||||
limit_pressure.shared = RA_SHARED_SIZE;
|
||||
|
||||
/* If requested, lower the limit so that spilling happens more often. */
|
||||
if (ir3_shader_debug & IR3_DBG_SPILLALL)
|
||||
calc_min_limit_pressure(v, live, &limit_pressure);
|
||||
|
||||
if (max_pressure.shared > limit_pressure.shared) {
|
||||
/* TODO shared reg -> normal reg spilling */
|
||||
d("shared max pressure exceeded!");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (max_pressure.full > RA_FULL_SIZE || max_pressure.half > RA_HALF_SIZE ||
|
||||
max_pressure.shared > RA_SHARED_SIZE) {
|
||||
d("max pressure exceeded!");
|
||||
return 1;
|
||||
bool spilled = false;
|
||||
if (max_pressure.full > limit_pressure.full ||
|
||||
max_pressure.half > limit_pressure.half) {
|
||||
if (!v->shader->compiler->has_pvtmem) {
|
||||
d("max pressure exceeded!");
|
||||
return 1;
|
||||
}
|
||||
d("max pressure exceeded, spilling!");
|
||||
IR3_PASS(v->ir, ir3_spill, v, &live, &limit_pressure);
|
||||
ir3_calc_pressure(v, live, &max_pressure);
|
||||
assert(max_pressure.full <= limit_pressure.full &&
|
||||
max_pressure.half <= limit_pressure.half);
|
||||
spilled = true;
|
||||
}
|
||||
|
||||
struct ra_ctx *ctx = rzalloc(NULL, struct ra_ctx);
|
||||
|
@ -2054,19 +2220,20 @@ ir3_ra(struct ir3_shader_variant *v)
|
|||
for (unsigned i = 0; i < instr->dsts_count; i++) {
|
||||
instr->dsts[i]->flags &= ~IR3_REG_SSA;
|
||||
|
||||
/* Parallel copies of array registers copy the whole register,
|
||||
* and we need some way to let the parallel copy code know
|
||||
* that this was an array whose size is determined by
|
||||
* reg->size. So keep the array flag on those.
|
||||
/* Parallel copies of array registers copy the whole register, and
|
||||
* we need some way to let the parallel copy code know that this was
|
||||
* an array whose size is determined by reg->size. So keep the array
|
||||
* flag on those. spill/reload also need to work on the entire
|
||||
* array.
|
||||
*/
|
||||
if (!is_meta(instr))
|
||||
if (!is_meta(instr) && instr->opc != OPC_RELOAD_MACRO)
|
||||
instr->dsts[i]->flags &= ~IR3_REG_ARRAY;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < instr->srcs_count; i++) {
|
||||
instr->srcs[i]->flags &= ~IR3_REG_SSA;
|
||||
|
||||
if (!is_meta(instr))
|
||||
if (!is_meta(instr) && instr->opc != OPC_SPILL_MACRO)
|
||||
instr->srcs[i]->flags &= ~IR3_REG_ARRAY;
|
||||
}
|
||||
}
|
||||
|
@ -2074,6 +2241,10 @@ ir3_ra(struct ir3_shader_variant *v)
|
|||
|
||||
ir3_debug_print(v->ir, "AFTER: register allocation");
|
||||
|
||||
if (spilled) {
|
||||
IR3_PASS(v->ir, ir3_lower_spill);
|
||||
}
|
||||
|
||||
ir3_lower_copies(v);
|
||||
|
||||
ir3_debug_print(v->ir, "AFTER: ir3_lower_copies");
|
||||
|
|
|
@ -137,6 +137,7 @@ ra_reg_is_dst(const struct ir3_register *reg)
|
|||
|
||||
struct ir3_liveness {
|
||||
unsigned block_count;
|
||||
unsigned interval_offset;
|
||||
DECLARE_ARRAY(struct ir3_register *, definitions);
|
||||
DECLARE_ARRAY(BITSET_WORD *, live_out);
|
||||
DECLARE_ARRAY(BITSET_WORD *, live_in);
|
||||
|
@ -151,6 +152,9 @@ void ir3_create_parallel_copies(struct ir3 *ir);
|
|||
|
||||
void ir3_merge_regs(struct ir3_liveness *live, struct ir3 *ir);
|
||||
|
||||
void ir3_force_merge(struct ir3_register *a, struct ir3_register *b,
|
||||
int b_offset);
|
||||
|
||||
/* Register pressure, tracked separately per register category. Used both for
 * measured pressure and for the limits it is compared against.
 * NOTE(review): units look like reg_size() units -- confirm.
 */
struct ir3_pressure {
   /* Full registers; in merged-register mode half registers are counted
    * here as well.
    */
   unsigned full;
   /* Half registers. */
   unsigned half;
   /* Shared registers. */
   unsigned shared;
};
|
||||
|
@ -158,6 +162,12 @@ struct ir3_pressure {
|
|||
void ir3_calc_pressure(struct ir3_shader_variant *v, struct ir3_liveness *live,
|
||||
struct ir3_pressure *max_pressure);
|
||||
|
||||
bool ir3_spill(struct ir3 *ir, struct ir3_shader_variant *v,
|
||||
struct ir3_liveness **live,
|
||||
const struct ir3_pressure *limit_pressure);
|
||||
|
||||
bool ir3_lower_spill(struct ir3 *ir);
|
||||
|
||||
void ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,
|
||||
unsigned half_size, unsigned block_count);
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -187,7 +187,7 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr)
|
|||
/* end/chmask/etc are allowed to have different size sources */
|
||||
} else if (instr->opc == OPC_META_PARALLEL_COPY) {
|
||||
/* pcopy sources have to match with their destination but can have
|
||||
* different size.
|
||||
* different sizes from each other.
|
||||
*/
|
||||
} else if (n > 0) {
|
||||
validate_assert(ctx, (last_reg->flags & IR3_REG_HALF) ==
|
||||
|
@ -303,6 +303,7 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr)
|
|||
case OPC_STL:
|
||||
case OPC_STP:
|
||||
case OPC_STLW:
|
||||
case OPC_SPILL_MACRO:
|
||||
validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
|
||||
validate_reg_size(ctx, instr->srcs[1], instr->cat6.type);
|
||||
validate_assert(ctx, !(instr->srcs[2]->flags & IR3_REG_HALF));
|
||||
|
|
|
@ -88,6 +88,7 @@ libfreedreno_ir3_files = files(
|
|||
'ir3_legalize.c',
|
||||
'ir3_liveness.c',
|
||||
'ir3_lower_parallelcopy.c',
|
||||
'ir3_lower_spill.c',
|
||||
'ir3_lower_subgroups.c',
|
||||
'ir3_merge_regs.c',
|
||||
'ir3_nir.c',
|
||||
|
|
|
@ -345,7 +345,6 @@ spec@glsl-1.50@execution@compatibility@vs-gs-ff-frag,Crash
|
|||
spec@glsl-1.50@execution@compatibility@vs-gs-texcoord-array-2,Crash
|
||||
spec@glsl-1.50@execution@compatibility@vs-gs-texcoord-array,Crash
|
||||
spec@glsl-1.50@execution@geometry@end-primitive 0,Fail
|
||||
spec@glsl-1.50@execution@geometry@max-input-components,Fail
|
||||
spec@glsl-1.50@execution@geometry@primitive-id-restart gl_line_loop ffs,Fail
|
||||
spec@glsl-1.50@execution@geometry@primitive-id-restart gl_line_loop other,Fail
|
||||
spec@glsl-1.50@execution@geometry@primitive-id-restart gl_lines_adjacency ffs,Fail
|
||||
|
@ -385,11 +384,7 @@ spec@glsl-1.50@execution@geometry@tri-strip-ordering-with-prim-restart gl_triang
|
|||
spec@glsl-1.50@execution@geometry@tri-strip-ordering-with-prim-restart gl_triangle_strip other,Fail
|
||||
spec@glsl-1.50@execution@primitive-id-no-gs-quads,Fail
|
||||
spec@glsl-1.50@execution@primitive-id-no-gs-quad-strip,Fail
|
||||
spec@glsl-1.50@execution@variable-indexing@gs-input-array-vec2-index-rd,Fail
|
||||
spec@glsl-1.50@execution@variable-indexing@gs-input-array-vec3-index-rd,Fail
|
||||
spec@glsl-1.50@execution@variable-indexing@gs-output-array-vec3-index-wr,Fail
|
||||
spec@glsl-1.50@execution@variable-indexing@gs-output-array-vec4-index-wr,Crash
|
||||
spec@glsl-1.50@execution@variable-indexing@vs-output-array-vec4-index-wr-before-gs,Fail
|
||||
spec@glsl-1.50@gs-max-output-components,Fail
|
||||
spec@intel_performance_query@intel_performance_query-issue_2235,Fail
|
||||
spec@khr_texture_compression_astc@array-gl@12x12 Block Dim,Fail
|
||||
|
|
Loading…
Reference in New Issue