tu: Implement clip/cull distances

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6959>
This commit is contained in:
Connor Abbott 2020-09-24 16:04:18 +02:00 committed by Marge Bot
parent 47f825ac63
commit b4224c39e1
3 changed files with 34 additions and 6 deletions

View File

@@ -423,8 +423,8 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
.shaderStorageImageArrayDynamicIndexing = true,
.shaderStorageImageReadWithoutFormat = true,
.shaderStorageImageWriteWithoutFormat = true,
.shaderClipDistance = false,
.shaderCullDistance = false,
.shaderClipDistance = true,
.shaderCullDistance = true,
.shaderFloat64 = false,
.shaderInt64 = false,
.shaderInt16 = false,

View File

@@ -803,7 +803,11 @@ tu6_emit_vpc(struct tu_cs *cs,
const struct reg_config *cfg = &reg_config[last_shader->type];
struct ir3_shader_linkage linkage = { .primid_loc = 0xff };
struct ir3_shader_linkage linkage = {
.primid_loc = 0xff,
.clip0_loc = 0xff,
.clip1_loc = 0xff,
};
if (fs)
ir3_link_shaders(&linkage, last_shader, fs, true);
@@ -829,6 +833,10 @@ tu6_emit_vpc(struct tu_cs *cs,
ir3_find_output_regid(last_shader, VARYING_SLOT_LAYER);
const uint32_t view_regid =
ir3_find_output_regid(last_shader, VARYING_SLOT_VIEWPORT);
const uint32_t clip0_regid =
ir3_find_output_regid(last_shader, VARYING_SLOT_CLIP_DIST0);
const uint32_t clip1_regid =
ir3_find_output_regid(last_shader, VARYING_SLOT_CLIP_DIST1);
uint32_t primitive_regid = gs ?
ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID) : regid(63, 0);
uint32_t flags_regid = gs ?
@@ -865,6 +873,19 @@ tu6_emit_vpc(struct tu_cs *cs,
ir3_link_add(&linkage, pointsize_regid, 0x1, linkage.max_loc);
}
uint8_t clip_cull_mask = last_shader->clip_mask | last_shader->cull_mask;
/* Handle the case where clip/cull distances aren't read by the FS */
uint32_t clip0_loc = linkage.clip0_loc, clip1_loc = linkage.clip1_loc;
if (clip0_loc == 0xff && clip0_regid != regid(63, 0)) {
clip0_loc = linkage.max_loc;
ir3_link_add(&linkage, clip0_regid, clip_cull_mask & 0xf, linkage.max_loc);
}
if (clip1_loc == 0xff && clip1_regid != regid(63, 0)) {
clip1_loc = linkage.max_loc;
ir3_link_add(&linkage, clip1_regid, clip_cull_mask >> 4, linkage.max_loc);
}
tu6_setup_streamout(cs, last_shader, &linkage);
/* The GPU hangs on some models when there are no outputs (xs_pack::CNT),
@@ -902,17 +923,21 @@ tu6_emit_vpc(struct tu_cs *cs,
A6XX_VPC_VS_PACK_EXTRAPOS(extra_pos));
tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_clip_cntl, 1);
tu_cs_emit(cs, 0xffff00);
tu_cs_emit(cs, A6XX_VPC_VS_CLIP_CNTL_CLIP_MASK(clip_cull_mask) |
A6XX_VPC_VS_CLIP_CNTL_CLIP_DIST_03_LOC(clip0_loc) |
A6XX_VPC_VS_CLIP_CNTL_CLIP_DIST_47_LOC(clip1_loc));
tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_cl_cntl, 1);
tu_cs_emit(cs, 0);
tu_cs_emit(cs, A6XX_GRAS_VS_CL_CNTL_CLIP_MASK(last_shader->clip_mask) |
A6XX_GRAS_VS_CL_CNTL_CULL_MASK(last_shader->cull_mask));
tu_cs_emit_pkt4(cs, cfg->reg_pc_xs_out_cntl, 1);
tu_cs_emit(cs, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) |
CONDREG(pointsize_regid, A6XX_PC_VS_OUT_CNTL_PSIZE) |
CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) |
CONDREG(view_regid, A6XX_PC_VS_OUT_CNTL_VIEW) |
CONDREG(primitive_regid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID));
CONDREG(primitive_regid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID) |
A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_primitive_cntl, 1);
tu_cs_emit(cs, A6XX_SP_VS_PRIMITIVE_CNTL_OUT(linkage.cnt) |
@@ -2052,6 +2077,7 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
key.layer_zero = !(outputs_written & VARYING_BIT_LAYER);
key.view_zero = !(outputs_written & VARYING_BIT_VIEWPORT);
key.ucp_enables = MASK(last_shader->ir3_shader->nir->info.clip_distance_array_size);
pipeline->tess.patch_type = key.tessellation;

View File

@@ -172,6 +172,8 @@ tu_spirv_to_nir(struct tu_device *dev,
NIR_PASS_V(nir, nir_lower_system_values);
NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
NIR_PASS_V(nir, nir_lower_frexp);
ir3_optimize_loop(nir);