diff --git a/.gitlab-ci/deqp-freedreno-a630-fails.txt b/.gitlab-ci/deqp-freedreno-a630-fails.txt index f4ad6b7fbcd..19519c512d0 100644 --- a/.gitlab-ci/deqp-freedreno-a630-fails.txt +++ b/.gitlab-ci/deqp-freedreno-a630-fails.txt @@ -13,7 +13,6 @@ dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_positive_round dEQP-VK.tessellation.invariance.outer_edge_index_independence.quads_fractional_even_spacing_ccw_point_mode dEQP-VK.tessellation.invariance.outer_edge_symmetry.triangles_fractional_odd_spacing_cw_point_mode -KHR-GL30.clip_distance.functional KHR-GL30.transform_feedback.api_errors_test KHR-GL30.transform_feedback.capture_vertex_interleaved_test KHR-GL30.transform_feedback.capture_vertex_separate_test diff --git a/.gitlab-ci/traces-freedreno.yml b/.gitlab-ci/traces-freedreno.yml index 2bc3281b5bf..8d47fcd103e 100644 --- a/.gitlab-ci/traces-freedreno.yml +++ b/.gitlab-ci/traces-freedreno.yml @@ -236,7 +236,7 @@ traces: - path: neverball/neverball.trace expectations: - device: freedreno-a630 - checksum: e67cdf15590f1729201eb82393f5513e + checksum: 3e0a972c2a2180b349cb1c529d3ceca5 - path: pathfinder/canvas_moire.trace expectations: - device: freedreno-a630 diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index 4bc246ba164..9080ed7671d 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -94,6 +94,9 @@ ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id) */ compiler->max_const_compute = 256; + /* TODO: implement clip+cull distances on earlier gen's */ + compiler->has_clip_cull = true; + if (compiler->gpu_id == 650) compiler->tess_use_shared = true; } else { diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h index 5d7d140e416..0c9a2a4bc75 100644 --- a/src/freedreno/ir3/ir3_compiler.h +++ b/src/freedreno/ir3/ir3_compiler.h @@ -105,6 +105,9 @@ struct ir3_compiler { * vec4 units): */ uint32_t const_upload_unit; + + /* Whether clip+cull distances are supported */ + bool has_clip_cull; }; void ir3_compiler_destroy(struct ir3_compiler *compiler); diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 1ee29568790..dfb5f29cc8f 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -460,7 +460,7 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s) bool layer_zero = so->key.layer_zero && (s->info.inputs_read & VARYING_BIT_LAYER); bool view_zero = so->key.view_zero && (s->info.inputs_read & VARYING_BIT_VIEWPORT); - if (so->key.ucp_enables) + if (so->key.ucp_enables && !so->shader->compiler->has_clip_cull) progress |= OPT(s, nir_lower_clip_fs, so->key.ucp_enables, false); if (so->key.fclamp_color) progress |= OPT(s, nir_lower_clamp_color_outputs); diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index b29a66a0071..bc5cb510b19 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -338,7 +338,12 @@ ir3_setup_used_key(struct ir3_shader *shader) key->safe_constlen = true; - key->ucp_enables = 0xff; + /* When clip/cull distances are natively supported, we only use + * ucp_enables to determine whether to lower legacy clip planes to + * gl_ClipDistance. + */ + if (info->stage != MESA_SHADER_FRAGMENT || !shader->compiler->has_clip_cull) + key->ucp_enables = 0xff; if (info->stage == MESA_SHADER_FRAGMENT) { key->fsaturate_s = ~0; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index 5b6c2ca17d2..406ad0b54b0 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -307,6 +307,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, bool binning_pass) { uint32_t pos_regid, psize_regid, color_regid[8], posz_regid; + uint32_t clip0_regid, clip1_regid; uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid; uint32_t smask_in_regid, smask_regid; uint32_t vertex_regid, instance_regid, layer_regid, primitive_regid; @@ -316,6 +317,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, uint32_t gs_header_regid; enum a3xx_threadsize fssz; uint8_t psize_loc = ~0, pos_loc = ~0, layer_loc = ~0; + uint8_t clip0_loc, clip1_loc; int i, j; static const struct ir3_shader_variant dummy_fs = {0}; @@ -337,6 +339,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, pos_regid = ir3_find_output_regid(vs, VARYING_SLOT_POS); psize_regid = ir3_find_output_regid(vs, VARYING_SLOT_PSIZ); + clip0_regid = ir3_find_output_regid(vs, VARYING_SLOT_CLIP_DIST0); + clip1_regid = ir3_find_output_regid(vs, VARYING_SLOT_CLIP_DIST1); vertex_regid = ir3_find_sysval_regid(vs, SYSTEM_VALUE_VERTEX_ID); instance_regid = ir3_find_sysval_regid(vs, SYSTEM_VALUE_INSTANCE_ID); @@ -349,6 +353,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, pos_regid = ir3_find_output_regid(ds, VARYING_SLOT_POS); psize_regid = ir3_find_output_regid(ds, VARYING_SLOT_PSIZ); + clip0_regid = ir3_find_output_regid(ds, VARYING_SLOT_CLIP_DIST0); + clip1_regid = ir3_find_output_regid(ds, VARYING_SLOT_CLIP_DIST1); } else { tess_coord_x_regid = regid(63, 0); tess_coord_y_regid = regid(63, 0); @@ -362,6 +368,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, primitive_regid = ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID); pos_regid = ir3_find_output_regid(gs, VARYING_SLOT_POS); psize_regid = ir3_find_output_regid(gs, VARYING_SLOT_PSIZ); + clip0_regid = ir3_find_output_regid(gs, VARYING_SLOT_CLIP_DIST0); + clip1_regid = ir3_find_output_regid(gs, VARYING_SLOT_CLIP_DIST1); layer_regid = ir3_find_output_regid(gs, VARYING_SLOT_LAYER); } else { gs_header_regid = regid(63, 0); @@ -464,6 +472,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, const struct ir3_shader_variant *last_shader = fd6_last_shader(state); bool do_streamout = (last_shader->shader->stream_output.num_outputs > 0); + uint8_t clip_mask = last_shader->clip_mask, cull_mask = last_shader->cull_mask; + uint8_t clip_cull_mask = clip_mask | cull_mask; /* If we have streamout, link against the real FS, rather than the * dummy FS used for binning pass state, to ensure the OUTLOC's @@ -475,6 +485,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, ir3_link_shaders(&l, last_shader, do_streamout ? state->fs : fs, true); bool primid_passthru = l.primid_loc != 0xff; + clip0_loc = l.clip0_loc; + clip1_loc = l.clip1_loc; OUT_PKT4(ring, REG_A6XX_VPC_VAR_DISABLE(0), 4); OUT_RING(ring, ~l.varmask[0]); /* VPC_VAR[0].DISABLE */ @@ -500,6 +512,20 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, ir3_link_add(&l, psize_regid, 0x1, l.max_loc); } + /* Handle the case where clip/cull distances aren't read by the FS. Make + * sure to avoid adding an output with an empty writemask if the user + * disables all the clip distances in the API so that the slot is unused. + */ + if (clip0_loc == 0xff && VALIDREG(clip0_regid) && (clip_cull_mask & 0xf) != 0) { + clip0_loc = l.max_loc; + ir3_link_add(&l, clip0_regid, clip_cull_mask & 0xf, l.max_loc); + } + + if (clip1_loc == 0xff && VALIDREG(clip1_regid) && (clip_cull_mask >> 4) != 0) { + clip1_loc = l.max_loc; + ir3_link_add(&l, clip1_regid, clip_cull_mask >> 4, l.max_loc); + } + /* If we have stream-out, we use the full shader for binning * pass, rather than the optimized binning pass one, so that we * have all the varying outputs available for xfb. So streamout @@ -602,7 +628,9 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, A6XX_PC_TESS_CNTL_OUTPUT(output)); OUT_PKT4(ring, REG_A6XX_VPC_DS_CLIP_CNTL, 1); - OUT_RING(ring, 0x00ffff00); + OUT_RING(ring, A6XX_VPC_DS_CLIP_CNTL_CLIP_MASK(clip_cull_mask) | + A6XX_VPC_DS_CLIP_CNTL_CLIP_DIST_03_LOC(clip0_loc) | + A6XX_VPC_DS_CLIP_CNTL_CLIP_DIST_47_LOC(clip1_loc)); OUT_PKT4(ring, REG_A6XX_VPC_DS_LAYER_CNTL, 1); OUT_RING(ring, 0x0000ffff); @@ -611,7 +639,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, OUT_RING(ring, 0x0); OUT_PKT4(ring, REG_A6XX_GRAS_DS_CL_CNTL, 1); - OUT_RING(ring, 0x0); + OUT_RING(ring, A6XX_GRAS_DS_CL_CNTL_CLIP_MASK(clip_mask) | + A6XX_GRAS_DS_CL_CNTL_CULL_MASK(cull_mask)); OUT_PKT4(ring, REG_A6XX_VPC_VS_PACK, 1); OUT_RING(ring, A6XX_VPC_VS_PACK_POSITIONLOC(pos_loc) | @@ -628,7 +657,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, OUT_PKT4(ring, REG_A6XX_PC_DS_OUT_CNTL, 1); OUT_RING(ring, A6XX_PC_DS_OUT_CNTL_STRIDE_IN_VPC(l.max_loc) | - CONDREG(psize_regid, 0x100)); + CONDREG(psize_regid, A6XX_PC_DS_OUT_CNTL_PSIZE) | + A6XX_PC_DS_OUT_CNTL_CLIP_MASK(clip_cull_mask)); } else { OUT_PKT4(ring, REG_A6XX_SP_HS_UNKNOWN_A831, 1); @@ -648,7 +678,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, OUT_PKT4(ring, REG_A6XX_PC_VS_OUT_CNTL, 1); OUT_RING(ring, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(l.max_loc) | - CONDREG(psize_regid, A6XX_PC_VS_OUT_CNTL_PSIZE)); + CONDREG(psize_regid, A6XX_PC_VS_OUT_CNTL_PSIZE) | + A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask)); OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_3, 1); OUT_RING(ring, 0); @@ -785,7 +816,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, OUT_RING(ring, A6XX_PC_GS_OUT_CNTL_STRIDE_IN_VPC(l.max_loc) | CONDREG(psize_regid, A6XX_PC_GS_OUT_CNTL_PSIZE) | CONDREG(layer_regid, A6XX_PC_GS_OUT_CNTL_LAYER) | - CONDREG(primitive_regid, A6XX_PC_GS_OUT_CNTL_PRIMITIVE_ID)); + CONDREG(primitive_regid, A6XX_PC_GS_OUT_CNTL_PRIMITIVE_ID) | + A6XX_PC_GS_OUT_CNTL_CLIP_MASK(clip_cull_mask)); uint32_t output; switch (gs->shader->nir->info.gs.output_primitive) { @@ -808,13 +840,16 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, A6XX_PC_PRIMITIVE_CNTL_5_GS_INVOCATIONS(gs->shader->nir->info.gs.invocations - 1)); OUT_PKT4(ring, REG_A6XX_GRAS_GS_CL_CNTL, 1); - OUT_RING(ring, 0); + OUT_RING(ring, A6XX_GRAS_GS_CL_CNTL_CLIP_MASK(clip_mask) | + A6XX_GRAS_GS_CL_CNTL_CULL_MASK(cull_mask)); OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9100, 1); OUT_RING(ring, 0xff); OUT_PKT4(ring, REG_A6XX_VPC_GS_CLIP_CNTL, 1); - OUT_RING(ring, 0xffff00); + OUT_RING(ring, A6XX_VPC_GS_CLIP_CNTL_CLIP_MASK(clip_cull_mask) | + A6XX_VPC_GS_CLIP_CNTL_CLIP_DIST_03_LOC(clip0_loc) | + A6XX_VPC_GS_CLIP_CNTL_CLIP_DIST_47_LOC(clip1_loc)); const struct ir3_shader_variant *prev = state->ds ? state->ds : state->vs; @@ -838,7 +873,13 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, } OUT_PKT4(ring, REG_A6XX_VPC_VS_CLIP_CNTL, 1); - OUT_RING(ring, 0xffff00); + OUT_RING(ring, A6XX_VPC_VS_CLIP_CNTL_CLIP_MASK(clip_cull_mask) | + A6XX_VPC_VS_CLIP_CNTL_CLIP_DIST_03_LOC(clip0_loc) | + A6XX_VPC_VS_CLIP_CNTL_CLIP_DIST_47_LOC(clip1_loc)); + + OUT_PKT4(ring, REG_A6XX_GRAS_VS_CL_CNTL, 1); + OUT_RING(ring, A6XX_GRAS_VS_CL_CNTL_CLIP_MASK(clip_mask) | + A6XX_GRAS_VS_CL_CNTL_CULL_MASK(cull_mask)); OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9107, 1); OUT_RING(ring, 0); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.c b/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.c index 1f6bf70d904..3532477a654 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.c @@ -58,8 +58,7 @@ __fd6_setup_rasterizer_stateobj(struct fd_context *ctx, .unk5 = !cso->depth_clip_near || !cso->depth_clip_far, .vp_clip_code_ignore = 1, .zero_gb_scale_z = cso->clip_halfz - ), - A6XX_GRAS_VS_CL_CNTL()); + )); OUT_REG(ring, A6XX_GRAS_SU_CNTL( diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index bea23999026..08dbcf5b553 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -461,6 +461,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return fd_device_version(screen->dev) >= FD_VERSION_FENCE_FD; case PIPE_CAP_FENCE_SIGNAL: return screen->has_syncobj; + case PIPE_CAP_CULL_DISTANCE: + return is_a6xx(screen); default: return u_pipe_screen_get_param_defaults(pscreen, param); } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c index 810c3b2051e..cb28ed559cf 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c @@ -188,6 +188,7 @@ ir3_shader_create(struct ir3_compiler *compiler, */ struct ir3_shader_key key = { .tessellation = IR3_TESS_NONE, + .ucp_enables = MASK(nir->info.clip_distance_array_size), .msaa = true, };