diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 3c87dd690f7..8bfaf85df5b 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -542,6 +542,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen) sscreen->vs_prologs, sscreen->vs_epilogs, sscreen->tcs_epilogs, + sscreen->ps_prologs, sscreen->ps_epilogs }; unsigned i; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 5d204ec6462..1ac7bc4bd85 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -92,6 +92,7 @@ struct si_screen { struct si_shader_part *vs_prologs; struct si_shader_part *vs_epilogs; struct si_shader_part *tcs_epilogs; + struct si_shader_part *ps_prologs; struct si_shader_part *ps_epilogs; }; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 8e50d06b30f..bb30c0f5019 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -879,7 +879,8 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location) static unsigned select_interp_param(struct si_shader_context *ctx, unsigned param) { - if (!ctx->shader->key.ps.prolog.force_persample_interp) + if (!ctx->shader->key.ps.prolog.force_persample_interp || + !ctx->is_monolithic) return param; /* If the shader doesn't use center/centroid, just return the parameter. @@ -1023,6 +1024,7 @@ static void declare_input_fs( unsigned input_index, const struct tgsi_full_declaration *decl) { + struct lp_build_context *base = &radeon_bld->soa.bld_base.base; struct si_shader_context *ctx = si_shader_context(&radeon_bld->soa.bld_base); struct si_shader *shader = ctx->shader; @@ -1030,6 +1032,26 @@ static void declare_input_fs( LLVMValueRef interp_param = NULL; int interp_param_idx; + /* Get colors from input VGPRs (set by the prolog). */ + if (!ctx->is_monolithic && + decl->Semantic.Name == TGSI_SEMANTIC_COLOR) { + unsigned i = decl->Semantic.Index; + unsigned colors_read = shader->selector->info.colors_read; + unsigned mask = colors_read >> (i * 4); + unsigned offset = SI_PARAM_POS_FIXED_PT + 1 + + (i ? util_bitcount(colors_read & 0xf) : 0); + + radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] = + mask & 0x1 ? LLVMGetParam(main_fn, offset++) : base->undef; + radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] = + mask & 0x2 ? LLVMGetParam(main_fn, offset++) : base->undef; + radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 2)] = + mask & 0x4 ? LLVMGetParam(main_fn, offset++) : base->undef; + radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 3)] = + mask & 0x8 ? LLVMGetParam(main_fn, offset++) : base->undef; + return; + } + interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate, decl->Interp.Location); if (interp_param_idx == -1) @@ -3970,6 +3992,16 @@ static void create_function(struct si_shader_context *ctx) num_params = SI_PARAM_POS_FIXED_PT+1; if (!ctx->is_monolithic) { + /* Color inputs from the prolog. */ + if (shader->selector->info.colors_read) { + unsigned num_color_elements = + util_bitcount(shader->selector->info.colors_read); + + assert(num_params + num_color_elements <= ARRAY_SIZE(params)); + for (i = 0; i < num_color_elements; i++) + params[num_params++] = ctx->f32; + } + /* Outputs for the epilog. */ num_return_sgprs = SI_SGPR_ALPHA_REF + 1; num_returns = @@ -4001,6 +4033,20 @@ static void create_function(struct si_shader_context *ctx) si_create_function(ctx, returns, num_returns, params, num_params, last_array_pointer, last_sgpr); + /* Reserve register locations for VGPR inputs the PS prolog may need. */ + if (ctx->type == TGSI_PROCESSOR_FRAGMENT && + !ctx->is_monolithic) { + radeon_llvm_add_attribute(ctx->radeon_bld.main_fn, + "InitialPSInputAddr", + S_0286D0_PERSP_SAMPLE_ENA(1) | + S_0286D0_PERSP_CENTER_ENA(1) | + S_0286D0_PERSP_CENTROID_ENA(1) | + S_0286D0_LINEAR_SAMPLE_ENA(1) | + S_0286D0_LINEAR_CENTER_ENA(1) | + S_0286D0_LINEAR_CENTROID_ENA(1) | + S_0286D0_FRONT_FACE_ENA(1)); + } + shader->num_input_sgprs = 0; shader->num_input_vgprs = 0; @@ -5304,6 +5350,157 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen, return shader->epilog != NULL; } +/** + * Compile the pixel shader prolog. This handles: + * - two-side color selection and interpolation + * - overriding interpolation parameters for the API PS + * - polygon stippling + * + * All preloaded SGPRs and VGPRs are passed through unmodified unless they are + * overriden by other states. (e.g. per-sample interpolation) + * Interpolated colors are stored after the preloaded VGPRs. + */ +static bool si_compile_ps_prolog(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct pipe_debug_callback *debug, + struct si_shader_part *out) +{ + union si_shader_part_key *key = &out->key; + struct si_shader shader = {}; + struct si_shader_context ctx; + struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm; + LLVMTypeRef *params; + LLVMValueRef ret, func; + int last_sgpr, num_params, num_returns, i, num_color_channels; + bool status = true; + + si_init_shader_ctx(&ctx, sscreen, &shader, tm, NULL); + ctx.type = TGSI_PROCESSOR_FRAGMENT; + shader.key.ps.prolog = key->ps_prolog.states; + + /* Number of inputs + 8 color elements. */ + params = alloca((key->ps_prolog.num_input_sgprs + + key->ps_prolog.num_input_vgprs + 8) * + sizeof(LLVMTypeRef)); + + /* Declare inputs. */ + num_params = 0; + for (i = 0; i < key->ps_prolog.num_input_sgprs; i++) + params[num_params++] = ctx.i32; + last_sgpr = num_params - 1; + + for (i = 0; i < key->ps_prolog.num_input_vgprs; i++) + params[num_params++] = ctx.f32; + + /* Declare outputs (same as inputs + add colors if needed) */ + num_returns = num_params; + num_color_channels = util_bitcount(key->ps_prolog.colors_read); + for (i = 0; i < num_color_channels; i++) + params[num_returns++] = ctx.f32; + + /* Create the function. */ + si_create_function(&ctx, params, num_returns, params, + num_params, -1, last_sgpr); + func = ctx.radeon_bld.main_fn; + + /* Copy inputs to outputs. This should be no-op, as the registers match, + * but it will prevent the compiler from overwriting them unintentionally. + */ + ret = ctx.return_value; + for (i = 0; i < num_params; i++) { + LLVMValueRef p = LLVMGetParam(func, i); + ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, ""); + } + + /* Interpolate colors. */ + for (i = 0; i < 2; i++) { + unsigned writemask = (key->ps_prolog.colors_read >> (i * 4)) & 0xf; + unsigned face_vgpr = key->ps_prolog.num_input_sgprs + + key->ps_prolog.face_vgpr_index; + LLVMValueRef interp[2], color[4]; + LLVMValueRef interp_ij = NULL, prim_mask = NULL, face = NULL; + + if (!writemask) + continue; + + /* If the interpolation qualifier is not CONSTANT (-1). */ + if (key->ps_prolog.color_interp_vgpr_index[i] != -1) { + unsigned interp_vgpr = key->ps_prolog.num_input_sgprs + + key->ps_prolog.color_interp_vgpr_index[i]; + + interp[0] = LLVMGetParam(func, interp_vgpr); + interp[1] = LLVMGetParam(func, interp_vgpr + 1); + interp_ij = lp_build_gather_values(gallivm, interp, 2); + interp_ij = LLVMBuildBitCast(gallivm->builder, interp_ij, + ctx.v2i32, ""); + } + + /* Use the absolute location of the input. */ + prim_mask = LLVMGetParam(func, SI_PS_NUM_USER_SGPR); + + if (key->ps_prolog.states.color_two_side) { + face = LLVMGetParam(func, face_vgpr); + face = LLVMBuildBitCast(gallivm->builder, face, ctx.i32, ""); + } + + interp_fs_input(&ctx, + key->ps_prolog.color_attr_index[i], + TGSI_SEMANTIC_COLOR, i, + key->ps_prolog.num_interp_inputs, + key->ps_prolog.colors_read, interp_ij, + prim_mask, face, color); + + while (writemask) { + unsigned chan = u_bit_scan(&writemask); + ret = LLVMBuildInsertValue(gallivm->builder, ret, color[chan], + num_params++, ""); + } + } + + /* Force per-sample interpolation. */ + if (key->ps_prolog.states.force_persample_interp) { + unsigned i, base = key->ps_prolog.num_input_sgprs; + LLVMValueRef persp_sample[2], linear_sample[2]; + + /* Read PERSP_SAMPLE. */ + for (i = 0; i < 2; i++) + persp_sample[i] = LLVMGetParam(func, base + i); + /* Overwrite PERSP_CENTER. */ + for (i = 0; i < 2; i++) + ret = LLVMBuildInsertValue(gallivm->builder, ret, + persp_sample[i], base + 2 + i, ""); + /* Overwrite PERSP_CENTROID. */ + for (i = 0; i < 2; i++) + ret = LLVMBuildInsertValue(gallivm->builder, ret, + persp_sample[i], base + 4 + i, ""); + /* Read LINEAR_SAMPLE. */ + for (i = 0; i < 2; i++) + linear_sample[i] = LLVMGetParam(func, base + 6 + i); + /* Overwrite LINEAR_CENTER. */ + for (i = 0; i < 2; i++) + ret = LLVMBuildInsertValue(gallivm->builder, ret, + linear_sample[i], base + 8 + i, ""); + /* Overwrite LINEAR_CENTROID. */ + for (i = 0; i < 2; i++) + ret = LLVMBuildInsertValue(gallivm->builder, ret, + linear_sample[i], base + 10 + i, ""); + } + + /* TODO: polygon stippling */ + + /* Compile. */ + LLVMBuildRet(gallivm->builder, ret); + radeon_llvm_finalize_module(&ctx.radeon_bld); + + if (si_compile_llvm(sscreen, &out->binary, &out->config, tm, + gallivm->module, debug, ctx.type, + "Fragment Shader Prolog")) + status = false; + + radeon_llvm_dispose(&ctx.radeon_bld); + return status; +} + /** * Compile the pixel shader epilog. This handles everything that must be * emulated for pixel shader exports. (alpha-test, format conversions, etc) @@ -5430,7 +5627,103 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen, struct pipe_debug_callback *debug) { struct tgsi_shader_info *info = &shader->selector->info; + union si_shader_part_key prolog_key; union si_shader_part_key epilog_key; + unsigned i; + + /* Get the prolog. */ + memset(&prolog_key, 0, sizeof(prolog_key)); + prolog_key.ps_prolog.states = shader->key.ps.prolog; + prolog_key.ps_prolog.colors_read = info->colors_read; + prolog_key.ps_prolog.num_input_sgprs = shader->num_input_sgprs; + prolog_key.ps_prolog.num_input_vgprs = shader->num_input_vgprs; + + if (info->colors_read) { + unsigned *color = shader->selector->color_attr_index; + + if (shader->key.ps.prolog.color_two_side) { + /* BCOLORs are stored after the last input. */ + prolog_key.ps_prolog.num_interp_inputs = info->num_inputs; + prolog_key.ps_prolog.face_vgpr_index = shader->face_vgpr_index; + shader->config.spi_ps_input_ena |= S_0286CC_FRONT_FACE_ENA(1); + } + + for (i = 0; i < 2; i++) { + unsigned location = info->input_interpolate_loc[color[i]]; + + if (!(info->colors_read & (0xf << i*4))) + continue; + + prolog_key.ps_prolog.color_attr_index[i] = color[i]; + + /* Force per-sample interpolation for the colors here. */ + if (shader->key.ps.prolog.force_persample_interp) + location = TGSI_INTERPOLATE_LOC_SAMPLE; + + switch (info->input_interpolate[color[i]]) { + case TGSI_INTERPOLATE_CONSTANT: + prolog_key.ps_prolog.color_interp_vgpr_index[i] = -1; + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + case TGSI_INTERPOLATE_COLOR: + switch (location) { + case TGSI_INTERPOLATE_LOC_SAMPLE: + prolog_key.ps_prolog.color_interp_vgpr_index[i] = 0; + shader->config.spi_ps_input_ena |= + S_0286CC_PERSP_SAMPLE_ENA(1); + break; + case TGSI_INTERPOLATE_LOC_CENTER: + prolog_key.ps_prolog.color_interp_vgpr_index[i] = 2; + shader->config.spi_ps_input_ena |= + S_0286CC_PERSP_CENTER_ENA(1); + break; + case TGSI_INTERPOLATE_LOC_CENTROID: + prolog_key.ps_prolog.color_interp_vgpr_index[i] = 4; + shader->config.spi_ps_input_ena |= + S_0286CC_PERSP_CENTROID_ENA(1); + break; + default: + assert(0); + } + break; + case TGSI_INTERPOLATE_LINEAR: + switch (location) { + case TGSI_INTERPOLATE_LOC_SAMPLE: + prolog_key.ps_prolog.color_interp_vgpr_index[i] = 6; + shader->config.spi_ps_input_ena |= + S_0286CC_LINEAR_SAMPLE_ENA(1); + break; + case TGSI_INTERPOLATE_LOC_CENTER: + prolog_key.ps_prolog.color_interp_vgpr_index[i] = 8; + shader->config.spi_ps_input_ena |= + S_0286CC_LINEAR_CENTER_ENA(1); + break; + case TGSI_INTERPOLATE_LOC_CENTROID: + prolog_key.ps_prolog.color_interp_vgpr_index[i] = 10; + shader->config.spi_ps_input_ena |= + S_0286CC_LINEAR_CENTROID_ENA(1); + break; + default: + assert(0); + } + break; + default: + assert(0); + } + } + } + + /* The prolog is a no-op if these aren't set. */ + if (prolog_key.ps_prolog.colors_read || + prolog_key.ps_prolog.states.force_persample_interp || + prolog_key.ps_prolog.states.poly_stipple) { + shader->prolog = + si_get_shader_part(sscreen, &sscreen->ps_prologs, + &prolog_key, tm, debug, + si_compile_ps_prolog); + if (!shader->prolog) + return false; + } /* Get the epilog. */ memset(&epilog_key, 0, sizeof(epilog_key)); @@ -5447,6 +5740,35 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen, if (!shader->epilog) return false; + /* Set up the enable bits for per-sample shading if needed. */ + if (shader->key.ps.prolog.force_persample_interp) { + if (G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_ena) || + G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena)) { + shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTER_ENA; + shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA; + shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1); + } + if (G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_ena) || + G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena)) { + shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTER_ENA; + shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA; + shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1); + } + } + + /* POW_W_FLOAT requires that one of the perspective weights is enabled. */ + if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_ena) && + !(shader->config.spi_ps_input_ena & 0xf)) { + shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1); + assert(G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_addr)); + } + + /* At least one pair of interpolation weights must be enabled. */ + if (!(shader->config.spi_ps_input_ena & 0x7f)) { + shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1); + assert(G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr)); + } + /* The sample mask input is always enabled, because the API shader always * passes it through to the epilog. Disable it here if it's unused. */ diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 928cb2e18e9..196fa3e9086 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -169,7 +169,7 @@ struct radeon_shader_reloc; #define SI_PARAM_SAMPLE_COVERAGE 20 #define SI_PARAM_POS_FIXED_PT 21 -#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1) +#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 9) /* +8 for COLOR[0..1] */ struct si_shader; @@ -199,6 +199,7 @@ struct si_shader_selector { unsigned max_gsvs_emit_size; /* PS parameters. */ + unsigned color_attr_index[2]; unsigned db_shader_control; /* Set 0xf or 0x0 (4 bits) per each written output. * ANDed with spi_shader_col_format. @@ -281,6 +282,17 @@ union si_shader_part_key { struct { struct si_tcs_epilog_bits states; } tcs_epilog; + struct { + struct si_ps_prolog_bits states; + unsigned num_input_sgprs:5; + unsigned num_input_vgprs:5; + /* Color interpolation and two-side color selection. */ + unsigned colors_read:8; /* color input components read */ + unsigned num_interp_inputs:5; /* BCOLOR is at this location */ + unsigned face_vgpr_index:5; + char color_attr_index[2]; + char color_interp_vgpr_index[2]; /* -1 == constant */ + } ps_prolog; struct { struct si_ps_epilog_bits states; unsigned colors_written:8; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 08f5d88e8dc..fbc377a6a4e 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -903,6 +903,13 @@ static void *si_create_shader_selector(struct pipe_context *ctx, for (i = 0; i < 8; i++) if (sel->info.colors_written & (1 << i)) sel->colors_written_4bit |= 0xf << (4 * i); + + for (i = 0; i < sel->info.num_inputs; i++) { + if (sel->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) { + int index = sel->info.input_semantic_index[i]; + sel->color_attr_index[index] = i; + } + } break; }