/*
 * Copyright © 2015 Red Hat
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "st_nir.h"

#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "pipe/p_context.h"

#include "program/program.h"
#include "program/prog_statevars.h"
#include "program/prog_parameter.h"
#include "program/ir_to_mesa.h"
#include "main/mtypes.h"
#include "main/errors.h"
#include "main/shaderapi.h"
#include "main/uniforms.h"
#include "main/shaderobj.h"
#include "st_context.h"
#include "st_glsl_types.h"
#include "st_program.h"

#include "compiler/nir/nir.h"
#include "compiler/glsl_types.h"
#include "compiler/glsl/glsl_to_nir.h"
#include "compiler/glsl/gl_nir.h"
#include "compiler/glsl/ir.h"
#include "compiler/glsl/ir_optimization.h"
#include "compiler/glsl/string_to_uint_map.h"

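/* Size of a GLSL type, measured in vec4 attribute slots (not treating the
 * type as a vertex input).
 */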
static int
type_size(const struct glsl_type *type)
{
   return type->count_attribute_slots(false);
}

/* Depending on PIPE_CAP_TGSI_TEXCOORD (st->needs_texcoord_semantic) we
 * may need to fix up varying slots so the glsl->nir path is aligned
 * with the anything->tgsi->nir path.
 */
static void
st_nir_fixup_varying_slots(struct st_context *st, struct exec_list *var_list)
{
   if (st->needs_texcoord_semantic)
      return;

   nir_foreach_variable(var, var_list) {
      if (var->data.location >= VARYING_SLOT_VAR0) {
         var->data.location += 9;
      } else if ((var->data.location >= VARYING_SLOT_TEX0) &&
                 (var->data.location <= VARYING_SLOT_TEX7)) {
         var->data.location += VARYING_SLOT_VAR0 - VARYING_SLOT_TEX0;
      }
   }
}

/* input location assignment for VS inputs must be handled specially, so
 * that it is aligned w/ st's vbo state.
 * (This isn't the case with, for ex, FS inputs, which only need to agree
 * on varying-slot w/ the VS outputs)
 */
static void
st_nir_assign_vs_in_locations(nir_shader *nir)
{
   nir->num_inputs = util_bitcount64(nir->info.inputs_read);
   nir_foreach_variable_safe(var, &nir->inputs) {
      /* NIR already assigns dual-slot inputs to two locations so all we have
       * to do is compact everything down.
       */
      if (var->data.location == VERT_ATTRIB_EDGEFLAG) {
         /* bit of a hack, mirroring st_translate_vertex_program */
         var->data.driver_location = nir->num_inputs++;
      } else if (nir->info.inputs_read & BITFIELD64_BIT(var->data.location)) {
         var->data.driver_location =
            util_bitcount64(nir->info.inputs_read &
                            BITFIELD64_MASK(var->data.location));
      } else {
         /* Move unused input variables to the globals list (with no
          * initialization), to avoid confusing drivers looking through the
          * inputs array and expecting to find inputs with a driver_location
          * set.
          */
         exec_node_remove(&var->node);
         var->data.mode = nir_var_shader_temp;
         exec_list_push_tail(&nir->globals, &var->node);
      }
   }
}

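/* Look up the index of a named parameter in the program's parameter list,
 * falling back to a prefix match for struct/array uniforms (see the long
 * comment below).
 */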
static int
st_nir_lookup_parameter_index(const struct gl_program_parameter_list *params,
                              const char *name)
{
   int loc = _mesa_lookup_parameter_index(params, name);

   /* is there a better way to do this?  If we have something like:
    *
    *    struct S {
    *       float f;
    *       vec4 v;
    *    };
    *    uniform S color;
    *
    * Then what we get in prog->Parameters looks like:
    *
    *    0: Name=color.f, Type=6, DataType=1406, Size=1
    *    1: Name=color.v, Type=6, DataType=8b52, Size=4
    *
    * So the name doesn't match up and _mesa_lookup_parameter_index()
    * fails.  In this case just find the first matching "color.*"..
    *
    * Note for arrays you could end up w/ color[n].f, for example.
    *
    * glsl_to_tgsi works slightly differently in this regard.  It is
    * emitting something more low level, so it just translates the
    * params list 1:1 to CONST[] regs.  Going from GLSL IR to TGSI,
    * it just calculates the additional offset of struct field members
    * in glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) or
    * glsl_to_tgsi_visitor::visit(ir_dereference_array *ir).  It never
    * needs to work backwards to get base var loc from the param-list
    * which already has them separated out.
    */
   if (loc < 0) {
      int namelen = strlen(name);
      for (unsigned i = 0; i < params->NumParameters; i++) {
         struct gl_program_parameter *p = &params->Parameters[i];
         if ((strncmp(p->Name, name, namelen) == 0) &&
             ((p->Name[namelen] == '.') || (p->Name[namelen] == '['))) {
            loc = i;
            break;
         }
      }
   }

   return loc;
}

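/* Assign driver_locations to uniform variables: samplers and images each get
 * their own index space, built-in ("gl_") state gets a parameter-list slot,
 * and everything else is looked up in prog->Parameters.
 */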
static void
st_nir_assign_uniform_locations(struct gl_context *ctx,
                                struct gl_program *prog,
                                struct exec_list *uniform_list)
{
   int shaderidx = 0;
   int imageidx = 0;

   nir_foreach_variable(uniform, uniform_list) {
      int loc;

      /*
       * UBOs have their own address spaces, so don't count them towards the
       * number of global uniforms
       */
      if (uniform->data.mode == nir_var_mem_ubo || uniform->data.mode == nir_var_mem_ssbo)
         continue;

      const struct glsl_type *type = glsl_without_array(uniform->type);
      if (!uniform->data.bindless && (type->is_sampler() || type->is_image())) {
         if (type->is_sampler()) {
            loc = shaderidx;
            shaderidx += type_size(uniform->type);
         } else {
            loc = imageidx;
            imageidx += type_size(uniform->type);
         }
      } else if (strncmp(uniform->name, "gl_", 3) == 0) {
         const gl_state_index16 *const stateTokens = uniform->state_slots[0].tokens;
         /* This state reference has already been set up by ir_to_mesa, but
          * we'll get the same index back here.
          */

         unsigned comps;
         if (glsl_type_is_struct_or_ifc(type)) {
            comps = 4;
         } else {
            comps = glsl_get_vector_elements(type);
         }

         if (ctx->Const.PackedDriverUniformStorage) {
            loc = _mesa_add_sized_state_reference(prog->Parameters,
                                                  stateTokens, comps, false);
            loc = prog->Parameters->ParameterValueOffset[loc];
         } else {
            loc = _mesa_add_state_reference(prog->Parameters, stateTokens);
         }
      } else {
         loc = st_nir_lookup_parameter_index(prog->Parameters, uniform->name);

         /* We need to check that loc is not -1 here before accessing the
          * array. It can be negative for example when we have a struct that
          * only contains opaque types.
          */
         if (loc >= 0 && ctx->Const.PackedDriverUniformStorage) {
            loc = prog->Parameters->ParameterValueOffset[loc];
         }
      }

      uniform->data.driver_location = loc;
   }
}

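/* Run the standard NIR optimization loop until no pass makes progress. */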
void
st_nir_opts(nir_shader *nir, bool scalar)
{
   bool progress;
   unsigned lower_flrp =
      (nir->options->lower_flrp16 ? 16 : 0) |
      (nir->options->lower_flrp32 ? 32 : 0) |
      (nir->options->lower_flrp64 ? 64 : 0);

   do {
      progress = false;

      NIR_PASS_V(nir, nir_lower_vars_to_ssa);

      /* Linking deals with unused inputs/outputs, but here we can remove
       * things local to the shader in the hopes that we can cleanup other
       * things. This pass will also remove variables with only stores, so we
       * might be able to make progress after it.
       */
      NIR_PASS(progress, nir, nir_remove_dead_variables,
               (nir_variable_mode)(nir_var_function_temp |
                                   nir_var_shader_temp |
                                   nir_var_mem_shared));

      NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
      NIR_PASS(progress, nir, nir_opt_dead_write_vars);

      if (scalar) {
         NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL);
         NIR_PASS_V(nir, nir_lower_phis_to_scalar);
      }

      NIR_PASS_V(nir, nir_lower_alu);
      NIR_PASS_V(nir, nir_lower_pack);
      NIR_PASS(progress, nir, nir_copy_prop);
      NIR_PASS(progress, nir, nir_opt_remove_phis);
      NIR_PASS(progress, nir, nir_opt_dce);
      if (nir_opt_trivial_continues(nir)) {
         progress = true;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_dce);
      }
      NIR_PASS(progress, nir, nir_opt_if, false);
      NIR_PASS(progress, nir, nir_opt_dead_cf);
      NIR_PASS(progress, nir, nir_opt_cse);
      NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);

      NIR_PASS(progress, nir, nir_opt_algebraic);
      NIR_PASS(progress, nir, nir_opt_constant_folding);

      if (lower_flrp != 0) {
         bool lower_flrp_progress = false;

         NIR_PASS(lower_flrp_progress, nir, nir_lower_flrp,
                  lower_flrp,
                  false /* always_precise */,
                  nir->options->lower_ffma);
         if (lower_flrp_progress) {
            NIR_PASS(progress, nir,
                     nir_opt_constant_folding);
            progress = true;
         }

         /* Nothing should rematerialize any flrps, so we only need to do this
          * lowering once.
          */
         lower_flrp = 0;
      }

      NIR_PASS(progress, nir, gl_nir_opt_access);

      NIR_PASS(progress, nir, nir_opt_undef);
      NIR_PASS(progress, nir, nir_opt_conditional_discard);
      if (nir->options->max_unroll_iterations) {
         NIR_PASS(progress, nir, nir_opt_loop_unroll, (nir_variable_mode)0);
      }
   } while (progress);
}

/* First third of converting glsl_to_nir.. this leaves things in a pre-
 * nir_lower_io state, so that shader variants can more easily insert/
 * replace variables, etc.
 */
static nir_shader *
st_glsl_to_nir(struct st_context *st, struct gl_program *prog,
               struct gl_shader_program *shader_program,
               gl_shader_stage stage)
{
   const nir_shader_compiler_options *options =
      st->ctx->Const.ShaderCompilerOptions[prog->info.stage].NirOptions;
   enum pipe_shader_type type = pipe_shader_type_from_mesa(stage);
   struct pipe_screen *screen = st->pipe->screen;
   bool is_scalar = screen->get_shader_param(screen, type, PIPE_SHADER_CAP_SCALAR_ISA);
   assert(options);
   bool lower_64bit =
      options->lower_int64_options || options->lower_doubles_options;

   if (prog->nir)
      return prog->nir;

   nir_shader *nir = glsl_to_nir(st->ctx, shader_program, stage, options);

   /* Set the next shader stage hint for VS and TES. */
   if (!nir->info.separate_shader &&
       (nir->info.stage == MESA_SHADER_VERTEX ||
        nir->info.stage == MESA_SHADER_TESS_EVAL)) {

      unsigned prev_stages = (1 << (prog->info.stage + 1)) - 1;
      unsigned stages_mask =
         ~prev_stages & shader_program->data->linked_stages;

      nir->info.next_stage = stages_mask ?
         (gl_shader_stage) u_bit_scan(&stages_mask) : MESA_SHADER_FRAGMENT;
   } else {
      nir->info.next_stage = MESA_SHADER_FRAGMENT;
   }

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   if (!st->ctx->SoftFP64 && nir->info.uses_64bit &&
       (options->lower_doubles_options & nir_lower_fp64_full_software) != 0) {
      st->ctx->SoftFP64 = glsl_float64_funcs_to_nir(st->ctx, options);
   }

   nir_variable_mode mask =
      (nir_variable_mode) (nir_var_shader_in | nir_var_shader_out);
   nir_remove_dead_variables(nir, mask);

   if (options->lower_all_io_to_temps ||
       nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_GEOMETRY) {
      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                 nir_shader_get_entrypoint(nir),
                 true, true);
   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                 nir_shader_get_entrypoint(nir),
                 true, false);
   }

   NIR_PASS_V(nir, nir_lower_global_vars_to_local);
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);

   if (is_scalar) {
      NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL);
   }

   /* before buffers and vars_to_ssa */
   NIR_PASS_V(nir, gl_nir_lower_bindless_images);
   st_nir_opts(nir, is_scalar);

   NIR_PASS_V(nir, gl_nir_lower_buffers, shader_program);
   /* Do a round of constant folding to clean up address calculations */
   NIR_PASS_V(nir, nir_opt_constant_folding);

   if (lower_64bit) {
      bool lowered_64bit_ops = false;
      if (options->lower_doubles_options) {
         NIR_PASS(lowered_64bit_ops, nir, nir_lower_doubles,
                  st->ctx->SoftFP64, options->lower_doubles_options);
      }
      if (options->lower_int64_options) {
         NIR_PASS(lowered_64bit_ops, nir, nir_lower_int64,
                  options->lower_int64_options);
      }

      if (lowered_64bit_ops)
         st_nir_opts(nir, is_scalar);
   }

   return nir;
}

/* Second third of converting glsl_to_nir. This creates uniforms, gathers
 * info on varyings, etc after NIR link time opts have been applied.
 */
static void
st_glsl_to_nir_post_opts(struct st_context *st, struct gl_program *prog,
                         struct gl_shader_program *shader_program)
{
   nir_shader *nir = prog->nir;

   /* Make a pass over the IR to add state references for any built-in
    * uniforms that are used. This has to be done now (during linking).
    * Code generation doesn't happen until the first time this shader is
    * used for rendering. Waiting until then to generate the parameters is
    * too late. At that point, the values for the built-in uniforms won't
    * get sent to the shader.
    */
   nir_foreach_variable(var, &nir->uniforms) {
      if (strncmp(var->name, "gl_", 3) == 0) {
         const nir_state_slot *const slots = var->state_slots;
         assert(var->state_slots != NULL);

         const struct glsl_type *type = glsl_without_array(var->type);
         for (unsigned int i = 0; i < var->num_state_slots; i++) {
            unsigned comps;
            if (glsl_type_is_struct_or_ifc(type)) {
               /* Builtin structs require special handling; for now we just
                * make all members vec4. See st_nir_lower_builtin.
                */
               comps = 4;
            } else {
               comps = glsl_get_vector_elements(type);
            }

            if (st->ctx->Const.PackedDriverUniformStorage) {
               _mesa_add_sized_state_reference(prog->Parameters,
                                               slots[i].tokens,
                                               comps, false);
            } else {
               _mesa_add_state_reference(prog->Parameters,
                                         slots[i].tokens);
            }
         }
      }
   }

   /* Avoid reallocation of the program parameter list, because the uniform
    * storage is only associated with the original parameter list.
    * This should be enough for Bitmap and DrawPixels constants.
    */
   _mesa_reserve_parameter_storage(prog->Parameters, 8);

   /* This has to be done last. Any operation that can cause
    * prog->ParameterValues to get reallocated (e.g., anything that adds a
    * program constant) has to happen before creating this linkage.
    */
   _mesa_associate_uniform_storage(st->ctx, shader_program, prog);

   st_set_prog_affected_state_flags(prog);

   NIR_PASS_V(nir, st_nir_lower_builtin);
   NIR_PASS_V(nir, gl_nir_lower_atomics, shader_program, true);
   NIR_PASS_V(nir, nir_opt_intrinsics);

   nir_variable_mode mask = nir_var_function_temp;
   nir_remove_dead_variables(nir, mask);

   if (st->ctx->_Shader->Flags & GLSL_DUMP) {
      _mesa_log("\n");
      _mesa_log("NIR IR for linked %s program %d:\n",
                _mesa_shader_stage_to_string(prog->info.stage),
                shader_program->Name);
      nir_print_shader(nir, _mesa_get_log_file());
      _mesa_log("\n\n");
   }
}

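/* Point the stage-specific st program wrapper at the NIR shader and mark
 * its IR type as PIPE_SHADER_IR_NIR.
 */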
static void
set_st_program(struct gl_program *prog,
               struct gl_shader_program *shader_program,
               nir_shader *nir)
{
   struct st_vertex_program *stvp;
   struct st_common_program *stp;
   struct st_fragment_program *stfp;
   struct st_compute_program *stcp;

   switch (prog->info.stage) {
   case MESA_SHADER_VERTEX:
      stvp = (struct st_vertex_program *)prog;
      stvp->shader_program = shader_program;
      stvp->tgsi.type = PIPE_SHADER_IR_NIR;
      stvp->tgsi.ir.nir = nir;
      break;
   case MESA_SHADER_GEOMETRY:
   case MESA_SHADER_TESS_CTRL:
   case MESA_SHADER_TESS_EVAL:
      stp = (struct st_common_program *)prog;
      stp->shader_program = shader_program;
      stp->tgsi.type = PIPE_SHADER_IR_NIR;
      stp->tgsi.ir.nir = nir;
      break;
   case MESA_SHADER_FRAGMENT:
      stfp = (struct st_fragment_program *)prog;
      stfp->shader_program = shader_program;
      stfp->tgsi.type = PIPE_SHADER_IR_NIR;
      stfp->tgsi.ir.nir = nir;
      break;
   case MESA_SHADER_COMPUTE:
      stcp = (struct st_compute_program *)prog;
      stcp->shader_program = shader_program;
      stcp->tgsi.ir_type = PIPE_SHADER_IR_NIR;
      stcp->tgsi.prog = nir;
      break;
   default:
      unreachable("unknown shader stage");
   }
}

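/* Build a gl_program (with its parameter list) for a linked shader, run the
 * GLSL IR passes that must precede translation, and translate it to NIR.
 */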
static void
st_nir_get_mesa_program(struct gl_context *ctx,
                        struct gl_shader_program *shader_program,
                        struct gl_linked_shader *shader)
{
   struct st_context *st = st_context(ctx);
   struct pipe_screen *pscreen = ctx->st->pipe->screen;
   struct gl_program *prog;

   validate_ir_tree(shader->ir);

   prog = shader->Program;

   prog->Parameters = _mesa_new_parameter_list();

   _mesa_copy_linked_program_data(shader_program, shader);
   _mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader,
                                               prog->Parameters);

   /* Remove reads from output registers. */
   if (!pscreen->get_param(pscreen, PIPE_CAP_TGSI_CAN_READ_OUTPUTS))
      lower_output_reads(shader->Stage, shader->ir);

   if (ctx->_Shader->Flags & GLSL_DUMP) {
      _mesa_log("\n");
      _mesa_log("GLSL IR for linked %s program %d:\n",
                _mesa_shader_stage_to_string(shader->Stage),
                shader_program->Name);
      _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL);
      _mesa_log("\n\n");
   }

   prog->ExternalSamplersUsed = gl_external_samplers(prog);
   _mesa_update_shader_textures_used(shader_program, prog);

   nir_shader *nir = st_glsl_to_nir(st, prog, shader_program, shader->Stage);

   set_st_program(prog, shader_program, nir);
   prog->nir = nir;
}

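/* IO is scalarized to enable cross-shader varying optimizations; this pass
 * re-vectorizes IO afterwards, so that drivers preferring whole-vector IO
 * (selected via the vectorize_io compiler option) get a single vec4
 * load/store instead of four scalar ones.
 */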
static void
st_nir_vectorize_io(nir_shader *producer, nir_shader *consumer)
{
   NIR_PASS_V(producer, nir_lower_io_to_vector, nir_var_shader_out);
   NIR_PASS_V(producer, nir_opt_combine_stores, nir_var_shader_out);
   NIR_PASS_V(consumer, nir_lower_io_to_vector, nir_var_shader_in);

   if ((producer)->info.stage != MESA_SHADER_TESS_CTRL) {
      /* Calling lower_io_to_vector creates output variable writes with
       * write-masks. We only support these for TCS outputs, so for other
       * stages, we need to call nir_lower_io_to_temporaries to get rid of
       * them. This, in turn, creates temporary variables and extra
       * copy_deref intrinsics that we need to clean up.
       */
      NIR_PASS_V(producer, nir_lower_io_to_temporaries,
                 nir_shader_get_entrypoint(producer), true, false);
      NIR_PASS_V(producer, nir_lower_global_vars_to_local);
      NIR_PASS_V(producer, nir_split_var_copies);
      NIR_PASS_V(producer, nir_lower_var_copies);
   }
}

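/* Optimize the varyings between a producer/consumer shader pair and remove
 * any that end up unused.
 */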
static void
st_nir_link_shaders(nir_shader **producer, nir_shader **consumer, bool scalar)
{
   if (scalar) {
      NIR_PASS_V(*producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
      NIR_PASS_V(*consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
   }

   nir_lower_io_arrays_to_elements(*producer, *consumer);

   st_nir_opts(*producer, scalar);
   st_nir_opts(*consumer, scalar);

   if (nir_link_opt_varyings(*producer, *consumer))
      st_nir_opts(*consumer, scalar);

   NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out);
   NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in);

   if (nir_remove_unused_varyings(*producer, *consumer)) {
      NIR_PASS_V(*producer, nir_lower_global_vars_to_local);
      NIR_PASS_V(*consumer, nir_lower_global_vars_to_local);

      /* The backend might not be able to handle indirects on
       * temporaries so we need to lower indirects on any of the
       * varyings we have demoted here.
       *
       * TODO: radeonsi shouldn't need to do this, however LLVM isn't
       * currently smart enough to handle indirects without causing excess
       * spilling causing the gpu to hang.
       *
       * See the following thread for more details of the problem:
       * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
       */
      nir_variable_mode indirect_mask = nir_var_function_temp;

      NIR_PASS_V(*producer, nir_lower_indirect_derefs, indirect_mask);
      NIR_PASS_V(*consumer, nir_lower_indirect_derefs, indirect_mask);

      st_nir_opts(*producer, scalar);
      st_nir_opts(*consumer, scalar);

      /* Lowering indirects can cause varyings to become unused.
       * nir_compact_varyings() depends on all dead varyings being removed so
       * we need to call nir_remove_dead_variables() again here.
       */
      NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out);
      NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in);
   }
}

static void
st_lower_patch_vertices_in(struct gl_shader_program *shader_prog)
{
   struct gl_linked_shader *linked_tcs =
      shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
   struct gl_linked_shader *linked_tes =
      shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];

   /* If we have a TCS and TES linked together, lower TES patch vertices. */
   if (linked_tcs && linked_tes) {
      nir_shader *tcs_nir = linked_tcs->Program->nir;
      nir_shader *tes_nir = linked_tes->Program->nir;

      /* The TES input vertex count is the TCS output vertex count,
       * lower TES gl_PatchVerticesIn to a constant.
       */
      uint32_t tes_patch_verts = tcs_nir->info.tess.tcs_vertices_out;
      NIR_PASS_V(tes_nir, nir_lower_patch_vertices, tes_patch_verts, NULL);
   }
}

extern "C" {

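/* Configure fragment-coordinate y-flip lowering from the screen's coordinate
 * convention caps, adding the WPOS transform state reference when the pass
 * makes progress.
 */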
void
st_nir_lower_wpos_ytransform(struct nir_shader *nir,
                             struct gl_program *prog,
                             struct pipe_screen *pscreen)
{
   if (nir->info.stage != MESA_SHADER_FRAGMENT)
      return;

   static const gl_state_index16 wposTransformState[STATE_LENGTH] = {
      STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
   };
   nir_lower_wpos_ytransform_options wpos_options = { { 0 } };

   memcpy(wpos_options.state_tokens, wposTransformState,
          sizeof(wpos_options.state_tokens));
   wpos_options.fs_coord_origin_upper_left =
      pscreen->get_param(pscreen,
                         PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
   wpos_options.fs_coord_origin_lower_left =
      pscreen->get_param(pscreen,
                         PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
   wpos_options.fs_coord_pixel_center_integer =
      pscreen->get_param(pscreen,
                         PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
   wpos_options.fs_coord_pixel_center_half_integer =
      pscreen->get_param(pscreen,
                         PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER);

   if (nir_lower_wpos_ytransform(nir, &wpos_options)) {
      nir_validate_shader(nir, "after nir_lower_wpos_ytransform");
      _mesa_add_state_reference(prog->Parameters, wposTransformState);
   }
}

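/* Entry point for NIR-based linking: translate each linked stage to NIR,
 * run cross-stage varying optimizations, and finish per-stage lowering.
 */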
bool
st_link_nir(struct gl_context *ctx,
            struct gl_shader_program *shader_program)
{
   struct st_context *st = st_context(ctx);
   struct pipe_screen *screen = st->pipe->screen;
   bool is_scalar[MESA_SHADER_STAGES];

   unsigned last_stage = 0;
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
      if (shader == NULL)
         continue;

      /* Determine scalar property of each shader stage */
      enum pipe_shader_type type = pipe_shader_type_from_mesa(shader->Stage);
      is_scalar[i] = screen->get_shader_param(screen, type,
                                              PIPE_SHADER_CAP_SCALAR_ISA);

      st_nir_get_mesa_program(ctx, shader_program, shader);
      last_stage = i;

      if (is_scalar[i]) {
         NIR_PASS_V(shader->Program->nir, nir_lower_load_const_to_scalar);
      }
   }

   /* Linking the stages in the opposite order (from fragment to vertex)
    * ensures that inter-shader outputs written to in an earlier stage
    * are eliminated if they are (transitively) not used in a later
    * stage.
    */
   int next = last_stage;
   for (int i = next - 1; i >= 0; i--) {
      struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
      if (shader == NULL)
         continue;

      st_nir_link_shaders(&shader->Program->nir,
                          &shader_program->_LinkedShaders[next]->Program->nir,
                          is_scalar[i]);
      next = i;
   }

   int prev = -1;
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
      if (shader == NULL)
         continue;

      nir_shader *nir = shader->Program->nir;

      NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, shader->Program,
                 st->pipe->screen);

      NIR_PASS_V(nir, nir_lower_system_values);
      NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);

      nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
      shader->Program->info = nir->info;
      if (i == MESA_SHADER_VERTEX) {
         /* NIR expands dual-slot inputs out to two locations. We need to
          * compact things back down to GL-style single-slot inputs to avoid
          * confusing the state tracker.
          */
         shader->Program->info.inputs_read =
            nir_get_single_slot_attribs_mask(nir->info.inputs_read,
                                             shader->Program->DualSlotInputs);
      }

      if (prev != -1) {
         struct gl_program *prev_shader =
            shader_program->_LinkedShaders[prev]->Program;

         /* We can't use nir_compact_varyings with transform feedback, since
          * the pipe_stream_output->output_register field is based on the
          * pre-compacted driver_locations.
          */
         if (!(prev_shader->sh.LinkedTransformFeedback &&
               prev_shader->sh.LinkedTransformFeedback->NumVarying > 0))
            nir_compact_varyings(shader_program->_LinkedShaders[prev]->Program->nir,
                                 nir, ctx->API != API_OPENGL_COMPAT);

         if (ctx->Const.ShaderCompilerOptions[i].NirOptions->vectorize_io)
            st_nir_vectorize_io(prev_shader->nir, nir);
      }
      prev = i;
   }

   st_lower_patch_vertices_in(shader_program);

   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
      if (shader == NULL)
         continue;

      st_glsl_to_nir_post_opts(st, shader->Program, shader_program);

      assert(shader->Program);
      if (!ctx->Driver.ProgramStringNotify(ctx,
                                           _mesa_shader_stage_to_program(i),
                                           shader->Program)) {
         _mesa_reference_program(ctx, &shader->Program, NULL);
         return false;
      }

      nir_sweep(shader->Program->nir);

      /* The GLSL IR won't be needed anymore. */
      ralloc_free(shader->ir);
      shader->ir = NULL;
   }

   return true;
}

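/* Assign driver locations for shader inputs/outputs, with per-stage rules:
 * VS inputs must match the vbo state, and varyings may need TEXCOORD fixups.
 */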
void
st_nir_assign_varying_locations(struct st_context *st, nir_shader *nir)
{
   if (nir->info.stage == MESA_SHADER_VERTEX) {
      /* Needs special handling so drvloc matches the vbo state: */
      st_nir_assign_vs_in_locations(nir);
      /* Re-lower global vars, to deal with any dead VS inputs. */
      NIR_PASS_V(nir, nir_lower_global_vars_to_local);

      nir_assign_io_var_locations(&nir->outputs,
                                  &nir->num_outputs,
                                  nir->info.stage);
      st_nir_fixup_varying_slots(st, &nir->outputs);
   } else if (nir->info.stage == MESA_SHADER_GEOMETRY ||
              nir->info.stage == MESA_SHADER_TESS_CTRL ||
              nir->info.stage == MESA_SHADER_TESS_EVAL) {
      nir_assign_io_var_locations(&nir->inputs,
                                  &nir->num_inputs,
                                  nir->info.stage);
      st_nir_fixup_varying_slots(st, &nir->inputs);

      nir_assign_io_var_locations(&nir->outputs,
                                  &nir->num_outputs,
                                  nir->info.stage);
      st_nir_fixup_varying_slots(st, &nir->outputs);
   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      nir_assign_io_var_locations(&nir->inputs,
                                  &nir->num_inputs,
                                  nir->info.stage);
      st_nir_fixup_varying_slots(st, &nir->inputs);
      nir_assign_io_var_locations(&nir->outputs,
                                  &nir->num_outputs,
                                  nir->info.stage);
   } else if (nir->info.stage == MESA_SHADER_COMPUTE) {
      /* TODO? */
   } else {
      unreachable("invalid shader type");
   }
}

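/* Lower sampler derefs, then copy the textures_used bitmasks gathered during
 * that lowering back into the gl_program (done here, rather than in
 * nir_shader_gather_info, to avoid pass-ordering issues).
 */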
void
st_nir_lower_samplers(struct pipe_screen *screen, nir_shader *nir,
                      struct gl_shader_program *shader_program,
                      struct gl_program *prog)
{
   if (screen->get_param(screen, PIPE_CAP_NIR_SAMPLERS_AS_DEREF))
      NIR_PASS_V(nir, gl_nir_lower_samplers_as_deref, shader_program);
   else
      NIR_PASS_V(nir, gl_nir_lower_samplers, shader_program);

   if (prog) {
      prog->info.textures_used = nir->info.textures_used;
      prog->info.textures_used_by_txf = nir->info.textures_used_by_txf;
   }
}

/* Last third of preparing nir from glsl, which happens after shader
 * variant lowering.
 */
void
st_finalize_nir(struct st_context *st, struct gl_program *prog,
                struct gl_shader_program *shader_program, nir_shader *nir)
{
   struct pipe_screen *screen = st->pipe->screen;
   const nir_shader_compiler_options *options =
      st->ctx->Const.ShaderCompilerOptions[prog->info.stage].NirOptions;

   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);
   if (options->lower_all_io_to_temps ||
       options->lower_all_io_to_elements ||
       nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_GEOMETRY) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
   }

   st_nir_assign_varying_locations(st, nir);

   NIR_PASS_V(nir, nir_lower_atomics_to_ssbo,
              st->ctx->Const.Program[nir->info.stage].MaxAtomicBuffers);

   st_nir_assign_uniform_locations(st->ctx, prog,
                                   &nir->uniforms);

   /* Set num_uniforms in number of attribute slots (vec4s) */
   nir->num_uniforms = DIV_ROUND_UP(prog->Parameters->NumParameterValues, 4);

   if (st->ctx->Const.PackedDriverUniformStorage) {
      NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, st_glsl_type_dword_size,
                 (nir_lower_io_options)0);
      NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 4);
   } else {
      NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, st_glsl_uniforms_type_size,
                 (nir_lower_io_options)0);
   }

   st_nir_lower_samplers(screen, nir, shader_program, prog);
}

} /* extern "C" */