r600/sfn: Add lowering passes for Tessellation IO
Lower the input and output intrinsics to r600-specific LDS intrinsics. Signed-off-by: Gert Wollny <gert.wollny@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4714>
This commit is contained in:
parent
1b3e103d0b
commit
d77b81ce50
|
@ -134,6 +134,7 @@ CXX_SOURCES = \
|
|||
sfn/sfn_nir.h \
|
||||
sfn/sfn_nir_lower_fs_out_to_vector.cpp \
|
||||
sfn/sfn_nir_lower_fs_out_to_vector.h \
|
||||
sfn/sfn_nir_lower_tess_io.cpp \
|
||||
sfn/sfn_nir_vectorize_vs_inputs.c \
|
||||
sfn/sfn_shader_base.cpp \
|
||||
sfn/sfn_shader_base.h \
|
||||
|
|
|
@ -151,6 +151,7 @@ files_r600 = files(
|
|||
'sfn/sfn_nir.h',
|
||||
'sfn/sfn_nir_lower_fs_out_to_vector.cpp',
|
||||
'sfn/sfn_nir_lower_fs_out_to_vector.h',
|
||||
'sfn/sfn_nir_lower_tess_io.cpp',
|
||||
'sfn/sfn_nir_vectorize_vs_inputs.c',
|
||||
'sfn/sfn_shader_base.cpp',
|
||||
'sfn/sfn_shader_base.h',
|
||||
|
|
|
@ -214,6 +214,8 @@ struct r600_bytecode_cf {
|
|||
struct r600_bytecode_alu *prev_bs_head;
|
||||
struct r600_bytecode_alu *prev2_bs_head;
|
||||
unsigned isa[2];
|
||||
unsigned nlds_read;
|
||||
unsigned nqueue_read;
|
||||
};
|
||||
|
||||
#define FC_NONE 0
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#define SFN_NIR_H
|
||||
|
||||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
#include "sfn_shader_base.h"
|
||||
|
@ -96,15 +97,33 @@ private:
|
|||
|
||||
#endif
|
||||
|
||||
/* Build a three-component, 32-bit integer immediate holding (x, y, z). */
static inline nir_ssa_def *
r600_imm_ivec3(nir_builder *build, int x, int y, int z)
{
   nir_const_value comps[3];

   comps[0] = nir_const_value_for_int(x, 32);
   comps[1] = nir_const_value_for_int(y, 32);
   comps[2] = nir_const_value_for_int(z, 32);

   return nir_build_imm(build, 3, 32, comps);
}
|
||||
|
||||
bool r600_lower_tess_io(nir_shader *shader, enum pipe_prim_type prim_type);
|
||||
bool r600_append_tcs_TF_emission(nir_shader *shader, enum pipe_prim_type prim_type);
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
bool r600_vectorize_vs_inputs(nir_shader *shader);
|
||||
|
||||
|
||||
int r600_shader_from_nir(struct r600_context *rctx,
|
||||
struct r600_pipe_shader *pipeshader,
|
||||
union r600_shader_key *key);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,464 @@
|
|||
#include "sfn_nir.h"
|
||||
|
||||
bool r600_lower_tess_io_filter(const nir_instr *instr)
|
||||
{
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
|
||||
switch (op->intrinsic) {
|
||||
case nir_intrinsic_load_input:
|
||||
case nir_intrinsic_store_output:
|
||||
case nir_intrinsic_load_output:
|
||||
case nir_intrinsic_load_per_vertex_input:
|
||||
case nir_intrinsic_load_per_vertex_output:
|
||||
case nir_intrinsic_store_per_vertex_output:
|
||||
case nir_intrinsic_load_patch_vertices_in:
|
||||
case nir_intrinsic_load_tess_level_outer:
|
||||
case nir_intrinsic_load_tess_level_inner:
|
||||
return true;
|
||||
default:
|
||||
;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Insert the parameter-base intrinsic `op` at the builder cursor and return
 * its four-component, 32-bit SSA result.
 */
static nir_ssa_def *
emit_load_param_base(nir_builder *b, nir_intrinsic_op op)
{
   const unsigned ncomps = 4;
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);

   load->num_components = ncomps;
   nir_ssa_dest_init(&load->instr, &load->dest, ncomps, 32, NULL);
   nir_builder_instr_insert(b, &load->instr);

   return &load->dest.ssa;
}
|
||||
|
||||
/*
 * Map a driver location to the offset of the corresponding varying slot.
 *
 * Walks the variable list `io` and, for every variable whose
 * driver_location equals `index`, translates its varying slot into an
 * offset (0x10 per slot).  Variables in slots that are not handled are
 * skipped; if no variable yields an offset the result is 0.
 *
 * NOTE(review): the original carried a "PATCH is missing" TODO even though
 * VARYING_SLOT_PATCH0 and above are handled below - confirm it is obsolete.
 */
static int get_tcs_varying_offset(exec_list *io, unsigned index)
{
   nir_foreach_variable(var, io) {
      if (var->data.driver_location != index)
         continue;

      const int slot = var->data.location;

      /* Fixed-function slots with hard-wired offsets. */
      if (slot == VARYING_SLOT_POS)
         return 0;
      if (slot == VARYING_SLOT_PSIZ)
         return 0x10;
      if (slot == VARYING_SLOT_CLIP_DIST0)
         return 0x20;
      if (slot == VARYING_SLOT_CLIP_DIST1)
         return 0x30;

      /* Tess factors start at offset zero, outer levels first. */
      if (slot == VARYING_SLOT_TESS_LEVEL_OUTER)
         return 0;
      if (slot == VARYING_SLOT_TESS_LEVEL_INNER)
         return 0x10;

      /* Generic varyings follow the four fixed-function slots. */
      if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)
         return 0x10 * (slot - VARYING_SLOT_VAR0) + 0x40;

      /* Generic patch varyings follow the two tess level slots. */
      if (slot >= VARYING_SLOT_PATCH0)
         return 0x10 * (slot - VARYING_SLOT_PATCH0) + 0x20;
   }

   return 0;
}
|
||||
|
||||
/* Emit a nir_op_umad24 ALU instruction on (op1, op2, op3) and return its result. */
static inline nir_ssa_def *
r600_umad_24(nir_builder *b, nir_ssa_def *op1, nir_ssa_def *op2, nir_ssa_def *op3)
{
   nir_ssa_def *mad = nir_build_alu(b, nir_op_umad24, op1, op2, op3, NULL);
   return mad;
}
|
||||
|
||||
/*
 * Compute umad24(param_base.x, rel_patch_id, param_base.w).
 * NOTE(review): presumably .x is the per-patch stride and .w the start of
 * the per-patch data - confirm against the driver's param-base layout.
 */
static inline nir_ssa_def *
r600_tcs_base_address(nir_builder *b, nir_ssa_def *param_base, nir_ssa_def *rel_patch_id)
{
   nir_ssa_def *chan_x = nir_channel(b, param_base, 0);
   nir_ssa_def *chan_w = nir_channel(b, param_base, 3);

   return r600_umad_24(b, chan_x, rel_patch_id, chan_w);
}
|
||||
|
||||
|
||||
/*
 * Build the address for a per-vertex input load.
 *
 * The address is base.x * patch_id, plus base.y * src[0] unless src[0] is
 * the constant zero, plus the varying offset looked up in the shader's
 * inputs, plus src[1] << 4 unless src[1] is the constant zero.
 */
static nir_ssa_def *
emil_lsd_in_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op)
{
   nir_ssa_def *lds_addr = nir_build_alu(b, nir_op_umul24,
                                         nir_channel(b, base, 0),
                                         patch_id, NULL, NULL);

   /* Skip the multiply-add when the first source is a known zero. */
   auto first_idx = nir_src_as_const_value(op->src[0]);
   const bool first_is_zero = first_idx && first_idx->u32 == 0;
   if (!first_is_zero) {
      lds_addr = r600_umad_24(b, nir_channel(b, base, 1),
                              op->src[0].ssa, lds_addr);
   }

   const int slot_offset = get_tcs_varying_offset(&b->shader->inputs,
                                                  nir_intrinsic_base(op));
   nir_ssa_def *rel_offset = nir_imm_int(b, slot_offset);

   /* Likewise, only add the shifted second source when it is not a known zero. */
   auto second_idx = nir_src_as_const_value(op->src[1]);
   const bool second_is_zero = second_idx && second_idx->u32 == 0;
   if (!second_is_zero) {
      nir_ssa_def *shifted = nir_ishl(b, op->src[1].ssa, nir_imm_int(b, 4));
      rel_offset = nir_iadd(b, rel_offset, shifted);
   }

   return nir_iadd(b, lds_addr, rel_offset);
}
|
||||
|
||||
/*
 * Build the address for a per-vertex output access.
 *
 * Computes umad24(base.y, src[src_offset], umad24(base.x, patch_id, base.z)),
 * then adds src[src_offset + 1] << 4 and the varying offset looked up in
 * `io` for the intrinsic's base.
 */
static nir_ssa_def *
emil_lsd_out_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op, exec_list *io, int src_offset)
{
   nir_ssa_def *patch_addr = r600_umad_24(b, nir_channel(b, base, 0),
                                          patch_id,
                                          nir_channel(b, base, 2));
   nir_ssa_def *vertex_addr = r600_umad_24(b, nir_channel(b, base, 1),
                                           op->src[src_offset].ssa, patch_addr);

   const int slot_offset = get_tcs_varying_offset(io, nir_intrinsic_base(op));

   nir_ssa_def *shifted = nir_ishl(b, op->src[src_offset + 1].ssa,
                                   nir_imm_int(b, 4));
   nir_ssa_def *indexed_addr = nir_iadd(b, vertex_addr, shifted);

   return nir_iadd(b, indexed_addr, nir_imm_int(b, slot_offset));
}
|
||||
|
||||
/*
 * Return an immediate vector of per-component offsets.
 *
 * Selectors 1..4 give that many offsets starting at 0 in steps of 4 (used
 * for the tess outer levels and for generic loads); selectors 5 and 6 give
 * one or two offsets starting at 16 (tess inner levels).  Any other value
 * is treated as unreachable.
 */
static nir_ssa_def *load_offset_group(nir_builder *b, int ncomponents)
{
   nir_ssa_def *offsets = nullptr;

   switch (ncomponents) {
   /* tess outer offsets */
   case 1:
      offsets = nir_imm_int(b, 0);
      break;
   case 2:
      offsets = nir_imm_ivec2(b, 0, 4);
      break;
   case 3:
      offsets = r600_imm_ivec3(b, 0, 4, 8);
      break;
   case 4:
      offsets = nir_imm_ivec4(b, 0, 4, 8, 12);
      break;
   /* tess inner offsets */
   case 5:
      offsets = nir_imm_int(b, 16);
      break;
   case 6:
      offsets = nir_imm_ivec2(b, 16, 20);
      break;
   default:
      debug_printf("Got %d components\n", ncomponents);
      unreachable("Unsupported component count");
   }

   return offsets;
}
|
||||
|
||||
/*
 * Replace the load intrinsic `op` with a load_local_shared_r600 at `addr`.
 *
 * The new load has the same component count and component index as `op`;
 * its address source is `addr` plus the per-component offset vector.  All
 * uses of the old destination are redirected and `op` is removed.
 */
static void replace_load_instr(nir_builder *b, nir_intrinsic_instr *op, nir_ssa_def *addr)
{
   nir_intrinsic_instr *lds_load =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
   const unsigned ncomps = op->num_components;

   lds_load->num_components = ncomps;
   nir_ssa_dest_init(&lds_load->instr, &lds_load->dest, ncomps, 32, NULL);

   nir_ssa_def *comp_offsets = load_offset_group(b, ncomps);
   nir_ssa_def *comp_addrs = nir_iadd(b, addr, comp_offsets);
   lds_load->src[0] = nir_src_for_ssa(comp_addrs);
   nir_intrinsic_set_component(lds_load, nir_intrinsic_component(op));
   nir_builder_instr_insert(b, &lds_load->instr);

   nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&lds_load->dest.ssa));
   nir_instr_remove(&op->instr);
}
|
||||
|
||||
/*
 * Insert a load_tcs_rel_patch_id_r600 intrinsic at the builder cursor and
 * return its scalar 32-bit result.
 */
static nir_ssa_def *
r600_load_rel_patch_id(nir_builder *b)
{
   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_tcs_rel_patch_id_r600);

   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, 32, NULL);
   nir_builder_instr_insert(b, &load->instr);

   return &load->dest.ssa;
}
|
||||
|
||||
static void
|
||||
emit_store_lds(nir_builder *b, nir_intrinsic_instr *op, nir_ssa_def *addr)
|
||||
{
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
unsigned test_mask = (0x3 << 2 * i);
|
||||
if (!(nir_intrinsic_write_mask(op) & test_mask))
|
||||
continue;
|
||||
|
||||
auto store_tcs_out = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_local_shared_r600);
|
||||
unsigned writemask = nir_intrinsic_write_mask(op) & test_mask;
|
||||
nir_intrinsic_set_write_mask(store_tcs_out, writemask);
|
||||
store_tcs_out->src[0] = nir_src_for_ssa(op->src[0].ssa);
|
||||
store_tcs_out->num_components = store_tcs_out->src[0].ssa->num_components;
|
||||
bool start_even = (writemask & (1u << (2 * i)));
|
||||
|
||||
auto addr2 = nir_iadd(b, addr, nir_imm_int(b, 8 * i + (start_even ? 0 : 4)));
|
||||
store_tcs_out->src[1] = nir_src_for_ssa(addr2);
|
||||
|
||||
nir_builder_instr_insert(b, &store_tcs_out->instr);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Add the IO offset of `op` to `addr`: src[src_offset] << 4 plus the
 * varying offset looked up in `io` for the intrinsic's base.
 */
static nir_ssa_def *
emil_tcs_io_offset(nir_builder *b, nir_ssa_def *addr, nir_intrinsic_instr *op, exec_list *io, int src_offset)
{
   const int slot_offset = get_tcs_varying_offset(io, nir_intrinsic_base(op));

   nir_ssa_def *shifted = nir_ishl(b, op->src[src_offset].ssa, nir_imm_int(b, 4));
   nir_ssa_def *indexed_addr = nir_iadd(b, addr, shifted);

   return nir_iadd(b, indexed_addr, nir_imm_int(b, slot_offset));
}
|
||||
|
||||
|
||||
inline unsigned
|
||||
outer_tf_components(pipe_prim_type prim_type)
|
||||
{
|
||||
switch (prim_type) {
|
||||
case PIPE_PRIM_LINES: return 2;
|
||||
case PIPE_PRIM_TRIANGLES: return 3;
|
||||
case PIPE_PRIM_QUADS: return 4;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
static bool
|
||||
r600_lower_tess_io_impl(nir_builder *b, nir_instr *instr, enum pipe_prim_type prim_type)
|
||||
{
|
||||
static nir_ssa_def *load_in_param_base = nullptr;
|
||||
static nir_ssa_def *load_out_param_base = nullptr;
|
||||
|
||||
b->cursor = nir_before_instr(instr);
|
||||
nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
|
||||
|
||||
if (b->shader->info.stage == MESA_SHADER_TESS_CTRL) {
|
||||
load_in_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_in_param_base_r600);
|
||||
load_out_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
|
||||
} else if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
|
||||
load_in_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
|
||||
} else if (b->shader->info.stage == MESA_SHADER_VERTEX) {
|
||||
load_out_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_in_param_base_r600);
|
||||
}
|
||||
|
||||
auto rel_patch_id = r600_load_rel_patch_id(b);
|
||||
|
||||
unsigned tf_inner_address_offset = 0;
|
||||
unsigned ncomps_correct = 0;
|
||||
|
||||
switch (op->intrinsic) {
|
||||
case nir_intrinsic_load_patch_vertices_in: {
|
||||
auto vertices_in = nir_channel(b, load_in_param_base, 2);
|
||||
nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(vertices_in));
|
||||
nir_instr_remove(&op->instr);
|
||||
return true;
|
||||
}
|
||||
case nir_intrinsic_load_per_vertex_input: {
|
||||
nir_ssa_def *addr =
|
||||
b->shader->info.stage == MESA_SHADER_TESS_CTRL ?
|
||||
emil_lsd_in_addr(b, load_in_param_base, rel_patch_id, op) :
|
||||
emil_lsd_out_addr(b, load_in_param_base, rel_patch_id, op, &b->shader->inputs, 0);
|
||||
replace_load_instr(b, op, addr);
|
||||
return true;
|
||||
}
|
||||
case nir_intrinsic_store_per_vertex_output: {
|
||||
nir_ssa_def *addr = emil_lsd_out_addr(b, load_out_param_base, rel_patch_id, op, &b->shader->outputs, 1);
|
||||
emit_store_lds(b, op, addr);
|
||||
nir_instr_remove(instr);
|
||||
return true;
|
||||
}
|
||||
case nir_intrinsic_load_per_vertex_output: {
|
||||
nir_ssa_def *addr = emil_lsd_out_addr(b, load_out_param_base, rel_patch_id, op, &b->shader->outputs, 0);
|
||||
replace_load_instr(b, op, addr);
|
||||
return true;
|
||||
}
|
||||
case nir_intrinsic_store_output: {
|
||||
nir_ssa_def *addr = (b->shader->info.stage == MESA_SHADER_TESS_CTRL) ?
|
||||
r600_tcs_base_address(b, load_out_param_base, rel_patch_id):
|
||||
nir_build_alu(b, nir_op_umul24,
|
||||
nir_channel(b, load_out_param_base, 1),
|
||||
rel_patch_id, NULL, NULL);
|
||||
addr = emil_tcs_io_offset(b, addr, op, &b->shader->outputs, 1);
|
||||
emit_store_lds(b, op, addr);
|
||||
nir_instr_remove(instr);
|
||||
return true;
|
||||
}
|
||||
case nir_intrinsic_load_output: {
|
||||
nir_ssa_def *addr = r600_tcs_base_address(b, load_out_param_base, rel_patch_id);
|
||||
addr = emil_tcs_io_offset(b, addr, op, &b->shader->outputs, 0);
|
||||
replace_load_instr(b, op, addr);
|
||||
return true;
|
||||
}
|
||||
case nir_intrinsic_load_input: {
|
||||
nir_ssa_def *addr = r600_tcs_base_address(b, load_in_param_base, rel_patch_id);
|
||||
addr = emil_tcs_io_offset(b, addr, op, &b->shader->inputs, 0);
|
||||
replace_load_instr(b, op, addr);
|
||||
return true;
|
||||
}
|
||||
case nir_intrinsic_load_tess_level_inner:
|
||||
tf_inner_address_offset = 4;
|
||||
ncomps_correct = 2;
|
||||
/* fallthrough */
|
||||
case nir_intrinsic_load_tess_level_outer: {
|
||||
auto ncomps = outer_tf_components(prim_type);
|
||||
if (!ncomps)
|
||||
return false;
|
||||
ncomps -= ncomps_correct;
|
||||
auto base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
|
||||
auto rel_patch_id = r600_load_rel_patch_id(b);
|
||||
nir_ssa_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id);
|
||||
nir_ssa_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, tf_inner_address_offset + ncomps));
|
||||
|
||||
auto tf = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
|
||||
tf->num_components = ncomps;
|
||||
tf->src[0] = nir_src_for_ssa(addr_outer);
|
||||
nir_ssa_dest_init(&tf->instr, &tf->dest,
|
||||
tf->num_components, 32, NULL);
|
||||
nir_intrinsic_set_component(tf, 0);
|
||||
nir_builder_instr_insert(b, &tf->instr);
|
||||
|
||||
nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&tf->dest.ssa));
|
||||
nir_instr_remove(instr);
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Run the tessellation IO lowering over every function of `shader`.
 *
 * Each instruction accepted by r600_lower_tess_io_filter() is handed to
 * r600_lower_tess_io_impl().  `prim_type` is forwarded unchanged and is
 * needed for the tess level loads.
 *
 * Returns true when at least one intrinsic was lowered.
 */
bool r600_lower_tess_io(nir_shader *shader, enum pipe_prim_type prim_type)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_builder b;
      nir_builder_init(&b, function->impl);

      /* Set as soon as the lowering is invoked on this impl, even if the
       * lowering itself reports no progress. */
      bool impl_modified = false;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            /* The filter already rejects non-intrinsic instructions. */
            if (!r600_lower_tess_io_filter(instr))
               continue;

            impl_modified = true;
            progress |= r600_lower_tess_io_impl(&b, instr, prim_type);
         }
      }

      /* Instructions are only added to or removed from existing blocks, so
       * the block indices and the dominance tree stay valid; every other
       * piece of metadata has to be invalidated. */
      if (impl_modified)
         nir_metadata_preserve(function->impl,
                               (nir_metadata)(nir_metadata_block_index |
                                              nir_metadata_dominance));
   }

   return progress;
}
|
||||
|
||||
/*
 * Insert a store_tf_r600 intrinsic that takes `val` as its only source,
 * using the component count of `val`.  Always returns true.
 */
bool r600_emit_tf(nir_builder *b, nir_ssa_def *val)
{
   nir_intrinsic_instr *tf_store =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_tf_r600);

   tf_store->src[0] = nir_src_for_ssa(val);
   tf_store->num_components = val->num_components;
   nir_builder_instr_insert(b, &tf_store->instr);

   return true;
}
|
||||
|
||||
/*
 * Append the tessellation factor emission to a tess control shader.
 *
 * At the end of the (single) function body, code guarded by
 * "invocation id == 0" is added that reads the outer and, where present,
 * the inner tess levels with load_local_shared_r600 and passes them on via
 * store_tf_r600 as (address, value) pairs.
 *
 * Returns false without changing the shader when it is not a tess control
 * shader, when it already contains a store_tf_r600 intrinsic, or when
 * `prim_type` has no outer tess factors; returns true otherwise.
 */
bool r600_append_tcs_TF_emission(nir_shader *shader, enum pipe_prim_type prim_type) {
   if (shader->info.stage != MESA_SHADER_TESS_CTRL)
      return false;

   /* Nothing to do when the tess factors are already emitted. */
   nir_foreach_function(function, shader) {
      /* Function declarations carry no body to scan. */
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic == nir_intrinsic_store_tf_r600) {
               return false;
            }
         }
      }
   }
   nir_builder builder;
   nir_builder *b = &builder;

   assert(exec_list_length(&shader->functions) == 1);
   nir_function *f = (nir_function *)shader->functions.get_head();
   nir_builder_init(b, f->impl);

   auto outer_comps = outer_tf_components(prim_type);
   if (!outer_comps)
      return false;

   /* Lines have no inner level, triangles one, quads two. */
   unsigned inner_comps = outer_comps - 2;
   unsigned stride = (inner_comps + outer_comps) * 4;

   b->cursor = nir_after_cf_list(&f->impl->body);

   auto invocation_id = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_invocation_id);
   invocation_id->num_components = 1;
   nir_ssa_dest_init(&invocation_id->instr, &invocation_id->dest,
                     invocation_id->num_components, 32, NULL);
   nir_builder_instr_insert(b, &invocation_id->instr);

   /* Only invocation 0 emits the tess factors. */
   nir_push_if(b, nir_ieq(b, &invocation_id->dest.ssa, nir_imm_int(b, 0)));
   auto base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
   auto rel_patch_id = r600_load_rel_patch_id(b);

   nir_ssa_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id);

   /* Read back the outer tess levels. */
   nir_ssa_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, outer_comps));
   auto tf_outer = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
   tf_outer->num_components = outer_comps;
   tf_outer->src[0] = nir_src_for_ssa(addr_outer);
   nir_ssa_dest_init(&tf_outer->instr, &tf_outer->dest,
                     tf_outer->num_components, 32, NULL);
   nir_intrinsic_set_component(tf_outer, 15);
   nir_builder_instr_insert(b, &tf_outer->instr);

   /* Collected (address, value) vectors, emitted in order further down. */
   std::vector<nir_ssa_def *> tf_out;


   auto tf_out_base = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_tcs_tess_factor_base_r600);
   tf_out_base->num_components = 1;
   nir_ssa_dest_init(&tf_out_base->instr, &tf_out_base->dest,
                     tf_out_base->num_components, 32, NULL);
   nir_builder_instr_insert(b, &tf_out_base->instr);

   /* Output address of this patch: rel_patch_id * stride + tess factor base. */
   auto out_addr0 = nir_build_alu(b, nir_op_umad24,
                                  rel_patch_id,
                                  nir_imm_int(b, stride),
                                  &tf_out_base->dest.ssa,
                                  NULL);
   int chanx = 0;
   int chany = 1;

   /* For lines the first two outer levels are written in swapped order. */
   if (prim_type == PIPE_PRIM_LINES)
      std::swap(chanx, chany);


   auto v0 = nir_vec4(b, out_addr0, nir_channel(b, &tf_outer->dest.ssa, chanx),
                      nir_iadd(b, out_addr0, nir_imm_int(b, 4)),
                      nir_channel(b, &tf_outer->dest.ssa, chany));

   tf_out.push_back(v0);
   if (outer_comps > 2) {
      /* Quads add two more outer levels, triangles just one. */
      auto v1 = (outer_comps > 3) ? nir_vec4(b, nir_iadd(b, out_addr0, nir_imm_int(b, 8)),
                                             nir_channel(b, &tf_outer->dest.ssa, 2),
                                             nir_iadd(b, out_addr0, nir_imm_int(b, 12)),
                                             nir_channel(b, &tf_outer->dest.ssa, 3)) :
                                    nir_vec2(b, nir_iadd(b, out_addr0, nir_imm_int(b, 8)),
                                             nir_channel(b, &tf_outer->dest.ssa, 2));
      tf_out.push_back(v1);
   }

   if (inner_comps) {
      /* Selectors 5 and 6 of load_offset_group() address the inner levels. */
      nir_ssa_def *addr1 = nir_iadd(b, addr0, load_offset_group(b, 4 + inner_comps));
      auto tf_inner = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
      tf_inner->num_components = inner_comps;
      tf_inner->src[0] = nir_src_for_ssa(addr1);
      nir_ssa_dest_init(&tf_inner->instr, &tf_inner->dest,
                        tf_inner->num_components, 32, NULL);
      nir_intrinsic_set_component(tf_inner, 3);
      nir_builder_instr_insert(b, &tf_inner->instr);

      /* With a single inner level it directly follows the three outer
       * levels (offset 12); with two they go to offsets 16 and 20. */
      auto v2 = (inner_comps > 1) ? nir_vec4(b, nir_iadd(b, out_addr0, nir_imm_int(b, 16)),
                                             nir_channel(b, &tf_inner->dest.ssa, 0),
                                             nir_iadd(b, out_addr0, nir_imm_int(b, 20)),
                                             nir_channel(b, &tf_inner->dest.ssa, 1)):
                                    nir_vec2(b, nir_iadd(b, out_addr0, nir_imm_int(b, 12)),
                                             nir_channel(b, &tf_inner->dest.ssa, 0));
      tf_out.push_back(v2);
   }

   for (auto tf: tf_out)
      r600_emit_tf(b, tf);

   nir_pop_if(b, nullptr);

   /* New control flow was added, so no metadata survives. */
   nir_metadata_preserve(f->impl, nir_metadata_none);

   return true;
}
|
Loading…
Reference in New Issue