diff --git a/src/gallium/drivers/r600/Makefile.sources b/src/gallium/drivers/r600/Makefile.sources index 45342e4ad21..7451e72674b 100644 --- a/src/gallium/drivers/r600/Makefile.sources +++ b/src/gallium/drivers/r600/Makefile.sources @@ -134,6 +134,7 @@ CXX_SOURCES = \ sfn/sfn_nir.h \ sfn/sfn_nir_lower_fs_out_to_vector.cpp \ sfn/sfn_nir_lower_fs_out_to_vector.h \ + sfn/sfn_nir_lower_tess_io.cpp \ sfn/sfn_nir_vectorize_vs_inputs.c \ sfn/sfn_shader_base.cpp \ sfn/sfn_shader_base.h \ diff --git a/src/gallium/drivers/r600/meson.build b/src/gallium/drivers/r600/meson.build index 468a8165dd5..227168a7b24 100644 --- a/src/gallium/drivers/r600/meson.build +++ b/src/gallium/drivers/r600/meson.build @@ -151,6 +151,7 @@ files_r600 = files( 'sfn/sfn_nir.h', 'sfn/sfn_nir_lower_fs_out_to_vector.cpp', 'sfn/sfn_nir_lower_fs_out_to_vector.h', + 'sfn/sfn_nir_lower_tess_io.cpp', 'sfn/sfn_nir_vectorize_vs_inputs.c', 'sfn/sfn_shader_base.cpp', 'sfn/sfn_shader_base.h', diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 71a3ae1bad4..f132b720421 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -214,6 +214,8 @@ struct r600_bytecode_cf { struct r600_bytecode_alu *prev_bs_head; struct r600_bytecode_alu *prev2_bs_head; unsigned isa[2]; + unsigned nlds_read; + unsigned nqueue_read; }; #define FC_NONE 0 diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.h b/src/gallium/drivers/r600/sfn/sfn_nir.h index ee80d37c25a..162b2e47b9d 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir.h +++ b/src/gallium/drivers/r600/sfn/sfn_nir.h @@ -28,6 +28,7 @@ #define SFN_NIR_H #include "nir.h" +#include "nir_builder.h" #ifdef __cplusplus #include "sfn_shader_base.h" @@ -96,15 +97,33 @@ private: #endif +static inline nir_ssa_def * +r600_imm_ivec3(nir_builder *build, int x, int y, int z) +{ + nir_const_value v[3] = { + nir_const_value_for_int(x, 32), + nir_const_value_for_int(y, 32), + nir_const_value_for_int(z, 32), + }; + + return nir_build_imm(build, 3, 32, v); +} + +bool r600_lower_tess_io(nir_shader *shader, enum pipe_prim_type prim_type); +bool r600_append_tcs_TF_emission(nir_shader *shader, enum pipe_prim_type prim_type); + #ifdef __cplusplus extern "C" { #endif bool r600_vectorize_vs_inputs(nir_shader *shader); + + int r600_shader_from_nir(struct r600_context *rctx, struct r600_pipe_shader *pipeshader, union r600_shader_key *key); + #ifdef __cplusplus } #endif diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp new file mode 100644 index 00000000000..9346190d4c0 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp @@ -0,0 +1,464 @@ +#include "sfn_nir.h" + +bool r600_lower_tess_io_filter(const nir_instr *instr) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr); + switch (op->intrinsic) { + case nir_intrinsic_load_input: + case nir_intrinsic_store_output: + case nir_intrinsic_load_output: + case nir_intrinsic_load_per_vertex_input: + case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_load_patch_vertices_in: + case nir_intrinsic_load_tess_level_outer: + case nir_intrinsic_load_tess_level_inner: + return true; + default: + ; + } + return false; +} + +static nir_ssa_def * +emit_load_param_base(nir_builder *b, nir_intrinsic_op op) +{ + nir_intrinsic_instr *result = nir_intrinsic_instr_create(b->shader, op); + result->num_components = 4; + nir_ssa_dest_init(&result->instr, &result->dest, + result->num_components, 32, NULL); + nir_builder_instr_insert(b, &result->instr); + return &result->dest.ssa; +} + +static int get_tcs_varying_offset(exec_list *io, unsigned index) +{ + nir_foreach_variable(var, io){ + if (var->data.driver_location == index) { + switch (var->data.location) { + case VARYING_SLOT_POS: + return 0; + case VARYING_SLOT_PSIZ: + return 0x10; + case VARYING_SLOT_CLIP_DIST0: + return 0x20; + case VARYING_SLOT_CLIP_DIST1: + return 0x30; + case VARYING_SLOT_TESS_LEVEL_OUTER: + return 0; + case VARYING_SLOT_TESS_LEVEL_INNER: + return 0x10; + default: + if (var->data.location >= VARYING_SLOT_VAR0 && + var->data.location <= VARYING_SLOT_VAR31) + return 0x10 * (var->data.location - VARYING_SLOT_VAR0) + 0x40; + + if (var->data.location >= VARYING_SLOT_PATCH0) { + return 0x10 * (var->data.location - VARYING_SLOT_PATCH0) + 0x20; + } + } + /* TODO: PATCH is missing */ + } + } + return 0; +} + +static inline nir_ssa_def * +r600_umad_24(nir_builder *b, nir_ssa_def *op1, nir_ssa_def *op2, nir_ssa_def *op3) +{ + return nir_build_alu(b, nir_op_umad24, op1, op2, op3, NULL); +} + +static inline nir_ssa_def * +r600_tcs_base_address(nir_builder *b, nir_ssa_def *param_base, nir_ssa_def *rel_patch_id) +{ + return r600_umad_24(b, nir_channel(b, param_base, 0), + rel_patch_id, + nir_channel(b, param_base, 3)); +} + + +static nir_ssa_def * +emil_lsd_in_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op) +{ + nir_ssa_def *addr = nir_build_alu(b, nir_op_umul24, + nir_channel(b, base, 0), + patch_id, NULL, NULL); + + auto idx1 = nir_src_as_const_value(op->src[0]); + if (!idx1 || idx1->u32 != 0) + addr = r600_umad_24(b, nir_channel(b, base, 1), + op->src[0].ssa, addr); + + auto offset = nir_imm_int(b, get_tcs_varying_offset(&b->shader->inputs, nir_intrinsic_base(op))); + + auto idx2 = nir_src_as_const_value(op->src[1]); + if (!idx2 || idx2->u32 != 0) + offset = nir_iadd(b, offset, nir_ishl(b, op->src[1].ssa, nir_imm_int(b, 4))); + + return nir_iadd(b, addr, offset); +} + +static nir_ssa_def * +emil_lsd_out_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op, exec_list *io, int src_offset) +{ + + nir_ssa_def *addr1 = r600_umad_24(b, nir_channel(b, base, 0), + patch_id, + nir_channel(b, base, 2)); + nir_ssa_def *addr2 = r600_umad_24(b, nir_channel(b, base, 1), + op->src[src_offset].ssa, addr1); + + int offset = get_tcs_varying_offset(io, nir_intrinsic_base(op)); + return nir_iadd(b, nir_iadd(b, addr2, + nir_ishl(b, op->src[src_offset + 1].ssa, nir_imm_int(b,4))), + nir_imm_int(b, offset)); +} + +static nir_ssa_def *load_offset_group(nir_builder *b, int ncomponents) +{ + switch (ncomponents) { + /* tess outer offsets */ + case 1: return nir_imm_int(b, 0); + case 2: return nir_imm_ivec2(b, 0, 4); + case 3: return r600_imm_ivec3(b, 0, 4, 8); + case 4: return nir_imm_ivec4(b, 0, 4, 8, 12); + /* tess inner offsets */ + case 5: return nir_imm_int(b, 16); + case 6: return nir_imm_ivec2(b, 16, 20); + default: + debug_printf("Got %d components\n", ncomponents); + unreachable("Unsupported component count"); + } +} + +static void replace_load_instr(nir_builder *b, nir_intrinsic_instr *op, nir_ssa_def *addr) +{ + nir_intrinsic_instr *load_tcs_in = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600); + load_tcs_in->num_components = op->num_components; + nir_ssa_dest_init(&load_tcs_in->instr, &load_tcs_in->dest, + load_tcs_in->num_components, 32, NULL); + + nir_ssa_def *addr_outer = nir_iadd(b, addr, load_offset_group(b, load_tcs_in->num_components)); + load_tcs_in->src[0] = nir_src_for_ssa(addr_outer); + nir_intrinsic_set_component(load_tcs_in, nir_intrinsic_component(op)); + nir_builder_instr_insert(b, &load_tcs_in->instr); + nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&load_tcs_in->dest.ssa)); + nir_instr_remove(&op->instr); + +} + +static nir_ssa_def * +r600_load_rel_patch_id(nir_builder *b) +{ + auto patch_id = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_tcs_rel_patch_id_r600); + patch_id->num_components = 1; + nir_ssa_dest_init(&patch_id->instr, &patch_id->dest, + patch_id->num_components, 32, NULL); + nir_builder_instr_insert(b, &patch_id->instr); + return &patch_id->dest.ssa; +} + +static void +emit_store_lds(nir_builder *b, nir_intrinsic_instr *op, nir_ssa_def *addr) +{ + for (int i = 0; i < 2; ++i) { + unsigned test_mask = (0x3 << 2 * i); + if (!(nir_intrinsic_write_mask(op) & test_mask)) + continue; + + auto store_tcs_out = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_local_shared_r600); + unsigned writemask = nir_intrinsic_write_mask(op) & test_mask; + nir_intrinsic_set_write_mask(store_tcs_out, writemask); + store_tcs_out->src[0] = nir_src_for_ssa(op->src[0].ssa); + store_tcs_out->num_components = store_tcs_out->src[0].ssa->num_components; + bool start_even = (writemask & (1u << (2 * i))); + + auto addr2 = nir_iadd(b, addr, nir_imm_int(b, 8 * i + (start_even ? 0 : 4))); + store_tcs_out->src[1] = nir_src_for_ssa(addr2); + + nir_builder_instr_insert(b, &store_tcs_out->instr); + } +} + +static nir_ssa_def * +emil_tcs_io_offset(nir_builder *b, nir_ssa_def *addr, nir_intrinsic_instr *op, exec_list *io, int src_offset) +{ + + int offset = get_tcs_varying_offset(io, nir_intrinsic_base(op)); + return nir_iadd(b, nir_iadd(b, addr, + nir_ishl(b, op->src[src_offset].ssa, nir_imm_int(b,4))), + nir_imm_int(b, offset)); +} + + +inline unsigned +outer_tf_components(pipe_prim_type prim_type) +{ + switch (prim_type) { + case PIPE_PRIM_LINES: return 2; + case PIPE_PRIM_TRIANGLES: return 3; + case PIPE_PRIM_QUADS: return 4; + default: + return 0; + } +} + + + +static bool +r600_lower_tess_io_impl(nir_builder *b, nir_instr *instr, enum pipe_prim_type prim_type) +{ + static nir_ssa_def *load_in_param_base = nullptr; + static nir_ssa_def *load_out_param_base = nullptr; + + b->cursor = nir_before_instr(instr); + nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr); + + if (b->shader->info.stage == MESA_SHADER_TESS_CTRL) { + load_in_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_in_param_base_r600); + load_out_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600); + } else if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) { + load_in_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600); + } else if (b->shader->info.stage == MESA_SHADER_VERTEX) { + load_out_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_in_param_base_r600); + } + + auto rel_patch_id = r600_load_rel_patch_id(b); + + unsigned tf_inner_address_offset = 0; + unsigned ncomps_correct = 0; + + switch (op->intrinsic) { + case nir_intrinsic_load_patch_vertices_in: { + auto vertices_in = nir_channel(b, load_in_param_base, 2); + nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(vertices_in)); + nir_instr_remove(&op->instr); + return true; + } + case nir_intrinsic_load_per_vertex_input: { + nir_ssa_def *addr = + b->shader->info.stage == MESA_SHADER_TESS_CTRL ? + emil_lsd_in_addr(b, load_in_param_base, rel_patch_id, op) : + emil_lsd_out_addr(b, load_in_param_base, rel_patch_id, op, &b->shader->inputs, 0); + replace_load_instr(b, op, addr); + return true; + } + case nir_intrinsic_store_per_vertex_output: { + nir_ssa_def *addr = emil_lsd_out_addr(b, load_out_param_base, rel_patch_id, op, &b->shader->outputs, 1); + emit_store_lds(b, op, addr); + nir_instr_remove(instr); + return true; + } + case nir_intrinsic_load_per_vertex_output: { + nir_ssa_def *addr = emil_lsd_out_addr(b, load_out_param_base, rel_patch_id, op, &b->shader->outputs, 0); + replace_load_instr(b, op, addr); + return true; + } + case nir_intrinsic_store_output: { + nir_ssa_def *addr = (b->shader->info.stage == MESA_SHADER_TESS_CTRL) ? + r600_tcs_base_address(b, load_out_param_base, rel_patch_id): + nir_build_alu(b, nir_op_umul24, + nir_channel(b, load_out_param_base, 1), + rel_patch_id, NULL, NULL); + addr = emil_tcs_io_offset(b, addr, op, &b->shader->outputs, 1); + emit_store_lds(b, op, addr); + nir_instr_remove(instr); + return true; + } + case nir_intrinsic_load_output: { + nir_ssa_def *addr = r600_tcs_base_address(b, load_out_param_base, rel_patch_id); + addr = emil_tcs_io_offset(b, addr, op, &b->shader->outputs, 0); + replace_load_instr(b, op, addr); + return true; + } + case nir_intrinsic_load_input: { + nir_ssa_def *addr = r600_tcs_base_address(b, load_in_param_base, rel_patch_id); + addr = emil_tcs_io_offset(b, addr, op, &b->shader->inputs, 0); + replace_load_instr(b, op, addr); + return true; + } + case nir_intrinsic_load_tess_level_inner: + tf_inner_address_offset = 4; + ncomps_correct = 2; + /* fallthrough */ + case nir_intrinsic_load_tess_level_outer: { + auto ncomps = outer_tf_components(prim_type); + if (!ncomps) + return false; + ncomps -= ncomps_correct; + auto base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600); + auto rel_patch_id = r600_load_rel_patch_id(b); + nir_ssa_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id); + nir_ssa_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, tf_inner_address_offset + ncomps)); + + auto tf = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600); + tf->num_components = ncomps; + tf->src[0] = nir_src_for_ssa(addr_outer); + nir_ssa_dest_init(&tf->instr, &tf->dest, + tf->num_components, 32, NULL); + nir_intrinsic_set_component(tf, 0); + nir_builder_instr_insert(b, &tf->instr); + + nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&tf->dest.ssa)); + nir_instr_remove(instr); + return true; + } + default: + ; + } + + return false; +} + +bool r600_lower_tess_io(nir_shader *shader, enum pipe_prim_type prim_type) +{ + bool progress = false; + nir_foreach_function(function, shader) { + if (function->impl) { + nir_builder b; + nir_builder_init(&b, function->impl); + + nir_foreach_block(block, function->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + if (r600_lower_tess_io_filter(instr)) + progress |= r600_lower_tess_io_impl(&b, instr, prim_type); + } + } + } + } + return progress; +} + +bool r600_emit_tf(nir_builder *b, nir_ssa_def *val) +{ + nir_intrinsic_instr *store_tf = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_tf_r600); + store_tf->num_components = val->num_components; + store_tf->src[0] = nir_src_for_ssa(val); + nir_builder_instr_insert(b, &store_tf->instr); + return true; +} + +bool r600_append_tcs_TF_emission(nir_shader *shader, enum pipe_prim_type prim_type) { + if (shader->info.stage != MESA_SHADER_TESS_CTRL) + return false; + + nir_foreach_function(function, shader) { + nir_foreach_block(block, function->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic == nir_intrinsic_store_tf_r600) { + return false; + } + } + } + } + nir_builder builder; + nir_builder *b = &builder; + + assert(exec_list_length(&shader->functions) == 1); + nir_function *f = (nir_function *)shader->functions.get_head(); + nir_builder_init(b, f->impl); + + auto outer_comps = outer_tf_components(prim_type); + if (!outer_comps) + return false; + + unsigned inner_comps = outer_comps - 2; + unsigned stride = (inner_comps + outer_comps) * 4; + + b->cursor = nir_after_cf_list(&f->impl->body); + + auto invocation_id = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_invocation_id); + invocation_id->num_components = 1; + nir_ssa_dest_init(&invocation_id->instr, &invocation_id->dest, + invocation_id->num_components, 32, NULL); + nir_builder_instr_insert(b, &invocation_id->instr); + + nir_push_if(b, nir_ieq(b, &invocation_id->dest.ssa, nir_imm_int(b, 0))); + auto base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600); + auto rel_patch_id = r600_load_rel_patch_id(b); + + nir_ssa_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id); + + nir_ssa_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, outer_comps)); + auto tf_outer = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600); + tf_outer->num_components = outer_comps; + tf_outer->src[0] = nir_src_for_ssa(addr_outer); + nir_ssa_dest_init(&tf_outer->instr, &tf_outer->dest, + tf_outer->num_components, 32, NULL); + nir_intrinsic_set_component(tf_outer, 15); + nir_builder_instr_insert(b, &tf_outer->instr); + + std::vector tf_out; + + + auto tf_out_base = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_tcs_tess_factor_base_r600); + tf_out_base->num_components = 1; + nir_ssa_dest_init(&tf_out_base->instr, &tf_out_base->dest, + tf_out_base->num_components, 32, NULL); + nir_builder_instr_insert(b, &tf_out_base->instr); + + auto out_addr0 = nir_build_alu(b, nir_op_umad24, + rel_patch_id, + nir_imm_int(b, stride), + &tf_out_base->dest.ssa, + NULL); + int chanx = 0; + int chany = 1; + + if (prim_type == PIPE_PRIM_LINES) + std::swap(chanx, chany); + + + auto v0 = nir_vec4(b, out_addr0, nir_channel(b, &tf_outer->dest.ssa, chanx), + nir_iadd(b, out_addr0, nir_imm_int(b, 4)), + nir_channel(b, &tf_outer->dest.ssa, chany)); + + tf_out.push_back(v0); + if (outer_comps > 2) { + auto v1 = (outer_comps > 3) ? nir_vec4(b, nir_iadd(b, out_addr0, nir_imm_int(b, 8)), + nir_channel(b, &tf_outer->dest.ssa, 2), + nir_iadd(b, out_addr0, nir_imm_int(b, 12)), + nir_channel(b, &tf_outer->dest.ssa, 3)) : + nir_vec2(b, nir_iadd(b, out_addr0, nir_imm_int(b, 8)), + nir_channel(b, &tf_outer->dest.ssa, 2)); + tf_out.push_back(v1); + } + + if (inner_comps) { + nir_ssa_def *addr1 = nir_iadd(b, addr0, load_offset_group(b, 4 + inner_comps)); + auto tf_inner = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600); + tf_inner->num_components = inner_comps; + tf_inner->src[0] = nir_src_for_ssa(addr1); + nir_ssa_dest_init(&tf_inner->instr, &tf_inner->dest, + tf_inner->num_components, 32, NULL); + nir_intrinsic_set_component(tf_inner, 3); + nir_builder_instr_insert(b, &tf_inner->instr); + + auto v2 = (inner_comps > 1) ? nir_vec4(b, nir_iadd(b, out_addr0, nir_imm_int(b, 16)), + nir_channel(b, &tf_inner->dest.ssa, 0), + nir_iadd(b, out_addr0, nir_imm_int(b, 20)), + nir_channel(b, &tf_inner->dest.ssa, 1)): + nir_vec2(b, nir_iadd(b, out_addr0, nir_imm_int(b, 12)), + nir_channel(b, &tf_inner->dest.ssa, 0)); + tf_out.push_back(v2); + } + + for (auto tf: tf_out) + r600_emit_tf(b, tf); + + nir_pop_if(b, nullptr); + + nir_metadata_preserve(f->impl, nir_metadata_none); + + return true; +}