From 44a6b0107b37ad9644d3435cf6d2d29b6779654f Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Thu, 5 Sep 2019 15:46:31 +1000
Subject: [PATCH] gallivm: add nir->llvm translation (v2)

This adds the initial implementation of the NIR->LLVM conversion
for llvmpipe NIR support.

v2: lower bool to int32 in nir not llvm

Acked-by: Roland Scheidegger
---
 src/gallium/auxiliary/Makefile.sources     |    3 +
 src/gallium/auxiliary/gallivm/lp_bld_nir.c | 1652 +++++++++++++++++
 src/gallium/auxiliary/gallivm/lp_bld_nir.h |  210 +++
 .../auxiliary/gallivm/lp_bld_nir_soa.c     | 1274 +++++++++++++
 src/gallium/auxiliary/meson.build          |    3 +
 5 files changed, 3142 insertions(+)
 create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_nir.c
 create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_nir.h
 create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c

diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index 567cb00d7ee..4ee99b8f063 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -422,6 +422,9 @@ GALLIVM_SOURCES := \
 	gallivm/lp_bld_logic.h \
 	gallivm/lp_bld_misc.cpp \
 	gallivm/lp_bld_misc.h \
+	gallivm/lp_bld_nir.c \
+	gallivm/lp_bld_nir.h \
+	gallivm/lp_bld_nir_soa.c \
 	gallivm/lp_bld_pack.c \
 	gallivm/lp_bld_pack.h \
 	gallivm/lp_bld_printf.c \
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
new file mode 100644
index 00000000000..abaa82a0331
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
@@ -0,0 +1,1652 @@
+/**************************************************************************
+ *
+ * Copyright 2019 Red Hat.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ * + **************************************************************************/ + +#include "lp_bld_nir.h" +#include "lp_bld_arit.h" +#include "lp_bld_bitarit.h" +#include "lp_bld_const.h" +#include "lp_bld_gather.h" +#include "lp_bld_logic.h" +#include "lp_bld_quad.h" +#include "lp_bld_flow.h" +#include "lp_bld_struct.h" +#include "lp_bld_debug.h" +#include "lp_bld_printf.h" +#include "nir_deref.h" + +static void visit_cf_list(struct lp_build_nir_context *bld_base, + struct exec_list *list); + +static LLVMValueRef cast_type(struct lp_build_nir_context *bld_base, LLVMValueRef val, + nir_alu_type alu_type, unsigned bit_size) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + switch (alu_type) { + case nir_type_float: + switch (bit_size) { + case 32: + return LLVMBuildBitCast(builder, val, bld_base->base.vec_type, ""); + case 64: + return LLVMBuildBitCast(builder, val, bld_base->dbl_bld.vec_type, ""); + default: + assert(0); + break; + } + break; + case nir_type_int: + switch (bit_size) { + case 32: + return LLVMBuildBitCast(builder, val, bld_base->int_bld.vec_type, ""); + case 64: + return LLVMBuildBitCast(builder, val, bld_base->int64_bld.vec_type, ""); + default: + assert(0); + break; + } + break; + case nir_type_uint: + switch (bit_size) { + case 32: + return LLVMBuildBitCast(builder, val, bld_base->uint_bld.vec_type, ""); + case 64: + return LLVMBuildBitCast(builder, val, bld_base->uint64_bld.vec_type, ""); + default: + assert(0); + break; + } + break; + case nir_type_uint32: + return LLVMBuildBitCast(builder, val, bld_base->uint_bld.vec_type, ""); + default: + return val; + } + return NULL; +} + +static struct lp_build_context *get_int_bld(struct lp_build_nir_context *bld_base, + bool is_unsigned, + unsigned op_bit_size) +{ + if (is_unsigned) + if (op_bit_size == 64) + return &bld_base->uint64_bld; + else + return &bld_base->uint_bld; + else if (op_bit_size == 64) + return &bld_base->int64_bld; + else + return &bld_base->int_bld; +} + +static struct lp_build_context *get_flt_bld(struct lp_build_nir_context *bld_base, + unsigned op_bit_size) +{ + if (op_bit_size == 64) + return &bld_base->dbl_bld; + else + return &bld_base->base; +} + +static unsigned glsl_sampler_to_pipe(int sampler_dim, bool is_array) +{ + unsigned pipe_target = PIPE_BUFFER; + switch (sampler_dim) { + case GLSL_SAMPLER_DIM_1D: + pipe_target = is_array ? PIPE_TEXTURE_1D_ARRAY : PIPE_TEXTURE_1D; + break; + case GLSL_SAMPLER_DIM_2D: + pipe_target = is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D; + break; + case GLSL_SAMPLER_DIM_3D: + pipe_target = PIPE_TEXTURE_3D; + break; + case GLSL_SAMPLER_DIM_CUBE: + pipe_target = is_array ? 
PIPE_TEXTURE_CUBE_ARRAY : PIPE_TEXTURE_CUBE; + break; + case GLSL_SAMPLER_DIM_RECT: + pipe_target = PIPE_TEXTURE_RECT; + break; + case GLSL_SAMPLER_DIM_BUF: + pipe_target = PIPE_BUFFER; + break; + default: + break; + } + return pipe_target; +} + +static LLVMValueRef get_ssa_src(struct lp_build_nir_context *bld_base, nir_ssa_def *ssa) +{ + return bld_base->ssa_defs[ssa->index]; +} + +static LLVMValueRef get_src(struct lp_build_nir_context *bld_base, nir_src src); + +static LLVMValueRef get_reg_src(struct lp_build_nir_context *bld_base, nir_reg_src src) +{ + struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, src.reg); + LLVMValueRef reg_storage = (LLVMValueRef)entry->data; + struct lp_build_context *reg_bld = get_int_bld(bld_base, true, src.reg->bit_size); + LLVMValueRef indir_src = NULL; + if (src.indirect) + indir_src = get_src(bld_base, *src.indirect); + return bld_base->load_reg(bld_base, reg_bld, &src, indir_src, reg_storage); +} + +static LLVMValueRef get_src(struct lp_build_nir_context *bld_base, nir_src src) +{ + if (src.is_ssa) + return get_ssa_src(bld_base, src.ssa); + else + return get_reg_src(bld_base, src.reg); +} + +static void assign_ssa(struct lp_build_nir_context *bld_base, int idx, LLVMValueRef ptr) +{ + bld_base->ssa_defs[idx] = ptr; +} + +static void assign_ssa_dest(struct lp_build_nir_context *bld_base, const nir_ssa_def *ssa, + LLVMValueRef vals[4]) +{ + assign_ssa(bld_base, ssa->index, ssa->num_components == 1 ? vals[0] : lp_nir_array_build_gather_values(bld_base->base.gallivm->builder, vals, ssa->num_components)); +} + +static void assign_reg(struct lp_build_nir_context *bld_base, const nir_reg_dest *reg, + unsigned write_mask, + LLVMValueRef vals[4]) +{ + struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, reg->reg); + LLVMValueRef reg_storage = (LLVMValueRef)entry->data; + struct lp_build_context *reg_bld = get_int_bld(bld_base, true, reg->reg->bit_size); + LLVMValueRef indir_src = NULL; + if (reg->indirect) + indir_src = get_src(bld_base, *reg->indirect); + bld_base->store_reg(bld_base, reg_bld, reg, write_mask ? 
write_mask : 0xf, indir_src, reg_storage, vals); +} + +static void assign_dest(struct lp_build_nir_context *bld_base, const nir_dest *dest, LLVMValueRef vals[4]) +{ + if (dest->is_ssa) + assign_ssa_dest(bld_base, &dest->ssa, vals); + else + assign_reg(bld_base, &dest->reg, 0, vals); +} + +static void assign_alu_dest(struct lp_build_nir_context *bld_base, const nir_alu_dest *dest, LLVMValueRef vals[4]) +{ + if (dest->dest.is_ssa) + assign_ssa_dest(bld_base, &dest->dest.ssa, vals); + else + assign_reg(bld_base, &dest->dest.reg, dest->write_mask, vals); +} + +static LLVMValueRef int_to_bool32(struct lp_build_nir_context *bld_base, + uint32_t src_bit_size, + bool is_unsigned, + LLVMValueRef val) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size); + LLVMValueRef result = lp_build_compare(bld_base->base.gallivm, int_bld->type, PIPE_FUNC_NOTEQUAL, val, int_bld->zero); + if (src_bit_size == 64) + result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, ""); + return result; +} + +static LLVMValueRef flt_to_bool32(struct lp_build_nir_context *bld_base, + uint32_t src_bit_size, + LLVMValueRef val) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size); + LLVMValueRef result = lp_build_compare(bld_base->base.gallivm, flt_bld->type, PIPE_FUNC_NOTEQUAL, val, flt_bld->zero); + if (src_bit_size == 64) + result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, ""); + return result; +} + +static LLVMValueRef fcmp32(struct lp_build_nir_context *bld_base, + enum pipe_compare_func compare, + uint32_t src_bit_size, + LLVMValueRef src[4]) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMValueRef result = lp_build_compare(bld_base->base.gallivm, get_flt_bld(bld_base, src_bit_size)->type, compare, src[0], src[1]); + if (src_bit_size == 64) + result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, ""); + return result; +} + +static LLVMValueRef icmp32(struct lp_build_nir_context *bld_base, + enum pipe_compare_func compare, + bool is_unsigned, + uint32_t src_bit_size, + LLVMValueRef src[4]) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMValueRef result = lp_build_compare(bld_base->base.gallivm, get_int_bld(bld_base, is_unsigned, src_bit_size)->type, compare, src[0], src[1]); + if (src_bit_size == 64) + result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, ""); + return result; +} + +static LLVMValueRef get_alu_src(struct lp_build_nir_context *bld_base, + nir_alu_src src, + unsigned num_components) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMValueRef value = get_src(bld_base, src.src); + bool need_swizzle = false; + + assert(value); + unsigned src_components = nir_src_num_components(src.src); + for (unsigned i = 0; i < num_components; ++i) { + assert(src.swizzle[i] < src_components); + if (src.swizzle[i] != i) + need_swizzle = true; + } + + if (need_swizzle || num_components != src_components) { + if (src_components > 1 && num_components == 1) { + value = LLVMBuildExtractValue(gallivm->builder, value, + src.swizzle[0], ""); + } else if (src_components == 1 && num_components > 1) { + LLVMValueRef values[] = {value, value, value, value}; + value = lp_nir_array_build_gather_values(builder, values, num_components); + } else { + LLVMValueRef arr = 
LLVMGetUndef(LLVMArrayType(LLVMTypeOf(LLVMBuildExtractValue(builder, value, 0, "")), num_components)); + for (unsigned i = 0; i < num_components; i++) + arr = LLVMBuildInsertValue(builder, arr, LLVMBuildExtractValue(builder, value, src.swizzle[i], ""), i, ""); + value = arr; + } + } + assert(!src.negate); + assert(!src.abs); + return value; +} + +static LLVMValueRef emit_b2f(struct lp_build_nir_context *bld_base, + LLVMValueRef src0, + unsigned bitsize) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMValueRef result = LLVMBuildAnd(builder, cast_type(bld_base, src0, nir_type_int, 32), + LLVMBuildBitCast(builder, lp_build_const_vec(bld_base->base.gallivm, bld_base->base.type, + 1.0), bld_base->int_bld.vec_type, ""), + ""); + result = LLVMBuildBitCast(builder, result, bld_base->base.vec_type, ""); + switch (bitsize) { + case 32: + break; + case 64: + result = LLVMBuildFPExt(builder, result, bld_base->dbl_bld.vec_type, ""); + break; + default: + unreachable("unsupported bit size."); + } + return result; +} + +static LLVMValueRef emit_b2i(struct lp_build_nir_context *bld_base, + LLVMValueRef src0, + unsigned bitsize) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMValueRef result = LLVMBuildAnd(builder, cast_type(bld_base, src0, nir_type_int, 32), + lp_build_const_int_vec(bld_base->base.gallivm, bld_base->base.type, 1), ""); + switch (bitsize) { + case 32: + return result; + case 64: + return LLVMBuildZExt(builder, result, bld_base->int64_bld.vec_type, ""); + default: + unreachable("unsupported bit size."); + } +} + +static LLVMValueRef emit_b32csel(struct lp_build_nir_context *bld_base, + unsigned src_bit_size[4], + LLVMValueRef src[4]) +{ + LLVMValueRef sel = cast_type(bld_base, src[0], nir_type_int, 32); + LLVMValueRef v = lp_build_compare(bld_base->base.gallivm, bld_base->int_bld.type, PIPE_FUNC_NOTEQUAL, sel, bld_base->int_bld.zero); + struct lp_build_context *bld = get_int_bld(bld_base, false, src_bit_size[1]); + return lp_build_select(bld, v, src[1], src[2]); +} + +static LLVMValueRef split_64bit(struct lp_build_nir_context *bld_base, + LLVMValueRef src, + bool hi) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32]; + LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32]; + int len = bld_base->base.type.length * 2; + for (unsigned i = 0; i < bld_base->base.type.length; i++) { + shuffles[i] = lp_build_const_int32(gallivm, i * 2); + shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1); + } + + src = LLVMBuildBitCast(gallivm->builder, src, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), len), ""); + return LLVMBuildShuffleVector(gallivm->builder, src, + LLVMGetUndef(LLVMTypeOf(src)), + LLVMConstVector(hi ? 
shuffles2 : shuffles, + bld_base->base.type.length), + ""); +} + +static LLVMValueRef +merge_64bit(struct lp_build_nir_context *bld_base, + LLVMValueRef input, + LLVMValueRef input2) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + int i; + LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)]; + int len = bld_base->base.type.length * 2; + assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32))); + + for (i = 0; i < bld_base->base.type.length * 2; i+=2) { + shuffles[i] = lp_build_const_int32(gallivm, i / 2); + shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length); + } + return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), ""); +} + +static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, + nir_op op, unsigned src_bit_size[4], LLVMValueRef src[4]) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef result; + switch (op) { + case nir_op_b2f32: + result = emit_b2f(bld_base, src[0], 32); + break; + case nir_op_b2f64: + result = emit_b2f(bld_base, src[0], 64); + break; + case nir_op_b2i32: + result = emit_b2i(bld_base, src[0], 32); + break; + case nir_op_b2i64: + result = emit_b2i(bld_base, src[0], 64); + break; + case nir_op_b32csel: + result = emit_b32csel(bld_base, src_bit_size, src); + break; + case nir_op_bit_count: + result = lp_build_popcount(get_int_bld(bld_base, false, src_bit_size[0]), src[0]); + break; + case nir_op_bitfield_select: + result = lp_build_xor(&bld_base->uint_bld, src[2], lp_build_and(&bld_base->uint_bld, src[0], lp_build_xor(&bld_base->uint_bld, src[1], src[2]))); + break; + case nir_op_f2b32: + result = flt_to_bool32(bld_base, src_bit_size[0], src[0]); + break; + case nir_op_f2f32: + result = LLVMBuildFPTrunc(builder, src[0], + bld_base->base.vec_type, ""); + break; + case nir_op_f2f64: + result = LLVMBuildFPExt(builder, src[0], + bld_base->dbl_bld.vec_type, ""); + break; + case nir_op_f2i32: + result = LLVMBuildFPToSI(builder, src[0], bld_base->base.int_vec_type, ""); + break; + case nir_op_f2u32: + result = LLVMBuildFPToUI(builder, + src[0], + bld_base->base.int_vec_type, ""); + break; + case nir_op_f2i64: + result = LLVMBuildFPToSI(builder, + src[0], + bld_base->int64_bld.vec_type, ""); + break; + case nir_op_f2u64: + result = LLVMBuildFPToUI(builder, + src[0], + bld_base->uint64_bld.vec_type, ""); + break; + case nir_op_fabs: + result = lp_build_abs(get_flt_bld(bld_base, src_bit_size[0]), src[0]); + break; + case nir_op_fadd: + result = lp_build_add(get_flt_bld(bld_base, src_bit_size[0]), + src[0], src[1]); + break; + case nir_op_fceil: + result = lp_build_ceil(get_flt_bld(bld_base, src_bit_size[0]), src[0]); + break; + case nir_op_fcos: + result = lp_build_cos(&bld_base->base, src[0]); + break; + case nir_op_fddx: + result = lp_build_ddx(&bld_base->base, src[0]); + break; + case nir_op_fddy: + result = lp_build_ddy(&bld_base->base, src[0]); + break; + case nir_op_fdiv: + result = lp_build_div(get_flt_bld(bld_base, src_bit_size[0]), + src[0], src[1]); + break; + case nir_op_feq32: + result = fcmp32(bld_base, PIPE_FUNC_EQUAL, src_bit_size[0], src); + break; + case nir_op_fexp2: + result = lp_build_exp2(&bld_base->base, src[0]); + break; + case nir_op_ffloor: + result = lp_build_floor(get_flt_bld(bld_base, src_bit_size[0]), src[0]); + break; + case nir_op_ffma: + result = lp_build_fmuladd(builder, src[0], src[1], src[2]); + break; + case nir_op_ffract: { + struct 
lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]); + LLVMValueRef tmp = lp_build_floor(flt_bld, src[0]); + result = lp_build_sub(flt_bld, src[0], tmp); + break; + } + case nir_op_fge32: + result = fcmp32(bld_base, PIPE_FUNC_GEQUAL, src_bit_size[0], src); + break; + case nir_op_find_lsb: + result = lp_build_cttz(get_int_bld(bld_base, false, src_bit_size[0]), src[0]); + break; + case nir_op_flog2: + result = lp_build_log2_safe(&bld_base->base, src[0]); + break; + case nir_op_flt32: + result = fcmp32(bld_base, PIPE_FUNC_LESS, src_bit_size[0], src); + break; + case nir_op_fmin: + result = lp_build_min(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1]); + break; + case nir_op_fmod: { + struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]); + result = lp_build_div(flt_bld, src[0], src[1]); + result = lp_build_floor(flt_bld, result); + result = lp_build_mul(flt_bld, src[1], result); + result = lp_build_sub(flt_bld, src[0], result); + break; + } + case nir_op_fmul: + result = lp_build_mul(get_flt_bld(bld_base, src_bit_size[0]), + src[0], src[1]); + break; + case nir_op_fmax: + result = lp_build_max(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1]); + break; + case nir_op_fne32: + result = fcmp32(bld_base, PIPE_FUNC_NOTEQUAL, src_bit_size[0], src); + break; + case nir_op_fneg: + result = lp_build_negate(get_flt_bld(bld_base, src_bit_size[0]), src[0]); + break; + case nir_op_fpow: + result = lp_build_pow(&bld_base->base, src[0], src[1]); + break; + case nir_op_frcp: + result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]); + break; + case nir_op_fround_even: + result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]); + break; + case nir_op_frsq: + result = lp_build_rsqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]); + break; + case nir_op_fsat: + result = lp_build_clamp_zero_one_nanzero(get_flt_bld(bld_base, src_bit_size[0]), src[0]); + break; + case nir_op_fsign: + result = lp_build_sgn(get_flt_bld(bld_base, src_bit_size[0]), src[0]); + break; + case nir_op_fsin: + result = lp_build_sin(&bld_base->base, src[0]); + break; + case nir_op_fsqrt: + result = lp_build_sqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]); + break; + case nir_op_ftrunc: + result = lp_build_trunc(get_flt_bld(bld_base, src_bit_size[0]), src[0]); + break; + case nir_op_i2b32: + result = int_to_bool32(bld_base, src_bit_size[0], false, src[0]); + break; + case nir_op_i2f32: + result = lp_build_int_to_float(&bld_base->base, src[0]); + break; + case nir_op_i2f64: + result = lp_build_int_to_float(&bld_base->dbl_bld, src[0]); + break; + case nir_op_i2i32: + result = LLVMBuildTrunc(builder, src[0], bld_base->int_bld.vec_type, ""); + break; + case nir_op_i2i64: + result = LLVMBuildSExt(builder, src[0], bld_base->int64_bld.vec_type, ""); + break; + case nir_op_iabs: + result = lp_build_abs(&bld_base->int_bld, src[0]); + break; + case nir_op_iadd: + result = lp_build_add(get_int_bld(bld_base, false, src_bit_size[0]), + src[0], src[1]); + break; + case nir_op_iand: + result = lp_build_and(get_int_bld(bld_base, false, src_bit_size[0]), + src[0], src[1]); + break; + case nir_op_idiv: + result = lp_build_div(&bld_base->int_bld, + src[0], src[1]); + break; + case nir_op_ieq32: + result = icmp32(bld_base, PIPE_FUNC_EQUAL, false, src_bit_size[0], src); + break; + case nir_op_ige32: + result = icmp32(bld_base, PIPE_FUNC_GEQUAL, false, src_bit_size[0], src); + break; + case nir_op_ilt32: + result = icmp32(bld_base, PIPE_FUNC_LESS, false, src_bit_size[0], src); + 
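+      /* comparison opcodes produce a per-lane 0/~0 mask; icmp32/fcmp32
+       * truncate 64-bit compare results so bool results stay 32-bit. */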
break; + case nir_op_imax: + result = lp_build_max(&bld_base->int_bld, src[0], src[1]); + break; + case nir_op_imin: + result = lp_build_min(&bld_base->int_bld, src[0], src[1]); + break; + case nir_op_imul: + result = lp_build_mul(&bld_base->int_bld, + src[0], src[1]); + break; + case nir_op_imul_high: { + LLVMValueRef hi_bits; + lp_build_mul_32_lohi(&bld_base->int_bld, src[0], src[1], &hi_bits); + result = hi_bits; + break; + } + case nir_op_ine32: + result = icmp32(bld_base, PIPE_FUNC_NOTEQUAL, false, src_bit_size[0], src); + break; + case nir_op_ineg: + result = lp_build_negate(get_int_bld(bld_base, false, src_bit_size[0]), src[0]); + break; + case nir_op_inot: + result = lp_build_not(get_int_bld(bld_base, false, src_bit_size[0]), src[0]); + break; + case nir_op_ior: + result = lp_build_or(get_int_bld(bld_base, false, src_bit_size[0]), + src[0], src[1]); + break; + case nir_op_ishl: + src[1] = lp_build_and(&bld_base->uint_bld, src[1], lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, (src_bit_size[0] - 1))); + result = lp_build_shl(&bld_base->int_bld, src[0], src[1]); + break; + case nir_op_ishr: + src[1] = lp_build_and(&bld_base->uint_bld, src[1], lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, (src_bit_size[0] - 1))); + result = lp_build_shr(&bld_base->int_bld, src[0], src[1]); + break; + case nir_op_isign: + result = lp_build_sgn(&bld_base->int_bld, src[0]); + break; + case nir_op_ixor: + result = lp_build_xor(get_int_bld(bld_base, false, src_bit_size[0]), + src[0], src[1]); + break; + case nir_op_mov: + result = src[0]; + break; + case nir_op_unpack_64_2x32_split_x: + result = split_64bit(bld_base, src[0], false); + break; + case nir_op_unpack_64_2x32_split_y: + result = split_64bit(bld_base, src[0], true); + break; + + case nir_op_pack_64_2x32_split: { + LLVMValueRef tmp = merge_64bit(bld_base, src[0], src[1]); + result = LLVMBuildBitCast(builder, tmp, bld_base->dbl_bld.vec_type, ""); + break; + } + case nir_op_u2f32: + result = LLVMBuildUIToFP(builder, src[0], bld_base->base.vec_type, ""); + break; + case nir_op_u2f64: + result = LLVMBuildUIToFP(builder, src[0], bld_base->dbl_bld.vec_type, ""); + break; + case nir_op_u2u32: + result = LLVMBuildTrunc(builder, src[0], bld_base->uint_bld.vec_type, ""); + break; + case nir_op_u2u64: + result = LLVMBuildZExt(builder, src[0], bld_base->uint64_bld.vec_type, ""); + break; + case nir_op_udiv: + result = lp_build_div(&bld_base->uint_bld, + src[0], src[1]); + break; + case nir_op_uge32: + result = icmp32(bld_base, PIPE_FUNC_GEQUAL, true, src_bit_size[0], src); + break; + case nir_op_ult32: + result = icmp32(bld_base, PIPE_FUNC_LESS, true, src_bit_size[0], src); + break; + case nir_op_umax: + result = lp_build_max(&bld_base->uint_bld, src[0], src[1]); + break; + case nir_op_umin: + result = lp_build_min(&bld_base->uint_bld, src[0], src[1]); + break; + case nir_op_umod: + result = lp_build_mod(&bld_base->uint_bld, src[0], src[1]); + break; + case nir_op_umul_high: { + LLVMValueRef hi_bits; + lp_build_mul_32_lohi(&bld_base->uint_bld, src[0], src[1], &hi_bits); + result = hi_bits; + break; + } + case nir_op_ushr: + src[1] = lp_build_and(&bld_base->uint_bld, src[1], lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, (src_bit_size[0] - 1))); + result = lp_build_shr(&bld_base->uint_bld, src[0], src[1]); + break; + default: + assert(0); + break; + } + return result; +} + +static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr *instr) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + 
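+   /* gather the (swizzled) sources first, then evaluate the op either
+    * whole-vector (for vecN construction) or channel-by-channel below. */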
LLVMValueRef src[4]; + unsigned src_bit_size[4]; + unsigned num_components = nir_dest_num_components(instr->dest.dest); + unsigned src_components; + switch (instr->op) { + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + src_components = 1; + break; + case nir_op_pack_half_2x16: + src_components = 2; + break; + case nir_op_unpack_half_2x16: + src_components = 1; + break; + case nir_op_cube_face_coord: + case nir_op_cube_face_index: + src_components = 3; + break; + default: + src_components = num_components; + break; + } + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { + src[i] = get_alu_src(bld_base, instr->src[i], src_components); + src_bit_size[i] = nir_src_bit_size(instr->src[i].src); + } + + LLVMValueRef result[4]; + if (instr->op == nir_op_vec4 || instr->op == nir_op_vec3 || instr->op == nir_op_vec2) { + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { + result[i] = cast_type(bld_base, src[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]); + } + } else { + for (unsigned c = 0; c < num_components; c++) { + LLVMValueRef src_chan[4]; + + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { + if (num_components > 1) { + src_chan[i] = LLVMBuildExtractValue(gallivm->builder, + src[i], c, ""); + } else + src_chan[i] = src[i]; + src_chan[i] = cast_type(bld_base, src_chan[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]); + } + result[c] = do_alu_action(bld_base, instr->op, src_bit_size, src_chan); + result[c] = cast_type(bld_base, result[c], nir_op_infos[instr->op].output_type, nir_dest_bit_size(instr->dest.dest)); + } + } + assign_alu_dest(bld_base, &instr->dest, result); + } + +static void visit_load_const(struct lp_build_nir_context *bld_base, + const nir_load_const_instr *instr) +{ + LLVMValueRef result[4]; + struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size); + for (unsigned i = 0; i < instr->def.num_components; i++) + result[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->value[i].u64); + assign_ssa_dest(bld_base, &instr->def, result); +} + +static void +get_deref_offset(struct lp_build_nir_context *bld_base, nir_deref_instr *instr, + bool vs_in, unsigned *vertex_index_out, + LLVMValueRef *vertex_index_ref, + unsigned *const_out, LLVMValueRef *indir_out) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + nir_variable *var = nir_deref_instr_get_variable(instr); + nir_deref_path path; + unsigned idx_lvl = 1; + + nir_deref_path_init(&path, instr, NULL); + + if (vertex_index_out != NULL || vertex_index_ref != NULL) { + if (vertex_index_ref) { + *vertex_index_ref = get_src(bld_base, path.path[idx_lvl]->arr.index); + if (vertex_index_out) + *vertex_index_out = 0; + } else { + *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index); + } + ++idx_lvl; + } + + uint32_t const_offset = 0; + LLVMValueRef offset = NULL; + + if (var->data.compact) { + assert(instr->deref_type == nir_deref_type_array); + const_offset = nir_src_as_uint(instr->arr.index); + goto out; + } + + for (; path.path[idx_lvl]; ++idx_lvl) { + const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type; + if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) { + unsigned index = path.path[idx_lvl]->strct.index; + + for (unsigned i = 0; i < index; i++) { + const struct glsl_type *ft = glsl_get_struct_field(parent_type, i); + const_offset += glsl_count_attribute_slots(ft, vs_in); + } + } else if(path.path[idx_lvl]->deref_type == nir_deref_type_array) 
{
+         unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in);
+         if (nir_src_is_const(path.path[idx_lvl]->arr.index)) {
+            const_offset += nir_src_comp_as_int(path.path[idx_lvl]->arr.index, 0) * size;
+         } else {
+            LLVMValueRef idx_src = get_src(bld_base, path.path[idx_lvl]->arr.index);
+            idx_src = cast_type(bld_base, idx_src, nir_type_uint, 32);
+            LLVMValueRef array_off = lp_build_mul(&bld_base->uint_bld, lp_build_const_int_vec(bld_base->base.gallivm, bld_base->base.type, size),
+                                                  idx_src);
+            if (offset)
+               offset = lp_build_add(&bld_base->uint_bld, offset, array_off);
+            else
+               offset = array_off;
+         }
+      } else
+         unreachable("Unhandled deref type in get_deref_offset");
+   }
+
+out:
+   nir_deref_path_finish(&path);
+
+   if (const_offset && offset)
+      offset = LLVMBuildAdd(builder, offset,
+                            lp_build_const_int_vec(bld_base->base.gallivm, bld_base->uint_bld.type, const_offset),
+                            "");
+   *const_out = const_offset;
+   *indir_out = offset;
+}
+
+static void visit_load_var(struct lp_build_nir_context *bld_base,
+                           nir_intrinsic_instr *instr,
+                           LLVMValueRef result[4])
+{
+   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+   nir_variable_mode mode = deref->mode;
+   unsigned const_index;
+   LLVMValueRef indir_index;
+   unsigned vertex_index = 0;
+   unsigned nc = nir_dest_num_components(instr->dest);
+   unsigned bit_size = nir_dest_bit_size(instr->dest);
+   if (var) {
+      bool vs_in = bld_base->shader->info.stage == MESA_SHADER_VERTEX &&
+         var->data.mode == nir_var_shader_in;
+      bool gs_in = bld_base->shader->info.stage == MESA_SHADER_GEOMETRY &&
+         var->data.mode == nir_var_shader_in;
+      mode = var->data.mode;
+
+      get_deref_offset(bld_base, deref, vs_in, gs_in ? &vertex_index : NULL, NULL,
+                       &const_index, &indir_index);
+   }
+   bld_base->load_var(bld_base, mode, nc, bit_size, var, vertex_index, const_index, indir_index, result);
+}
+
+static void
+visit_store_var(struct lp_build_nir_context *bld_base,
+                nir_intrinsic_instr *instr)
+{
+   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+   nir_variable_mode mode = deref->mode;
+   int writemask = instr->const_index[0];
+   unsigned bit_size = nir_src_bit_size(instr->src[1]);
+   LLVMValueRef src = get_src(bld_base, instr->src[1]);
+   unsigned const_index = 0;
+   LLVMValueRef indir_index;
+   if (var)
+      get_deref_offset(bld_base, deref, false, NULL, NULL,
+                       &const_index, &indir_index);
+   bld_base->store_var(bld_base, mode, bit_size, instr->num_components, writemask, const_index, var, src);
+}
+
+static void visit_load_ubo(struct lp_build_nir_context *bld_base,
+                           nir_intrinsic_instr *instr,
+                           LLVMValueRef result[4])
+{
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef idx = get_src(bld_base, instr->src[0]);
+   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
+
+   bool offset_is_uniform = nir_src_is_dynamically_uniform(instr->src[1]);
+   /* the load_ubo callback takes a scalar buffer index; use lane 0 */
+   idx = LLVMBuildExtractElement(builder, idx, lp_build_const_int32(gallivm, 0), "");
+   bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
+                      offset_is_uniform, idx, offset, result);
+}
+
+
+static void visit_load_ssbo(struct lp_build_nir_context *bld_base,
+                            nir_intrinsic_instr *instr,
+                            LLVMValueRef result[4])
+{
+   LLVMValueRef idx = get_src(bld_base, instr->src[0]);
+   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
+
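+   /* SSBO and shared-memory loads share the load_mem callback; shared
+    * loads (see visit_shared_load) pass a NULL index instead of an SSBO index. */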
bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest), + idx, offset, result); +} + +static void visit_store_ssbo(struct lp_build_nir_context *bld_base, + nir_intrinsic_instr *instr) +{ + LLVMValueRef val = get_src(bld_base, instr->src[0]); + LLVMValueRef idx = get_src(bld_base, instr->src[1]); + LLVMValueRef offset = get_src(bld_base, instr->src[2]); + int writemask = instr->const_index[0]; + int nc = nir_src_num_components(instr->src[0]); + int bitsize = nir_src_bit_size(instr->src[0]); + bld_base->store_mem(bld_base, writemask, nc, bitsize, idx, offset, val); +} + +static void visit_get_buffer_size(struct lp_build_nir_context *bld_base, + nir_intrinsic_instr *instr, + LLVMValueRef result[4]) +{ + LLVMValueRef idx = get_src(bld_base, instr->src[0]); + result[0] = bld_base->get_buffer_size(bld_base, idx); +} + +static void visit_ssbo_atomic(struct lp_build_nir_context *bld_base, + nir_intrinsic_instr *instr, + LLVMValueRef result[4]) +{ + LLVMValueRef idx = get_src(bld_base, instr->src[0]); + LLVMValueRef offset = get_src(bld_base, instr->src[1]); + LLVMValueRef val = get_src(bld_base, instr->src[2]); + LLVMValueRef val2 = NULL; + if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) + val2 = get_src(bld_base, instr->src[3]); + + bld_base->atomic_mem(bld_base, instr->intrinsic, idx, offset, val, val2, &result[0]); + +} + +static void visit_load_image(struct lp_build_nir_context *bld_base, + nir_intrinsic_instr *instr, + LLVMValueRef result[4]) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr); + nir_variable *var = nir_deref_instr_get_variable(deref); + LLVMValueRef coord_val = get_src(bld_base, instr->src[1]); + LLVMValueRef coords[5]; + struct lp_img_params params; + const struct glsl_type *type = glsl_without_array(var->type); + + memset(¶ms, 0, sizeof(params)); + params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type)); + for (unsigned i = 0; i < 4; i++) + coords[i] = LLVMBuildExtractValue(builder, coord_val, i, ""); + if (params.target == PIPE_TEXTURE_1D_ARRAY) + coords[2] = coords[1]; + + params.coords = coords; + params.outdata = result; + params.img_op = LP_IMG_LOAD; + params.image_index = var->data.binding; + bld_base->image_op(bld_base, ¶ms); +} + +static void visit_store_image(struct lp_build_nir_context *bld_base, + nir_intrinsic_instr *instr) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr); + nir_variable *var = nir_deref_instr_get_variable(deref); + LLVMValueRef coord_val = get_src(bld_base, instr->src[1]); + LLVMValueRef in_val = get_src(bld_base, instr->src[3]); + LLVMValueRef coords[5]; + struct lp_img_params params; + const struct glsl_type *type = glsl_without_array(var->type); + + memset(¶ms, 0, sizeof(params)); + params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type)); + for (unsigned i = 0; i < 4; i++) + coords[i] = LLVMBuildExtractValue(builder, coord_val, i, ""); + if (params.target == PIPE_TEXTURE_1D_ARRAY) + coords[2] = coords[1]; + params.coords = coords; + + for (unsigned i = 0; i < 4; i++) { + params.indata[i] = LLVMBuildExtractValue(builder, in_val, i, ""); + params.indata[i] = LLVMBuildBitCast(builder, params.indata[i], bld_base->base.vec_type, ""); + } + 
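+   /* the store data was bitcast to float lanes above; the image_op
+    * callback performs the actual LP_IMG_STORE. */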
params.img_op = LP_IMG_STORE; + params.image_index = var->data.binding; + + if (params.target == PIPE_TEXTURE_1D_ARRAY) + coords[2] = coords[1]; + bld_base->image_op(bld_base, ¶ms); +} + +static void visit_atomic_image(struct lp_build_nir_context *bld_base, + nir_intrinsic_instr *instr, + LLVMValueRef result[4]) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr); + nir_variable *var = nir_deref_instr_get_variable(deref); + struct lp_img_params params; + LLVMValueRef coord_val = get_src(bld_base, instr->src[1]); + LLVMValueRef in_val = get_src(bld_base, instr->src[3]); + LLVMValueRef coords[5]; + const struct glsl_type *type = glsl_without_array(var->type); + + memset(¶ms, 0, sizeof(params)); + + switch (instr->intrinsic) { + case nir_intrinsic_image_deref_atomic_add: + params.op = LLVMAtomicRMWBinOpAdd; + break; + case nir_intrinsic_image_deref_atomic_exchange: + params.op = LLVMAtomicRMWBinOpXchg; + break; + case nir_intrinsic_image_deref_atomic_and: + params.op = LLVMAtomicRMWBinOpAnd; + break; + case nir_intrinsic_image_deref_atomic_or: + params.op = LLVMAtomicRMWBinOpOr; + break; + case nir_intrinsic_image_deref_atomic_xor: + params.op = LLVMAtomicRMWBinOpXor; + break; + case nir_intrinsic_image_deref_atomic_umin: + params.op = LLVMAtomicRMWBinOpUMin; + break; + case nir_intrinsic_image_deref_atomic_umax: + params.op = LLVMAtomicRMWBinOpUMax; + break; + case nir_intrinsic_image_deref_atomic_imin: + params.op = LLVMAtomicRMWBinOpMin; + break; + case nir_intrinsic_image_deref_atomic_imax: + params.op = LLVMAtomicRMWBinOpMax; + break; + default: + break; + } + + params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type)); + for (unsigned i = 0; i < 4; i++) + coords[i] = LLVMBuildExtractValue(builder, coord_val, i, ""); + if (params.target == PIPE_TEXTURE_1D_ARRAY) + coords[2] = coords[1]; + params.coords = coords; + if (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) { + LLVMValueRef cas_val = get_src(bld_base, instr->src[4]); + params.indata[0] = in_val; + params.indata2[0] = cas_val; + } else + params.indata[0] = in_val; + + params.outdata = result; + params.img_op = (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) ? 
LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC; + params.image_index = var->data.binding; + + bld_base->image_op(bld_base, ¶ms); +} + + +static void visit_image_size(struct lp_build_nir_context *bld_base, + nir_intrinsic_instr *instr, + LLVMValueRef result[4]) +{ + nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr); + nir_variable *var = nir_deref_instr_get_variable(deref); + struct lp_sampler_size_query_params params = { 0 }; + params.texture_unit = var->data.binding; + params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(var->type), glsl_sampler_type_is_array(var->type)); + params.sizes_out = result; + + bld_base->image_size(bld_base, ¶ms); +} + +static void visit_shared_load(struct lp_build_nir_context *bld_base, + nir_intrinsic_instr *instr, + LLVMValueRef result[4]) +{ + LLVMValueRef offset = get_src(bld_base, instr->src[0]); + bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest), + NULL, offset, result); +} + +static void visit_shared_store(struct lp_build_nir_context *bld_base, + nir_intrinsic_instr *instr) +{ + LLVMValueRef val = get_src(bld_base, instr->src[0]); + LLVMValueRef offset = get_src(bld_base, instr->src[1]); + int writemask = instr->const_index[1]; + int nc = nir_src_num_components(instr->src[0]); + int bitsize = nir_src_bit_size(instr->src[0]); + bld_base->store_mem(bld_base, writemask, nc, bitsize, NULL, offset, val); +} + +static void visit_shared_atomic(struct lp_build_nir_context *bld_base, + nir_intrinsic_instr *instr, + LLVMValueRef result[4]) +{ + LLVMValueRef offset = get_src(bld_base, instr->src[0]); + LLVMValueRef val = get_src(bld_base, instr->src[1]); + LLVMValueRef val2 = NULL; + if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap) + val2 = get_src(bld_base, instr->src[2]); + + bld_base->atomic_mem(bld_base, instr->intrinsic, NULL, offset, val, val2, &result[0]); + +} + +static void visit_barrier(struct lp_build_nir_context *bld_base) +{ + bld_base->barrier(bld_base); +} + +static void visit_discard(struct lp_build_nir_context *bld_base, + nir_intrinsic_instr *instr) +{ + LLVMValueRef cond = NULL; + if (instr->intrinsic == nir_intrinsic_discard_if) { + cond = get_src(bld_base, instr->src[0]); + cond = cast_type(bld_base, cond, nir_type_int, 32); + } + bld_base->discard(bld_base, cond); +} + +static void visit_intrinsic(struct lp_build_nir_context *bld_base, + nir_intrinsic_instr *instr) +{ + LLVMValueRef result[4] = {0}; + switch (instr->intrinsic) { + case nir_intrinsic_load_deref: + visit_load_var(bld_base, instr, result); + break; + case nir_intrinsic_store_deref: + visit_store_var(bld_base, instr); + break; + case nir_intrinsic_load_ubo: + visit_load_ubo(bld_base, instr, result); + break; + case nir_intrinsic_load_ssbo: + visit_load_ssbo(bld_base, instr, result); + break; + case nir_intrinsic_store_ssbo: + visit_store_ssbo(bld_base, instr); + break; + case nir_intrinsic_get_buffer_size: + visit_get_buffer_size(bld_base, instr, result); + break; + case nir_intrinsic_load_vertex_id: + case nir_intrinsic_load_primitive_id: + case nir_intrinsic_load_instance_id: + case nir_intrinsic_load_work_group_id: + case nir_intrinsic_load_local_invocation_id: + case nir_intrinsic_load_num_work_groups: + case nir_intrinsic_load_invocation_id: + bld_base->sysval_intrin(bld_base, instr, result); + break; + case nir_intrinsic_discard_if: + case nir_intrinsic_discard: + visit_discard(bld_base, instr); + break; + case nir_intrinsic_emit_vertex: + bld_base->emit_vertex(bld_base, 
nir_intrinsic_stream_id(instr)); + break; + case nir_intrinsic_end_primitive: + bld_base->end_primitive(bld_base, nir_intrinsic_stream_id(instr)); + break; + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_ssbo_atomic_comp_swap: + visit_ssbo_atomic(bld_base, instr, result); + break; + case nir_intrinsic_image_deref_load: + visit_load_image(bld_base, instr, result); + break; + case nir_intrinsic_image_deref_store: + visit_store_image(bld_base, instr); + break; + case nir_intrinsic_image_deref_atomic_add: + case nir_intrinsic_image_deref_atomic_imin: + case nir_intrinsic_image_deref_atomic_imax: + case nir_intrinsic_image_deref_atomic_umin: + case nir_intrinsic_image_deref_atomic_umax: + case nir_intrinsic_image_deref_atomic_and: + case nir_intrinsic_image_deref_atomic_or: + case nir_intrinsic_image_deref_atomic_xor: + case nir_intrinsic_image_deref_atomic_exchange: + case nir_intrinsic_image_deref_atomic_comp_swap: + visit_atomic_image(bld_base, instr, result); + break; + case nir_intrinsic_image_deref_size: + visit_image_size(bld_base, instr, result); + break; + case nir_intrinsic_load_shared: + visit_shared_load(bld_base, instr, result); + break; + case nir_intrinsic_store_shared: + visit_shared_store(bld_base, instr); + break; + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_shared_atomic_comp_swap: + visit_shared_atomic(bld_base, instr, result); + break; + case nir_intrinsic_barrier: + visit_barrier(bld_base); + break; + case nir_intrinsic_memory_barrier: + break; + default: + assert(0); + break; + } + if (result[0]) { + assign_dest(bld_base, &instr->dest, result); + } +} + +static void visit_txs(struct lp_build_nir_context *bld_base, nir_tex_instr *instr) +{ + struct lp_sampler_size_query_params params; + LLVMValueRef sizes_out[4]; + LLVMValueRef explicit_lod = NULL; + + for (unsigned i = 0; i < instr->num_srcs; i++) { + switch (instr->src[i].src_type) { + case nir_tex_src_lod: + explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32); + break; + default: + break; + } + } + + params.target = glsl_sampler_to_pipe(instr->sampler_dim, instr->is_array); + params.texture_unit = instr->texture_index; + params.explicit_lod = explicit_lod; + params.is_sviewinfo = TRUE; + params.sizes_out = sizes_out; + + if (instr->op == nir_texop_query_levels) + params.explicit_lod = bld_base->uint_bld.zero; + bld_base->tex_size(bld_base, ¶ms); + assign_dest(bld_base, &instr->dest, &sizes_out[instr->op == nir_texop_query_levels ? 
3 : 0]); +} + +static enum lp_sampler_lod_property lp_build_nir_lod_property(struct lp_build_nir_context *bld_base, + nir_src lod_src) +{ + enum lp_sampler_lod_property lod_property; + + if (nir_src_is_dynamically_uniform(lod_src)) + lod_property = LP_SAMPLER_LOD_SCALAR; + else if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) { + if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) + lod_property = LP_SAMPLER_LOD_PER_ELEMENT; + else + lod_property = LP_SAMPLER_LOD_PER_QUAD; + } + else + lod_property = LP_SAMPLER_LOD_PER_ELEMENT; + return lod_property; +} + +static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *instr) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef coords[5]; + LLVMValueRef offsets[3] = { NULL }; + LLVMValueRef explicit_lod = NULL, projector = NULL; + struct lp_sampler_params params; + struct lp_derivatives derivs; + unsigned sample_key = 0; + nir_deref_instr *texture_deref_instr = NULL; + nir_deref_instr *sampler_deref_instr = NULL; + LLVMValueRef texel[4]; + unsigned lod_src = 0; + LLVMValueRef coord_undef = LLVMGetUndef(bld_base->base.int_vec_type); + + memset(¶ms, 0, sizeof(params)); + enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR; + + if (instr->op == nir_texop_txs || instr->op == nir_texop_query_levels) { + visit_txs(bld_base, instr); + return; + } + if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms) + sample_key |= LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT; + else if (instr->op == nir_texop_tg4) + sample_key |= LP_SAMPLER_OP_GATHER << LP_SAMPLER_OP_TYPE_SHIFT; + else if (instr->op == nir_texop_lod) + sample_key |= LP_SAMPLER_OP_LODQ << LP_SAMPLER_OP_TYPE_SHIFT; + for (unsigned i = 0; i < instr->num_srcs; i++) { + switch (instr->src[i].src_type) { + case nir_tex_src_coord: { + LLVMValueRef coord = get_src(bld_base, instr->src[i].src); + if (instr->coord_components == 1) + coords[0] = coord; + else { + for (unsigned chan = 0; chan < instr->coord_components; ++chan) + coords[chan] = LLVMBuildExtractValue(builder, coord, + chan, ""); + } + for (unsigned chan = instr->coord_components; chan < 5; chan++) + coords[chan] = coord_undef; + + break; + } + case nir_tex_src_texture_deref: + texture_deref_instr = nir_src_as_deref(instr->src[i].src); + break; + case nir_tex_src_sampler_deref: + sampler_deref_instr = nir_src_as_deref(instr->src[i].src); + break; + case nir_tex_src_projector: + projector = lp_build_rcp(&bld_base->base, cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32)); + break; + case nir_tex_src_comparator: + sample_key |= LP_SAMPLER_SHADOW; + coords[4] = get_src(bld_base, instr->src[i].src); + coords[4] = cast_type(bld_base, coords[4], nir_type_float, 32); + break; + case nir_tex_src_bias: + sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT; + lod_src = i; + explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32); + break; + case nir_tex_src_lod: + sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT; + lod_src = i; + if (instr->op == nir_texop_txf) + explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32); + else + explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32); + break; + case nir_tex_src_ddx: { + int deriv_cnt = instr->coord_components; + if (instr->is_array) + deriv_cnt--; + LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src); + if 
(deriv_cnt == 1) + derivs.ddx[0] = deriv_val; + else + for (unsigned chan = 0; chan < deriv_cnt; ++chan) + derivs.ddx[chan] = LLVMBuildExtractValue(builder, deriv_val, + chan, ""); + for (unsigned chan = 0; chan < deriv_cnt; ++chan) + derivs.ddx[chan] = cast_type(bld_base, derivs.ddx[chan], nir_type_float, 32); + break; + } + case nir_tex_src_ddy: { + int deriv_cnt = instr->coord_components; + if (instr->is_array) + deriv_cnt--; + LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src); + if (deriv_cnt == 1) + derivs.ddy[0] = deriv_val; + else + for (unsigned chan = 0; chan < deriv_cnt; ++chan) + derivs.ddy[chan] = LLVMBuildExtractValue(builder, deriv_val, + chan, ""); + for (unsigned chan = 0; chan < deriv_cnt; ++chan) + derivs.ddy[chan] = cast_type(bld_base, derivs.ddy[chan], nir_type_float, 32); + break; + } + case nir_tex_src_offset: { + int offset_cnt = instr->coord_components; + if (instr->is_array) + offset_cnt--; + LLVMValueRef offset_val = get_src(bld_base, instr->src[i].src); + sample_key |= LP_SAMPLER_OFFSETS; + if (offset_cnt == 1) + offsets[0] = offset_val; + else { + for (unsigned chan = 0; chan < offset_cnt; ++chan) + offsets[chan] = LLVMBuildExtractValue(builder, offset_val, + chan, ""); + } + break; + } + case nir_tex_src_ms_index: + break; + default: + assert(0); + break; + } + } + if (!sampler_deref_instr) + sampler_deref_instr = texture_deref_instr; + + if (explicit_lod) + lod_property = lp_build_nir_lod_property(bld_base, instr->src[lod_src].src); + + if (instr->op == nir_texop_tex || instr->op == nir_texop_tg4 || instr->op == nir_texop_txb || + instr->op == nir_texop_txl || instr->op == nir_texop_txd || instr->op == nir_texop_lod) + for (unsigned chan = 0; chan < instr->coord_components; ++chan) + coords[chan] = cast_type(bld_base, coords[chan], nir_type_float, 32); + else if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms) + for (unsigned chan = 0; chan < instr->coord_components; ++chan) + coords[chan] = cast_type(bld_base, coords[chan], nir_type_int, 32); + + if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D) { + /* move layer coord for 1d arrays. 
*/ + coords[2] = coords[1]; + coords[1] = coord_undef; + } + + if (projector) { + for (unsigned chan = 0; chan < instr->coord_components; ++chan) + coords[chan] = lp_build_mul(&bld_base->base, coords[chan], projector); + if (sample_key & LP_SAMPLER_SHADOW) + coords[4] = lp_build_mul(&bld_base->base, coords[4], projector); + } + + uint32_t base_index = 0; + if (!texture_deref_instr) { + int samp_src_index = nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle); + if (samp_src_index == -1) { + base_index = instr->sampler_index; + } + } + + if (instr->op == nir_texop_txd) { + sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT; + params.derivs = &derivs; + if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) { + if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) + lod_property = LP_SAMPLER_LOD_PER_ELEMENT; + else + lod_property = LP_SAMPLER_LOD_PER_QUAD; + } else + lod_property = LP_SAMPLER_LOD_PER_ELEMENT; + } + + sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT; + params.sample_key = sample_key; + params.offsets = offsets; + params.texture_index = base_index; + params.sampler_index = base_index; + params.coords = coords; + params.texel = texel; + params.lod = explicit_lod; + bld_base->tex(bld_base, ¶ms); + assign_dest(bld_base, &instr->dest, texel); +} + +static void visit_ssa_undef(struct lp_build_nir_context *bld_base, + const nir_ssa_undef_instr *instr) +{ + unsigned num_components = instr->def.num_components; + LLVMValueRef undef[4]; + for (unsigned i = 0; i < num_components; i++) + undef[i] = LLVMGetUndef(bld_base->base.vec_type); + assign_ssa_dest(bld_base, &instr->def, undef); +} + +static void visit_jump(struct lp_build_nir_context *bld_base, + const nir_jump_instr *instr) +{ + switch (instr->type) { + case nir_jump_break: + bld_base->break_stmt(bld_base); + break; + case nir_jump_continue: + bld_base->continue_stmt(bld_base); + break; + default: + unreachable("Unknown jump instr\n"); + } +} + +static void visit_deref(struct lp_build_nir_context *bld_base, + nir_deref_instr *instr) +{ + if (instr->mode != nir_var_mem_shared && + instr->mode != nir_var_mem_global) + return; + LLVMValueRef result = NULL; + switch(instr->deref_type) { + case nir_deref_type_var: { + struct hash_entry *entry = _mesa_hash_table_search(bld_base->vars, instr->var); + result = entry->data; + break; + } + default: + unreachable("Unhandled deref_instr deref type"); + } + + assign_ssa(bld_base, instr->dest.ssa.index, result); +} + +static void visit_block(struct lp_build_nir_context *bld_base, nir_block *block) +{ + nir_foreach_instr(instr, block) + { + switch (instr->type) { + case nir_instr_type_alu: + visit_alu(bld_base, nir_instr_as_alu(instr)); + break; + case nir_instr_type_load_const: + visit_load_const(bld_base, nir_instr_as_load_const(instr)); + break; + case nir_instr_type_intrinsic: + visit_intrinsic(bld_base, nir_instr_as_intrinsic(instr)); + break; + case nir_instr_type_tex: + visit_tex(bld_base, nir_instr_as_tex(instr)); + break; + case nir_instr_type_phi: + assert(0); + break; + case nir_instr_type_ssa_undef: + visit_ssa_undef(bld_base, nir_instr_as_ssa_undef(instr)); + break; + case nir_instr_type_jump: + visit_jump(bld_base, nir_instr_as_jump(instr)); + break; + case nir_instr_type_deref: + visit_deref(bld_base, nir_instr_as_deref(instr)); + break; + default: + fprintf(stderr, "Unknown NIR instr type: "); + nir_print_instr(instr, stderr); + fprintf(stderr, "\n"); + abort(); + } + } +} + +static void visit_if(struct lp_build_nir_context *bld_base, nir_if 
*if_stmt)
+{
+   LLVMValueRef cond = get_src(bld_base, if_stmt->condition);
+
+   bld_base->if_cond(bld_base, cond);
+   visit_cf_list(bld_base, &if_stmt->then_list);
+
+   if (!exec_list_is_empty(&if_stmt->else_list)) {
+      bld_base->else_stmt(bld_base);
+      visit_cf_list(bld_base, &if_stmt->else_list);
+   }
+   bld_base->endif_stmt(bld_base);
+}
+
+static void visit_loop(struct lp_build_nir_context *bld_base, nir_loop *loop)
+{
+   bld_base->bgnloop(bld_base);
+   visit_cf_list(bld_base, &loop->body);
+   bld_base->endloop(bld_base);
+}
+
+static void visit_cf_list(struct lp_build_nir_context *bld_base,
+                          struct exec_list *list)
+{
+   foreach_list_typed(nir_cf_node, node, node, list)
+   {
+      switch (node->type) {
+      case nir_cf_node_block:
+         visit_block(bld_base, nir_cf_node_as_block(node));
+         break;
+
+      case nir_cf_node_if:
+         visit_if(bld_base, nir_cf_node_as_if(node));
+         break;
+
+      case nir_cf_node_loop:
+         visit_loop(bld_base, nir_cf_node_as_loop(node));
+         break;
+
+      default:
+         assert(0);
+      }
+   }
+}
+
+static void
+handle_shader_output_decl(struct lp_build_nir_context *bld_base,
+                          struct nir_shader *nir,
+                          struct nir_variable *variable)
+{
+   bld_base->emit_var_decl(bld_base, variable);
+}
+
+/* vector registers are stored as arrays on the LLVM side,
+   so we can use GEP on them; exec mask stores have to
+   operate on single components, so the arrays are laid out as:
+   0.x, 1.x, 2.x, 3.x
+   0.y, 1.y, 2.y, 3.y
+   ....
+*/
+static LLVMTypeRef get_register_type(struct lp_build_nir_context *bld_base,
+                                     nir_register *reg)
+{
+   struct lp_build_context *int_bld = get_int_bld(bld_base, true, reg->bit_size);
+
+   LLVMTypeRef type = int_bld->vec_type;
+   if (reg->num_array_elems)
+      type = LLVMArrayType(type, reg->num_array_elems);
+   if (reg->num_components > 1)
+      type = LLVMArrayType(type, reg->num_components);
+
+   return type;
+}
+
+
+bool lp_build_nir_llvm(
+   struct lp_build_nir_context *bld_base,
+   struct nir_shader *nir)
+{
+   struct nir_function *func;
+
+   /* convert out of SSA and lower locals to registers, so everything
+    * below maps onto the alloca-backed register storage */
+   nir_convert_from_ssa(nir, true);
+   nir_lower_locals_to_regs(nir);
+   nir_remove_dead_derefs(nir);
+   nir_remove_dead_variables(nir, nir_var_function_temp);
+
+   nir_foreach_variable(variable, &nir->outputs)
+      handle_shader_output_decl(bld_base, nir, variable);
+
+   bld_base->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                            _mesa_key_pointer_equal);
+   bld_base->vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                            _mesa_key_pointer_equal);
+
+   func = (struct nir_function *)exec_list_get_head(&nir->functions);
+
+   nir_foreach_register(reg, &func->impl->registers) {
+      LLVMTypeRef type = get_register_type(bld_base, reg);
+      LLVMValueRef reg_alloc = lp_build_alloca_undef(bld_base->base.gallivm,
+                                                     type, "reg");
+      _mesa_hash_table_insert(bld_base->regs, reg, reg_alloc);
+   }
+   nir_index_ssa_defs(func->impl);
+   bld_base->ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
+   visit_cf_list(bld_base, &func->impl->body);
+
+   free(bld_base->ssa_defs);
+   ralloc_free(bld_base->vars);
+   ralloc_free(bld_base->regs);
+   return true;
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.h b/src/gallium/auxiliary/gallivm/lp_bld_nir.h
new file mode 100644
index 00000000000..b0d16710122
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.h
@@ -0,0 +1,210 @@
+/**************************************************************************
+ *
+ * Copyright 2019 Red Hat.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **************************************************************************/ + +#ifndef LP_BLD_NIR_H +#define LP_BLD_NIR_H + +#include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_limits.h" +#include "lp_bld_type.h" + +#include "gallivm/lp_bld_tgsi.h" +#include "nir.h" + +struct nir_shader; + +void lp_build_nir_soa(struct gallivm_state *gallivm, + struct nir_shader *shader, + const struct lp_build_tgsi_params *params, + LLVMValueRef (*outputs)[4]); + +struct lp_build_nir_context +{ + struct lp_build_context base; + struct lp_build_context uint_bld; + struct lp_build_context int_bld; + struct lp_build_context dbl_bld; + struct lp_build_context uint64_bld; + struct lp_build_context int64_bld; + + LLVMValueRef *ssa_defs; + struct hash_table *regs; + struct hash_table *vars; + + nir_shader *shader; + + void (*load_ubo)(struct lp_build_nir_context *bld_base, + unsigned nc, + unsigned bit_size, + bool offset_is_uniform, + LLVMValueRef index, LLVMValueRef offset, LLVMValueRef result[4]); + + /* for SSBO and shared memory */ + void (*load_mem)(struct lp_build_nir_context *bld_base, + unsigned nc, unsigned bit_size, + LLVMValueRef index, LLVMValueRef offset, LLVMValueRef result[4]); + void (*store_mem)(struct lp_build_nir_context *bld_base, + unsigned writemask, unsigned nc, unsigned bit_size, + LLVMValueRef index, LLVMValueRef offset, LLVMValueRef dst); + + void (*atomic_mem)(struct lp_build_nir_context *bld_base, + nir_intrinsic_op op, + LLVMValueRef index, LLVMValueRef offset, + LLVMValueRef val, LLVMValueRef val2, + LLVMValueRef *result); + + void (*barrier)(struct lp_build_nir_context *bld_base); + + void (*image_op)(struct lp_build_nir_context *bld_base, + struct lp_img_params *params); + void (*image_size)(struct lp_build_nir_context *bld_base, + struct lp_sampler_size_query_params *params); + LLVMValueRef (*get_buffer_size)(struct lp_build_nir_context *bld_base, + LLVMValueRef index); + + void (*load_var)(struct lp_build_nir_context *bld_base, + nir_variable_mode deref_mode, + unsigned num_components, + unsigned bit_size, + nir_variable *var, + unsigned vertex_index, + unsigned const_index, + LLVMValueRef indir_index, + LLVMValueRef result[4]); + void (*store_var)(struct lp_build_nir_context *bld_base, + nir_variable_mode deref_mode, + unsigned bit_size, + unsigned num_components, + unsigned writemask, + unsigned const_index, + nir_variable *var, LLVMValueRef dst); + + LLVMValueRef (*load_reg)(struct 
lp_build_nir_context *bld_base, + struct lp_build_context *reg_bld, + const nir_reg_src *reg, + LLVMValueRef indir_src, + LLVMValueRef reg_storage); + void (*store_reg)(struct lp_build_nir_context *bld_base, + struct lp_build_context *reg_bld, + const nir_reg_dest *reg, + unsigned writemask, + LLVMValueRef indir_src, + LLVMValueRef reg_storage, + LLVMValueRef dst[4]); + + void (*emit_var_decl)(struct lp_build_nir_context *bld_base, + nir_variable *var); + + void (*tex)(struct lp_build_nir_context *bld_base, + struct lp_sampler_params *params); + + void (*tex_size)(struct lp_build_nir_context *bld_base, + struct lp_sampler_size_query_params *params); + + void (*sysval_intrin)(struct lp_build_nir_context *bld_base, + nir_intrinsic_instr *instr, + LLVMValueRef result[4]); + void (*discard)(struct lp_build_nir_context *bld_base, + LLVMValueRef cond); + + void (*bgnloop)(struct lp_build_nir_context *bld_base); + void (*endloop)(struct lp_build_nir_context *bld_base); + void (*if_cond)(struct lp_build_nir_context *bld_base, LLVMValueRef cond); + void (*else_stmt)(struct lp_build_nir_context *bld_base); + void (*endif_stmt)(struct lp_build_nir_context *bld_base); + void (*break_stmt)(struct lp_build_nir_context *bld_base); + void (*continue_stmt)(struct lp_build_nir_context *bld_base); + + void (*emit_vertex)(struct lp_build_nir_context *bld_base, uint32_t stream_id); + void (*end_primitive)(struct lp_build_nir_context *bld_base, uint32_t stream_id); +// LLVMValueRef main_function +}; + +struct lp_build_nir_soa_context +{ + struct lp_build_nir_context bld_base; + + /* Builder for scalar elements of shader's data type (float) */ + struct lp_build_context elem_bld; + struct lp_build_context uint_elem_bld; + + LLVMValueRef consts_ptr; + LLVMValueRef const_sizes_ptr; + LLVMValueRef consts[LP_MAX_TGSI_CONST_BUFFERS]; + LLVMValueRef consts_sizes[LP_MAX_TGSI_CONST_BUFFERS]; + const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS]; + LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS]; + LLVMValueRef context_ptr; + LLVMValueRef thread_data_ptr; + + LLVMValueRef ssbo_ptr; + LLVMValueRef ssbo_sizes_ptr; + LLVMValueRef ssbos[LP_MAX_TGSI_SHADER_BUFFERS]; + LLVMValueRef ssbo_sizes[LP_MAX_TGSI_SHADER_BUFFERS]; + + LLVMValueRef shared_ptr; + + const struct lp_build_coro_suspend_info *coro; + + const struct lp_build_sampler_soa *sampler; + const struct lp_build_image_soa *image; + + const struct lp_build_gs_iface *gs_iface; + LLVMValueRef emitted_prims_vec_ptr; + LLVMValueRef total_emitted_vertices_vec_ptr; + LLVMValueRef emitted_vertices_vec_ptr; + LLVMValueRef max_output_vertices_vec; + struct lp_bld_tgsi_system_values system_values; + + nir_variable_mode indirects; + struct lp_build_mask_context *mask; + struct lp_exec_mask exec_mask; + + /* We allocate/use this array of inputs if (indirects & nir_var_shader_in) is + * set. The inputs[] array above is unused then. 
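+    * The array is indexed as attrib * 4 + chan, holding one SoA vector per
+    * channel (see how emit_prologue fills it in lp_bld_nir_soa.c). 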
+ */ + LLVMValueRef inputs_array; +}; + +bool +lp_build_nir_llvm(struct lp_build_nir_context *bld_base, + struct nir_shader *nir); + +static inline LLVMValueRef +lp_nir_array_build_gather_values(LLVMBuilderRef builder, + LLVMValueRef * values, + unsigned value_count) +{ + LLVMTypeRef arr_type = LLVMArrayType(LLVMTypeOf(values[0]), value_count); + LLVMValueRef arr = LLVMGetUndef(arr_type); + unsigned i; + + for (i = 0; i < value_count; i++) { + arr = LLVMBuildInsertValue(builder, arr, values[i], i, ""); + } + return arr; +} + +#endif diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c new file mode 100644 index 00000000000..7ac96970116 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c @@ -0,0 +1,1274 @@ +/************************************************************************** + * + * Copyright 2019 Red Hat. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **************************************************************************/ + +#include "lp_bld_nir.h" +#include "lp_bld_init.h" +#include "lp_bld_flow.h" +#include "lp_bld_logic.h" +#include "lp_bld_gather.h" +#include "lp_bld_const.h" +#include "lp_bld_struct.h" +#include "lp_bld_arit.h" +#include "lp_bld_bitarit.h" +#include "lp_bld_coro.h" +#include "lp_bld_printf.h" +#include "util/u_math.h" +/* + * combine the execution mask if there is one with the current mask. + */ +static LLVMValueRef +mask_vec(struct lp_build_nir_context *bld_base) +{ + struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base; + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; + struct lp_exec_mask *exec_mask = &bld->exec_mask; + LLVMValueRef bld_mask = bld->mask ? 
lp_build_mask_value(bld->mask) : NULL;
+   if (!exec_mask->has_mask) {
+      return bld_mask;
+   }
+   if (!bld_mask)
+      return exec_mask->exec_mask;
+   return LLVMBuildAnd(builder, bld_mask,
+                       exec_mask->exec_mask, "");
+}
+
+static LLVMValueRef
+emit_fetch_64bit(
+   struct lp_build_nir_context * bld_base,
+   LLVMValueRef input,
+   LLVMValueRef input2)
+{
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef res;
+   int i;
+   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
+   int len = bld_base->base.type.length * 2;
+   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
+
+   for (i = 0; i < len; i += 2) {
+      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
+      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
+   }
+   res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
+
+   return LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
+}
+
+static void
+emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
+                      LLVMValueRef chan_ptr,
+                      LLVMValueRef chan_ptr2,
+                      LLVMValueRef value)
+{
+   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_context *float_bld = &bld_base->base;
+   unsigned i;
+   LLVMValueRef temp, temp2;
+   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
+   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
+   int len = bld_base->base.type.length * 2;
+
+   value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");
+   for (i = 0; i < bld_base->base.type.length; i++) {
+      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
+      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
+   }
+
+   temp = LLVMBuildShuffleVector(builder, value,
+                                 LLVMGetUndef(LLVMTypeOf(value)),
+                                 LLVMConstVector(shuffles,
+                                                 bld_base->base.type.length),
+                                 "");
+   temp2 = LLVMBuildShuffleVector(builder, value,
+                                  LLVMGetUndef(LLVMTypeOf(value)),
+                                  LLVMConstVector(shuffles2,
+                                                  bld_base->base.type.length),
+                                  "");
+
+   lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
+   lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
+}
+
+static LLVMValueRef
+get_soa_array_offsets(struct lp_build_context *uint_bld,
+                      LLVMValueRef indirect_index,
+                      int num_components,
+                      unsigned chan_index,
+                      bool need_perelement_offset)
+{
+   struct gallivm_state *gallivm = uint_bld->gallivm;
+   LLVMValueRef chan_vec =
+      lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
+   LLVMValueRef length_vec =
+      lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
+   LLVMValueRef index_vec;
+
+   /* index_vec = (indirect_index * num_components + chan_index) * length + offsets */
+   index_vec = lp_build_mul(uint_bld, indirect_index, lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, num_components));
+   index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
+   index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
+
+   if (need_perelement_offset) {
+      LLVMValueRef pixel_offsets;
+      unsigned i;
+      /* build pixel offset vector: {0, 1, 2, 3, ...} */
+      pixel_offsets = uint_bld->undef;
+      for (i = 0; i < uint_bld->type.length; i++) {
+         LLVMValueRef ii = lp_build_const_int32(gallivm, i);
+         pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
+                                                ii, ii, "");
+      }
+      index_vec = lp_build_add(uint_bld, index_vec, 
pixel_offsets); + } + return index_vec; +} + +static LLVMValueRef +build_gather(struct lp_build_nir_context *bld_base, + struct lp_build_context *bld, + LLVMValueRef base_ptr, + LLVMValueRef indexes, + LLVMValueRef overflow_mask, + LLVMValueRef indexes2) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *uint_bld = &bld_base->uint_bld; + LLVMValueRef res; + unsigned i; + + if (indexes2) + res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2)); + else + res = bld->undef; + /* + * overflow_mask is a vector telling us which channels + * in the vector overflowed. We use the overflow behavior for + * constant buffers which is defined as: + * Out of bounds access to constant buffer returns 0 in all + * components. Out of bounds behavior is always with respect + * to the size of the buffer bound at that slot. + */ + + if (overflow_mask) { + /* + * We avoid per-element control flow here (also due to llvm going crazy, + * though I suspect it's better anyway since overflow is likely rare). + * Note that since we still fetch from buffers even if num_elements was + * zero (in this case we'll fetch from index zero) the jit func callers + * MUST provide valid fake constant buffers of size 4x32 (the values do + * not matter), otherwise we'd still need (not per element though) + * control flow. + */ + indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes); + if (indexes2) + indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2); + } + + /* + * Loop over elements of index_vec, load scalar value, insert it into 'res'. + */ + for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) { + LLVMValueRef si, di; + LLVMValueRef index; + LLVMValueRef scalar_ptr, scalar; + + di = lp_build_const_int32(gallivm, i); + if (indexes2) + si = lp_build_const_int32(gallivm, i >> 1); + else + si = di; + + if (indexes2 && (i & 1)) { + index = LLVMBuildExtractElement(builder, + indexes2, si, ""); + } else { + index = LLVMBuildExtractElement(builder, + indexes, si, ""); + } + scalar_ptr = LLVMBuildGEP(builder, base_ptr, + &index, 1, "gather_ptr"); + scalar = LLVMBuildLoad(builder, scalar_ptr, ""); + + res = LLVMBuildInsertElement(builder, res, scalar, di, ""); + } + + if (overflow_mask) { + if (indexes2) { + res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, ""); + overflow_mask = LLVMBuildSExt(builder, overflow_mask, + bld_base->dbl_bld.int_vec_type, ""); + res = lp_build_select(&bld_base->dbl_bld, overflow_mask, + bld_base->dbl_bld.zero, res); + } else + res = lp_build_select(bld, overflow_mask, bld->zero, res); + } + + return res; +} + +/** + * Scatter/store vector. + */ +static void +emit_mask_scatter(struct lp_build_nir_soa_context *bld, + LLVMValueRef base_ptr, + LLVMValueRef indexes, + LLVMValueRef values, + struct lp_exec_mask *mask) +{ + struct gallivm_state *gallivm = bld->bld_base.base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + unsigned i; + LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL; + + /* + * Loop over elements of index_vec, store scalar value. 
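+    * When an exec mask is active, each element is stored through a scalar
+    * load/select/store sequence, predicated on that element's mask. 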
+ */ + for (i = 0; i < bld->bld_base.base.type.length; i++) { + LLVMValueRef ii = lp_build_const_int32(gallivm, i); + LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, ""); + LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr"); + LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val"); + LLVMValueRef scalar_pred = pred ? + LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL; + + if (0) + lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n", + ii, val, index, scalar_ptr); + + if (scalar_pred) { + LLVMValueRef real_val, dst_val; + dst_val = LLVMBuildLoad(builder, scalar_ptr, ""); + real_val = lp_build_select(&bld->uint_elem_bld, scalar_pred, val, dst_val); + LLVMBuildStore(builder, real_val, scalar_ptr); + } + else { + LLVMBuildStore(builder, val, scalar_ptr); + } + } +} + +static void emit_load_var(struct lp_build_nir_context *bld_base, + nir_variable_mode deref_mode, + unsigned num_components, + unsigned bit_size, + nir_variable *var, + unsigned vertex_index, + unsigned const_index, + LLVMValueRef indir_index, + LLVMValueRef result[4]) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + struct gallivm_state *gallivm = bld_base->base.gallivm; + int dmul = bit_size == 64 ? 2 : 1; + switch (deref_mode) { + case nir_var_shader_in: { + for (unsigned i = 0; i < num_components; i++) { + int idx = (i * dmul) + var->data.location_frac; + if (bld->gs_iface) { + LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index); + LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, const_index + var->data.driver_location); + LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx); + LLVMValueRef result2; + result[i] = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base, + false, vertex_index_val, 0, attrib_index_val, swizzle_index_val); + if (bit_size == 64) { + LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1); + result2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base, + false, vertex_index_val, 0, attrib_index_val, swizzle_index_val); + result[i] = emit_fetch_64bit(bld_base, result[i], result2); + } + } else { + if (indir_index) { + LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location)); + LLVMValueRef index_vec = get_soa_array_offsets(&bld_base->uint_bld, + attrib_index_val, 4, idx, + TRUE); + LLVMValueRef index_vec2 = NULL; + LLVMTypeRef fptr_type; + LLVMValueRef inputs_array; + fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); + inputs_array = LLVMBuildBitCast(gallivm->builder, bld->inputs_array, fptr_type, ""); + + if (bit_size == 64) + index_vec2 = get_soa_array_offsets(&bld_base->uint_bld, + indir_index, 4, idx + 1, TRUE); + + /* Gather values from the input register array */ + result[i] = build_gather(bld_base, &bld_base->base, inputs_array, index_vec, NULL, index_vec2); + } else { + if (bld->indirects & nir_var_shader_in) { + LLVMValueRef lindex = lp_build_const_int32(gallivm, + var->data.driver_location * 4 + idx); + LLVMValueRef input_ptr = lp_build_pointer_get(gallivm->builder, + bld->inputs_array, lindex); + if (bit_size == 64) { + LLVMValueRef lindex2 = lp_build_const_int32(gallivm, + var->data.driver_location * 4 + (idx + 1)); + LLVMValueRef input_ptr2 = lp_build_pointer_get(gallivm->builder, + bld->inputs_array, lindex2); + result[i] = 
emit_fetch_64bit(bld_base, input_ptr, input_ptr2);
+                  } else {
+                     result[i] = input_ptr;
+                  }
+               } else {
+                  if (bit_size == 64) {
+                     LLVMValueRef tmp[2];
+                     tmp[0] = bld->inputs[var->data.driver_location + const_index][idx];
+                     tmp[1] = bld->inputs[var->data.driver_location + const_index][idx + 1];
+                     result[i] = emit_fetch_64bit(bld_base, tmp[0], tmp[1]);
+                  } else {
+                     result[i] = bld->inputs[var->data.driver_location + const_index][idx];
+                  }
+               }
+            }
+         }
+      }
+      break;
+   }
+   default:
+      break;
+   }
+}
+
+static void emit_store_chan(struct lp_build_nir_context *bld_base,
+                            nir_variable_mode deref_mode,
+                            unsigned bit_size,
+                            unsigned location, unsigned comp,
+                            unsigned chan,
+                            LLVMValueRef dst)
+{
+   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+   struct lp_build_context *float_bld = &bld_base->base;
+
+   if (bit_size == 64) {
+      /* A 64-bit channel occupies two 32-bit output slots; step to the
+       * next location if it runs past the end of the current vec4.
+       */
+      chan *= 2;
+      chan += comp;
+      if (chan >= 4) {
+         chan -= 4;
+         location++;
+      }
+      emit_store_64bit_chan(bld_base, bld->outputs[location][chan],
+                            bld->outputs[location][chan + 1], dst);
+   } else {
+      dst = LLVMBuildBitCast(builder, dst, float_bld->vec_type, "");
+      lp_exec_mask_store(&bld->exec_mask, float_bld, dst,
+                         bld->outputs[location][chan + comp]);
+   }
+}
+
+static void emit_store_var(struct lp_build_nir_context *bld_base,
+                           nir_variable_mode deref_mode,
+                           unsigned bit_size,
+                           unsigned num_components,
+                           unsigned writemask,
+                           unsigned const_index,
+                           nir_variable *var, LLVMValueRef dst)
+{
+   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+   switch (deref_mode) {
+   case nir_var_shader_out: {
+      unsigned location = var->data.driver_location + const_index;
+      unsigned comp = var->data.location_frac;
+      if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
+         if (var->data.location == FRAG_RESULT_STENCIL)
+            comp = 1;
+         else if (var->data.location == FRAG_RESULT_DEPTH)
+            comp = 2;
+      }
+      for (unsigned chan = 0; chan < num_components; chan++) {
+         if (writemask & (1u << chan)) {
+            LLVMValueRef chan_val = (num_components == 1) ? 
dst : LLVMBuildExtractValue(builder, dst, chan, ""); + emit_store_chan(bld_base, deref_mode, bit_size, location, comp, chan, chan_val); + } + } + break; + } + default: + break; + } +} + +static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base, + struct lp_build_context *reg_bld, + const nir_reg_src *reg, + LLVMValueRef indir_src, + LLVMValueRef reg_storage) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + int nc = reg->reg->num_components; + LLVMValueRef vals[4]; + struct lp_build_context *uint_bld = &bld_base->uint_bld; + if (reg->reg->num_array_elems) { + LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset); + if (reg->indirect) { + LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1); + indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, ""); + indirect_val = lp_build_min(uint_bld, indirect_val, max_index); + } + reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), ""); + for (unsigned i = 0; i < nc; i++) { + LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE); + vals[i] = build_gather(bld_base, reg_bld, reg_storage, indirect_offset, NULL, NULL); + } + } else { + for (unsigned i = 0; i < nc; i++) { + LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage, + lp_build_const_int32(gallivm, i)); + vals[i] = LLVMBuildLoad(builder, this_storage, ""); + } + } + return nc == 1 ? vals[0] : lp_nir_array_build_gather_values(builder, vals, nc); +} + +static void emit_store_reg(struct lp_build_nir_context *bld_base, + struct lp_build_context *reg_bld, + const nir_reg_dest *reg, + unsigned writemask, + LLVMValueRef indir_src, + LLVMValueRef reg_storage, + LLVMValueRef dst[4]) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *uint_bld = &bld_base->uint_bld; + int nc = reg->reg->num_components; + if (reg->reg->num_array_elems > 0) { + LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset); + if (reg->indirect) { + LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1); + indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, ""); + indirect_val = lp_build_min(uint_bld, indirect_val, max_index); + } + reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), ""); + for (unsigned i = 0; i < nc; i++) { + if (!(writemask & (1 << i))) + continue; + LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE); + dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, ""); + emit_mask_scatter(bld, reg_storage, indirect_offset, dst[i], &bld->exec_mask); + } + return; + } + + for (unsigned i = 0; i < nc; i++) { + LLVMValueRef this_storage = nc == 1 ? 
reg_storage : lp_build_array_get_ptr(gallivm, reg_storage, + lp_build_const_int32(gallivm, i)); + dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, ""); + lp_exec_mask_store(&bld->exec_mask, reg_bld, dst[i], this_storage); + } +} + +static void emit_load_ubo(struct lp_build_nir_context *bld_base, + unsigned nc, + unsigned bit_size, + bool offset_is_uniform, + LLVMValueRef index, + LLVMValueRef offset, + LLVMValueRef result[4]) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *uint_bld = &bld_base->uint_bld; + struct lp_build_context *bld_broad = bit_size == 64 ? &bld_base->dbl_bld : &bld_base->base; + LLVMValueRef consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, index); + unsigned size_shift = 0; + if (bit_size == 32) + size_shift = 2; + else if (bit_size == 64) + size_shift = 3; + if (size_shift) + offset = lp_build_shr(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, size_shift)); + if (bit_size == 64) { + LLVMTypeRef dptr_type = LLVMPointerType(bld_base->dbl_bld.elem_type, 0); + consts_ptr = LLVMBuildBitCast(builder, consts_ptr, dptr_type, ""); + } + + if (offset_is_uniform) { + offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), ""); + + for (unsigned c = 0; c < nc; c++) { + LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), ""); + + LLVMValueRef scalar = lp_build_pointer_get(builder, consts_ptr, this_offset); + result[c] = lp_build_broadcast_scalar(bld_broad, scalar); + } + } else { + LLVMValueRef overflow_mask; + LLVMValueRef num_consts = lp_build_array_get(gallivm, bld->const_sizes_ptr, index); + + num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 4), ""); + num_consts = lp_build_broadcast_scalar(uint_bld, num_consts); + for (unsigned c = 0; c < nc; c++) { + LLVMValueRef this_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c)); + overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL, + this_offset, num_consts); + + result[c] = build_gather(bld_base, bld_broad, consts_ptr, this_offset, overflow_mask, NULL); + } + } +} + + +static void emit_load_mem(struct lp_build_nir_context *bld_base, + unsigned nc, + unsigned bit_size, + LLVMValueRef index, + LLVMValueRef offset, + LLVMValueRef outval[4]) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; + LLVMValueRef ssbo_ptr = NULL; + struct lp_build_context *uint_bld = &bld_base->uint_bld; + struct lp_build_context *uint64_bld = &bld_base->uint64_bld; + LLVMValueRef ssbo_limit = NULL; + + if (index) { + LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), "")); + ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, bit_size == 64 ? 
3 : 2), ""); + ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit); + + ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), "")); + } else + ssbo_ptr = bld->shared_ptr; + + offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, bit_size == 64 ? 3 : 2), ""); + for (unsigned c = 0; c < nc; c++) { + LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c)); + LLVMValueRef exec_mask = mask_vec(bld_base); + + if (ssbo_limit) { + LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit); + exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, ""); + } + + LLVMValueRef result = lp_build_alloca(gallivm, bit_size == 64 ? uint64_bld->vec_type : uint_bld->vec_type, ""); + struct lp_build_loop_state loop_state; + lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); + + struct lp_build_if_state ifthen; + LLVMValueRef cond, temp_res; + + loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index, + loop_state.counter, ""); + + cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); + cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); + + lp_build_if(&ifthen, gallivm, cond); + LLVMValueRef scalar; + if (bit_size == 64) { + LLVMValueRef ssbo_ptr2 = LLVMBuildBitCast(builder, ssbo_ptr, LLVMPointerType(uint64_bld->elem_type, 0), ""); + scalar = lp_build_pointer_get(builder, ssbo_ptr2, loop_index); + } else + scalar = lp_build_pointer_get(builder, ssbo_ptr, loop_index); + + temp_res = LLVMBuildLoad(builder, result, ""); + temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, ""); + LLVMBuildStore(builder, temp_res, result); + lp_build_else(&ifthen); + temp_res = LLVMBuildLoad(builder, result, ""); + LLVMValueRef zero; + if (bit_size == 64) + zero = LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0); + else + zero = lp_build_const_int32(gallivm, 0); + temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, ""); + LLVMBuildStore(builder, temp_res, result); + lp_build_endif(&ifthen); + lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), + NULL, LLVMIntUGE); + outval[c] = LLVMBuildLoad(gallivm->builder, result, ""); + } +} + +static void emit_store_mem(struct lp_build_nir_context *bld_base, + unsigned writemask, + unsigned nc, + unsigned bit_size, + LLVMValueRef index, + LLVMValueRef offset, + LLVMValueRef dst) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; + LLVMValueRef ssbo_ptr; + struct lp_build_context *uint_bld = &bld_base->uint_bld; + LLVMValueRef ssbo_limit = NULL; + + if (index) { + LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), "")); + ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, bit_size == 64 ? 
3 : 2), ""); + ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit); + ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), "")); + } else + ssbo_ptr = bld->shared_ptr; + + offset = lp_build_shr_imm(uint_bld, offset, bit_size == 64 ? 3 : 2); + for (unsigned c = 0; c < nc; c++) { + if (!(writemask & (1u << c))) + continue; + LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c)); + LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, ""); + + LLVMValueRef exec_mask = mask_vec(bld_base); + if (ssbo_limit) { + LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit); + exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, ""); + } + + struct lp_build_loop_state loop_state; + lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); + LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val, + loop_state.counter, ""); + if (bit_size == 64) + value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, bld_base->uint64_bld.elem_type, ""); + else + value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, ""); + struct lp_build_if_state ifthen; + LLVMValueRef cond; + + loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index, + loop_state.counter, ""); + cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); + cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); + lp_build_if(&ifthen, gallivm, cond); + if (bit_size == 64) { + LLVMValueRef ssbo_ptr2 = LLVMBuildBitCast(builder, ssbo_ptr, LLVMPointerType(bld_base->uint64_bld.elem_type, 0), ""); + lp_build_pointer_set(builder, ssbo_ptr2, loop_index, value_ptr); + } else + lp_build_pointer_set(builder, ssbo_ptr, loop_index, value_ptr); + lp_build_endif(&ifthen); + lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), + NULL, LLVMIntUGE); + } +} + +static void emit_atomic_mem(struct lp_build_nir_context *bld_base, + nir_intrinsic_op nir_op, + LLVMValueRef index, LLVMValueRef offset, + LLVMValueRef val, LLVMValueRef val2, + LLVMValueRef *result) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; + LLVMValueRef ssbo_ptr; + struct lp_build_context *uint_bld = &bld_base->uint_bld; + LLVMAtomicRMWBinOp op; + LLVMValueRef ssbo_limit = NULL; + + if (index) { + LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), "")); + ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, 2), ""); + ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit); + ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), "")); + } else + ssbo_ptr = bld->shared_ptr; + + switch (nir_op) { + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_ssbo_atomic_add: + op = LLVMAtomicRMWBinOpAdd; + break; + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_ssbo_atomic_exchange: + op = LLVMAtomicRMWBinOpXchg; + break; + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_ssbo_atomic_and: + op = LLVMAtomicRMWBinOpAnd; + break; + case 
nir_intrinsic_shared_atomic_or:
+   case nir_intrinsic_ssbo_atomic_or:
+      op = LLVMAtomicRMWBinOpOr;
+      break;
+   case nir_intrinsic_shared_atomic_xor:
+   case nir_intrinsic_ssbo_atomic_xor:
+      op = LLVMAtomicRMWBinOpXor;
+      break;
+   case nir_intrinsic_shared_atomic_umin:
+   case nir_intrinsic_ssbo_atomic_umin:
+      op = LLVMAtomicRMWBinOpUMin;
+      break;
+   case nir_intrinsic_shared_atomic_umax:
+   case nir_intrinsic_ssbo_atomic_umax:
+      op = LLVMAtomicRMWBinOpUMax;
+      break;
+   case nir_intrinsic_ssbo_atomic_imin:
+   case nir_intrinsic_shared_atomic_imin:
+      op = LLVMAtomicRMWBinOpMin;
+      break;
+   case nir_intrinsic_ssbo_atomic_imax:
+   case nir_intrinsic_shared_atomic_imax:
+      op = LLVMAtomicRMWBinOpMax;
+      break;
+   default:
+      /* comp_swap is handled with a cmpxchg below, so op stays unused. */
+      break;
+   }
+
+   offset = lp_build_shr_imm(uint_bld, offset, 2);
+   LLVMValueRef atom_res = lp_build_alloca(gallivm,
+                                           uint_bld->vec_type, "");
+
+   LLVMValueRef exec_mask = mask_vec(bld_base);
+   if (ssbo_limit) {
+      LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, offset, ssbo_limit);
+      exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
+   }
+
+   struct lp_build_loop_state loop_state;
+   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
+                                                    loop_state.counter, "");
+   value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
+
+   offset = LLVMBuildExtractElement(gallivm->builder, offset,
+                                    loop_state.counter, "");
+
+   LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, ssbo_ptr,
+                                          &offset, 1, "");
+
+   struct lp_build_if_state ifthen;
+   LLVMValueRef cond, temp_res;
+   LLVMValueRef scalar;
+   cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
+   cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
+   lp_build_if(&ifthen, gallivm, cond);
+
+   if (nir_op == nir_intrinsic_ssbo_atomic_comp_swap || nir_op == nir_intrinsic_shared_atomic_comp_swap) {
+      LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
+                                                         loop_state.counter, "");
+      cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
+      scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
+                                      cas_src_ptr,
+                                      LLVMAtomicOrderingSequentiallyConsistent,
+                                      LLVMAtomicOrderingSequentiallyConsistent,
+                                      false);
+      scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
+   } else {
+      scalar = LLVMBuildAtomicRMW(builder, op,
+                                  scalar_ptr, value_ptr,
+                                  LLVMAtomicOrderingSequentiallyConsistent,
+                                  false);
+   }
+   temp_res = LLVMBuildLoad(builder, atom_res, "");
+   temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
+   LLVMBuildStore(builder, temp_res, atom_res);
+   lp_build_else(&ifthen);
+   temp_res = LLVMBuildLoad(builder, atom_res, "");
+   temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
+   LLVMBuildStore(builder, temp_res, atom_res);
+   lp_build_endif(&ifthen);
+
+   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
+                          NULL, LLVMIntUGE);
+   *result = LLVMBuildLoad(builder, atom_res, "");
+}
+
+static void emit_barrier(struct lp_build_nir_context *bld_base)
+{
+   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+   struct gallivm_state * gallivm = bld_base->base.gallivm;
+
+   LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
+
+   lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
+   /* The coroutine suspends here; code generation continues in the resume
+    * block, which executes once the coroutine is switched back in after
+    * the barrier.
+    */
+   
LLVMPositionBuilderAtEnd(gallivm->builder, resume); +} + +static LLVMValueRef emit_get_buffer_size(struct lp_build_nir_context *bld_base, + LLVMValueRef index) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; + struct lp_build_context *bld_broad = &bld_base->uint_bld; + LLVMValueRef size_ptr = lp_build_array_get(bld_base->base.gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, bld_broad->zero, "")); + return lp_build_broadcast_scalar(bld_broad, size_ptr); +} + +static void emit_image_op(struct lp_build_nir_context *bld_base, + struct lp_img_params *params) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + params->type = bld_base->base.type; + params->context_ptr = bld->context_ptr; + params->thread_data_ptr = bld->thread_data_ptr; + params->exec_mask = mask_vec(bld_base); + bld->image->emit_op(bld->image, + bld->bld_base.base.gallivm, + params); + +} + +static void emit_image_size(struct lp_build_nir_context *bld_base, + struct lp_sampler_size_query_params *params) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + + params->int_type = bld_base->int_bld.type; + params->context_ptr = bld->context_ptr; + + bld->image->emit_size_query(bld->image, + bld->bld_base.base.gallivm, + params); + +} + +static void init_var_slots(struct lp_build_nir_context *bld_base, + nir_variable *var, unsigned sc) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + unsigned slots = glsl_count_attribute_slots(var->type, false) * 4; + + for (unsigned comp = sc; comp < slots + sc; comp++) { + unsigned this_loc = var->data.driver_location + (comp / 4); + unsigned this_chan = comp % 4; + + if (!bld->outputs[this_loc][this_chan]) + bld->outputs[this_loc][this_chan] = lp_build_alloca(bld_base->base.gallivm, + bld_base->base.vec_type, "output"); + } +} + +static void emit_var_decl(struct lp_build_nir_context *bld_base, + nir_variable *var) +{ + unsigned sc = var->data.location_frac; + switch (var->data.mode) { + case nir_var_shader_out: { + if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) { + if (var->data.location == FRAG_RESULT_STENCIL) + sc = 1; + else if (var->data.location == FRAG_RESULT_DEPTH) + sc = 2; + } + init_var_slots(bld_base, var, sc); + break; + } + default: + break; + } +} + +static void emit_tex(struct lp_build_nir_context *bld_base, + struct lp_sampler_params *params) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + + params->type = bld_base->base.type; + params->context_ptr = bld->context_ptr; + params->thread_data_ptr = bld->thread_data_ptr; + + bld->sampler->emit_tex_sample(bld->sampler, + bld->bld_base.base.gallivm, + params); +} + +static void emit_tex_size(struct lp_build_nir_context *bld_base, + struct lp_sampler_size_query_params *params) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + + params->int_type = bld_base->int_bld.type; + params->context_ptr = bld->context_ptr; + + bld->sampler->emit_size_query(bld->sampler, + bld->bld_base.base.gallivm, + params); +} + +static void emit_sysval_intrin(struct lp_build_nir_context *bld_base, + nir_intrinsic_instr *instr, + LLVMValueRef result[4]) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + struct gallivm_state *gallivm = bld_base->base.gallivm; + switch (instr->intrinsic) { + 
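/* Scalar system values are broadcast to the SoA vector width; values
+    * that are already per-lane, such as vertex_id and prim_id, are
+    * returned directly. */
+   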
case nir_intrinsic_load_instance_id: + result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id); + break; + case nir_intrinsic_load_vertex_id: + result[0] = bld->system_values.vertex_id; + break; + case nir_intrinsic_load_primitive_id: + result[0] = bld->system_values.prim_id; + break; + case nir_intrinsic_load_work_group_id: + for (unsigned i = 0; i < 3; i++) + result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_id, lp_build_const_int32(gallivm, i), "")); + break; + case nir_intrinsic_load_local_invocation_id: + for (unsigned i = 0; i < 3; i++) + result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, i, ""); + break; + case nir_intrinsic_load_num_work_groups: + for (unsigned i = 0; i < 3; i++) + result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.grid_size, lp_build_const_int32(gallivm, i), "")); + break; + case nir_intrinsic_load_invocation_id: + result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id); + break; + default: + break; + } +} + +static void bgnloop(struct lp_build_nir_context *bld_base) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + lp_exec_bgnloop(&bld->exec_mask, true); +} + +static void endloop(struct lp_build_nir_context *bld_base) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask); +} + +static void if_cond(struct lp_build_nir_context *bld_base, LLVMValueRef cond) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + lp_exec_mask_cond_push(&bld->exec_mask, LLVMBuildBitCast(builder, cond, bld_base->base.int_vec_type, "")); +} + +static void else_stmt(struct lp_build_nir_context *bld_base) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + lp_exec_mask_cond_invert(&bld->exec_mask); +} + +static void endif_stmt(struct lp_build_nir_context *bld_base) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + lp_exec_mask_cond_pop(&bld->exec_mask); +} + +static void break_stmt(struct lp_build_nir_context *bld_base) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + + lp_exec_break(&bld->exec_mask, NULL, false); +} + +static void continue_stmt(struct lp_build_nir_context *bld_base) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + lp_exec_continue(&bld->exec_mask); +} + +static void discard(struct lp_build_nir_context *bld_base, LLVMValueRef cond) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; + LLVMValueRef mask; + + if (!cond) { + if (bld->exec_mask.has_mask) { + mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp"); + } else { + mask = LLVMConstNull(bld->bld_base.base.int_vec_type); + } + } else { + mask = LLVMBuildNot(builder, cond, ""); + if (bld->exec_mask.has_mask) { + LLVMValueRef invmask; + invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp"); + mask = LLVMBuildOr(builder, mask, invmask, ""); + } + } + lp_build_mask_update(bld->mask, mask); +} + +static void +increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base, 
+                          LLVMValueRef ptr,
+                          LLVMValueRef mask)
+{
+   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+   LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
+
+   /* The mask is ~0 (i.e. -1) in active lanes, so subtracting it
+    * increments the counter for exactly those lanes.
+    */
+   current_vec = LLVMBuildSub(builder, current_vec, mask, "");
+
+   LLVMBuildStore(builder, current_vec, ptr);
+}
+
+static void
+clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,
+                             LLVMValueRef ptr,
+                             LLVMValueRef mask)
+{
+   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+   LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
+
+   current_vec = lp_build_select(&bld_base->uint_bld,
+                                 mask,
+                                 bld_base->uint_bld.zero,
+                                 current_vec);
+
+   LLVMBuildStore(builder, current_vec, ptr);
+}
+
+static LLVMValueRef
+clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,
+                                  LLVMValueRef current_mask_vec,
+                                  LLVMValueRef total_emitted_vertices_vec)
+{
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+   struct lp_build_context *int_bld = &bld->bld_base.int_bld;
+   LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
+                                        total_emitted_vertices_vec,
+                                        bld->max_output_vertices_vec);
+
+   return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
+}
+
+static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_id)
+{
+   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+
+   assert(bld->gs_iface->emit_vertex);
+   LLVMValueRef total_emitted_vertices_vec =
+      LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
+   LLVMValueRef mask = mask_vec(bld_base);
+   mask = clamp_mask_to_max_output_vertices(bld, mask,
+                                            total_emitted_vertices_vec);
+   bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
+                              bld->outputs,
+                              total_emitted_vertices_vec,
+                              lp_build_const_int_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, stream_id));
+
+   increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
+                             mask);
+   increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
+                             mask);
+}
+
+static void
+end_primitive_masked(struct lp_build_nir_context * bld_base,
+                     LLVMValueRef mask)
+{
+   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+
+   struct lp_build_context *uint_bld = &bld_base->uint_bld;
+   LLVMValueRef emitted_vertices_vec =
+      LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
+   LLVMValueRef emitted_prims_vec =
+      LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
+   LLVMValueRef total_emitted_vertices_vec =
+      LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
+
+   LLVMValueRef emitted_mask = lp_build_cmp(uint_bld,
+                                            PIPE_FUNC_NOTEQUAL,
+                                            emitted_vertices_vec,
+                                            uint_bld->zero);
+   mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
+   bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
+                                total_emitted_vertices_vec,
+                                emitted_vertices_vec, emitted_prims_vec, mask_vec(bld_base));
+   increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
+                             mask);
+   clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
+                                mask);
+}
+
+static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id)
+{
+   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+
+   assert(bld->gs_iface->end_primitive);
+
+   LLVMValueRef mask = mask_vec(bld_base);
+   end_primitive_masked(bld_base, mask);
+}
+
+static void
+emit_prologue(struct lp_build_nir_soa_context *bld) 
+{ + struct gallivm_state * gallivm = bld->bld_base.base.gallivm; + if (bld->indirects & nir_var_shader_in && !bld->gs_iface) { + uint32_t num_inputs = util_bitcount64(bld->bld_base.shader->info.inputs_read); + unsigned index, chan; + LLVMTypeRef vec_type = bld->bld_base.base.vec_type; + LLVMValueRef array_size = lp_build_const_int32(gallivm, num_inputs * 4); + bld->inputs_array = lp_build_array_alloca(gallivm, + vec_type, array_size, + "input_array"); + + for (index = 0; index < num_inputs; ++index) { + for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + LLVMValueRef lindex = + lp_build_const_int32(gallivm, index * 4 + chan); + LLVMValueRef input_ptr = + LLVMBuildGEP(gallivm->builder, bld->inputs_array, + &lindex, 1, ""); + LLVMValueRef value = bld->inputs[index][chan]; + if (value) + LLVMBuildStore(gallivm->builder, value, input_ptr); + } + } + } +} + +void lp_build_nir_soa(struct gallivm_state *gallivm, + struct nir_shader *shader, + const struct lp_build_tgsi_params *params, + LLVMValueRef (*outputs)[4]) +{ + struct lp_build_nir_soa_context bld; + struct lp_type type = params->type; + struct lp_type res_type; + + assert(type.length <= LP_MAX_VECTOR_LENGTH); + memset(&res_type, 0, sizeof res_type); + res_type.width = type.width; + res_type.length = type.length; + res_type.sign = 1; + + /* Setup build context */ + memset(&bld, 0, sizeof bld); + lp_build_context_init(&bld.bld_base.base, gallivm, type); + lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type)); + lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type)); + lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type)); + lp_build_context_init(&bld.uint_elem_bld, gallivm, lp_elem_type(lp_uint_type(type))); + { + struct lp_type dbl_type; + dbl_type = type; + dbl_type.width *= 2; + lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type); + } + { + struct lp_type uint64_type; + uint64_type = lp_uint_type(type); + uint64_type.width *= 2; + lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type); + } + { + struct lp_type int64_type; + int64_type = lp_int_type(type); + int64_type.width *= 2; + lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type); + } + bld.bld_base.load_var = emit_load_var; + bld.bld_base.store_var = emit_store_var; + bld.bld_base.load_reg = emit_load_reg; + bld.bld_base.store_reg = emit_store_reg; + bld.bld_base.emit_var_decl = emit_var_decl; + bld.bld_base.load_ubo = emit_load_ubo; + bld.bld_base.tex = emit_tex; + bld.bld_base.tex_size = emit_tex_size; + bld.bld_base.bgnloop = bgnloop; + bld.bld_base.endloop = endloop; + bld.bld_base.if_cond = if_cond; + bld.bld_base.else_stmt = else_stmt; + bld.bld_base.endif_stmt = endif_stmt; + bld.bld_base.break_stmt = break_stmt; + bld.bld_base.continue_stmt = continue_stmt; + bld.bld_base.sysval_intrin = emit_sysval_intrin; + bld.bld_base.discard = discard; + bld.bld_base.emit_vertex = emit_vertex; + bld.bld_base.end_primitive = end_primitive; + bld.bld_base.load_mem = emit_load_mem; + bld.bld_base.store_mem = emit_store_mem; + bld.bld_base.get_buffer_size = emit_get_buffer_size; + bld.bld_base.atomic_mem = emit_atomic_mem; + bld.bld_base.barrier = emit_barrier; + bld.bld_base.image_op = emit_image_op; + bld.bld_base.image_size = emit_image_size; + + bld.mask = params->mask; + bld.inputs = params->inputs; + bld.outputs = outputs; + bld.consts_ptr = params->consts_ptr; + bld.const_sizes_ptr = params->const_sizes_ptr; + bld.ssbo_ptr = params->ssbo_ptr; + bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr; + 
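/* Texture and image access goes through the caller-provided SoA
+    * sampler/image interfaces (see emit_tex and emit_image_op). */
+   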
bld.sampler = params->sampler; +// bld.bld_base.info = params->info; + + bld.context_ptr = params->context_ptr; + bld.thread_data_ptr = params->thread_data_ptr; + bld.image = params->image; + bld.shared_ptr = params->shared_ptr; + bld.coro = params->coro; + + bld.indirects = 0; + if (params->info->indirect_files & (1 << TGSI_FILE_INPUT)) + bld.indirects |= nir_var_shader_in; + + bld.gs_iface = params->gs_iface; + if (bld.gs_iface) { + struct lp_build_context *uint_bld = &bld.bld_base.uint_bld; + + bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type, + shader->info.gs.vertices_out); + bld.emitted_prims_vec_ptr = + lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr"); + bld.emitted_vertices_vec_ptr = + lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr"); + bld.total_emitted_vertices_vec_ptr = + lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr"); + } + lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld); + + bld.system_values = *params->system_values; + + bld.bld_base.shader = shader; + + emit_prologue(&bld); + lp_build_nir_llvm(&bld.bld_base, shader); + + if (bld.gs_iface) { + LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder; + LLVMValueRef total_emitted_vertices_vec; + LLVMValueRef emitted_prims_vec; + end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask)); + total_emitted_vertices_vec = + LLVMBuildLoad(builder, bld.total_emitted_vertices_vec_ptr, ""); + emitted_prims_vec = + LLVMBuildLoad(builder, bld.emitted_prims_vec_ptr, ""); + + bld.gs_iface->gs_epilogue(bld.gs_iface, + total_emitted_vertices_vec, + emitted_prims_vec); + } + lp_exec_mask_fini(&bld.exec_mask); +} diff --git a/src/gallium/auxiliary/meson.build b/src/gallium/auxiliary/meson.build index aaf5138a169..ebc5e5449e0 100644 --- a/src/gallium/auxiliary/meson.build +++ b/src/gallium/auxiliary/meson.build @@ -388,6 +388,9 @@ if with_llvm 'gallivm/lp_bld_logic.h', 'gallivm/lp_bld_misc.cpp', 'gallivm/lp_bld_misc.h', + 'gallivm/lp_bld_nir.h', + 'gallivm/lp_bld_nir.c', + 'gallivm/lp_bld_nir_soa.c', 'gallivm/lp_bld_pack.c', 'gallivm/lp_bld_pack.h', 'gallivm/lp_bld_printf.c',