radeonsi/ac: move most of emit_ddxy to shared code.
We can reuse this in radv.

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
parent
278d5ef70a
commit
c9a2fc3679
|
@ -851,3 +851,77 @@ ac_get_thread_id(struct ac_llvm_context *ctx)
|
||||||
set_range_metadata(ctx, tid, 0, 64);
|
set_range_metadata(ctx, tid, 0, 64);
|
||||||
return tid;
|
return tid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SI implements derivatives using the local data store (LDS)
|
||||||
|
* All writes to the LDS happen in all executing threads at
|
||||||
|
* the same time. TID is the Thread ID for the current
|
||||||
|
* thread and is a value between 0 and 63, representing
|
||||||
|
* the thread's position in the wavefront.
|
||||||
|
*
|
||||||
|
* For the pixel shader threads are grouped into quads of four pixels.
|
||||||
|
* The TIDs of the pixels of a quad are:
|
||||||
|
*
|
||||||
|
* +------+------+
|
||||||
|
* |4n + 0|4n + 1|
|
||||||
|
* +------+------+
|
||||||
|
* |4n + 2|4n + 3|
|
||||||
|
* +------+------+
|
||||||
|
*
|
||||||
|
* So, masking the TID with 0xfffffffc yields the TID of the top left pixel
|
||||||
|
* of the quad, masking with 0xfffffffd yields the TID of the top pixel of
|
||||||
|
* the current pixel's column, and masking with 0xfffffffe yields the TID
|
||||||
|
* of the left pixel of the current pixel's row.
|
||||||
|
*
|
||||||
|
* Adding 1 yields the TID of the pixel to the right of the left pixel, and
|
||||||
|
* adding 2 yields the TID of the pixel below the top pixel.
|
||||||
|
*/
|
||||||
|
LLVMValueRef
|
||||||
|
ac_emit_ddxy(struct ac_llvm_context *ctx,
|
||||||
|
bool has_ds_bpermute,
|
||||||
|
uint32_t mask,
|
||||||
|
int idx,
|
||||||
|
LLVMValueRef lds,
|
||||||
|
LLVMValueRef val)
|
||||||
|
{
|
||||||
|
LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2];
|
||||||
|
LLVMValueRef result;
|
||||||
|
|
||||||
|
thread_id = ac_get_thread_id(ctx);
|
||||||
|
|
||||||
|
tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
|
||||||
|
LLVMConstInt(ctx->i32, mask, false), "");
|
||||||
|
|
||||||
|
trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
|
||||||
|
LLVMConstInt(ctx->i32, idx, false), "");
|
||||||
|
|
||||||
|
if (has_ds_bpermute) {
|
||||||
|
args[0] = LLVMBuildMul(ctx->builder, tl_tid,
|
||||||
|
LLVMConstInt(ctx->i32, 4, false), "");
|
||||||
|
args[1] = val;
|
||||||
|
tl = ac_emit_llvm_intrinsic(ctx,
|
||||||
|
"llvm.amdgcn.ds.bpermute", ctx->i32,
|
||||||
|
args, 2, AC_FUNC_ATTR_READNONE);
|
||||||
|
|
||||||
|
args[0] = LLVMBuildMul(ctx->builder, trbl_tid,
|
||||||
|
LLVMConstInt(ctx->i32, 4, false), "");
|
||||||
|
trbl = ac_emit_llvm_intrinsic(ctx,
|
||||||
|
"llvm.amdgcn.ds.bpermute", ctx->i32,
|
||||||
|
args, 2, AC_FUNC_ATTR_READNONE);
|
||||||
|
} else {
|
||||||
|
LLVMValueRef store_ptr, load_ptr0, load_ptr1;
|
||||||
|
|
||||||
|
store_ptr = ac_build_gep0(ctx, lds, thread_id);
|
||||||
|
load_ptr0 = ac_build_gep0(ctx, lds, tl_tid);
|
||||||
|
load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid);
|
||||||
|
|
||||||
|
LLVMBuildStore(ctx->builder, val, store_ptr);
|
||||||
|
tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
|
||||||
|
trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
|
||||||
|
}
|
||||||
|
|
||||||
|
tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
|
||||||
|
trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
|
||||||
|
result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
|
@ -180,6 +180,18 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
|
||||||
|
|
||||||
LLVMValueRef
|
LLVMValueRef
|
||||||
ac_get_thread_id(struct ac_llvm_context *ctx);
|
ac_get_thread_id(struct ac_llvm_context *ctx);
|
||||||
|
|
||||||
|
/*
 * Masks applied to a thread ID to find a reference thread inside its
 * 2x2 pixel quad (these are the "mask" argument of ac_emit_ddxy):
 *   AC_TID_MASK_TOP_LEFT clears bits 0 and 1 -> top left pixel of the quad
 *   AC_TID_MASK_TOP      clears bit 1        -> top pixel of the current column
 *   AC_TID_MASK_LEFT     clears bit 0        -> left pixel of the current row
 */
#define AC_TID_MASK_TOP_LEFT 0xfffffffc
|
||||||
|
#define AC_TID_MASK_TOP 0xfffffffd
|
||||||
|
#define AC_TID_MASK_LEFT 0xfffffffe
|
||||||
|
|
||||||
|
/*
 * Emit a derivative as the difference between the values held by two
 * threads of a pixel quad.
 *
 * ctx             - LLVM context wrapper used to build the instructions
 * has_ds_bpermute - selects the cross-thread exchange method (the radeonsi
 *                   caller passes its screen's has_ds_bpermute flag)
 * mask            - one of the AC_TID_MASK_* values, ANDed with the thread ID
 * idx             - offset added to the masked thread ID (the radeonsi
 *                   caller uses 1 for DDX and 2 for DDY)
 * lds             - LDS value used when has_ds_bpermute is false
 * val             - value to differentiate (the radeonsi caller passes it
 *                   bitcast to i32)
 *
 * Returns the resulting difference value.
 */
LLVMValueRef
|
||||||
|
ac_emit_ddxy(struct ac_llvm_context *ctx,
|
||||||
|
bool has_ds_bpermute,
|
||||||
|
uint32_t mask,
|
||||||
|
int idx,
|
||||||
|
LLVMValueRef lds,
|
||||||
|
LLVMValueRef val);
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -4722,35 +4722,6 @@ static void si_llvm_emit_txqs(
|
||||||
emit_data->output[emit_data->chan] = samples;
|
emit_data->output[emit_data->chan] = samples;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* SI implements derivatives using the local data store (LDS)
|
|
||||||
* All writes to the LDS happen in all executing threads at
|
|
||||||
* the same time. TID is the Thread ID for the current
|
|
||||||
* thread and is a value between 0 and 63, representing
|
|
||||||
* the thread's position in the wavefront.
|
|
||||||
*
|
|
||||||
* For the pixel shader threads are grouped into quads of four pixels.
|
|
||||||
* The TIDs of the pixels of a quad are:
|
|
||||||
*
|
|
||||||
* +------+------+
|
|
||||||
* |4n + 0|4n + 1|
|
|
||||||
* +------+------+
|
|
||||||
* |4n + 2|4n + 3|
|
|
||||||
* +------+------+
|
|
||||||
*
|
|
||||||
* So, masking the TID with 0xfffffffc yields the TID of the top left pixel
|
|
||||||
* of the quad, masking with 0xfffffffd yields the TID of the top pixel of
|
|
||||||
* the current pixel's column, and masking with 0xfffffffe yields the TID
|
|
||||||
* of the left pixel of the current pixel's row.
|
|
||||||
*
|
|
||||||
* Adding 1 yields the TID of the pixel to the right of the left pixel, and
|
|
||||||
* adding 2 yields the TID of the pixel below the top pixel.
|
|
||||||
*/
|
|
||||||
/* masks for thread ID. */
|
|
||||||
#define TID_MASK_TOP_LEFT 0xfffffffc
|
|
||||||
#define TID_MASK_TOP 0xfffffffd
|
|
||||||
#define TID_MASK_LEFT 0xfffffffe
|
|
||||||
|
|
||||||
static void si_llvm_emit_ddxy(
|
static void si_llvm_emit_ddxy(
|
||||||
const struct lp_build_tgsi_action *action,
|
const struct lp_build_tgsi_action *action,
|
||||||
struct lp_build_tgsi_context *bld_base,
|
struct lp_build_tgsi_context *bld_base,
|
||||||
|
@ -4759,59 +4730,24 @@ static void si_llvm_emit_ddxy(
|
||||||
struct si_shader_context *ctx = si_shader_context(bld_base);
|
struct si_shader_context *ctx = si_shader_context(bld_base);
|
||||||
struct gallivm_state *gallivm = bld_base->base.gallivm;
|
struct gallivm_state *gallivm = bld_base->base.gallivm;
|
||||||
unsigned opcode = emit_data->info->opcode;
|
unsigned opcode = emit_data->info->opcode;
|
||||||
LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, val, args[2];
|
LLVMValueRef val;
|
||||||
int idx;
|
int idx;
|
||||||
unsigned mask;
|
unsigned mask;
|
||||||
|
|
||||||
thread_id = ac_get_thread_id(&ctx->ac);
|
|
||||||
|
|
||||||
if (opcode == TGSI_OPCODE_DDX_FINE)
|
if (opcode == TGSI_OPCODE_DDX_FINE)
|
||||||
mask = TID_MASK_LEFT;
|
mask = AC_TID_MASK_LEFT;
|
||||||
else if (opcode == TGSI_OPCODE_DDY_FINE)
|
else if (opcode == TGSI_OPCODE_DDY_FINE)
|
||||||
mask = TID_MASK_TOP;
|
mask = AC_TID_MASK_TOP;
|
||||||
else
|
else
|
||||||
mask = TID_MASK_TOP_LEFT;
|
mask = AC_TID_MASK_TOP_LEFT;
|
||||||
|
|
||||||
tl_tid = LLVMBuildAnd(gallivm->builder, thread_id,
|
|
||||||
lp_build_const_int32(gallivm, mask), "");
|
|
||||||
|
|
||||||
/* for DDX we want the next X pixel, for DDY the next Y pixel. */
|
/* for DDX we want the next X pixel, for DDY the next Y pixel. */
|
||||||
idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2;
|
idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2;
|
||||||
trbl_tid = LLVMBuildAdd(gallivm->builder, tl_tid,
|
|
||||||
lp_build_const_int32(gallivm, idx), "");
|
|
||||||
|
|
||||||
val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, "");
|
val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, "");
|
||||||
|
val = ac_emit_ddxy(&ctx->ac, ctx->screen->has_ds_bpermute,
|
||||||
if (ctx->screen->has_ds_bpermute) {
|
mask, idx, ctx->lds, val);
|
||||||
args[0] = LLVMBuildMul(gallivm->builder, tl_tid,
|
emit_data->output[emit_data->chan] = val;
|
||||||
lp_build_const_int32(gallivm, 4), "");
|
|
||||||
args[1] = val;
|
|
||||||
tl = lp_build_intrinsic(gallivm->builder,
|
|
||||||
"llvm.amdgcn.ds.bpermute", ctx->i32,
|
|
||||||
args, 2, LP_FUNC_ATTR_READNONE);
|
|
||||||
|
|
||||||
args[0] = LLVMBuildMul(gallivm->builder, trbl_tid,
|
|
||||||
lp_build_const_int32(gallivm, 4), "");
|
|
||||||
trbl = lp_build_intrinsic(gallivm->builder,
|
|
||||||
"llvm.amdgcn.ds.bpermute", ctx->i32,
|
|
||||||
args, 2, LP_FUNC_ATTR_READNONE);
|
|
||||||
} else {
|
|
||||||
LLVMValueRef store_ptr, load_ptr0, load_ptr1;
|
|
||||||
|
|
||||||
store_ptr = ac_build_gep0(&ctx->ac, ctx->lds, thread_id);
|
|
||||||
load_ptr0 = ac_build_gep0(&ctx->ac, ctx->lds, tl_tid);
|
|
||||||
load_ptr1 = ac_build_gep0(&ctx->ac, ctx->lds, trbl_tid);
|
|
||||||
|
|
||||||
LLVMBuildStore(gallivm->builder, val, store_ptr);
|
|
||||||
tl = LLVMBuildLoad(gallivm->builder, load_ptr0, "");
|
|
||||||
trbl = LLVMBuildLoad(gallivm->builder, load_ptr1, "");
|
|
||||||
}
|
|
||||||
|
|
||||||
tl = LLVMBuildBitCast(gallivm->builder, tl, ctx->f32, "");
|
|
||||||
trbl = LLVMBuildBitCast(gallivm->builder, trbl, ctx->f32, "");
|
|
||||||
|
|
||||||
emit_data->output[emit_data->chan] =
|
|
||||||
LLVMBuildFSub(gallivm->builder, trbl, tl, "");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
Loading…
Reference in New Issue