radeonsi: Remove use.sgpr* intrinsics, use load instructions instead
We now model loading user SGPR values with LLVM IR load instructions that use the USER_SGPR address space. The definition of the sgpr parameter to the use_sgpr() helper function in radeonsi_shader.c has changed so that you can pass raw sgpr values rather than having to divide the sgpr value you want to use by the dword width of the type you want to load.
This commit is contained in:
parent
467f51613e
commit
89ece086bc
|
@ -129,7 +129,8 @@ enum AddressSpaces {
|
||||||
ADDRESS_NONE = 5, // Address space for unknown memory.
|
ADDRESS_NONE = 5, // Address space for unknown memory.
|
||||||
PARAM_D_ADDRESS = 6, // Address space for direct addressible parameter memory (CONST0)
|
PARAM_D_ADDRESS = 6, // Address space for direct addressible parameter memory (CONST0)
|
||||||
PARAM_I_ADDRESS = 7, // Address space for indirect addressible parameter memory (VTX1)
|
PARAM_I_ADDRESS = 7, // Address space for indirect addressible parameter memory (VTX1)
|
||||||
LAST_ADDRESS = 8
|
USER_SGPR_ADDRESS = 8, // Address space for USER_SGPRS on SI
|
||||||
|
LAST_ADDRESS = 9
|
||||||
};
|
};
|
||||||
|
|
||||||
// This union/struct combination is an easy way to read out the
|
// This union/struct combination is an easy way to read out the
|
||||||
|
|
|
@ -188,6 +188,9 @@ void SITargetLowering::lowerUSE_SGPR(MachineInstr *MI,
|
||||||
unsigned dstReg = MI->getOperand(0).getReg();
|
unsigned dstReg = MI->getOperand(0).getReg();
|
||||||
int64_t newIndex = MI->getOperand(1).getImm();
|
int64_t newIndex = MI->getOperand(1).getImm();
|
||||||
const TargetRegisterClass * dstClass = MRI.getRegClass(dstReg);
|
const TargetRegisterClass * dstClass = MRI.getRegClass(dstReg);
|
||||||
|
unsigned DwordWidth = dstClass->getSize() / 4;
|
||||||
|
assert(newIndex % DwordWidth == 0 && "USER_SGPR not properly aligned");
|
||||||
|
newIndex = newIndex / DwordWidth;
|
||||||
|
|
||||||
unsigned newReg = dstClass->getRegister(newIndex);
|
unsigned newReg = dstClass->getRegister(newIndex);
|
||||||
addLiveIn(MI, MF, MRI, TII, newReg);
|
addLiveIn(MI, MF, MRI, TII, newReg);
|
||||||
|
|
|
@ -7,6 +7,18 @@
|
||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
def load_user_sgpr : PatFrag<(ops node:$ptr),
|
||||||
|
(load node:$ptr),
|
||||||
|
[{
|
||||||
|
const Value *Src = cast<LoadSDNode>(N)->getSrcValue();
|
||||||
|
if (Src) {
|
||||||
|
PointerType * PT = dyn_cast<PointerType>(Src->getType());
|
||||||
|
return PT && PT->getAddressSpace() == AMDILAS::USER_SGPR_ADDRESS;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}]
|
||||||
|
>;
|
||||||
|
|
||||||
|
|
||||||
def isSI : Predicate<"Subtarget.device()"
|
def isSI : Predicate<"Subtarget.device()"
|
||||||
"->getGeneration() == AMDILDeviceInfo::HD7XXX">;
|
"->getGeneration() == AMDILDeviceInfo::HD7XXX">;
|
||||||
|
@ -826,26 +838,19 @@ def SI_INTERP_CONST : InstSI <
|
||||||
imm:$attr, SReg_32:$params))]
|
imm:$attr, SReg_32:$params))]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
|
||||||
def USE_SGPR_32 : InstSI <
|
def USE_SGPR_32 : InstSI <
|
||||||
(outs SReg_32:$dst),
|
(outs SReg_32:$dst),
|
||||||
(ins i32imm:$src0),
|
(ins i32imm:$src0),
|
||||||
"USE_SGPR_32",
|
"USE_SGPR_32",
|
||||||
[(set SReg_32:$dst, (int_SI_use_sgpr imm:$src0))]
|
[(set (i32 SReg_32:$dst), (load_user_sgpr imm:$src0))]
|
||||||
|
>;
|
||||||
> {
|
|
||||||
field bits<32> Inst = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
def USE_SGPR_64 : InstSI <
|
def USE_SGPR_64 : InstSI <
|
||||||
(outs SReg_64:$dst),
|
(outs SReg_64:$dst),
|
||||||
(ins i32imm:$src0),
|
(ins i32imm:$src0),
|
||||||
"USE_SGPR_64",
|
"USE_SGPR_64",
|
||||||
[(set SReg_64:$dst, (int_SI_use_sgpr imm:$src0))]
|
[(set (i64 SReg_64:$dst), (load_user_sgpr imm:$src0))]
|
||||||
|
>;
|
||||||
> {
|
|
||||||
field bits<32> Inst = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
def VS_LOAD_BUFFER_INDEX : InstSI <
|
def VS_LOAD_BUFFER_INDEX : InstSI <
|
||||||
(outs VReg_32:$dst),
|
(outs VReg_32:$dst),
|
||||||
|
@ -869,19 +874,6 @@ def : Pat<
|
||||||
0, 0, (i32 SREG_LIT_0))
|
0, 0, (i32 SREG_LIT_0))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def : Pat<
|
|
||||||
(int_SI_use_sgprptrcf32 imm:$src0),
|
|
||||||
(USE_SGPR_64 imm:$src0)
|
|
||||||
>;
|
|
||||||
def : Pat<
|
|
||||||
(int_SI_use_sgprptrci128 imm:$src0),
|
|
||||||
(USE_SGPR_64 imm:$src0)
|
|
||||||
>;
|
|
||||||
def : Pat<
|
|
||||||
(int_SI_use_sgprptrci256 imm:$src0),
|
|
||||||
(USE_SGPR_64 imm:$src0)
|
|
||||||
>;
|
|
||||||
|
|
||||||
/* int_SI_export */
|
/* int_SI_export */
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
|
(int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
|
||||||
|
|
|
@ -21,11 +21,6 @@ let TargetPrefix = "SI", isTarget = 1 in {
|
||||||
def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], []> ;
|
def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], []> ;
|
||||||
|
|
||||||
def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty]>;
|
def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty]>;
|
||||||
def int_SI_use_sgpr : Intrinsic <[llvm_anyint_ty], [llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
class int_SI_use_sgprptr : Intrinsic <[llvm_anyptr_ty], [llvm_i32_ty], []>;
|
|
||||||
def int_SI_use_sgprptrcf32 : int_SI_use_sgprptr;
|
|
||||||
def int_SI_use_sgprptrci128 : int_SI_use_sgprptr;
|
|
||||||
def int_SI_use_sgprptrci256 : int_SI_use_sgprptr;
|
|
||||||
|
|
||||||
/* Interpolation Intrinsics */
|
/* Interpolation Intrinsics */
|
||||||
|
|
||||||
|
|
|
@ -67,6 +67,7 @@ static struct si_shader_context * si_shader_context(
|
||||||
|
|
||||||
#define USE_SGPR_MAX_SUFFIX_LEN 5
|
#define USE_SGPR_MAX_SUFFIX_LEN 5
|
||||||
#define CONST_ADDR_SPACE 2
|
#define CONST_ADDR_SPACE 2
|
||||||
|
#define USER_SGPR_ADDR_SPACE 8
|
||||||
|
|
||||||
enum sgpr_type {
|
enum sgpr_type {
|
||||||
SGPR_CONST_PTR_F32,
|
SGPR_CONST_PTR_F32,
|
||||||
|
@ -99,10 +100,19 @@ static LLVMValueRef build_indexed_load(
|
||||||
return LLVMBuildLoad(gallivm->builder, computed_ptr, "");
|
return LLVMBuildLoad(gallivm->builder, computed_ptr, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/**
|
||||||
* XXX: Instead of using an intrinsic to use a specific SGPR, we should be
|
* Load a value stored in one of the user SGPRs
|
||||||
* using load instructions. The loads should load from the USER_SGPR address
|
*
|
||||||
* space and use the sgpr index as the pointer.
|
* @param sgpr This is the sgpr to load the value from. If you need to load a
|
||||||
|
* value that is stored in consecutive SGPR registers (e.g. a 64-bit pointer),
|
||||||
|
* then you should pass the index of the first SGPR that holds the value. For
|
||||||
|
* example, if you want to load a pointer that is stored in SGPRs 2 and 3, then
|
||||||
|
* pass 2 for the sgpr parameter.
|
||||||
|
*
|
||||||
|
* The value of the sgpr parameter must also be aligned to the width of the type
|
||||||
|
* being loaded, so that the sgpr parameter is divisible by the dword width of the
|
||||||
|
* type. For example, if the value being loaded is two dwords wide, then the sgpr
|
||||||
|
* parameter must be divisible by two.
|
||||||
*/
|
*/
|
||||||
static LLVMValueRef use_sgpr(
|
static LLVMValueRef use_sgpr(
|
||||||
struct gallivm_state * gallivm,
|
struct gallivm_state * gallivm,
|
||||||
|
@ -111,44 +121,48 @@ static LLVMValueRef use_sgpr(
|
||||||
{
|
{
|
||||||
LLVMValueRef sgpr_index;
|
LLVMValueRef sgpr_index;
|
||||||
LLVMTypeRef ret_type;
|
LLVMTypeRef ret_type;
|
||||||
|
LLVMValueRef ptr;
|
||||||
|
|
||||||
sgpr_index = lp_build_const_int32(gallivm, sgpr);
|
sgpr_index = lp_build_const_int32(gallivm, sgpr);
|
||||||
|
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case SGPR_CONST_PTR_F32:
|
case SGPR_CONST_PTR_F32:
|
||||||
|
assert(sgpr % 2 == 0);
|
||||||
ret_type = LLVMFloatTypeInContext(gallivm->context);
|
ret_type = LLVMFloatTypeInContext(gallivm->context);
|
||||||
ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
|
ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
|
||||||
return lp_build_intrinsic_unary(gallivm->builder,
|
break;
|
||||||
"llvm.SI.use.sgprptrcf32.",
|
|
||||||
ret_type, sgpr_index);
|
|
||||||
case SGPR_I32:
|
case SGPR_I32:
|
||||||
ret_type = LLVMInt32TypeInContext(gallivm->context);
|
ret_type = LLVMInt32TypeInContext(gallivm->context);
|
||||||
return lp_build_intrinsic_unary(gallivm->builder,
|
break;
|
||||||
"llvm.SI.use.sgpr.i32",
|
|
||||||
ret_type, sgpr_index);
|
|
||||||
case SGPR_I64:
|
case SGPR_I64:
|
||||||
|
assert(sgpr % 2 == 0);
|
||||||
ret_type= LLVMInt64TypeInContext(gallivm->context);
|
ret_type= LLVMInt64TypeInContext(gallivm->context);
|
||||||
return lp_build_intrinsic_unary(gallivm->builder,
|
break;
|
||||||
"llvm.SI.use.sgpr.i64",
|
|
||||||
ret_type, sgpr_index);
|
|
||||||
case SGPR_CONST_PTR_V4I32:
|
case SGPR_CONST_PTR_V4I32:
|
||||||
|
assert(sgpr % 2 == 0);
|
||||||
ret_type = LLVMInt32TypeInContext(gallivm->context);
|
ret_type = LLVMInt32TypeInContext(gallivm->context);
|
||||||
ret_type = LLVMVectorType(ret_type, 4);
|
ret_type = LLVMVectorType(ret_type, 4);
|
||||||
ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
|
ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
|
||||||
return lp_build_intrinsic_unary(gallivm->builder,
|
break;
|
||||||
"llvm.SI.use.sgprptrci128.",
|
|
||||||
ret_type, sgpr_index);
|
|
||||||
case SGPR_CONST_PTR_V8I32:
|
case SGPR_CONST_PTR_V8I32:
|
||||||
|
assert(sgpr % 2 == 0);
|
||||||
ret_type = LLVMInt32TypeInContext(gallivm->context);
|
ret_type = LLVMInt32TypeInContext(gallivm->context);
|
||||||
ret_type = LLVMVectorType(ret_type, 8);
|
ret_type = LLVMVectorType(ret_type, 8);
|
||||||
ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
|
ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
|
||||||
return lp_build_intrinsic_unary(gallivm->builder,
|
break;
|
||||||
"llvm.SI.use.sgprptrci256.",
|
|
||||||
ret_type, sgpr_index);
|
|
||||||
default:
|
default:
|
||||||
assert(!"Unsupported SGPR type in use_sgpr()");
|
assert(!"Unsupported SGPR type in use_sgpr()");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ret_type = LLVMPointerType(ret_type, USER_SGPR_ADDR_SPACE);
|
||||||
|
ptr = LLVMBuildIntToPtr(gallivm->builder, sgpr_index, ret_type, "");
|
||||||
|
return LLVMBuildLoad(gallivm->builder, ptr, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void declare_input_vs(
|
static void declare_input_vs(
|
||||||
|
@ -174,7 +188,7 @@ static void declare_input_vs(
|
||||||
/* XXX: Communicate with the rest of the driver about which SGPR the T#
|
/* XXX: Communicate with the rest of the driver about which SGPR the T#
|
||||||
* list pointer is going to be stored in. Hard code to SGPR[6:7] for
|
* list pointer is going to be stored in. Hard code to SGPR[6:7] for
|
||||||
* now */
|
* now */
|
||||||
t_list_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_V4I32, 3);
|
t_list_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_V4I32, 6);
|
||||||
|
|
||||||
t_offset = lp_build_const_int32(base->gallivm, velem->vertex_buffer_index);
|
t_offset = lp_build_const_int32(base->gallivm, velem->vertex_buffer_index);
|
||||||
|
|
||||||
|
@ -320,27 +334,6 @@ static LLVMValueRef fetch_constant(
|
||||||
return build_indexed_load(base->gallivm, const_ptr, offset);
|
return build_indexed_load(base->gallivm, const_ptr, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Declare some intrinsics with the correct attributes */
|
|
||||||
static void si_llvm_emit_prologue(struct lp_build_tgsi_context * bld_base)
|
|
||||||
{
|
|
||||||
LLVMValueRef function;
|
|
||||||
struct gallivm_state * gallivm = bld_base->base.gallivm;
|
|
||||||
|
|
||||||
LLVMTypeRef i64 = LLVMInt64TypeInContext(gallivm->context);
|
|
||||||
LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
|
|
||||||
|
|
||||||
/* declare i32 @llvm.SI.use.sgpr.i32(i32) */
|
|
||||||
function = lp_declare_intrinsic(gallivm->module, "llvm.SI.use.sgpr.i32",
|
|
||||||
i32, &i32, 1);
|
|
||||||
LLVMAddFunctionAttr(function, LLVMReadNoneAttribute);
|
|
||||||
|
|
||||||
/* declare i64 @llvm.SI.use.sgpr.i64(i32) */
|
|
||||||
function = lp_declare_intrinsic(gallivm->module, "llvm.SI.use.sgpr.i64",
|
|
||||||
i64, &i32, 1);
|
|
||||||
LLVMAddFunctionAttr(function, LLVMReadNoneAttribute);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* XXX: This is partially implemented for VS only at this point. It is not complete */
|
/* XXX: This is partially implemented for VS only at this point. It is not complete */
|
||||||
static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
|
static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
|
||||||
{
|
{
|
||||||
|
@ -504,14 +497,14 @@ static void tex_fetch_args(
|
||||||
0, LP_CHAN_ALL);
|
0, LP_CHAN_ALL);
|
||||||
|
|
||||||
/* Resource */
|
/* Resource */
|
||||||
ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, 2);
|
ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, 4);
|
||||||
offset = lp_build_const_int32(bld_base->base.gallivm,
|
offset = lp_build_const_int32(bld_base->base.gallivm,
|
||||||
8 * emit_data->inst->Src[1].Register.Index);
|
8 * emit_data->inst->Src[1].Register.Index);
|
||||||
emit_data->args[2] = build_indexed_load(bld_base->base.gallivm,
|
emit_data->args[2] = build_indexed_load(bld_base->base.gallivm,
|
||||||
ptr, offset);
|
ptr, offset);
|
||||||
|
|
||||||
/* Sampler */
|
/* Sampler */
|
||||||
ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V4I32, 1);
|
ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V4I32, 2);
|
||||||
offset = lp_build_const_int32(bld_base->base.gallivm,
|
offset = lp_build_const_int32(bld_base->base.gallivm,
|
||||||
4 * emit_data->inst->Src[1].Register.Index);
|
4 * emit_data->inst->Src[1].Register.Index);
|
||||||
emit_data->args[3] = build_indexed_load(bld_base->base.gallivm,
|
emit_data->args[3] = build_indexed_load(bld_base->base.gallivm,
|
||||||
|
@ -557,7 +550,6 @@ int si_pipe_shader_create(
|
||||||
tgsi_scan_shader(shader->tokens, &shader_info);
|
tgsi_scan_shader(shader->tokens, &shader_info);
|
||||||
bld_base->info = &shader_info;
|
bld_base->info = &shader_info;
|
||||||
bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
|
bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
|
||||||
bld_base->emit_prologue = si_llvm_emit_prologue;
|
|
||||||
bld_base->emit_epilogue = si_llvm_emit_epilogue;
|
bld_base->emit_epilogue = si_llvm_emit_epilogue;
|
||||||
|
|
||||||
bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
|
bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
|
||||||
|
|
Loading…
Reference in New Issue