gallivm: handle huge number of immediates

We only supported up to 256 immediates, which isn't enough. We already
had code that stored immediates in a dynamically allocated array, but it
was always used alongside a statically allocated array for performance
reasons. This commit adds code to skip that performance optimization
and always use just the dynamically allocated immediates if the
number of them is too great.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
This commit is contained in:
Zack Rusin 2014-02-04 19:28:58 -05:00
parent 8507afc97f
commit 69ee3f431f
4 changed files with 86 additions and 44 deletions

View File

@ -47,19 +47,21 @@
#define LP_MAX_TGSI_ADDRS 16
#define LP_MAX_TGSI_IMMEDIATES 256
#define LP_MAX_TGSI_IMMEDIATES 4096
#define LP_MAX_TGSI_PREDS 16
#define LP_MAX_TGSI_CONST_BUFFERS 16
/*
* For quick access we cache temps in a statically
* allocated array. This defines the maximum size
* of that array.
* For quick access we cache registers in statically
* allocated arrays. Here we define the maximum size
* for those arrays.
*/
#define LP_MAX_INLINED_TEMPS 256
#define LP_MAX_INLINED_IMMEDIATES 256
/**
* Maximum control flow nesting
*

View File

@ -444,7 +444,7 @@ struct lp_build_tgsi_soa_context
struct tgsi_declaration_sampler_view sv[PIPE_MAX_SHADER_SAMPLER_VIEWS];
LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][TGSI_NUM_CHANNELS];
LLVMValueRef immediates[LP_MAX_INLINED_IMMEDIATES][TGSI_NUM_CHANNELS];
LLVMValueRef temps[LP_MAX_INLINED_TEMPS][TGSI_NUM_CHANNELS];
LLVMValueRef addr[LP_MAX_TGSI_ADDRS][TGSI_NUM_CHANNELS];
LLVMValueRef preds[LP_MAX_TGSI_PREDS][TGSI_NUM_CHANNELS];
@ -482,7 +482,7 @@ struct lp_build_tgsi_soa_context
struct lp_exec_mask exec_mask;
uint num_immediates;
boolean use_immediates_array;
};
void
@ -536,7 +536,7 @@ struct lp_build_tgsi_aos_context
struct lp_build_sampler_aos *sampler;
LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
LLVMValueRef immediates[LP_MAX_INLINED_IMMEDIATES];
LLVMValueRef temps[LP_MAX_INLINED_TEMPS];
LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
LLVMValueRef preds[LP_MAX_TGSI_PREDS];

View File

@ -1042,7 +1042,7 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm,
const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
float imm[4];
assert(size <= 4);
assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
assert(num_immediates < LP_MAX_INLINED_IMMEDIATES);
for (chan = 0; chan < 4; ++chan) {
imm[chan] = 0.0f;
}

View File

@ -1295,33 +1295,42 @@ emit_fetch_immediate(
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef res = NULL;
if (reg->Register.Indirect) {
LLVMValueRef indirect_index;
LLVMValueRef index_vec; /* index into the immediate register array */
if (bld->use_immediates_array || reg->Register.Indirect) {
LLVMValueRef imms_array;
LLVMTypeRef fptr_type;
indirect_index = get_indirect_index(bld,
reg->Register.File,
reg->Register.Index,
&reg->Indirect);
/*
* Unlike for other reg classes, adding pixel offsets is unnecessary -
* immediates are stored as full vectors (FIXME??? - might be better
* to store them the same as constants) but all elements are the same
* in any case.
*/
index_vec = get_soa_array_offsets(&bld_base->uint_bld,
indirect_index,
swizzle,
FALSE);
/* cast imms_array pointer to float* */
fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
/* Gather values from the immediate register array */
res = build_gather(&bld_base->base, imms_array, index_vec, NULL);
if (reg->Register.Indirect) {
LLVMValueRef indirect_index;
LLVMValueRef index_vec; /* index into the immediate register array */
indirect_index = get_indirect_index(bld,
reg->Register.File,
reg->Register.Index,
&reg->Indirect);
/*
* Unlike for other reg classes, adding pixel offsets is unnecessary -
* immediates are stored as full vectors (FIXME??? - might be better
* to store them the same as constants) but all elements are the same
* in any case.
*/
index_vec = get_soa_array_offsets(&bld_base->uint_bld,
indirect_index,
swizzle,
FALSE);
/* Gather values from the immediate register array */
res = build_gather(&bld_base->base, imms_array, index_vec, NULL);
} else {
LLVMValueRef lindex = lp_build_const_int32(gallivm,
reg->Register.Index * 4 + swizzle);
LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
bld->imms_array, &lindex, 1, "");
res = LLVMBuildLoad(builder, imms_ptr, "");
}
}
else {
res = bld->immediates[reg->Register.Index][swizzle];
@ -2728,51 +2737,71 @@ void lp_emit_immediate_soa(
{
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
struct gallivm_state * gallivm = bld_base->base.gallivm;
/* simply copy the immediate values into the next immediates[] slot */
LLVMValueRef imms[4];
unsigned i;
const uint size = imm->Immediate.NrTokens - 1;
assert(size <= 4);
assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
switch (imm->Immediate.DataType) {
case TGSI_IMM_FLOAT32:
for( i = 0; i < size; ++i )
bld->immediates[bld->num_immediates][i] =
lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
imms[i] =
lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
break;
case TGSI_IMM_UINT32:
for( i = 0; i < size; ++i ) {
LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
bld->immediates[bld->num_immediates][i] =
LLVMConstBitCast(tmp, bld_base->base.vec_type);
imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
}
break;
case TGSI_IMM_INT32:
for( i = 0; i < size; ++i ) {
LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
bld->immediates[bld->num_immediates][i] =
LLVMConstBitCast(tmp, bld_base->base.vec_type);
imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
}
break;
}
for( i = size; i < 4; ++i )
bld->immediates[bld->num_immediates][i] = bld_base->base.undef;
imms[i] = bld_base->base.undef;
if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
if (bld->use_immediates_array) {
unsigned index = bld->num_immediates;
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
for (i = 0; i < 4; ++i ) {
LLVMValueRef lindex = lp_build_const_int32(
bld->bld_base.base.gallivm, index * 4 + i);
bld->bld_base.base.gallivm, index * 4 + i);
LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
bld->imms_array, &lindex, 1, "");
LLVMBuildStore(builder,
bld->immediates[index][i],
imm_ptr);
LLVMBuildStore(builder, imms[i], imm_ptr);
}
} else {
/* simply copy the immediate values into the next immediates[] slot */
unsigned i;
const uint size = imm->Immediate.NrTokens - 1;
assert(size <= 4);
assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
for(i = 0; i < 4; ++i )
bld->immediates[bld->num_immediates][i] = imms[i];
if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
unsigned index = bld->num_immediates;
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
for (i = 0; i < 4; ++i ) {
LLVMValueRef lindex = lp_build_const_int32(
bld->bld_base.base.gallivm, index * 4 + i);
LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
bld->imms_array, &lindex, 1, "");
LLVMBuildStore(builder,
bld->immediates[index][i],
imm_ptr);
}
}
}
@ -3629,6 +3658,17 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
}
/*
* For performance reasons immediates are normally kept in a static
* array, but if there are too many of them we have to use only
* the dynamically allocated array.
*/
bld.use_immediates_array =
(info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
if (bld.use_immediates_array) {
bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
}
bld.bld_base.soa = TRUE;
bld.bld_base.emit_debug = emit_debug;