gallivm/tgsi: fix issues with sample opcodes

We need to encode them as Texture instructions since the NumOffsets field
is encoded there. However, we don't encode the actual target there; the
target is derived from the sampler view src later.

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
This commit is contained in:
Roland Scheidegger 2013-02-15 03:15:43 +01:00
parent cb2e678294
commit f1ab67c13a
7 changed files with 247 additions and 95 deletions

View File

@ -374,6 +374,8 @@ struct lp_build_tgsi_soa_context
const struct lp_build_sampler_soa *sampler;
struct tgsi_declaration_sampler_view sv[PIPE_MAX_SHADER_SAMPLER_VIEWS];
LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][TGSI_NUM_CHANNELS];
LLVMValueRef temps[LP_MAX_TGSI_TEMPS][TGSI_NUM_CHANNELS];
LLVMValueRef addr[LP_MAX_TGSI_ADDRS][TGSI_NUM_CHANNELS];

View File

@ -188,7 +188,8 @@ analyse_tex(struct analysis_context *ctx,
static void
analyse_sample(struct analysis_context *ctx,
const struct tgsi_full_instruction *inst,
enum lp_build_tex_modifier modifier)
enum lp_build_tex_modifier modifier,
boolean shadow)
{
struct lp_tgsi_info *info = ctx->info;
unsigned chan;
@ -197,45 +198,14 @@ analyse_sample(struct analysis_context *ctx,
struct lp_tgsi_texture_info *tex_info = &info->tex[info->num_texs];
boolean indirect = FALSE;
boolean shadow = FALSE;
unsigned readmask = 0;
unsigned readmask;
tex_info->target = inst->Texture.Texture;
switch (inst->Texture.Texture) {
case TGSI_TEXTURE_SHADOW1D:
shadow = TRUE;
/* Fallthrough */
case TGSI_TEXTURE_1D:
readmask = TGSI_WRITEMASK_X;
break;
case TGSI_TEXTURE_SHADOW1D_ARRAY:
case TGSI_TEXTURE_SHADOW2D:
case TGSI_TEXTURE_SHADOWRECT:
shadow = TRUE;
/* Fallthrough */
case TGSI_TEXTURE_1D_ARRAY:
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
readmask = TGSI_WRITEMASK_XY;
break;
case TGSI_TEXTURE_SHADOW2D_ARRAY:
case TGSI_TEXTURE_SHADOWCUBE:
shadow = TRUE;
/* Fallthrough */
case TGSI_TEXTURE_2D_ARRAY:
case TGSI_TEXTURE_3D:
case TGSI_TEXTURE_CUBE:
readmask = TGSI_WRITEMASK_XYZ;
break;
case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
shadow = TRUE;
/* Fallthrough */
case TGSI_TEXTURE_CUBE_ARRAY:
readmask = TGSI_WRITEMASK_XYZW;
break;
default:
assert(0);
return;
}
/*
* We don't really get much information here, in particular not
* the target info, hence no useful readmask either. Maybe we should
* just drop the whole function.
*/
readmask = TGSI_WRITEMASK_XYZW;
tex_info->texture_unit = inst->Src[1].Register.Index;
tex_info->sampler_unit = inst->Src[2].Register.Index;
@ -327,20 +297,22 @@ analyse_instruction(struct analysis_context *ctx,
analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
break;
case TGSI_OPCODE_SAMPLE:
analyse_sample(ctx, inst, LP_BLD_TEX_MODIFIER_NONE, FALSE);
break;
case TGSI_OPCODE_SAMPLE_C:
analyse_sample(ctx, inst, LP_BLD_TEX_MODIFIER_NONE);
analyse_sample(ctx, inst, LP_BLD_TEX_MODIFIER_NONE, TRUE);
break;
case TGSI_OPCODE_SAMPLE_C_LZ:
analyse_sample(ctx, inst, LP_BLD_TEX_MODIFIER_LOD_ZERO);
analyse_sample(ctx, inst, LP_BLD_TEX_MODIFIER_LOD_ZERO, TRUE);
break;
case TGSI_OPCODE_SAMPLE_D:
analyse_sample(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
analyse_sample(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, FALSE);
break;
case TGSI_OPCODE_SAMPLE_B:
analyse_sample(ctx, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
analyse_sample(ctx, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, FALSE);
break;
case TGSI_OPCODE_SAMPLE_L:
analyse_sample(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
analyse_sample(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, FALSE);
break;
default:
break;

View File

@ -1343,6 +1343,7 @@ static void
emit_sample(struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst,
enum lp_build_tex_modifier modifier,
boolean compare,
LLVMValueRef *texel)
{
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
@ -1354,7 +1355,6 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
struct lp_derivatives derivs;
unsigned num_coords, dims;
unsigned i;
boolean compare = FALSE;
if (!bld->sampler) {
_debug_printf("warning: found texture instruction but no sampler generator supplied\n");
@ -1364,37 +1364,35 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
return;
}
/*
* unlike old-style tex opcodes the texture/sampler indices
* always come from src1 and src2 respectively.
*/
texture_unit = inst->Src[1].Register.Index;
sampler_unit = inst->Src[2].Register.Index;
derivs.ddx_ddy[0] = bld->bld_base.base.undef;
derivs.ddx_ddy[1] = bld->bld_base.base.undef;
switch (inst->Texture.Texture) {
case TGSI_TEXTURE_SHADOW1D:
compare = TRUE;
/* Fallthrough */
/*
* Note the inst->Texture token is only there to carry the number of
* offsets (NumOffsets); the target information is NOT there and comes
* from the declared sampler views instead.
*/
switch (bld->sv[texture_unit].Resource) {
case TGSI_TEXTURE_1D:
num_coords = 1;
dims = 1;
break;
case TGSI_TEXTURE_SHADOW1D_ARRAY:
compare = TRUE;
/* Fallthrough */
case TGSI_TEXTURE_1D_ARRAY:
num_coords = 2;
dims = 1;
break;
case TGSI_TEXTURE_SHADOW2D:
case TGSI_TEXTURE_SHADOWRECT:
compare = TRUE;
/* Fallthrough */
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
num_coords = 2;
dims = 2;
break;
case TGSI_TEXTURE_SHADOW2D_ARRAY:
case TGSI_TEXTURE_SHADOWCUBE:
compare = TRUE;
/* Fallthrough */
case TGSI_TEXTURE_2D_ARRAY:
case TGSI_TEXTURE_CUBE:
num_coords = 3;
@ -1404,9 +1402,6 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
num_coords = 3;
dims = 3;
break;
case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
compare = TRUE;
/* Fallthrough */
case TGSI_TEXTURE_CUBE_ARRAY:
num_coords = 4;
dims = 3;
@ -1533,11 +1528,12 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
}
static void
emit_txf( struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst,
LLVMValueRef *texel)
emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst,
LLVMValueRef *texel,
boolean is_samplei)
{
unsigned unit;
unsigned unit, target;
LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
LLVMValueRef explicit_lod = NULL;
LLVMValueRef coords[3];
@ -1555,10 +1551,19 @@ emit_txf( struct lp_build_tgsi_soa_context *bld,
return;
}
unit = inst->Src[1].Register.Index;
derivs.ddx_ddy[0] = coord_undef;
derivs.ddx_ddy[1] = coord_undef;
switch (inst->Texture.Texture) {
if (is_samplei) {
target = bld->sv[unit].Resource;
}
else {
target = inst->Texture.Texture;
}
switch (target) {
case TGSI_TEXTURE_1D:
case TGSI_TEXTURE_BUFFER:
num_coords = 1;
@ -1587,7 +1592,7 @@ emit_txf( struct lp_build_tgsi_soa_context *bld,
}
/* always have lod except for buffers ? */
if (inst->Texture.Texture != TGSI_TEXTURE_BUFFER) {
if (target != TGSI_TEXTURE_BUFFER) {
explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
}
@ -1598,8 +1603,6 @@ emit_txf( struct lp_build_tgsi_soa_context *bld,
coords[i] = coord_undef;
}
unit = inst->Src[1].Register.Index;
if (inst->Texture.NumOffsets == 1) {
unsigned dim;
for (dim = 0; dim < dims; dim++) {
@ -1628,8 +1631,16 @@ emit_size_query( struct lp_build_tgsi_soa_context *bld,
LLVMValueRef explicit_lod;
unsigned has_lod;
unsigned i;
unsigned unit = inst->Src[1].Register.Index;
unsigned target;
switch (inst->Texture.Texture) {
if (is_sviewinfo) {
target = bld->sv[unit].Resource;
}
else {
target = inst->Texture.Texture;
}
switch (target) {
case TGSI_TEXTURE_BUFFER:
case TGSI_TEXTURE_RECT:
case TGSI_TEXTURE_SHADOWRECT:
@ -1655,7 +1666,7 @@ emit_size_query( struct lp_build_tgsi_soa_context *bld,
bld->sampler->emit_size_query(bld->sampler,
bld->bld_base.base.gallivm,
bld->bld_base.int_bld.type,
inst->Src[1].Register.Index,
unit,
is_sviewinfo,
explicit_lod,
sizes_out);
@ -1881,6 +1892,15 @@ lp_emit_declaration_soa(
"predicate");
break;
case TGSI_FILE_SAMPLER_VIEW:
/*
* The target stored here MUST match whatever there actually
* is in the set sampler views (what about return type?).
*/
assert(idx < PIPE_MAX_SHADER_SAMPLER_VIEWS);
bld->sv[idx] = decl->SamplerView;
break;
default:
/* don't need to declare other vars */
break;
@ -2055,7 +2075,18 @@ txf_emit(
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
emit_txf(bld, emit_data->inst, emit_data->output);
emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
}
/**
 * Emit callback for SAMPLE_I.
 *
 * Forwards to emit_fetch_texels() with is_samplei set to TRUE.
 * The action argument is not used.
 */
static void
sample_i_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   emit_fetch_texels(lp_soa_context(bld_base),
                     emit_data->inst,
                     emit_data->output,
                     TRUE);
}
static void
@ -2067,7 +2098,7 @@ sample_emit(
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
emit_data->output);
FALSE, emit_data->output);
}
static void
@ -2079,7 +2110,7 @@ sample_b_emit(
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
emit_data->output);
FALSE, emit_data->output);
}
static void
@ -2089,12 +2120,9 @@ sample_c_emit(
struct lp_build_emit_data * emit_data)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
/*
* note that we can ignore this is a comparison instruction here
* since it should be encoded elsewhere (SHADOW target).
*/
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
emit_data->output);
TRUE, emit_data->output);
}
static void
@ -2106,7 +2134,7 @@ sample_c_lz_emit(
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
emit_data->output);
TRUE, emit_data->output);
}
static void
@ -2118,7 +2146,7 @@ sample_d_emit(
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
emit_data->output);
FALSE, emit_data->output);
}
static void
@ -2130,7 +2158,7 @@ sample_l_emit(
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
emit_data->output);
FALSE, emit_data->output);
}
static void
@ -2522,7 +2550,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = txf_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;

View File

@ -185,7 +185,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{ 1, 3, 0, 0, 0, 0, OTHR, "SAMPLE", TGSI_OPCODE_SAMPLE },
{ 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_I", TGSI_OPCODE_SAMPLE_I },
{ 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_I_MS", TGSI_OPCODE_SAMPLE_I_MS },
{ 1, 3, 0, 0, 0, 0, OTHR, "SAMPLE_I_MS", TGSI_OPCODE_SAMPLE_I_MS },
{ 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_B", TGSI_OPCODE_SAMPLE_B },
{ 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_C", TGSI_OPCODE_SAMPLE_C },
{ 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_C_LZ", TGSI_OPCODE_SAMPLE_C_LZ },

View File

@ -32,6 +32,22 @@
#define OP14_TEX(a) OP14(a)
#endif
/*
 * Fallbacks for the sample-style opcode generators: if the includer has
 * not defined a given OPxy_SAMPLE macro, it expands to the plain OPxy
 * macro with the same suffix.
 */
#ifndef OP12_SAMPLE
#define OP12_SAMPLE(a) OP12(a)
#endif
#ifndef OP13_SAMPLE
#define OP13_SAMPLE(a) OP13(a)
#endif
#ifndef OP14_SAMPLE
#define OP14_SAMPLE(a) OP14(a)
#endif
#ifndef OP15_SAMPLE
#define OP15_SAMPLE(a) OP15(a)
#endif
#ifndef OP00_LBL
#define OP00_LBL(a) OP00(a)
#endif
@ -169,15 +185,15 @@ OP01(CASE)
OP00(DEFAULT)
OP00(ENDSWITCH)
OP13(SAMPLE)
OP12(SAMPLE_I)
OP12(SAMPLE_I_MS)
OP14(SAMPLE_B)
OP14(SAMPLE_C)
OP14(SAMPLE_C_LZ)
OP15(SAMPLE_D)
OP14(SAMPLE_L)
OP13(GATHER4)
OP13_SAMPLE(SAMPLE)
OP12_SAMPLE(SAMPLE_I)
OP13_SAMPLE(SAMPLE_I_MS)
OP14_SAMPLE(SAMPLE_B)
OP14_SAMPLE(SAMPLE_C)
OP14_SAMPLE(SAMPLE_C_LZ)
OP15_SAMPLE(SAMPLE_D)
OP14_SAMPLE(SAMPLE_L)
OP13_SAMPLE(GATHER4)
OP12(SVIEWINFO)
OP13(SAMPLE_POS)
OP12(SAMPLE_INFO)
@ -204,3 +220,7 @@ OP12(SAMPLE_INFO)
#undef OP12_TEX
#undef OP14_TEX
#undef OP12_SAMPLE
#undef OP13_SAMPLE
#undef OP14_SAMPLE
#undef OP15_SAMPLE

View File

@ -941,6 +941,17 @@ parse_instruction(
inst.Instruction.NumDstRegs = info->num_dst;
inst.Instruction.NumSrcRegs = info->num_src;
if (i >= TGSI_OPCODE_SAMPLE && i <= TGSI_OPCODE_GATHER4) {
/*
* These are not considered tex opcodes here (no additional
* target argument) however we're required to set the Texture
* bit so we can set the number of tex offsets (offsets aren't
* actually handled here yet in any case).
*/
inst.Instruction.Texture = 1;
inst.Texture.Texture = TGSI_TEXTURE_UNKNOWN;
}
/* Parse instruction operands.
*/
for (i = 0; i < info->num_dst + info->num_src + info->is_tex; i++) {

View File

@ -719,6 +719,33 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \
ureg_fixup_insn_size( ureg, insn.insn_token ); \
}
/*
 * Generates ureg_<op>() for a sample-style opcode taking one destination
 * and two sources.  A texture token is emitted with the instruction, using
 * TGSI_TEXTURE_UNKNOWN as the target and 0 as the final argument
 * (presumably the texel offset count -- confirm against ureg_emit_texture).
 */
#define OP12_SAMPLE( op )                                               \
static INLINE void ureg_##op( struct ureg_program *ureg,                \
                              struct ureg_dst dst,                      \
                              struct ureg_src src0,                     \
                              struct ureg_src src1 )                    \
{                                                                       \
   struct ureg_emit_insn_result insn =                                  \
      ureg_emit_insn( ureg, TGSI_OPCODE_##op,                           \
                      dst.Saturate, dst.Predicate, dst.PredNegate,      \
                      dst.PredSwizzleX, dst.PredSwizzleY,               \
                      dst.PredSwizzleZ, dst.PredSwizzleW,               \
                      1, 2 );                                           \
                                                                        \
   ureg_emit_texture( ureg, insn.extended_token,                        \
                      TGSI_TEXTURE_UNKNOWN, 0 );                        \
   ureg_emit_dst( ureg, dst );                                          \
   ureg_emit_src( ureg, src0 );                                         \
   ureg_emit_src( ureg, src1 );                                         \
   ureg_fixup_insn_size( ureg, insn.insn_token );                       \
}
#define OP13( op ) \
static INLINE void ureg_##op( struct ureg_program *ureg, \
struct ureg_dst dst, \
@ -745,6 +772,35 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \
ureg_fixup_insn_size( ureg, insn ); \
}
/*
 * Generates ureg_<op>() for a sample-style opcode taking one destination
 * and three sources.  A texture token is emitted with the instruction,
 * using TGSI_TEXTURE_UNKNOWN as the target and 0 as the final argument
 * (presumably the texel offset count -- confirm against ureg_emit_texture).
 */
#define OP13_SAMPLE( op )                                               \
static INLINE void ureg_##op( struct ureg_program *ureg,                \
                              struct ureg_dst dst,                      \
                              struct ureg_src src0,                     \
                              struct ureg_src src1,                     \
                              struct ureg_src src2 )                    \
{                                                                       \
   struct ureg_emit_insn_result insn =                                  \
      ureg_emit_insn( ureg, TGSI_OPCODE_##op,                           \
                      dst.Saturate, dst.Predicate, dst.PredNegate,      \
                      dst.PredSwizzleX, dst.PredSwizzleY,               \
                      dst.PredSwizzleZ, dst.PredSwizzleW,               \
                      1, 3 );                                           \
                                                                        \
   ureg_emit_texture( ureg, insn.extended_token,                        \
                      TGSI_TEXTURE_UNKNOWN, 0 );                        \
   ureg_emit_dst( ureg, dst );                                          \
   ureg_emit_src( ureg, src0 );                                         \
   ureg_emit_src( ureg, src1 );                                         \
   ureg_emit_src( ureg, src2 );                                         \
   ureg_fixup_insn_size( ureg, insn.insn_token );                       \
}
#define OP14_TEX( op ) \
static INLINE void ureg_##op( struct ureg_program *ureg, \
struct ureg_dst dst, \
@ -776,6 +832,37 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \
ureg_fixup_insn_size( ureg, insn.insn_token ); \
}
/*
 * Generates ureg_<op>() for a sample-style opcode taking one destination
 * and four sources.  A texture token is emitted with the instruction,
 * using TGSI_TEXTURE_UNKNOWN as the target and 0 as the final argument
 * (presumably the texel offset count -- confirm against ureg_emit_texture).
 */
#define OP14_SAMPLE( op )                                               \
static INLINE void ureg_##op( struct ureg_program *ureg,                \
                              struct ureg_dst dst,                      \
                              struct ureg_src src0,                     \
                              struct ureg_src src1,                     \
                              struct ureg_src src2,                     \
                              struct ureg_src src3 )                    \
{                                                                       \
   struct ureg_emit_insn_result insn =                                  \
      ureg_emit_insn( ureg, TGSI_OPCODE_##op,                           \
                      dst.Saturate, dst.Predicate, dst.PredNegate,      \
                      dst.PredSwizzleX, dst.PredSwizzleY,               \
                      dst.PredSwizzleZ, dst.PredSwizzleW,               \
                      1, 4 );                                           \
                                                                        \
   ureg_emit_texture( ureg, insn.extended_token,                        \
                      TGSI_TEXTURE_UNKNOWN, 0 );                        \
   ureg_emit_dst( ureg, dst );                                          \
   ureg_emit_src( ureg, src0 );                                         \
   ureg_emit_src( ureg, src1 );                                         \
   ureg_emit_src( ureg, src2 );                                         \
   ureg_emit_src( ureg, src3 );                                         \
   ureg_fixup_insn_size( ureg, insn.insn_token );                       \
}
#define OP14( op ) \
static INLINE void ureg_##op( struct ureg_program *ureg, \
@ -836,6 +923,38 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \
ureg_fixup_insn_size( ureg, insn ); \
}
/*
 * Generates ureg_<op>() for a sample-style opcode taking one destination
 * and five sources.  A texture token is emitted with the instruction,
 * using TGSI_TEXTURE_UNKNOWN as the target and 0 as the final argument
 * (presumably the texel offset count -- confirm against ureg_emit_texture).
 */
#define OP15_SAMPLE( op )                                               \
static INLINE void ureg_##op( struct ureg_program *ureg,                \
                              struct ureg_dst dst,                      \
                              struct ureg_src src0,                     \
                              struct ureg_src src1,                     \
                              struct ureg_src src2,                     \
                              struct ureg_src src3,                     \
                              struct ureg_src src4 )                    \
{                                                                       \
   struct ureg_emit_insn_result insn =                                  \
      ureg_emit_insn( ureg, TGSI_OPCODE_##op,                           \
                      dst.Saturate, dst.Predicate, dst.PredNegate,      \
                      dst.PredSwizzleX, dst.PredSwizzleY,               \
                      dst.PredSwizzleZ, dst.PredSwizzleW,               \
                      1, 5 );                                           \
                                                                        \
   ureg_emit_texture( ureg, insn.extended_token,                        \
                      TGSI_TEXTURE_UNKNOWN, 0 );                        \
   ureg_emit_dst( ureg, dst );                                          \
   ureg_emit_src( ureg, src0 );                                         \
   ureg_emit_src( ureg, src1 );                                         \
   ureg_emit_src( ureg, src2 );                                         \
   ureg_emit_src( ureg, src3 );                                         \
   ureg_emit_src( ureg, src4 );                                         \
   ureg_fixup_insn_size( ureg, insn.insn_token );                       \
}
/* Use a template include to generate a correctly-typed ureg_OP()
* function for each TGSI opcode: