gallivm: hook up dx10 sampling opcodes

They are similar to old-style tex opcodes but with separate sampler and
texture units (and other arguments in different places).
Also adjust the debug tgsi dump code.

Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
This commit is contained in:
Roland Scheidegger 2013-02-01 14:44:59 -08:00
parent db7612d15d
commit 0a8043bb76
3 changed files with 419 additions and 6 deletions

View File

@ -68,7 +68,8 @@ enum lp_build_tex_modifier {
LP_BLD_TEX_MODIFIER_PROJECTED,
LP_BLD_TEX_MODIFIER_LOD_BIAS,
LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV
LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
LP_BLD_TEX_MODIFIER_LOD_ZERO
};
@ -104,7 +105,8 @@ struct lp_tgsi_texture_info
{
struct lp_tgsi_channel_info coord[4];
unsigned target:8; /* TGSI_TEXTURE_* */
unsigned unit:8; /* Sampler unit */
unsigned sampler_unit:8; /* Sampler unit */
unsigned texture_unit:8; /* Texture unit */
unsigned modifier:8; /* LP_BLD_TEX_MODIFIER_* */
};

View File

@ -96,6 +96,11 @@ is_immediate(const struct lp_tgsi_channel_info *chan_info, float value)
}
/**
* Analyse properties of tex instructions, in particular used
* to figure out if a texture is considered indirect.
* Not actually used by much except the tgsi dumping code.
*/
static void
analyse_tex(struct analysis_context *ctx,
const struct tgsi_full_instruction *inst,
@ -140,14 +145,109 @@ analyse_tex(struct analysis_context *ctx,
if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
/* We don't track explicit derivatives, although we could */
indirect = TRUE;
tex_info->unit = inst->Src[3].Register.Index;
tex_info->sampler_unit = inst->Src[3].Register.Index;
tex_info->texture_unit = inst->Src[3].Register.Index;
} else {
if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED ||
modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
readmask |= TGSI_WRITEMASK_W;
}
tex_info->unit = inst->Src[1].Register.Index;
tex_info->sampler_unit = inst->Src[1].Register.Index;
tex_info->texture_unit = inst->Src[1].Register.Index;
}
for (chan = 0; chan < 4; ++chan) {
struct lp_tgsi_channel_info *chan_info = &tex_info->coord[chan];
if (readmask & (1 << chan)) {
analyse_src(ctx, chan_info, &inst->Src[0].Register, chan);
if (chan_info->file != TGSI_FILE_INPUT) {
indirect = TRUE;
}
} else {
memset(chan_info, 0, sizeof *chan_info);
}
}
if (indirect) {
info->indirect_textures = TRUE;
}
++info->num_texs;
} else {
info->indirect_textures = TRUE;
}
}
/**
* Analyse properties of sample instructions, in particular used
* to figure out if a texture is considered indirect.
* Not actually used by much except the tgsi dumping code.
*/
static void
analyse_sample(struct analysis_context *ctx,
const struct tgsi_full_instruction *inst,
enum lp_build_tex_modifier modifier)
{
struct lp_tgsi_info *info = ctx->info;
unsigned chan;
if (info->num_texs < Elements(info->tex)) {
struct lp_tgsi_texture_info *tex_info = &info->tex[info->num_texs];
boolean indirect = FALSE;
boolean shadow = FALSE;
unsigned readmask = 0;
tex_info->target = inst->Texture.Texture;
switch (inst->Texture.Texture) {
case TGSI_TEXTURE_SHADOW1D:
shadow = TRUE;
/* Fallthrough */
case TGSI_TEXTURE_1D:
readmask = TGSI_WRITEMASK_X;
break;
case TGSI_TEXTURE_SHADOW1D_ARRAY:
case TGSI_TEXTURE_SHADOW2D:
case TGSI_TEXTURE_SHADOWRECT:
shadow = TRUE;
/* Fallthrough */
case TGSI_TEXTURE_1D_ARRAY:
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
readmask = TGSI_WRITEMASK_XY;
break;
case TGSI_TEXTURE_SHADOW2D_ARRAY:
case TGSI_TEXTURE_SHADOWCUBE:
shadow = TRUE;
/* Fallthrough */
case TGSI_TEXTURE_2D_ARRAY:
case TGSI_TEXTURE_3D:
case TGSI_TEXTURE_CUBE:
readmask = TGSI_WRITEMASK_XYZ;
break;
case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
shadow = TRUE;
/* Fallthrough */
case TGSI_TEXTURE_CUBE_ARRAY:
readmask = TGSI_WRITEMASK_XYZW;
break;
default:
assert(0);
return;
}
tex_info->texture_unit = inst->Src[1].Register.Index;
tex_info->sampler_unit = inst->Src[2].Register.Index;
if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV ||
modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS || shadow) {
/* We don't track insts with additional regs, although we could */
indirect = TRUE;
} else {
if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
readmask |= TGSI_WRITEMASK_W;
}
}
for (chan = 0; chan < 4; ++chan) {
@ -229,6 +329,22 @@ analyse_instruction(struct analysis_context *ctx,
case TGSI_OPCODE_TXP:
analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
break;
case TGSI_OPCODE_SAMPLE:
case TGSI_OPCODE_SAMPLE_C:
analyse_sample(ctx, inst, LP_BLD_TEX_MODIFIER_NONE);
break;
case TGSI_OPCODE_SAMPLE_C_LZ:
analyse_sample(ctx, inst, LP_BLD_TEX_MODIFIER_LOD_ZERO);
break;
case TGSI_OPCODE_SAMPLE_D:
analyse_sample(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
break;
case TGSI_OPCODE_SAMPLE_B:
analyse_sample(ctx, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
break;
case TGSI_OPCODE_SAMPLE_L:
analyse_sample(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
break;
default:
break;
}
@ -355,8 +471,9 @@ dump_info(const struct tgsi_token *tokens,
debug_printf(" _");
}
}
debug_printf(", SAMP[%u], %s\n",
tex_info->unit,
debug_printf(", RES[%u], SAMP[%u], %s\n",
tex_info->texture_unit,
tex_info->sampler_unit,
tgsi_texture_names[tex_info->target]);
}

View File

@ -1339,6 +1339,200 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
texel);
}
/**
 * Emit code for a dx10-style SAMPLE* instruction.
 *
 * The texture unit is read from src1 and the sampler unit from src2.
 * Coordinates are fetched from src0. src3 is fetched as the lod bias
 * (LOD_BIAS modifier), as the shadow comparison value (SHADOW targets)
 * or, together with src4, as the explicit derivatives (EXPLICIT_DERIV
 * modifier). The explicit lod is fetched from src0.w.
 *
 * bld       soa translation context; bld->sampler performs the fetch
 * inst      the TGSI instruction being translated
 * modifier  selects how lod_bias / explicit_lod / derivatives are set up
 * texel     output array; the no-sampler path writes four undef entries,
 *           otherwise it is handed to the sampler's emit_fetch_texel
 */
static void
emit_sample(struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst,
enum lp_build_tex_modifier modifier,
LLVMValueRef *texel)
{
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
unsigned texture_unit, sampler_unit;
LLVMValueRef lod_bias, explicit_lod;
LLVMValueRef coords[4];
/* offsets stay NULL unless the instruction carries exactly one offset */
LLVMValueRef offsets[3] = { NULL };
struct lp_derivatives derivs;
unsigned num_coords, dims;
unsigned i;
boolean compare = FALSE;
/* Without a sampler generator nothing can be emitted: warn and return undef */
if (!bld->sampler) {
_debug_printf("warning: found texture instruction but no sampler generator supplied\n");
for (i = 0; i < 4; i++) {
texel[i] = bld->bld_base.base.undef;
}
return;
}
/* Both derivative vectors default to undef; only the ones needed are set below */
derivs.ddx_ddy[0] = bld->bld_base.base.undef;
derivs.ddx_ddy[1] = bld->bld_base.base.undef;
/*
 * Derive from the texture target:
 *  num_coords - number of channels fetched from src0 as coordinates
 *  dims       - number of dimensions for which derivatives and offsets
 *               are set up
 *  compare    - TRUE for the SHADOW variants
 */
switch (inst->Texture.Texture) {
case TGSI_TEXTURE_SHADOW1D:
compare = TRUE;
/* Fallthrough */
case TGSI_TEXTURE_1D:
num_coords = 1;
dims = 1;
break;
case TGSI_TEXTURE_SHADOW1D_ARRAY:
compare = TRUE;
/* Fallthrough */
case TGSI_TEXTURE_1D_ARRAY:
num_coords = 2;
dims = 1;
break;
case TGSI_TEXTURE_SHADOW2D:
case TGSI_TEXTURE_SHADOWRECT:
compare = TRUE;
/* Fallthrough */
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
num_coords = 2;
dims = 2;
break;
case TGSI_TEXTURE_SHADOW2D_ARRAY:
case TGSI_TEXTURE_SHADOWCUBE:
compare = TRUE;
/* Fallthrough */
case TGSI_TEXTURE_2D_ARRAY:
case TGSI_TEXTURE_CUBE:
num_coords = 3;
dims = 2;
break;
case TGSI_TEXTURE_3D:
num_coords = 3;
dims = 3;
break;
case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
compare = TRUE;
/* Fallthrough */
case TGSI_TEXTURE_CUBE_ARRAY:
num_coords = 4;
dims = 3;
break;
default:
/* NOTE(review): texel[] is left unwritten on this path */
assert(0);
return;
}
/*
* unlike old-style tex opcodes the texture/sampler indices
* always come from src1 and src2 respectively.
*/
texture_unit = inst->Src[1].Register.Index;
sampler_unit = inst->Src[2].Register.Index;
/* At most one of lod_bias / explicit_lod is non-NULL, chosen by modifier */
if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
explicit_lod = NULL;
}
else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
/* lod bias comes from src 3.r but explicit lod from 0.a */
lod_bias = NULL;
explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
}
else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
lod_bias = NULL;
/* XXX might be better to explicitly pass the level zero information */
explicit_lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
}
else {
lod_bias = NULL;
explicit_lod = NULL;
}
/* Fetch the used coordinate channels from src0; the rest become undef */
for (i = 0; i < num_coords; i++) {
coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
}
for (i = num_coords; i < 4; i++) {
coords[i] = bld->bld_base.base.undef;
}
/*
* XXX: whack shadow comparison value into place.
* Should probably fix the interface for separate value
* (it will not work for cube arrays if it is part of coords).
*/
if (compare) {
/* slot 3 when more than two coords are in use, slot 2 otherwise */
unsigned c_coord = num_coords > 2 ? 3 : 2;
/* num_coords == 4 (SHADOWCUBE_ARRAY above) would overwrite a coordinate */
assert(num_coords < 4);
coords[c_coord] = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
}
if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
/* Explicit derivatives: pack src3/src4 into the derivs layout by shuffling */
LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
LLVMValueRef ddxdyonec[3];
unsigned length = bld->bld_base.base.type.length;
unsigned num_quads = length / 4;
unsigned dim;
unsigned quad;
/*
 * Per dimension: for each group of four lanes, lane 0 takes the group's
 * first element of srcx (src3), lane 1 the group's first element of
 * srcy (src4); lanes 2 and 3 are undef.
 */
for (dim = 0; dim < dims; ++dim) {
LLVMValueRef srcx = lp_build_emit_fetch( &bld->bld_base, inst, 3, dim );
LLVMValueRef srcy = lp_build_emit_fetch( &bld->bld_base, inst, 4, dim );
for (quad = 0; quad < num_quads; ++quad) {
/* shuffle indices >= length select from the second operand */
unsigned s1 = 4*quad;
unsigned s2 = 4*quad + length;
shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s2);
shuffles[4*quad + 2] = i32undef;
shuffles[4*quad + 3] = i32undef;
}
ddxdyonec[dim] = LLVMBuildShuffleVector(builder, srcx, srcy,
LLVMConstVector(shuffles, length), "");
}
if (dims == 1) {
derivs.ddx_ddy[0] = ddxdyonec[0];
}
else if (dims >= 2) {
/*
 * Merge the first two dimensions: per group of four lanes, lanes 0-1
 * come from ddxdyonec[0] and lanes 2-3 from ddxdyonec[1].
 */
for (quad = 0; quad < num_quads; ++quad) {
unsigned s1 = 4*quad;
unsigned s2 = 4*quad + length;
shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s1 + 1);
shuffles[4*quad + 2] = lp_build_const_int32(gallivm, s2);
shuffles[4*quad + 3] = lp_build_const_int32(gallivm, s2 + 1);
}
derivs.ddx_ddy[0] = LLVMBuildShuffleVector(builder, ddxdyonec[0], ddxdyonec[1],
LLVMConstVector(shuffles, length), "");
/* the third dimension goes into the second derivative vector */
if (dims == 3) {
derivs.ddx_ddy[1] = ddxdyonec[2];
}
}
} else {
/* No explicit derivatives: build them from the fetched coordinates */
if (dims == 1) {
derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[0]);
}
else if (dims >= 2) {
derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(&bld->bld_base.base,
coords[0], coords[1]);
if (dims == 3) {
derivs.ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[2]);
}
}
}
/* some advanced gather instructions (txgo) would require 4 offsets */
if (inst->Texture.NumOffsets == 1) {
unsigned dim;
for (dim = 0; dim < dims; dim++) {
offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
}
}
/*
 * Hand everything to the sampler generator.
 * NOTE(review): the meaning of the FALSE argument is defined by the
 * sampler interface, which is not visible here - confirm.
 */
bld->sampler->emit_fetch_texel(bld->sampler,
bld->bld_base.base.gallivm,
bld->bld_base.base.type,
FALSE,
texture_unit, sampler_unit,
coords,
offsets,
&derivs,
lod_bias, explicit_lod,
texel);
}
static void
emit_txf( struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst,
@ -1898,6 +2092,97 @@ txf_emit(
emit_txf(bld, emit_data->inst, emit_data->output);
}
/**
 * Emit callback installed for TGSI_OPCODE_SAMPLE.
 * Forwards to emit_sample() without any lod modifier.
 */
static void
sample_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   emit_sample(lp_soa_context(bld_base),
               emit_data->inst,
               LP_BLD_TEX_MODIFIER_NONE,
               emit_data->output);
}
/**
 * Emit callback installed for TGSI_OPCODE_SAMPLE_B.
 * Forwards to emit_sample() with the lod bias modifier.
 */
static void
sample_b_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   emit_sample(lp_soa_context(bld_base),
               emit_data->inst,
               LP_BLD_TEX_MODIFIER_LOD_BIAS,
               emit_data->output);
}
/**
 * Emit callback installed for TGSI_OPCODE_SAMPLE_C.
 *
 * Nothing here marks the instruction as a comparison: that information
 * should be encoded elsewhere (SHADOW target), so this forwards to
 * emit_sample() with no modifier.
 */
static void
sample_c_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   emit_sample(lp_soa_context(bld_base),
               emit_data->inst,
               LP_BLD_TEX_MODIFIER_NONE,
               emit_data->output);
}
/**
 * Emit callback installed for TGSI_OPCODE_SAMPLE_C_LZ.
 * Forwards to emit_sample() with the lod-zero modifier.
 */
static void
sample_c_lz_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   emit_sample(lp_soa_context(bld_base),
               emit_data->inst,
               LP_BLD_TEX_MODIFIER_LOD_ZERO,
               emit_data->output);
}
/**
 * Emit callback installed for TGSI_OPCODE_SAMPLE_D.
 * Forwards to emit_sample() with the explicit derivatives modifier.
 */
static void
sample_d_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   emit_sample(lp_soa_context(bld_base),
               emit_data->inst,
               LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
               emit_data->output);
}
/**
 * Emit callback installed for TGSI_OPCODE_SAMPLE_L.
 * Forwards to emit_sample() with the explicit lod modifier.
 */
static void
sample_l_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   emit_sample(lp_soa_context(bld_base),
               emit_data->inst,
               LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
               emit_data->output);
}
/**
 * Emit callback installed for TGSI_OPCODE_SVIEWINFO.
 * Currently just reuses the txq path.
 *
 * FIXME: unlike txq we are required to return number of mipmap levels
 * too, and the unused channels are defined to be zero.
 * Either always do that (and hope llvm can optimize it away?)
 * or pass a parameter all the way down.
 */
static void
sviewinfo_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   emit_txq(lp_soa_context(bld_base),
            emit_data->inst,
            emit_data->output);
}
static void
cal_emit(
const struct lp_build_tgsi_action * action,
@ -2270,6 +2555,15 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
/* DX10 sampling ops */
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = txf_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);