gallium: remove TGSI opcode SCS
use COS+SIN instead.

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Acked-by: Jose Fonseca <jfonseca@vmware.com>
This commit is contained in:
parent
33efa6416f
commit
497506ad93
|
@ -686,31 +686,6 @@ const struct lp_build_tgsi_action sqrt_action = {
|
|||
sqrt_emit /* emit */
|
||||
};
|
||||
|
||||
/* TGSI_OPCODE_SCS */
|
||||
static void
|
||||
scs_emit(
|
||||
const struct lp_build_tgsi_action * action,
|
||||
struct lp_build_tgsi_context * bld_base,
|
||||
struct lp_build_emit_data * emit_data)
|
||||
{
|
||||
/* dst.x */
|
||||
emit_data->output[TGSI_CHAN_X] = lp_build_emit_llvm_unary(bld_base,
|
||||
TGSI_OPCODE_COS, emit_data->args[0]);
|
||||
/* dst.y */
|
||||
emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_unary(bld_base,
|
||||
TGSI_OPCODE_SIN, emit_data->args[0]);
|
||||
/* dst.z */
|
||||
emit_data->output[TGSI_CHAN_Z] = bld_base->base.zero;
|
||||
|
||||
/* dst.w */
|
||||
emit_data->output[TGSI_CHAN_W] = bld_base->base.one;
|
||||
}
|
||||
|
||||
const struct lp_build_tgsi_action scs_action = {
|
||||
scalar_unary_fetch_args, /* fetch_args */
|
||||
scs_emit /* emit */
|
||||
};
|
||||
|
||||
/* TGSI_OPCODE_F2U */
|
||||
static void
|
||||
f2u_emit(
|
||||
|
@ -1195,7 +1170,6 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
|
|||
bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
|
||||
bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
|
||||
|
||||
bld_base->op_actions[TGSI_OPCODE_SWITCH].fetch_args = scalar_unary_fetch_args;
|
||||
|
|
|
@ -719,9 +719,6 @@ lp_emit_instruction_aos(
|
|||
dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_SCS:
|
||||
return FALSE;
|
||||
|
||||
case TGSI_OPCODE_TXB:
|
||||
dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
|
||||
break;
|
||||
|
|
|
@ -956,23 +956,6 @@ ttn_lit(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
|
|||
}
|
||||
}
|
||||
|
||||
/* SCS - Sine Cosine
|
||||
* dst.x = \cos{src.x}
|
||||
* dst.y = \sin{src.x}
|
||||
* dst.z = 0.0
|
||||
* dst.w = 1.0
|
||||
*/
|
||||
static void
|
||||
ttn_scs(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
|
||||
{
|
||||
ttn_move_dest_masked(b, dest, nir_fcos(b, ttn_channel(b, src[0], X)),
|
||||
TGSI_WRITEMASK_X);
|
||||
ttn_move_dest_masked(b, dest, nir_fsin(b, ttn_channel(b, src[0], X)),
|
||||
TGSI_WRITEMASK_Y);
|
||||
ttn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), TGSI_WRITEMASK_Z);
|
||||
ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
|
||||
}
|
||||
|
||||
static void
|
||||
ttn_sle(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
|
||||
{
|
||||
|
@ -1539,7 +1522,6 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = {
|
|||
|
||||
[TGSI_OPCODE_SSG] = nir_op_fsign,
|
||||
[TGSI_OPCODE_CMP] = 0,
|
||||
[TGSI_OPCODE_SCS] = 0,
|
||||
[TGSI_OPCODE_TXB] = 0,
|
||||
[TGSI_OPCODE_DIV] = nir_op_fdiv,
|
||||
[TGSI_OPCODE_DP2] = 0,
|
||||
|
@ -1756,10 +1738,6 @@ ttn_emit_instruction(struct ttn_compile *c)
|
|||
ttn_ucmp(b, op_trans[tgsi_op], dest, src);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_SCS:
|
||||
ttn_scs(b, op_trans[tgsi_op], dest, src);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_SGT:
|
||||
ttn_sgt(b, op_trans[tgsi_op], dest, src);
|
||||
break;
|
||||
|
|
|
@ -3285,33 +3285,6 @@ exec_ucmp(struct tgsi_exec_machine *mach,
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
exec_scs(struct tgsi_exec_machine *mach,
|
||||
const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
|
||||
union tgsi_exec_channel arg;
|
||||
union tgsi_exec_channel result;
|
||||
|
||||
fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
|
||||
|
||||
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
|
||||
micro_cos(&result, &arg);
|
||||
store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
|
||||
}
|
||||
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
|
||||
micro_sin(&result, &arg);
|
||||
store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
|
||||
}
|
||||
}
|
||||
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
|
||||
store_dest(mach, &ZeroVec, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
|
||||
}
|
||||
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
|
||||
store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
exec_dst(struct tgsi_exec_machine *mach,
|
||||
const struct tgsi_full_instruction *inst)
|
||||
|
@ -5334,10 +5307,6 @@ exec_instruction(
|
|||
exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_SCS:
|
||||
exec_scs(mach, inst);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_DIV:
|
||||
exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
|
||||
break;
|
||||
|
|
|
@ -104,7 +104,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
|
|||
{ 0, 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG },
|
||||
{ 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "", 67 }, /* removed */
|
||||
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB },
|
||||
{ 1, 1, 0, 0, 0, 0, 0, OTHR, "FBFETCH", TGSI_OPCODE_FBFETCH },
|
||||
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV },
|
||||
|
|
|
@ -258,71 +258,6 @@ transform_dst(struct tgsi_transform_context *tctx,
|
|||
}
|
||||
}
|
||||
|
||||
/* SCS - Sine Cosine
|
||||
* dst.x = \cos{src.x}
|
||||
* dst.y = \sin{src.x}
|
||||
* dst.z = 0.0
|
||||
* dst.w = 1.0
|
||||
*
|
||||
* ; needs: 1 tmp, imm{0.0, 1.0}
|
||||
* if (dst.x aliases src.x) {
|
||||
* MOV tmpA.x, src.x
|
||||
* src = tmpA
|
||||
* }
|
||||
* COS dst.x, src.x
|
||||
* SIN dst.y, src.x
|
||||
* MOV dst.zw, imm{0.0, 1.0}
|
||||
*/
|
||||
#define SCS_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) - OINST(1))
|
||||
#define SCS_TMP 1
|
||||
static void
|
||||
transform_scs(struct tgsi_transform_context *tctx,
|
||||
struct tgsi_full_instruction *inst)
|
||||
{
|
||||
struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
|
||||
struct tgsi_full_dst_register *dst = &inst->Dst[0];
|
||||
struct tgsi_full_src_register *src = &inst->Src[0];
|
||||
struct tgsi_full_instruction new_inst;
|
||||
|
||||
if (aliases(dst, TGSI_WRITEMASK_X, src, TGSI_WRITEMASK_X)) {
|
||||
create_mov(tctx, &ctx->tmp[A].dst, src, TGSI_WRITEMASK_X, 0);
|
||||
src = &ctx->tmp[A].src;
|
||||
}
|
||||
|
||||
if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
|
||||
/* COS dst.x, src.x */
|
||||
new_inst = tgsi_default_full_instruction();
|
||||
new_inst.Instruction.Opcode = TGSI_OPCODE_COS;
|
||||
new_inst.Instruction.NumDstRegs = 1;
|
||||
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
|
||||
new_inst.Instruction.NumSrcRegs = 1;
|
||||
reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
|
||||
tctx->emit_instruction(tctx, &new_inst);
|
||||
}
|
||||
|
||||
if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
|
||||
/* SIN dst.y, src.x */
|
||||
new_inst = tgsi_default_full_instruction();
|
||||
new_inst.Instruction.Opcode = TGSI_OPCODE_SIN;
|
||||
new_inst.Instruction.NumDstRegs = 1;
|
||||
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
|
||||
new_inst.Instruction.NumSrcRegs = 1;
|
||||
reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
|
||||
tctx->emit_instruction(tctx, &new_inst);
|
||||
}
|
||||
|
||||
if (dst->Register.WriteMask & TGSI_WRITEMASK_ZW) {
|
||||
/* MOV dst.zw, imm{0.0, 1.0} */
|
||||
new_inst = tgsi_default_full_instruction();
|
||||
new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
|
||||
new_inst.Instruction.NumDstRegs = 1;
|
||||
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_ZW);
|
||||
new_inst.Instruction.NumSrcRegs = 1;
|
||||
reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, X, Y));
|
||||
tctx->emit_instruction(tctx, &new_inst);
|
||||
}
|
||||
}
|
||||
|
||||
/* LRP - Linear Interpolate
|
||||
* dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
|
||||
* dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
|
||||
|
@ -1407,11 +1342,6 @@ transform_instr(struct tgsi_transform_context *tctx,
|
|||
goto skip;
|
||||
transform_dst(tctx, inst);
|
||||
break;
|
||||
case TGSI_OPCODE_SCS:
|
||||
if (!ctx->config->lower_SCS)
|
||||
goto skip;
|
||||
transform_scs(tctx, inst);
|
||||
break;
|
||||
case TGSI_OPCODE_LRP:
|
||||
if (!ctx->config->lower_LRP)
|
||||
goto skip;
|
||||
|
@ -1535,7 +1465,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
|
|||
#define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
|
||||
/* if there are no instructions to lower, then we are done: */
|
||||
if (!(OPCS(DST) ||
|
||||
OPCS(SCS) ||
|
||||
OPCS(LRP) ||
|
||||
OPCS(FRC) ||
|
||||
OPCS(POW) ||
|
||||
|
@ -1564,10 +1493,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
|
|||
newlen += DST_GROW * OPCS(DST);
|
||||
numtmp = MAX2(numtmp, DST_TMP);
|
||||
}
|
||||
if (OPCS(SCS)) {
|
||||
newlen += SCS_GROW * OPCS(SCS);
|
||||
numtmp = MAX2(numtmp, SCS_TMP);
|
||||
}
|
||||
if (OPCS(LRP)) {
|
||||
newlen += LRP_GROW * OPCS(LRP);
|
||||
numtmp = MAX2(numtmp, LRP_TMP);
|
||||
|
|
|
@ -55,7 +55,6 @@ struct tgsi_lowering_config
|
|||
* enable lowering of TGSI_OPCODE_<opc>
|
||||
*/
|
||||
unsigned lower_DST:1;
|
||||
unsigned lower_SCS:1;
|
||||
unsigned lower_LRP:1;
|
||||
unsigned lower_FRC:1;
|
||||
unsigned lower_POW:1;
|
||||
|
|
|
@ -90,7 +90,6 @@ OP00_LBL(CAL)
|
|||
OP00(RET)
|
||||
OP11(SSG)
|
||||
OP13(CMP)
|
||||
OP11(SCS)
|
||||
OP12_TEX(TXB)
|
||||
OP12(DIV)
|
||||
OP12(DP2)
|
||||
|
|
|
@ -253,10 +253,6 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,
|
|||
read_mask = TGSI_WRITEMASK_X;
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_SCS:
|
||||
read_mask = write_mask & TGSI_WRITEMASK_XY ? TGSI_WRITEMASK_X : 0;
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_EXP:
|
||||
case TGSI_OPCODE_LOG:
|
||||
read_mask = write_mask & TGSI_WRITEMASK_XYZ ? TGSI_WRITEMASK_X : 0;
|
||||
|
|
|
@ -651,19 +651,6 @@ This instruction replicates its result.
|
|||
Unconditional discard. Allowed in fragment shaders only.
|
||||
|
||||
|
||||
.. opcode:: SCS - Sine Cosine
|
||||
|
||||
.. math::
|
||||
|
||||
dst.x = \cos{src.x}
|
||||
|
||||
dst.y = \sin{src.x}
|
||||
|
||||
dst.z = 0
|
||||
|
||||
dst.w = 1
|
||||
|
||||
|
||||
.. opcode:: TXB - Texture Lookup With Bias
|
||||
|
||||
for cube map array textures and shadow cube maps, the bias value
|
||||
|
|
|
@ -1474,9 +1474,6 @@ trans_trig(const struct instr_translater *t, struct etna_compile *c,
|
|||
* - Output an x and y component, which need to be multiplied to
|
||||
* get the result
|
||||
*/
|
||||
/* TGSI lowering should deal with SCS */
|
||||
assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS);
|
||||
|
||||
struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */
|
||||
emit_inst(c, &(struct etna_inst) {
|
||||
.opcode = INST_OPCODE_MUL,
|
||||
|
@ -1503,9 +1500,6 @@ trans_trig(const struct instr_translater *t, struct etna_compile *c,
|
|||
});
|
||||
|
||||
} else if (c->specs->has_sin_cos_sqrt) {
|
||||
/* TGSI lowering should deal with SCS */
|
||||
assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS);
|
||||
|
||||
struct etna_native_reg temp = etna_compile_get_inner_temp(c);
|
||||
/* add divide by PI/2, using a temp register. GC2000
|
||||
* fails with src==dst for the trig instruction. */
|
||||
|
@ -1540,8 +1534,6 @@ trans_trig(const struct instr_translater *t, struct etna_compile *c,
|
|||
* DP3 t.x___, t.xyww, C, void (for scs)
|
||||
* MAD t._y_w, t.xxzz, |t.xxzz|, -t.xxzz
|
||||
* MAD dst, t.ywyw, .2225, t.xzxz
|
||||
*
|
||||
* TODO: we don't set dst.zw correctly for SCS.
|
||||
*/
|
||||
struct etna_inst *p, ins[9] = { };
|
||||
struct etna_native_reg t0 = etna_compile_get_inner_temp(c);
|
||||
|
@ -1597,19 +1589,7 @@ trans_trig(const struct instr_translater *t, struct etna_compile *c,
|
|||
ins[4].src[0] = swizzle(t0s, dp3_swiz);
|
||||
ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));
|
||||
|
||||
if (inst->Instruction.Opcode == TGSI_OPCODE_SCS) {
|
||||
ins[5] = ins[3];
|
||||
ins[6] = ins[4];
|
||||
ins[4].dst.comps = INST_COMPS_X;
|
||||
ins[6].dst.comps = INST_COMPS_Z;
|
||||
ins[5].src[0] = swizzle(t0s, SWIZZLE(W, Z, W, W));
|
||||
ins[6].src[0] = swizzle(t0s, SWIZZLE(Z, Y, W, W));
|
||||
ins[5].src[1] = absolute(ins[5].src[0]);
|
||||
p = &ins[7];
|
||||
} else {
|
||||
p = &ins[5];
|
||||
}
|
||||
|
||||
p = &ins[5];
|
||||
p->opcode = INST_OPCODE_MAD;
|
||||
p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);
|
||||
p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));
|
||||
|
@ -1809,7 +1789,6 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
|
|||
|
||||
INSTR(SIN, trans_trig),
|
||||
INSTR(COS, trans_trig),
|
||||
INSTR(SCS, trans_trig),
|
||||
|
||||
INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),
|
||||
INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),
|
||||
|
@ -2309,7 +2288,6 @@ etna_compile_shader(struct etna_shader_variant *v)
|
|||
const struct etna_specs *specs = v->shader->specs;
|
||||
|
||||
struct tgsi_lowering_config lconfig = {
|
||||
.lower_SCS = specs->has_sin_cos_sqrt,
|
||||
.lower_FLR = !specs->has_sign_floor_ceil,
|
||||
.lower_CEIL = !specs->has_sign_floor_ceil,
|
||||
.lower_POW = true,
|
||||
|
|
|
@ -105,7 +105,6 @@ static const struct {
|
|||
[ TGSI_OPCODE_RCP ] = { false, false, 0, 1, 1 },
|
||||
[ TGSI_OPCODE_RET ] = { false, false, 0, 0, 0 },
|
||||
[ TGSI_OPCODE_RSQ ] = { false, false, 0, 1, 1 },
|
||||
[ TGSI_OPCODE_SCS ] = { false, false, 0, 1, 1 },
|
||||
[ TGSI_OPCODE_SEQ ] = { false, false, 0, 1, 2 },
|
||||
[ TGSI_OPCODE_SGE ] = { false, false, 0, 1, 2 },
|
||||
[ TGSI_OPCODE_SGT ] = { false, false, 0, 1, 2 },
|
||||
|
|
|
@ -803,70 +803,6 @@ i915_translate_instruction(struct i915_fp_compile *p,
|
|||
swizzle(src0, X, X, X, X), 0, 0);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_SCS:
|
||||
src0 = src_vector(p, &inst->Src[0], fs);
|
||||
tmp = i915_get_utemp(p);
|
||||
|
||||
/*
|
||||
* t0.xy = MUL x.xx11, x.x111 ; x^2, x, 1, 1
|
||||
* t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
|
||||
* t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
|
||||
* scs.y = DP4 t1, scs_sin_constants
|
||||
* t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
|
||||
* scs.x = DP4 t1, scs_cos_constants
|
||||
*/
|
||||
i915_emit_arith(p,
|
||||
A0_MUL,
|
||||
tmp, A0_DEST_CHANNEL_XY, 0,
|
||||
swizzle(src0, X, X, ONE, ONE),
|
||||
swizzle(src0, X, ONE, ONE, ONE), 0);
|
||||
|
||||
i915_emit_arith(p,
|
||||
A0_MUL,
|
||||
tmp, A0_DEST_CHANNEL_ALL, 0,
|
||||
swizzle(tmp, X, Y, X, Y),
|
||||
swizzle(tmp, X, X, ONE, ONE), 0);
|
||||
|
||||
writemask = inst->Dst[0].Register.WriteMask;
|
||||
|
||||
if (writemask & TGSI_WRITEMASK_Y) {
|
||||
uint tmp1;
|
||||
|
||||
if (writemask & TGSI_WRITEMASK_X)
|
||||
tmp1 = i915_get_utemp(p);
|
||||
else
|
||||
tmp1 = tmp;
|
||||
|
||||
i915_emit_arith(p,
|
||||
A0_MUL,
|
||||
tmp1, A0_DEST_CHANNEL_ALL, 0,
|
||||
swizzle(tmp, X, Y, Y, W),
|
||||
swizzle(tmp, X, Z, ONE, ONE), 0);
|
||||
|
||||
i915_emit_arith(p,
|
||||
A0_DP4,
|
||||
get_result_vector(p, &inst->Dst[0]),
|
||||
A0_DEST_CHANNEL_Y, 0,
|
||||
swizzle(tmp1, W, Z, Y, X),
|
||||
i915_emit_const4fv(p, scs_sin_constants), 0);
|
||||
}
|
||||
|
||||
if (writemask & TGSI_WRITEMASK_X) {
|
||||
i915_emit_arith(p,
|
||||
A0_MUL,
|
||||
tmp, A0_DEST_CHANNEL_XYZ, 0,
|
||||
swizzle(tmp, X, X, Z, ONE),
|
||||
swizzle(tmp, Z, ONE, ONE, ONE), 0);
|
||||
|
||||
i915_emit_arith(p,
|
||||
A0_DP4,
|
||||
get_result_vector(p, &inst->Dst[0]),
|
||||
A0_DEST_CHANNEL_X, 0,
|
||||
swizzle(tmp, ONE, Z, Y, X),
|
||||
i915_emit_const4fv(p, scs_cos_constants), 0);
|
||||
}
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_SEQ:
|
||||
/* if we're both >= and <= then we're == */
|
||||
src0 = src_vector(p, &inst->Src[0], fs);
|
||||
|
|
|
@ -288,7 +288,6 @@ unsigned int Instruction::srcMask(unsigned int s) const
|
|||
case TGSI_OPCODE_POW:
|
||||
case TGSI_OPCODE_RCP:
|
||||
case TGSI_OPCODE_RSQ:
|
||||
case TGSI_OPCODE_SCS:
|
||||
return 0x1;
|
||||
case TGSI_OPCODE_IF:
|
||||
case TGSI_OPCODE_UIF:
|
||||
|
@ -3258,19 +3257,6 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
|||
mkOp1(op, TYPE_F32, dst0[3], val0);
|
||||
}
|
||||
break;
|
||||
case TGSI_OPCODE_SCS:
|
||||
if (mask & 3) {
|
||||
val0 = mkOp1v(OP_PRESIN, TYPE_F32, getSSA(), fetchSrc(0, 0));
|
||||
if (dst0[0])
|
||||
mkOp1(OP_COS, TYPE_F32, dst0[0], val0);
|
||||
if (dst0[1])
|
||||
mkOp1(OP_SIN, TYPE_F32, dst0[1], val0);
|
||||
}
|
||||
if (dst0[2])
|
||||
loadImm(dst0[2], 0.0f);
|
||||
if (dst0[3])
|
||||
loadImm(dst0[3], 1.0f);
|
||||
break;
|
||||
case TGSI_OPCODE_EXP:
|
||||
src0 = fetchSrc(0, 0);
|
||||
val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0);
|
||||
|
|
|
@ -689,23 +689,6 @@ nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc,
|
|||
nvfx_fp_emit(fpc, arith(sat, EX2, dst, mask, neg(swz(tmp, X, X, X, X)), none, none));
|
||||
}
|
||||
break;
|
||||
case TGSI_OPCODE_SCS:
|
||||
/* avoid overwriting the source */
|
||||
if(src[0].swz[NVFX_SWZ_X] != NVFX_SWZ_X)
|
||||
{
|
||||
if (mask & NVFX_FP_MASK_X)
|
||||
nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
|
||||
if (mask & NVFX_FP_MASK_Y)
|
||||
nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (mask & NVFX_FP_MASK_Y)
|
||||
nvfx_fp_emit(fpc, arith(sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none));
|
||||
if (mask & NVFX_FP_MASK_X)
|
||||
nvfx_fp_emit(fpc, arith(sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none));
|
||||
}
|
||||
break;
|
||||
case TGSI_OPCODE_SEQ:
|
||||
nvfx_fp_emit(fpc, arith(sat, SEQ, dst, mask, src[0], src[1], none));
|
||||
break;
|
||||
|
|
|
@ -163,7 +163,6 @@
|
|||
* SUB - ADD, negate second source
|
||||
* RSQ - LG2 + EX2
|
||||
* POW - LG2 + MUL + EX2
|
||||
* SCS - COS + SIN
|
||||
*
|
||||
* NV40 Looping
|
||||
* Loops appear to be fairly expensive on NV40 at least, the proprietary
|
||||
|
|
|
@ -82,7 +82,6 @@ static unsigned translate_opcode(unsigned opcode)
|
|||
/* case TGSI_OPCODE_RET: return RC_OPCODE_RET; */
|
||||
case TGSI_OPCODE_SSG: return RC_OPCODE_SSG;
|
||||
case TGSI_OPCODE_CMP: return RC_OPCODE_CMP;
|
||||
case TGSI_OPCODE_SCS: return RC_OPCODE_SCS;
|
||||
case TGSI_OPCODE_TXB: return RC_OPCODE_TXB;
|
||||
/* case TGSI_OPCODE_DIV: return RC_OPCODE_DIV; */
|
||||
case TGSI_OPCODE_DP2: return RC_OPCODE_DP2;
|
||||
|
|
|
@ -4602,127 +4602,6 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int tgsi_scs(struct r600_shader_ctx *ctx)
|
||||
{
|
||||
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
|
||||
struct r600_bytecode_alu alu;
|
||||
int i, r;
|
||||
|
||||
/* We'll only need the trig stuff if we are going to write to the
|
||||
* X or Y components of the destination vector.
|
||||
*/
|
||||
if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
|
||||
r = tgsi_setup_trig(ctx);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* dst.x = COS */
|
||||
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
|
||||
if (ctx->bc->chip_class == CAYMAN) {
|
||||
for (i = 0 ; i < 3; i++) {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ALU_OP1_COS;
|
||||
tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
|
||||
|
||||
if (i == 0)
|
||||
alu.dst.write = 1;
|
||||
else
|
||||
alu.dst.write = 0;
|
||||
alu.src[0].sel = ctx->temp_reg;
|
||||
alu.src[0].chan = 0;
|
||||
if (i == 2)
|
||||
alu.last = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
} else {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ALU_OP1_COS;
|
||||
tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
|
||||
|
||||
alu.src[0].sel = ctx->temp_reg;
|
||||
alu.src[0].chan = 0;
|
||||
alu.last = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
/* dst.y = SIN */
|
||||
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
|
||||
if (ctx->bc->chip_class == CAYMAN) {
|
||||
for (i = 0 ; i < 3; i++) {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ALU_OP1_SIN;
|
||||
tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
|
||||
if (i == 1)
|
||||
alu.dst.write = 1;
|
||||
else
|
||||
alu.dst.write = 0;
|
||||
alu.src[0].sel = ctx->temp_reg;
|
||||
alu.src[0].chan = 0;
|
||||
if (i == 2)
|
||||
alu.last = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
} else {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ALU_OP1_SIN;
|
||||
tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
|
||||
|
||||
alu.src[0].sel = ctx->temp_reg;
|
||||
alu.src[0].chan = 0;
|
||||
alu.last = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
/* dst.z = 0.0; */
|
||||
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
|
||||
alu.op = ALU_OP1_MOV;
|
||||
|
||||
tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
|
||||
|
||||
alu.src[0].sel = V_SQ_ALU_SRC_0;
|
||||
alu.src[0].chan = 0;
|
||||
|
||||
alu.last = 1;
|
||||
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* dst.w = 1.0; */
|
||||
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
|
||||
alu.op = ALU_OP1_MOV;
|
||||
|
||||
tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
|
||||
|
||||
alu.src[0].sel = V_SQ_ALU_SRC_1;
|
||||
alu.src[0].chan = 0;
|
||||
|
||||
alu.last = 1;
|
||||
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int tgsi_kill(struct r600_shader_ctx *ctx)
|
||||
{
|
||||
const struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
|
||||
|
@ -9017,7 +8896,7 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]
|
|||
[TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg},
|
||||
[TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp},
|
||||
[TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs},
|
||||
[67] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
|
||||
[69] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
|
@ -9215,7 +9094,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
|
|||
[TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg},
|
||||
[TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp},
|
||||
[TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs},
|
||||
[67] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
|
||||
[69] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
|
@ -9438,7 +9317,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
|
|||
[TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg},
|
||||
[TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp},
|
||||
[TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs},
|
||||
[67] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
|
||||
[69] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
|
|
|
@ -1249,29 +1249,6 @@ do_emit_sincos(struct svga_shader_emitter *emit,
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Translate/emit a TGSI SIN, COS or SCS instruction.
|
||||
*/
|
||||
static boolean
|
||||
emit_sincos(struct svga_shader_emitter *emit,
|
||||
const struct tgsi_full_instruction *insn)
|
||||
{
|
||||
SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
|
||||
struct src_register src0 = translate_src_register(emit, &insn->Src[0]);
|
||||
SVGA3dShaderDestToken temp = get_temp( emit );
|
||||
|
||||
/* SCS TMP SRC */
|
||||
if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 ))
|
||||
return FALSE;
|
||||
|
||||
/* MOV DST TMP */
|
||||
if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) ))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Translate TGSI SIN instruction into:
|
||||
* SCS TMP SRC
|
||||
|
@ -2840,9 +2817,6 @@ svga_emit_instruction(struct svga_shader_emitter *emit,
|
|||
case TGSI_OPCODE_SIN:
|
||||
return emit_sin( emit, insn );
|
||||
|
||||
case TGSI_OPCODE_SCS:
|
||||
return emit_sincos( emit, insn );
|
||||
|
||||
case TGSI_OPCODE_END:
|
||||
/* TGSI always finishes the main func with an END */
|
||||
return emit_end( emit );
|
||||
|
|
|
@ -4278,42 +4278,6 @@ emit_rsq(struct svga_shader_emitter_v10 *emit,
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit code for TGSI_OPCODE_SCS instruction.
|
||||
*/
|
||||
static boolean
|
||||
emit_scs(struct svga_shader_emitter_v10 *emit,
|
||||
const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
/* dst.x = cos(src.x)
|
||||
* dst.y = sin(src.x)
|
||||
* dst.z = 0.0
|
||||
* dst.w = 1.0
|
||||
*/
|
||||
struct tgsi_full_dst_register dst_x =
|
||||
writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
|
||||
struct tgsi_full_dst_register dst_y =
|
||||
writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
|
||||
struct tgsi_full_dst_register dst_zw =
|
||||
writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_ZW);
|
||||
|
||||
struct tgsi_full_src_register zero_one =
|
||||
make_immediate_reg_float4(emit, 0.0f, 0.0f, 0.0f, 1.0f);
|
||||
|
||||
begin_emit_instruction(emit);
|
||||
emit_opcode(emit, VGPU10_OPCODE_SINCOS, inst->Instruction.Saturate);
|
||||
emit_dst_register(emit, &dst_y);
|
||||
emit_dst_register(emit, &dst_x);
|
||||
emit_src_register(emit, &inst->Src[0]);
|
||||
end_emit_instruction(emit);
|
||||
|
||||
emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
|
||||
&dst_zw, &zero_one, inst->Instruction.Saturate);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
|
||||
*/
|
||||
|
@ -5593,8 +5557,6 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
|
|||
return emit_rsq(emit, inst);
|
||||
case TGSI_OPCODE_SAMPLE:
|
||||
return emit_sample(emit, inst);
|
||||
case TGSI_OPCODE_SCS:
|
||||
return emit_scs(emit, inst);
|
||||
case TGSI_OPCODE_SEQ:
|
||||
return emit_seq(emit, inst);
|
||||
case TGSI_OPCODE_SGE:
|
||||
|
|
|
@ -404,7 +404,7 @@ struct tgsi_property_data {
|
|||
#define TGSI_OPCODE_RET 64
|
||||
#define TGSI_OPCODE_SSG 65 /* SGN */
|
||||
#define TGSI_OPCODE_CMP 66
|
||||
#define TGSI_OPCODE_SCS 67
|
||||
/* gap */
|
||||
#define TGSI_OPCODE_TXB 68
|
||||
#define TGSI_OPCODE_FBFETCH 69
|
||||
#define TGSI_OPCODE_DIV 70
|
||||
|
|
|
@ -1784,13 +1784,17 @@ DECL_SPECIAL(LABEL)
|
|||
|
||||
DECL_SPECIAL(SINCOS)
|
||||
{
|
||||
struct ureg_program *ureg = tx->ureg;
|
||||
struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
|
||||
struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
|
||||
|
||||
assert(!(dst.WriteMask & 0xc));
|
||||
|
||||
dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
|
||||
ureg_SCS(tx->ureg, dst, src);
|
||||
/* z undefined, w untouched */
|
||||
ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
|
||||
ureg_scalar(src, TGSI_SWIZZLE_X));
|
||||
ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
|
||||
ureg_scalar(src, TGSI_SWIZZLE_X));
|
||||
return D3D_OK;
|
||||
}
|
||||
|
||||
|
@ -2943,8 +2947,8 @@ struct sm1_op_info inst_table[] =
|
|||
_OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
|
||||
_OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
|
||||
|
||||
_OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
|
||||
_OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
|
||||
_OPI(SINCOS, NOP, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
|
||||
_OPI(SINCOS, NOP, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
|
||||
|
||||
/* More flow control */
|
||||
_OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
|
||||
|
|
|
@ -6044,11 +6044,6 @@ compile_tgsi_instruction(struct st_translate *t,
|
|||
tex_target, inst->image_format);
|
||||
break;
|
||||
|
||||
case TGSI_OPCODE_SCS:
|
||||
dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
|
||||
ureg_insn(ureg, inst->op, dst, num_dst, src, num_src, inst->precise);
|
||||
break;
|
||||
|
||||
default:
|
||||
ureg_insn(ureg,
|
||||
inst->op,
|
||||
|
|
|
@ -491,8 +491,6 @@ translate_opcode( unsigned op )
|
|||
return TGSI_OPCODE_POW;
|
||||
case OPCODE_RCP:
|
||||
return TGSI_OPCODE_RCP;
|
||||
case OPCODE_SCS:
|
||||
return TGSI_OPCODE_SCS;
|
||||
case OPCODE_SGE:
|
||||
return TGSI_OPCODE_SGE;
|
||||
case OPCODE_SIN:
|
||||
|
@ -558,11 +556,10 @@ compile_instruction(
|
|||
return;
|
||||
|
||||
case OPCODE_SCS:
|
||||
dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
|
||||
ureg_insn( ureg,
|
||||
translate_opcode( inst->Opcode ),
|
||||
dst, num_dst,
|
||||
src, num_src, 0 );
|
||||
ureg_COS(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_X),
|
||||
ureg_scalar(src[0], TGSI_SWIZZLE_X));
|
||||
ureg_SIN(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_Y),
|
||||
ureg_scalar(src[0], TGSI_SWIZZLE_X));
|
||||
break;
|
||||
|
||||
case OPCODE_XPD:
|
||||
|
|
Loading…
Reference in New Issue