mesa/src/gallium/frontends/d3d10umd/ShaderTGSI.c

2303 lines
87 KiB
C

/**************************************************************************
*
* Copyright 2012-2021 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
**************************************************************************/
/*
* ShaderTGSI.c --
* Functions for translating shaders.
*/
#include "Debug.h"
#include "ShaderParse.h"
#include "pipe/p_state.h"
#include "tgsi/tgsi_ureg.h"
#include "tgsi/tgsi_dump.h"
#include "util/u_memory.h"
#include "ShaderDump.h"
/* Operand data format attached to every row of the opcode translation
 * table.  translate_src_operand() uses it to decide how a 32-bit immediate
 * operand is emitted.
 */
enum dx10_opcode_format {
OF_FLOAT, /* immediates are emitted with ureg_imm4f() */
OF_INT, /* immediates are emitted with ureg_imm4i() */
OF_UINT /* immediates are emitted with ureg_imm4u() */
};
/* One row of the opcode translation table (opcode_xlate[]).
 * The table uses positional initializers, so the field order matters.
 */
struct dx10_opcode_xlate {
D3D10_SB_OPCODE_TYPE type; /* D3D10 opcode this row describes */
enum dx10_opcode_format format; /* operand data format for this opcode */
uint tgsi_opcode; /* TGSI_OPCODE_x, TGSI_EXPAND or TGSI_LOG_UNSUPPORTED */
};
/* Opcodes that we have not even attempted to implement:
*/
#define TGSI_LOG_UNSUPPORTED TGSI_OPCODE_LAST
/* Opcodes which do not translate directly to a TGSI opcode, but which
* have at least a partial implementation coded below:
*/
#define TGSI_EXPAND (TGSI_OPCODE_LAST+1)
/* Translation table from D3D10 shader opcodes to TGSI opcodes.
 *
 * Each row is { D3D10 opcode, operand format, TGSI opcode }.  Rows whose
 * third field is TGSI_EXPAND have no single TGSI equivalent and rows marked
 * TGSI_LOG_UNSUPPORTED are not implemented.
 *
 * NOTE(review): the array is sized by D3D10_SB_NUM_OPCODES and the rows
 * appear to be listed in opcode order, so it is presumably indexed directly
 * by the D3D10 opcode value -- confirm against the use site before
 * reordering or inserting rows.
 */
static struct dx10_opcode_xlate opcode_xlate[D3D10_SB_NUM_OPCODES] = {
{D3D10_SB_OPCODE_ADD, OF_FLOAT, TGSI_OPCODE_ADD},
{D3D10_SB_OPCODE_AND, OF_UINT, TGSI_OPCODE_AND},
{D3D10_SB_OPCODE_BREAK, OF_FLOAT, TGSI_OPCODE_BRK},
{D3D10_SB_OPCODE_BREAKC, OF_UINT, TGSI_EXPAND},
{D3D10_SB_OPCODE_CALL, OF_UINT, TGSI_EXPAND},
{D3D10_SB_OPCODE_CALLC, OF_UINT, TGSI_EXPAND},
{D3D10_SB_OPCODE_CASE, OF_UINT, TGSI_OPCODE_CASE},
{D3D10_SB_OPCODE_CONTINUE, OF_FLOAT, TGSI_OPCODE_CONT},
{D3D10_SB_OPCODE_CONTINUEC, OF_UINT, TGSI_EXPAND},
{D3D10_SB_OPCODE_CUT, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_DEFAULT, OF_FLOAT, TGSI_OPCODE_DEFAULT},
{D3D10_SB_OPCODE_DERIV_RTX, OF_FLOAT, TGSI_OPCODE_DDX},
{D3D10_SB_OPCODE_DERIV_RTY, OF_FLOAT, TGSI_OPCODE_DDY},
{D3D10_SB_OPCODE_DISCARD, OF_UINT, TGSI_EXPAND},
{D3D10_SB_OPCODE_DIV, OF_FLOAT, TGSI_OPCODE_DIV},
{D3D10_SB_OPCODE_DP2, OF_FLOAT, TGSI_OPCODE_DP2},
{D3D10_SB_OPCODE_DP3, OF_FLOAT, TGSI_OPCODE_DP3},
{D3D10_SB_OPCODE_DP4, OF_FLOAT, TGSI_OPCODE_DP4},
{D3D10_SB_OPCODE_ELSE, OF_FLOAT, TGSI_OPCODE_ELSE},
{D3D10_SB_OPCODE_EMIT, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_EMITTHENCUT, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_ENDIF, OF_FLOAT, TGSI_OPCODE_ENDIF},
{D3D10_SB_OPCODE_ENDLOOP, OF_FLOAT, TGSI_OPCODE_ENDLOOP},
{D3D10_SB_OPCODE_ENDSWITCH, OF_FLOAT, TGSI_OPCODE_ENDSWITCH},
{D3D10_SB_OPCODE_EQ, OF_FLOAT, TGSI_OPCODE_FSEQ},
{D3D10_SB_OPCODE_EXP, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_FRC, OF_FLOAT, TGSI_OPCODE_FRC},
{D3D10_SB_OPCODE_FTOI, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_FTOU, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_GE, OF_FLOAT, TGSI_OPCODE_FSGE},
{D3D10_SB_OPCODE_IADD, OF_INT, TGSI_OPCODE_UADD},
{D3D10_SB_OPCODE_IF, OF_UINT, TGSI_EXPAND},
{D3D10_SB_OPCODE_IEQ, OF_INT, TGSI_OPCODE_USEQ},
{D3D10_SB_OPCODE_IGE, OF_INT, TGSI_OPCODE_ISGE},
{D3D10_SB_OPCODE_ILT, OF_INT, TGSI_OPCODE_ISLT},
{D3D10_SB_OPCODE_IMAD, OF_INT, TGSI_OPCODE_UMAD},
{D3D10_SB_OPCODE_IMAX, OF_INT, TGSI_OPCODE_IMAX},
{D3D10_SB_OPCODE_IMIN, OF_INT, TGSI_OPCODE_IMIN},
{D3D10_SB_OPCODE_IMUL, OF_INT, TGSI_EXPAND},
{D3D10_SB_OPCODE_INE, OF_INT, TGSI_OPCODE_USNE},
{D3D10_SB_OPCODE_INEG, OF_INT, TGSI_OPCODE_INEG},
{D3D10_SB_OPCODE_ISHL, OF_INT, TGSI_OPCODE_SHL},
{D3D10_SB_OPCODE_ISHR, OF_INT, TGSI_OPCODE_ISHR},
{D3D10_SB_OPCODE_ITOF, OF_INT, TGSI_OPCODE_I2F},
{D3D10_SB_OPCODE_LABEL, OF_INT, TGSI_EXPAND},
{D3D10_SB_OPCODE_LD, OF_UINT, TGSI_EXPAND},
{D3D10_SB_OPCODE_LD_MS, OF_UINT, TGSI_EXPAND},
{D3D10_SB_OPCODE_LOG, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_LOOP, OF_FLOAT, TGSI_OPCODE_BGNLOOP},
{D3D10_SB_OPCODE_LT, OF_FLOAT, TGSI_OPCODE_FSLT},
{D3D10_SB_OPCODE_MAD, OF_FLOAT, TGSI_OPCODE_MAD},
{D3D10_SB_OPCODE_MIN, OF_FLOAT, TGSI_OPCODE_MIN},
{D3D10_SB_OPCODE_MAX, OF_FLOAT, TGSI_OPCODE_MAX},
{D3D10_SB_OPCODE_CUSTOMDATA, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_MOV, OF_UINT, TGSI_OPCODE_MOV},
{D3D10_SB_OPCODE_MOVC, OF_UINT, TGSI_OPCODE_UCMP},
{D3D10_SB_OPCODE_MUL, OF_FLOAT, TGSI_OPCODE_MUL},
{D3D10_SB_OPCODE_NE, OF_FLOAT, TGSI_OPCODE_FSNE},
{D3D10_SB_OPCODE_NOP, OF_FLOAT, TGSI_OPCODE_NOP},
{D3D10_SB_OPCODE_NOT, OF_UINT, TGSI_OPCODE_NOT},
{D3D10_SB_OPCODE_OR, OF_UINT, TGSI_OPCODE_OR},
{D3D10_SB_OPCODE_RESINFO, OF_UINT, TGSI_EXPAND},
{D3D10_SB_OPCODE_RET, OF_FLOAT, TGSI_OPCODE_RET},
{D3D10_SB_OPCODE_RETC, OF_UINT, TGSI_EXPAND},
{D3D10_SB_OPCODE_ROUND_NE, OF_FLOAT, TGSI_OPCODE_ROUND},
{D3D10_SB_OPCODE_ROUND_NI, OF_FLOAT, TGSI_OPCODE_FLR},
{D3D10_SB_OPCODE_ROUND_PI, OF_FLOAT, TGSI_OPCODE_CEIL},
{D3D10_SB_OPCODE_ROUND_Z, OF_FLOAT, TGSI_OPCODE_TRUNC},
{D3D10_SB_OPCODE_RSQ, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_SAMPLE, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_SAMPLE_C, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_SAMPLE_C_LZ, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_SAMPLE_L, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_SAMPLE_D, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_SAMPLE_B, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_SQRT, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_SWITCH, OF_UINT, TGSI_OPCODE_SWITCH},
{D3D10_SB_OPCODE_SINCOS, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_UDIV, OF_UINT, TGSI_EXPAND},
{D3D10_SB_OPCODE_ULT, OF_UINT, TGSI_OPCODE_USLT},
{D3D10_SB_OPCODE_UGE, OF_UINT, TGSI_OPCODE_USGE},
{D3D10_SB_OPCODE_UMUL, OF_UINT, TGSI_EXPAND},
{D3D10_SB_OPCODE_UMAD, OF_UINT, TGSI_OPCODE_UMAD},
{D3D10_SB_OPCODE_UMAX, OF_UINT, TGSI_OPCODE_UMAX},
{D3D10_SB_OPCODE_UMIN, OF_UINT, TGSI_OPCODE_UMIN},
{D3D10_SB_OPCODE_USHR, OF_UINT, TGSI_OPCODE_USHR},
{D3D10_SB_OPCODE_UTOF, OF_UINT, TGSI_OPCODE_U2F},
{D3D10_SB_OPCODE_XOR, OF_UINT, TGSI_OPCODE_XOR},
{D3D10_SB_OPCODE_DCL_RESOURCE, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_DCL_SAMPLER, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_DCL_INDEX_RANGE, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
{D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_DCL_INPUT, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_DCL_INPUT_SGV, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_DCL_INPUT_SIV, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_DCL_INPUT_PS, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_DCL_INPUT_PS_SGV, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_DCL_INPUT_PS_SIV, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_DCL_OUTPUT, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_DCL_OUTPUT_SGV, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_DCL_OUTPUT_SIV, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_DCL_TEMPS, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP, OF_FLOAT, TGSI_EXPAND},
{D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
{D3D10_SB_OPCODE_RESERVED0, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
{D3D10_1_SB_OPCODE_LOD, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
{D3D10_1_SB_OPCODE_GATHER4, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
{D3D10_1_SB_OPCODE_SAMPLE_POS, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
{D3D10_1_SB_OPCODE_SAMPLE_INFO, OF_FLOAT, TGSI_LOG_UNSUPPORTED}
};
/* Fixed capacities of the per-shader translation state.  Most of them size
 * an array in struct Shader_xlate and are checked with assert() before the
 * corresponding array is indexed.
 */
#define SHADER_MAX_TEMPS 4096 /* size of Shader_xlate::temps[] */
#define SHADER_MAX_INPUTS 32 /* size of Shader_xlate::inputs[] */
#define SHADER_MAX_OUTPUTS 32 /* size of Shader_xlate::outputs[] */
#define SHADER_MAX_CONSTS 4096 /* upper bound asserted on constant-buffer element indices */
#define SHADER_MAX_RESOURCES PIPE_MAX_SHADER_SAMPLER_VIEWS /* size of resources[] and sv[] */
#define SHADER_MAX_SAMPLERS PIPE_MAX_SAMPLERS /* size of Shader_xlate::samplers[] */
#define SHADER_MAX_INDEXABLE_TEMPS 4096 /* size of Shader_xlate::indexable_temp_offsets[] */
/* One recorded subroutine call, appended by Shader_add_call(). */
struct Shader_call {
unsigned d3d_label; /* D3D10 label passed to Shader_add_call() */
unsigned tgsi_label_token; /* TGSI label token passed to Shader_add_call();
                            * presumably patched later -- confirm at use site */
};
/* One recorded label, stored in Shader_xlate::labels[]. */
struct Shader_label {
unsigned d3d_label; /* D3D10 label number */
unsigned tgsi_insn_no; /* TGSI instruction number associated with the label */
};
/* Per-resource information kept in Shader_xlate::resources[]. */
struct Shader_resource {
uint target; /* TGSI_TEXTURE_x */
};
/* Working state for translating one D3D10 shader into a TGSI program. */
struct Shader_xlate {
/* TGSI program builder that receives all declarations and instructions. */
struct ureg_program *ureg;
/* Vertex count per input primitive; set once by declare_vertices_in(),
 * which asserts that later declarations agree.  Zero means "not set yet". */
uint vertices_in;
uint declared_temps;
/* Temporary registers.  Plain D3D10 temps are looked up at
 * temp_offset + index, indexable temps at
 * indexable_temp_offsets[array] + element. */
struct ureg_dst temps[SHADER_MAX_TEMPS];
/* Register returned for OUTPUT_DEPTH operands. */
struct ureg_dst output_depth;
struct Shader_resource resources[SHADER_MAX_RESOURCES];
/* Registers returned for RESOURCE operands, by resource index. */
struct ureg_src sv[SHADER_MAX_RESOURCES];
/* Registers returned for SAMPLER operands, by sampler index. */
struct ureg_src samplers[SHADER_MAX_SAMPLERS];
/* Base register for IMMEDIATE_CONSTANT_BUFFER operands. */
struct ureg_src imms;
/* System value returned for INPUT_PRIMITIVEID operands. */
struct ureg_src prim_id;
uint temp_offset;
uint indexable_temp_offsets[SHADER_MAX_INDEXABLE_TEMPS];
/* Per-input-register state, maintained by dcl_base_input(). */
struct {
boolean declared; /* a first declaration has been recorded */
uint writemask; /* union of the component masks declared so far */
uint siv_name; /* system-value name of the first declaration */
boolean overloaded; /* several declarations were merged into a temp */
struct ureg_src reg; /* register to read for this input */
} inputs[SHADER_MAX_INPUTS];
/* Per-output-register state: one destination per component, filled in by
 * dcl_base_output(). */
struct {
struct ureg_dst reg[4];
} outputs[SHADER_MAX_OUTPUTS];
/* Maps a D3D10 register index (d3d) to a TGSI semantic index (tgsi).
 * translate_semantic_index() currently consults only the clip mapping,
 * for both clip and cull distances. */
struct {
uint d3d;
uint tgsi;
} clip_distance_mapping[2], cull_distance_mapping[2];
uint num_clip_distances_declared;
uint num_cull_distances_declared;
/* Recorded calls: num_calls entries used out of max_calls. */
struct Shader_call *calls;
uint num_calls;
uint max_calls;
/* Recorded labels: num_labels entries used out of max_labels. */
struct Shader_label *labels;
uint num_labels;
uint max_labels;
};
/*
 * Convert a D3D10 pixel-shader interpolation mode to TGSI_INTERPOLATE_x.
 *
 * Centroid and per-sample variants are not supported: they are logged and
 * mapped to the matching non-centroid mode.  An undefined or unknown mode
 * asserts and falls back to TGSI_INTERPOLATE_LINEAR.
 */
static uint
translate_interpolation(D3D10_SB_INTERPOLATION_MODE interpolation)
{
   switch (interpolation) {
   case D3D10_SB_INTERPOLATION_CONSTANT:
      return TGSI_INTERPOLATE_CONSTANT;

   /* D3D10 "linear" is perspective-correct. */
   case D3D10_SB_INTERPOLATION_LINEAR:
      return TGSI_INTERPOLATE_PERSPECTIVE;
   case D3D10_SB_INTERPOLATION_LINEAR_CENTROID:
   case D3D10_SB_INTERPOLATION_LINEAR_SAMPLE: /* DX10.1 */
      LOG_UNSUPPORTED(TRUE);
      return TGSI_INTERPOLATE_PERSPECTIVE;

   case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE:
      return TGSI_INTERPOLATE_LINEAR;
   case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID:
   case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: /* DX10.1 */
      LOG_UNSUPPORTED(TRUE);
      return TGSI_INTERPOLATE_LINEAR;

   case D3D10_SB_INTERPOLATION_UNDEFINED:
      /* Handled together with unknown values below. */
      break;
   }

   assert(0);
   return TGSI_INTERPOLATE_LINEAR;
}
/*
 * Convert a D3D10 system-value name to the TGSI_SEMANTIC_x it is declared
 * with.
 *
 * Clip and cull distances both map to TGSI_SEMANTIC_CLIPDIST.  The sample
 * index is logged as unsupported and mapped to TGSI_SEMANTIC_GENERIC.  An
 * undefined or unknown name asserts and yields TGSI_SEMANTIC_GENERIC.
 */
static uint
translate_system_name(D3D10_SB_NAME name)
{
   switch (name) {
   case D3D10_SB_NAME_POSITION:
      return TGSI_SEMANTIC_POSITION;

   case D3D10_SB_NAME_CLIP_DISTANCE:
   case D3D10_SB_NAME_CULL_DISTANCE:
      return TGSI_SEMANTIC_CLIPDIST;

   case D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX:
      return TGSI_SEMANTIC_LAYER;
   case D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX:
      return TGSI_SEMANTIC_VIEWPORT_INDEX;

   case D3D10_SB_NAME_VERTEX_ID:
      return TGSI_SEMANTIC_VERTEXID_NOBASE;
   case D3D10_SB_NAME_PRIMITIVE_ID:
      return TGSI_SEMANTIC_PRIMID;
   case D3D10_SB_NAME_INSTANCE_ID:
      return TGSI_SEMANTIC_INSTANCEID;

   case D3D10_SB_NAME_IS_FRONT_FACE:
      return TGSI_SEMANTIC_FACE;

   case D3D10_SB_NAME_SAMPLE_INDEX:
      LOG_UNSUPPORTED(TRUE);
      return TGSI_SEMANTIC_GENERIC;

   case D3D10_SB_NAME_UNDEFINED:
      /* Should not happen; handled together with unknown values below. */
      break;
   }

   assert(0);
   return TGSI_SEMANTIC_GENERIC;
}
/*
 * Return the TGSI semantic index for a declared system value.
 *
 * Only clip and cull distances have a non-zero index: the D3D10 register
 * index of the operand is looked up in sx->clip_distance_mapping[] (cull
 * distances use the clip mapping as well; cull_distance_mapping is not
 * consulted here).  Every other name yields 0.
 */
static uint
translate_semantic_index(struct Shader_xlate *sx,
                         D3D10_SB_NAME name,
                         const struct Shader_dst_operand *operand)
{
   if (name != D3D10_SB_NAME_CLIP_DISTANCE &&
       name != D3D10_SB_NAME_CULL_DISTANCE) {
      return 0;
   }

   if (sx->clip_distance_mapping[0].d3d == operand->base.index[0].imm) {
      return sx->clip_distance_mapping[0].tgsi;
   }

   /* Only two mapping slots exist, so the register must match the second. */
   assert(sx->clip_distance_mapping[1].d3d == operand->base.index[0].imm);
   return sx->clip_distance_mapping[1].tgsi;
}
/*
 * Convert a D3D10 resource return type to the TGSI return type.
 *
 * Mixed and unknown return types are logged as unsupported and treated as
 * float.
 */
static enum tgsi_return_type
trans_dcl_ret_type(D3D10_SB_RESOURCE_RETURN_TYPE d3drettype) {
   enum tgsi_return_type result = TGSI_RETURN_TYPE_FLOAT;

   switch (d3drettype) {
   case D3D10_SB_RETURN_TYPE_FLOAT:
      break;
   case D3D10_SB_RETURN_TYPE_UNORM:
      result = TGSI_RETURN_TYPE_UNORM;
      break;
   case D3D10_SB_RETURN_TYPE_SNORM:
      result = TGSI_RETURN_TYPE_SNORM;
      break;
   case D3D10_SB_RETURN_TYPE_SINT:
      result = TGSI_RETURN_TYPE_SINT;
      break;
   case D3D10_SB_RETURN_TYPE_UINT:
      result = TGSI_RETURN_TYPE_UINT;
      break;
   case D3D10_SB_RETURN_TYPE_MIXED:
   default:
      LOG_UNSUPPORTED(TRUE);
      break;
   }

   return result;
}
/*
 * Record the number of vertices per input primitive.
 *
 * The first non-zero value is stored; every later call must agree with it,
 * which keeps the input primitive declaration and the per-vertex input
 * declarations consistent.
 */
static void
declare_vertices_in(struct Shader_xlate *sx,
                    unsigned in)
{
   if (!sx->vertices_in) {
      sx->vertices_in = in;
      return;
   }

   assert(sx->vertices_in == in);
}
/* A four-component source swizzle: the TGSI_SWIZZLE_x to read for each of
 * the x, y, z and w channels.
 */
struct swizzle_mapping {
unsigned x;
unsigned y;
unsigned z;
unsigned w;
};
/* Mapping of writemask to swizzles.
 *
 * swizzle_reg() indexes this table directly with the writemask value, so
 * the 16 rows must stay in TGSI_WRITEMASK_NONE..TGSI_WRITEMASK_XYZW order.
 *
 * NOTE(review): the XYZ, XYW, XZW and YZW rows do not follow the pattern of
 * the one- and two-component rows (e.g. XZW and YZW are the identity
 * swizzle); confirm they are intentional before changing them.
 */
static const struct swizzle_mapping writemask_to_swizzle[] = {
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_NONE
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_X
{ TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y }, //TGSI_WRITEMASK_Y
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y }, //TGSI_WRITEMASK_XY
{ TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_Z
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_XZ
{ TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_YZ
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_XYZ
{ TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_W
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_W, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XW
{ TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_YW
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XYW
{ TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_ZW
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XZW
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_YZW
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XYZW
};
/*
 * Pick the source swizzle used when copying a declared input register.
 *
 * The listed system values are read as a scalar from the X channel.  Any
 * other input gets the swizzle that writemask_to_swizzle[] associates with
 * `writemask`.
 */
static struct ureg_src
swizzle_reg(struct ureg_src src, uint writemask,
            unsigned siv_name)
{
   const struct swizzle_mapping *map;

   if (siv_name == D3D10_SB_NAME_PRIMITIVE_ID ||
       siv_name == D3D10_SB_NAME_INSTANCE_ID ||
       siv_name == D3D10_SB_NAME_VERTEX_ID ||
       siv_name == D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX ||
       siv_name == D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX ||
       siv_name == D3D10_SB_NAME_IS_FRONT_FACE) {
      return ureg_scalar(src, TGSI_SWIZZLE_X);
   }

   map = &writemask_to_swizzle[writemask];
   return ureg_swizzle(src, map->x, map->y, map->z, map->w);
}
static void
dcl_base_output(struct Shader_xlate *sx,
struct ureg_program *ureg,
struct ureg_dst reg,
const struct Shader_dst_operand *operand)
{
unsigned writemask =
operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;
unsigned idx = operand->base.index[0].imm;
unsigned i;
if (!writemask) {
sx->outputs[idx].reg[0] = reg;
sx->outputs[idx].reg[1] = reg;
sx->outputs[idx].reg[2] = reg;
sx->outputs[idx].reg[3] = reg;
return;
}
for (i = 0; i < 4; ++i) {
unsigned mask = 1 << i;
if ((writemask & mask)) {
sx->outputs[idx].reg[i] = reg;
}
}
}
/*
 * Record a declared input register in sx->inputs[index].
 *
 * The first declaration of an index simply stores the register, its
 * component mask and its system-value name.  When the same index is declared
 * again (D3D10 may pack several values into one register), the pieces are
 * merged into a temporary: the second declaration allocates the temporary
 * and copies both the earlier and the new components into it; later
 * declarations copy only their own components.
 */
static void
dcl_base_input(struct Shader_xlate *sx,
               struct ureg_program *ureg,
               const struct Shader_dst_operand *operand,
               struct ureg_src dcl_reg,
               uint index,
               uint siv_name)
{
   const unsigned writemask =
      operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;

   if (sx->inputs[index].overloaded) {
      /* Already merged into a temporary: add the new components to it. */
      struct ureg_dst merged = ureg_dst(sx->inputs[index].reg);

      ureg_MOV(ureg, ureg_writemask(merged, writemask),
               swizzle_reg(dcl_reg, writemask, siv_name));
      sx->inputs[index].writemask |= writemask;
      return;
   }

   if (sx->inputs[index].declared) {
      /* Second declaration: switch over to a temporary holding both. */
      struct ureg_dst merged = ureg_DECL_temporary(sx->ureg);

      ureg_MOV(ureg,
               ureg_writemask(merged, sx->inputs[index].writemask),
               swizzle_reg(sx->inputs[index].reg,
                           sx->inputs[index].writemask,
                           sx->inputs[index].siv_name));
      ureg_MOV(ureg, ureg_writemask(merged, writemask),
               swizzle_reg(dcl_reg, writemask, siv_name));

      sx->inputs[index].reg = ureg_src(merged);
      sx->inputs[index].overloaded = TRUE;
      sx->inputs[index].writemask |= writemask;
      return;
   }

   /* First declaration of this input. */
   assert(!sx->inputs[index].declared);
   sx->inputs[index].reg = dcl_reg;
   sx->inputs[index].declared = TRUE;
   sx->inputs[index].writemask = writemask;
   sx->inputs[index].siv_name = siv_name;
}
static void
dcl_vs_input(struct Shader_xlate *sx,
struct ureg_program *ureg,
const struct Shader_dst_operand *dst)
{
struct ureg_src reg;
assert(dst->base.index_dim == 1);
assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
reg = ureg_DECL_vs_input(ureg, dst->base.index[0].imm);
dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
D3D10_SB_NAME_UNDEFINED);
}
static void
dcl_gs_input(struct Shader_xlate *sx,
struct ureg_program *ureg,
const struct Shader_dst_operand *dst)
{
if (dst->base.index_dim == 2) {
assert(dst->base.index[1].imm < SHADER_MAX_INPUTS);
declare_vertices_in(sx, dst->base.index[0].imm);
/* XXX: Implement declaration masks in gallium.
*/
if (!sx->inputs[dst->base.index[1].imm].reg.File) {
struct ureg_src reg =
ureg_DECL_input(ureg,
TGSI_SEMANTIC_GENERIC,
dst->base.index[1].imm,
0, 1);
dcl_base_input(sx, ureg, dst, reg, dst->base.index[1].imm,
D3D10_SB_NAME_UNDEFINED);
}
} else {
assert(dst->base.type == D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID);
assert(dst->base.index_dim == 0);
sx->prim_id = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_PRIMID, 0);
}
}
/*
 * Declare a system-generated-value input.
 *
 * The value is declared as a TGSI system value (semantic index 0) whose
 * semantic is derived from `dcl_siv_name`, and recorded in the input slot
 * named by the operand's single index.
 */
static void
dcl_sgv_input(struct Shader_xlate *sx,
              struct ureg_program *ureg,
              const struct Shader_dst_operand *dst,
              uint dcl_siv_name)
{
   struct ureg_src sysval;
   uint semantic;

   assert(dst->base.index_dim == 1);
   assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);

   semantic = translate_system_name(dcl_siv_name);
   sysval = ureg_DECL_system_value(ureg, semantic, 0);

   dcl_base_input(sx, ureg, dst, sysval, dst->base.index[0].imm,
                  dcl_siv_name);
}
/*
 * Declare a system-interpreted-value input with a two-dimensional operand.
 *
 * index[0] is the vertex count of the input primitive and index[1] the
 * input register.  The value is declared as a regular TGSI input whose
 * semantic is derived from `dcl_siv_name`.
 */
static void
dcl_siv_input(struct Shader_xlate *sx,
              struct ureg_program *ureg,
              const struct Shader_dst_operand *dst,
              uint dcl_siv_name)
{
   struct ureg_src input;
   uint semantic;

   assert(dst->base.index_dim == 2);
   assert(dst->base.index[1].imm < SHADER_MAX_INPUTS);

   declare_vertices_in(sx, dst->base.index[0].imm);

   semantic = translate_system_name(dcl_siv_name);
   input = ureg_DECL_input(ureg, semantic, 0, 0, 1);

   dcl_base_input(sx, ureg, dst, input, dst->base.index[1].imm,
                  dcl_siv_name);
}
/*
 * Declare an ordinary pixel-shader input.
 *
 * The input is declared with the generic semantic, using the register index
 * as semantic index and the interpolation mode translated from
 * `dcl_in_ps_interp`.
 */
static void
dcl_ps_input(struct Shader_xlate *sx,
             struct ureg_program *ureg,
             const struct Shader_dst_operand *dst,
             uint dcl_in_ps_interp)
{
   struct ureg_src input;
   uint interp;

   assert(dst->base.index_dim == 1);
   assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);

   interp = translate_interpolation(dcl_in_ps_interp);
   input = ureg_DECL_fs_input(ureg,
                              TGSI_SEMANTIC_GENERIC,
                              dst->base.index[0].imm,
                              interp);

   dcl_base_input(sx, ureg, dst, input, dst->base.index[0].imm,
                  D3D10_SB_NAME_UNDEFINED);
}
/*
 * Declare a system-generated value read by the pixel shader.
 *
 * For the position, the fragment coordinate conventions are set to an
 * upper-left origin with half-integer pixel centers.  The value itself is
 * declared as a constant-interpolated fragment input.  The front-face value
 * is additionally converted through a temporary to the encoding D3D10
 * expects before being recorded.
 */
static void
dcl_ps_sgv_input(struct Shader_xlate *sx,
                 struct ureg_program *ureg,
                 const struct Shader_dst_operand *dst,
                 uint dcl_siv_name)
{
   const boolean is_position = dcl_siv_name == D3D10_SB_NAME_POSITION;
   const boolean is_front_face = dcl_siv_name == D3D10_SB_NAME_IS_FRONT_FACE;
   struct ureg_src input;

   assert(dst->base.index_dim == 1);
   assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);

   if (is_position) {
      ureg_property(ureg,
                    TGSI_PROPERTY_FS_COORD_ORIGIN,
                    TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
      ureg_property(ureg,
                    TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
                    TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER);
   }

   input = ureg_DECL_fs_input(ureg,
                              translate_system_name(dcl_siv_name),
                              0,
                              TGSI_INTERPOLATE_CONSTANT);

   if (is_front_face) {
      /* We need to map gallium's front_face to the one expected
       * by D3D10 */
      struct ureg_dst face =
         ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X);

      ureg_CMP(ureg, face, input,
               ureg_imm1i(ureg, 0), ureg_imm1i(ureg, -1));
      input = ureg_scalar(ureg_src(face), TGSI_SWIZZLE_X);
   }

   dcl_base_input(sx, ureg, dst, input, dst->base.index[0].imm,
                  dcl_siv_name);
}
/*
 * Declare a system-interpreted value read by the pixel shader.
 *
 * The value is declared as a fragment input with the semantic derived from
 * `dcl_siv_name` and the interpolation derived from `dcl_in_ps_interp`.
 * The position is copied into a temporary whose W channel is replaced by
 * its reciprocal, because D3D10 and gallium disagree on that component.
 */
static void
dcl_ps_siv_input(struct Shader_xlate *sx,
                 struct ureg_program *ureg,
                 const struct Shader_dst_operand *dst,
                 uint dcl_siv_name, uint dcl_in_ps_interp)
{
   struct ureg_src input;

   assert(dst->base.index_dim == 1);
   assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);

   input = ureg_DECL_fs_input(ureg,
                              translate_system_name(dcl_siv_name),
                              0,
                              translate_interpolation(dcl_in_ps_interp));

   if (dcl_siv_name == D3D10_SB_NAME_POSITION) {
      /* D3D10 expects reciprocal of interpolated 1/w as 4th component,
       * gallium/GL just interpolated 1/w */
      struct ureg_dst pos = ureg_DECL_temporary(ureg);
      struct ureg_dst pos_w = ureg_writemask(pos, TGSI_WRITEMASK_W);

      ureg_MOV(ureg, pos, input);
      ureg_RCP(ureg, pos_w, ureg_scalar(ureg_src(pos), TGSI_SWIZZLE_W));
      input = ureg_src(pos);
   }

   dcl_base_input(sx, ureg, dst, input, dst->base.index[0].imm,
                  dcl_siv_name);
}
static struct ureg_src
translate_relative_operand(struct Shader_xlate *sx,
const struct Shader_relative_operand *operand)
{
struct ureg_src reg;
switch (operand->type) {
case D3D10_SB_OPERAND_TYPE_TEMP:
assert(operand->index[0].imm < SHADER_MAX_TEMPS);
reg = ureg_src(sx->temps[sx->temp_offset + operand->index[0].imm]);
break;
case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:
reg = sx->prim_id;
break;
case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
assert(operand->index[1].imm < SHADER_MAX_TEMPS);
reg = ureg_src(sx->temps[sx->indexable_temp_offsets[operand->index[0].imm] +
operand->index[1].imm]);
break;
case D3D10_SB_OPERAND_TYPE_INPUT:
case D3D10_SB_OPERAND_TYPE_OUTPUT:
case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:
case D3D10_SB_OPERAND_TYPE_IMMEDIATE64:
case D3D10_SB_OPERAND_TYPE_SAMPLER:
case D3D10_SB_OPERAND_TYPE_RESOURCE:
case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER:
case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
case D3D10_SB_OPERAND_TYPE_LABEL:
case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH:
case D3D10_SB_OPERAND_TYPE_NULL:
case D3D10_SB_OPERAND_TYPE_RASTERIZER:
case D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
LOG_UNSUPPORTED(TRUE);
reg = ureg_src(ureg_DECL_temporary(sx->ureg));
break;
default:
assert(0); /* should never happen */
reg = ureg_src(ureg_DECL_temporary(sx->ureg));
}
reg = ureg_scalar(reg, operand->comp);
return reg;
}
static struct ureg_dst
translate_operand(struct Shader_xlate *sx,
const struct Shader_operand *operand,
unsigned writemask)
{
struct ureg_dst reg;
switch (operand->type) {
case D3D10_SB_OPERAND_TYPE_TEMP:
assert(operand->index_dim == 1);
assert(operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
assert(operand->index[0].imm < SHADER_MAX_TEMPS);
reg = sx->temps[sx->temp_offset + operand->index[0].imm];
break;
case D3D10_SB_OPERAND_TYPE_OUTPUT:
assert(operand->index_dim == 1);
assert(operand->index[0].imm < SHADER_MAX_OUTPUTS);
if (operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32) {
if (!writemask) {
reg = sx->outputs[operand->index[0].imm].reg[0];
} else {
unsigned i;
for (i = 0; i < 4; ++i) {
unsigned mask = 1 << i;
if ((writemask & mask)) {
reg = sx->outputs[operand->index[0].imm].reg[i];
break;
}
}
}
} else {
struct ureg_src addr =
translate_relative_operand(sx, &operand->index[0].rel);
assert(operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE);
reg = ureg_dst_indirect(sx->outputs[operand->index[0].imm].reg[0], addr);
}
break;
case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH:
assert(operand->index_dim == 0);
reg = sx->output_depth;
break;
case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:
assert(operand->index_dim == 0);
reg = ureg_dst(sx->prim_id);
break;
case D3D10_SB_OPERAND_TYPE_INPUT:
case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:
case D3D10_SB_OPERAND_TYPE_IMMEDIATE64:
case D3D10_SB_OPERAND_TYPE_SAMPLER:
case D3D10_SB_OPERAND_TYPE_RESOURCE:
case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER:
case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
case D3D10_SB_OPERAND_TYPE_LABEL:
case D3D10_SB_OPERAND_TYPE_NULL:
case D3D10_SB_OPERAND_TYPE_RASTERIZER:
case D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
/* XXX: Translate more operands types.
*/
LOG_UNSUPPORTED(TRUE);
reg = ureg_DECL_temporary(sx->ureg);
}
return reg;
}
static struct ureg_src
translate_indexable_temp(struct Shader_xlate *sx,
const struct Shader_operand *operand)
{
struct ureg_src reg;
switch (operand->index[1].index_rep) {
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
reg = ureg_src(
sx->temps[sx->indexable_temp_offsets[operand->index[0].imm] +
operand->index[1].imm]);
break;
case D3D10_SB_OPERAND_INDEX_RELATIVE:
reg = ureg_src_indirect(
ureg_src(sx->temps[
sx->indexable_temp_offsets[operand->index[0].imm]]),
translate_relative_operand(sx,
&operand->index[1].rel));
break;
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
reg = ureg_src_indirect(
ureg_src(sx->temps[
operand->index[1].imm +
sx->indexable_temp_offsets[operand->index[0].imm]]),
translate_relative_operand(sx,
&operand->index[1].rel));
break;
default:
/* XXX: Other index representations.
*/
LOG_UNSUPPORTED(TRUE);
reg = ureg_src(ureg_DECL_temporary(sx->ureg));
}
return reg;
}
/*
 * Translate a D3D10 destination operand into a TGSI destination register.
 *
 * The D3D10 component mask is shifted down to a TGSI writemask (the asserts
 * verify that the two encodings line up).  Indexable temporaries go through
 * translate_indexable_temp(); everything else through translate_operand().
 * The writemask is applied to every register except the depth output, and
 * saturation is applied last when requested.
 */
static struct ureg_dst
translate_dst_operand(struct Shader_xlate *sx,
                      const struct Shader_dst_operand *operand,
                      boolean saturate)
{
   const unsigned writemask =
      operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;
   struct ureg_dst dst;

   assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT) == 4);
   assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_X >> 4) == TGSI_WRITEMASK_X);
   assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_Y >> 4) == TGSI_WRITEMASK_Y);
   assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_Z >> 4) == TGSI_WRITEMASK_Z);
   assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_W >> 4) == TGSI_WRITEMASK_W);

   if (operand->base.type == D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP) {
      assert(operand->base.index_dim == 2);
      assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
      assert(operand->base.index[0].imm < SHADER_MAX_INDEXABLE_TEMPS);

      dst = ureg_dst(translate_indexable_temp(sx, &operand->base));
   } else {
      dst = translate_operand(sx, &operand->base, writemask);
   }

   /* oDepth often has an empty writemask */
   if (operand->base.type != D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {
      dst = ureg_writemask(dst, writemask);
   }

   return saturate ? ureg_saturate(dst) : dst;
}
/*
 * Translate a D3D10 source operand into a TGSI source register.
 *
 * sx       translation state
 * operand  source operand (register, swizzle and modifier)
 * format   data format of the instruction; selects how a 32-bit immediate
 *          is emitted (float, signed or unsigned)
 *
 * Inputs, indexable temporaries, immediates, samplers, resources, constant
 * buffers, the immediate constant buffer and the primitive id are handled
 * here; all other operand types are forwarded to translate_operand().  The
 * operand's swizzle and then its modifier (negate / abs) are applied to the
 * result.
 *
 * Index representations that are not supported are logged and replaced by a
 * freshly declared temporary, so the returned register is always
 * initialized.
 */
static struct ureg_src
translate_src_operand(struct Shader_xlate *sx,
                      const struct Shader_src_operand *operand,
                      const enum dx10_opcode_format format)
{
   struct ureg_src reg;

   switch (operand->base.type) {
   case D3D10_SB_OPERAND_TYPE_INPUT:
      if (operand->base.index_dim == 1) {
         switch (operand->base.index[0].index_rep) {
         case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
            assert(operand->base.index[0].imm < SHADER_MAX_INPUTS);
            reg = sx->inputs[operand->base.index[0].imm].reg;
            break;
         case D3D10_SB_OPERAND_INDEX_RELATIVE: {
            struct ureg_src tmp =
               translate_relative_operand(sx, &operand->base.index[0].rel);
            reg = ureg_src_indirect(sx->inputs[0].reg, tmp);
         }
            break;
         case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {
            struct ureg_src tmp =
               translate_relative_operand(sx, &operand->base.index[0].rel);
            reg = ureg_src_indirect(sx->inputs[operand->base.index[0].imm].reg, tmp);
         }
            break;
         default:
            /* XXX: Other index representations.
             */
            LOG_UNSUPPORTED(TRUE);
            reg = ureg_src(ureg_DECL_temporary(sx->ureg));
         }
      } else {
         assert(operand->base.index_dim == 2);
         assert(operand->base.index[1].imm < SHADER_MAX_INPUTS);

         /* index[1] selects the input register ... */
         switch (operand->base.index[1].index_rep) {
         case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
            reg = sx->inputs[operand->base.index[1].imm].reg;
            break;
         case D3D10_SB_OPERAND_INDEX_RELATIVE: {
            struct ureg_src tmp =
               translate_relative_operand(sx, &operand->base.index[1].rel);
            reg = ureg_src_indirect(sx->inputs[0].reg, tmp);
         }
            break;
         case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {
            struct ureg_src tmp =
               translate_relative_operand(sx, &operand->base.index[1].rel);
            reg = ureg_src_indirect(sx->inputs[operand->base.index[1].imm].reg, tmp);
         }
            break;
         default:
            /* XXX: Other index representations.
             */
            LOG_UNSUPPORTED(TRUE);
            reg = ureg_src(ureg_DECL_temporary(sx->ureg));
         }

         /* ... and index[0] the dimension (vertex) it is read from. */
         switch (operand->base.index[0].index_rep) {
         case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
            reg = ureg_src_dimension(reg, operand->base.index[0].imm);
            break;
         case D3D10_SB_OPERAND_INDEX_RELATIVE:{
            struct ureg_src tmp =
               translate_relative_operand(sx, &operand->base.index[0].rel);
            reg = ureg_src_dimension_indirect(reg, tmp, 0);
         }
            break;
         case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {
            struct ureg_src tmp =
               translate_relative_operand(sx, &operand->base.index[0].rel);
            reg = ureg_src_dimension_indirect(reg, tmp, operand->base.index[0].imm);
         }
            break;
         default:
            /* XXX: Other index representations.
             */
            LOG_UNSUPPORTED(TRUE);
         }
      }
      break;

   case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
      assert(operand->base.index_dim == 2);
      assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
      assert(operand->base.index[0].imm < SHADER_MAX_INDEXABLE_TEMPS);

      reg = translate_indexable_temp(sx, &operand->base);
      break;

   case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:
      switch (format) {
      case OF_FLOAT:
         reg = ureg_imm4f(sx->ureg,
                          operand->imm[0].f32,
                          operand->imm[1].f32,
                          operand->imm[2].f32,
                          operand->imm[3].f32);
         break;
      case OF_INT:
         reg = ureg_imm4i(sx->ureg,
                          operand->imm[0].i32,
                          operand->imm[1].i32,
                          operand->imm[2].i32,
                          operand->imm[3].i32);
         break;
      case OF_UINT:
         reg = ureg_imm4u(sx->ureg,
                          operand->imm[0].u32,
                          operand->imm[1].u32,
                          operand->imm[2].u32,
                          operand->imm[3].u32);
         break;
      default:
         assert(0);
         reg = ureg_src(ureg_DECL_temporary(sx->ureg));
      }
      break;

   case D3D10_SB_OPERAND_TYPE_SAMPLER:
      assert(operand->base.index_dim == 1);
      assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
      assert(operand->base.index[0].imm < SHADER_MAX_SAMPLERS);

      reg = sx->samplers[operand->base.index[0].imm];
      break;

   case D3D10_SB_OPERAND_TYPE_RESOURCE:
      assert(operand->base.index_dim == 1);
      assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
      assert(operand->base.index[0].imm < SHADER_MAX_RESOURCES);

      reg = sx->sv[operand->base.index[0].imm];
      break;

   case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER:
      assert(operand->base.index_dim == 2);
      assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
      assert(operand->base.index[0].imm < PIPE_MAX_CONSTANT_BUFFERS);

      switch (operand->base.index[1].index_rep) {
      case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
         assert(operand->base.index[1].imm < SHADER_MAX_CONSTS);

         reg = ureg_src_register(TGSI_FILE_CONSTANT, operand->base.index[1].imm);
         reg = ureg_src_dimension(reg, operand->base.index[0].imm);
         break;
      case D3D10_SB_OPERAND_INDEX_RELATIVE:
      case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
         reg = ureg_src_register(TGSI_FILE_CONSTANT, operand->base.index[1].imm);
         reg = ureg_src_indirect(
                  reg,
                  translate_relative_operand(sx, &operand->base.index[1].rel));
         reg = ureg_src_dimension(reg, operand->base.index[0].imm);
         break;
      default:
         /* XXX: Other index representations.
          */
         LOG_UNSUPPORTED(TRUE);
         reg = ureg_src(ureg_DECL_temporary(sx->ureg));
      }
      break;

   case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
      assert(operand->base.index_dim == 1);

      switch (operand->base.index[0].index_rep) {
      case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
         reg = sx->imms;
         reg.Index += operand->base.index[0].imm;
         break;
      case D3D10_SB_OPERAND_INDEX_RELATIVE:
      case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
         reg = sx->imms;
         reg.Index += operand->base.index[0].imm;
         /* Make the offset register indirect, not the bare base: passing
          * sx->imms here would drop the immediate part of the index. */
         reg = ureg_src_indirect(
                  reg,
                  translate_relative_operand(sx, &operand->base.index[0].rel));
         break;
      default:
         /* XXX: Other index representations.
          */
         LOG_UNSUPPORTED(TRUE);
         reg = ureg_src(ureg_DECL_temporary(sx->ureg));
      }
      break;

   case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:
      reg = sx->prim_id;
      break;

   default:
      reg = ureg_src(translate_operand(sx, &operand->base, 0));
   }

   reg = ureg_swizzle(reg,
                      operand->swizzle[0],
                      operand->swizzle[1],
                      operand->swizzle[2],
                      operand->swizzle[3]);

   switch (operand->modifier) {
   case D3D10_SB_OPERAND_MODIFIER_NONE:
      break;
   case D3D10_SB_OPERAND_MODIFIER_NEG:
      reg = ureg_negate(reg);
      break;
   case D3D10_SB_OPERAND_MODIFIER_ABS:
      reg = ureg_abs(reg);
      break;
   case D3D10_SB_OPERAND_MODIFIER_ABSNEG:
      reg = ureg_negate(ureg_abs(reg));
      break;
   default:
      assert(0);
   }

   return reg;
}
/**
 * Map a D3D10 shader-bytecode resource dimension to the corresponding
 * TGSI texture target.
 *
 * \param dim  resource dimension taken from a resource declaration
 * \return the matching TGSI_TEXTURE_x value; dimensions not listed below
 *         trip an assertion and yield TGSI_TEXTURE_UNKNOWN.
 */
static uint
translate_resource_dimension(D3D10_SB_RESOURCE_DIMENSION dim)
{
   /* Value reported for both the explicit UNKNOWN dimension and for
    * anything unrecognized.
    */
   uint target = TGSI_TEXTURE_UNKNOWN;

   switch (dim) {
   case D3D10_SB_RESOURCE_DIMENSION_UNKNOWN:
      break;
   case D3D10_SB_RESOURCE_DIMENSION_BUFFER:
      target = TGSI_TEXTURE_BUFFER;
      break;
   case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1D:
      target = TGSI_TEXTURE_1D;
      break;
   case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1DARRAY:
      target = TGSI_TEXTURE_1D_ARRAY;
      break;
   case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2D:
      target = TGSI_TEXTURE_2D;
      break;
   case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY:
      target = TGSI_TEXTURE_2D_ARRAY;
      break;
   case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMS:
      target = TGSI_TEXTURE_2D_MSAA;
      break;
   case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMSARRAY:
      target = TGSI_TEXTURE_2D_ARRAY_MSAA;
      break;
   case D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D:
      target = TGSI_TEXTURE_3D;
      break;
   case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE:
      target = TGSI_TEXTURE_CUBE;
      break;
   case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBEARRAY:
      target = TGSI_TEXTURE_CUBE_ARRAY;
      break;
   default:
      /* Not a dimension this translator knows about. */
      assert(0);
      break;
   }

   return target;
}
static uint
texture_dim_from_tgsi_target(unsigned tgsi_target)
{
switch (tgsi_target) {
case TGSI_TEXTURE_BUFFER:
case TGSI_TEXTURE_1D:
case TGSI_TEXTURE_1D_ARRAY:
return 1;
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_2D_MSAA:
case TGSI_TEXTURE_CUBE:
case TGSI_TEXTURE_2D_ARRAY:
case TGSI_TEXTURE_2D_ARRAY_MSAA:
return 2;
case TGSI_TEXTURE_3D:
return 3;
case TGSI_TEXTURE_UNKNOWN:
default:
assert(0);
return 1;
}
}
/**
 * Tell whether a source operand's swizzle replicates a single component,
 * i.e. all four swizzle entries are equal.
 *
 * \param operand  source operand to inspect
 * \return non-zero when every swizzle entry matches the first one
 */
static boolean
operand_is_scalar(const struct Shader_src_operand *operand)
{
   unsigned chan;

   /* Equality is transitive, so comparing each entry against entry 0 is
    * the same as comparing neighbouring entries pairwise.
    */
   for (chan = 1; chan < 4; chan++) {
      if (operand->swizzle[chan] != operand->swizzle[0]) {
         return 0;
      }
   }

   return 1;
}
/**
 * Append a call record to the translator's call list.
 *
 * \param sx                translation state owning the calls array
 * \param d3d_label         D3D label index the call refers to
 * \param tgsi_label_token  label token value obtained when the TGSI CAL
 *                          was emitted
 *
 * The array is not grown here; running past max_calls only trips ASSERT.
 */
static void
Shader_add_call(struct Shader_xlate *sx,
                unsigned d3d_label,
                unsigned tgsi_label_token)
{
   /* Index of the first unused entry. */
   const unsigned slot = sx->num_calls;

   ASSERT(slot < sx->max_calls);

   sx->calls[slot].d3d_label = d3d_label;
   sx->calls[slot].tgsi_label_token = tgsi_label_token;

   ++sx->num_calls;
}
/**
 * Append a label record to the translator's label list.
 *
 * \param sx            translation state owning the labels array
 * \param d3d_label     D3D label index being defined
 * \param tgsi_insn_no  TGSI instruction number recorded for the label
 *
 * The array is not grown here; running past max_labels only trips ASSERT.
 */
static void
Shader_add_label(struct Shader_xlate *sx,
                 unsigned d3d_label,
                 unsigned tgsi_insn_no)
{
   /* Index of the first unused entry. */
   const unsigned slot = sx->num_labels;

   ASSERT(slot < sx->max_labels);

   sx->labels[slot].d3d_label = d3d_label;
   sx->labels[slot].tgsi_insn_no = tgsi_insn_no;

   ++sx->num_labels;
}
static void
sample_ureg_emit(struct ureg_program *ureg,
unsigned tgsi_opcode,
unsigned num_src,
struct Shader_opcode *opcode,
struct ureg_dst dst,
struct ureg_src *src)
{
unsigned num_offsets = 0;
struct tgsi_texture_offset texoffsets;
memset(&texoffsets, 0, sizeof texoffsets);
if (opcode->imm_texel_offset.u ||
opcode->imm_texel_offset.v ||
opcode->imm_texel_offset.w) {
struct ureg_src offsetreg;
num_offsets = 1;
/* don't actually always need all 3 values */
offsetreg = ureg_imm3i(ureg,
opcode->imm_texel_offset.u,
opcode->imm_texel_offset.v,
opcode->imm_texel_offset.w);
texoffsets.File = offsetreg.File;
texoffsets.Index = offsetreg.Index;
texoffsets.SwizzleX = offsetreg.SwizzleX;
texoffsets.SwizzleY = offsetreg.SwizzleY;
texoffsets.SwizzleZ = offsetreg.SwizzleZ;
}
ureg_tex_insn(ureg,
tgsi_opcode,
&dst, 1,
TGSI_TEXTURE_UNKNOWN,
TGSI_RETURN_TYPE_UNKNOWN,
&texoffsets, num_offsets,
src, num_src);
}
typedef void (*unary_ureg_func)(struct ureg_program *ureg, struct ureg_dst dst,
struct ureg_src src);
static void
expand_unary_to_scalarf(struct ureg_program *ureg, unary_ureg_func func,
struct Shader_xlate *sx, struct Shader_opcode *opcode)
{
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
struct ureg_dst dst = translate_dst_operand(sx, &opcode->dst[0],
opcode->saturate);
struct ureg_src src = translate_src_operand(sx, &opcode->src[0], OF_FLOAT);
struct ureg_dst scalar_dst;
ureg_MOV(ureg, tmp, src);
src = ureg_src(tmp);
scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_X);
if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
func(ureg, scalar_dst,
ureg_scalar(src, TGSI_SWIZZLE_X));
}
scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_Y);
if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
func(ureg, scalar_dst,
ureg_scalar(src, TGSI_SWIZZLE_Y));
}
scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_Z);
if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
func(ureg, scalar_dst,
ureg_scalar(src, TGSI_SWIZZLE_Z));
}
scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_W);
if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
func(ureg, scalar_dst,
ureg_scalar(src, TGSI_SWIZZLE_W));
}
ureg_release_temporary(ureg, tmp);
}
const struct tgsi_token *
Shader_tgsi_translate(const unsigned *code,
unsigned *output_mapping)
{
struct Shader_xlate sx;
struct Shader_parser parser;
struct ureg_program *ureg = NULL;
struct Shader_opcode opcode;
const struct tgsi_token *tokens = NULL;
uint nr_tokens;
boolean shader_dumped = FALSE;
boolean inside_sub = FALSE;
uint i, j;
memset(&sx, 0, sizeof sx);
Shader_parse_init(&parser, code);
if (st_debug & ST_DEBUG_TGSI) {
dx10_shader_dump_tokens(code);
shader_dumped = TRUE;
}
sx.max_calls = 64;
sx.calls = (struct Shader_call *)MALLOC(sx.max_calls *
sizeof(struct Shader_call));
sx.num_calls = 0;
sx.max_labels = 64;
sx.labels = (struct Shader_label *)MALLOC(sx.max_labels *
sizeof(struct Shader_call));
sx.num_labels = 0;
/* Header. */
switch (parser.header.type) {
case D3D10_SB_PIXEL_SHADER:
ureg = ureg_create(PIPE_SHADER_FRAGMENT);
break;
case D3D10_SB_VERTEX_SHADER:
ureg = ureg_create(PIPE_SHADER_VERTEX);
break;
case D3D10_SB_GEOMETRY_SHADER:
ureg = ureg_create(PIPE_SHADER_GEOMETRY);
break;
}
assert(ureg);
sx.ureg = ureg;
while (Shader_parse_opcode(&parser, &opcode)) {
const struct dx10_opcode_xlate *ox;
assert(opcode.type < D3D10_SB_NUM_OPCODES);
ox = &opcode_xlate[opcode.type];
switch (opcode.type) {
case D3D10_SB_OPCODE_EXP:
expand_unary_to_scalarf(ureg, ureg_EX2, &sx, &opcode);
break;
case D3D10_SB_OPCODE_SQRT:
expand_unary_to_scalarf(ureg, ureg_SQRT, &sx, &opcode);
break;
case D3D10_SB_OPCODE_RSQ:
expand_unary_to_scalarf(ureg, ureg_RSQ, &sx, &opcode);
break;
case D3D10_SB_OPCODE_LOG:
expand_unary_to_scalarf(ureg, ureg_LG2, &sx, &opcode);
break;
case D3D10_SB_OPCODE_IMUL:
if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
ureg_IMUL_HI(ureg,
translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
translate_src_operand(&sx, &opcode.src[0], OF_INT),
translate_src_operand(&sx, &opcode.src[1], OF_INT));
}
if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
ureg_UMUL(ureg,
translate_dst_operand(&sx, &opcode.dst[1], opcode.saturate),
translate_src_operand(&sx, &opcode.src[0], OF_INT),
translate_src_operand(&sx, &opcode.src[1], OF_INT));
}
break;
case D3D10_SB_OPCODE_FTOI: {
/* XXX: tgsi (and just about everybody else, c, opencl, glsl) has
* out-of-range (and NaN) values undefined for f2i/f2u, but d3d10
* requires clamping to min and max representable value (as well as 0
* for NaNs) (this applies to both ftoi and ftou). At least the online
* docs state that - this is consistent with generic d3d10 conversion
* rules.
* For FTOI, we cheat a bit here - in particular depending on no one
* caring about NaNs, and depending on the (undefined!) behavior of
* F2I returning 0x80000000 for too negative values (which works with
* x86 sse). Hence only need to clamp too positive values.
* Note that it is impossible to clamp using a float, since 2^31 - 1
* is not exactly representable with a float.
*/
struct ureg_dst too_large = ureg_DECL_temporary(ureg);
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
ureg_FSGE(ureg, too_large,
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
ureg_imm1f(ureg, 2147483648.0f));
ureg_F2I(ureg, tmp,
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
ureg_UCMP(ureg,
translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
ureg_src(too_large),
ureg_imm1i(ureg, 0x7fffffff),
ureg_src(tmp));
ureg_release_temporary(ureg, too_large);
ureg_release_temporary(ureg, tmp);
}
break;
case D3D10_SB_OPCODE_FTOU: {
/* For ftou, we need to do both clamps, which as a bonus also
* gets us correct NaN behavior.
* Note that it is impossible to clamp using a float against the upper
* limit, since 2^32 - 1 is not exactly representable with a float,
* but the clamp against 0.0 certainly works just fine.
*/
struct ureg_dst too_large = ureg_DECL_temporary(ureg);
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
ureg_FSGE(ureg, too_large,
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
ureg_imm1f(ureg, 4294967296.0f));
/* clamp negative values + NaN to zero.
* (Could be done slightly more efficient in llvmpipe due to
* MAX NaN behavior handling.)
*/
ureg_MAX(ureg, tmp,
ureg_imm1f(ureg, 0.0f),
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
ureg_F2U(ureg, tmp,
ureg_src(tmp));
ureg_UCMP(ureg,
translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
ureg_src(too_large),
ureg_imm1u(ureg, 0xffffffff),
ureg_src(tmp));
ureg_release_temporary(ureg, too_large);
ureg_release_temporary(ureg, tmp);
}
break;
case D3D10_SB_OPCODE_LD_MS:
/* XXX: We don't support multi-sampling yet, but we need to parse
* this opcode regardless, so we just ignore sample index operand
* for now */
case D3D10_SB_OPCODE_LD:
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
unsigned resource = opcode.src[1].base.index[0].imm;
assert(opcode.src[1].base.index_dim == 1);
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
if (ureg_src_is_undef(sx.samplers[resource])) {
sx.samplers[resource] =
ureg_DECL_sampler(ureg, resource);
}
ureg_TXF(ureg,
translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
sx.resources[resource].target,
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
sx.samplers[resource]);
}
else {
struct ureg_src srcreg[2];
srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_INT);
srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_INT);
sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_I, 2, &opcode,
translate_dst_operand(&sx, &opcode.dst[0],
opcode.saturate),
srcreg);
}
break;
case D3D10_SB_OPCODE_CUSTOMDATA:
if (opcode.customdata._class ==
D3D10_SB_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER) {
sx.imms =
ureg_DECL_immediate_block_uint(ureg,
opcode.customdata.u.constbuf.data,
opcode.customdata.u.constbuf.count);
} else {
assert(0);
}
break;
case D3D10_SB_OPCODE_RESINFO:
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
unsigned resource = opcode.src[1].base.index[0].imm;
assert(opcode.src[1].base.index_dim == 1);
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
if (ureg_src_is_undef(sx.samplers[resource])) {
sx.samplers[resource] =
ureg_DECL_sampler(ureg, resource);
}
/* don't bother with swizzle, ret type etc. */
ureg_TXQ(ureg,
translate_dst_operand(&sx, &opcode.dst[0],
opcode.saturate),
sx.resources[resource].target,
translate_src_operand(&sx, &opcode.src[0], OF_UINT),
sx.samplers[resource]);
}
else {
struct ureg_dst r0 = ureg_DECL_temporary(ureg);
struct ureg_src tsrc = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
struct ureg_dst dstreg = translate_dst_operand(&sx, &opcode.dst[0],
opcode.saturate);
/* while specs say swizzle is ignored better safe than sorry */
tsrc.SwizzleX = TGSI_SWIZZLE_X;
tsrc.SwizzleY = TGSI_SWIZZLE_Y;
tsrc.SwizzleZ = TGSI_SWIZZLE_Z;
tsrc.SwizzleW = TGSI_SWIZZLE_W;
ureg_SVIEWINFO(ureg, r0,
translate_src_operand(&sx, &opcode.src[0], OF_UINT),
tsrc);
tsrc = ureg_src(r0);
tsrc.SwizzleX = opcode.src[1].swizzle[0];
tsrc.SwizzleY = opcode.src[1].swizzle[1];
tsrc.SwizzleZ = opcode.src[1].swizzle[2];
tsrc.SwizzleW = opcode.src[1].swizzle[3];
if (opcode.specific.resinfo_ret_type ==
D3D10_SB_RESINFO_INSTRUCTION_RETURN_UINT) {
ureg_MOV(ureg, dstreg, tsrc);
}
else if (opcode.specific.resinfo_ret_type ==
D3D10_SB_RESINFO_INSTRUCTION_RETURN_FLOAT) {
ureg_I2F(ureg, dstreg, tsrc);
}
else { /* D3D10_SB_RESINFO_INSTRUCTION_RETURN_RCPFLOAT */
unsigned i;
/*
* Must apply rcp only to parts determined by dims,
* (width/height/depth) but NOT to array size nor mip levels
* hence need to figure that out here.
* This is one sick modifier if you ask me!
*/
unsigned res_index = opcode.src[1].base.index[0].imm;
unsigned target = sx.resources[res_index].target;
unsigned dims = texture_dim_from_tgsi_target(target);
ureg_I2F(ureg, r0, ureg_src(r0));
tsrc = ureg_src(r0);
for (i = 0; i < 4; i++) {
unsigned dst_swizzle = opcode.src[1].swizzle[i];
struct ureg_dst dstregmasked = ureg_writemask(dstreg, 1 << i);
/*
* could do one mov with multiple write mask bits set
* but rcp is scalar anyway.
*/
if (dst_swizzle < dims) {
ureg_RCP(ureg, dstregmasked, ureg_scalar(tsrc, dst_swizzle));
}
else {
ureg_MOV(ureg, dstregmasked, ureg_scalar(tsrc, dst_swizzle));
}
}
}
ureg_release_temporary(ureg, r0);
}
break;
case D3D10_SB_OPCODE_SAMPLE:
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
assert(opcode.src[1].base.index_dim == 1);
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
LOG_UNSUPPORTED(opcode.src[1].base.index[0].imm != opcode.src[2].base.index[0].imm);
ureg_TEX(ureg,
translate_dst_operand(&sx, &opcode.dst[0],
opcode.saturate),
sx.resources[opcode.src[1].base.index[0].imm].target,
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
}
else {
struct ureg_src srcreg[3];
srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE, 3, &opcode,
translate_dst_operand(&sx, &opcode.dst[0],
opcode.saturate),
srcreg);
}
break;
case D3D10_SB_OPCODE_SAMPLE_C:
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
struct ureg_dst r0 = ureg_DECL_temporary(ureg);
/* XXX: Support only 2D texture targets for now.
* Need to figure out how to pack the compare value
* for other dimensions and if there is enough space
* in a single operand for all possible cases.
*/
LOG_UNSUPPORTED(sx.resources[opcode.src[1].base.index[0].imm].target !=
TGSI_TEXTURE_2D);
assert(opcode.src[1].base.index_dim == 1);
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
/* Insert the compare value into .z component.
*/
ureg_MOV(ureg,
ureg_writemask(r0, TGSI_WRITEMASK_XYW),
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
ureg_MOV(ureg,
ureg_writemask(r0, TGSI_WRITEMASK_Z),
translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
/* XXX: Pass explicit Lod=0 in D3D10_SB_OPCODE_SAMPLE_C_LZ case.
*/
ureg_TEX(ureg,
translate_dst_operand(&sx, &opcode.dst[0],
opcode.saturate),
sx.resources[opcode.src[1].base.index[0].imm].target,
ureg_src(r0),
translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
ureg_release_temporary(ureg, r0);
}
else {
struct ureg_src srcreg[4];
srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_C, 4, &opcode,
translate_dst_operand(&sx, &opcode.dst[0],
opcode.saturate),
srcreg);
}
break;
case D3D10_SB_OPCODE_SAMPLE_C_LZ:
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
struct ureg_dst r0 = ureg_DECL_temporary(ureg);
assert(opcode.src[1].base.index_dim == 1);
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
/* XXX: Support only 2D texture targets for now.
* Need to figure out how to pack the compare value
* for other dimensions and if there is enough space
* in a single operand for all possible cases.
*/
LOG_UNSUPPORTED(sx.resources[opcode.src[1].base.index[0].imm].target !=
TGSI_TEXTURE_2D);
/* Insert the compare value into .z component.
* Insert 0 into .w component.
*/
ureg_MOV(ureg,
ureg_writemask(r0, TGSI_WRITEMASK_XY),
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
ureg_MOV(ureg,
ureg_writemask(r0, TGSI_WRITEMASK_Z),
translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
ureg_MOV(ureg,
ureg_writemask(r0, TGSI_WRITEMASK_W),
ureg_imm1f(ureg, 0.0f));
ureg_TXL(ureg,
translate_dst_operand(&sx, &opcode.dst[0],
opcode.saturate),
sx.resources[opcode.src[1].base.index[0].imm].target,
ureg_src(r0),
translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
ureg_release_temporary(ureg, r0);
}
else {
struct ureg_src srcreg[4];
srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_C_LZ, 4, &opcode,
translate_dst_operand(&sx, &opcode.dst[0],
opcode.saturate),
srcreg);
}
break;
case D3D10_SB_OPCODE_SAMPLE_L:
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
struct ureg_dst r0 = ureg_DECL_temporary(ureg);
assert(opcode.src[1].base.index_dim == 1);
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
/* Insert LOD into .w component.
*/
ureg_MOV(ureg,
ureg_writemask(r0, TGSI_WRITEMASK_XYZ),
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
ureg_MOV(ureg,
ureg_writemask(r0, TGSI_WRITEMASK_W),
translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
ureg_TXL(ureg,
translate_dst_operand(&sx, &opcode.dst[0],
opcode.saturate),
sx.resources[opcode.src[1].base.index[0].imm].target,
ureg_src(r0),
translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
ureg_release_temporary(ureg, r0);
}
else {
struct ureg_src srcreg[4];
srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_L, 4, &opcode,
translate_dst_operand(&sx, &opcode.dst[0],
opcode.saturate),
srcreg);
}
break;
case D3D10_SB_OPCODE_SAMPLE_D:
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
assert(opcode.src[1].base.index_dim == 1);
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
ureg_TXD(ureg,
translate_dst_operand(&sx, &opcode.dst[0],
opcode.saturate),
sx.resources[opcode.src[1].base.index[0].imm].target,
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
translate_src_operand(&sx, &opcode.src[3], OF_FLOAT),
translate_src_operand(&sx, &opcode.src[4], OF_FLOAT),
translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
}
else {
struct ureg_src srcreg[5];
srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
srcreg[4] = translate_src_operand(&sx, &opcode.src[4], OF_FLOAT);
sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_D, 5, &opcode,
translate_dst_operand(&sx, &opcode.dst[0],
opcode.saturate),
srcreg);
}
break;
case D3D10_SB_OPCODE_SAMPLE_B:
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
struct ureg_dst r0 = ureg_DECL_temporary(ureg);
assert(opcode.src[1].base.index_dim == 1);
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
/* Insert LOD bias into .w component.
*/
ureg_MOV(ureg,
ureg_writemask(r0, TGSI_WRITEMASK_XYZ),
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
ureg_MOV(ureg,
ureg_writemask(r0, TGSI_WRITEMASK_W),
translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
ureg_TXB(ureg,
translate_dst_operand(&sx, &opcode.dst[0],
opcode.saturate),
sx.resources[opcode.src[1].base.index[0].imm].target,
ureg_src(r0),
translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
ureg_release_temporary(ureg, r0);
}
else {
struct ureg_src srcreg[4];
srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_B, 4, &opcode,
translate_dst_operand(&sx, &opcode.dst[0],
opcode.saturate),
srcreg);
}
break;
case D3D10_SB_OPCODE_SINCOS: {
struct ureg_dst src0 = ureg_DECL_temporary(ureg);
ureg_MOV(ureg, src0, translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
struct ureg_dst dst = translate_dst_operand(&sx, &opcode.dst[0],
opcode.saturate);
struct ureg_src src = ureg_src(src0);
ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
ureg_scalar(src, TGSI_SWIZZLE_X));
ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
ureg_scalar(src, TGSI_SWIZZLE_Y));
ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
ureg_scalar(src, TGSI_SWIZZLE_Z));
ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
ureg_scalar(src, TGSI_SWIZZLE_W));
}
if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
struct ureg_dst dst = translate_dst_operand(&sx, &opcode.dst[1],
opcode.saturate);
struct ureg_src src = ureg_src(src0);
ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
ureg_scalar(src, TGSI_SWIZZLE_X));
ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
ureg_scalar(src, TGSI_SWIZZLE_Y));
ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
ureg_scalar(src, TGSI_SWIZZLE_Z));
ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
ureg_scalar(src, TGSI_SWIZZLE_W));
}
ureg_release_temporary(ureg, src0);
}
break;
case D3D10_SB_OPCODE_UDIV: {
struct ureg_dst src0 = ureg_DECL_temporary(ureg);
struct ureg_dst src1 = ureg_DECL_temporary(ureg);
ureg_MOV(ureg, src0, translate_src_operand(&sx, &opcode.src[0], OF_UINT));
ureg_MOV(ureg, src1, translate_src_operand(&sx, &opcode.src[1], OF_UINT));
if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
ureg_UDIV(ureg,
translate_dst_operand(&sx, &opcode.dst[0],
opcode.saturate),
ureg_src(src0), ureg_src(src1));
}
if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
ureg_UMOD(ureg,
translate_dst_operand(&sx, &opcode.dst[1],
opcode.saturate),
ureg_src(src0), ureg_src(src1));
}
ureg_release_temporary(ureg, src0);
ureg_release_temporary(ureg, src1);
}
break;
case D3D10_SB_OPCODE_UMUL: {
if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
ureg_UMUL_HI(ureg,
translate_dst_operand(&sx, &opcode.dst[0],
opcode.saturate),
translate_src_operand(&sx, &opcode.src[0], OF_UINT),
translate_src_operand(&sx, &opcode.src[1], OF_UINT));
}
if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
ureg_UMUL(ureg,
translate_dst_operand(&sx, &opcode.dst[1],
opcode.saturate),
translate_src_operand(&sx, &opcode.src[0], OF_UINT),
translate_src_operand(&sx, &opcode.src[1], OF_UINT));
}
}
break;
case D3D10_SB_OPCODE_DCL_RESOURCE:
{
unsigned target;
unsigned res_index = opcode.dst[0].base.index[0].imm;
assert(opcode.dst[0].base.index_dim == 1);
assert(res_index < SHADER_MAX_RESOURCES);
target = translate_resource_dimension(opcode.specific.dcl_resource_dimension);
sx.resources[res_index].target = target;
if (!(st_debug & ST_DEBUG_OLD_TEX_OPS)) {
sx.sv[res_index] =
ureg_DECL_sampler_view(ureg, res_index, target,
trans_dcl_ret_type(opcode.dcl_resource_ret_type[0]),
trans_dcl_ret_type(opcode.dcl_resource_ret_type[1]),
trans_dcl_ret_type(opcode.dcl_resource_ret_type[2]),
trans_dcl_ret_type(opcode.dcl_resource_ret_type[3]));
}
break;
}
case D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER: {
unsigned num_constants = opcode.src[0].base.index[1].imm;
assert(opcode.src[0].base.index[0].imm < PIPE_MAX_CONSTANT_BUFFERS);
if (num_constants == 0) {
num_constants = SHADER_MAX_CONSTS;
} else {
assert(num_constants <= SHADER_MAX_CONSTS);
}
ureg_DECL_constant2D(ureg,
0,
num_constants - 1,
opcode.src[0].base.index[0].imm);
break;
}
case D3D10_SB_OPCODE_DCL_SAMPLER:
assert(opcode.dst[0].base.index_dim == 1);
assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_SAMPLERS);
sx.samplers[opcode.dst[0].base.index[0].imm] =
ureg_DECL_sampler(ureg,
opcode.dst[0].base.index[0].imm);
break;
case D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
switch (opcode.specific.dcl_gs_output_primitive_topology) {
case D3D10_SB_PRIMITIVE_TOPOLOGY_POINTLIST:
ureg_property(sx.ureg,
TGSI_PROPERTY_GS_OUTPUT_PRIM,
PIPE_PRIM_POINTS);
break;
case D3D10_SB_PRIMITIVE_TOPOLOGY_LINESTRIP:
ureg_property(sx.ureg,
TGSI_PROPERTY_GS_OUTPUT_PRIM,
PIPE_PRIM_LINE_STRIP);
break;
case D3D10_SB_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP:
ureg_property(sx.ureg,
TGSI_PROPERTY_GS_OUTPUT_PRIM,
PIPE_PRIM_TRIANGLE_STRIP);
break;
default:
assert(0);
}
break;
case D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE:
assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
/* Figure out the second dimension of GS inputs.
*/
switch (opcode.specific.dcl_gs_input_primitive) {
case D3D10_SB_PRIMITIVE_POINT:
declare_vertices_in(&sx, 1);
ureg_property(sx.ureg,
TGSI_PROPERTY_GS_INPUT_PRIM,
PIPE_PRIM_POINTS);
break;
case D3D10_SB_PRIMITIVE_LINE:
declare_vertices_in(&sx, 2);
ureg_property(sx.ureg,
TGSI_PROPERTY_GS_INPUT_PRIM,
PIPE_PRIM_LINES);
break;
case D3D10_SB_PRIMITIVE_TRIANGLE:
declare_vertices_in(&sx, 3);
ureg_property(sx.ureg,
TGSI_PROPERTY_GS_INPUT_PRIM,
PIPE_PRIM_TRIANGLES);
break;
case D3D10_SB_PRIMITIVE_LINE_ADJ:
declare_vertices_in(&sx, 4);
ureg_property(sx.ureg,
TGSI_PROPERTY_GS_INPUT_PRIM,
PIPE_PRIM_LINES_ADJACENCY);
break;
case D3D10_SB_PRIMITIVE_TRIANGLE_ADJ:
declare_vertices_in(&sx, 6);
ureg_property(sx.ureg,
TGSI_PROPERTY_GS_INPUT_PRIM,
PIPE_PRIM_TRIANGLES_ADJACENCY);
break;
default:
assert(0);
}
break;
case D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
ureg_property(sx.ureg,
TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
opcode.specific.dcl_max_output_vertex_count);
break;
case D3D10_SB_OPCODE_DCL_INPUT:
if (parser.header.type == D3D10_SB_VERTEX_SHADER) {
dcl_vs_input(&sx, ureg, &opcode.dst[0]);
} else {
assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
dcl_gs_input(&sx, ureg, &opcode.dst[0]);
}
break;
case D3D10_SB_OPCODE_DCL_INPUT_SGV:
assert(parser.header.type == D3D10_SB_VERTEX_SHADER);
dcl_sgv_input(&sx, ureg, &opcode.dst[0], opcode.dcl_siv_name);
break;
case D3D10_SB_OPCODE_DCL_INPUT_SIV:
assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
dcl_siv_input(&sx, ureg, &opcode.dst[0], opcode.dcl_siv_name);
break;
case D3D10_SB_OPCODE_DCL_INPUT_PS:
assert(parser.header.type == D3D10_SB_PIXEL_SHADER);
dcl_ps_input(&sx, ureg, &opcode.dst[0],
opcode.specific.dcl_in_ps_interp);
break;
case D3D10_SB_OPCODE_DCL_INPUT_PS_SGV:
assert(parser.header.type == D3D10_SB_PIXEL_SHADER);
dcl_ps_sgv_input(&sx, ureg, &opcode.dst[0],
opcode.dcl_siv_name);
break;
case D3D10_SB_OPCODE_DCL_INPUT_PS_SIV:
assert(parser.header.type == D3D10_SB_PIXEL_SHADER);
dcl_ps_siv_input(&sx, ureg, &opcode.dst[0],
opcode.dcl_siv_name,
opcode.specific.dcl_in_ps_interp);
break;
case D3D10_SB_OPCODE_DCL_OUTPUT:
if (parser.header.type == D3D10_SB_PIXEL_SHADER) {
/* Pixel shader outputs. */
if (opcode.dst[0].base.type == D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {
/* Depth output. */
assert(opcode.dst[0].base.index_dim == 0);
sx.output_depth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z, 0, 1);
sx.output_depth = ureg_writemask(sx.output_depth, TGSI_WRITEMASK_Z);
} else {
/* Color outputs. */
assert(opcode.dst[0].base.index_dim == 1);
assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
dcl_base_output(&sx, ureg,
ureg_DECL_output(ureg,
TGSI_SEMANTIC_COLOR,
opcode.dst[0].base.index[0].imm),
&opcode.dst[0]);
}
} else {
assert(opcode.dst[0].base.index_dim == 1);
assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
if (output_mapping) {
unsigned nr_outputs = ureg_get_nr_outputs(ureg);
output_mapping[nr_outputs]
= opcode.dst[0].base.index[0].imm;
}
dcl_base_output(&sx, ureg,
ureg_DECL_output(ureg,
TGSI_SEMANTIC_GENERIC,
opcode.dst[0].base.index[0].imm),
&opcode.dst[0]);
}
break;
case D3D10_SB_OPCODE_DCL_OUTPUT_SIV:
assert(opcode.dst[0].base.index_dim == 1);
assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
if (output_mapping) {
unsigned nr_outputs = ureg_get_nr_outputs(ureg);
output_mapping[nr_outputs]
= opcode.dst[0].base.index[0].imm;
}
if (opcode.dcl_siv_name == D3D10_SB_NAME_CLIP_DISTANCE ||
opcode.dcl_siv_name == D3D10_SB_NAME_CULL_DISTANCE) {
/*
* FIXME: this is quite broken. gallium no longer has separate
* clip/cull dists, using (max 2) combined clipdist/culldist regs
* instead. Unlike d3d10 though, which is clip and which cull is
* simply determined by the number of clip/cull dists (that is,
* all clip dists must come first).
*/
unsigned numcliporcull = sx.num_clip_distances_declared +
sx.num_cull_distances_declared;
sx.clip_distance_mapping[numcliporcull].d3d =
opcode.dst[0].base.index[0].imm;
sx.clip_distance_mapping[numcliporcull].tgsi = numcliporcull;
if (opcode.dcl_siv_name == D3D10_SB_NAME_CLIP_DISTANCE) {
++sx.num_clip_distances_declared;
/* re-emit should be safe... */
ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
sx.num_clip_distances_declared);
} else {
++sx.num_cull_distances_declared;
ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
sx.num_cull_distances_declared);
}
} else if (0 && opcode.dcl_siv_name == D3D10_SB_NAME_CULL_DISTANCE) {
sx.cull_distance_mapping[sx.num_cull_distances_declared].d3d =
opcode.dst[0].base.index[0].imm;
sx.cull_distance_mapping[sx.num_cull_distances_declared].tgsi =
sx.num_cull_distances_declared;
++sx.num_cull_distances_declared;
ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
sx.num_cull_distances_declared);
}
dcl_base_output(&sx, ureg,
ureg_DECL_output_masked(
ureg,
translate_system_name(opcode.dcl_siv_name),
translate_semantic_index(&sx, opcode.dcl_siv_name,
&opcode.dst[0]),
opcode.dst[0].mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT,
0, 1),
&opcode.dst[0]);
break;
case D3D10_SB_OPCODE_DCL_OUTPUT_SGV:
assert(opcode.dst[0].base.index_dim == 1);
assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
if (output_mapping) {
unsigned nr_outputs = ureg_get_nr_outputs(ureg);
output_mapping[nr_outputs]
= opcode.dst[0].base.index[0].imm;
}
dcl_base_output(&sx, ureg,
ureg_DECL_output(ureg,
translate_system_name(opcode.dcl_siv_name),
0),
&opcode.dst[0]);
break;
case D3D10_SB_OPCODE_DCL_TEMPS:
{
uint i;
assert(opcode.specific.dcl_num_temps + sx.declared_temps <=
SHADER_MAX_TEMPS);
sx.temp_offset = sx.declared_temps;
for (i = 0; i < opcode.specific.dcl_num_temps; i++) {
sx.temps[sx.declared_temps + i] = ureg_DECL_temporary(ureg);
}
sx.declared_temps += opcode.specific.dcl_num_temps;
}
break;
case D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP:
{
uint i;
/* XXX: Add true indexable temps to gallium.
*/
assert(opcode.specific.dcl_indexable_temp.index <
SHADER_MAX_INDEXABLE_TEMPS);
assert(opcode.specific.dcl_indexable_temp.count + sx.declared_temps <=
SHADER_MAX_TEMPS);
sx.indexable_temp_offsets[opcode.specific.dcl_indexable_temp.index] =
sx.declared_temps;
for (i = 0; i < opcode.specific.dcl_indexable_temp.count; i++) {
sx.temps[sx.declared_temps + i] = ureg_DECL_temporary(ureg);
}
sx.declared_temps += opcode.specific.dcl_indexable_temp.count;
}
break;
case D3D10_SB_OPCODE_IF: {
unsigned label = 0;
if (opcode.specific.test_boolean == D3D10_SB_INSTRUCTION_TEST_ZERO) {
struct ureg_src src =
translate_src_operand(&sx, &opcode.src[0], OF_INT);
struct ureg_dst src_nz = ureg_DECL_temporary(ureg);
ureg_USEQ(ureg, src_nz, src, ureg_imm1u(ureg, 0));
ureg_UIF(ureg, ureg_src(src_nz), &label);
ureg_release_temporary(ureg, src_nz);;
} else {
ureg_UIF(ureg, translate_src_operand(&sx, &opcode.src[0], OF_INT), &label);
}
}
break;
case D3D10_SB_OPCODE_RETC:
case D3D10_SB_OPCODE_CONTINUEC:
case D3D10_SB_OPCODE_CALLC:
case D3D10_SB_OPCODE_DISCARD:
case D3D10_SB_OPCODE_BREAKC:
{
unsigned label = 0;
assert(operand_is_scalar(&opcode.src[0]));
if (opcode.specific.test_boolean == D3D10_SB_INSTRUCTION_TEST_ZERO) {
struct ureg_src src =
translate_src_operand(&sx, &opcode.src[0], OF_INT);
struct ureg_dst src_nz = ureg_DECL_temporary(ureg);
ureg_USEQ(ureg, src_nz, src, ureg_imm1u(ureg, 0));
ureg_UIF(ureg, ureg_src(src_nz), &label);
ureg_release_temporary(ureg, src_nz);
}
else {
ureg_UIF(ureg, translate_src_operand(&sx, &opcode.src[0], OF_INT), &label);
}
switch (opcode.type) {
case D3D10_SB_OPCODE_RETC:
ureg_RET(ureg);
break;
case D3D10_SB_OPCODE_CONTINUEC:
ureg_CONT(ureg);
break;
case D3D10_SB_OPCODE_CALLC: {
unsigned label = opcode.src[1].base.index[0].imm;
unsigned tgsi_token_label = 0;
ureg_CAL(ureg, &tgsi_token_label);
Shader_add_call(&sx, label, tgsi_token_label);
}
break;
case D3D10_SB_OPCODE_DISCARD:
ureg_KILL(ureg);
break;
case D3D10_SB_OPCODE_BREAKC:
ureg_BRK(ureg);
break;
default:
assert(0);
break;
}
ureg_ENDIF(ureg);
}
break;
case D3D10_SB_OPCODE_LABEL: {
unsigned label = opcode.src[0].base.index[0].imm;
unsigned tgsi_inst_no = 0;
if (inside_sub) {
ureg_ENDSUB(ureg);
}
tgsi_inst_no = ureg_get_instruction_number(ureg);
ureg_BGNSUB(ureg);
inside_sub = TRUE;
Shader_add_label(&sx, label, tgsi_inst_no);
}
break;
case D3D10_SB_OPCODE_CALL: {
unsigned label = opcode.src[0].base.index[0].imm;
unsigned tgsi_token_label = 0;
ureg_CAL(ureg, &tgsi_token_label);
Shader_add_call(&sx, label, tgsi_token_label);
}
break;
case D3D10_SB_OPCODE_EMIT:
ureg_EMIT(ureg, ureg_imm1u(ureg, 0));
break;
case D3D10_SB_OPCODE_CUT:
ureg_ENDPRIM(ureg, ureg_imm1u(ureg, 0));
break;
case D3D10_SB_OPCODE_EMITTHENCUT:
ureg_EMIT(ureg, ureg_imm1u(ureg, 0));
ureg_ENDPRIM(ureg, ureg_imm1u(ureg, 0));
break;
case D3D10_SB_OPCODE_DCL_INDEX_RANGE:
case D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS:
/* Ignore */
break;
default:
{
uint i;
struct ureg_dst dst[SHADER_MAX_DST_OPERANDS];
struct ureg_src src[SHADER_MAX_SRC_OPERANDS];
assert(ox->tgsi_opcode != TGSI_EXPAND);
if (ox->tgsi_opcode == TGSI_LOG_UNSUPPORTED) {
if (!shader_dumped) {
dx10_shader_dump_tokens(code);
shader_dumped = TRUE;
}
debug_printf("%s: unsupported opcode %i\n",
__FUNCTION__, ox->type);
assert(ox->tgsi_opcode != TGSI_LOG_UNSUPPORTED);
}
/* Destination operands. */
for (i = 0; i < opcode.num_dst; i++) {
dst[i] = translate_dst_operand(&sx, &opcode.dst[i],
opcode.saturate);
}
/* Source operands. */
for (i = 0; i < opcode.num_src; i++) {
src[i] = translate_src_operand(&sx, &opcode.src[i], ox->format);
}
/* Try to re-route output depth to Z channel. */
if (opcode.dst[0].base.type == D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {
LOG_UNSUPPORTED(opcode.type != D3D10_SB_OPCODE_MOV);
dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_Z);
src[0] = ureg_scalar(src[0], TGSI_SWIZZLE_X);
}
ureg_insn(ureg,
ox->tgsi_opcode,
dst,
opcode.num_dst,
src,
opcode.num_src, 0);
}
}
Shader_opcode_free(&opcode);
}
if (inside_sub) {
ureg_ENDSUB(ureg);
}
ureg_END(ureg);
for (i = 0; i < sx.num_calls; ++i) {
for (j = 0; j < sx.num_labels; ++j) {
if (sx.calls[i].d3d_label == sx.labels[j].d3d_label) {
ureg_fixup_label(sx.ureg,
sx.calls[i].tgsi_label_token,
sx.labels[j].tgsi_insn_no);
break;
}
}
ASSERT(j < sx.num_labels);
}
FREE(sx.labels);
FREE(sx.calls);
tokens = ureg_get_tokens(ureg, &nr_tokens);
assert(tokens);
ureg_destroy(ureg);
if (st_debug & ST_DEBUG_TGSI) {
tgsi_dump(tokens, 0);
}
return tokens;
}