2303 lines
87 KiB
C
2303 lines
87 KiB
C
/**************************************************************************
|
|
*
|
|
* Copyright 2012-2021 VMware, Inc.
|
|
* All Rights Reserved.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the
|
|
* "Software"), to deal in the Software without restriction, including
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
* distribute, sub license, and/or sell copies of the Software, and to
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
* the following conditions:
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
|
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
|
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
|
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
|
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*
|
|
* The above copyright notice and this permission notice (including the
|
|
* next paragraph) shall be included in all copies or substantial portions
|
|
* of the Software.
|
|
*
|
|
**************************************************************************/
|
|
|
|
/*
|
|
* ShaderTGSI.c --
|
|
* Functions for translating shaders.
|
|
*/
|
|
|
|
#include "Debug.h"
|
|
#include "ShaderParse.h"
|
|
|
|
#include "pipe/p_state.h"
|
|
#include "tgsi/tgsi_ureg.h"
|
|
#include "tgsi/tgsi_dump.h"
|
|
#include "util/u_memory.h"
|
|
|
|
#include "ShaderDump.h"
|
|
|
|
|
|
/*
 * Numeric interpretation an opcode applies to its operands.
 *
 * translate_src_operand() switches on this value to decide whether a
 * 32-bit immediate is emitted as a float, signed or unsigned constant.
 */
enum dx10_opcode_format {
   OF_FLOAT,   /* operands are floating point */
   OF_INT,     /* operands are signed integers */
   OF_UINT     /* operands are unsigned integers */
};
|
|
|
|
struct dx10_opcode_xlate {
|
|
D3D10_SB_OPCODE_TYPE type;
|
|
enum dx10_opcode_format format;
|
|
uint tgsi_opcode;
|
|
};
|
|
|
|
/*
 * Pseudo TGSI opcode: marks D3D10 opcodes that we have not even
 * attempted to implement.
 */
#define TGSI_LOG_UNSUPPORTED TGSI_OPCODE_LAST

/*
 * Pseudo TGSI opcode: marks D3D10 opcodes which do not translate
 * directly to a TGSI opcode, but which have at least a partial
 * implementation coded below.
 */
#define TGSI_EXPAND (TGSI_OPCODE_LAST + 1)
|
|
|
|
static struct dx10_opcode_xlate opcode_xlate[D3D10_SB_NUM_OPCODES] = {
|
|
{D3D10_SB_OPCODE_ADD, OF_FLOAT, TGSI_OPCODE_ADD},
|
|
{D3D10_SB_OPCODE_AND, OF_UINT, TGSI_OPCODE_AND},
|
|
{D3D10_SB_OPCODE_BREAK, OF_FLOAT, TGSI_OPCODE_BRK},
|
|
{D3D10_SB_OPCODE_BREAKC, OF_UINT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_CALL, OF_UINT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_CALLC, OF_UINT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_CASE, OF_UINT, TGSI_OPCODE_CASE},
|
|
{D3D10_SB_OPCODE_CONTINUE, OF_FLOAT, TGSI_OPCODE_CONT},
|
|
{D3D10_SB_OPCODE_CONTINUEC, OF_UINT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_CUT, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_DEFAULT, OF_FLOAT, TGSI_OPCODE_DEFAULT},
|
|
{D3D10_SB_OPCODE_DERIV_RTX, OF_FLOAT, TGSI_OPCODE_DDX},
|
|
{D3D10_SB_OPCODE_DERIV_RTY, OF_FLOAT, TGSI_OPCODE_DDY},
|
|
{D3D10_SB_OPCODE_DISCARD, OF_UINT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_DIV, OF_FLOAT, TGSI_OPCODE_DIV},
|
|
{D3D10_SB_OPCODE_DP2, OF_FLOAT, TGSI_OPCODE_DP2},
|
|
{D3D10_SB_OPCODE_DP3, OF_FLOAT, TGSI_OPCODE_DP3},
|
|
{D3D10_SB_OPCODE_DP4, OF_FLOAT, TGSI_OPCODE_DP4},
|
|
{D3D10_SB_OPCODE_ELSE, OF_FLOAT, TGSI_OPCODE_ELSE},
|
|
{D3D10_SB_OPCODE_EMIT, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_EMITTHENCUT, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_ENDIF, OF_FLOAT, TGSI_OPCODE_ENDIF},
|
|
{D3D10_SB_OPCODE_ENDLOOP, OF_FLOAT, TGSI_OPCODE_ENDLOOP},
|
|
{D3D10_SB_OPCODE_ENDSWITCH, OF_FLOAT, TGSI_OPCODE_ENDSWITCH},
|
|
{D3D10_SB_OPCODE_EQ, OF_FLOAT, TGSI_OPCODE_FSEQ},
|
|
{D3D10_SB_OPCODE_EXP, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_FRC, OF_FLOAT, TGSI_OPCODE_FRC},
|
|
{D3D10_SB_OPCODE_FTOI, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_FTOU, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_GE, OF_FLOAT, TGSI_OPCODE_FSGE},
|
|
{D3D10_SB_OPCODE_IADD, OF_INT, TGSI_OPCODE_UADD},
|
|
{D3D10_SB_OPCODE_IF, OF_UINT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_IEQ, OF_INT, TGSI_OPCODE_USEQ},
|
|
{D3D10_SB_OPCODE_IGE, OF_INT, TGSI_OPCODE_ISGE},
|
|
{D3D10_SB_OPCODE_ILT, OF_INT, TGSI_OPCODE_ISLT},
|
|
{D3D10_SB_OPCODE_IMAD, OF_INT, TGSI_OPCODE_UMAD},
|
|
{D3D10_SB_OPCODE_IMAX, OF_INT, TGSI_OPCODE_IMAX},
|
|
{D3D10_SB_OPCODE_IMIN, OF_INT, TGSI_OPCODE_IMIN},
|
|
{D3D10_SB_OPCODE_IMUL, OF_INT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_INE, OF_INT, TGSI_OPCODE_USNE},
|
|
{D3D10_SB_OPCODE_INEG, OF_INT, TGSI_OPCODE_INEG},
|
|
{D3D10_SB_OPCODE_ISHL, OF_INT, TGSI_OPCODE_SHL},
|
|
{D3D10_SB_OPCODE_ISHR, OF_INT, TGSI_OPCODE_ISHR},
|
|
{D3D10_SB_OPCODE_ITOF, OF_INT, TGSI_OPCODE_I2F},
|
|
{D3D10_SB_OPCODE_LABEL, OF_INT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_LD, OF_UINT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_LD_MS, OF_UINT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_LOG, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_LOOP, OF_FLOAT, TGSI_OPCODE_BGNLOOP},
|
|
{D3D10_SB_OPCODE_LT, OF_FLOAT, TGSI_OPCODE_FSLT},
|
|
{D3D10_SB_OPCODE_MAD, OF_FLOAT, TGSI_OPCODE_MAD},
|
|
{D3D10_SB_OPCODE_MIN, OF_FLOAT, TGSI_OPCODE_MIN},
|
|
{D3D10_SB_OPCODE_MAX, OF_FLOAT, TGSI_OPCODE_MAX},
|
|
{D3D10_SB_OPCODE_CUSTOMDATA, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_MOV, OF_UINT, TGSI_OPCODE_MOV},
|
|
{D3D10_SB_OPCODE_MOVC, OF_UINT, TGSI_OPCODE_UCMP},
|
|
{D3D10_SB_OPCODE_MUL, OF_FLOAT, TGSI_OPCODE_MUL},
|
|
{D3D10_SB_OPCODE_NE, OF_FLOAT, TGSI_OPCODE_FSNE},
|
|
{D3D10_SB_OPCODE_NOP, OF_FLOAT, TGSI_OPCODE_NOP},
|
|
{D3D10_SB_OPCODE_NOT, OF_UINT, TGSI_OPCODE_NOT},
|
|
{D3D10_SB_OPCODE_OR, OF_UINT, TGSI_OPCODE_OR},
|
|
{D3D10_SB_OPCODE_RESINFO, OF_UINT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_RET, OF_FLOAT, TGSI_OPCODE_RET},
|
|
{D3D10_SB_OPCODE_RETC, OF_UINT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_ROUND_NE, OF_FLOAT, TGSI_OPCODE_ROUND},
|
|
{D3D10_SB_OPCODE_ROUND_NI, OF_FLOAT, TGSI_OPCODE_FLR},
|
|
{D3D10_SB_OPCODE_ROUND_PI, OF_FLOAT, TGSI_OPCODE_CEIL},
|
|
{D3D10_SB_OPCODE_ROUND_Z, OF_FLOAT, TGSI_OPCODE_TRUNC},
|
|
{D3D10_SB_OPCODE_RSQ, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_SAMPLE, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_SAMPLE_C, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_SAMPLE_C_LZ, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_SAMPLE_L, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_SAMPLE_D, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_SAMPLE_B, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_SQRT, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_SWITCH, OF_UINT, TGSI_OPCODE_SWITCH},
|
|
{D3D10_SB_OPCODE_SINCOS, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_UDIV, OF_UINT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_ULT, OF_UINT, TGSI_OPCODE_USLT},
|
|
{D3D10_SB_OPCODE_UGE, OF_UINT, TGSI_OPCODE_USGE},
|
|
{D3D10_SB_OPCODE_UMUL, OF_UINT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_UMAD, OF_UINT, TGSI_OPCODE_UMAD},
|
|
{D3D10_SB_OPCODE_UMAX, OF_UINT, TGSI_OPCODE_UMAX},
|
|
{D3D10_SB_OPCODE_UMIN, OF_UINT, TGSI_OPCODE_UMIN},
|
|
{D3D10_SB_OPCODE_USHR, OF_UINT, TGSI_OPCODE_USHR},
|
|
{D3D10_SB_OPCODE_UTOF, OF_UINT, TGSI_OPCODE_U2F},
|
|
{D3D10_SB_OPCODE_XOR, OF_UINT, TGSI_OPCODE_XOR},
|
|
{D3D10_SB_OPCODE_DCL_RESOURCE, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_DCL_SAMPLER, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_DCL_INDEX_RANGE, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
|
|
{D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_DCL_INPUT, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_DCL_INPUT_SGV, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_DCL_INPUT_SIV, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_DCL_INPUT_PS, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_DCL_INPUT_PS_SGV, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_DCL_INPUT_PS_SIV, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_DCL_OUTPUT, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_DCL_OUTPUT_SGV, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_DCL_OUTPUT_SIV, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_DCL_TEMPS, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP, OF_FLOAT, TGSI_EXPAND},
|
|
{D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
|
|
{D3D10_SB_OPCODE_RESERVED0, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
|
|
{D3D10_1_SB_OPCODE_LOD, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
|
|
{D3D10_1_SB_OPCODE_GATHER4, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
|
|
{D3D10_1_SB_OPCODE_SAMPLE_POS, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
|
|
{D3D10_1_SB_OPCODE_SAMPLE_INFO, OF_FLOAT, TGSI_LOG_UNSUPPORTED}
|
|
};
|
|
|
|
/*
 * Capacities of the fixed-size arrays in struct Shader_xlate; the
 * translation helpers assert operand indices against these limits.
 */
#define SHADER_MAX_TEMPS            4096
#define SHADER_MAX_INPUTS           32
#define SHADER_MAX_OUTPUTS          32
#define SHADER_MAX_CONSTS           4096
#define SHADER_MAX_RESOURCES        PIPE_MAX_SHADER_SAMPLER_VIEWS
#define SHADER_MAX_SAMPLERS         PIPE_MAX_SAMPLERS
#define SHADER_MAX_INDEXABLE_TEMPS  4096
|
|
|
|
/*
 * Pairs a D3D label id with a TGSI label token.
 * NOTE(review): presumably one entry per emitted call so its target can
 * be patched once the label is known -- confirm at the use site.
 */
struct Shader_call {
   unsigned d3d_label;          /* label id as it appears in the D3D shader */
   unsigned tgsi_label_token;   /* TGSI label token associated with it */
};
|
|
|
|
/*
 * Pairs a D3D label id with a TGSI instruction number.
 */
struct Shader_label {
   unsigned d3d_label;      /* label id as it appears in the D3D shader */
   unsigned tgsi_insn_no;   /* TGSI instruction number tied to the label */
};
|
|
|
|
struct Shader_resource {
|
|
uint target; /* TGSI_TEXTURE_x */
|
|
};
|
|
|
|
struct Shader_xlate {
|
|
struct ureg_program *ureg;
|
|
|
|
uint vertices_in;
|
|
uint declared_temps;
|
|
|
|
struct ureg_dst temps[SHADER_MAX_TEMPS];
|
|
struct ureg_dst output_depth;
|
|
struct Shader_resource resources[SHADER_MAX_RESOURCES];
|
|
struct ureg_src sv[SHADER_MAX_RESOURCES];
|
|
struct ureg_src samplers[SHADER_MAX_SAMPLERS];
|
|
struct ureg_src imms;
|
|
struct ureg_src prim_id;
|
|
|
|
uint temp_offset;
|
|
uint indexable_temp_offsets[SHADER_MAX_INDEXABLE_TEMPS];
|
|
|
|
struct {
|
|
boolean declared;
|
|
uint writemask;
|
|
uint siv_name;
|
|
boolean overloaded;
|
|
struct ureg_src reg;
|
|
} inputs[SHADER_MAX_INPUTS];
|
|
|
|
struct {
|
|
struct ureg_dst reg[4];
|
|
} outputs[SHADER_MAX_OUTPUTS];
|
|
|
|
struct {
|
|
uint d3d;
|
|
uint tgsi;
|
|
} clip_distance_mapping[2], cull_distance_mapping[2];
|
|
uint num_clip_distances_declared;
|
|
uint num_cull_distances_declared;
|
|
|
|
struct Shader_call *calls;
|
|
uint num_calls;
|
|
uint max_calls;
|
|
struct Shader_label *labels;
|
|
uint num_labels;
|
|
uint max_labels;
|
|
};
|
|
|
|
static uint
|
|
translate_interpolation(D3D10_SB_INTERPOLATION_MODE interpolation)
|
|
{
|
|
switch (interpolation) {
|
|
case D3D10_SB_INTERPOLATION_UNDEFINED:
|
|
assert(0);
|
|
return TGSI_INTERPOLATE_LINEAR;
|
|
|
|
case D3D10_SB_INTERPOLATION_CONSTANT:
|
|
return TGSI_INTERPOLATE_CONSTANT;
|
|
case D3D10_SB_INTERPOLATION_LINEAR:
|
|
return TGSI_INTERPOLATE_PERSPECTIVE;
|
|
case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE:
|
|
return TGSI_INTERPOLATE_LINEAR;
|
|
|
|
case D3D10_SB_INTERPOLATION_LINEAR_CENTROID:
|
|
case D3D10_SB_INTERPOLATION_LINEAR_SAMPLE: // DX10.1
|
|
LOG_UNSUPPORTED(TRUE);
|
|
return TGSI_INTERPOLATE_PERSPECTIVE;
|
|
|
|
case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID:
|
|
case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: // DX10.1
|
|
LOG_UNSUPPORTED(TRUE);
|
|
return TGSI_INTERPOLATE_LINEAR;
|
|
}
|
|
|
|
assert(0);
|
|
return TGSI_INTERPOLATE_LINEAR;
|
|
}
|
|
|
|
static uint
|
|
translate_system_name(D3D10_SB_NAME name)
|
|
{
|
|
switch (name) {
|
|
case D3D10_SB_NAME_UNDEFINED:
|
|
assert(0); /* should not happen */
|
|
return TGSI_SEMANTIC_GENERIC;
|
|
case D3D10_SB_NAME_POSITION:
|
|
return TGSI_SEMANTIC_POSITION;
|
|
case D3D10_SB_NAME_CLIP_DISTANCE:
|
|
case D3D10_SB_NAME_CULL_DISTANCE:
|
|
return TGSI_SEMANTIC_CLIPDIST;
|
|
case D3D10_SB_NAME_PRIMITIVE_ID:
|
|
return TGSI_SEMANTIC_PRIMID;
|
|
case D3D10_SB_NAME_INSTANCE_ID:
|
|
return TGSI_SEMANTIC_INSTANCEID;
|
|
case D3D10_SB_NAME_VERTEX_ID:
|
|
return TGSI_SEMANTIC_VERTEXID_NOBASE;
|
|
case D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX:
|
|
return TGSI_SEMANTIC_VIEWPORT_INDEX;
|
|
case D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX:
|
|
return TGSI_SEMANTIC_LAYER;
|
|
case D3D10_SB_NAME_IS_FRONT_FACE:
|
|
return TGSI_SEMANTIC_FACE;
|
|
case D3D10_SB_NAME_SAMPLE_INDEX:
|
|
LOG_UNSUPPORTED(TRUE);
|
|
return TGSI_SEMANTIC_GENERIC;
|
|
}
|
|
|
|
assert(0);
|
|
return TGSI_SEMANTIC_GENERIC;
|
|
}
|
|
|
|
static uint
|
|
translate_semantic_index(struct Shader_xlate *sx,
|
|
D3D10_SB_NAME name,
|
|
const struct Shader_dst_operand *operand)
|
|
{
|
|
unsigned idx;
|
|
switch (name) {
|
|
case D3D10_SB_NAME_CLIP_DISTANCE:
|
|
case D3D10_SB_NAME_CULL_DISTANCE:
|
|
if (sx->clip_distance_mapping[0].d3d == operand->base.index[0].imm) {
|
|
idx = sx->clip_distance_mapping[0].tgsi;
|
|
} else {
|
|
assert(sx->clip_distance_mapping[1].d3d == operand->base.index[0].imm);
|
|
idx = sx->clip_distance_mapping[1].tgsi;
|
|
}
|
|
break;
|
|
/* case D3D10_SB_NAME_CULL_DISTANCE:
|
|
if (sx->cull_distance_mapping[0].d3d == operand->base.index[0].imm) {
|
|
idx = sx->cull_distance_mapping[0].tgsi;
|
|
} else {
|
|
assert(sx->cull_distance_mapping[1].d3d == operand->base.index[0].imm);
|
|
idx = sx->cull_distance_mapping[1].tgsi;
|
|
}
|
|
break;*/
|
|
default:
|
|
idx = 0;
|
|
}
|
|
return idx;
|
|
}
|
|
|
|
static enum tgsi_return_type
|
|
trans_dcl_ret_type(D3D10_SB_RESOURCE_RETURN_TYPE d3drettype) {
|
|
switch (d3drettype) {
|
|
case D3D10_SB_RETURN_TYPE_UNORM:
|
|
return TGSI_RETURN_TYPE_UNORM;
|
|
case D3D10_SB_RETURN_TYPE_SNORM:
|
|
return TGSI_RETURN_TYPE_SNORM;
|
|
case D3D10_SB_RETURN_TYPE_SINT:
|
|
return TGSI_RETURN_TYPE_SINT;
|
|
case D3D10_SB_RETURN_TYPE_UINT:
|
|
return TGSI_RETURN_TYPE_UINT;
|
|
case D3D10_SB_RETURN_TYPE_FLOAT:
|
|
return TGSI_RETURN_TYPE_FLOAT;
|
|
case D3D10_SB_RETURN_TYPE_MIXED:
|
|
default:
|
|
LOG_UNSUPPORTED(TRUE);
|
|
return TGSI_RETURN_TYPE_FLOAT;
|
|
}
|
|
}
|
|
|
|
static void
|
|
declare_vertices_in(struct Shader_xlate *sx,
|
|
unsigned in)
|
|
{
|
|
/* Make sure vertices_in is consistent with input primitive
|
|
* and other input declarations.
|
|
*/
|
|
if (sx->vertices_in) {
|
|
assert(sx->vertices_in == in);
|
|
} else {
|
|
sx->vertices_in = in;
|
|
}
|
|
}
|
|
|
|
/*
 * A four-component source swizzle: the TGSI_SWIZZLE_x selected for each
 * of the x, y, z and w channels.
 */
struct swizzle_mapping {
   unsigned x;
   unsigned y;
   unsigned z;
   unsigned w;
};
|
|
|
|
/* mapping of writemask to swizzles */
|
|
static const struct swizzle_mapping writemask_to_swizzle[] = {
|
|
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_NONE
|
|
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_X
|
|
{ TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y }, //TGSI_WRITEMASK_Y
|
|
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y }, //TGSI_WRITEMASK_XY
|
|
{ TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_Z
|
|
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_XZ
|
|
{ TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_YZ
|
|
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_XYZ
|
|
{ TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_W
|
|
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_W, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XW
|
|
{ TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_YW
|
|
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XYW
|
|
{ TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_ZW
|
|
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XZW
|
|
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_YZW
|
|
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XYZW
|
|
};
|
|
|
|
static struct ureg_src
|
|
swizzle_reg(struct ureg_src src, uint writemask,
|
|
unsigned siv_name)
|
|
{
|
|
switch (siv_name) {
|
|
case D3D10_SB_NAME_PRIMITIVE_ID:
|
|
case D3D10_SB_NAME_INSTANCE_ID:
|
|
case D3D10_SB_NAME_VERTEX_ID:
|
|
case D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX:
|
|
case D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX:
|
|
case D3D10_SB_NAME_IS_FRONT_FACE:
|
|
return ureg_scalar(src, TGSI_SWIZZLE_X);
|
|
default: {
|
|
const struct swizzle_mapping *swizzle =
|
|
&writemask_to_swizzle[writemask];
|
|
return ureg_swizzle(src, swizzle->x, swizzle->y,
|
|
swizzle->z, swizzle->w);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
dcl_base_output(struct Shader_xlate *sx,
|
|
struct ureg_program *ureg,
|
|
struct ureg_dst reg,
|
|
const struct Shader_dst_operand *operand)
|
|
{
|
|
unsigned writemask =
|
|
operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;
|
|
unsigned idx = operand->base.index[0].imm;
|
|
unsigned i;
|
|
|
|
if (!writemask) {
|
|
sx->outputs[idx].reg[0] = reg;
|
|
sx->outputs[idx].reg[1] = reg;
|
|
sx->outputs[idx].reg[2] = reg;
|
|
sx->outputs[idx].reg[3] = reg;
|
|
return;
|
|
}
|
|
|
|
for (i = 0; i < 4; ++i) {
|
|
unsigned mask = 1 << i;
|
|
if ((writemask & mask)) {
|
|
sx->outputs[idx].reg[i] = reg;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
dcl_base_input(struct Shader_xlate *sx,
|
|
struct ureg_program *ureg,
|
|
const struct Shader_dst_operand *operand,
|
|
struct ureg_src dcl_reg,
|
|
uint index,
|
|
uint siv_name)
|
|
{
|
|
unsigned writemask =
|
|
operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;
|
|
|
|
if (sx->inputs[index].declared && !sx->inputs[index].overloaded) {
|
|
struct ureg_dst temp = ureg_DECL_temporary(sx->ureg);
|
|
|
|
ureg_MOV(ureg,
|
|
ureg_writemask(temp, sx->inputs[index].writemask),
|
|
swizzle_reg(sx->inputs[index].reg, sx->inputs[index].writemask,
|
|
sx->inputs[index].siv_name));
|
|
ureg_MOV(ureg, ureg_writemask(temp, writemask),
|
|
swizzle_reg(dcl_reg, writemask, siv_name));
|
|
sx->inputs[index].reg = ureg_src(temp);
|
|
sx->inputs[index].overloaded = TRUE;
|
|
sx->inputs[index].writemask |= writemask;
|
|
} else if (sx->inputs[index].overloaded) {
|
|
struct ureg_dst temp = ureg_dst(sx->inputs[index].reg);
|
|
ureg_MOV(ureg, ureg_writemask(temp, writemask),
|
|
swizzle_reg(dcl_reg, writemask, siv_name));
|
|
sx->inputs[index].writemask |= writemask;
|
|
} else {
|
|
assert(!sx->inputs[index].declared);
|
|
|
|
sx->inputs[index].reg = dcl_reg;
|
|
sx->inputs[index].declared = TRUE;
|
|
sx->inputs[index].writemask = writemask;
|
|
sx->inputs[index].siv_name = siv_name;
|
|
}
|
|
}
|
|
|
|
static void
|
|
dcl_vs_input(struct Shader_xlate *sx,
|
|
struct ureg_program *ureg,
|
|
const struct Shader_dst_operand *dst)
|
|
{
|
|
struct ureg_src reg;
|
|
assert(dst->base.index_dim == 1);
|
|
assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
|
|
|
|
reg = ureg_DECL_vs_input(ureg, dst->base.index[0].imm);
|
|
|
|
dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
|
|
D3D10_SB_NAME_UNDEFINED);
|
|
}
|
|
|
|
static void
|
|
dcl_gs_input(struct Shader_xlate *sx,
|
|
struct ureg_program *ureg,
|
|
const struct Shader_dst_operand *dst)
|
|
{
|
|
if (dst->base.index_dim == 2) {
|
|
assert(dst->base.index[1].imm < SHADER_MAX_INPUTS);
|
|
|
|
declare_vertices_in(sx, dst->base.index[0].imm);
|
|
|
|
/* XXX: Implement declaration masks in gallium.
|
|
*/
|
|
if (!sx->inputs[dst->base.index[1].imm].reg.File) {
|
|
struct ureg_src reg =
|
|
ureg_DECL_input(ureg,
|
|
TGSI_SEMANTIC_GENERIC,
|
|
dst->base.index[1].imm,
|
|
0, 1);
|
|
dcl_base_input(sx, ureg, dst, reg, dst->base.index[1].imm,
|
|
D3D10_SB_NAME_UNDEFINED);
|
|
}
|
|
} else {
|
|
assert(dst->base.type == D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID);
|
|
assert(dst->base.index_dim == 0);
|
|
|
|
sx->prim_id = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_PRIMID, 0);
|
|
}
|
|
}
|
|
|
|
static void
|
|
dcl_sgv_input(struct Shader_xlate *sx,
|
|
struct ureg_program *ureg,
|
|
const struct Shader_dst_operand *dst,
|
|
uint dcl_siv_name)
|
|
{
|
|
struct ureg_src reg;
|
|
assert(dst->base.index_dim == 1);
|
|
assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
|
|
|
|
reg = ureg_DECL_system_value(ureg, translate_system_name(dcl_siv_name), 0);
|
|
|
|
dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
|
|
dcl_siv_name);
|
|
}
|
|
|
|
static void
|
|
dcl_siv_input(struct Shader_xlate *sx,
|
|
struct ureg_program *ureg,
|
|
const struct Shader_dst_operand *dst,
|
|
uint dcl_siv_name)
|
|
{
|
|
struct ureg_src reg;
|
|
assert(dst->base.index_dim == 2);
|
|
assert(dst->base.index[1].imm < SHADER_MAX_INPUTS);
|
|
|
|
declare_vertices_in(sx, dst->base.index[0].imm);
|
|
|
|
reg = ureg_DECL_input(ureg,
|
|
translate_system_name(dcl_siv_name), 0,
|
|
0, 1);
|
|
|
|
dcl_base_input(sx, ureg, dst, reg, dst->base.index[1].imm,
|
|
dcl_siv_name);
|
|
}
|
|
|
|
static void
|
|
dcl_ps_input(struct Shader_xlate *sx,
|
|
struct ureg_program *ureg,
|
|
const struct Shader_dst_operand *dst,
|
|
uint dcl_in_ps_interp)
|
|
{
|
|
struct ureg_src reg;
|
|
assert(dst->base.index_dim == 1);
|
|
assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
|
|
|
|
reg = ureg_DECL_fs_input(ureg,
|
|
TGSI_SEMANTIC_GENERIC,
|
|
dst->base.index[0].imm,
|
|
translate_interpolation(dcl_in_ps_interp));
|
|
|
|
dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
|
|
D3D10_SB_NAME_UNDEFINED);
|
|
}
|
|
|
|
static void
|
|
dcl_ps_sgv_input(struct Shader_xlate *sx,
|
|
struct ureg_program *ureg,
|
|
const struct Shader_dst_operand *dst,
|
|
uint dcl_siv_name)
|
|
{
|
|
struct ureg_src reg;
|
|
assert(dst->base.index_dim == 1);
|
|
assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
|
|
|
|
if (dcl_siv_name == D3D10_SB_NAME_POSITION) {
|
|
ureg_property(ureg,
|
|
TGSI_PROPERTY_FS_COORD_ORIGIN,
|
|
TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
|
|
ureg_property(ureg,
|
|
TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
|
|
TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER);
|
|
}
|
|
|
|
reg = ureg_DECL_fs_input(ureg,
|
|
translate_system_name(dcl_siv_name),
|
|
0,
|
|
TGSI_INTERPOLATE_CONSTANT);
|
|
|
|
if (dcl_siv_name == D3D10_SB_NAME_IS_FRONT_FACE) {
|
|
/* We need to map gallium's front_face to the one expected
|
|
* by D3D10 */
|
|
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
|
|
|
|
tmp = ureg_writemask(tmp, TGSI_WRITEMASK_X);
|
|
|
|
ureg_CMP(ureg, tmp, reg,
|
|
ureg_imm1i(ureg, 0), ureg_imm1i(ureg, -1));
|
|
|
|
reg = ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X);
|
|
}
|
|
|
|
dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
|
|
dcl_siv_name);
|
|
}
|
|
|
|
static void
|
|
dcl_ps_siv_input(struct Shader_xlate *sx,
|
|
struct ureg_program *ureg,
|
|
const struct Shader_dst_operand *dst,
|
|
uint dcl_siv_name, uint dcl_in_ps_interp)
|
|
{
|
|
struct ureg_src reg;
|
|
assert(dst->base.index_dim == 1);
|
|
assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
|
|
|
|
reg = ureg_DECL_fs_input(ureg,
|
|
translate_system_name(dcl_siv_name),
|
|
0,
|
|
translate_interpolation(dcl_in_ps_interp));
|
|
|
|
if (dcl_siv_name == D3D10_SB_NAME_POSITION) {
|
|
/* D3D10 expects reciprocal of interpolated 1/w as 4th component,
|
|
* gallium/GL just interpolated 1/w */
|
|
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
|
|
|
|
ureg_MOV(ureg, tmp, reg);
|
|
ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W),
|
|
ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_W));
|
|
reg = ureg_src(tmp);
|
|
}
|
|
|
|
dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
|
|
dcl_siv_name);
|
|
}
|
|
|
|
static struct ureg_src
|
|
translate_relative_operand(struct Shader_xlate *sx,
|
|
const struct Shader_relative_operand *operand)
|
|
{
|
|
struct ureg_src reg;
|
|
|
|
switch (operand->type) {
|
|
case D3D10_SB_OPERAND_TYPE_TEMP:
|
|
assert(operand->index[0].imm < SHADER_MAX_TEMPS);
|
|
|
|
reg = ureg_src(sx->temps[sx->temp_offset + operand->index[0].imm]);
|
|
break;
|
|
|
|
case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:
|
|
reg = sx->prim_id;
|
|
break;
|
|
|
|
case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
|
|
assert(operand->index[1].imm < SHADER_MAX_TEMPS);
|
|
|
|
reg = ureg_src(sx->temps[sx->indexable_temp_offsets[operand->index[0].imm] +
|
|
operand->index[1].imm]);
|
|
break;
|
|
|
|
case D3D10_SB_OPERAND_TYPE_INPUT:
|
|
case D3D10_SB_OPERAND_TYPE_OUTPUT:
|
|
case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:
|
|
case D3D10_SB_OPERAND_TYPE_IMMEDIATE64:
|
|
case D3D10_SB_OPERAND_TYPE_SAMPLER:
|
|
case D3D10_SB_OPERAND_TYPE_RESOURCE:
|
|
case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER:
|
|
case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
|
|
case D3D10_SB_OPERAND_TYPE_LABEL:
|
|
case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH:
|
|
case D3D10_SB_OPERAND_TYPE_NULL:
|
|
case D3D10_SB_OPERAND_TYPE_RASTERIZER:
|
|
case D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
|
|
LOG_UNSUPPORTED(TRUE);
|
|
reg = ureg_src(ureg_DECL_temporary(sx->ureg));
|
|
break;
|
|
|
|
default:
|
|
assert(0); /* should never happen */
|
|
reg = ureg_src(ureg_DECL_temporary(sx->ureg));
|
|
}
|
|
|
|
reg = ureg_scalar(reg, operand->comp);
|
|
return reg;
|
|
}
|
|
|
|
static struct ureg_dst
|
|
translate_operand(struct Shader_xlate *sx,
|
|
const struct Shader_operand *operand,
|
|
unsigned writemask)
|
|
{
|
|
struct ureg_dst reg;
|
|
|
|
switch (operand->type) {
|
|
case D3D10_SB_OPERAND_TYPE_TEMP:
|
|
assert(operand->index_dim == 1);
|
|
assert(operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
|
|
assert(operand->index[0].imm < SHADER_MAX_TEMPS);
|
|
|
|
reg = sx->temps[sx->temp_offset + operand->index[0].imm];
|
|
break;
|
|
|
|
case D3D10_SB_OPERAND_TYPE_OUTPUT:
|
|
assert(operand->index_dim == 1);
|
|
assert(operand->index[0].imm < SHADER_MAX_OUTPUTS);
|
|
|
|
if (operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32) {
|
|
if (!writemask) {
|
|
reg = sx->outputs[operand->index[0].imm].reg[0];
|
|
} else {
|
|
unsigned i;
|
|
for (i = 0; i < 4; ++i) {
|
|
unsigned mask = 1 << i;
|
|
if ((writemask & mask)) {
|
|
reg = sx->outputs[operand->index[0].imm].reg[i];
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
struct ureg_src addr =
|
|
translate_relative_operand(sx, &operand->index[0].rel);
|
|
assert(operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE);
|
|
reg = ureg_dst_indirect(sx->outputs[operand->index[0].imm].reg[0], addr);
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH:
|
|
assert(operand->index_dim == 0);
|
|
|
|
reg = sx->output_depth;
|
|
break;
|
|
|
|
case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:
|
|
assert(operand->index_dim == 0);
|
|
|
|
reg = ureg_dst(sx->prim_id);
|
|
break;
|
|
|
|
case D3D10_SB_OPERAND_TYPE_INPUT:
|
|
case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
|
|
case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:
|
|
case D3D10_SB_OPERAND_TYPE_IMMEDIATE64:
|
|
case D3D10_SB_OPERAND_TYPE_SAMPLER:
|
|
case D3D10_SB_OPERAND_TYPE_RESOURCE:
|
|
case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER:
|
|
case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
|
|
case D3D10_SB_OPERAND_TYPE_LABEL:
|
|
case D3D10_SB_OPERAND_TYPE_NULL:
|
|
case D3D10_SB_OPERAND_TYPE_RASTERIZER:
|
|
case D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
|
|
/* XXX: Translate more operands types.
|
|
*/
|
|
LOG_UNSUPPORTED(TRUE);
|
|
reg = ureg_DECL_temporary(sx->ureg);
|
|
}
|
|
|
|
return reg;
|
|
}
|
|
|
|
static struct ureg_src
|
|
translate_indexable_temp(struct Shader_xlate *sx,
|
|
const struct Shader_operand *operand)
|
|
{
|
|
struct ureg_src reg;
|
|
switch (operand->index[1].index_rep) {
|
|
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
|
|
reg = ureg_src(
|
|
sx->temps[sx->indexable_temp_offsets[operand->index[0].imm] +
|
|
operand->index[1].imm]);
|
|
break;
|
|
case D3D10_SB_OPERAND_INDEX_RELATIVE:
|
|
reg = ureg_src_indirect(
|
|
ureg_src(sx->temps[
|
|
sx->indexable_temp_offsets[operand->index[0].imm]]),
|
|
translate_relative_operand(sx,
|
|
&operand->index[1].rel));
|
|
break;
|
|
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
|
|
reg = ureg_src_indirect(
|
|
ureg_src(sx->temps[
|
|
operand->index[1].imm +
|
|
sx->indexable_temp_offsets[operand->index[0].imm]]),
|
|
translate_relative_operand(sx,
|
|
&operand->index[1].rel));
|
|
break;
|
|
default:
|
|
/* XXX: Other index representations.
|
|
*/
|
|
LOG_UNSUPPORTED(TRUE);
|
|
reg = ureg_src(ureg_DECL_temporary(sx->ureg));
|
|
}
|
|
return reg;
|
|
}
|
|
|
|
static struct ureg_dst
|
|
translate_dst_operand(struct Shader_xlate *sx,
|
|
const struct Shader_dst_operand *operand,
|
|
boolean saturate)
|
|
{
|
|
struct ureg_dst reg;
|
|
unsigned writemask =
|
|
operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;
|
|
|
|
assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT) == 4);
|
|
assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_X >> 4) == TGSI_WRITEMASK_X);
|
|
assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_Y >> 4) == TGSI_WRITEMASK_Y);
|
|
assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_Z >> 4) == TGSI_WRITEMASK_Z);
|
|
assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_W >> 4) == TGSI_WRITEMASK_W);
|
|
|
|
switch (operand->base.type) {
|
|
case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
|
|
assert(operand->base.index_dim == 2);
|
|
assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
|
|
assert(operand->base.index[0].imm < SHADER_MAX_INDEXABLE_TEMPS);
|
|
|
|
reg = ureg_dst(translate_indexable_temp(sx, &operand->base));
|
|
break;
|
|
|
|
default:
|
|
reg = translate_operand(sx, &operand->base, writemask);
|
|
}
|
|
|
|
/* oDepth often has an empty writemask */
|
|
if (operand->base.type != D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {
|
|
reg = ureg_writemask(reg, writemask);
|
|
}
|
|
|
|
if (saturate) {
|
|
reg = ureg_saturate(reg);
|
|
}
|
|
|
|
return reg;
|
|
}
|
|
|
|
static struct ureg_src
|
|
translate_src_operand(struct Shader_xlate *sx,
|
|
const struct Shader_src_operand *operand,
|
|
const enum dx10_opcode_format format)
|
|
{
|
|
struct ureg_src reg;
|
|
|
|
switch (operand->base.type) {
|
|
case D3D10_SB_OPERAND_TYPE_INPUT:
|
|
if (operand->base.index_dim == 1) {
|
|
switch (operand->base.index[0].index_rep) {
|
|
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
|
|
assert(operand->base.index[0].imm < SHADER_MAX_INPUTS);
|
|
reg = sx->inputs[operand->base.index[0].imm].reg;
|
|
break;
|
|
case D3D10_SB_OPERAND_INDEX_RELATIVE: {
|
|
struct ureg_src tmp =
|
|
translate_relative_operand(sx, &operand->base.index[0].rel);
|
|
reg = ureg_src_indirect(sx->inputs[0].reg, tmp);
|
|
}
|
|
break;
|
|
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {
|
|
struct ureg_src tmp =
|
|
translate_relative_operand(sx, &operand->base.index[0].rel);
|
|
reg = ureg_src_indirect(sx->inputs[operand->base.index[0].imm].reg, tmp);
|
|
}
|
|
break;
|
|
default:
|
|
/* XXX: Other index representations.
|
|
*/
|
|
LOG_UNSUPPORTED(TRUE);
|
|
|
|
}
|
|
} else {
|
|
assert(operand->base.index_dim == 2);
|
|
assert(operand->base.index[1].imm < SHADER_MAX_INPUTS);
|
|
|
|
switch (operand->base.index[1].index_rep) {
|
|
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
|
|
reg = sx->inputs[operand->base.index[1].imm].reg;
|
|
break;
|
|
case D3D10_SB_OPERAND_INDEX_RELATIVE: {
|
|
struct ureg_src tmp =
|
|
translate_relative_operand(sx, &operand->base.index[1].rel);
|
|
reg = ureg_src_indirect(sx->inputs[0].reg, tmp);
|
|
}
|
|
break;
|
|
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {
|
|
struct ureg_src tmp =
|
|
translate_relative_operand(sx, &operand->base.index[1].rel);
|
|
reg = ureg_src_indirect(sx->inputs[operand->base.index[1].imm].reg, tmp);
|
|
}
|
|
break;
|
|
default:
|
|
/* XXX: Other index representations.
|
|
*/
|
|
LOG_UNSUPPORTED(TRUE);
|
|
}
|
|
|
|
switch (operand->base.index[0].index_rep) {
|
|
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
|
|
reg = ureg_src_dimension(reg, operand->base.index[0].imm);
|
|
break;
|
|
case D3D10_SB_OPERAND_INDEX_RELATIVE:{
|
|
struct ureg_src tmp =
|
|
translate_relative_operand(sx, &operand->base.index[0].rel);
|
|
reg = ureg_src_dimension_indirect(reg, tmp, 0);
|
|
}
|
|
break;
|
|
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {
|
|
struct ureg_src tmp =
|
|
translate_relative_operand(sx, &operand->base.index[0].rel);
|
|
reg = ureg_src_dimension_indirect(reg, tmp, operand->base.index[0].imm);
|
|
}
|
|
break;
|
|
default:
|
|
/* XXX: Other index representations.
|
|
*/
|
|
LOG_UNSUPPORTED(TRUE);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
|
|
assert(operand->base.index_dim == 2);
|
|
assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
|
|
assert(operand->base.index[0].imm < SHADER_MAX_INDEXABLE_TEMPS);
|
|
|
|
reg = translate_indexable_temp(sx, &operand->base);
|
|
break;
|
|
|
|
case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:
|
|
switch (format) {
|
|
case OF_FLOAT:
|
|
reg = ureg_imm4f(sx->ureg,
|
|
operand->imm[0].f32,
|
|
operand->imm[1].f32,
|
|
operand->imm[2].f32,
|
|
operand->imm[3].f32);
|
|
break;
|
|
case OF_INT:
|
|
reg = ureg_imm4i(sx->ureg,
|
|
operand->imm[0].i32,
|
|
operand->imm[1].i32,
|
|
operand->imm[2].i32,
|
|
operand->imm[3].i32);
|
|
break;
|
|
case OF_UINT:
|
|
reg = ureg_imm4u(sx->ureg,
|
|
operand->imm[0].u32,
|
|
operand->imm[1].u32,
|
|
operand->imm[2].u32,
|
|
operand->imm[3].u32);
|
|
break;
|
|
default:
|
|
assert(0);
|
|
reg = ureg_src(ureg_DECL_temporary(sx->ureg));
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPERAND_TYPE_SAMPLER:
|
|
assert(operand->base.index_dim == 1);
|
|
assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
|
|
assert(operand->base.index[0].imm < SHADER_MAX_SAMPLERS);
|
|
|
|
reg = sx->samplers[operand->base.index[0].imm];
|
|
break;
|
|
|
|
case D3D10_SB_OPERAND_TYPE_RESOURCE:
|
|
assert(operand->base.index_dim == 1);
|
|
assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
|
|
assert(operand->base.index[0].imm < SHADER_MAX_RESOURCES);
|
|
|
|
reg = sx->sv[operand->base.index[0].imm];
|
|
break;
|
|
|
|
case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER:
|
|
assert(operand->base.index_dim == 2);
|
|
|
|
assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
|
|
assert(operand->base.index[0].imm < PIPE_MAX_CONSTANT_BUFFERS);
|
|
|
|
switch (operand->base.index[1].index_rep) {
|
|
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
|
|
assert(operand->base.index[1].imm < SHADER_MAX_CONSTS);
|
|
|
|
reg = ureg_src_register(TGSI_FILE_CONSTANT, operand->base.index[1].imm);
|
|
reg = ureg_src_dimension(reg, operand->base.index[0].imm);
|
|
break;
|
|
case D3D10_SB_OPERAND_INDEX_RELATIVE:
|
|
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
|
|
reg = ureg_src_register(TGSI_FILE_CONSTANT, operand->base.index[1].imm);
|
|
reg = ureg_src_indirect(
|
|
reg,
|
|
translate_relative_operand(sx, &operand->base.index[1].rel));
|
|
reg = ureg_src_dimension(reg, operand->base.index[0].imm);
|
|
break;
|
|
default:
|
|
/* XXX: Other index representations.
|
|
*/
|
|
LOG_UNSUPPORTED(TRUE);
|
|
}
|
|
|
|
break;
|
|
|
|
case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
|
|
assert(operand->base.index_dim == 1);
|
|
|
|
switch (operand->base.index[0].index_rep) {
|
|
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
|
|
reg = sx->imms;
|
|
reg.Index += operand->base.index[0].imm;
|
|
break;
|
|
case D3D10_SB_OPERAND_INDEX_RELATIVE:
|
|
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
|
|
reg = sx->imms;
|
|
reg.Index += operand->base.index[0].imm;
|
|
reg = ureg_src_indirect(
|
|
sx->imms,
|
|
translate_relative_operand(sx, &operand->base.index[0].rel));
|
|
break;
|
|
default:
|
|
/* XXX: Other index representations.
|
|
*/
|
|
LOG_UNSUPPORTED(TRUE);
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:
|
|
reg = sx->prim_id;
|
|
break;
|
|
|
|
default:
|
|
reg = ureg_src(translate_operand(sx, &operand->base, 0));
|
|
}
|
|
|
|
reg = ureg_swizzle(reg,
|
|
operand->swizzle[0],
|
|
operand->swizzle[1],
|
|
operand->swizzle[2],
|
|
operand->swizzle[3]);
|
|
|
|
switch (operand->modifier) {
|
|
case D3D10_SB_OPERAND_MODIFIER_NONE:
|
|
break;
|
|
case D3D10_SB_OPERAND_MODIFIER_NEG:
|
|
reg = ureg_negate(reg);
|
|
break;
|
|
case D3D10_SB_OPERAND_MODIFIER_ABS:
|
|
reg = ureg_abs(reg);
|
|
break;
|
|
case D3D10_SB_OPERAND_MODIFIER_ABSNEG:
|
|
reg = ureg_negate(ureg_abs(reg));
|
|
break;
|
|
default:
|
|
assert(0);
|
|
}
|
|
|
|
return reg;
|
|
}
|
|
|
|
static uint
|
|
translate_resource_dimension(D3D10_SB_RESOURCE_DIMENSION dim)
|
|
{
|
|
switch (dim) {
|
|
case D3D10_SB_RESOURCE_DIMENSION_UNKNOWN:
|
|
return TGSI_TEXTURE_UNKNOWN;
|
|
case D3D10_SB_RESOURCE_DIMENSION_BUFFER:
|
|
return TGSI_TEXTURE_BUFFER;
|
|
case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1D:
|
|
return TGSI_TEXTURE_1D;
|
|
case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2D:
|
|
return TGSI_TEXTURE_2D;
|
|
case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMS:
|
|
return TGSI_TEXTURE_2D_MSAA;
|
|
case D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D:
|
|
return TGSI_TEXTURE_3D;
|
|
case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE:
|
|
return TGSI_TEXTURE_CUBE;
|
|
case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1DARRAY:
|
|
return TGSI_TEXTURE_1D_ARRAY;
|
|
case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY:
|
|
return TGSI_TEXTURE_2D_ARRAY;
|
|
case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMSARRAY:
|
|
return TGSI_TEXTURE_2D_ARRAY_MSAA;
|
|
case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBEARRAY:
|
|
return TGSI_TEXTURE_CUBE_ARRAY;
|
|
default:
|
|
assert(0);
|
|
return TGSI_TEXTURE_UNKNOWN;
|
|
}
|
|
}
|
|
|
|
static uint
|
|
texture_dim_from_tgsi_target(unsigned tgsi_target)
|
|
{
|
|
switch (tgsi_target) {
|
|
case TGSI_TEXTURE_BUFFER:
|
|
case TGSI_TEXTURE_1D:
|
|
case TGSI_TEXTURE_1D_ARRAY:
|
|
return 1;
|
|
case TGSI_TEXTURE_2D:
|
|
case TGSI_TEXTURE_2D_MSAA:
|
|
case TGSI_TEXTURE_CUBE:
|
|
case TGSI_TEXTURE_2D_ARRAY:
|
|
case TGSI_TEXTURE_2D_ARRAY_MSAA:
|
|
return 2;
|
|
case TGSI_TEXTURE_3D:
|
|
return 3;
|
|
case TGSI_TEXTURE_UNKNOWN:
|
|
default:
|
|
assert(0);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
static boolean
|
|
operand_is_scalar(const struct Shader_src_operand *operand)
|
|
{
|
|
return operand->swizzle[0] == operand->swizzle[1] &&
|
|
operand->swizzle[1] == operand->swizzle[2] &&
|
|
operand->swizzle[2] == operand->swizzle[3];
|
|
}
|
|
|
|
static void
|
|
Shader_add_call(struct Shader_xlate *sx,
|
|
unsigned d3d_label,
|
|
unsigned tgsi_label_token)
|
|
{
|
|
ASSERT(sx->num_calls < sx->max_calls);
|
|
|
|
sx->calls[sx->num_calls].d3d_label = d3d_label;
|
|
sx->calls[sx->num_calls].tgsi_label_token = tgsi_label_token;
|
|
sx->num_calls++;
|
|
}
|
|
|
|
static void
|
|
Shader_add_label(struct Shader_xlate *sx,
|
|
unsigned d3d_label,
|
|
unsigned tgsi_insn_no)
|
|
{
|
|
ASSERT(sx->num_labels < sx->max_labels);
|
|
|
|
sx->labels[sx->num_labels].d3d_label = d3d_label;
|
|
sx->labels[sx->num_labels].tgsi_insn_no = tgsi_insn_no;
|
|
sx->num_labels++;
|
|
}
|
|
|
|
|
|
static void
|
|
sample_ureg_emit(struct ureg_program *ureg,
|
|
unsigned tgsi_opcode,
|
|
unsigned num_src,
|
|
struct Shader_opcode *opcode,
|
|
struct ureg_dst dst,
|
|
struct ureg_src *src)
|
|
{
|
|
unsigned num_offsets = 0;
|
|
struct tgsi_texture_offset texoffsets;
|
|
|
|
memset(&texoffsets, 0, sizeof texoffsets);
|
|
|
|
if (opcode->imm_texel_offset.u ||
|
|
opcode->imm_texel_offset.v ||
|
|
opcode->imm_texel_offset.w) {
|
|
struct ureg_src offsetreg;
|
|
num_offsets = 1;
|
|
/* don't actually always need all 3 values */
|
|
offsetreg = ureg_imm3i(ureg,
|
|
opcode->imm_texel_offset.u,
|
|
opcode->imm_texel_offset.v,
|
|
opcode->imm_texel_offset.w);
|
|
texoffsets.File = offsetreg.File;
|
|
texoffsets.Index = offsetreg.Index;
|
|
texoffsets.SwizzleX = offsetreg.SwizzleX;
|
|
texoffsets.SwizzleY = offsetreg.SwizzleY;
|
|
texoffsets.SwizzleZ = offsetreg.SwizzleZ;
|
|
}
|
|
|
|
ureg_tex_insn(ureg,
|
|
tgsi_opcode,
|
|
&dst, 1,
|
|
TGSI_TEXTURE_UNKNOWN,
|
|
TGSI_RETURN_TYPE_UNKNOWN,
|
|
&texoffsets, num_offsets,
|
|
src, num_src);
|
|
}
|
|
|
|
typedef void (*unary_ureg_func)(struct ureg_program *ureg, struct ureg_dst dst,
|
|
struct ureg_src src);
|
|
static void
|
|
expand_unary_to_scalarf(struct ureg_program *ureg, unary_ureg_func func,
|
|
struct Shader_xlate *sx, struct Shader_opcode *opcode)
|
|
{
|
|
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
|
|
struct ureg_dst dst = translate_dst_operand(sx, &opcode->dst[0],
|
|
opcode->saturate);
|
|
struct ureg_src src = translate_src_operand(sx, &opcode->src[0], OF_FLOAT);
|
|
struct ureg_dst scalar_dst;
|
|
ureg_MOV(ureg, tmp, src);
|
|
src = ureg_src(tmp);
|
|
|
|
scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_X);
|
|
if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
|
|
func(ureg, scalar_dst,
|
|
ureg_scalar(src, TGSI_SWIZZLE_X));
|
|
}
|
|
scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_Y);
|
|
if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
|
|
func(ureg, scalar_dst,
|
|
ureg_scalar(src, TGSI_SWIZZLE_Y));
|
|
}
|
|
scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_Z);
|
|
if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
|
|
func(ureg, scalar_dst,
|
|
ureg_scalar(src, TGSI_SWIZZLE_Z));
|
|
}
|
|
scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_W);
|
|
if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
|
|
func(ureg, scalar_dst,
|
|
ureg_scalar(src, TGSI_SWIZZLE_W));
|
|
}
|
|
ureg_release_temporary(ureg, tmp);
|
|
}
|
|
|
|
const struct tgsi_token *
|
|
Shader_tgsi_translate(const unsigned *code,
|
|
unsigned *output_mapping)
|
|
{
|
|
struct Shader_xlate sx;
|
|
struct Shader_parser parser;
|
|
struct ureg_program *ureg = NULL;
|
|
struct Shader_opcode opcode;
|
|
const struct tgsi_token *tokens = NULL;
|
|
uint nr_tokens;
|
|
boolean shader_dumped = FALSE;
|
|
boolean inside_sub = FALSE;
|
|
uint i, j;
|
|
|
|
memset(&sx, 0, sizeof sx);
|
|
|
|
Shader_parse_init(&parser, code);
|
|
|
|
if (st_debug & ST_DEBUG_TGSI) {
|
|
dx10_shader_dump_tokens(code);
|
|
shader_dumped = TRUE;
|
|
}
|
|
|
|
sx.max_calls = 64;
|
|
sx.calls = (struct Shader_call *)MALLOC(sx.max_calls *
|
|
sizeof(struct Shader_call));
|
|
sx.num_calls = 0;
|
|
|
|
sx.max_labels = 64;
|
|
sx.labels = (struct Shader_label *)MALLOC(sx.max_labels *
|
|
sizeof(struct Shader_call));
|
|
sx.num_labels = 0;
|
|
|
|
|
|
|
|
/* Header. */
|
|
switch (parser.header.type) {
|
|
case D3D10_SB_PIXEL_SHADER:
|
|
ureg = ureg_create(PIPE_SHADER_FRAGMENT);
|
|
break;
|
|
case D3D10_SB_VERTEX_SHADER:
|
|
ureg = ureg_create(PIPE_SHADER_VERTEX);
|
|
break;
|
|
case D3D10_SB_GEOMETRY_SHADER:
|
|
ureg = ureg_create(PIPE_SHADER_GEOMETRY);
|
|
break;
|
|
}
|
|
|
|
assert(ureg);
|
|
sx.ureg = ureg;
|
|
|
|
while (Shader_parse_opcode(&parser, &opcode)) {
|
|
const struct dx10_opcode_xlate *ox;
|
|
|
|
assert(opcode.type < D3D10_SB_NUM_OPCODES);
|
|
ox = &opcode_xlate[opcode.type];
|
|
|
|
switch (opcode.type) {
|
|
case D3D10_SB_OPCODE_EXP:
|
|
expand_unary_to_scalarf(ureg, ureg_EX2, &sx, &opcode);
|
|
break;
|
|
case D3D10_SB_OPCODE_SQRT:
|
|
expand_unary_to_scalarf(ureg, ureg_SQRT, &sx, &opcode);
|
|
break;
|
|
case D3D10_SB_OPCODE_RSQ:
|
|
expand_unary_to_scalarf(ureg, ureg_RSQ, &sx, &opcode);
|
|
break;
|
|
case D3D10_SB_OPCODE_LOG:
|
|
expand_unary_to_scalarf(ureg, ureg_LG2, &sx, &opcode);
|
|
break;
|
|
case D3D10_SB_OPCODE_IMUL:
|
|
if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
|
|
ureg_IMUL_HI(ureg,
|
|
translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
|
|
translate_src_operand(&sx, &opcode.src[0], OF_INT),
|
|
translate_src_operand(&sx, &opcode.src[1], OF_INT));
|
|
}
|
|
|
|
if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
|
|
ureg_UMUL(ureg,
|
|
translate_dst_operand(&sx, &opcode.dst[1], opcode.saturate),
|
|
translate_src_operand(&sx, &opcode.src[0], OF_INT),
|
|
translate_src_operand(&sx, &opcode.src[1], OF_INT));
|
|
}
|
|
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_FTOI: {
|
|
/* XXX: tgsi (and just about everybody else, c, opencl, glsl) has
|
|
* out-of-range (and NaN) values undefined for f2i/f2u, but d3d10
|
|
* requires clamping to min and max representable value (as well as 0
|
|
* for NaNs) (this applies to both ftoi and ftou). At least the online
|
|
* docs state that - this is consistent with generic d3d10 conversion
|
|
* rules.
|
|
* For FTOI, we cheat a bit here - in particular depending on noone
|
|
* caring about NaNs, and depending on the (undefined!) behavior of
|
|
* F2I returning 0x80000000 for too negative values (which works with
|
|
* x86 sse). Hence only need to clamp too positive values.
|
|
* Note that it is impossible to clamp using a float, since 2^31 - 1
|
|
* is not exactly representable with a float.
|
|
*/
|
|
struct ureg_dst too_large = ureg_DECL_temporary(ureg);
|
|
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
|
|
ureg_FSGE(ureg, too_large,
|
|
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
|
|
ureg_imm1f(ureg, 2147483648.0f));
|
|
ureg_F2I(ureg, tmp,
|
|
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
|
|
ureg_UCMP(ureg,
|
|
translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
|
|
ureg_src(too_large),
|
|
ureg_imm1i(ureg, 0x7fffffff),
|
|
ureg_src(tmp));
|
|
ureg_release_temporary(ureg, too_large);
|
|
ureg_release_temporary(ureg, tmp);
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_FTOU: {
|
|
/* For ftou, we need to do both clamps, which as a bonus also
|
|
* gets us correct NaN behavior.
|
|
* Note that it is impossible to clamp using a float against the upper
|
|
* limit, since 2^32 - 1 is not exactly representable with a float,
|
|
* but the clamp against 0.0 certainly works just fine.
|
|
*/
|
|
struct ureg_dst too_large = ureg_DECL_temporary(ureg);
|
|
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
|
|
ureg_FSGE(ureg, too_large,
|
|
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
|
|
ureg_imm1f(ureg, 4294967296.0f));
|
|
/* clamp negative values + NaN to zero.
|
|
* (Could be done slightly more efficient in llvmpipe due to
|
|
* MAX NaN behavior handling.)
|
|
*/
|
|
ureg_MAX(ureg, tmp,
|
|
ureg_imm1f(ureg, 0.0f),
|
|
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
|
|
ureg_F2U(ureg, tmp,
|
|
ureg_src(tmp));
|
|
ureg_UCMP(ureg,
|
|
translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
|
|
ureg_src(too_large),
|
|
ureg_imm1u(ureg, 0xffffffff),
|
|
ureg_src(tmp));
|
|
ureg_release_temporary(ureg, too_large);
|
|
ureg_release_temporary(ureg, tmp);
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_LD_MS:
|
|
/* XXX: We don't support multi-sampling yet, but we need to parse
|
|
* this opcode regardless, so we just ignore sample index operand
|
|
* for now */
|
|
case D3D10_SB_OPCODE_LD:
|
|
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
|
|
unsigned resource = opcode.src[1].base.index[0].imm;
|
|
assert(opcode.src[1].base.index_dim == 1);
|
|
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
|
|
|
|
if (ureg_src_is_undef(sx.samplers[resource])) {
|
|
sx.samplers[resource] =
|
|
ureg_DECL_sampler(ureg, resource);
|
|
}
|
|
|
|
ureg_TXF(ureg,
|
|
translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
|
|
sx.resources[resource].target,
|
|
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
|
|
sx.samplers[resource]);
|
|
}
|
|
else {
|
|
struct ureg_src srcreg[2];
|
|
srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_INT);
|
|
srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_INT);
|
|
|
|
sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_I, 2, &opcode,
|
|
translate_dst_operand(&sx, &opcode.dst[0],
|
|
opcode.saturate),
|
|
srcreg);
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_CUSTOMDATA:
|
|
if (opcode.customdata._class ==
|
|
D3D10_SB_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER) {
|
|
sx.imms =
|
|
ureg_DECL_immediate_block_uint(ureg,
|
|
opcode.customdata.u.constbuf.data,
|
|
opcode.customdata.u.constbuf.count);
|
|
} else {
|
|
assert(0);
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_RESINFO:
|
|
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
|
|
unsigned resource = opcode.src[1].base.index[0].imm;
|
|
assert(opcode.src[1].base.index_dim == 1);
|
|
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
|
|
|
|
if (ureg_src_is_undef(sx.samplers[resource])) {
|
|
sx.samplers[resource] =
|
|
ureg_DECL_sampler(ureg, resource);
|
|
}
|
|
/* don't bother with swizzle, ret type etc. */
|
|
ureg_TXQ(ureg,
|
|
translate_dst_operand(&sx, &opcode.dst[0],
|
|
opcode.saturate),
|
|
sx.resources[resource].target,
|
|
translate_src_operand(&sx, &opcode.src[0], OF_UINT),
|
|
sx.samplers[resource]);
|
|
}
|
|
else {
|
|
struct ureg_dst r0 = ureg_DECL_temporary(ureg);
|
|
struct ureg_src tsrc = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
|
|
struct ureg_dst dstreg = translate_dst_operand(&sx, &opcode.dst[0],
|
|
opcode.saturate);
|
|
|
|
/* while specs say swizzle is ignored better safe than sorry */
|
|
tsrc.SwizzleX = TGSI_SWIZZLE_X;
|
|
tsrc.SwizzleY = TGSI_SWIZZLE_Y;
|
|
tsrc.SwizzleZ = TGSI_SWIZZLE_Z;
|
|
tsrc.SwizzleW = TGSI_SWIZZLE_W;
|
|
|
|
ureg_SVIEWINFO(ureg, r0,
|
|
translate_src_operand(&sx, &opcode.src[0], OF_UINT),
|
|
tsrc);
|
|
|
|
tsrc = ureg_src(r0);
|
|
tsrc.SwizzleX = opcode.src[1].swizzle[0];
|
|
tsrc.SwizzleY = opcode.src[1].swizzle[1];
|
|
tsrc.SwizzleZ = opcode.src[1].swizzle[2];
|
|
tsrc.SwizzleW = opcode.src[1].swizzle[3];
|
|
|
|
if (opcode.specific.resinfo_ret_type ==
|
|
D3D10_SB_RESINFO_INSTRUCTION_RETURN_UINT) {
|
|
ureg_MOV(ureg, dstreg, tsrc);
|
|
}
|
|
else if (opcode.specific.resinfo_ret_type ==
|
|
D3D10_SB_RESINFO_INSTRUCTION_RETURN_FLOAT) {
|
|
ureg_I2F(ureg, dstreg, tsrc);
|
|
}
|
|
else { /* D3D10_SB_RESINFO_INSTRUCTION_RETURN_RCPFLOAT */
|
|
unsigned i;
|
|
/*
|
|
* Must apply rcp only to parts determined by dims,
|
|
* (width/height/depth) but NOT to array size nor mip levels
|
|
* hence need to figure that out here.
|
|
* This is one sick modifier if you ask me!
|
|
*/
|
|
unsigned res_index = opcode.src[1].base.index[0].imm;
|
|
unsigned target = sx.resources[res_index].target;
|
|
unsigned dims = texture_dim_from_tgsi_target(target);
|
|
|
|
ureg_I2F(ureg, r0, ureg_src(r0));
|
|
tsrc = ureg_src(r0);
|
|
for (i = 0; i < 4; i++) {
|
|
unsigned dst_swizzle = opcode.src[1].swizzle[i];
|
|
struct ureg_dst dstregmasked = ureg_writemask(dstreg, 1 << i);
|
|
/*
|
|
* could do one mov with multiple write mask bits set
|
|
* but rcp is scalar anyway.
|
|
*/
|
|
if (dst_swizzle < dims) {
|
|
ureg_RCP(ureg, dstregmasked, ureg_scalar(tsrc, dst_swizzle));
|
|
}
|
|
else {
|
|
ureg_MOV(ureg, dstregmasked, ureg_scalar(tsrc, dst_swizzle));
|
|
}
|
|
}
|
|
}
|
|
ureg_release_temporary(ureg, r0);
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_SAMPLE:
|
|
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
|
|
assert(opcode.src[1].base.index_dim == 1);
|
|
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
|
|
|
|
LOG_UNSUPPORTED(opcode.src[1].base.index[0].imm != opcode.src[2].base.index[0].imm);
|
|
|
|
ureg_TEX(ureg,
|
|
translate_dst_operand(&sx, &opcode.dst[0],
|
|
opcode.saturate),
|
|
sx.resources[opcode.src[1].base.index[0].imm].target,
|
|
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
|
|
translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
|
|
}
|
|
else {
|
|
struct ureg_src srcreg[3];
|
|
srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
|
|
srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
|
|
srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
|
|
|
|
sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE, 3, &opcode,
|
|
translate_dst_operand(&sx, &opcode.dst[0],
|
|
opcode.saturate),
|
|
srcreg);
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_SAMPLE_C:
|
|
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
|
|
struct ureg_dst r0 = ureg_DECL_temporary(ureg);
|
|
|
|
/* XXX: Support only 2D texture targets for now.
|
|
* Need to figure out how to pack the compare value
|
|
* for other dimensions and if there is enough space
|
|
* in a single operand for all possible cases.
|
|
*/
|
|
LOG_UNSUPPORTED(sx.resources[opcode.src[1].base.index[0].imm].target !=
|
|
TGSI_TEXTURE_2D);
|
|
|
|
assert(opcode.src[1].base.index_dim == 1);
|
|
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
|
|
|
|
/* Insert the compare value into .z component.
|
|
*/
|
|
ureg_MOV(ureg,
|
|
ureg_writemask(r0, TGSI_WRITEMASK_XYW),
|
|
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
|
|
ureg_MOV(ureg,
|
|
ureg_writemask(r0, TGSI_WRITEMASK_Z),
|
|
translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
|
|
|
|
/* XXX: Pass explicit Lod=0 in D3D10_SB_OPCODE_SAMPLE_C_LZ case.
|
|
*/
|
|
|
|
ureg_TEX(ureg,
|
|
translate_dst_operand(&sx, &opcode.dst[0],
|
|
opcode.saturate),
|
|
sx.resources[opcode.src[1].base.index[0].imm].target,
|
|
ureg_src(r0),
|
|
translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
|
|
|
|
ureg_release_temporary(ureg, r0);
|
|
}
|
|
else {
|
|
struct ureg_src srcreg[4];
|
|
srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
|
|
srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
|
|
srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
|
|
srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
|
|
|
|
sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_C, 4, &opcode,
|
|
translate_dst_operand(&sx, &opcode.dst[0],
|
|
opcode.saturate),
|
|
srcreg);
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_SAMPLE_C_LZ:
|
|
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
|
|
struct ureg_dst r0 = ureg_DECL_temporary(ureg);
|
|
|
|
assert(opcode.src[1].base.index_dim == 1);
|
|
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
|
|
|
|
/* XXX: Support only 2D texture targets for now.
|
|
* Need to figure out how to pack the compare value
|
|
* for other dimensions and if there is enough space
|
|
* in a single operand for all possible cases.
|
|
*/
|
|
LOG_UNSUPPORTED(sx.resources[opcode.src[1].base.index[0].imm].target !=
|
|
TGSI_TEXTURE_2D);
|
|
|
|
/* Insert the compare value into .z component.
|
|
* Insert 0 into .w component.
|
|
*/
|
|
ureg_MOV(ureg,
|
|
ureg_writemask(r0, TGSI_WRITEMASK_XY),
|
|
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
|
|
ureg_MOV(ureg,
|
|
ureg_writemask(r0, TGSI_WRITEMASK_Z),
|
|
translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
|
|
ureg_MOV(ureg,
|
|
ureg_writemask(r0, TGSI_WRITEMASK_W),
|
|
ureg_imm1f(ureg, 0.0f));
|
|
|
|
ureg_TXL(ureg,
|
|
translate_dst_operand(&sx, &opcode.dst[0],
|
|
opcode.saturate),
|
|
sx.resources[opcode.src[1].base.index[0].imm].target,
|
|
ureg_src(r0),
|
|
translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
|
|
|
|
ureg_release_temporary(ureg, r0);
|
|
}
|
|
else {
|
|
struct ureg_src srcreg[4];
|
|
srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
|
|
srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
|
|
srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
|
|
srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
|
|
|
|
sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_C_LZ, 4, &opcode,
|
|
translate_dst_operand(&sx, &opcode.dst[0],
|
|
opcode.saturate),
|
|
srcreg);
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_SAMPLE_L:
|
|
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
|
|
struct ureg_dst r0 = ureg_DECL_temporary(ureg);
|
|
|
|
assert(opcode.src[1].base.index_dim == 1);
|
|
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
|
|
|
|
/* Insert LOD into .w component.
|
|
*/
|
|
ureg_MOV(ureg,
|
|
ureg_writemask(r0, TGSI_WRITEMASK_XYZ),
|
|
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
|
|
ureg_MOV(ureg,
|
|
ureg_writemask(r0, TGSI_WRITEMASK_W),
|
|
translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
|
|
|
|
ureg_TXL(ureg,
|
|
translate_dst_operand(&sx, &opcode.dst[0],
|
|
opcode.saturate),
|
|
sx.resources[opcode.src[1].base.index[0].imm].target,
|
|
ureg_src(r0),
|
|
translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
|
|
|
|
ureg_release_temporary(ureg, r0);
|
|
}
|
|
else {
|
|
struct ureg_src srcreg[4];
|
|
srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
|
|
srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
|
|
srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
|
|
srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
|
|
|
|
sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_L, 4, &opcode,
|
|
translate_dst_operand(&sx, &opcode.dst[0],
|
|
opcode.saturate),
|
|
srcreg);
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_SAMPLE_D:
|
|
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
|
|
assert(opcode.src[1].base.index_dim == 1);
|
|
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
|
|
|
|
ureg_TXD(ureg,
|
|
translate_dst_operand(&sx, &opcode.dst[0],
|
|
opcode.saturate),
|
|
sx.resources[opcode.src[1].base.index[0].imm].target,
|
|
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
|
|
translate_src_operand(&sx, &opcode.src[3], OF_FLOAT),
|
|
translate_src_operand(&sx, &opcode.src[4], OF_FLOAT),
|
|
translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
|
|
}
|
|
else {
|
|
struct ureg_src srcreg[5];
|
|
srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
|
|
srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
|
|
srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
|
|
srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
|
|
srcreg[4] = translate_src_operand(&sx, &opcode.src[4], OF_FLOAT);
|
|
|
|
sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_D, 5, &opcode,
|
|
translate_dst_operand(&sx, &opcode.dst[0],
|
|
opcode.saturate),
|
|
srcreg);
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_SAMPLE_B:
|
|
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
|
|
struct ureg_dst r0 = ureg_DECL_temporary(ureg);
|
|
|
|
assert(opcode.src[1].base.index_dim == 1);
|
|
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
|
|
|
|
/* Insert LOD bias into .w component.
|
|
*/
|
|
ureg_MOV(ureg,
|
|
ureg_writemask(r0, TGSI_WRITEMASK_XYZ),
|
|
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
|
|
ureg_MOV(ureg,
|
|
ureg_writemask(r0, TGSI_WRITEMASK_W),
|
|
translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
|
|
|
|
ureg_TXB(ureg,
|
|
translate_dst_operand(&sx, &opcode.dst[0],
|
|
opcode.saturate),
|
|
sx.resources[opcode.src[1].base.index[0].imm].target,
|
|
ureg_src(r0),
|
|
translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
|
|
|
|
ureg_release_temporary(ureg, r0);
|
|
}
|
|
else {
|
|
struct ureg_src srcreg[4];
|
|
srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
|
|
srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
|
|
srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
|
|
srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
|
|
|
|
sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_B, 4, &opcode,
|
|
translate_dst_operand(&sx, &opcode.dst[0],
|
|
opcode.saturate),
|
|
srcreg);
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_SINCOS: {
|
|
struct ureg_dst src0 = ureg_DECL_temporary(ureg);
|
|
ureg_MOV(ureg, src0, translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
|
|
if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
|
|
struct ureg_dst dst = translate_dst_operand(&sx, &opcode.dst[0],
|
|
opcode.saturate);
|
|
struct ureg_src src = ureg_src(src0);
|
|
ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
|
|
ureg_scalar(src, TGSI_SWIZZLE_X));
|
|
ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
|
|
ureg_scalar(src, TGSI_SWIZZLE_Y));
|
|
ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
|
|
ureg_scalar(src, TGSI_SWIZZLE_Z));
|
|
ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
|
|
ureg_scalar(src, TGSI_SWIZZLE_W));
|
|
}
|
|
if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
|
|
struct ureg_dst dst = translate_dst_operand(&sx, &opcode.dst[1],
|
|
opcode.saturate);
|
|
struct ureg_src src = ureg_src(src0);
|
|
ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
|
|
ureg_scalar(src, TGSI_SWIZZLE_X));
|
|
ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
|
|
ureg_scalar(src, TGSI_SWIZZLE_Y));
|
|
ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
|
|
ureg_scalar(src, TGSI_SWIZZLE_Z));
|
|
ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
|
|
ureg_scalar(src, TGSI_SWIZZLE_W));
|
|
}
|
|
ureg_release_temporary(ureg, src0);
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_UDIV: {
|
|
struct ureg_dst src0 = ureg_DECL_temporary(ureg);
|
|
struct ureg_dst src1 = ureg_DECL_temporary(ureg);
|
|
ureg_MOV(ureg, src0, translate_src_operand(&sx, &opcode.src[0], OF_UINT));
|
|
ureg_MOV(ureg, src1, translate_src_operand(&sx, &opcode.src[1], OF_UINT));
|
|
if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
|
|
ureg_UDIV(ureg,
|
|
translate_dst_operand(&sx, &opcode.dst[0],
|
|
opcode.saturate),
|
|
ureg_src(src0), ureg_src(src1));
|
|
}
|
|
if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
|
|
ureg_UMOD(ureg,
|
|
translate_dst_operand(&sx, &opcode.dst[1],
|
|
opcode.saturate),
|
|
ureg_src(src0), ureg_src(src1));
|
|
}
|
|
ureg_release_temporary(ureg, src0);
|
|
ureg_release_temporary(ureg, src1);
|
|
}
|
|
break;
|
|
case D3D10_SB_OPCODE_UMUL: {
|
|
if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
|
|
ureg_UMUL_HI(ureg,
|
|
translate_dst_operand(&sx, &opcode.dst[0],
|
|
opcode.saturate),
|
|
translate_src_operand(&sx, &opcode.src[0], OF_UINT),
|
|
translate_src_operand(&sx, &opcode.src[1], OF_UINT));
|
|
}
|
|
if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
|
|
ureg_UMUL(ureg,
|
|
translate_dst_operand(&sx, &opcode.dst[1],
|
|
opcode.saturate),
|
|
translate_src_operand(&sx, &opcode.src[0], OF_UINT),
|
|
translate_src_operand(&sx, &opcode.src[1], OF_UINT));
|
|
}
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_DCL_RESOURCE:
|
|
{
|
|
unsigned target;
|
|
unsigned res_index = opcode.dst[0].base.index[0].imm;
|
|
assert(opcode.dst[0].base.index_dim == 1);
|
|
assert(res_index < SHADER_MAX_RESOURCES);
|
|
|
|
target = translate_resource_dimension(opcode.specific.dcl_resource_dimension);
|
|
sx.resources[res_index].target = target;
|
|
if (!(st_debug & ST_DEBUG_OLD_TEX_OPS)) {
|
|
sx.sv[res_index] =
|
|
ureg_DECL_sampler_view(ureg, res_index, target,
|
|
trans_dcl_ret_type(opcode.dcl_resource_ret_type[0]),
|
|
trans_dcl_ret_type(opcode.dcl_resource_ret_type[1]),
|
|
trans_dcl_ret_type(opcode.dcl_resource_ret_type[2]),
|
|
trans_dcl_ret_type(opcode.dcl_resource_ret_type[3]));
|
|
}
|
|
break;
|
|
}
|
|
|
|
case D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER: {
|
|
unsigned num_constants = opcode.src[0].base.index[1].imm;
|
|
|
|
assert(opcode.src[0].base.index[0].imm < PIPE_MAX_CONSTANT_BUFFERS);
|
|
|
|
if (num_constants == 0) {
|
|
num_constants = SHADER_MAX_CONSTS;
|
|
} else {
|
|
assert(num_constants <= SHADER_MAX_CONSTS);
|
|
}
|
|
|
|
ureg_DECL_constant2D(ureg,
|
|
0,
|
|
num_constants - 1,
|
|
opcode.src[0].base.index[0].imm);
|
|
break;
|
|
}
|
|
|
|
case D3D10_SB_OPCODE_DCL_SAMPLER:
|
|
assert(opcode.dst[0].base.index_dim == 1);
|
|
assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_SAMPLERS);
|
|
|
|
sx.samplers[opcode.dst[0].base.index[0].imm] =
|
|
ureg_DECL_sampler(ureg,
|
|
opcode.dst[0].base.index[0].imm);
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
|
|
assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
|
|
|
|
switch (opcode.specific.dcl_gs_output_primitive_topology) {
|
|
case D3D10_SB_PRIMITIVE_TOPOLOGY_POINTLIST:
|
|
ureg_property(sx.ureg,
|
|
TGSI_PROPERTY_GS_OUTPUT_PRIM,
|
|
PIPE_PRIM_POINTS);
|
|
break;
|
|
|
|
case D3D10_SB_PRIMITIVE_TOPOLOGY_LINESTRIP:
|
|
ureg_property(sx.ureg,
|
|
TGSI_PROPERTY_GS_OUTPUT_PRIM,
|
|
PIPE_PRIM_LINE_STRIP);
|
|
break;
|
|
|
|
case D3D10_SB_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP:
|
|
ureg_property(sx.ureg,
|
|
TGSI_PROPERTY_GS_OUTPUT_PRIM,
|
|
PIPE_PRIM_TRIANGLE_STRIP);
|
|
break;
|
|
|
|
default:
|
|
assert(0);
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE:
|
|
assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
|
|
|
|
/* Figure out the second dimension of GS inputs.
|
|
*/
|
|
switch (opcode.specific.dcl_gs_input_primitive) {
|
|
case D3D10_SB_PRIMITIVE_POINT:
|
|
declare_vertices_in(&sx, 1);
|
|
ureg_property(sx.ureg,
|
|
TGSI_PROPERTY_GS_INPUT_PRIM,
|
|
PIPE_PRIM_POINTS);
|
|
break;
|
|
|
|
case D3D10_SB_PRIMITIVE_LINE:
|
|
declare_vertices_in(&sx, 2);
|
|
ureg_property(sx.ureg,
|
|
TGSI_PROPERTY_GS_INPUT_PRIM,
|
|
PIPE_PRIM_LINES);
|
|
break;
|
|
|
|
case D3D10_SB_PRIMITIVE_TRIANGLE:
|
|
declare_vertices_in(&sx, 3);
|
|
ureg_property(sx.ureg,
|
|
TGSI_PROPERTY_GS_INPUT_PRIM,
|
|
PIPE_PRIM_TRIANGLES);
|
|
break;
|
|
|
|
case D3D10_SB_PRIMITIVE_LINE_ADJ:
|
|
declare_vertices_in(&sx, 4);
|
|
ureg_property(sx.ureg,
|
|
TGSI_PROPERTY_GS_INPUT_PRIM,
|
|
PIPE_PRIM_LINES_ADJACENCY);
|
|
break;
|
|
|
|
case D3D10_SB_PRIMITIVE_TRIANGLE_ADJ:
|
|
declare_vertices_in(&sx, 6);
|
|
ureg_property(sx.ureg,
|
|
TGSI_PROPERTY_GS_INPUT_PRIM,
|
|
PIPE_PRIM_TRIANGLES_ADJACENCY);
|
|
break;
|
|
|
|
default:
|
|
assert(0);
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
|
|
assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
|
|
|
|
ureg_property(sx.ureg,
|
|
TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
|
|
opcode.specific.dcl_max_output_vertex_count);
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_DCL_INPUT:
|
|
if (parser.header.type == D3D10_SB_VERTEX_SHADER) {
|
|
dcl_vs_input(&sx, ureg, &opcode.dst[0]);
|
|
} else {
|
|
assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
|
|
dcl_gs_input(&sx, ureg, &opcode.dst[0]);
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_DCL_INPUT_SGV:
|
|
assert(parser.header.type == D3D10_SB_VERTEX_SHADER);
|
|
dcl_sgv_input(&sx, ureg, &opcode.dst[0], opcode.dcl_siv_name);
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_DCL_INPUT_SIV:
|
|
assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
|
|
dcl_siv_input(&sx, ureg, &opcode.dst[0], opcode.dcl_siv_name);
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_DCL_INPUT_PS:
|
|
assert(parser.header.type == D3D10_SB_PIXEL_SHADER);
|
|
dcl_ps_input(&sx, ureg, &opcode.dst[0],
|
|
opcode.specific.dcl_in_ps_interp);
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_DCL_INPUT_PS_SGV:
|
|
assert(parser.header.type == D3D10_SB_PIXEL_SHADER);
|
|
dcl_ps_sgv_input(&sx, ureg, &opcode.dst[0],
|
|
opcode.dcl_siv_name);
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_DCL_INPUT_PS_SIV:
|
|
assert(parser.header.type == D3D10_SB_PIXEL_SHADER);
|
|
dcl_ps_siv_input(&sx, ureg, &opcode.dst[0],
|
|
opcode.dcl_siv_name,
|
|
opcode.specific.dcl_in_ps_interp);
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_DCL_OUTPUT:
|
|
if (parser.header.type == D3D10_SB_PIXEL_SHADER) {
|
|
/* Pixel shader outputs. */
|
|
if (opcode.dst[0].base.type == D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {
|
|
/* Depth output. */
|
|
assert(opcode.dst[0].base.index_dim == 0);
|
|
|
|
sx.output_depth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z, 0, 1);
|
|
sx.output_depth = ureg_writemask(sx.output_depth, TGSI_WRITEMASK_Z);
|
|
} else {
|
|
/* Color outputs. */
|
|
assert(opcode.dst[0].base.index_dim == 1);
|
|
assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
|
|
|
|
dcl_base_output(&sx, ureg,
|
|
ureg_DECL_output(ureg,
|
|
TGSI_SEMANTIC_COLOR,
|
|
opcode.dst[0].base.index[0].imm),
|
|
&opcode.dst[0]);
|
|
}
|
|
} else {
|
|
assert(opcode.dst[0].base.index_dim == 1);
|
|
assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
|
|
|
|
if (output_mapping) {
|
|
unsigned nr_outputs = ureg_get_nr_outputs(ureg);
|
|
output_mapping[nr_outputs]
|
|
= opcode.dst[0].base.index[0].imm;
|
|
}
|
|
dcl_base_output(&sx, ureg,
|
|
ureg_DECL_output(ureg,
|
|
TGSI_SEMANTIC_GENERIC,
|
|
opcode.dst[0].base.index[0].imm),
|
|
&opcode.dst[0]);
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_DCL_OUTPUT_SIV:
|
|
assert(opcode.dst[0].base.index_dim == 1);
|
|
assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
|
|
|
|
if (output_mapping) {
|
|
unsigned nr_outputs = ureg_get_nr_outputs(ureg);
|
|
output_mapping[nr_outputs]
|
|
= opcode.dst[0].base.index[0].imm;
|
|
}
|
|
if (opcode.dcl_siv_name == D3D10_SB_NAME_CLIP_DISTANCE ||
|
|
opcode.dcl_siv_name == D3D10_SB_NAME_CULL_DISTANCE) {
|
|
/*
|
|
* FIXME: this is quite broken. gallium no longer has separate
|
|
* clip/cull dists, using (max 2) combined clipdist/culldist regs
|
|
* instead. Unlike d3d10 though, which is clip and which cull is
|
|
* simply determined by the number of clip/cull dists (that is,
|
|
* all clip dists must come first).
|
|
*/
|
|
unsigned numcliporcull = sx.num_clip_distances_declared +
|
|
sx.num_cull_distances_declared;
|
|
sx.clip_distance_mapping[numcliporcull].d3d =
|
|
opcode.dst[0].base.index[0].imm;
|
|
sx.clip_distance_mapping[numcliporcull].tgsi = numcliporcull;
|
|
if (opcode.dcl_siv_name == D3D10_SB_NAME_CLIP_DISTANCE) {
|
|
++sx.num_clip_distances_declared;
|
|
/* re-emit should be safe... */
|
|
ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
|
|
sx.num_clip_distances_declared);
|
|
} else {
|
|
++sx.num_cull_distances_declared;
|
|
ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
|
|
sx.num_cull_distances_declared);
|
|
}
|
|
} else if (0 && opcode.dcl_siv_name == D3D10_SB_NAME_CULL_DISTANCE) {
|
|
sx.cull_distance_mapping[sx.num_cull_distances_declared].d3d =
|
|
opcode.dst[0].base.index[0].imm;
|
|
sx.cull_distance_mapping[sx.num_cull_distances_declared].tgsi =
|
|
sx.num_cull_distances_declared;
|
|
++sx.num_cull_distances_declared;
|
|
ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
|
|
sx.num_cull_distances_declared);
|
|
}
|
|
|
|
dcl_base_output(&sx, ureg,
|
|
ureg_DECL_output_masked(
|
|
ureg,
|
|
translate_system_name(opcode.dcl_siv_name),
|
|
translate_semantic_index(&sx, opcode.dcl_siv_name,
|
|
&opcode.dst[0]),
|
|
opcode.dst[0].mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT,
|
|
0, 1),
|
|
&opcode.dst[0]);
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_DCL_OUTPUT_SGV:
|
|
assert(opcode.dst[0].base.index_dim == 1);
|
|
assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
|
|
|
|
if (output_mapping) {
|
|
unsigned nr_outputs = ureg_get_nr_outputs(ureg);
|
|
output_mapping[nr_outputs]
|
|
= opcode.dst[0].base.index[0].imm;
|
|
}
|
|
dcl_base_output(&sx, ureg,
|
|
ureg_DECL_output(ureg,
|
|
translate_system_name(opcode.dcl_siv_name),
|
|
0),
|
|
&opcode.dst[0]);
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_DCL_TEMPS:
|
|
{
|
|
uint i;
|
|
|
|
assert(opcode.specific.dcl_num_temps + sx.declared_temps <=
|
|
SHADER_MAX_TEMPS);
|
|
|
|
sx.temp_offset = sx.declared_temps;
|
|
|
|
for (i = 0; i < opcode.specific.dcl_num_temps; i++) {
|
|
sx.temps[sx.declared_temps + i] = ureg_DECL_temporary(ureg);
|
|
}
|
|
sx.declared_temps += opcode.specific.dcl_num_temps;
|
|
}
|
|
break;
|
|
|
|
case D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP:
|
|
{
|
|
uint i;
|
|
|
|
/* XXX: Add true indexable temps to gallium.
|
|
*/
|
|
|
|
assert(opcode.specific.dcl_indexable_temp.index <
|
|
SHADER_MAX_INDEXABLE_TEMPS);
|
|
assert(opcode.specific.dcl_indexable_temp.count + sx.declared_temps <=
|
|
SHADER_MAX_TEMPS);
|
|
|
|
sx.indexable_temp_offsets[opcode.specific.dcl_indexable_temp.index] =
|
|
sx.declared_temps;
|
|
|
|
for (i = 0; i < opcode.specific.dcl_indexable_temp.count; i++) {
|
|
sx.temps[sx.declared_temps + i] = ureg_DECL_temporary(ureg);
|
|
}
|
|
sx.declared_temps += opcode.specific.dcl_indexable_temp.count;
|
|
}
|
|
break;
|
|
case D3D10_SB_OPCODE_IF: {
|
|
unsigned label = 0;
|
|
if (opcode.specific.test_boolean == D3D10_SB_INSTRUCTION_TEST_ZERO) {
|
|
struct ureg_src src =
|
|
translate_src_operand(&sx, &opcode.src[0], OF_INT);
|
|
struct ureg_dst src_nz = ureg_DECL_temporary(ureg);
|
|
ureg_USEQ(ureg, src_nz, src, ureg_imm1u(ureg, 0));
|
|
ureg_UIF(ureg, ureg_src(src_nz), &label);
|
|
ureg_release_temporary(ureg, src_nz);;
|
|
} else {
|
|
ureg_UIF(ureg, translate_src_operand(&sx, &opcode.src[0], OF_INT), &label);
|
|
}
|
|
}
|
|
break;
|
|
case D3D10_SB_OPCODE_RETC:
|
|
case D3D10_SB_OPCODE_CONTINUEC:
|
|
case D3D10_SB_OPCODE_CALLC:
|
|
case D3D10_SB_OPCODE_DISCARD:
|
|
case D3D10_SB_OPCODE_BREAKC:
|
|
{
|
|
unsigned label = 0;
|
|
assert(operand_is_scalar(&opcode.src[0]));
|
|
if (opcode.specific.test_boolean == D3D10_SB_INSTRUCTION_TEST_ZERO) {
|
|
struct ureg_src src =
|
|
translate_src_operand(&sx, &opcode.src[0], OF_INT);
|
|
struct ureg_dst src_nz = ureg_DECL_temporary(ureg);
|
|
ureg_USEQ(ureg, src_nz, src, ureg_imm1u(ureg, 0));
|
|
ureg_UIF(ureg, ureg_src(src_nz), &label);
|
|
ureg_release_temporary(ureg, src_nz);
|
|
}
|
|
else {
|
|
ureg_UIF(ureg, translate_src_operand(&sx, &opcode.src[0], OF_INT), &label);
|
|
}
|
|
switch (opcode.type) {
|
|
case D3D10_SB_OPCODE_RETC:
|
|
ureg_RET(ureg);
|
|
break;
|
|
case D3D10_SB_OPCODE_CONTINUEC:
|
|
ureg_CONT(ureg);
|
|
break;
|
|
case D3D10_SB_OPCODE_CALLC: {
|
|
unsigned label = opcode.src[1].base.index[0].imm;
|
|
unsigned tgsi_token_label = 0;
|
|
ureg_CAL(ureg, &tgsi_token_label);
|
|
Shader_add_call(&sx, label, tgsi_token_label);
|
|
}
|
|
break;
|
|
case D3D10_SB_OPCODE_DISCARD:
|
|
ureg_KILL(ureg);
|
|
break;
|
|
case D3D10_SB_OPCODE_BREAKC:
|
|
ureg_BRK(ureg);
|
|
break;
|
|
default:
|
|
assert(0);
|
|
break;
|
|
}
|
|
ureg_ENDIF(ureg);
|
|
}
|
|
break;
|
|
case D3D10_SB_OPCODE_LABEL: {
|
|
unsigned label = opcode.src[0].base.index[0].imm;
|
|
unsigned tgsi_inst_no = 0;
|
|
if (inside_sub) {
|
|
ureg_ENDSUB(ureg);
|
|
}
|
|
tgsi_inst_no = ureg_get_instruction_number(ureg);
|
|
ureg_BGNSUB(ureg);
|
|
inside_sub = TRUE;
|
|
Shader_add_label(&sx, label, tgsi_inst_no);
|
|
}
|
|
break;
|
|
case D3D10_SB_OPCODE_CALL: {
|
|
unsigned label = opcode.src[0].base.index[0].imm;
|
|
unsigned tgsi_token_label = 0;
|
|
ureg_CAL(ureg, &tgsi_token_label);
|
|
Shader_add_call(&sx, label, tgsi_token_label);
|
|
}
|
|
break;
|
|
case D3D10_SB_OPCODE_EMIT:
|
|
ureg_EMIT(ureg, ureg_imm1u(ureg, 0));
|
|
break;
|
|
case D3D10_SB_OPCODE_CUT:
|
|
ureg_ENDPRIM(ureg, ureg_imm1u(ureg, 0));
|
|
break;
|
|
case D3D10_SB_OPCODE_EMITTHENCUT:
|
|
ureg_EMIT(ureg, ureg_imm1u(ureg, 0));
|
|
ureg_ENDPRIM(ureg, ureg_imm1u(ureg, 0));
|
|
break;
|
|
case D3D10_SB_OPCODE_DCL_INDEX_RANGE:
|
|
case D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS:
|
|
/* Ignore */
|
|
break;
|
|
default:
|
|
{
|
|
uint i;
|
|
struct ureg_dst dst[SHADER_MAX_DST_OPERANDS];
|
|
struct ureg_src src[SHADER_MAX_SRC_OPERANDS];
|
|
|
|
assert(ox->tgsi_opcode != TGSI_EXPAND);
|
|
|
|
if (ox->tgsi_opcode == TGSI_LOG_UNSUPPORTED) {
|
|
if (!shader_dumped) {
|
|
dx10_shader_dump_tokens(code);
|
|
shader_dumped = TRUE;
|
|
}
|
|
debug_printf("%s: unsupported opcode %i\n",
|
|
__FUNCTION__, ox->type);
|
|
assert(ox->tgsi_opcode != TGSI_LOG_UNSUPPORTED);
|
|
}
|
|
|
|
/* Destination operands. */
|
|
for (i = 0; i < opcode.num_dst; i++) {
|
|
dst[i] = translate_dst_operand(&sx, &opcode.dst[i],
|
|
opcode.saturate);
|
|
}
|
|
|
|
/* Source operands. */
|
|
for (i = 0; i < opcode.num_src; i++) {
|
|
src[i] = translate_src_operand(&sx, &opcode.src[i], ox->format);
|
|
}
|
|
|
|
/* Try to re-route output depth to Z channel. */
|
|
if (opcode.dst[0].base.type == D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {
|
|
LOG_UNSUPPORTED(opcode.type != D3D10_SB_OPCODE_MOV);
|
|
dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_Z);
|
|
src[0] = ureg_scalar(src[0], TGSI_SWIZZLE_X);
|
|
}
|
|
|
|
ureg_insn(ureg,
|
|
ox->tgsi_opcode,
|
|
dst,
|
|
opcode.num_dst,
|
|
src,
|
|
opcode.num_src, 0);
|
|
}
|
|
}
|
|
|
|
Shader_opcode_free(&opcode);
|
|
}
|
|
|
|
if (inside_sub) {
|
|
ureg_ENDSUB(ureg);
|
|
}
|
|
|
|
ureg_END(ureg);
|
|
|
|
for (i = 0; i < sx.num_calls; ++i) {
|
|
for (j = 0; j < sx.num_labels; ++j) {
|
|
if (sx.calls[i].d3d_label == sx.labels[j].d3d_label) {
|
|
ureg_fixup_label(sx.ureg,
|
|
sx.calls[i].tgsi_label_token,
|
|
sx.labels[j].tgsi_insn_no);
|
|
break;
|
|
}
|
|
}
|
|
ASSERT(j < sx.num_labels);
|
|
}
|
|
FREE(sx.labels);
|
|
FREE(sx.calls);
|
|
|
|
tokens = ureg_get_tokens(ureg, &nr_tokens);
|
|
assert(tokens);
|
|
ureg_destroy(ureg);
|
|
|
|
if (st_debug & ST_DEBUG_TGSI) {
|
|
tgsi_dump(tokens, 0);
|
|
}
|
|
|
|
return tokens;
|
|
}
|