Remove tgsi_sse2.

tgsi_exec is simple. llvm is fast. tgsi_sse2 ends up being neither.
This commit is contained in:
José Fonseca 2011-11-08 00:10:47 +00:00
parent 207a016eca
commit 4eb3225b38
20 changed files with 3 additions and 7048 deletions

View File

@ -33,12 +33,8 @@ C_SOURCES := \
draw/draw_pt_vsplit.c \
draw/draw_vertex.c \
draw/draw_vs.c \
draw/draw_vs_aos.c \
draw/draw_vs_aos_io.c \
draw/draw_vs_aos_machine.c \
draw/draw_vs_exec.c \
draw/draw_vs_ppc.c \
draw/draw_vs_sse.c \
draw/draw_vs_variant.c \
os/os_misc.c \
os/os_stream.c \
@ -83,7 +79,6 @@ C_SOURCES := \
tgsi/tgsi_ppc.c \
tgsi/tgsi_sanity.c \
tgsi/tgsi_scan.c \
tgsi/tgsi_sse2.c \
tgsi/tgsi_text.c \
tgsi/tgsi_transform.c \
tgsi/tgsi_ureg.c \

View File

@ -237,10 +237,6 @@ struct draw_context
uint num_samplers;
struct tgsi_sampler **samplers;
/* Here's another one:
*/
struct aos_machine *aos_machine;
const void *aligned_constants[PIPE_MAX_CONSTANT_BUFFERS];

View File

@ -81,14 +81,12 @@ draw_vs_set_constants(struct draw_context *draw,
}
draw->vs.aligned_constants[slot] = constants;
draw_vs_aos_machine_constants(draw->vs.aos_machine, slot, constants);
}
void draw_vs_set_viewport( struct draw_context *draw,
const struct pipe_viewport_state *viewport )
{
draw_vs_aos_machine_viewport( draw->vs.aos_machine, viewport );
}
@ -103,22 +101,8 @@ draw_create_vertex_shader(struct draw_context *draw,
tgsi_dump(shader->tokens, 0);
}
if (!draw->pt.middle.llvm) {
#if 0
/* these paths don't support vertex clamping
* TODO: either add it, or remove them completely
* use LLVM instead if you want performance
* use exec instead if you want debugging/more correctness
*/
#if defined(PIPE_ARCH_X86)
vs = draw_create_vs_sse( draw, shader );
#elif defined(PIPE_ARCH_PPC)
vs = draw_create_vs_ppc( draw, shader );
#endif
#endif
}
#if HAVE_LLVM
else {
if (draw->pt.middle.llvm) {
vs = draw_create_vs_llvm(draw, shader);
}
#endif
@ -199,12 +183,6 @@ draw_vs_init( struct draw_context *draw )
if (!draw->vs.fetch_cache)
return FALSE;
draw->vs.aos_machine = draw_vs_aos_machine();
#ifdef PIPE_ARCH_X86
if (!draw->vs.aos_machine)
return FALSE;
#endif
return TRUE;
}
@ -219,9 +197,6 @@ draw_vs_destroy( struct draw_context *draw )
if (draw->vs.emit_cache)
translate_cache_destroy(draw->vs.emit_cache);
if (draw->vs.aos_machine)
draw_vs_aos_machine_destroy(draw->vs.aos_machine);
for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
if (draw->vs.aligned_constant_storage[i]) {
align_free((void *)draw->vs.aligned_constant_storage[i]);

View File

@ -158,10 +158,6 @@ struct draw_vertex_shader *
draw_create_vs_exec(struct draw_context *draw,
const struct pipe_shader_state *templ);
struct draw_vertex_shader *
draw_create_vs_sse(struct draw_context *draw,
const struct pipe_shader_state *templ);
struct draw_vertex_shader *
draw_create_vs_ppc(struct draw_context *draw,
const struct pipe_shader_state *templ);
@ -170,10 +166,6 @@ draw_create_vs_ppc(struct draw_context *draw,
struct draw_vs_variant_key;
struct draw_vertex_shader;
struct draw_vs_variant *
draw_vs_create_variant_aos_sse( struct draw_vertex_shader *vs,
const struct draw_vs_variant_key *key );
#if HAVE_LLVM
struct draw_vertex_shader *
draw_create_vs_llvm(struct draw_context *draw,
@ -214,18 +206,6 @@ static INLINE int draw_vs_variant_key_compare( const struct draw_vs_variant_key
}
struct aos_machine *draw_vs_aos_machine( void );
void draw_vs_aos_machine_destroy( struct aos_machine *machine );
void
draw_vs_aos_machine_constants(struct aos_machine *machine,
unsigned slot,
const void *constants);
void draw_vs_aos_machine_viewport( struct aos_machine *machine,
const struct pipe_viewport_state *viewport );
#define MAX_TGSI_VERTICES 4

File diff suppressed because it is too large Load Diff

View File

@ -1,255 +0,0 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/* Authors: Keith Whitwell <keith@tungstengraphics.com>
*/
#ifndef DRAW_VS_AOS_H
#define DRAW_VS_AOS_H
#include "pipe/p_config.h"
#include "tgsi/tgsi_exec.h"
#include "draw_vs.h"
#ifdef PIPE_ARCH_X86
struct tgsi_token;
struct x86_function;
#include "pipe/p_state.h"
#include "rtasm/rtasm_x86sse.h"
#define X 0
#define Y 1
#define Z 2
#define W 3
#define MAX_INPUTS PIPE_MAX_ATTRIBS
#define MAX_OUTPUTS PIPE_MAX_SHADER_OUTPUTS
#define MAX_TEMPS TGSI_EXEC_NUM_TEMPS
#define MAX_CONSTANTS 1024 /** only used for sanity checking */
#define MAX_IMMEDIATES 1024 /** only used for sanity checking */
#define MAX_INTERNALS 8 /** see IMM_x values below */
#define AOS_FILE_INTERNAL TGSI_FILE_COUNT
#define FPU_RND_NEG 1
#define FPU_RND_NEAREST 2
struct aos_machine;
/* Signature shared by the LIT implementations that are stored in
 * lit_info.func.  The implementations in this module use 'count' as an
 * index into aos_machine.lit_info[].
 */
typedef void (PIPE_CDECL *lit_func)( struct aos_machine *,
float *result,
const float *in,
unsigned count );
void PIPE_CDECL aos_do_lit( struct aos_machine *machine,
float *result,
const float *in,
unsigned count );
/* Table of precomputed powf() results for a single exponent, used by the
 * table-driven LIT path.
 */
struct shine_tab {
/* Unclamped exponent the table was built for; compared against in[3]. */
float exponent;
/* values[0] is 0; values[i] is (i / 256) raised to the clamped exponent. */
float values[258];
/* Value of aos_machine.now when this table was last selected. */
unsigned last_used;
};
/* Per-slot LIT state: which implementation to call next and, for the
 * table-driven implementation, which table it should read.
 */
struct lit_info {
/* populate_lut, do_lit_lut or aos_do_lit, depending on what was learnt. */
lit_func func;
/* Table chosen by populate_lut for this slot. */
struct shine_tab *shine_tab;
};
#define MAX_SHINE_TAB 4
#define MAX_LIT_INFO 16
/* Description of one vertex buffer as seen by the generated fetch code,
 * which addresses the fields by offset.
 */
struct aos_buffer {
/* Start of the buffer; indexed fetches compute base_ptr + stride * elt. */
const void *base_ptr;
/* Distance in bytes between consecutive vertices. */
unsigned stride;
void *ptr; /* updated per vertex */
};
/* This is the temporary storage used by all the aos_sse vs variants.
 * Create one per context and reuse by passing a pointer in at
 * vs_variant creation??
 *
 * NOTE(review): the generated code addresses this struct by field offset,
 * so the layout should be treated as fixed.
 */
struct aos_machine {
float input [MAX_INPUTS ][4];
float output [MAX_OUTPUTS ][4];
float temp [MAX_TEMPS ][4];
/* The IMM_* constants, filled in by draw_vs_aos_machine(). */
float internal [MAX_INTERNALS ][4];
float scale[4]; /* viewport */
float translate[4]; /* viewport */
float tmp[2][4]; /* scratch space for LIT */
struct shine_tab shine_tab[MAX_SHINE_TAB];
struct lit_info lit_info[MAX_LIT_INFO];
/* Advanced whenever constants are set; compared with shine_tab.last_used. */
unsigned now;
/* x87 control-word values prepared by draw_vs_aos_machine(). */
ushort fpu_rnd_nearest;
ushort fpu_rnd_neg_inf;
ushort fpu_restore;
ushort fpucntl; /* one of FPU_* above */
const float (*immediates)[4]; /* points to shader data */
const void *constants[PIPE_MAX_CONSTANT_BUFFERS]; /* points to draw data */
const struct aos_buffer *buffer; /* points to ? */
};
/* State carried around while one vertex-shader variant is being
 * translated to x86 code.
 */
struct aos_compilation {
/* Function currently receiving emitted instructions. */
struct x86_function *func;
/* Variant being compiled; its key describes inputs and outputs. */
struct draw_vs_variant_aos_sse *vaos;
/* Bumped by the emit helpers after each logical step. */
unsigned insn_counter;
unsigned num_immediates;
unsigned count;
unsigned lit_count;
/* Bookkeeping for the eight XMM registers. */
struct {
unsigned idx:16;
unsigned file:8;
unsigned dirty:8;
unsigned last_used;
} xmm[8];
unsigned x86_reg[2]; /* one of X86_* */
boolean input_fetched[PIPE_MAX_ATTRIBS];
unsigned output_last_write[PIPE_MAX_ATTRIBS];
boolean have_sse2;
/* Set by AOS_ERROR() when translation cannot continue. */
boolean error;
short fpucntl;
/* these are actually known values, but putting them in a struct
 * like this is helpful to keep them in sync across the file.
 */
struct x86_reg tmp_EAX;
struct x86_reg idx_EBX; /* either start+i or &elt[i] */
struct x86_reg outbuf_ECX;
struct x86_reg machine_EDX;
struct x86_reg count_ESI; /* decrements to zero */
struct x86_reg temp_EBP;
struct x86_reg stack_ESP;
};
struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp );
void aos_release_xmm_reg( struct aos_compilation *cp, unsigned idx );
void aos_adopt_xmm_reg( struct aos_compilation *cp,
struct x86_reg reg,
unsigned file,
unsigned idx,
unsigned dirty );
void aos_spill_all( struct aos_compilation *cp );
struct x86_reg aos_get_shader_reg( struct aos_compilation *cp,
unsigned file,
unsigned idx );
boolean aos_init_inputs( struct aos_compilation *cp, boolean linear );
boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear );
boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear );
boolean aos_emit_outputs( struct aos_compilation *cp );
#define IMM_ONES 0 /* 1, 1,1,1 */
#define IMM_SWZ 1 /* 1,-1,0, 0xffffffff */
#define IMM_IDENTITY 2 /* 0, 0,0,1 */
#define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */
#define IMM_255 4 /* 255, 255, 255, 255 */
#define IMM_NEGS 5 /* -1,-1,-1,-1 */
#define IMM_RSQ 6 /* -.5,1.5,_,_ */
#define IMM_PSIZE 7 /* not really an immediate - updated each run */
struct x86_reg aos_get_internal( struct aos_compilation *cp,
unsigned imm );
struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp,
unsigned imm );
/* Flag the compilation 'cp' as failed.  The diagnostic is compiled but
 * disabled (if (0)); flip the condition to see why translation failed.
 * Both arguments are parenthesised so that any pointer expression may be
 * passed for 'cp'.
 */
#define AOS_ERROR(cp, msg) \
do { \
   if (0) debug_printf("%s: x86 translation failed: %s\n", __FUNCTION__, (msg)); \
   (cp)->error = 1; \
} while (0)
#define X86_NULL 0
#define X86_IMMEDIATES 1
#define X86_CONSTANTS 2
#define X86_BUFFERS 3
struct x86_reg aos_get_x86( struct aos_compilation *cp,
unsigned which_reg,
unsigned value );
/* Entry point of generated code for the indexed case: takes the machine,
 * an array of element indices, a vertex count and the output buffer.
 */
typedef void (PIPE_CDECL *vaos_run_elts_func)( struct aos_machine *,
const unsigned *elts,
unsigned count,
void *output_buffer);
/* Entry point of generated code for the linear case: takes the machine,
 * the first vertex, a vertex count and the output buffer.
 */
typedef void (PIPE_CDECL *vaos_run_linear_func)( struct aos_machine *,
unsigned start,
unsigned count,
void *output_buffer);
/* A vertex-shader variant backed by generated x86 code. */
struct draw_vs_variant_aos_sse {
/* Generic variant; its key drives input fetch and output emit. */
struct draw_vs_variant base;
struct draw_context *draw;
/* Array of nr_vb vertex-buffer descriptions. */
struct aos_buffer *buffer;
unsigned nr_vb;
/* Generated entry points for the linear and indexed paths. */
vaos_run_linear_func gen_run_linear;
vaos_run_elts_func gen_run_elts;
/* NOTE(review): presumably one function per path (linear/elts) - confirm. */
struct x86_function func[2];
};
#endif
#endif

View File

@ -1,460 +0,0 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#include "util/u_memory.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_exec.h"
#include "draw_vs.h"
#include "draw_vs_aos.h"
#include "draw_vertex.h"
#include "rtasm/rtasm_x86sse.h"
#ifdef PIPE_ARCH_X86
/* Note - don't yet have to worry about interacting with the code in
* draw_vs_aos.c as there is no intermingling of generated code...
* That may have to change, we'll see.
*/
/* Emit a movups that loads a full four-float attribute from src_ptr
 * into the XMM register 'data'.
 */
static void emit_load_R32G32B32A32( struct aos_compilation *cp,
                                    struct x86_reg data,
                                    struct x86_reg src_ptr )
{
   struct x86_function *fn = cp->func;

   sse_movups(fn, data, src_ptr);
}
static void emit_load_R32G32B32( struct aos_compilation *cp,
struct x86_reg data,
struct x86_reg src_ptr )
{
#if 1
sse_movss(cp->func, data, x86_make_disp(src_ptr, 8));
/* data = z ? ? ? */
sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) );
/* data = z ? 0 1 */
sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) );
/* data = ? 0 z 1 */
sse_movlps(cp->func, data, src_ptr);
/* data = x y z 1 */
#else
sse_movups(cp->func, data, src_ptr);
/* data = x y z ? */
sse2_pshufd(cp->func, data, data, SHUF(W,X,Y,Z) );
/* data = ? x y z */
sse_movss(cp->func, data, aos_get_internal_xmm( cp, IMM_ONES ) );
/* data = 1 x y z */
sse2_pshufd(cp->func, data, data, SHUF(Y,Z,W,X) );
/* data = x y z 1 */
#endif
}
static void emit_load_R32G32( struct aos_compilation *cp,
struct x86_reg data,
struct x86_reg src_ptr )
{
sse_movups(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
sse_movlps(cp->func, data, src_ptr);
}
static void emit_load_R32( struct aos_compilation *cp,
struct x86_reg data,
struct x86_reg src_ptr )
{
sse_movss(cp->func, data, src_ptr);
sse_orps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
}
static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
struct x86_reg data,
struct x86_reg src_ptr )
{
sse_movss(cp->func, data, src_ptr);
sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
sse2_cvtdq2ps(cp->func, data, data);
sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255));
}
/* Emit a shufps that writes 'src', rearranged according to the SHUF()
 * selector 'shuffle', into 'dest'.  Extended swizzles are not handled
 * here (maybe later).
 */
static void emit_swizzle( struct aos_compilation *cp,
                          struct x86_reg dest,
                          struct x86_reg src,
                          ubyte shuffle )
{
   struct x86_function *fn = cp->func;

   sse_shufps(fn, dest, src, shuffle);
}
/* Emit code that leaves the address of the current vertex of buffer
 * 'buf_idx' in register 'ptr'.
 *
 * Indexed case: ptr = stride * elt + base_ptr.
 * Linear case:  ptr = the buffer's stored per-vertex pointer; that stored
 *               pointer is then advanced by one stride (using 'elt' as a
 *               scratch register), with a prefetch issued for buffer 0.
 *
 * Always returns TRUE.
 */
static boolean get_buffer_ptr( struct aos_compilation *cp,
                               boolean linear,
                               unsigned buf_idx,
                               struct x86_reg elt,
                               struct x86_reg ptr)
{
   struct x86_reg buffers = aos_get_x86( cp, 0, X86_BUFFERS );
   struct x86_reg this_buf = x86_make_disp(buffers,
                                           buf_idx * sizeof(struct aos_buffer));
   struct x86_reg stride_field = x86_make_disp(this_buf,
                                               Offset(struct aos_buffer, stride));

   if (!linear) {
      struct x86_reg base_field = x86_make_disp(this_buf,
                                                Offset(struct aos_buffer, base_ptr));

      /* ptr = stride * elt + base_ptr */
      x86_mov(cp->func, ptr, stride_field);
      x86_imul(cp->func, ptr, elt);
      x86_add(cp->func, ptr, base_field);
   }
   else {
      struct x86_reg cursor_field = x86_make_disp(this_buf,
                                                  Offset(struct aos_buffer, ptr));

      /* ptr = current vertex; stored cursor += stride */
      x86_mov(cp->func, ptr, cursor_field);
      x86_mov(cp->func, elt, stride_field);
      x86_add(cp->func, elt, ptr);
      if (buf_idx == 0)
         sse_prefetchnta(cp->func, x86_make_disp(elt, 192));
      x86_mov(cp->func, cursor_field, elt);
   }

   cp->insn_counter++;
   return TRUE;
}
static boolean load_input( struct aos_compilation *cp,
unsigned idx,
struct x86_reg bufptr )
{
unsigned format = cp->vaos->base.key.element[idx].in.format;
unsigned offset = cp->vaos->base.key.element[idx].in.offset;
struct x86_reg dataXMM = aos_get_xmm_reg(cp);
/* Figure out source pointer address:
*/
struct x86_reg src = x86_make_disp(bufptr, offset);
aos_adopt_xmm_reg( cp,
dataXMM,
TGSI_FILE_INPUT,
idx,
TRUE );
switch (format) {
case PIPE_FORMAT_R32_FLOAT:
emit_load_R32(cp, dataXMM, src);
break;
case PIPE_FORMAT_R32G32_FLOAT:
emit_load_R32G32(cp, dataXMM, src);
break;
case PIPE_FORMAT_R32G32B32_FLOAT:
emit_load_R32G32B32(cp, dataXMM, src);
break;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
emit_load_R32G32B32A32(cp, dataXMM, src);
break;
case PIPE_FORMAT_A8R8G8B8_UNORM:
emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);
emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
break;
case PIPE_FORMAT_R8G8B8A8_UNORM:
emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);
break;
default:
AOS_ERROR(cp, "unhandled input format");
return FALSE;
}
return TRUE;
}
/* Emit loads for every input element that is sourced from vertex buffer
 * 'buffer', using 'ptr' as the address of the current vertex.  Stops and
 * returns FALSE at the first element that cannot be loaded.
 */
static boolean load_inputs( struct aos_compilation *cp,
                            unsigned buffer,
                            struct x86_reg ptr )
{
   unsigned elem;

   for (elem = 0; elem < cp->vaos->base.key.nr_inputs; elem++) {
      if (cp->vaos->base.key.element[elem].in.buffer != buffer)
         continue;

      if (!load_input( cp, elem, ptr ))
         return FALSE;

      cp->insn_counter++;
   }

   return TRUE;
}
/* Emit the input setup that runs once, before the per-vertex code.
 *
 * For each vertex buffer:
 *  - constant buffer (bit set in key.const_vbuffers): load its inputs
 *    once and spill them to aos_machine.input[];
 *  - otherwise, in the linear case: compute the address of the first
 *    vertex (stride * idx + base_ptr) and keep it either in idx_EBX
 *    (single buffer) or in the buffer's 'ptr' field (several buffers);
 *  - indexed, non-constant buffers need no setup here.
 *
 * Returns FALSE if an input of a constant buffer could not be loaded
 * (previously that failure was only recorded in cp->error), TRUE
 * otherwise.
 */
boolean aos_init_inputs( struct aos_compilation *cp, boolean linear )
{
   unsigned i;

   for (i = 0; i < cp->vaos->nr_vb; i++) {
      struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
                                         i * sizeof(struct aos_buffer));
      struct x86_reg buf_base_ptr = x86_make_disp(buf,
                                                  Offset(struct aos_buffer, base_ptr));

      /* Unsigned shift: 1 << 31 on a signed int would be undefined. */
      if (cp->vaos->base.key.const_vbuffers & (1u << i)) {
         struct x86_reg ptr = cp->tmp_EAX;

         x86_mov(cp->func, ptr, buf_base_ptr);

         /* Load all inputs for this constant vertex buffer
          */
         if (!load_inputs( cp, i, x86_deref(ptr) ))
            return FALSE;

         /* Then just force them out to aos_machine.input[]
          */
         aos_spill_all( cp );
      }
      else if (linear) {
         struct x86_reg elt = cp->idx_EBX;
         struct x86_reg ptr = cp->tmp_EAX;
         struct x86_reg buf_stride = x86_make_disp(buf,
                                                   Offset(struct aos_buffer, stride));
         struct x86_reg buf_ptr = x86_make_disp(buf,
                                                Offset(struct aos_buffer, ptr));

         /* Calculate pointer to current attrib:
          */
         x86_mov(cp->func, ptr, buf_stride);
         x86_imul(cp->func, ptr, elt);
         x86_add(cp->func, ptr, buf_base_ptr);

         /* In the linear case, keep the buffer pointer instead of the
          * index number.
          */
         if (cp->vaos->nr_vb == 1)
            x86_mov( cp->func, elt, ptr );
         else
            x86_mov( cp->func, buf_ptr, ptr );

         cp->insn_counter++;
      }
   }

   return TRUE;
}
/* Emit the per-vertex input fetch for every vertex buffer.
 *
 *  - constant buffers were already loaded by aos_init_inputs();
 *  - linear with a single buffer: idx_EBX already holds the vertex
 *    address, so inputs are loaded straight from it;
 *  - otherwise the vertex address is computed into tmp_EAX first.
 *
 * Returns FALSE as soon as any load cannot be emitted.  The single-buffer
 * linear branch used to ignore that failure, unlike the general branch.
 */
boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
{
   unsigned j;

   for (j = 0; j < cp->vaos->nr_vb; j++) {
      /* Unsigned shift: 1 << 31 on a signed int would be undefined. */
      if (cp->vaos->base.key.const_vbuffers & (1u << j)) {
         /* just retrieve pre-transformed input */
      }
      else if (linear && cp->vaos->nr_vb == 1) {
         if (!load_inputs( cp, 0, cp->idx_EBX ))
            return FALSE;
      }
      else {
         struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX);
         struct x86_reg ptr = cp->tmp_EAX;

         if (!get_buffer_ptr( cp, linear, j, elt, ptr ))
            return FALSE;

         if (!load_inputs( cp, j, ptr ))
            return FALSE;
      }
   }

   return TRUE;
}
/* Emit the code that steps the input cursor to the next vertex.
 *
 *  - indexed: idx_EBX moves on by 4 bytes;
 *  - linear, single buffer: idx_EBX is advanced by buffer 0's stride and
 *    a prefetch is issued 192 bytes ahead;
 *  - linear, several buffers: nothing to emit here.
 *
 * Always returns TRUE.
 */
boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear )
{
   if (!linear) {
      x86_lea(cp->func, cp->idx_EBX, x86_make_disp(cp->idx_EBX, 4));
   }
   else if (cp->vaos->nr_vb == 1) {
      struct x86_reg buffers = aos_get_x86( cp, 0, X86_BUFFERS );
      struct x86_reg stride0 = x86_make_disp(buffers,
                                             (0 * sizeof(struct aos_buffer) +
                                              Offset(struct aos_buffer, stride)));

      x86_add(cp->func, cp->idx_EBX, stride0);
      sse_prefetchnta(cp->func, x86_make_disp(cp->idx_EBX, 192));
   }

   return TRUE;
}
/* Emit a movups that stores all four floats of dataXMM to dst_ptr. */
static void emit_store_R32G32B32A32( struct aos_compilation *cp,
                                     struct x86_reg dst_ptr,
                                     struct x86_reg dataXMM )
{
   struct x86_function *fn = cp->func;

   sse_movups(fn, dst_ptr, dataXMM);
}
static void emit_store_R32G32B32( struct aos_compilation *cp,
struct x86_reg dst_ptr,
struct x86_reg dataXMM )
{
sse_movlps(cp->func, dst_ptr, dataXMM);
sse_shufps(cp->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
sse_movss(cp->func, x86_make_disp(dst_ptr,8), dataXMM);
}
/* Emit a movlps that stores the low two floats of dataXMM to dst_ptr. */
static void emit_store_R32G32( struct aos_compilation *cp,
                               struct x86_reg dst_ptr,
                               struct x86_reg dataXMM )
{
   struct x86_function *fn = cp->func;

   sse_movlps(fn, dst_ptr, dataXMM);
}
/* Emit a movss that stores the lowest float of dataXMM to dst_ptr. */
static void emit_store_R32( struct aos_compilation *cp,
                            struct x86_reg dst_ptr,
                            struct x86_reg dataXMM )
{
   struct x86_function *fn = cp->func;

   sse_movss(fn, dst_ptr, dataXMM);
}
static void emit_store_R8G8B8A8_UNORM( struct aos_compilation *cp,
struct x86_reg dst_ptr,
struct x86_reg dataXMM )
{
sse_mulps(cp->func, dataXMM, aos_get_internal(cp, IMM_255));
sse2_cvtps2dq(cp->func, dataXMM, dataXMM);
sse2_packssdw(cp->func, dataXMM, dataXMM);
sse2_packuswb(cp->func, dataXMM, dataXMM);
sse_movss(cp->func, dst_ptr, dataXMM);
}
/* Emit the store of dataXMM to 'ptr' in the layout selected by 'format'.
 * Some stores (and the BGRA swizzle) overwrite dataXMM.
 *
 * Returns FALSE, after flagging the compilation via AOS_ERROR, for a
 * format without a store routine; TRUE otherwise.
 */
static boolean emit_output( struct aos_compilation *cp,
                            struct x86_reg ptr,
                            struct x86_reg dataXMM,
                            enum attrib_emit format )
{
   boolean ok = TRUE;

   switch (format) {
   case EMIT_1F:
   case EMIT_1F_PSIZE:
      emit_store_R32(cp, ptr, dataXMM);
      break;

   case EMIT_2F:
      emit_store_R32G32(cp, ptr, dataXMM);
      break;

   case EMIT_3F:
      emit_store_R32G32B32(cp, ptr, dataXMM);
      break;

   case EMIT_4F:
      emit_store_R32G32B32A32(cp, ptr, dataXMM);
      break;

   case EMIT_4UB:
      emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
      break;

   case EMIT_4UB_BGRA:
      /* Exchange X and Z first, then store as for EMIT_4UB. */
      emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
      emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
      break;

   default:
      AOS_ERROR(cp, "unhandled output format");
      ok = FALSE;
      break;
   }

   return ok;
}
boolean aos_emit_outputs( struct aos_compilation *cp )
{
unsigned i;
for (i = 0; i < cp->vaos->base.key.nr_outputs; i++) {
enum attrib_emit format = cp->vaos->base.key.element[i].out.format;
unsigned offset = cp->vaos->base.key.element[i].out.offset;
unsigned vs_output = cp->vaos->base.key.element[i].out.vs_output;
struct x86_reg data;
if (format == EMIT_1F_PSIZE) {
data = aos_get_internal_xmm( cp, IMM_PSIZE );
}
else {
data = aos_get_shader_reg( cp,
TGSI_FILE_OUTPUT,
vs_output );
}
if (data.file != file_XMM) {
struct x86_reg tmp = aos_get_xmm_reg( cp );
sse_movaps(cp->func, tmp, data);
data = tmp;
}
if (!emit_output( cp,
x86_make_disp( cp->outbuf_ECX, offset ),
data,
format ))
return FALSE;
aos_release_xmm_reg( cp, data.idx );
cp->insn_counter++;
}
return TRUE;
}
#endif

View File

@ -1,328 +0,0 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#include "pipe/p_config.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_exec.h"
#include "draw_vs.h"
#include "draw_vs_aos.h"
#include "draw_vertex.h"
#ifdef PIPE_ARCH_X86
#include "rtasm/rtasm_x86sse.h"
#define X87_CW_EXCEPTION_INV_OP (1<<0)
#define X87_CW_EXCEPTION_DENORM_OP (1<<1)
#define X87_CW_EXCEPTION_ZERO_DIVIDE (1<<2)
#define X87_CW_EXCEPTION_OVERFLOW (1<<3)
#define X87_CW_EXCEPTION_UNDERFLOW (1<<4)
#define X87_CW_EXCEPTION_PRECISION (1<<5)
#define X87_CW_PRECISION_SINGLE (0<<8)
#define X87_CW_PRECISION_RESERVED (1<<8)
#define X87_CW_PRECISION_DOUBLE (2<<8)
#define X87_CW_PRECISION_DOUBLE_EXT (3<<8)
#define X87_CW_PRECISION_MASK (3<<8)
#define X87_CW_ROUND_NEAREST (0<<10)
#define X87_CW_ROUND_DOWN (1<<10)
#define X87_CW_ROUND_UP (2<<10)
#define X87_CW_ROUND_ZERO (3<<10)
#define X87_CW_ROUND_MASK (3<<10)
#define X87_CW_INFINITY (1<<12)
/* Reference LIT implementation using powf().
 *
 *   in[0] > 0 is false        -> result = (1, 0,     0,                     1)
 *   in[0] > 0 and in[1] <= 0  -> result = (1, in[0], 0,                     1)
 *   otherwise                 -> result = (1, in[0], powf(in[1], exponent), 1)
 *
 * where exponent is in[3] clamped to +/-(128 - 1/256).  'machine' and
 * 'count' are unused; they exist so the function matches lit_func.
 */
void PIPE_CDECL aos_do_lit( struct aos_machine *machine,
                            float *result,
                            const float *in,
                            unsigned count )
{
   if (!(in[0] > 0)) {
      result[0] = 1.0F;
      result[1] = 0.0F;
      result[2] = 0.0F;
      result[3] = 1.0F;
      return;
   }

   if (in[1] <= 0.0) {
      result[0] = 1.0F;
      result[1] = in[0];
      result[2] = 0.0F;
      result[3] = 1.0F;
      return;
   }

   {
      const float epsilon = 1.0F / 256.0F;
      const float limit = 128.0F - epsilon;
      float exponent = CLAMP(in[3], -limit, limit);

      result[0] = 1.0F;
      result[1] = in[0];
      result[2] = powf(in[1], exponent);
      result[3] = 1.0F;
   }
}
/* Table-driven LIT.  Produces the same shape of result as aos_do_lit but,
 * when the slot's table was built for in[3] and in[1] <= 1, the power
 * term is linearly interpolated from that table instead of calling powf().
 *
 * If the table's exponent no longer matches in[3], the slot is switched
 * over to aos_do_lit for subsequent calls and this call falls back to
 * powf().  An in[1] above 1 also falls back to powf(), without changing
 * the slot.
 */
static void PIPE_CDECL do_lit_lut( struct aos_machine *machine,
                                   float *result,
                                   const float *in,
                                   unsigned count )
{
   if (!(in[0] > 0)) {
      result[0] = 1.0F;
      result[1] = 0.0F;
      result[2] = 0.0F;
      result[3] = 1.0F;
      return;
   }

   if (in[1] <= 0.0) {
      result[0] = 1.0F;
      result[1] = in[0];
      result[2] = 0.0F;
      result[3] = 1.0F;
      return;
   }

   if (machine->lit_info[count].shine_tab->exponent != in[3]) {
      /* Wrong table: stop using the table for this slot. */
      machine->lit_info[count].func = aos_do_lit;
   }
   else if (in[1] <= 1.0) {
      const float *lut = machine->lit_info[count].shine_tab->values;
      float scaled = in[1] * 256;
      int slot = (int)scaled;
      float frac = scaled - (float)slot;

      result[0] = 1.0F;
      result[1] = in[0];
      result[2] = lut[slot] + frac*(lut[slot+1]-lut[slot]);
      result[3] = 1.0F;
      return;
   }

   /* No luck with the table: compute the power directly. */
   {
      const float epsilon = 1.0F / 256.0F;
      const float limit = 128.0F - epsilon;
      float exponent = CLAMP(in[3], -limit, limit);

      result[0] = 1.0F;
      result[1] = in[0];
      result[2] = powf(in[1], exponent);
      result[3] = 1.0F;
   }
}
/* Fill 'tab' for the given exponent.
 *
 * The unclamped exponent is remembered in tab->exponent for later
 * comparison, while the table contents use the exponent clamped to
 * +/-(128 - 1/256).  Entry 0 is always 0; entry i (i >= 1) is
 * (i / 256) ^ exponent, or exactly 1 when the clamped exponent is 0.
 */
static void do_populate_lut( struct shine_tab *tab,
                             float unclamped_exponent )
{
   const float step = 1.0F / 256.0F;
   const float limit = 128.0F - step;
   const float exponent = CLAMP(unclamped_exponent, -limit, limit);
   const unsigned nr_values = sizeof(tab->values) / sizeof(tab->values[0]);
   unsigned idx;

   tab->exponent = unclamped_exponent;
   tab->values[0] = 0;

   for (idx = 1; idx < nr_values; idx++) {
      if (exponent == 0)
         tab->values[idx] = 1.0F;
      else
         tab->values[idx] = powf((float)idx * step, exponent);
   }
}
/* First-use LIT handler for slot 'count': pick (or build) a power table
 * for the exponent in[3], then hand this and later calls for the slot to
 * do_lit_lut.
 *
 *  1. Reuse an existing table built for exactly in[3], if there is one.
 *  2. Otherwise take the table with the smallest last_used stamp.  If even
 *     that one carries the current 'now' stamp, no table is free: the slot
 *     is switched to aos_do_lit (powf on every call) and this call is
 *     served by it.
 *  3. Otherwise rebuild the chosen table for in[3].
 *
 * The table count is taken from MAX_SHINE_TAB rather than a literal 4, so
 * the searches stay in step with the size of aos_machine.shine_tab[].
 */
static void PIPE_CDECL populate_lut( struct aos_machine *machine,
                                     float *result,
                                     const float *in,
                                     unsigned count )
{
   unsigned i, tab;

   /* Search for an existing table for this value.  Note that without
    * static analysis we don't really know if in[3] will be constant,
    * but it usually is...
    */
   for (tab = 0; tab < MAX_SHINE_TAB; tab++) {
      if (machine->shine_tab[tab].exponent == in[3])
         break;
   }

   if (tab == MAX_SHINE_TAB) {
      /* No match: choose the least recently used table. */
      for (tab = 0, i = 1; i < MAX_SHINE_TAB; i++) {
         if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used)
            tab = i;
      }

      if (machine->shine_tab[tab].last_used == machine->now) {
         /* No unused tables (this is not a ffvertex program...).  Just
          * call pow each time:
          */
         machine->lit_info[count].func = aos_do_lit;
         machine->lit_info[count].func( machine, result, in, count );
         return;
      }

      do_populate_lut( &machine->shine_tab[tab], in[3] );
   }

   machine->shine_tab[tab].last_used = machine->now;
   machine->lit_info[count].shine_tab = &machine->shine_tab[tab];
   machine->lit_info[count].func = do_lit_lut;
   machine->lit_info[count].func( machine, result, in, count );
}
/* Record the constant buffer for 'slot' and reset every LIT slot to
 * populate_lut, so that the power tables are re-evaluated against the new
 * constants.  'now' is advanced by one per LIT slot.
 */
void
draw_vs_aos_machine_constants(struct aos_machine *machine,
                              unsigned slot,
                              const void *constants)
{
   unsigned lit;

   machine->constants[slot] = constants;

   for (lit = 0; lit < MAX_LIT_INFO; lit++)
      machine->lit_info[lit].func = populate_lut;

   machine->now += MAX_LIT_INFO;
}
/* Copy the four scale and four translate floats of 'viewport' into the
 * machine.
 */
void draw_vs_aos_machine_viewport( struct aos_machine *machine,
                                   const struct pipe_viewport_state *viewport )
{
   const size_t vec4_bytes = 4 * sizeof(float);

   memcpy(machine->scale, viewport->scale, vec4_bytes);
   memcpy(machine->translate, viewport->translate, vec4_bytes);
}
/* Release a machine obtained from draw_vs_aos_machine(); it was allocated
 * with align_malloc() and so is returned with align_free().
 */
void draw_vs_aos_machine_destroy( struct aos_machine *machine )
{
   void *storage = machine;

   align_free(storage);
}
/* Allocate and initialise an aos_machine.
 *
 * The machine is 16-byte aligned and zero-filled, then:
 *  - the IMM_* internal constants are written (IMM_PSIZE is left zero; it
 *    is updated on each run);
 *  - the two x87 control words (round-to-nearest and round-down) are
 *    prepared;
 *  - every shine table is pre-populated for exponent 1.0.
 *
 * Returns NULL if the allocation fails.  Release with
 * draw_vs_aos_machine_destroy().
 */
struct aos_machine *draw_vs_aos_machine( void )
{
   struct aos_machine *machine;
   unsigned i;
   float inv = 1.0f/255.0f;
   float f255 = 255.0f;
   /* Bit pattern for the W lane of IMM_SWZ. */
   const unsigned swz_w_bits = 0xffffffff;

   machine = align_malloc(sizeof(struct aos_machine), 16);
   if (!machine)
      return NULL;

   memset(machine, 0, sizeof(*machine));

   ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f);
   /* Overwrite W with an all-ones bit pattern.  memcpy is used instead of
    * storing through an (unsigned *) cast of a float lvalue, which breaks
    * the strict-aliasing rule.
    */
   memcpy(&machine->internal[IMM_SWZ][3], &swz_w_bits, sizeof swz_w_bits);

   ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f);
   ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f);
   ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f);
   ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv);
   ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255);
   ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f);

   machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP |
                               X87_CW_EXCEPTION_DENORM_OP |
                               X87_CW_EXCEPTION_ZERO_DIVIDE |
                               X87_CW_EXCEPTION_OVERFLOW |
                               X87_CW_EXCEPTION_UNDERFLOW |
                               X87_CW_EXCEPTION_PRECISION |
                               (1<<6) |
                               X87_CW_ROUND_NEAREST |
                               X87_CW_PRECISION_DOUBLE_EXT);

   assert(machine->fpu_rnd_nearest == 0x37f);

   /* Same control word, but rounding down. */
   machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP |
                               X87_CW_EXCEPTION_DENORM_OP |
                               X87_CW_EXCEPTION_ZERO_DIVIDE |
                               X87_CW_EXCEPTION_OVERFLOW |
                               X87_CW_EXCEPTION_UNDERFLOW |
                               X87_CW_EXCEPTION_PRECISION |
                               (1<<6) |
                               X87_CW_ROUND_DOWN |
                               X87_CW_PRECISION_DOUBLE_EXT);

   for (i = 0; i < MAX_SHINE_TAB; i++)
      do_populate_lut( &machine->shine_tab[i], 1.0f );

   return machine;
}
#else
/* Non-x86 build: there is no machine, so this does nothing. */
void draw_vs_aos_machine_viewport( struct aos_machine *machine,
                                   const struct pipe_viewport_state *viewport )
{
   (void) machine;
   (void) viewport;
}
/* Non-x86 build: there is no machine, so this does nothing. */
void
draw_vs_aos_machine_constants(struct aos_machine *machine,
                              unsigned slot,
                              const void *constants)
{
   (void) machine;
   (void) slot;
   (void) constants;
}
/* Non-x86 build: nothing was allocated, so there is nothing to free. */
void draw_vs_aos_machine_destroy( struct aos_machine *machine )
{
   (void) machine;
}
/* Non-x86 build: no machine is ever created; always returns NULL. */
struct aos_machine *draw_vs_aos_machine( void )
{
   struct aos_machine *none = NULL;

   return none;
}
#endif

View File

@ -185,12 +185,7 @@ draw_create_vs_ppc(struct draw_context *draw,
tgsi_scan_shader(templ->tokens, &vs->base.info);
vs->base.draw = draw;
#if 0
if (1)
vs->base.create_variant = draw_vs_variant_aos_ppc;
else
#endif
vs->base.create_variant = draw_vs_create_variant_generic;
vs->base.create_variant = draw_vs_create_variant_generic;
vs->base.prepare = vs_ppc_prepare;
vs->base.run_linear = vs_ppc_run_linear;
vs->base.delete = vs_ppc_delete;

View File

@ -1,225 +0,0 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
* Brian Paul
*/
#include "util/u_math.h"
#include "util/u_memory.h"
#include "pipe/p_config.h"
#include "draw_vs.h"
#if defined(PIPE_ARCH_X86)
#include "pipe/p_shader_tokens.h"
#include "draw_private.h"
#include "draw_context.h"
#include "rtasm/rtasm_cpu.h"
#include "rtasm/rtasm_x86sse.h"
#include "tgsi/tgsi_sse2.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_exec.h"
#define SSE_MAX_VERTICES 4
/* Vertex shader whose TGSI program has been compiled to SSE2 code. */
struct draw_sse_vertex_shader {
/* Must stay first: the vs_sse_* callbacks cast the base pointer back to
 * this struct.
 */
struct draw_vertex_shader base;
/* Storage for the generated code; released in vs_sse_delete(). */
struct x86_function sse2_program;
/* Entry point invoked by vs_sse_run_linear(). */
tgsi_sse2_vs_func func;
/* Taken from draw->vs.machine at creation; not owned by the shader. */
struct tgsi_exec_machine *machine;
};
/* Bind per-draw state to the shader's exec machine: the vertex samplers
 * and, when the shader reads the instance id, the current instance id in
 * the matching system-value slot.
 */
static void
vs_sse_prepare( struct draw_vertex_shader *base,
                struct draw_context *draw )
{
   struct draw_sse_vertex_shader *sse_vs = (struct draw_sse_vertex_shader *)base;
   struct tgsi_exec_machine *mach = sse_vs->machine;

   mach->Samplers = draw->vs.samplers;

   if (!base->info.uses_instanceid)
      return;

   {
      unsigned slot = mach->SysSemanticToIndex[TGSI_SEMANTIC_INSTANCEID];

      assert(slot < Elements(mach->SystemValue));
      mach->SystemValue[slot][0] = base->draw->instance_id;
   }
}
/* Simplified vertex shader interface for the pt paths: run the compiled
 * shader over 'count' consecutive vertices, MAX_TGSI_VERTICES at a time.
 *
 * 'input' and 'output' are advanced by their byte strides after every
 * batch.  Only constants[0] is handed to the generated code; const_size
 * is not used.
 */
static void
vs_sse_run_linear( struct draw_vertex_shader *base,
                   const float (*input)[4],
                   float (*output)[4],
                   const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
                   const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS],
                   unsigned count,
                   unsigned input_stride,
                   unsigned output_stride )
{
   struct draw_sse_vertex_shader *sse_vs = (struct draw_sse_vertex_shader *)base;
   struct tgsi_exec_machine *mach = sse_vs->machine;
   unsigned int first;

   /* All channels are live by default.  XXX this belongs inside the loop
    * once shader conditionals/loops are supported.
    */
   tgsi_set_exec_mask(mach, 1, 1, 1, 1);

   for (first = 0; first < count; first += MAX_TGSI_VERTICES) {
      unsigned int batch = MIN2(MAX_TGSI_VERTICES, count - first);

      if (batch < 4) {
         /* Short final batch: switch off the channels with no vertex. */
         tgsi_set_exec_mask(mach,
                            1,
                            batch > 1,
                            batch > 2,
                            0);
      }

      /* Run the generated code on this batch. */
      sse_vs->func(mach,
                   (const float (*)[4])constants[0],
                   sse_vs->base.immediates,
                   input,
                   base->info.num_inputs,
                   input_stride,
                   output,
                   base->info.num_outputs,
                   output_stride );

      input = (const float (*)[4])((const char *)input + input_stride * batch);
      output = (float (*)[4])((char *)output + output_stride * batch);
   }
}
/**
 * Destroy an SSE vertex shader: release the generated code, the immediates
 * buffer, the private token copy and finally the shader object itself.
 */
static void
vs_sse_delete( struct draw_vertex_shader *base )
{
   struct draw_sse_vertex_shader *sse_vs = (struct draw_sse_vertex_shader *)base;

   x86_release_func( &sse_vs->sse2_program );

   /* both buffers are held through const pointers, hence the casts */
   align_free( (void *) sse_vs->base.immediates );
   FREE( (void *) sse_vs->base.state.tokens );

   FREE( sse_vs );
}
/**
 * Create a vertex shader that runs as run-time generated SSE2 code.
 *
 * \param draw   draw context the shader belongs to; its TGSI machine
 *               (draw->vs.machine) is shared with the new shader
 * \param templ  shader state; the tokens are duplicated, so the caller
 *               keeps ownership of templ->tokens
 * \return the new shader, or NULL when the CPU lacks SSE2, an allocation
 *         fails, or code generation fails
 *
 * On every failure path all partially acquired resources (token copy,
 * immediates buffer, generated code) are released again.
 */
struct draw_vertex_shader *
draw_create_vs_sse(struct draw_context *draw,
                   const struct pipe_shader_state *templ)
{
   struct draw_sse_vertex_shader *vs;

   if (!rtasm_cpu_has_sse2())
      return NULL;

   vs = CALLOC_STRUCT( draw_sse_vertex_shader );
   if (vs == NULL)
      return NULL;

   /* Initialise the code buffer up front so that the fail path below
    * always releases a properly initialised x86_function.
    */
   x86_init_func( &vs->sse2_program );

   /* we make a private copy of the tokens */
   vs->base.state.tokens = tgsi_dup_tokens(templ->tokens);
   if (!vs->base.state.tokens)
      goto fail;

   tgsi_scan_shader(templ->tokens, &vs->base.info);

   vs->base.draw = draw;
   /* developer toggle between the AOS/SSE and the generic variant path */
   if (1)
      vs->base.create_variant = draw_vs_create_variant_aos_sse;
   else
      vs->base.create_variant = draw_vs_create_variant_generic;
   vs->base.prepare = vs_sse_prepare;
   vs->base.run_linear = vs_sse_run_linear;
   vs->base.delete = vs_sse_delete;

   vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
                                      sizeof(float), 16);
   if (!vs->base.immediates)
      goto fail;

   vs->machine = draw->vs.machine;

   if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
                        &vs->sse2_program,
                        (float (*)[4])vs->base.immediates,
                        TRUE ))
      goto fail;

   vs->func = (tgsi_sse2_vs_func) x86_get_func( &vs->sse2_program );
   if (!vs->func) {
      goto fail;
   }

   return &vs->base;

fail:
   if (0)
      debug_warning("tgsi_emit_sse2() failed, falling back to interpreter\n");

   x86_release_func( &vs->sse2_program );

   /* The shader object owns these two buffers; without freeing them here
    * every failed creation would leak them.  NULL is checked explicitly
    * because the NULL tolerance of align_free()/FREE() is not visible
    * from this file.
    */
   if (vs->base.immediates)
      align_free( (void *) vs->base.immediates );
   if (vs->base.state.tokens)
      FREE( (void *) vs->base.state.tokens );

   FREE(vs);
   return NULL;
}
#else
/**
 * Stub for builds without PIPE_ARCH_X86: there is no SSE code generator,
 * so no shader is ever created and the caller always gets a null pointer.
 */
struct draw_vertex_shader *
draw_create_vs_sse( struct draw_context *draw,
                    const struct pipe_shader_state *templ )
{
   (void) draw;
   (void) templ;
   return NULL;
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,80 +0,0 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#ifndef TGSI_SSE2_H
#define TGSI_SSE2_H
#if defined __cplusplus
extern "C" {
#endif
#include "pipe/p_compiler.h"
struct tgsi_exec_machine;
struct tgsi_interp_coef;
struct tgsi_token;
struct x86_function;
/**
 * Emit SSE2 machine code for a TGSI token stream.
 *
 * \param tokens       the TGSI shader to translate
 * \param function     x86 code buffer the instructions are emitted into
 * \param immediates   array of float[4] slots associated with the shader;
 *                     presumably filled with the shader's immediates by
 *                     this call -- TODO confirm against tgsi_sse2.c
 * \param do_swizzles  selects the signature of the generated entry point:
 *                     tgsi_sse2_vs_func when true, tgsi_sse2_fs_function
 *                     when false
 * \return presumably non-zero on success and zero on failure -- TODO
 *         confirm against tgsi_sse2.c
 */
unsigned
tgsi_emit_sse2(
const struct tgsi_token *tokens,
struct x86_function *function,
float (*immediates)[4],
boolean do_swizzles );
/* Signature of the code generated by tgsi_emit_sse2() when do_swizzles is
 * false -- effectively for fragment shaders.  The numbers in the comments
 * below give each argument's position in the PIPE_CDECL call.
 */
typedef void (PIPE_CDECL *tgsi_sse2_fs_function) (
struct tgsi_exec_machine *machine, /* 1 */
const float (*constant)[4], /* 2 */
const float (*immediate)[4], /* 3 */
const struct tgsi_interp_coef *coef /* 4 */
);
/* Signature of the code generated by tgsi_emit_sse2() when do_swizzles is
 * true -- effectively for vertex shaders.  Inputs and outputs are passed
 * as arrays of float[4] together with their count and stride.  The numbers
 * in the comments below give each argument's position in the PIPE_CDECL
 * call.
 */
typedef void (PIPE_CDECL *tgsi_sse2_vs_func) (
struct tgsi_exec_machine *machine, /* 1 */
const float (*constant)[4], /* 2 */
const float (*immediate)[4], /* 3 */
const float (*aos_input)[4], /* 4 */
uint num_inputs, /* 5 */
uint input_stride, /* 6 */
float (*aos_output)[4], /* 7 */
uint num_outputs, /* 8 */
uint output_stride ); /* 9 */
#if defined __cplusplus
}
#endif
#endif /* TGSI_SSE2_H */

View File

@ -26,7 +26,6 @@ LOCAL_PATH := $(call my-dir)
# from Makefile
C_SOURCES = \
sp_fs_exec.c \
sp_fs_sse.c \
sp_clear.c \
sp_fence.c \
sp_flush.c \

View File

@ -5,7 +5,6 @@ LIBNAME = softpipe
C_SOURCES = \
sp_fs_exec.c \
sp_fs_sse.c \
sp_clear.c \
sp_fence.c \
sp_flush.c \

View File

@ -6,7 +6,6 @@ softpipe = env.ConvenienceLibrary(
target = 'softpipe',
source = [
'sp_fs_exec.c',
'sp_fs_sse.c',
'sp_clear.c',
'sp_context.c',
'sp_draw_arrays.c',

View File

@ -235,12 +235,6 @@ softpipe_create_context( struct pipe_screen *screen,
util_init_math();
#ifdef PIPE_ARCH_X86
softpipe->use_sse = !debug_get_bool_option( "GALLIUM_NOSSE", FALSE );
#else
softpipe->use_sse = FALSE;
#endif
softpipe->dump_fs = debug_get_bool_option( "SOFTPIPE_DUMP_FS", FALSE );
softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE );

View File

@ -190,7 +190,6 @@ struct softpipe_context {
struct softpipe_tex_tile_cache *vertex_tex_cache[PIPE_MAX_VERTEX_SAMPLERS];
struct softpipe_tex_tile_cache *geometry_tex_cache[PIPE_MAX_GEOMETRY_SAMPLERS];
unsigned use_sse : 1;
unsigned dump_fs : 1;
unsigned dump_gs : 1;
unsigned no_rast : 1;

View File

@ -36,10 +36,6 @@ struct sp_fragment_shader_variant *
softpipe_create_fs_variant_exec(struct softpipe_context *softpipe,
const struct pipe_shader_state *templ);
struct sp_fragment_shader_variant *
softpipe_create_fs_variant_sse(struct softpipe_context *softpipe,
const struct pipe_shader_state *templ);
struct tgsi_interp_coef;
struct tgsi_exec_vector;

View File

@ -1,248 +0,0 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/**
* Execute fragment shader using runtime SSE code generation.
*/
#include "sp_context.h"
#include "sp_state.h"
#include "sp_fs.h"
#include "sp_quad.h"
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_exec.h"
#include "tgsi/tgsi_sse2.h"
#if defined(PIPE_ARCH_X86)
#include "rtasm/rtasm_x86sse.h"
/**
 * Subclass of sp_fragment_shader_variant that executes run-time generated
 * SSE2 code.
 *
 * `base` has to stay the first member: sp_sse_fragment_shader() casts a
 * base pointer directly to this type.
 */
struct sp_sse_fragment_shader
{
struct sp_fragment_shader_variant base;
/* Code buffer that tgsi_emit_sse2() emits the shader into. */
struct x86_function sse2_program;
/* Entry point obtained from sse2_program with x86_get_func(). */
tgsi_sse2_fs_function func;
/* Immediates storage handed to tgsi_emit_sse2() and to func on each run. */
float immediates[TGSI_EXEC_NUM_IMMEDIATES][4];
};
/**
 * Downcast from the generic fragment shader variant to the SSE subclass.
 * The const qualifier of the argument is dropped by the cast.
 */
static INLINE struct sp_sse_fragment_shader *
sp_sse_fragment_shader(const struct sp_fragment_shader_variant *base)
{
   struct sp_sse_fragment_shader *sse_fs =
      (struct sp_sse_fragment_shader *) base;

   return sse_fs;
}
/**
 * Prepare hook of the SSE variant: the only per-bind state is the sampler
 * array, which is attached to the TGSI machine.
 */
static void
fs_sse_prepare( const struct sp_fragment_shader_variant *base,
                struct tgsi_exec_machine *machine,
                struct tgsi_sampler **samplers )
{
   /* the variant itself carries nothing that needs preparing */
   (void) base;

   machine->Samplers = samplers;
}
/**
 * Fill `quadpos` with the X, Y, Z and W values of the four fragments of a
 * quad whose first fragment is at (x, y).
 *
 * X and Y are written directly; Z and W are evaluated from the plane
 * coefficients in `coef` (channels 2 and 3).
 *
 * This should really be part of the compiled shader.
 */
static void
setup_pos_vector(const struct tgsi_interp_coef *coef,
                 float x, float y,
                 struct tgsi_exec_vector *quadpos)
{
   float *xs = quadpos->xyzw[0].f;
   float *ys = quadpos->xyzw[1].f;
   uint chan;

   /* X: fragments 1 and 3 sit one pixel further along x */
   xs[0] = x;
   xs[1] = x + 1;
   xs[2] = x;
   xs[3] = x + 1;

   /* Y: fragments 2 and 3 sit one pixel further along y */
   ys[0] = y;
   ys[1] = y;
   ys[2] = y + 1;
   ys[3] = y + 1;

   /* Z and W: evaluate the plane equation at the first fragment, then
    * step by the x/y derivatives for the other three.
    */
   for (chan = 2; chan <= 3; chan++) {
      float *dst = quadpos->xyzw[chan].f;
      const float dadx = coef->dadx[chan];
      const float dady = coef->dady[chan];
      const float a0 = coef->a0[chan] + dadx * x + dady * y;

      dst[0] = a0;
      dst[1] = a0 + dadx;
      dst[2] = a0 + dady;
      dst[3] = a0 + dadx + dady;
   }
}
/**
 * Run the generated fragment shader on one quad.
 *
 * Sets up the quad position in machine->Temps, executes the SSE code,
 * applies the resulting kill mask to quad->inout.mask and, if any fragment
 * survives, copies the colour / depth / stencil outputs into the quad.
 *
 * \return FALSE when every fragment of the quad was killed, TRUE otherwise
 *
 * TODO: codegenerate the whole run function, skip this wrapper.
 * TODO: break dependency on tgsi_exec_machine struct
 * TODO: push Position calculation into the generated shader
 * TODO: process >1 quad at a time
 */
static unsigned
fs_sse_run( const struct sp_fragment_shader_variant *base,
            struct tgsi_exec_machine *machine,
            struct quad_header *quad )
{
   struct sp_sse_fragment_shader *sse_fs = sp_sse_fragment_shader(base);
   uint num_outputs;
   uint out;

   /* X, Y, Z, W of this quad go into temp[0] for now */
   setup_pos_vector(quad->posCoef,
                    (float)quad->input.x0, (float)quad->input.y0,
                    machine->Temps);

   /* start with nothing killed and every channel executing */
   tgsi_set_kill_mask(machine, 0x0);
   tgsi_set_exec_mask(machine, 1, 1, 1, 1);

   sse_fs->func( machine,
                 (const float (*)[4])machine->Consts[0],
                 (const float (*)[4])sse_fs->immediates,
                 machine->InterpCoefs
                 /*, &machine->QuadPos*/
               );

   /* drop the fragments the shader killed */
   quad->inout.mask &= ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]);
   if (!quad->inout.mask)
      return FALSE;

   /* store outputs */
   num_outputs = base->info.num_outputs;
   for (out = 0; out < num_outputs; out++) {
      const ubyte semantic = base->info.output_semantic_name[out];

      if (semantic == TGSI_SEMANTIC_COLOR) {
         uint cbuf = base->info.output_semantic_index[out];

         assert(sizeof(quad->output.color[cbuf]) ==
                sizeof(machine->Outputs[out]));

         /* copy float[4][4] result */
         memcpy(quad->output.color[cbuf],
                &machine->Outputs[out],
                sizeof(quad->output.color[0]) );
      }
      else if (semantic == TGSI_SEMANTIC_POSITION) {
         uint frag;

         /* depth is read from channel 2 of the position output */
         for (frag = 0; frag < 4; frag++)
            quad->output.depth[frag] = machine->Outputs[out].xyzw[2].f[frag];
      }
      else if (semantic == TGSI_SEMANTIC_STENCIL) {
         uint frag;

         /* stencil is read from channel 1 of the stencil output */
         for (frag = 0; frag < 4; frag++)
            quad->output.stencil[frag] = machine->Outputs[out].xyzw[1].f[frag];
      }
   }

   return TRUE;
}
/**
 * Destroy an SSE fragment shader variant: release the generated code, then
 * free the variant object (the immediates live inside it).
 */
static void
fs_sse_delete( struct sp_fragment_shader_variant *base )
{
   struct sp_sse_fragment_shader *sse_fs = sp_sse_fragment_shader(base);

   x86_release_func( &sse_fs->sse2_program );

   FREE(sse_fs);
}
/**
 * Create a fragment shader variant that runs as run-time generated SSE2
 * code.
 *
 * \param softpipe  context; SSE code generation is skipped when its
 *                  use_sse flag is clear
 * \param templ     shader state whose tokens are translated
 * \return the new variant, or NULL when SSE is disabled, allocation fails
 *         or code generation fails
 */
struct sp_fragment_shader_variant *
softpipe_create_fs_variant_sse(struct softpipe_context *softpipe,
                               const struct pipe_shader_state *templ)
{
   struct sp_sse_fragment_shader *shader;

   if (!softpipe->use_sse)
      return NULL;

   shader = CALLOC_STRUCT(sp_sse_fragment_shader);
   if (!shader)
      return NULL;

   x86_init_func( &shader->sse2_program );

   if (!tgsi_emit_sse2( templ->tokens, &shader->sse2_program,
                        shader->immediates, FALSE ))
      goto fail;

   shader->func = (tgsi_sse2_fs_function) x86_get_func( &shader->sse2_program );
   if (!shader->func)
      goto fail;

   shader->base.prepare = fs_sse_prepare;
   shader->base.run = fs_sse_run;
   shader->base.delete = fs_sse_delete;

   return &shader->base;

fail:
   /* Both failure points come after x86_init_func(), so the function is
    * always released here; previously a failed tgsi_emit_sse2() freed the
    * shader without releasing whatever had been emitted into it.
    */
   x86_release_func( &shader->sse2_program );
   FREE(shader);
   return NULL;
}
#else
/**
 * Stub for builds without PIPE_ARCH_X86: no SSE variant can be created,
 * so the caller always gets NULL.
 *
 * Maybe put this variant in the header file.
 */
struct sp_fragment_shader_variant *
softpipe_create_fs_variant_sse(struct softpipe_context *softpipe,
                               const struct pipe_shader_state *templ)
{
   (void) softpipe;
   (void) templ;
   return NULL;
}
#endif

View File

@ -65,10 +65,7 @@ create_fs_variant(struct softpipe_context *softpipe,
#endif
/* codegen, create variant object */
var = softpipe_create_fs_variant_sse(softpipe, curfs);
if (!var) {
var = softpipe_create_fs_variant_exec(softpipe, curfs);
}
var = softpipe_create_fs_variant_exec(softpipe, curfs);
if (var) {
var->key = *key;