Remove tgsi_sse2.
tgsi_exec is simple. llvm is fast. tgsi_sse2 ends up being neither.
This commit is contained in:
parent
207a016eca
commit
4eb3225b38
|
@ -33,12 +33,8 @@ C_SOURCES := \
|
|||
draw/draw_pt_vsplit.c \
|
||||
draw/draw_vertex.c \
|
||||
draw/draw_vs.c \
|
||||
draw/draw_vs_aos.c \
|
||||
draw/draw_vs_aos_io.c \
|
||||
draw/draw_vs_aos_machine.c \
|
||||
draw/draw_vs_exec.c \
|
||||
draw/draw_vs_ppc.c \
|
||||
draw/draw_vs_sse.c \
|
||||
draw/draw_vs_variant.c \
|
||||
os/os_misc.c \
|
||||
os/os_stream.c \
|
||||
|
@ -83,7 +79,6 @@ C_SOURCES := \
|
|||
tgsi/tgsi_ppc.c \
|
||||
tgsi/tgsi_sanity.c \
|
||||
tgsi/tgsi_scan.c \
|
||||
tgsi/tgsi_sse2.c \
|
||||
tgsi/tgsi_text.c \
|
||||
tgsi/tgsi_transform.c \
|
||||
tgsi/tgsi_ureg.c \
|
||||
|
|
|
@ -237,10 +237,6 @@ struct draw_context
|
|||
uint num_samplers;
|
||||
struct tgsi_sampler **samplers;
|
||||
|
||||
/* Here's another one:
|
||||
*/
|
||||
struct aos_machine *aos_machine;
|
||||
|
||||
|
||||
const void *aligned_constants[PIPE_MAX_CONSTANT_BUFFERS];
|
||||
|
||||
|
|
|
@ -81,14 +81,12 @@ draw_vs_set_constants(struct draw_context *draw,
|
|||
}
|
||||
|
||||
draw->vs.aligned_constants[slot] = constants;
|
||||
draw_vs_aos_machine_constants(draw->vs.aos_machine, slot, constants);
|
||||
}
|
||||
|
||||
|
||||
void draw_vs_set_viewport( struct draw_context *draw,
|
||||
const struct pipe_viewport_state *viewport )
|
||||
{
|
||||
draw_vs_aos_machine_viewport( draw->vs.aos_machine, viewport );
|
||||
}
|
||||
|
||||
|
||||
|
@ -103,22 +101,8 @@ draw_create_vertex_shader(struct draw_context *draw,
|
|||
tgsi_dump(shader->tokens, 0);
|
||||
}
|
||||
|
||||
if (!draw->pt.middle.llvm) {
|
||||
#if 0
|
||||
/* these paths don't support vertex clamping
|
||||
* TODO: either add it, or remove them completely
|
||||
* use LLVM instead if you want performance
|
||||
* use exec instead if you want debugging/more correctness
|
||||
*/
|
||||
#if defined(PIPE_ARCH_X86)
|
||||
vs = draw_create_vs_sse( draw, shader );
|
||||
#elif defined(PIPE_ARCH_PPC)
|
||||
vs = draw_create_vs_ppc( draw, shader );
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
#if HAVE_LLVM
|
||||
else {
|
||||
if (draw->pt.middle.llvm) {
|
||||
vs = draw_create_vs_llvm(draw, shader);
|
||||
}
|
||||
#endif
|
||||
|
@ -199,12 +183,6 @@ draw_vs_init( struct draw_context *draw )
|
|||
if (!draw->vs.fetch_cache)
|
||||
return FALSE;
|
||||
|
||||
draw->vs.aos_machine = draw_vs_aos_machine();
|
||||
#ifdef PIPE_ARCH_X86
|
||||
if (!draw->vs.aos_machine)
|
||||
return FALSE;
|
||||
#endif
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
@ -219,9 +197,6 @@ draw_vs_destroy( struct draw_context *draw )
|
|||
if (draw->vs.emit_cache)
|
||||
translate_cache_destroy(draw->vs.emit_cache);
|
||||
|
||||
if (draw->vs.aos_machine)
|
||||
draw_vs_aos_machine_destroy(draw->vs.aos_machine);
|
||||
|
||||
for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
|
||||
if (draw->vs.aligned_constant_storage[i]) {
|
||||
align_free((void *)draw->vs.aligned_constant_storage[i]);
|
||||
|
|
|
@ -158,10 +158,6 @@ struct draw_vertex_shader *
|
|||
draw_create_vs_exec(struct draw_context *draw,
|
||||
const struct pipe_shader_state *templ);
|
||||
|
||||
struct draw_vertex_shader *
|
||||
draw_create_vs_sse(struct draw_context *draw,
|
||||
const struct pipe_shader_state *templ);
|
||||
|
||||
struct draw_vertex_shader *
|
||||
draw_create_vs_ppc(struct draw_context *draw,
|
||||
const struct pipe_shader_state *templ);
|
||||
|
@ -170,10 +166,6 @@ draw_create_vs_ppc(struct draw_context *draw,
|
|||
struct draw_vs_variant_key;
|
||||
struct draw_vertex_shader;
|
||||
|
||||
struct draw_vs_variant *
|
||||
draw_vs_create_variant_aos_sse( struct draw_vertex_shader *vs,
|
||||
const struct draw_vs_variant_key *key );
|
||||
|
||||
#if HAVE_LLVM
|
||||
struct draw_vertex_shader *
|
||||
draw_create_vs_llvm(struct draw_context *draw,
|
||||
|
@ -214,18 +206,6 @@ static INLINE int draw_vs_variant_key_compare( const struct draw_vs_variant_key
|
|||
}
|
||||
|
||||
|
||||
struct aos_machine *draw_vs_aos_machine( void );
|
||||
void draw_vs_aos_machine_destroy( struct aos_machine *machine );
|
||||
|
||||
void
|
||||
draw_vs_aos_machine_constants(struct aos_machine *machine,
|
||||
unsigned slot,
|
||||
const void *constants);
|
||||
|
||||
void draw_vs_aos_machine_viewport( struct aos_machine *machine,
|
||||
const struct pipe_viewport_state *viewport );
|
||||
|
||||
|
||||
#define MAX_TGSI_VERTICES 4
|
||||
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,255 +0,0 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
/* Authors: Keith Whitwell <keith@tungstengraphics.com>
|
||||
*/
|
||||
|
||||
#ifndef DRAW_VS_AOS_H
|
||||
#define DRAW_VS_AOS_H
|
||||
|
||||
#include "pipe/p_config.h"
|
||||
#include "tgsi/tgsi_exec.h"
|
||||
#include "draw_vs.h"
|
||||
|
||||
#ifdef PIPE_ARCH_X86
|
||||
|
||||
struct tgsi_token;
|
||||
struct x86_function;
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
#include "rtasm/rtasm_x86sse.h"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#define X 0
|
||||
#define Y 1
|
||||
#define Z 2
|
||||
#define W 3
|
||||
|
||||
#define MAX_INPUTS PIPE_MAX_ATTRIBS
|
||||
#define MAX_OUTPUTS PIPE_MAX_SHADER_OUTPUTS
|
||||
#define MAX_TEMPS TGSI_EXEC_NUM_TEMPS
|
||||
#define MAX_CONSTANTS 1024 /** only used for sanity checking */
|
||||
#define MAX_IMMEDIATES 1024 /** only used for sanity checking */
|
||||
#define MAX_INTERNALS 8 /** see IMM_x values below */
|
||||
|
||||
#define AOS_FILE_INTERNAL TGSI_FILE_COUNT
|
||||
|
||||
#define FPU_RND_NEG 1
|
||||
#define FPU_RND_NEAREST 2
|
||||
|
||||
struct aos_machine;
|
||||
typedef void (PIPE_CDECL *lit_func)( struct aos_machine *,
|
||||
float *result,
|
||||
const float *in,
|
||||
unsigned count );
|
||||
|
||||
void PIPE_CDECL aos_do_lit( struct aos_machine *machine,
|
||||
float *result,
|
||||
const float *in,
|
||||
unsigned count );
|
||||
|
||||
/**
 * One cached specular lookup table, used by the LIT helpers.
 */
struct shine_tab {
   float exponent;       /* unclamped exponent the table was built for */
   float values[258];    /* table samples; entry 0 is written as 0 by the populate helper */
   unsigned last_used;   /* copy of aos_machine::now taken when the table was last selected */
};
|
||||
|
||||
struct lit_info {
|
||||
lit_func func;
|
||||
struct shine_tab *shine_tab;
|
||||
};
|
||||
|
||||
#define MAX_SHINE_TAB 4
|
||||
#define MAX_LIT_INFO 16
|
||||
|
||||
/**
 * Description of one vertex buffer as seen by the generated code.
 */
struct aos_buffer {
   const void *base_ptr;   /* start of the buffer data */
   unsigned stride;        /* byte distance between consecutive vertices */
   void *ptr;              /* running pointer, updated per vertex */
};
|
||||
|
||||
|
||||
|
||||
|
||||
/* This is the temporary storage used by all the aos_sse vs variants.
|
||||
* Create one per context and reuse by passing a pointer in at
|
||||
* vs_variant creation??
|
||||
*/
|
||||
struct aos_machine {
|
||||
float input [MAX_INPUTS ][4];
|
||||
float output [MAX_OUTPUTS ][4];
|
||||
float temp [MAX_TEMPS ][4];
|
||||
float internal [MAX_INTERNALS ][4];
|
||||
|
||||
float scale[4]; /* viewport */
|
||||
float translate[4]; /* viewport */
|
||||
|
||||
float tmp[2][4]; /* scratch space for LIT */
|
||||
|
||||
struct shine_tab shine_tab[MAX_SHINE_TAB];
|
||||
struct lit_info lit_info[MAX_LIT_INFO];
|
||||
unsigned now;
|
||||
|
||||
|
||||
ushort fpu_rnd_nearest;
|
||||
ushort fpu_rnd_neg_inf;
|
||||
ushort fpu_restore;
|
||||
ushort fpucntl; /* one of FPU_* above */
|
||||
|
||||
const float (*immediates)[4]; /* points to shader data */
|
||||
const void *constants[PIPE_MAX_CONSTANT_BUFFERS]; /* points to draw data */
|
||||
|
||||
const struct aos_buffer *buffer; /* points to ? */
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
struct aos_compilation {
|
||||
struct x86_function *func;
|
||||
struct draw_vs_variant_aos_sse *vaos;
|
||||
|
||||
unsigned insn_counter;
|
||||
unsigned num_immediates;
|
||||
unsigned count;
|
||||
unsigned lit_count;
|
||||
|
||||
struct {
|
||||
unsigned idx:16;
|
||||
unsigned file:8;
|
||||
unsigned dirty:8;
|
||||
unsigned last_used;
|
||||
} xmm[8];
|
||||
|
||||
unsigned x86_reg[2]; /* one of X86_* */
|
||||
|
||||
boolean input_fetched[PIPE_MAX_ATTRIBS];
|
||||
unsigned output_last_write[PIPE_MAX_ATTRIBS];
|
||||
|
||||
boolean have_sse2;
|
||||
boolean error;
|
||||
short fpucntl;
|
||||
|
||||
/* these are actually known values, but putting them in a struct
|
||||
* like this is helpful to keep them in sync across the file.
|
||||
*/
|
||||
struct x86_reg tmp_EAX;
|
||||
struct x86_reg idx_EBX; /* either start+i or &elt[i] */
|
||||
struct x86_reg outbuf_ECX;
|
||||
struct x86_reg machine_EDX;
|
||||
struct x86_reg count_ESI; /* decrements to zero */
|
||||
struct x86_reg temp_EBP;
|
||||
struct x86_reg stack_ESP;
|
||||
};
|
||||
|
||||
struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp );
|
||||
void aos_release_xmm_reg( struct aos_compilation *cp, unsigned idx );
|
||||
|
||||
void aos_adopt_xmm_reg( struct aos_compilation *cp,
|
||||
struct x86_reg reg,
|
||||
unsigned file,
|
||||
unsigned idx,
|
||||
unsigned dirty );
|
||||
|
||||
void aos_spill_all( struct aos_compilation *cp );
|
||||
|
||||
struct x86_reg aos_get_shader_reg( struct aos_compilation *cp,
|
||||
unsigned file,
|
||||
unsigned idx );
|
||||
|
||||
boolean aos_init_inputs( struct aos_compilation *cp, boolean linear );
|
||||
boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear );
|
||||
boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear );
|
||||
|
||||
boolean aos_emit_outputs( struct aos_compilation *cp );
|
||||
|
||||
|
||||
#define IMM_ONES 0 /* 1, 1,1,1 */
|
||||
#define IMM_SWZ 1 /* 1,-1,0, 0xffffffff */
|
||||
#define IMM_IDENTITY 2 /* 0, 0,0,1 */
|
||||
#define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */
|
||||
#define IMM_255 4 /* 255, 255, 255, 255 */
|
||||
#define IMM_NEGS 5 /* -1,-1,-1,-1 */
|
||||
#define IMM_RSQ 6 /* -.5,1.5,_,_ */
|
||||
#define IMM_PSIZE 7 /* not really an immediate - updated each run */
|
||||
|
||||
struct x86_reg aos_get_internal( struct aos_compilation *cp,
|
||||
unsigned imm );
|
||||
struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp,
|
||||
unsigned imm );
|
||||
|
||||
|
||||
/**
 * Flag a translation failure on the compilation context `cp` by setting
 * its `error` member to 1.
 *
 * The diagnostic print is compiled but disabled (`if (0)`); change the
 * condition to enable it.  Both macro arguments are parenthesized so
 * that expression arguments (e.g. `ctx + 1`) expand correctly.
 */
#define AOS_ERROR(cp, msg)                                                          \
do {                                                                                \
   if (0) debug_printf("%s: x86 translation failed: %s\n", __FUNCTION__, (msg));    \
   (cp)->error = 1;                                                                 \
} while (0)
|
||||
|
||||
|
||||
#define X86_NULL 0
|
||||
#define X86_IMMEDIATES 1
|
||||
#define X86_CONSTANTS 2
|
||||
#define X86_BUFFERS 3
|
||||
|
||||
struct x86_reg aos_get_x86( struct aos_compilation *cp,
|
||||
unsigned which_reg,
|
||||
unsigned value );
|
||||
|
||||
|
||||
typedef void (PIPE_CDECL *vaos_run_elts_func)( struct aos_machine *,
|
||||
const unsigned *elts,
|
||||
unsigned count,
|
||||
void *output_buffer);
|
||||
|
||||
typedef void (PIPE_CDECL *vaos_run_linear_func)( struct aos_machine *,
|
||||
unsigned start,
|
||||
unsigned count,
|
||||
void *output_buffer);
|
||||
|
||||
|
||||
struct draw_vs_variant_aos_sse {
|
||||
struct draw_vs_variant base;
|
||||
struct draw_context *draw;
|
||||
|
||||
struct aos_buffer *buffer;
|
||||
unsigned nr_vb;
|
||||
|
||||
vaos_run_linear_func gen_run_linear;
|
||||
vaos_run_elts_func gen_run_elts;
|
||||
|
||||
|
||||
struct x86_function func[2];
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -1,460 +0,0 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
|
||||
#include "util/u_memory.h"
|
||||
#include "pipe/p_shader_tokens.h"
|
||||
#include "tgsi/tgsi_parse.h"
|
||||
#include "tgsi/tgsi_util.h"
|
||||
#include "tgsi/tgsi_exec.h"
|
||||
#include "draw_vs.h"
|
||||
#include "draw_vs_aos.h"
|
||||
#include "draw_vertex.h"
|
||||
|
||||
#include "rtasm/rtasm_x86sse.h"
|
||||
|
||||
#ifdef PIPE_ARCH_X86
|
||||
|
||||
/* Note - don't yet have to worry about interacting with the code in
|
||||
* draw_vs_aos.c as there is no intermingling of generated code...
|
||||
* That may have to change, we'll see.
|
||||
*/
|
||||
static void emit_load_R32G32B32A32( struct aos_compilation *cp,
|
||||
struct x86_reg data,
|
||||
struct x86_reg src_ptr )
|
||||
{
|
||||
sse_movups(cp->func, data, src_ptr);
|
||||
}
|
||||
|
||||
static void emit_load_R32G32B32( struct aos_compilation *cp,
|
||||
struct x86_reg data,
|
||||
struct x86_reg src_ptr )
|
||||
{
|
||||
#if 1
|
||||
sse_movss(cp->func, data, x86_make_disp(src_ptr, 8));
|
||||
/* data = z ? ? ? */
|
||||
sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) );
|
||||
/* data = z ? 0 1 */
|
||||
sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) );
|
||||
/* data = ? 0 z 1 */
|
||||
sse_movlps(cp->func, data, src_ptr);
|
||||
/* data = x y z 1 */
|
||||
#else
|
||||
sse_movups(cp->func, data, src_ptr);
|
||||
/* data = x y z ? */
|
||||
sse2_pshufd(cp->func, data, data, SHUF(W,X,Y,Z) );
|
||||
/* data = ? x y z */
|
||||
sse_movss(cp->func, data, aos_get_internal_xmm( cp, IMM_ONES ) );
|
||||
/* data = 1 x y z */
|
||||
sse2_pshufd(cp->func, data, data, SHUF(Y,Z,W,X) );
|
||||
/* data = x y z 1 */
|
||||
#endif
|
||||
}
|
||||
|
||||
static void emit_load_R32G32( struct aos_compilation *cp,
|
||||
struct x86_reg data,
|
||||
struct x86_reg src_ptr )
|
||||
{
|
||||
sse_movups(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
|
||||
sse_movlps(cp->func, data, src_ptr);
|
||||
}
|
||||
|
||||
|
||||
static void emit_load_R32( struct aos_compilation *cp,
|
||||
struct x86_reg data,
|
||||
struct x86_reg src_ptr )
|
||||
{
|
||||
sse_movss(cp->func, data, src_ptr);
|
||||
sse_orps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
|
||||
}
|
||||
|
||||
|
||||
static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
|
||||
struct x86_reg data,
|
||||
struct x86_reg src_ptr )
|
||||
{
|
||||
sse_movss(cp->func, data, src_ptr);
|
||||
sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
|
||||
sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
|
||||
sse2_cvtdq2ps(cp->func, data, data);
|
||||
sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255));
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Extended swizzles? Maybe later.
|
||||
*/
|
||||
static void emit_swizzle( struct aos_compilation *cp,
|
||||
struct x86_reg dest,
|
||||
struct x86_reg src,
|
||||
ubyte shuffle )
|
||||
{
|
||||
sse_shufps(cp->func, dest, src, shuffle);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
 * Emit code that leaves the address of the current vertex of buffer
 * buf_idx in the register ptr.
 *
 * Linear fetch: ptr receives the buffer's stored running pointer; the
 * stride is then added (using elt as a scratch register, so elt is
 * overwritten) and the advanced pointer is stored back.  For buffer 0 a
 * prefetch 192 bytes past the advanced pointer is also emitted.
 *
 * Indexed fetch: ptr = stride * elt + base_ptr.
 *
 * Bumps cp->insn_counter once.  Always returns TRUE.
 */
static boolean get_buffer_ptr( struct aos_compilation *cp,
                               boolean linear,
                               unsigned buf_idx,
                               struct x86_reg elt,
                               struct x86_reg ptr)
{
   struct x86_reg buffers = aos_get_x86( cp, 0, X86_BUFFERS );
   struct x86_reg this_buf = x86_make_disp(buffers,
                                           buf_idx * sizeof(struct aos_buffer));
   struct x86_reg stride_field = x86_make_disp(this_buf,
                                               Offset(struct aos_buffer, stride));

   if (!linear) {
      struct x86_reg base_field = x86_make_disp(this_buf,
                                                Offset(struct aos_buffer, base_ptr));

      /* ptr = stride * elt + base */
      x86_mov(cp->func, ptr, stride_field);
      x86_imul(cp->func, ptr, elt);
      x86_add(cp->func, ptr, base_field);
   }
   else {
      struct x86_reg cursor_field = x86_make_disp(this_buf,
                                                  Offset(struct aos_buffer, ptr));

      /* ptr = cursor; cursor += stride (computed in elt) */
      x86_mov(cp->func, ptr, cursor_field);
      x86_mov(cp->func, elt, stride_field);
      x86_add(cp->func, elt, ptr);
      if (buf_idx == 0)
         sse_prefetchnta(cp->func, x86_make_disp(elt, 192));
      x86_mov(cp->func, cursor_field, elt);
   }

   cp->insn_counter++;

   return TRUE;
}
|
||||
|
||||
|
||||
static boolean load_input( struct aos_compilation *cp,
|
||||
unsigned idx,
|
||||
struct x86_reg bufptr )
|
||||
{
|
||||
unsigned format = cp->vaos->base.key.element[idx].in.format;
|
||||
unsigned offset = cp->vaos->base.key.element[idx].in.offset;
|
||||
struct x86_reg dataXMM = aos_get_xmm_reg(cp);
|
||||
|
||||
/* Figure out source pointer address:
|
||||
*/
|
||||
struct x86_reg src = x86_make_disp(bufptr, offset);
|
||||
|
||||
aos_adopt_xmm_reg( cp,
|
||||
dataXMM,
|
||||
TGSI_FILE_INPUT,
|
||||
idx,
|
||||
TRUE );
|
||||
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_R32_FLOAT:
|
||||
emit_load_R32(cp, dataXMM, src);
|
||||
break;
|
||||
case PIPE_FORMAT_R32G32_FLOAT:
|
||||
emit_load_R32G32(cp, dataXMM, src);
|
||||
break;
|
||||
case PIPE_FORMAT_R32G32B32_FLOAT:
|
||||
emit_load_R32G32B32(cp, dataXMM, src);
|
||||
break;
|
||||
case PIPE_FORMAT_R32G32B32A32_FLOAT:
|
||||
emit_load_R32G32B32A32(cp, dataXMM, src);
|
||||
break;
|
||||
case PIPE_FORMAT_A8R8G8B8_UNORM:
|
||||
emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);
|
||||
emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
|
||||
break;
|
||||
case PIPE_FORMAT_R8G8B8A8_UNORM:
|
||||
emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);
|
||||
break;
|
||||
default:
|
||||
AOS_ERROR(cp, "unhandled input format");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static boolean load_inputs( struct aos_compilation *cp,
|
||||
unsigned buffer,
|
||||
struct x86_reg ptr )
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < cp->vaos->base.key.nr_inputs; i++) {
|
||||
if (cp->vaos->base.key.element[i].in.buffer == buffer) {
|
||||
|
||||
if (!load_input( cp, i, ptr ))
|
||||
return FALSE;
|
||||
|
||||
cp->insn_counter++;
|
||||
}
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/**
 * Emit the per-run input setup code for every vertex buffer.
 *
 * - Buffers flagged in key.const_vbuffers are loaded once here and
 *   spilled to the machine's input storage via aos_spill_all().
 * - For linear runs, the address of the first vertex
 *   (stride * start + base_ptr) is computed.  With a single vertex
 *   buffer it is kept in the index register itself; otherwise it is
 *   stored in that buffer's running pointer.
 * - Indexed runs need no setup for non-constant buffers.
 *
 * \param cp      compilation state
 * \param linear  TRUE when generating the linear (non-indexed) run
 * \return FALSE if loading a constant buffer's inputs failed (this
 *         result used to be ignored), TRUE otherwise.
 */
boolean aos_init_inputs( struct aos_compilation *cp, boolean linear )
{
   unsigned i;

   for (i = 0; i < cp->vaos->nr_vb; i++) {
      struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
                                         i * sizeof(struct aos_buffer));

      struct x86_reg buf_base_ptr = x86_make_disp(buf,
                                                  Offset(struct aos_buffer, base_ptr));

      /* Unsigned shift: avoids shifting into the sign bit of an int. */
      if (cp->vaos->base.key.const_vbuffers & (1u << i)) {
         struct x86_reg ptr = cp->tmp_EAX;

         x86_mov(cp->func, ptr, buf_base_ptr);

         /* Load all inputs for this constant vertex buffer, reporting
          * failure to the caller instead of silently dropping it.
          */
         if (!load_inputs( cp, i, x86_deref(ptr) ))
            return FALSE;

         /* Then just force them out to aos_machine.input[] */
         aos_spill_all( cp );
      }
      else if (linear) {
         struct x86_reg elt = cp->idx_EBX;
         struct x86_reg ptr = cp->tmp_EAX;

         struct x86_reg buf_stride = x86_make_disp(buf,
                                                   Offset(struct aos_buffer, stride));

         struct x86_reg buf_ptr = x86_make_disp(buf,
                                                Offset(struct aos_buffer, ptr));

         /* Calculate pointer to current attrib: */
         x86_mov(cp->func, ptr, buf_stride);
         x86_imul(cp->func, ptr, elt);
         x86_add(cp->func, ptr, buf_base_ptr);

         /* In the linear case, keep the buffer pointer instead of the
          * index number.
          */
         if (cp->vaos->nr_vb == 1)
            x86_mov( cp->func, elt, ptr );
         else
            x86_mov( cp->func, buf_ptr, ptr );

         cp->insn_counter++;
      }
   }

   return TRUE;
}
|
||||
|
||||
/**
 * Emit the per-vertex input fetch code for every vertex buffer.
 *
 * - Constant buffers need nothing here; their inputs were loaded
 *   during aos_init_inputs().
 * - A linear run over a single vertex buffer reads directly through
 *   the index register, which holds the vertex pointer in that mode.
 * - Otherwise the vertex address is computed with get_buffer_ptr() and
 *   the inputs are loaded through the temporary register.
 *
 * \param cp      compilation state
 * \param linear  TRUE when generating the linear (non-indexed) run
 * \return FALSE if any load could not be emitted.  The single-buffer
 *         linear path used to ignore that failure; it now reports it
 *         like the general path does.
 */
boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
{
   unsigned j;

   for (j = 0; j < cp->vaos->nr_vb; j++) {
      /* Unsigned shift: avoids shifting into the sign bit of an int. */
      if (cp->vaos->base.key.const_vbuffers & (1u << j)) {
         /* just retrieve pre-transformed input */
      }
      else if (linear && cp->vaos->nr_vb == 1) {
         if (!load_inputs( cp, 0, cp->idx_EBX ))
            return FALSE;
      }
      else {
         struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX);
         struct x86_reg ptr = cp->tmp_EAX;

         if (!get_buffer_ptr( cp, linear, j, elt, ptr ))
            return FALSE;

         if (!load_inputs( cp, j, ptr ))
            return FALSE;
      }
   }

   return TRUE;
}
|
||||
|
||||
/**
 * Emit the code that advances the input cursor to the next vertex.
 *
 * - Linear run, one vertex buffer: add buffer 0's stride to the index
 *   register and prefetch 192 bytes ahead of it.
 * - Linear run, several buffers: nothing is emitted here.
 * - Indexed run: step the index register by 4 bytes.
 *
 * Always returns TRUE.
 */
boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear )
{
   if (!linear) {
      /* Move on to the next element index. */
      x86_lea(cp->func, cp->idx_EBX, x86_make_disp(cp->idx_EBX, 4));
      return TRUE;
   }

   if (cp->vaos->nr_vb == 1) {
      /* Stride field of the first (only) buffer. */
      struct x86_reg buffers = aos_get_x86( cp, 0, X86_BUFFERS );
      struct x86_reg stride = x86_make_disp(buffers,
                                            Offset(struct aos_buffer, stride));

      x86_add(cp->func, cp->idx_EBX, stride);
      sse_prefetchnta(cp->func, x86_make_disp(cp->idx_EBX, 192));
   }

   /* Linear with several buffers: nothing to do. */
   return TRUE;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
static void emit_store_R32G32B32A32( struct aos_compilation *cp,
|
||||
struct x86_reg dst_ptr,
|
||||
struct x86_reg dataXMM )
|
||||
{
|
||||
sse_movups(cp->func, dst_ptr, dataXMM);
|
||||
}
|
||||
|
||||
static void emit_store_R32G32B32( struct aos_compilation *cp,
|
||||
struct x86_reg dst_ptr,
|
||||
struct x86_reg dataXMM )
|
||||
{
|
||||
sse_movlps(cp->func, dst_ptr, dataXMM);
|
||||
sse_shufps(cp->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
|
||||
sse_movss(cp->func, x86_make_disp(dst_ptr,8), dataXMM);
|
||||
}
|
||||
|
||||
static void emit_store_R32G32( struct aos_compilation *cp,
|
||||
struct x86_reg dst_ptr,
|
||||
struct x86_reg dataXMM )
|
||||
{
|
||||
sse_movlps(cp->func, dst_ptr, dataXMM);
|
||||
}
|
||||
|
||||
static void emit_store_R32( struct aos_compilation *cp,
|
||||
struct x86_reg dst_ptr,
|
||||
struct x86_reg dataXMM )
|
||||
{
|
||||
sse_movss(cp->func, dst_ptr, dataXMM);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void emit_store_R8G8B8A8_UNORM( struct aos_compilation *cp,
|
||||
struct x86_reg dst_ptr,
|
||||
struct x86_reg dataXMM )
|
||||
{
|
||||
sse_mulps(cp->func, dataXMM, aos_get_internal(cp, IMM_255));
|
||||
sse2_cvtps2dq(cp->func, dataXMM, dataXMM);
|
||||
sse2_packssdw(cp->func, dataXMM, dataXMM);
|
||||
sse2_packuswb(cp->func, dataXMM, dataXMM);
|
||||
sse_movss(cp->func, dst_ptr, dataXMM);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
static boolean emit_output( struct aos_compilation *cp,
|
||||
struct x86_reg ptr,
|
||||
struct x86_reg dataXMM,
|
||||
enum attrib_emit format )
|
||||
{
|
||||
switch (format) {
|
||||
case EMIT_1F:
|
||||
case EMIT_1F_PSIZE:
|
||||
emit_store_R32(cp, ptr, dataXMM);
|
||||
break;
|
||||
case EMIT_2F:
|
||||
emit_store_R32G32(cp, ptr, dataXMM);
|
||||
break;
|
||||
case EMIT_3F:
|
||||
emit_store_R32G32B32(cp, ptr, dataXMM);
|
||||
break;
|
||||
case EMIT_4F:
|
||||
emit_store_R32G32B32A32(cp, ptr, dataXMM);
|
||||
break;
|
||||
case EMIT_4UB:
|
||||
emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
|
||||
break;
|
||||
case EMIT_4UB_BGRA:
|
||||
emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
|
||||
emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
|
||||
break;
|
||||
default:
|
||||
AOS_ERROR(cp, "unhandled output format");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
boolean aos_emit_outputs( struct aos_compilation *cp )
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < cp->vaos->base.key.nr_outputs; i++) {
|
||||
enum attrib_emit format = cp->vaos->base.key.element[i].out.format;
|
||||
unsigned offset = cp->vaos->base.key.element[i].out.offset;
|
||||
unsigned vs_output = cp->vaos->base.key.element[i].out.vs_output;
|
||||
|
||||
struct x86_reg data;
|
||||
|
||||
if (format == EMIT_1F_PSIZE) {
|
||||
data = aos_get_internal_xmm( cp, IMM_PSIZE );
|
||||
}
|
||||
else {
|
||||
data = aos_get_shader_reg( cp,
|
||||
TGSI_FILE_OUTPUT,
|
||||
vs_output );
|
||||
}
|
||||
|
||||
if (data.file != file_XMM) {
|
||||
struct x86_reg tmp = aos_get_xmm_reg( cp );
|
||||
sse_movaps(cp->func, tmp, data);
|
||||
data = tmp;
|
||||
}
|
||||
|
||||
if (!emit_output( cp,
|
||||
x86_make_disp( cp->outbuf_ECX, offset ),
|
||||
data,
|
||||
format ))
|
||||
return FALSE;
|
||||
|
||||
aos_release_xmm_reg( cp, data.idx );
|
||||
|
||||
cp->insn_counter++;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
#endif
|
|
@ -1,328 +0,0 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
|
||||
#include "pipe/p_config.h"
|
||||
|
||||
|
||||
#include "pipe/p_shader_tokens.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "tgsi/tgsi_parse.h"
|
||||
#include "tgsi/tgsi_util.h"
|
||||
#include "tgsi/tgsi_exec.h"
|
||||
#include "draw_vs.h"
|
||||
#include "draw_vs_aos.h"
|
||||
#include "draw_vertex.h"
|
||||
|
||||
#ifdef PIPE_ARCH_X86
|
||||
|
||||
#include "rtasm/rtasm_x86sse.h"
|
||||
|
||||
|
||||
#define X87_CW_EXCEPTION_INV_OP (1<<0)
|
||||
#define X87_CW_EXCEPTION_DENORM_OP (1<<1)
|
||||
#define X87_CW_EXCEPTION_ZERO_DIVIDE (1<<2)
|
||||
#define X87_CW_EXCEPTION_OVERFLOW (1<<3)
|
||||
#define X87_CW_EXCEPTION_UNDERFLOW (1<<4)
|
||||
#define X87_CW_EXCEPTION_PRECISION (1<<5)
|
||||
#define X87_CW_PRECISION_SINGLE (0<<8)
|
||||
#define X87_CW_PRECISION_RESERVED (1<<8)
|
||||
#define X87_CW_PRECISION_DOUBLE (2<<8)
|
||||
#define X87_CW_PRECISION_DOUBLE_EXT (3<<8)
|
||||
#define X87_CW_PRECISION_MASK (3<<8)
|
||||
#define X87_CW_ROUND_NEAREST (0<<10)
|
||||
#define X87_CW_ROUND_DOWN (1<<10)
|
||||
#define X87_CW_ROUND_UP (2<<10)
|
||||
#define X87_CW_ROUND_ZERO (3<<10)
|
||||
#define X87_CW_ROUND_MASK (3<<10)
|
||||
#define X87_CW_INFINITY (1<<12)
|
||||
|
||||
|
||||
/**
 * Evaluate LIT directly, calling powf() for the specular term.
 *
 *   in[0] <= 0 (or NaN):  result = (1, 0,     0,                   1)
 *   in[1] <= 0:           result = (1, in[0], 0,                   1)
 *   otherwise:            result = (1, in[0], powf(in[1], in[3]'), 1)
 *
 * where in[3]' is in[3] clamped to +/-(128 - 1/256).
 *
 * machine and count are unused here; the signature matches lit_func.
 * Stores happen in the same order as before, in case result and in
 * overlap.
 */
void PIPE_CDECL aos_do_lit( struct aos_machine *machine,
                            float *result,
                            const float *in,
                            unsigned count )
{
   if (!(in[0] > 0)) {
      result[0] = 1.0F;
      result[1] = 0.0;
      result[2] = 0.0;
      result[3] = 1.0F;
      return;
   }

   if (in[1] <= 0.0) {
      result[0] = 1.0F;
      result[1] = in[0];
      result[2] = 0.0F;
      result[3] = 1.0F;
      return;
   }

   {
      const float eps = 1.0F / 256.0F;
      float power = CLAMP(in[3], -(128.0F - eps), (128.0F - eps));

      result[0] = 1.0F;
      result[1] = in[0];
      result[2] = powf(in[1], power);
      result[3] = 1.0;
   }
}
|
||||
|
||||
|
||||
/**
 * Evaluate LIT using the lookup table attached to LIT slot `count`.
 *
 * The table is used only when its recorded exponent equals in[3] and
 * in[1] <= 1; the specular term is then linearly interpolated between
 * neighbouring table entries at in[1] * 256.  If the exponent differs,
 * the slot's evaluator is switched to aos_do_lit for subsequent calls,
 * and this call falls back to powf().  The powf() fallback is also
 * taken when in[1] > 1.
 *
 * The early-out cases (in[0] <= 0, in[1] <= 0) match aos_do_lit().
 */
static void PIPE_CDECL do_lit_lut( struct aos_machine *machine,
                                   float *result,
                                   const float *in,
                                   unsigned count )
{
   if (!(in[0] > 0)) {
      result[0] = 1.0F;
      result[1] = 0.0;
      result[2] = 0.0;
      result[3] = 1.0F;
      return;
   }

   if (in[1] <= 0.0) {
      result[0] = 1.0F;
      result[1] = in[0];
      result[2] = 0.0F;
      result[3] = 1.0F;
      return;
   }

   if (machine->lit_info[count].shine_tab->exponent != in[3]) {
      /* Table is stale for this exponent: stop using it for this slot. */
      machine->lit_info[count].func = aos_do_lit;
   }
   else if (in[1] <= 1.0) {
      const float *samples = machine->lit_info[count].shine_tab->values;
      float pos = in[1] * 256;
      int lo = (int)pos;
      float t = pos - (float)lo;

      result[0] = 1.0F;
      result[1] = in[0];
      result[2] = samples[lo] + t*(samples[lo+1]-samples[lo]);
      result[3] = 1.0;
      return;
   }

   /* No luck with the table: compute the power directly. */
   {
      const float eps = 1.0F / 256.0F;
      float power = CLAMP(in[3], -(128.0F - eps), (128.0F - eps));

      result[0] = 1.0F;
      result[1] = in[0];
      result[2] = powf(in[1], power);
      result[3] = 1.0;
   }
}
|
||||
|
||||
|
||||
static void do_populate_lut( struct shine_tab *tab,
|
||||
float unclamped_exponent )
|
||||
{
|
||||
const float epsilon = 1.0F / 256.0F;
|
||||
float exponent = CLAMP(unclamped_exponent, -(128.0F - epsilon), (128.0F - epsilon));
|
||||
unsigned i;
|
||||
|
||||
tab->exponent = unclamped_exponent; /* for later comparison */
|
||||
|
||||
tab->values[0] = 0;
|
||||
if (exponent == 0) {
|
||||
for (i = 1; i < 258; i++) {
|
||||
tab->values[i] = 1.0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (i = 1; i < 258; i++) {
|
||||
tab->values[i] = powf((float)i * epsilon, exponent);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
 * Bind a shine table to lit_info[count] and evaluate LIT through it.
 *
 * An existing table whose stored exponent equals in[3] is reused.  Otherwise
 * the table with the smallest last_used stamp is rebuilt for in[3], unless
 * even that table already carries the current stamp (machine->now); in that
 * case the slot is switched to aos_do_lit, which is called directly.
 *
 * The table count comes from MAX_SHINE_TAB, the same bound the machine
 * constructor uses when it initialises machine->shine_tab[], rather than a
 * separately hard-coded number.
 *
 * \param machine  machine owning the shine tables and lit_info slots
 * \param result   output, forwarded to the selected LIT routine
 * \param in       input, forwarded; in[3] is the exponent searched for
 * \param count    index of the lit_info slot being set up
 */
static void PIPE_CDECL populate_lut( struct aos_machine *machine,
                                     float *result,
                                     const float *in,
                                     unsigned count )
{
   unsigned i, tab;

   /* Search for an existing table for this value.  Note that without
    * static analysis we don't really know if in[3] will be constant,
    * but it usually is...
    */
   for (tab = 0; tab < MAX_SHINE_TAB; tab++) {
      if (machine->shine_tab[tab].exponent == in[3]) {
         goto found;
      }
   }

   /* No match: choose the table with the oldest last_used stamp. */
   for (tab = 0, i = 1; i < MAX_SHINE_TAB; i++) {
      if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used)
         tab = i;
   }

   if (machine->shine_tab[tab].last_used == machine->now) {
      /* No unused tables (this is not a ffvertex program...).  Just
       * call pow each time:
       */
      machine->lit_info[count].func = aos_do_lit;
      machine->lit_info[count].func( machine, result, in, count );
      return;
   }

   do_populate_lut( &machine->shine_tab[tab], in[3] );

found:
   machine->shine_tab[tab].last_used = machine->now;
   machine->lit_info[count].shine_tab = &machine->shine_tab[tab];
   machine->lit_info[count].func = do_lit_lut;
   machine->lit_info[count].func( machine, result, in, count );
}
|
||||
|
||||
|
||||
void
|
||||
draw_vs_aos_machine_constants(struct aos_machine *machine,
|
||||
unsigned slot,
|
||||
const void *constants)
|
||||
{
|
||||
machine->constants[slot] = constants;
|
||||
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < MAX_LIT_INFO; i++) {
|
||||
machine->lit_info[i].func = populate_lut;
|
||||
machine->now++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void draw_vs_aos_machine_viewport( struct aos_machine *machine,
|
||||
const struct pipe_viewport_state *viewport )
|
||||
{
|
||||
memcpy(machine->scale, viewport->scale, 4 * sizeof(float));
|
||||
memcpy(machine->translate, viewport->translate, 4 * sizeof(float));
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
 * Free a machine; draw_vs_aos_machine() obtains it from align_malloc(),
 * so it is released with align_free().
 */
void draw_vs_aos_machine_destroy( struct aos_machine *machine )
{
   align_free( machine );
}
|
||||
|
||||
struct aos_machine *draw_vs_aos_machine( void )
|
||||
{
|
||||
struct aos_machine *machine;
|
||||
unsigned i;
|
||||
float inv = 1.0f/255.0f;
|
||||
float f255 = 255.0f;
|
||||
|
||||
machine = align_malloc(sizeof(struct aos_machine), 16);
|
||||
if (!machine)
|
||||
return NULL;
|
||||
|
||||
memset(machine, 0, sizeof(*machine));
|
||||
|
||||
ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f);
|
||||
*(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff;
|
||||
|
||||
ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f);
|
||||
ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f);
|
||||
ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f);
|
||||
ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv);
|
||||
ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255);
|
||||
ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f);
|
||||
|
||||
|
||||
machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP |
|
||||
X87_CW_EXCEPTION_DENORM_OP |
|
||||
X87_CW_EXCEPTION_ZERO_DIVIDE |
|
||||
X87_CW_EXCEPTION_OVERFLOW |
|
||||
X87_CW_EXCEPTION_UNDERFLOW |
|
||||
X87_CW_EXCEPTION_PRECISION |
|
||||
(1<<6) |
|
||||
X87_CW_ROUND_NEAREST |
|
||||
X87_CW_PRECISION_DOUBLE_EXT);
|
||||
|
||||
assert(machine->fpu_rnd_nearest == 0x37f);
|
||||
|
||||
machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP |
|
||||
X87_CW_EXCEPTION_DENORM_OP |
|
||||
X87_CW_EXCEPTION_ZERO_DIVIDE |
|
||||
X87_CW_EXCEPTION_OVERFLOW |
|
||||
X87_CW_EXCEPTION_UNDERFLOW |
|
||||
X87_CW_EXCEPTION_PRECISION |
|
||||
(1<<6) |
|
||||
X87_CW_ROUND_DOWN |
|
||||
X87_CW_PRECISION_DOUBLE_EXT);
|
||||
|
||||
for (i = 0; i < MAX_SHINE_TAB; i++)
|
||||
do_populate_lut( &machine->shine_tab[i], 1.0f );
|
||||
|
||||
return machine;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Stub used when the preceding conditional branch is not compiled:
 * does nothing.
 */
void draw_vs_aos_machine_viewport( struct aos_machine *machine,
                                   const struct pipe_viewport_state *viewport )
{
   (void) machine;
   (void) viewport;
}
|
||||
|
||||
/* Stub used when the preceding conditional branch is not compiled:
 * does nothing.
 */
void
draw_vs_aos_machine_constants(struct aos_machine *machine,
                              unsigned slot,
                              const void *constants)
{
   (void) machine;
   (void) slot;
   (void) constants;
}
|
||||
|
||||
/* Stub: nothing to release, since the stub constructor returns NULL. */
void draw_vs_aos_machine_destroy( struct aos_machine *machine )
{
   (void) machine;
}
|
||||
|
||||
/* Stub: no machine is available in this configuration, always NULL. */
struct aos_machine *draw_vs_aos_machine( void )
{
   return NULL;
}
|
||||
#endif
|
||||
|
|
@ -185,12 +185,7 @@ draw_create_vs_ppc(struct draw_context *draw,
|
|||
tgsi_scan_shader(templ->tokens, &vs->base.info);
|
||||
|
||||
vs->base.draw = draw;
|
||||
#if 0
|
||||
if (1)
|
||||
vs->base.create_variant = draw_vs_variant_aos_ppc;
|
||||
else
|
||||
#endif
|
||||
vs->base.create_variant = draw_vs_create_variant_generic;
|
||||
vs->base.create_variant = draw_vs_create_variant_generic;
|
||||
vs->base.prepare = vs_ppc_prepare;
|
||||
vs->base.run_linear = vs_ppc_run_linear;
|
||||
vs->base.delete = vs_ppc_delete;
|
||||
|
|
|
@ -1,225 +0,0 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
/*
|
||||
* Authors:
|
||||
* Keith Whitwell <keith@tungstengraphics.com>
|
||||
* Brian Paul
|
||||
*/
|
||||
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "pipe/p_config.h"
|
||||
|
||||
#include "draw_vs.h"
|
||||
|
||||
#if defined(PIPE_ARCH_X86)
|
||||
|
||||
#include "pipe/p_shader_tokens.h"
|
||||
|
||||
#include "draw_private.h"
|
||||
#include "draw_context.h"
|
||||
|
||||
#include "rtasm/rtasm_cpu.h"
|
||||
#include "rtasm/rtasm_x86sse.h"
|
||||
#include "tgsi/tgsi_sse2.h"
|
||||
#include "tgsi/tgsi_parse.h"
|
||||
#include "tgsi/tgsi_exec.h"
|
||||
|
||||
#define SSE_MAX_VERTICES 4
|
||||
|
||||
|
||||
struct draw_sse_vertex_shader {
|
||||
struct draw_vertex_shader base;
|
||||
struct x86_function sse2_program;
|
||||
|
||||
tgsi_sse2_vs_func func;
|
||||
|
||||
struct tgsi_exec_machine *machine;
|
||||
};
|
||||
|
||||
|
||||
static void
|
||||
vs_sse_prepare( struct draw_vertex_shader *base,
|
||||
struct draw_context *draw )
|
||||
{
|
||||
struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
|
||||
struct tgsi_exec_machine *machine = shader->machine;
|
||||
|
||||
machine->Samplers = draw->vs.samplers;
|
||||
|
||||
if (base->info.uses_instanceid) {
|
||||
unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_INSTANCEID];
|
||||
assert(i < Elements(machine->SystemValue));
|
||||
machine->SystemValue[i][0] = base->draw->instance_id;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Simplified vertex shader interface for the pt paths. Given the
|
||||
* complexity of code-generating all the above operations together,
|
||||
* it's time to try doing all the other stuff separately.
|
||||
*/
|
||||
static void
|
||||
vs_sse_run_linear( struct draw_vertex_shader *base,
|
||||
const float (*input)[4],
|
||||
float (*output)[4],
|
||||
const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
|
||||
const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS],
|
||||
unsigned count,
|
||||
unsigned input_stride,
|
||||
unsigned output_stride )
|
||||
{
|
||||
struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
|
||||
struct tgsi_exec_machine *machine = shader->machine;
|
||||
unsigned int i;
|
||||
|
||||
/* By default, execute all channels. XXX move this inside the loop
|
||||
* below when we support shader conditionals/loops.
|
||||
*/
|
||||
tgsi_set_exec_mask(machine, 1, 1, 1, 1);
|
||||
|
||||
for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
|
||||
unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
|
||||
|
||||
if (max_vertices < 4) {
|
||||
/* disable the unused execution channels */
|
||||
tgsi_set_exec_mask(machine,
|
||||
1,
|
||||
max_vertices > 1,
|
||||
max_vertices > 2,
|
||||
0);
|
||||
}
|
||||
|
||||
/* run compiled shader
|
||||
*/
|
||||
shader->func(machine,
|
||||
(const float (*)[4])constants[0],
|
||||
shader->base.immediates,
|
||||
input,
|
||||
base->info.num_inputs,
|
||||
input_stride,
|
||||
output,
|
||||
base->info.num_outputs,
|
||||
output_stride );
|
||||
|
||||
input = (const float (*)[4])((const char *)input + input_stride * max_vertices);
|
||||
output = (float (*)[4])((char *)output + output_stride * max_vertices);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
static void
|
||||
vs_sse_delete( struct draw_vertex_shader *base )
|
||||
{
|
||||
struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
|
||||
|
||||
x86_release_func( &shader->sse2_program );
|
||||
|
||||
align_free( (void *) shader->base.immediates );
|
||||
|
||||
FREE( (void*) shader->base.state.tokens );
|
||||
FREE( shader );
|
||||
}
|
||||
|
||||
|
||||
struct draw_vertex_shader *
|
||||
draw_create_vs_sse(struct draw_context *draw,
|
||||
const struct pipe_shader_state *templ)
|
||||
{
|
||||
struct draw_sse_vertex_shader *vs;
|
||||
|
||||
if (!rtasm_cpu_has_sse2())
|
||||
return NULL;
|
||||
|
||||
vs = CALLOC_STRUCT( draw_sse_vertex_shader );
|
||||
if (vs == NULL)
|
||||
return NULL;
|
||||
|
||||
/* we make a private copy of the tokens */
|
||||
vs->base.state.tokens = tgsi_dup_tokens(templ->tokens);
|
||||
if (!vs->base.state.tokens)
|
||||
goto fail;
|
||||
|
||||
tgsi_scan_shader(templ->tokens, &vs->base.info);
|
||||
|
||||
vs->base.draw = draw;
|
||||
if (1)
|
||||
vs->base.create_variant = draw_vs_create_variant_aos_sse;
|
||||
else
|
||||
vs->base.create_variant = draw_vs_create_variant_generic;
|
||||
vs->base.prepare = vs_sse_prepare;
|
||||
vs->base.run_linear = vs_sse_run_linear;
|
||||
vs->base.delete = vs_sse_delete;
|
||||
|
||||
vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
|
||||
sizeof(float), 16);
|
||||
|
||||
vs->machine = draw->vs.machine;
|
||||
|
||||
x86_init_func( &vs->sse2_program );
|
||||
|
||||
if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
|
||||
&vs->sse2_program,
|
||||
(float (*)[4])vs->base.immediates,
|
||||
TRUE ))
|
||||
goto fail;
|
||||
|
||||
vs->func = (tgsi_sse2_vs_func) x86_get_func( &vs->sse2_program );
|
||||
if (!vs->func) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
return &vs->base;
|
||||
|
||||
fail:
|
||||
if (0)
|
||||
debug_warning("tgsi_emit_sse2() failed, falling back to interpreter\n");
|
||||
|
||||
x86_release_func( &vs->sse2_program );
|
||||
|
||||
FREE(vs);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
#else
|
||||
|
||||
/* Built when PIPE_ARCH_X86 is not defined: no SSE shader can be created. */
struct draw_vertex_shader *
draw_create_vs_sse( struct draw_context *draw,
                    const struct pipe_shader_state *templ )
{
   (void) draw;
   (void) templ;
   return NULL;
}
|
||||
|
||||
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -1,80 +0,0 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
#ifndef TGSI_SSE2_H
|
||||
#define TGSI_SSE2_H
|
||||
|
||||
#if defined __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "pipe/p_compiler.h"
|
||||
|
||||
struct tgsi_exec_machine;
|
||||
struct tgsi_interp_coef;
|
||||
struct tgsi_token;
|
||||
struct x86_function;
|
||||
|
||||
unsigned
|
||||
tgsi_emit_sse2(
|
||||
const struct tgsi_token *tokens,
|
||||
struct x86_function *function,
|
||||
float (*immediates)[4],
|
||||
boolean do_swizzles );
|
||||
|
||||
|
||||
/* This is the function prototype generated when do_swizzles is false
|
||||
* -- effectively for fragment shaders.
|
||||
*/
|
||||
typedef void (PIPE_CDECL *tgsi_sse2_fs_function) (
|
||||
struct tgsi_exec_machine *machine, /* 1 */
|
||||
const float (*constant)[4], /* 2 */
|
||||
const float (*immediate)[4], /* 3 */
|
||||
const struct tgsi_interp_coef *coef /* 4 */
|
||||
);
|
||||
|
||||
|
||||
/* This is the function prototype generated when do_swizzles is true
|
||||
* -- effectively for vertex shaders.
|
||||
*/
|
||||
typedef void (PIPE_CDECL *tgsi_sse2_vs_func) (
|
||||
struct tgsi_exec_machine *machine, /* 1 */
|
||||
const float (*constant)[4], /* 2 */
|
||||
const float (*immediate)[4], /* 3 */
|
||||
const float (*aos_input)[4], /* 4 */
|
||||
uint num_inputs, /* 5 */
|
||||
uint input_stride, /* 6 */
|
||||
float (*aos_output)[4], /* 7 */
|
||||
uint num_outputs, /* 8 */
|
||||
uint output_stride ); /* 9 */
|
||||
|
||||
|
||||
#if defined __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* TGSI_SSE2_H */
|
|
@ -26,7 +26,6 @@ LOCAL_PATH := $(call my-dir)
|
|||
# from Makefile
|
||||
C_SOURCES = \
|
||||
sp_fs_exec.c \
|
||||
sp_fs_sse.c \
|
||||
sp_clear.c \
|
||||
sp_fence.c \
|
||||
sp_flush.c \
|
||||
|
|
|
@ -5,7 +5,6 @@ LIBNAME = softpipe
|
|||
|
||||
C_SOURCES = \
|
||||
sp_fs_exec.c \
|
||||
sp_fs_sse.c \
|
||||
sp_clear.c \
|
||||
sp_fence.c \
|
||||
sp_flush.c \
|
||||
|
|
|
@ -6,7 +6,6 @@ softpipe = env.ConvenienceLibrary(
|
|||
target = 'softpipe',
|
||||
source = [
|
||||
'sp_fs_exec.c',
|
||||
'sp_fs_sse.c',
|
||||
'sp_clear.c',
|
||||
'sp_context.c',
|
||||
'sp_draw_arrays.c',
|
||||
|
|
|
@ -235,12 +235,6 @@ softpipe_create_context( struct pipe_screen *screen,
|
|||
|
||||
util_init_math();
|
||||
|
||||
#ifdef PIPE_ARCH_X86
|
||||
softpipe->use_sse = !debug_get_bool_option( "GALLIUM_NOSSE", FALSE );
|
||||
#else
|
||||
softpipe->use_sse = FALSE;
|
||||
#endif
|
||||
|
||||
softpipe->dump_fs = debug_get_bool_option( "SOFTPIPE_DUMP_FS", FALSE );
|
||||
softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE );
|
||||
|
||||
|
|
|
@ -190,7 +190,6 @@ struct softpipe_context {
|
|||
struct softpipe_tex_tile_cache *vertex_tex_cache[PIPE_MAX_VERTEX_SAMPLERS];
|
||||
struct softpipe_tex_tile_cache *geometry_tex_cache[PIPE_MAX_GEOMETRY_SAMPLERS];
|
||||
|
||||
unsigned use_sse : 1;
|
||||
unsigned dump_fs : 1;
|
||||
unsigned dump_gs : 1;
|
||||
unsigned no_rast : 1;
|
||||
|
|
|
@ -36,10 +36,6 @@ struct sp_fragment_shader_variant *
|
|||
softpipe_create_fs_variant_exec(struct softpipe_context *softpipe,
|
||||
const struct pipe_shader_state *templ);
|
||||
|
||||
struct sp_fragment_shader_variant *
|
||||
softpipe_create_fs_variant_sse(struct softpipe_context *softpipe,
|
||||
const struct pipe_shader_state *templ);
|
||||
|
||||
|
||||
struct tgsi_interp_coef;
|
||||
struct tgsi_exec_vector;
|
||||
|
|
|
@ -1,248 +0,0 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* Execute fragment shader using runtime SSE code generation.
|
||||
*/
|
||||
|
||||
#include "sp_context.h"
|
||||
#include "sp_state.h"
|
||||
#include "sp_fs.h"
|
||||
#include "sp_quad.h"
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
#include "pipe/p_defines.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "tgsi/tgsi_exec.h"
|
||||
#include "tgsi/tgsi_sse2.h"
|
||||
|
||||
|
||||
#if defined(PIPE_ARCH_X86)
|
||||
|
||||
#include "rtasm/rtasm_x86sse.h"
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Subclass of sp_fragment_shader_variant
|
||||
*/
|
||||
struct sp_sse_fragment_shader
|
||||
{
|
||||
struct sp_fragment_shader_variant base;
|
||||
struct x86_function sse2_program;
|
||||
tgsi_sse2_fs_function func;
|
||||
float immediates[TGSI_EXEC_NUM_IMMEDIATES][4];
|
||||
};
|
||||
|
||||
|
||||
/** cast wrapper */
|
||||
static INLINE struct sp_sse_fragment_shader *
sp_sse_fragment_shader(const struct sp_fragment_shader_variant *base)
{
   /* downcast; also drops the const qualifier, as the original did */
   struct sp_sse_fragment_shader *sse_fs =
      (struct sp_sse_fragment_shader *) base;

   return sse_fs;
}
|
||||
|
||||
|
||||
static void
|
||||
fs_sse_prepare( const struct sp_fragment_shader_variant *base,
|
||||
struct tgsi_exec_machine *machine,
|
||||
struct tgsi_sampler **samplers )
|
||||
{
|
||||
machine->Samplers = samplers;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Compute quad X,Y,Z,W for the four fragments in a quad.
|
||||
*
|
||||
* This should really be part of the compiled shader.
|
||||
*/
|
||||
static void
|
||||
setup_pos_vector(const struct tgsi_interp_coef *coef,
|
||||
float x, float y,
|
||||
struct tgsi_exec_vector *quadpos)
|
||||
{
|
||||
uint chan;
|
||||
/* do X */
|
||||
quadpos->xyzw[0].f[0] = x;
|
||||
quadpos->xyzw[0].f[1] = x + 1;
|
||||
quadpos->xyzw[0].f[2] = x;
|
||||
quadpos->xyzw[0].f[3] = x + 1;
|
||||
|
||||
/* do Y */
|
||||
quadpos->xyzw[1].f[0] = y;
|
||||
quadpos->xyzw[1].f[1] = y;
|
||||
quadpos->xyzw[1].f[2] = y + 1;
|
||||
quadpos->xyzw[1].f[3] = y + 1;
|
||||
|
||||
/* do Z and W for all fragments in the quad */
|
||||
for (chan = 2; chan < 4; chan++) {
|
||||
const float dadx = coef->dadx[chan];
|
||||
const float dady = coef->dady[chan];
|
||||
const float a0 = coef->a0[chan] + dadx * x + dady * y;
|
||||
quadpos->xyzw[chan].f[0] = a0;
|
||||
quadpos->xyzw[chan].f[1] = a0 + dadx;
|
||||
quadpos->xyzw[chan].f[2] = a0 + dady;
|
||||
quadpos->xyzw[chan].f[3] = a0 + dadx + dady;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* TODO: codegenerate the whole run function, skip this wrapper.
|
||||
* TODO: break dependency on tgsi_exec_machine struct
|
||||
* TODO: push Position calculation into the generated shader
|
||||
* TODO: process >1 quad at a time
|
||||
*/
|
||||
static unsigned
|
||||
fs_sse_run( const struct sp_fragment_shader_variant *base,
|
||||
struct tgsi_exec_machine *machine,
|
||||
struct quad_header *quad )
|
||||
{
|
||||
struct sp_sse_fragment_shader *shader = sp_sse_fragment_shader(base);
|
||||
|
||||
/* Compute X, Y, Z, W vals for this quad -- place in temp[0] for now */
|
||||
setup_pos_vector(quad->posCoef,
|
||||
(float)quad->input.x0, (float)quad->input.y0,
|
||||
machine->Temps);
|
||||
|
||||
/* init kill mask */
|
||||
tgsi_set_kill_mask(machine, 0x0);
|
||||
tgsi_set_exec_mask(machine, 1, 1, 1, 1);
|
||||
|
||||
shader->func( machine,
|
||||
(const float (*)[4])machine->Consts[0],
|
||||
(const float (*)[4])shader->immediates,
|
||||
machine->InterpCoefs
|
||||
/*, &machine->QuadPos*/
|
||||
);
|
||||
|
||||
quad->inout.mask &= ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]);
|
||||
if (quad->inout.mask == 0)
|
||||
return FALSE;
|
||||
|
||||
/* store outputs */
|
||||
{
|
||||
const ubyte *sem_name = base->info.output_semantic_name;
|
||||
const ubyte *sem_index = base->info.output_semantic_index;
|
||||
const uint n = base->info.num_outputs;
|
||||
uint i;
|
||||
for (i = 0; i < n; i++) {
|
||||
switch (sem_name[i]) {
|
||||
case TGSI_SEMANTIC_COLOR:
|
||||
{
|
||||
uint cbuf = sem_index[i];
|
||||
|
||||
assert(sizeof(quad->output.color[cbuf]) ==
|
||||
sizeof(machine->Outputs[i]));
|
||||
|
||||
/* copy float[4][4] result */
|
||||
memcpy(quad->output.color[cbuf],
|
||||
&machine->Outputs[i],
|
||||
sizeof(quad->output.color[0]) );
|
||||
}
|
||||
break;
|
||||
case TGSI_SEMANTIC_POSITION:
|
||||
{
|
||||
uint j;
|
||||
for (j = 0; j < 4; j++)
|
||||
quad->output.depth[j] = machine->Outputs[i].xyzw[2].f[j];
|
||||
}
|
||||
break;
|
||||
case TGSI_SEMANTIC_STENCIL:
|
||||
{
|
||||
uint j;
|
||||
for (j = 0; j < 4; j++)
|
||||
quad->output.stencil[j] = machine->Outputs[i].xyzw[1].f[j];
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
fs_sse_delete( struct sp_fragment_shader_variant *base )
|
||||
{
|
||||
struct sp_sse_fragment_shader *shader = sp_sse_fragment_shader(base);
|
||||
|
||||
x86_release_func( &shader->sse2_program );
|
||||
FREE(shader);
|
||||
}
|
||||
|
||||
|
||||
struct sp_fragment_shader_variant *
|
||||
softpipe_create_fs_variant_sse(struct softpipe_context *softpipe,
|
||||
const struct pipe_shader_state *templ)
|
||||
{
|
||||
struct sp_sse_fragment_shader *shader;
|
||||
|
||||
if (!softpipe->use_sse)
|
||||
return NULL;
|
||||
|
||||
shader = CALLOC_STRUCT(sp_sse_fragment_shader);
|
||||
if (!shader)
|
||||
return NULL;
|
||||
|
||||
x86_init_func( &shader->sse2_program );
|
||||
|
||||
if (!tgsi_emit_sse2( templ->tokens, &shader->sse2_program,
|
||||
shader->immediates, FALSE )) {
|
||||
FREE(shader);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
shader->func = (tgsi_sse2_fs_function) x86_get_func( &shader->sse2_program );
|
||||
if (!shader->func) {
|
||||
x86_release_func( &shader->sse2_program );
|
||||
FREE(shader);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
shader->base.prepare = fs_sse_prepare;
|
||||
shader->base.run = fs_sse_run;
|
||||
shader->base.delete = fs_sse_delete;
|
||||
|
||||
return &shader->base;
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
|
||||
/* Maybe put this variant in the header file.
|
||||
*/
|
||||
/* Built when PIPE_ARCH_X86 is not defined: never creates a variant. */
struct sp_fragment_shader_variant *
softpipe_create_fs_variant_sse(struct softpipe_context *softpipe,
			       const struct pipe_shader_state *templ)
{
   (void) softpipe;
   (void) templ;
   return NULL;
}
|
||||
|
||||
#endif
|
|
@ -65,10 +65,7 @@ create_fs_variant(struct softpipe_context *softpipe,
|
|||
#endif
|
||||
|
||||
/* codegen, create variant object */
|
||||
var = softpipe_create_fs_variant_sse(softpipe, curfs);
|
||||
if (!var) {
|
||||
var = softpipe_create_fs_variant_exec(softpipe, curfs);
|
||||
}
|
||||
var = softpipe_create_fs_variant_exec(softpipe, curfs);
|
||||
|
||||
if (var) {
|
||||
var->key = *key;
|
||||
|
|
Loading…
Reference in New Issue