Merge commit 'origin/gallium-0.1' into gallium-0.2
Conflicts: progs/fp/Makefile
This commit is contained in:
commit
176c454765
|
@ -40,13 +40,13 @@ UTIL_FILES = readtex.h readtex.c
|
|||
.SUFFIXES: .c

# Build a demo program directly from a single C source file.
.c:
	$(APP_CC) $(INCLUDES) $(CFLAGS) $(LDFLAGS) $< $(LIBS) -o $@

# Compile a C source file into an object file.
.c.o:
	$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@

# Compile an assembly (.S) source file into an object file.
.S.o:
	$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
|
||||
##### TARGETS #####
|
||||
|
@ -67,19 +67,27 @@ getproclist.h: $(TOP)/src/mesa/glapi/gl_API.xml getprocaddress.c getprocaddress.
|
|||
|
||||
|
||||
# Merge conflict resolved: link with the application compiler $(APP_CC)
# (the gallium-0.1 side) while keeping the $(CFLAGS)/$(LDFLAGS) that the
# gallium-0.2 side passed to the link step.
texrect: texrect.o readtex.o
	$(APP_CC) $(CFLAGS) $(LDFLAGS) texrect.o readtex.o $(LIBS) -o $@

texrect.o: texrect.c readtex.h
	$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@

invert: invert.o readtex.o
	$(APP_CC) $(CFLAGS) $(LDFLAGS) invert.o readtex.o $(LIBS) -o $@

invert.o: invert.c readtex.h
	$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@

readtex.o: readtex.c
	$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
|
||||
readtex.h: $(TOP)/progs/util/readtex.h
|
||||
|
|
|
@ -8,7 +8,7 @@ include $(TOP)/configs/current
|
|||
INCLUDE_DIRS = \
|
||||
-I. \
|
||||
-I/usr/include \
|
||||
-I/usr/include/drm \
|
||||
$(shell pkg-config --cflags-only-I libdrm) \
|
||||
-I$(TOP)/include \
|
||||
-I$(TOP)/include/GL/internal \
|
||||
-I$(TOP)/src/mesa \
|
||||
|
|
|
@ -13,7 +13,7 @@ DRIVER_NAME = egl_xdri.so
|
|||
INCLUDE_DIRS = \
|
||||
-I. \
|
||||
-I/usr/include \
|
||||
-I/usr/include/drm \
|
||||
$(shell pkg-config --cflags-only-I libdrm) \
|
||||
-I$(TOP)/include \
|
||||
-I$(TOP)/include/GL/internal \
|
||||
-I$(TOP)/src/mesa/glapi \
|
||||
|
@ -48,6 +48,7 @@ $(TOP)/$(LIB_DIR)/$(DRIVER_NAME): $(OBJECTS)
|
|||
$(TOP)/bin/mklib -o $(DRIVER_NAME) \
|
||||
-noprefix \
|
||||
-major 1 -minor 0 \
|
||||
-L $(TOP)/$(LIB_DIR) \
|
||||
-install $(TOP)/$(LIB_DIR) \
|
||||
$(OBJECTS) $(DRM_LIB) $(MISC_LIBS)
|
||||
|
||||
|
|
|
@ -473,21 +473,48 @@ EMIT_R (spe_mtspr, 0x10c);
|
|||
/**
 * Emit code that loads the float constant x into register rT.
 *
 * A few common values (0.0, 0.5, 1.0, -1.0) have all-zero low 16 bits and
 * are loaded with a single instruction.  Every other value is loaded as a
 * raw 32-bit pattern with an ilhu/iohl pair (upper halfword, then OR in
 * the lower halfword).
 *
 * \param p   the function being generated
 * \param rT  destination register
 * \param x   the value to load
 */
void
spe_load_float(struct spe_function *p, unsigned rT, float x)
{
   /* NOTE(review): -0.0f compares equal to 0.0f, so it takes the first
    * branch and the sign bit is not preserved -- confirm this is OK.
    */
   if (x == 0.0f) {
      spe_il(p, rT, 0x0);
   }
   else if (x == 0.5f) {
      spe_ilhu(p, rT, 0x3f00);
   }
   else if (x == 1.0f) {
      spe_ilhu(p, rT, 0x3f80);
   }
   else if (x == -1.0f) {
      spe_ilhu(p, rT, 0xbf80);
   }
   else {
      /* reinterpret the float's bits as an unsigned integer */
      union {
         float f;
         unsigned u;
      } bits;
      bits.f = x;
      spe_ilhu(p, rT, bits.u >> 16);
      spe_iohl(p, rT, bits.u & 0xffff);
   }
}
|
||||
|
||||
|
||||
/**
 * Emit code that loads the integer constant i into register rT.
 *
 * Values that fit in a signed 16-bit immediate are loaded with a single
 * il instruction; anything else needs an ilhu/iohl pair (upper halfword,
 * then OR in the lower halfword).
 *
 * \param p   the function being generated
 * \param rT  destination register
 * \param i   the value to load
 */
void
spe_load_int(struct spe_function *p, unsigned rT, int i)
{
   if (-32768 <= i && i <= 32767) {
      spe_il(p, rT, i);
   }
   else {
      spe_ilhu(p, rT, i >> 16);
      spe_iohl(p, rT, i & 0xffff);
   }
}
|
||||
|
||||
|
||||
/**
 * Emit code that shuffles rA into rT using a byte-select pattern.
 * rT is first loaded with the pattern and then overwritten with the
 * shuffle result.
 *
 * \param p   the function being generated
 * \param rT  destination register (also used to hold the shuffle pattern)
 * \param rA  source register
 */
void
spe_splat(struct spe_function *p, unsigned rT, unsigned rA)
{
   /* 0x00010203 == 66051: presumably selects bytes 0..3 (word 0) of the
    * first shufb source for every word of the result -- TODO confirm
    * against the SPU ISA.
    */
   const int select_word0 = 0x00010203;

   spe_ila(p, rT, select_word0);
   spe_shufb(p, rT, rA, rA, rT);
}
|
||||
|
||||
|
||||
|
|
|
@ -292,6 +292,10 @@ spe_load_float(struct spe_function *p, unsigned rT, float x);
|
|||
extern void
|
||||
spe_load_int(struct spe_function *p, unsigned rT, int i);
|
||||
|
||||
/** Replicate word 0 of rA across rT. */
|
||||
extern void
|
||||
spe_splat(struct spe_function *p, unsigned rT, unsigned rA);
|
||||
|
||||
/** Complement/invert all bits in rT. */
|
||||
extern void
|
||||
spe_complement(struct spe_function *p, unsigned rT);
|
||||
|
|
|
@ -92,6 +92,7 @@
|
|||
#define CELL_CMD_STATE_UNIFORMS 16
|
||||
#define CELL_CMD_STATE_VS_ARRAY_INFO 17
|
||||
#define CELL_CMD_STATE_BIND_VS 18
|
||||
#define CELL_CMD_STATE_FRAGMENT_PROGRAM 19
|
||||
#define CELL_CMD_STATE_ATTRIB_FETCH 20
|
||||
#define CELL_CMD_VS_EXECUTE 22
|
||||
#define CELL_CMD_FLUSH_BUFFER_RANGE 23
|
||||
|
@ -125,6 +126,20 @@ struct cell_command_fragment_ops
|
|||
};
|
||||
|
||||
|
||||
/** Max instructions for fragment programs */
|
||||
#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 128
|
||||
|
||||
/**
|
||||
* Command to send a fragment program to SPUs.
|
||||
*/
|
||||
struct cell_command_fragment_program
|
||||
{
|
||||
uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */
|
||||
uint num_inst; /**< Number of instructions */
|
||||
unsigned code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Tell SPUs about the framebuffer size, location
|
||||
*/
|
||||
|
|
|
@ -26,6 +26,7 @@ SOURCES = \
|
|||
cell_draw_arrays.c \
|
||||
cell_flush.c \
|
||||
cell_gen_fragment.c \
|
||||
cell_gen_fp.c \
|
||||
cell_state_derived.c \
|
||||
cell_state_emit.c \
|
||||
cell_state_shader.c \
|
||||
|
|
|
@ -61,6 +61,7 @@ struct cell_fragment_shader_state
|
|||
{
|
||||
struct pipe_shader_state shader;
|
||||
struct tgsi_shader_info info;
|
||||
struct spe_function code;
|
||||
void *data;
|
||||
};
|
||||
|
||||
|
|
|
@ -0,0 +1,523 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Generate SPU fragment program/shader code.
|
||||
*
|
||||
* Note that we generate SOA-style code here. So each TGSI instruction
|
||||
* operates on four pixels (and is translated into four SPU instructions,
|
||||
* generally speaking).
|
||||
*
|
||||
* \author Brian Paul
|
||||
*/
|
||||
|
||||
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "pipe/p_shader_tokens.h"
|
||||
#include "tgsi/tgsi_parse.h"
|
||||
#include "tgsi/tgsi_util.h"
|
||||
#include "tgsi/tgsi_exec.h"
|
||||
#include "tgsi/tgsi_dump.h"
|
||||
#include "rtasm/rtasm_ppc_spe.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "cell_context.h"
|
||||
#include "cell_gen_fp.h"
|
||||
|
||||
|
||||
/** Set to 1 to enable debug/disassembly printfs */
|
||||
#define DISASSEM 01
|
||||
|
||||
|
||||
/**
|
||||
* Context needed during code generation.
|
||||
*/
|
||||
struct codegen
|
||||
{
|
||||
int inputs_reg; /**< 1st function parameter */
|
||||
int outputs_reg; /**< 2nd function parameter */
|
||||
int constants_reg; /**< 3rd function parameter */
|
||||
int temp_regs[8][4]; /**< maps TGSI temps to SPE registers */
|
||||
|
||||
int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */
|
||||
|
||||
/** Per-instruction temps / intermediate temps */
|
||||
int num_itemps;
|
||||
int itemps[3];
|
||||
|
||||
struct spe_function *f;
|
||||
boolean error;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Allocate an intermediate temporary register.
|
||||
*/
|
||||
static int
|
||||
get_itemp(struct codegen *gen)
|
||||
{
|
||||
int t = spe_allocate_available_register(gen->f);
|
||||
assert(gen->num_itemps < Elements(gen->itemps));
|
||||
gen->itemps[gen->num_itemps++] = t;
|
||||
return t;
|
||||
}
|
||||
|
||||
/**
|
||||
* Free all intermediate temporary registers. To be called after each
|
||||
* instruction has been emitted.
|
||||
*/
|
||||
static void
|
||||
free_itemps(struct codegen *gen)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < gen->num_itemps; i++) {
|
||||
spe_release_register(gen->f, gen->itemps[i]);
|
||||
}
|
||||
gen->num_itemps = 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
|
||||
* The register is allocated and initialized upon the first call.
|
||||
*/
|
||||
static int
|
||||
get_const_one_reg(struct codegen *gen)
|
||||
{
|
||||
if (gen->one_reg <= 0) {
|
||||
gen->one_reg = spe_allocate_available_register(gen->f);
|
||||
}
|
||||
|
||||
/* one = {1.0, 1.0, 1.0, 1.0} */
|
||||
spe_load_float(gen->f, gen->one_reg, 1.0f);
|
||||
#if DISASSEM
|
||||
printf("il\tr%d, 1.0f\n", gen->one_reg);
|
||||
#endif
|
||||
|
||||
return gen->one_reg;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return the index of the SPU temporary containing the named TGSI
|
||||
* source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
|
||||
* just return the corresponding SPE register. If the TGIS register
|
||||
* is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
|
||||
* and emit an SPE load instruction.
|
||||
*/
|
||||
static int
|
||||
get_src_reg(struct codegen *gen,
|
||||
int channel,
|
||||
const struct tgsi_full_src_register *src)
|
||||
{
|
||||
int reg;
|
||||
|
||||
/* XXX need to examine src swizzle info here.
|
||||
* That will involve changing the channel var...
|
||||
*/
|
||||
|
||||
|
||||
switch (src->SrcRegister.File) {
|
||||
case TGSI_FILE_TEMPORARY:
|
||||
reg = gen->temp_regs[src->SrcRegister.Index][channel];
|
||||
break;
|
||||
case TGSI_FILE_INPUT:
|
||||
{
|
||||
/* offset is measured in quadwords, not bytes */
|
||||
int offset = src->SrcRegister.Index * 4 + channel;
|
||||
reg = get_itemp(gen);
|
||||
/* Load: reg = memory[(machine_reg) + offset] */
|
||||
spe_lqd(gen->f, reg, gen->inputs_reg, offset);
|
||||
#if DISASSEM
|
||||
printf("lqd\tr%d, r%d + %d\n", reg, gen->inputs_reg, offset);
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
case TGSI_FILE_IMMEDIATE:
|
||||
/* xxx fall-through for now / fix */
|
||||
case TGSI_FILE_CONSTANT:
|
||||
/* xxx fall-through for now / fix */
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
return reg;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return the index of an SPE register to use for the given TGSI register.
|
||||
* If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the
|
||||
* corresponding SPE register is returned. If the TGSI register is
|
||||
* TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
|
||||
* See store_dest_reg() below...
|
||||
*/
|
||||
static int
|
||||
get_dst_reg(struct codegen *gen,
|
||||
int channel,
|
||||
const struct tgsi_full_dst_register *dest)
|
||||
{
|
||||
int reg;
|
||||
|
||||
switch (dest->DstRegister.File) {
|
||||
case TGSI_FILE_TEMPORARY:
|
||||
reg = gen->temp_regs[dest->DstRegister.Index][channel];
|
||||
break;
|
||||
case TGSI_FILE_OUTPUT:
|
||||
reg = get_itemp(gen);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
return reg;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* When a TGSI instruction is writing to an output register, this
|
||||
* function emits the SPE store instruction to store the value_reg.
|
||||
* \param value_reg the SPE register containing the value to store.
|
||||
* This would have been returned by get_dst_reg().
|
||||
*/
|
||||
static void
|
||||
store_dest_reg(struct codegen *gen,
|
||||
int value_reg, int channel,
|
||||
const struct tgsi_full_dst_register *dest)
|
||||
{
|
||||
switch (dest->DstRegister.File) {
|
||||
case TGSI_FILE_TEMPORARY:
|
||||
/* no-op */
|
||||
break;
|
||||
case TGSI_FILE_OUTPUT:
|
||||
{
|
||||
/* offset is measured in quadwords, not bytes */
|
||||
int offset = dest->DstRegister.Index * 4 + channel;
|
||||
/* Store: memory[(machine_reg) + offset] = reg */
|
||||
spe_stqd(gen->f, value_reg, gen->outputs_reg, offset);
|
||||
#if DISASSEM
|
||||
printf("stqd\tr%d, r%d + %d\n", value_reg, gen->outputs_reg, offset);
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static boolean
|
||||
emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
int ch;
|
||||
for (ch = 0; ch < 4; ch++) {
|
||||
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
|
||||
int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
|
||||
int dst_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
|
||||
/* XXX we don't always need to actually emit a mov instruction here */
|
||||
spe_move(gen->f, dst_reg, src_reg);
|
||||
#if DISASSEM
|
||||
printf("mov\tr%d, r%d\n", dst_reg, src_reg);
|
||||
#endif
|
||||
store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]);
|
||||
free_itemps(gen);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
|
||||
* becomes (up to) four SPU "fa" instructions because we're doing SOA
|
||||
* processing.
|
||||
*/
|
||||
static boolean
|
||||
emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
int ch;
|
||||
/* Loop over Red/Green/Blue/Alpha channels */
|
||||
for (ch = 0; ch < 4; ch++) {
|
||||
/* If the dest R, G, B or A writemask is enabled... */
|
||||
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
|
||||
/* get indexes of the two src, one dest SPE registers */
|
||||
int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
|
||||
int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
|
||||
int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
|
||||
|
||||
/* Emit actual SPE instruction: d = s1 + s2 */
|
||||
spe_fa(gen->f, d_reg, s1_reg, s2_reg);
|
||||
#if DISASSEM
|
||||
printf("fa\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
|
||||
#endif
|
||||
|
||||
/* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
|
||||
store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
|
||||
/* Free any intermediate temps we allocated */
|
||||
free_itemps(gen);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit multiply. See emit_ADD for comments.
|
||||
*/
|
||||
static boolean
|
||||
emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
int ch;
|
||||
for (ch = 0; ch < 4; ch++) {
|
||||
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
|
||||
int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
|
||||
int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
|
||||
int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
|
||||
/* d = s1 * s2 */
|
||||
spe_fm(gen->f, d_reg, s1_reg, s2_reg);
|
||||
#if DISASSEM
|
||||
printf("fm\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
|
||||
#endif
|
||||
store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
|
||||
free_itemps(gen);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit set-if-greater-than.
|
||||
* Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
|
||||
* the result but OpenGL/TGSI needs 0.0 and 1.0 results.
|
||||
* We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
|
||||
*/
|
||||
static boolean
|
||||
emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
int ch;
|
||||
|
||||
for (ch = 0; ch < 4; ch++) {
|
||||
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
|
||||
int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
|
||||
int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
|
||||
int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
|
||||
|
||||
/* d = (s1 > s2) */
|
||||
spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
|
||||
#if DISASSEM
|
||||
printf("fcgt\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
|
||||
#endif
|
||||
|
||||
/* convert d from 0x0/0xffffffff to 0.0/1.0 */
|
||||
/* d = d & one_reg */
|
||||
spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
|
||||
#if DISASSEM
|
||||
printf("and\tr%d, r%d, r%d\n", d_reg, d_reg, get_const_one_reg(gen));
|
||||
#endif
|
||||
|
||||
store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
|
||||
free_itemps(gen);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit END instruction.
|
||||
* We just return from the shader function at this point.
|
||||
*
|
||||
* Note that there may be more code after this that would be
|
||||
* called by TGSI_OPCODE_CALL.
|
||||
*/
|
||||
static boolean
|
||||
emit_END(struct codegen *gen)
|
||||
{
|
||||
/* return from function call */
|
||||
spe_bi(gen->f, SPE_REG_RA, 0, 0);
|
||||
#if DISASSEM
|
||||
printf("bi\trRA\n");
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit code for the given instruction. Just a big switch stmt.
|
||||
*/
|
||||
static boolean
|
||||
emit_instruction(struct codegen *gen,
|
||||
const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
switch (inst->Instruction.Opcode) {
|
||||
case TGSI_OPCODE_MOV:
|
||||
return emit_MOV(gen, inst);
|
||||
case TGSI_OPCODE_MUL:
|
||||
return emit_MUL(gen, inst);
|
||||
case TGSI_OPCODE_ADD:
|
||||
return emit_ADD(gen, inst);
|
||||
case TGSI_OPCODE_SGT:
|
||||
return emit_SGT(gen, inst);
|
||||
case TGSI_OPCODE_END:
|
||||
return emit_END(gen);
|
||||
|
||||
/* XXX lots more cases to do... */
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Emit "code" for a TGSI declaration.
|
||||
* We only care about TGSI TEMPORARY register declarations at this time.
|
||||
* For each TGSI TEMPORARY we allocate four SPE registers.
|
||||
*/
|
||||
static void
|
||||
emit_declaration(struct codegen *gen, const struct tgsi_full_declaration *decl)
|
||||
{
|
||||
int i, ch;
|
||||
|
||||
switch (decl->Declaration.File) {
|
||||
case TGSI_FILE_TEMPORARY:
|
||||
#if DISASSEM
|
||||
printf("Declare temp reg %d .. %d\n",
|
||||
decl->DeclarationRange.First,
|
||||
decl->DeclarationRange.Last);
|
||||
#endif
|
||||
for (i = decl->DeclarationRange.First;
|
||||
i <= decl->DeclarationRange.Last;
|
||||
i++) {
|
||||
for (ch = 0; ch < 4; ch++) {
|
||||
gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
|
||||
}
|
||||
|
||||
/* XXX if we run out of SPE registers, we need to spill
|
||||
* to SPU memory. someday...
|
||||
*/
|
||||
|
||||
#if DISASSEM
|
||||
printf(" SPE regs: %d %d %d %d\n",
|
||||
gen->temp_regs[i][0],
|
||||
gen->temp_regs[i][1],
|
||||
gen->temp_regs[i][2],
|
||||
gen->temp_regs[i][3]);
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
default:
|
||||
; /* ignore */
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Translate TGSI shader code to SPE instructions. This is done when
|
||||
* the state tracker gives us a new shader (via pipe->create_fs_state()).
|
||||
*
|
||||
* \param cell the rendering context (in)
|
||||
* \param tokens the TGSI shader (in)
|
||||
* \param f the generated function (out)
|
||||
*/
|
||||
boolean
|
||||
cell_gen_fragment_program(struct cell_context *cell,
|
||||
const struct tgsi_token *tokens,
|
||||
struct spe_function *f)
|
||||
{
|
||||
struct tgsi_parse_context parse;
|
||||
struct codegen gen;
|
||||
|
||||
memset(&gen, 0, sizeof(gen));
|
||||
gen.f = f;
|
||||
|
||||
/* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
|
||||
gen.inputs_reg = 3; /* pointer to inputs array */
|
||||
gen.outputs_reg = 4; /* pointer to outputs array */
|
||||
gen.constants_reg = 5; /* pointer to constants array */
|
||||
|
||||
spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
|
||||
spe_allocate_register(f, gen.inputs_reg);
|
||||
spe_allocate_register(f, gen.outputs_reg);
|
||||
spe_allocate_register(f, gen.constants_reg);
|
||||
|
||||
#if DISASSEM
|
||||
printf("Begin %s\n", __FUNCTION__);
|
||||
tgsi_dump(tokens, 0);
|
||||
#endif
|
||||
|
||||
tgsi_parse_init(&parse, tokens);
|
||||
|
||||
while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
|
||||
tgsi_parse_token(&parse);
|
||||
|
||||
switch (parse.FullToken.Token.Type) {
|
||||
case TGSI_TOKEN_TYPE_IMMEDIATE:
|
||||
#if 0
|
||||
if (!note_immediate(&gen, &parse.FullToken.FullImmediate ))
|
||||
goto fail;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case TGSI_TOKEN_TYPE_DECLARATION:
|
||||
emit_declaration(&gen, &parse.FullToken.FullDeclaration);
|
||||
break;
|
||||
|
||||
case TGSI_TOKEN_TYPE_INSTRUCTION:
|
||||
if (!emit_instruction(&gen, &parse.FullToken.FullInstruction )) {
|
||||
gen.error = true;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(0);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (gen.error) {
|
||||
/* terminate the SPE code */
|
||||
return emit_END(&gen);
|
||||
}
|
||||
|
||||
#if DISASSEM
|
||||
printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
|
||||
printf("End %s\n", __FUNCTION__);
|
||||
#endif
|
||||
|
||||
tgsi_parse_free( &parse );
|
||||
|
||||
return !gen.error;
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
|
||||
|
||||
#ifndef CELL_GEN_FP_H
|
||||
#define CELL_GEN_FP_H
|
||||
|
||||
|
||||
|
||||
extern boolean
|
||||
cell_gen_fragment_program(struct cell_context *cell,
|
||||
const struct tgsi_token *tokens,
|
||||
struct spe_function *f);
|
||||
|
||||
|
||||
#endif /* CELL_GEN_FP_H */
|
||||
|
|
@ -265,6 +265,8 @@ gen_blend(const struct pipe_blend_state *blend,
|
|||
int one_reg = spe_allocate_available_register(f);
|
||||
int tmp_reg = spe_allocate_available_register(f);
|
||||
|
||||
boolean one_reg_set = false; /* avoid setting one_reg more than once */
|
||||
|
||||
ASSERT(blend->blend_enable);
|
||||
|
||||
/* Unpack/convert framebuffer colors from four 32-bit packed colors
|
||||
|
@ -275,7 +277,7 @@ gen_blend(const struct pipe_blend_state *blend,
|
|||
int mask_reg = spe_allocate_available_register(f);
|
||||
|
||||
/* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */
|
||||
spe_fsmbi(f, mask_reg, 0x1111);
|
||||
spe_load_int(f, mask_reg, 0xff);
|
||||
|
||||
/* XXX there may be more clever ways to implement the following code */
|
||||
switch (color_format) {
|
||||
|
@ -418,7 +420,10 @@ gen_blend(const struct pipe_blend_state *blend,
|
|||
break;
|
||||
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
|
||||
/* one = {1.0, 1.0, 1.0, 1.0} */
|
||||
spe_load_float(f, one_reg, 1.0f);
|
||||
if (!one_reg_set) {
|
||||
spe_load_float(f, one_reg, 1.0f);
|
||||
one_reg_set = true;
|
||||
}
|
||||
/* tmp = one - fragA */
|
||||
spe_fs(f, tmp_reg, one_reg, fragA_reg);
|
||||
/* term = fb * tmp */
|
||||
|
@ -446,7 +451,10 @@ gen_blend(const struct pipe_blend_state *blend,
|
|||
break;
|
||||
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
|
||||
/* one = {1.0, 1.0, 1.0, 1.0} */
|
||||
spe_load_float(f, one_reg, 1.0f);
|
||||
if (!one_reg_set) {
|
||||
spe_load_float(f, one_reg, 1.0f);
|
||||
one_reg_set = true;
|
||||
}
|
||||
/* tmp = one - fragA */
|
||||
spe_fs(f, tmp_reg, one_reg, fragA_reg);
|
||||
/* termA = fbA * tmp */
|
||||
|
@ -616,7 +624,7 @@ gen_pack_colors(struct spe_function *f,
|
|||
* \param f the generated function (out)
|
||||
*/
|
||||
void
|
||||
gen_fragment_function(struct cell_context *cell, struct spe_function *f)
|
||||
cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
|
||||
{
|
||||
const struct pipe_depth_stencil_alpha_state *dsa =
|
||||
&cell->depth_stencil->base;
|
||||
|
@ -850,7 +858,7 @@ gen_fragment_function(struct cell_context *cell, struct spe_function *f)
|
|||
spe_release_register(f, rgba_reg);
|
||||
}
|
||||
|
||||
printf("gen_fragment_ops nr instructions: %u\n", f->num_inst);
|
||||
//printf("gen_fragment_ops nr instructions: %u\n", f->num_inst);
|
||||
|
||||
spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */
|
||||
|
||||
|
|
|
@ -31,7 +31,7 @@
|
|||
|
||||
|
||||
extern void
|
||||
gen_fragment_function(struct cell_context *cell, struct spe_function *f);
|
||||
cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f);
|
||||
|
||||
|
||||
#endif /* CELL_GEN_FRAGMENT_H */
|
||||
|
|
|
@ -73,6 +73,22 @@ cell_emit_state(struct cell_context *cell)
|
|||
#endif
|
||||
}
|
||||
|
||||
if (cell->dirty & (CELL_NEW_FS)) {
|
||||
/* Send new fragment program to SPUs */
|
||||
struct cell_command_fragment_program *fp
|
||||
= cell_batch_alloc(cell, sizeof(*fp));
|
||||
fp->opcode = CELL_CMD_STATE_FRAGMENT_PROGRAM;
|
||||
fp->num_inst = cell->fs->code.num_inst;
|
||||
memcpy(&fp->code, cell->fs->code.store,
|
||||
SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
|
||||
if (0) {
|
||||
int i;
|
||||
printf("PPU Emit CELL_CMD_STATE_FRAGMENT_PROGRAM:\n");
|
||||
for (i = 0; i < fp->num_inst; i++) {
|
||||
printf(" %3d: 0x%08x\n", i, fp->code[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (cell->dirty & (CELL_NEW_FRAMEBUFFER |
|
||||
CELL_NEW_DEPTH_STENCIL |
|
||||
|
@ -85,7 +101,7 @@ cell_emit_state(struct cell_context *cell)
|
|||
struct spe_function spe_code;
|
||||
|
||||
/* generate new code */
|
||||
gen_fragment_function(cell, &spe_code);
|
||||
cell_gen_fragment_function(cell, &spe_code);
|
||||
/* put the new code into the batch buffer */
|
||||
fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS;
|
||||
memcpy(&fops->code, spe_code.store,
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
|
||||
#include "cell_context.h"
|
||||
#include "cell_state.h"
|
||||
|
||||
#include "cell_gen_fp.h"
|
||||
|
||||
|
||||
/** cast wrapper */
|
||||
|
@ -61,7 +61,7 @@ static void *
|
|||
cell_create_fs_state(struct pipe_context *pipe,
|
||||
const struct pipe_shader_state *templ)
|
||||
{
|
||||
/*struct cell_context *cell = cell_context(pipe);*/
|
||||
struct cell_context *cell = cell_context(pipe);
|
||||
struct cell_fragment_shader_state *cfs;
|
||||
|
||||
cfs = CALLOC_STRUCT(cell_fragment_shader_state);
|
||||
|
@ -76,6 +76,8 @@ cell_create_fs_state(struct pipe_context *pipe,
|
|||
|
||||
tgsi_scan_shader(templ->tokens, &cfs->info);
|
||||
|
||||
cell_gen_fragment_program(cell, cfs->shader.tokens, &cfs->code);
|
||||
|
||||
return cfs;
|
||||
}
|
||||
|
||||
|
@ -102,6 +104,8 @@ cell_delete_fs_state(struct pipe_context *pipe, void *fs)
|
|||
{
|
||||
struct cell_fragment_shader_state *cfs = cell_fragment_shader_state(fs);
|
||||
|
||||
spe_release_func(&cfs->code);
|
||||
|
||||
FREE((void *) cfs->shader.tokens);
|
||||
FREE(cfs);
|
||||
}
|
||||
|
|
|
@ -232,7 +232,7 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
|
|||
printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id);
|
||||
/* Copy SPU code from batch buffer to spu buffer */
|
||||
memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
|
||||
/* Copy state info */
|
||||
/* Copy state info (for fallback case only) */
|
||||
memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
|
||||
memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
|
||||
|
||||
|
@ -244,6 +244,21 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
|
|||
}
|
||||
|
||||
|
||||
static void
|
||||
cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
|
||||
{
|
||||
if (Debug)
|
||||
printf("SPU %u: CMD_STATE_FRAGMENT_PROGRAM\n", spu.init.id);
|
||||
/* Copy SPU code from batch buffer to spu buffer */
|
||||
memcpy(spu.fragment_program_code, fp->code,
|
||||
SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4);
|
||||
#if 01
|
||||
/* Point function pointer at new code */
|
||||
spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
|
||||
{
|
||||
|
@ -473,6 +488,14 @@ cmd_batch(uint opcode)
|
|||
pos += sizeof(*fops) / 8;
|
||||
}
|
||||
break;
|
||||
case CELL_CMD_STATE_FRAGMENT_PROGRAM:
|
||||
{
|
||||
struct cell_command_fragment_program *fp
|
||||
= (struct cell_command_fragment_program *) &buffer[pos];
|
||||
cmd_state_fragment_program(fp);
|
||||
pos += sizeof(*fp) / 8;
|
||||
}
|
||||
break;
|
||||
case CELL_CMD_STATE_SAMPLER:
|
||||
{
|
||||
struct cell_command_sampler *sampler
|
||||
|
|
|
@ -75,6 +75,12 @@ typedef void (*spu_fragment_ops_func)(uint x, uint y,
|
|||
vector float fragAlpha,
|
||||
vector unsigned int mask);
|
||||
|
||||
/** Function for running fragment program */
|
||||
typedef void (*spu_fragment_program_func)(vector float *inputs,
|
||||
vector float *outputs,
|
||||
vector float *constants);
|
||||
|
||||
|
||||
struct spu_framebuffer
|
||||
{
|
||||
void *color_start; /**< addr of color surface in main memory */
|
||||
|
@ -142,9 +148,18 @@ struct spu_global
|
|||
/** Current fragment ops function */
|
||||
spu_fragment_ops_func fragment_ops;
|
||||
|
||||
/** Current fragment program machine code */
|
||||
uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
|
||||
/** Current fragment program function */
|
||||
spu_fragment_program_func fragment_program;
|
||||
|
||||
/** Current texture sampler function */
|
||||
spu_sample_texture_func sample_texture[CELL_MAX_SAMPLERS];
|
||||
|
||||
/** Fragment program constants (XXX preliminary/used) */
|
||||
#define MAX_CONSTANTS 32
|
||||
vector float constants[MAX_CONSTANTS];
|
||||
|
||||
} ALIGN16_ATTRIB;
|
||||
|
||||
|
||||
|
|
|
@ -314,7 +314,42 @@ emit_quad( int x, int y, mask_t mask )
|
|||
}
|
||||
else {
|
||||
/* simple shading */
|
||||
#if 0
|
||||
eval_coeff(1, (float) x, (float) y, colors);
|
||||
|
||||
#else
|
||||
/* XXX new fragment program code */
|
||||
|
||||
if (spu.fragment_program) {
|
||||
vector float inputs[4*4], outputs[2*4];
|
||||
|
||||
/* setup inputs */
|
||||
eval_coeff(1, (float) x, (float) y, inputs);
|
||||
|
||||
/* Execute the current fragment program */
|
||||
spu.fragment_program(inputs, outputs, spu.constants);
|
||||
|
||||
/* Copy outputs */
|
||||
colors[0] = outputs[0*4+0];
|
||||
colors[1] = outputs[0*4+1];
|
||||
colors[2] = outputs[0*4+2];
|
||||
colors[3] = outputs[0*4+3];
|
||||
|
||||
if (0 && spu.init.id==0 && y == 48) {
|
||||
printf("colors[0] = %f %f %f %f\n",
|
||||
spu_extract(colors[0], 0),
|
||||
spu_extract(colors[0], 1),
|
||||
spu_extract(colors[0], 2),
|
||||
spu_extract(colors[0], 3));
|
||||
printf("colors[1] = %f %f %f %f\n",
|
||||
spu_extract(colors[1], 0),
|
||||
spu_extract(colors[1], 1),
|
||||
spu_extract(colors[1], 2),
|
||||
spu_extract(colors[1], 3));
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -289,21 +289,19 @@ xm_buffer_destroy(struct pipe_winsys *pws,
|
|||
* +--+--+
|
||||
*/
|
||||
static void
|
||||
twiddle_tile(uint *tile)
|
||||
twiddle_tile(const uint *tileIn, uint *tileOut)
|
||||
{
|
||||
uint tile2[TILE_SIZE * TILE_SIZE];
|
||||
int y, x;
|
||||
|
||||
for (y = 0; y < TILE_SIZE; y+=2) {
|
||||
for (x = 0; x < TILE_SIZE; x+=2) {
|
||||
int k = 4 * (y/2 * TILE_SIZE/2 + x/2);
|
||||
tile2[y * TILE_SIZE + (x + 0)] = tile[k];
|
||||
tile2[y * TILE_SIZE + (x + 1)] = tile[k+1];
|
||||
tile2[(y + 1) * TILE_SIZE + (x + 0)] = tile[k+2];
|
||||
tile2[(y + 1) * TILE_SIZE + (x + 1)] = tile[k+3];
|
||||
tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k];
|
||||
tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1];
|
||||
tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2];
|
||||
tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3];
|
||||
}
|
||||
}
|
||||
memcpy(tile, tile2, sizeof(tile2));
|
||||
}
|
||||
|
||||
|
||||
|
@ -339,6 +337,7 @@ xmesa_display_surface_tiled(XMesaBuffer b, const struct pipe_surface *surf)
|
|||
|
||||
for (y = 0; y < surf->height; y += TILE_SIZE) {
|
||||
for (x = 0; x < surf->width; x += TILE_SIZE) {
|
||||
uint tmpTile[TILE_SIZE * TILE_SIZE];
|
||||
int tx = x / TILE_SIZE;
|
||||
int ty = y / TILE_SIZE;
|
||||
int offset = ty * tilesPerRow + tx;
|
||||
|
@ -352,9 +351,9 @@ xmesa_display_surface_tiled(XMesaBuffer b, const struct pipe_surface *surf)
|
|||
|
||||
offset *= 4 * TILE_SIZE * TILE_SIZE;
|
||||
|
||||
ximage->data = (char *) xm_buf->data + offset;
|
||||
|
||||
twiddle_tile((uint *) ximage->data);
|
||||
twiddle_tile((uint *) ((char *) xm_buf->data + offset),
|
||||
tmpTile);
|
||||
ximage->data = (char*) tmpTile;
|
||||
|
||||
if (XSHM_ENABLED(xm_buf)) {
|
||||
#if defined(USE_XSHM) && !defined(XFree86Server)
|
||||
|
|
Loading…
Reference in New Issue