Merge remote branch 'upstream/gallium-0.1' into nouveau-gallium-0.1
This commit is contained in:
commit
81335d0f17
|
@ -23,6 +23,10 @@ CFLAGS = -O
|
|||
CXXFLAGS = -O
|
||||
GLU_CFLAGS =
|
||||
|
||||
# Compiler for building demos/tests/etc
|
||||
APP_CC = $(CC)
|
||||
APP_CXX = $(CXX)
|
||||
|
||||
# Misc tools and flags
|
||||
MKLIB_OPTIONS =
|
||||
MKDEP = makedepend
|
||||
|
|
|
@ -12,6 +12,8 @@ GALLIUM_DRIVER_DIRS += cell
|
|||
CC = ppu32-gcc
|
||||
CXX = ppu32-g++
|
||||
HOST_CC = gcc
|
||||
APP_CC = gcc
|
||||
APP_CXX = g++
|
||||
|
||||
OPT_FLAGS = -O3
|
||||
|
||||
|
@ -19,7 +21,7 @@ OPT_FLAGS = -O3
|
|||
## For SDK 2.1: (plus, remove -DSPU_MAIN_PARAM_LONG_LONG below)
|
||||
#SDK = /opt/ibm/cell-sdk/prototype/sysroot/usr
|
||||
## For SDK 3.0:
|
||||
SDK = /opt/cell/sdk/usr/
|
||||
SDK = /opt/cell/sdk/usr
|
||||
|
||||
|
||||
CFLAGS = $(OPT_FLAGS) -Wall -Winline -fPIC -m32 -mabi=altivec -maltivec \
|
||||
|
|
|
@ -74,7 +74,7 @@ PROGS = \
|
|||
|
||||
# make executable from .c file:
|
||||
.c: $(LIB_DEP) readtex.o
|
||||
$(CC) -I$(INCDIR) $(CFLAGS) $< readtex.o $(APP_LIB_DEPS) -o $@
|
||||
$(APP_CC) -I$(INCDIR) $(CFLAGS) $< readtex.o $(APP_LIB_DEPS) -o $@
|
||||
|
||||
|
||||
##### TARGETS #####
|
||||
|
@ -90,7 +90,7 @@ readtex.h: $(TOP)/progs/util/readtex.h
|
|||
cp $< .
|
||||
|
||||
readtex.o: readtex.c readtex.h
|
||||
$(CC) -c -I$(INCDIR) $(CFLAGS) readtex.c
|
||||
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) readtex.c
|
||||
|
||||
|
||||
showbuffer.c: $(TOP)/progs/util/showbuffer.c
|
||||
|
@ -100,7 +100,7 @@ showbuffer.h: $(TOP)/progs/util/showbuffer.h
|
|||
cp $< .
|
||||
|
||||
showbuffer.o: showbuffer.c showbuffer.h
|
||||
$(CC) -c -I$(INCDIR) $(CFLAGS) showbuffer.c
|
||||
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) showbuffer.c
|
||||
|
||||
|
||||
trackball.c: $(TOP)/progs/util/trackball.c
|
||||
|
@ -110,7 +110,7 @@ trackball.h: $(TOP)/progs/util/trackball.h
|
|||
cp $< .
|
||||
|
||||
trackball.o: trackball.c trackball.h
|
||||
$(CC) -c -I$(INCDIR) $(CFLAGS) trackball.c
|
||||
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) trackball.c
|
||||
|
||||
|
||||
extfuncs.h: $(TOP)/progs/util/extfuncs.h
|
||||
|
@ -118,38 +118,38 @@ extfuncs.h: $(TOP)/progs/util/extfuncs.h
|
|||
|
||||
|
||||
reflect: reflect.o showbuffer.o readtex.o
|
||||
$(CC) reflect.o showbuffer.o readtex.o $(APP_LIB_DEPS) $(ARCH_FLAGS) -o $@
|
||||
$(APP_CC) reflect.o showbuffer.o readtex.o $(APP_LIB_DEPS) $(ARCH_FLAGS) -o $@
|
||||
|
||||
reflect.o: reflect.c showbuffer.h
|
||||
$(CC) -c -I$(INCDIR) $(CFLAGS) reflect.c
|
||||
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) reflect.c
|
||||
|
||||
|
||||
shadowtex: shadowtex.o showbuffer.o
|
||||
$(CC) shadowtex.o showbuffer.o $(APP_LIB_DEPS) $(ARCH_FLAGS) -o $@
|
||||
$(APP_CC) shadowtex.o showbuffer.o $(APP_LIB_DEPS) $(ARCH_FLAGS) -o $@
|
||||
|
||||
shadowtex.o: shadowtex.c showbuffer.h
|
||||
$(CC) -c -I$(INCDIR) $(CFLAGS) shadowtex.c
|
||||
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) shadowtex.c
|
||||
|
||||
|
||||
gloss: gloss.o trackball.o readtex.o
|
||||
$(CC) gloss.o trackball.o readtex.o $(APP_LIB_DEPS) $(ARCH_FLAGS) -o $@
|
||||
$(APP_CC) gloss.o trackball.o readtex.o $(APP_LIB_DEPS) $(ARCH_FLAGS) -o $@
|
||||
|
||||
gloss.o: gloss.c trackball.h
|
||||
$(CC) -c -I$(INCDIR) $(CFLAGS) gloss.c
|
||||
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) gloss.c
|
||||
|
||||
|
||||
engine: engine.o trackball.o readtex.o
|
||||
$(CC) engine.o trackball.o readtex.o $(APP_LIB_DEPS) $(ARCH_FLAGS) -o $@
|
||||
$(APP_CC) engine.o trackball.o readtex.o $(APP_LIB_DEPS) $(ARCH_FLAGS) -o $@
|
||||
|
||||
engine.o: engine.c trackball.h
|
||||
$(CC) -c -I$(INCDIR) $(CFLAGS) engine.c
|
||||
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) engine.c
|
||||
|
||||
|
||||
fslight: fslight.o
|
||||
$(CC) fslight.o $(APP_LIB_DEPS) $(ARCH_FLAGS) -o $@
|
||||
$(APP_CC) fslight.o $(APP_LIB_DEPS) $(ARCH_FLAGS) -o $@
|
||||
|
||||
fslight.o: fslight.c extfuncs.h
|
||||
$(CC) -c -I$(INCDIR) $(CFLAGS) fslight.c
|
||||
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) fslight.c
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -40,13 +40,13 @@ UTIL_FILES = readtex.h readtex.c
|
|||
.SUFFIXES: .c
|
||||
|
||||
.c:
|
||||
$(CC) $(INCLUDES) $(CFLAGS) $< $(LIBS) -o $@
|
||||
$(APP_CC) $(INCLUDES) $(CFLAGS) $< $(LIBS) -o $@
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
.S.o:
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
|
||||
##### TARGETS #####
|
||||
|
@ -67,19 +67,19 @@ getproclist.h: $(TOP)/src/mesa/glapi/gl_API.xml getprocaddress.c getprocaddress.
|
|||
|
||||
|
||||
texrect: texrect.o readtex.o
|
||||
$(CC) texrect.o readtex.o $(LIBS) -o $@
|
||||
$(APP_CC) texrect.o readtex.o $(LIBS) -o $@
|
||||
|
||||
texrect.o: texrect.c readtex.h
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
invert: invert.o readtex.o
|
||||
$(CC) invert.o readtex.o $(LIBS) -o $@
|
||||
$(APP_CC) invert.o readtex.o $(LIBS) -o $@
|
||||
|
||||
invert.o: invert.c readtex.h
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
readtex.o: readtex.c
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
|
||||
readtex.h: $(TOP)/progs/util/readtex.h
|
||||
|
|
|
@ -26,7 +26,7 @@ PROGS = \
|
|||
|
||||
# make executable from .c file:
|
||||
.c: $(LIB_DEP)
|
||||
$(CC) -I$(INCDIR) $(CFLAGS) $< $(APP_LIB_DEPS) -o $@
|
||||
$(APP_CC) -I$(INCDIR) $(CFLAGS) $< $(APP_LIB_DEPS) -o $@
|
||||
|
||||
|
||||
##### TARGETS #####
|
||||
|
@ -47,7 +47,7 @@ readtex.h: $(TOP)/progs/util/readtex.h
|
|||
cp $< .
|
||||
|
||||
readtex.o: readtex.c readtex.h
|
||||
$(CC) -c -I$(INCDIR) $(CFLAGS) readtex.c
|
||||
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) readtex.c
|
||||
|
||||
brick.c: extfuncs.h
|
||||
|
||||
|
@ -58,16 +58,16 @@ mandelbrot.c: extfuncs.h
|
|||
toyball.c: extfuncs.h
|
||||
|
||||
texdemo1: texdemo1.o readtex.o
|
||||
$(CC) -I$(INCDIR) $(CFLAGS) texdemo1.o readtex.o $(APP_LIB_DEPS) -o $@
|
||||
$(APP_CC) -I$(INCDIR) $(CFLAGS) texdemo1.o readtex.o $(APP_LIB_DEPS) -o $@
|
||||
|
||||
texdemo1.o: texdemo1.c readtex.h extfuncs.h
|
||||
$(CC) -c -I$(INCDIR) $(CFLAGS) texdemo1.c
|
||||
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) texdemo1.c
|
||||
|
||||
convolutions: convolutions.o readtex.o
|
||||
$(CC) -I$(INCDIR) $(CFLAGS) convolutions.o readtex.o $(APP_LIB_DEPS) -o $@
|
||||
$(APP_CC) -I$(INCDIR) $(CFLAGS) convolutions.o readtex.o $(APP_LIB_DEPS) -o $@
|
||||
|
||||
convolutions.o: convolutions.c readtex.h
|
||||
$(CC) -c -I$(INCDIR) $(CFLAGS) convolutions.c
|
||||
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) convolutions.c
|
||||
|
||||
|
||||
clean:
|
||||
|
|
|
@ -24,7 +24,7 @@ PROGS = aaindex aapoly aargb accanti accpersp alpha alpha3D anti \
|
|||
.SUFFIXES: .c
|
||||
|
||||
.c: $(LIB_DEP)
|
||||
$(CC) -I$(INCDIR) $(CFLAGS) $< $(APP_LIB_DEPS) -o $@
|
||||
$(APP_CC) -I$(INCDIR) $(CFLAGS) $< $(APP_LIB_DEPS) -o $@
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@ PROGS = accum bitmap1 bitmap2 blendeq blendxor copy cursor depth eval fog \
|
|||
.SUFFIXES: .c
|
||||
|
||||
.c: $(LIB_DEP)
|
||||
$(CC) -I$(INCDIR) $(CFLAGS) $< $(APP_LIB_DEPS) -o $@
|
||||
$(APP_CC) -I$(INCDIR) $(CFLAGS) $< $(APP_LIB_DEPS) -o $@
|
||||
|
||||
|
||||
##### TARGETS #####
|
||||
|
@ -27,10 +27,10 @@ default: $(PROGS)
|
|||
|
||||
|
||||
sphere: sphere.o readtex.o
|
||||
$(CC) -I$(INCDIR) $(CFLAGS) sphere.o readtex.o $(APP_LIB_DEPS) -o $@
|
||||
$(APP_CC) -I$(INCDIR) $(CFLAGS) sphere.o readtex.o $(APP_LIB_DEPS) -o $@
|
||||
|
||||
sphere.o: sphere.c readtex.h
|
||||
$(CC) -c -I$(INCDIR) $(CFLAGS) sphere.c
|
||||
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) sphere.c
|
||||
|
||||
readtex.c: $(TOP)/progs/util/readtex.c
|
||||
cp $< .
|
||||
|
@ -39,7 +39,7 @@ readtex.h: $(TOP)/progs/util/readtex.h
|
|||
cp $< .
|
||||
|
||||
readtex.o: readtex.c readtex.h
|
||||
$(CC) -c -I$(INCDIR) $(CFLAGS) $< -o $@
|
||||
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) $< -o $@
|
||||
|
||||
|
||||
clean:
|
||||
|
|
|
@ -60,6 +60,7 @@ SOURCES = \
|
|||
pbo.c \
|
||||
prog_parameter.c \
|
||||
projtex.c \
|
||||
quads.c \
|
||||
random.c \
|
||||
readrate.c \
|
||||
seccolor.c \
|
||||
|
@ -102,13 +103,13 @@ UTIL_FILES = readtex.h readtex.c
|
|||
.SUFFIXES: .c
|
||||
|
||||
.c:
|
||||
$(CC) $(INCLUDES) $(CFLAGS) $< $(LIBS) -o $@
|
||||
$(APP_CC) $(INCLUDES) $(CFLAGS) $< $(LIBS) -o $@
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
.S.o:
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
|
||||
##### TARGETS #####
|
||||
|
@ -128,58 +129,58 @@ getproclist.h: $(TOP)/src/mesa/glapi/gl_API.xml getprocaddress.c getprocaddress.
|
|||
python getprocaddress.py > getproclist.h
|
||||
|
||||
arraytexture: arraytexture.o readtex.o
|
||||
$(CC) $(CFLAGS) arraytexture.o readtex.o $(LIBS) -o $@
|
||||
$(APP_CC) $(CFLAGS) arraytexture.o readtex.o $(LIBS) -o $@
|
||||
|
||||
arraytexture.o: arraytexture.c readtex.h
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
afsmultiarb: afsmultiarb.o readtex.o
|
||||
$(CC) $(CFLAGS) afsmultiarb.o readtex.o $(LIBS) -o $@
|
||||
$(APP_CC) $(CFLAGS) afsmultiarb.o readtex.o $(LIBS) -o $@
|
||||
|
||||
afsmultiarb.o: afsmultiarb.c readtex.h
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
drawbuffers: drawbuffers.o
|
||||
$(CC) $(CFLAGS) drawbuffers.o $(LIBS) -o $@
|
||||
$(APP_CC) $(CFLAGS) drawbuffers.o $(LIBS) -o $@
|
||||
|
||||
drawbuffers.o: drawbuffers.c extfuncs.h
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
texrect: texrect.o readtex.o
|
||||
$(CC) $(CFLAGS) texrect.o readtex.o $(LIBS) -o $@
|
||||
$(APP_CC) $(CFLAGS) texrect.o readtex.o $(LIBS) -o $@
|
||||
|
||||
texrect.o: texrect.c readtex.h
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
bug_3195: bug_3195.o readtex.o
|
||||
$(CC) $(CFLAGS) bug_3195.o readtex.o $(LIBS) -o $@
|
||||
$(APP_CC) $(CFLAGS) bug_3195.o readtex.o $(LIBS) -o $@
|
||||
|
||||
bug_3195.o: bug_3195.c readtex.h
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
invert: invert.o readtex.o
|
||||
$(CC) $(CFLAGS) invert.o readtex.o $(LIBS) -o $@
|
||||
$(APP_CC) $(CFLAGS) invert.o readtex.o $(LIBS) -o $@
|
||||
|
||||
invert.o: invert.c readtex.h
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
mipmap_view: mipmap_view.o readtex.o
|
||||
$(CC) $(CFLAGS) mipmap_view.o readtex.o $(LIBS) -o $@
|
||||
$(APP_CC) $(CFLAGS) mipmap_view.o readtex.o $(LIBS) -o $@
|
||||
|
||||
mipmap_view.o: mipmap_view.c readtex.h
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
|
||||
fillrate: fillrate.o readtex.o
|
||||
$(CC) $(CFLAGS) fillrate.o readtex.o $(LIBS) -o $@
|
||||
$(APP_CC) $(CFLAGS) fillrate.o readtex.o $(LIBS) -o $@
|
||||
|
||||
fillrate.o: fillrate.c readtex.h
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
|
||||
|
||||
readtex.o: readtex.c
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
|
||||
readtex.h: $(TOP)/progs/util/readtex.h
|
||||
|
|
|
@ -0,0 +1,258 @@
|
|||
/**
|
||||
* Draw colored quads.
|
||||
*/
|
||||
|
||||
|
||||
#define GL_GLEXT_PROTOTYPES
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <GL/glut.h>
|
||||
|
||||
#define NUM_QUADS 20
|
||||
|
||||
|
||||
static int Win;
|
||||
static GLfloat Xrot = 40, Yrot = 0, Zrot = 0;
|
||||
static GLboolean Anim = GL_TRUE;
|
||||
static GLuint Vbuffer = 0;
|
||||
|
||||
static GLfloat buf[NUM_QUADS * 6 * 4];
|
||||
|
||||
static GLboolean SwapBuffers = GL_TRUE;
|
||||
|
||||
static GLint Frames = 0, T0 = 0;
|
||||
|
||||
|
||||
static void
|
||||
Idle(void)
|
||||
{
|
||||
Xrot += 3.0;
|
||||
Yrot += 4.0;
|
||||
Zrot += 2.0;
|
||||
glutPostRedisplay();
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
Draw(void)
|
||||
{
|
||||
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
|
||||
|
||||
glPushMatrix();
|
||||
glRotatef(Xrot, 1, 0, 0);
|
||||
glRotatef(Yrot, 0, 1, 0);
|
||||
glRotatef(Zrot, 0, 0, 1);
|
||||
|
||||
glDrawArrays(GL_QUADS, 0, NUM_QUADS*4);
|
||||
|
||||
glPopMatrix();
|
||||
|
||||
if (SwapBuffers)
|
||||
glutSwapBuffers();
|
||||
/*
|
||||
else
|
||||
glFinish();
|
||||
*/
|
||||
|
||||
{
|
||||
GLint t = glutGet(GLUT_ELAPSED_TIME);
|
||||
Frames++;
|
||||
if (t - T0 >= 5000) {
|
||||
GLfloat seconds = (t - T0) / 1000.0;
|
||||
GLfloat fps = Frames / seconds;
|
||||
printf("%d frames in %6.3f seconds = %6.3f FPS\n",
|
||||
Frames, seconds, fps);
|
||||
T0 = t;
|
||||
Frames = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
Reshape(int width, int height)
|
||||
{
|
||||
glViewport(0, 0, width, height);
|
||||
glMatrixMode(GL_PROJECTION);
|
||||
glLoadIdentity();
|
||||
glFrustum(-1.0, 1.0, -1.0, 1.0, 5.0, 25.0);
|
||||
glMatrixMode(GL_MODELVIEW);
|
||||
glLoadIdentity();
|
||||
glTranslatef(0.0, 0.0, -8.0);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
Key(unsigned char key, int x, int y)
|
||||
{
|
||||
const GLfloat step = 3.0;
|
||||
(void) x;
|
||||
(void) y;
|
||||
switch (key) {
|
||||
case 's':
|
||||
SwapBuffers = !SwapBuffers;
|
||||
break;
|
||||
case 'a':
|
||||
Anim = !Anim;
|
||||
if (Anim)
|
||||
glutIdleFunc(Idle);
|
||||
else
|
||||
glutIdleFunc(NULL);
|
||||
break;
|
||||
case 'z':
|
||||
Zrot -= step;
|
||||
break;
|
||||
case 'Z':
|
||||
Zrot += step;
|
||||
break;
|
||||
case 27:
|
||||
glutDestroyWindow(Win);
|
||||
exit(0);
|
||||
break;
|
||||
}
|
||||
glutPostRedisplay();
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
SpecialKey(int key, int x, int y)
|
||||
{
|
||||
const GLfloat step = 3.0;
|
||||
(void) x;
|
||||
(void) y;
|
||||
switch (key) {
|
||||
case GLUT_KEY_UP:
|
||||
Xrot -= step;
|
||||
break;
|
||||
case GLUT_KEY_DOWN:
|
||||
Xrot += step;
|
||||
break;
|
||||
case GLUT_KEY_LEFT:
|
||||
Yrot -= step;
|
||||
break;
|
||||
case GLUT_KEY_RIGHT:
|
||||
Yrot += step;
|
||||
break;
|
||||
}
|
||||
glutPostRedisplay();
|
||||
}
|
||||
|
||||
|
||||
/**
 * Emit one quad as four interleaved vertices into \p v.
 *
 * Each vertex occupies six consecutive floats: an RGB color followed by an
 * XYZ position, so 24 floats are written in total.  The corners are
 * (x, y, z), (-x, -y, z), (-x, -y, -z) and (x, y, -z); each color component
 * is the matching position component mapped through c * 0.5 + 0.5.
 *
 * \param x, y, z  coordinates of the first corner
 * \param v        destination; must have room for 24 floats
 */
static void
quad(float x, float y, float z, float *v)
{
   /* Sign applied to (x, y) and to z for each of the four corners. */
   static const float xy_sign[4] = { 1.0f, -1.0f, -1.0f,  1.0f };
   static const float z_sign[4]  = { 1.0f,  1.0f, -1.0f, -1.0f };
   int corner;

   for (corner = 0; corner < 4; corner++) {
      const float px = xy_sign[corner] * x;
      const float py = xy_sign[corner] * y;
      const float pz = z_sign[corner] * z;
      float *out = v + corner * 6;

      /* color */
      out[0] = px * 0.5 + 0.5;
      out[1] = py * 0.5 + 0.5;
      out[2] = pz * 0.5 + 0.5;
      /* vert */
      out[3] = px;
      out[4] = py;
      out[5] = pz;
   }
}
|
||||
|
||||
static void
|
||||
gen_quads(GLfloat *buf)
|
||||
{
|
||||
float *v = buf;
|
||||
float r = 1.0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NUM_QUADS; i++) {
|
||||
float angle = i / (float) NUM_QUADS * M_PI;
|
||||
float x = r * cos(angle);
|
||||
float y = r * sin(angle);
|
||||
float z = 1.10;
|
||||
quad(x, y, z, v);
|
||||
v += 24;
|
||||
}
|
||||
|
||||
if (0) {
|
||||
float *p = buf;
|
||||
for (i = 0; i < NUM_QUADS * 4 * 2; i++) {
|
||||
printf("%d: %f %f %f\n", i, p[0], p[1], p[2]);
|
||||
p += 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
Init(void)
|
||||
{
|
||||
int bytes = NUM_QUADS * 4 * 2 * 3 * sizeof(float);
|
||||
GLfloat *f;
|
||||
|
||||
#if 1
|
||||
glGenBuffers(1, &Vbuffer);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, Vbuffer);
|
||||
glBufferData(GL_ARRAY_BUFFER_ARB, bytes, NULL, GL_STATIC_DRAW_ARB);
|
||||
f = (float *) glMapBuffer(GL_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB);
|
||||
gen_quads(f);
|
||||
glUnmapBuffer(GL_ARRAY_BUFFER_ARB);
|
||||
glColorPointer(3, GL_FLOAT, 6*sizeof(float), (void *) 0);
|
||||
glVertexPointer(3, GL_FLOAT, 6*sizeof(float), (void *) 12);
|
||||
#else
|
||||
f = buf;
|
||||
gen_quads(f);
|
||||
glColorPointer(3, GL_FLOAT, 6*sizeof(float), buf);
|
||||
glVertexPointer(3, GL_FLOAT, 6*sizeof(float), buf + 3);
|
||||
#endif
|
||||
|
||||
glEnableClientState(GL_COLOR_ARRAY);
|
||||
glEnableClientState(GL_VERTEX_ARRAY);
|
||||
|
||||
glEnable(GL_DEPTH_TEST);
|
||||
|
||||
glClearColor(0.5, 0.5, 0.5, 0.0);
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
glutInit(&argc, argv);
|
||||
glutInitWindowPosition(0, 0);
|
||||
glutInitWindowSize(600, 600);
|
||||
glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH);
|
||||
Win = glutCreateWindow(argv[0]);
|
||||
glutReshapeFunc(Reshape);
|
||||
glutKeyboardFunc(Key);
|
||||
glutSpecialFunc(SpecialKey);
|
||||
glutDisplayFunc(Draw);
|
||||
if (Anim)
|
||||
glutIdleFunc(Idle);
|
||||
Init();
|
||||
glutMainLoop();
|
||||
return 0;
|
||||
}
|
|
@ -139,13 +139,13 @@ UTIL_FILES = readtex.h readtex.c
|
|||
.SUFFIXES: .c
|
||||
|
||||
.c:
|
||||
$(CC) $(INCLUDES) $(CFLAGS) $< $(LIBS) -o $@
|
||||
$(APP_CC) $(INCLUDES) $(CFLAGS) $< $(LIBS) -o $@
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
.S.o:
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
|
||||
##### TARGETS #####
|
||||
|
@ -166,19 +166,19 @@ getproclist.h: $(TOP)/src/mesa/glapi/gl_API.xml getprocaddress.c getprocaddress.
|
|||
|
||||
|
||||
texrect: texrect.o readtex.o
|
||||
$(CC) texrect.o readtex.o $(LIBS) -o $@
|
||||
$(APP_CC) texrect.o readtex.o $(LIBS) -o $@
|
||||
|
||||
texrect.o: texrect.c readtex.h
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
invert: invert.o readtex.o
|
||||
$(CC) invert.o readtex.o $(LIBS) -o $@
|
||||
$(APP_CC) invert.o readtex.o $(LIBS) -o $@
|
||||
|
||||
invert.o: invert.c readtex.h
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
readtex.o: readtex.c
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
|
||||
readtex.h: $(TOP)/progs/util/readtex.h
|
||||
|
|
|
@ -37,9 +37,36 @@
|
|||
|
||||
#include <GL/glut.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
static int leftFirst = GL_TRUE;
|
||||
|
||||
static struct { GLenum func; const char *str; } funcs[] =
|
||||
{
|
||||
{ GL_LESS, "GL_LESS" },
|
||||
{ GL_LEQUAL, "GL_LEQUAL" },
|
||||
{ GL_GREATER, "GL_GREATER" },
|
||||
{ GL_GEQUAL, "GL_GEQUAL" },
|
||||
{ GL_EQUAL, "GL_EQUAL" },
|
||||
{ GL_NOTEQUAL, "GL_NOTEQUAL" },
|
||||
{ GL_ALWAYS, "GL_ALWAYS" },
|
||||
{ GL_NEVER, "GL_NEVER" },
|
||||
};
|
||||
|
||||
#define NUM_FUNCS (sizeof(funcs) / sizeof(funcs[0]))
|
||||
|
||||
static int curFunc = 0;
|
||||
static double clearVal = 1.0;
|
||||
|
||||
|
||||
/* Print the interactive key bindings to stdout. */
static void usage(void)
{
   printf("t - toggle rendering order of triangles\n");
   printf("c - toggle Z clear value between 0, 1\n");
   printf("f - cycle through depth test functions\n");
}
|
||||
|
||||
|
||||
static void init(void)
|
||||
{
|
||||
glEnable(GL_DEPTH_TEST);
|
||||
|
@ -70,6 +97,11 @@ static void drawRightTriangle(void)
|
|||
|
||||
void display(void)
|
||||
{
|
||||
printf("GL_CLEAR_DEPTH = %f GL_DEPTH_FUNC = %s\n",
|
||||
clearVal, funcs[curFunc].str);
|
||||
glClearDepth(clearVal);
|
||||
glDepthFunc(funcs[curFunc].func);
|
||||
|
||||
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
|
||||
|
||||
if (leftFirst) {
|
||||
|
@ -99,6 +131,16 @@ void reshape(int w, int h)
|
|||
void keyboard(unsigned char key, int x, int y)
|
||||
{
|
||||
switch (key) {
|
||||
case 'c':
|
||||
case 'C':
|
||||
clearVal = 1.0 - clearVal;
|
||||
glutPostRedisplay();
|
||||
break;
|
||||
case 'f':
|
||||
case 'F':
|
||||
curFunc = (curFunc + 1) % NUM_FUNCS;
|
||||
glutPostRedisplay();
|
||||
break;
|
||||
case 't':
|
||||
case 'T':
|
||||
leftFirst = !leftFirst;
|
||||
|
@ -122,10 +164,11 @@ int main(int argc, char** argv)
|
|||
glutInitDisplayMode (GLUT_SINGLE | GLUT_RGB | GLUT_DEPTH);
|
||||
glutInitWindowSize (200, 200);
|
||||
glutCreateWindow (argv[0]);
|
||||
init();
|
||||
glutReshapeFunc (reshape);
|
||||
glutKeyboardFunc (keyboard);
|
||||
glutDisplayFunc (display);
|
||||
init();
|
||||
usage();
|
||||
glutMainLoop();
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ PROGS = glthreads \
|
|||
.SUFFIXES: .c
|
||||
|
||||
.c: $(LIB_DEP)
|
||||
$(CC) -I$(INCDIR) $(CFLAGS) $< $(APP_LIB_DEPS) -o $@
|
||||
$(APP_CC) -I$(INCDIR) $(CFLAGS) $< $(APP_LIB_DEPS) -o $@
|
||||
|
||||
|
||||
##### TARGETS #####
|
||||
|
@ -54,32 +54,32 @@ clean:
|
|||
|
||||
# special cases
|
||||
pbinfo: pbinfo.o pbutil.o
|
||||
$(CC) -I$(INCDIR) $(CFLAGS) pbinfo.o pbutil.o $(APP_LIB_DEPS) -o $@
|
||||
$(APP_CC) -I$(INCDIR) $(CFLAGS) pbinfo.o pbutil.o $(APP_LIB_DEPS) -o $@
|
||||
|
||||
pbdemo: pbdemo.o pbutil.o
|
||||
$(CC) -I$(INCDIR) $(CFLAGS) pbdemo.o pbutil.o $(APP_LIB_DEPS) -o $@
|
||||
$(APP_CC) -I$(INCDIR) $(CFLAGS) pbdemo.o pbutil.o $(APP_LIB_DEPS) -o $@
|
||||
|
||||
pbinfo.o: pbinfo.c pbutil.h
|
||||
$(CC) -c -I. -I$(INCDIR) $(CFLAGS) pbinfo.c
|
||||
$(APP_CC) -c -I. -I$(INCDIR) $(CFLAGS) pbinfo.c
|
||||
|
||||
pbdemo.o: pbdemo.c pbutil.h
|
||||
$(CC) -c -I. -I$(INCDIR) $(CFLAGS) pbdemo.c
|
||||
$(APP_CC) -c -I. -I$(INCDIR) $(CFLAGS) pbdemo.c
|
||||
|
||||
pbutil.o: pbutil.c pbutil.h
|
||||
$(CC) -c -I. -I$(INCDIR) $(CFLAGS) pbutil.c
|
||||
$(APP_CC) -c -I. -I$(INCDIR) $(CFLAGS) pbutil.c
|
||||
|
||||
glxgears_fbconfig: glxgears_fbconfig.o pbutil.o
|
||||
$(CC) -I$(INCDIR) $(CFLAGS) glxgears_fbconfig.o pbutil.o $(APP_LIB_DEPS) -o $@
|
||||
$(APP_CC) -I$(INCDIR) $(CFLAGS) glxgears_fbconfig.o pbutil.o $(APP_LIB_DEPS) -o $@
|
||||
|
||||
glxgears_fbconfig.o: glxgears_fbconfig.c pbutil.h
|
||||
$(CC) -I$(INCDIR) $(CFLAGS) -c -I. $(CFLAGS) glxgears_fbconfig.c
|
||||
$(APP_CC) -I$(INCDIR) $(CFLAGS) -c -I. $(CFLAGS) glxgears_fbconfig.c
|
||||
|
||||
xrotfontdemo: xrotfontdemo.o xuserotfont.o
|
||||
$(CC) -I$(INCDIR) $(CFLAGS) xrotfontdemo.o xuserotfont.o $(APP_LIB_DEPS) -o $@
|
||||
$(APP_CC) -I$(INCDIR) $(CFLAGS) xrotfontdemo.o xuserotfont.o $(APP_LIB_DEPS) -o $@
|
||||
|
||||
xuserotfont.o: xuserotfont.c xuserotfont.h
|
||||
$(CC) -c -I. -I$(INCDIR) $(CFLAGS) xuserotfont.c
|
||||
$(APP_CC) -c -I. -I$(INCDIR) $(CFLAGS) xuserotfont.c
|
||||
|
||||
xrotfontdemo.o: xrotfontdemo.c xuserotfont.h
|
||||
$(CC) -c -I. -I$(INCDIR) $(CFLAGS) xrotfontdemo.c
|
||||
$(APP_CC) -c -I. -I$(INCDIR) $(CFLAGS) xrotfontdemo.c
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ include $(TOP)/configs/current
|
|||
INCLUDE_DIRS = \
|
||||
-I. \
|
||||
-I/usr/include \
|
||||
-I/usr/include/drm \
|
||||
$(shell pkg-config --cflags-only-I libdrm) \
|
||||
-I$(TOP)/include \
|
||||
-I$(TOP)/include/GL/internal \
|
||||
-I$(TOP)/src/mesa \
|
||||
|
|
|
@ -13,7 +13,7 @@ DRIVER_NAME = egl_xdri.so
|
|||
INCLUDE_DIRS = \
|
||||
-I. \
|
||||
-I/usr/include \
|
||||
-I/usr/include/drm \
|
||||
$(shell pkg-config --cflags-only-I libdrm) \
|
||||
-I$(TOP)/include \
|
||||
-I$(TOP)/include/GL/internal \
|
||||
-I$(TOP)/src/mesa/glapi \
|
||||
|
@ -48,6 +48,7 @@ $(TOP)/$(LIB_DIR)/$(DRIVER_NAME): $(OBJECTS)
|
|||
$(TOP)/bin/mklib -o $(DRIVER_NAME) \
|
||||
-noprefix \
|
||||
-major 1 -minor 0 \
|
||||
-L $(TOP)/$(LIB_DIR) \
|
||||
-install $(TOP)/$(LIB_DIR) \
|
||||
$(OBJECTS) $(DRM_LIB) $(MISC_LIBS)
|
||||
|
||||
|
|
|
@ -151,8 +151,8 @@ static void emit_RR(struct spe_function *p, unsigned op, unsigned rT,
|
|||
inst.inst.rB = rB;
|
||||
inst.inst.rA = rA;
|
||||
inst.inst.rT = rT;
|
||||
*p->csr = inst.bits;
|
||||
p->csr++;
|
||||
p->store[p->num_inst++] = inst.bits;
|
||||
assert(p->num_inst <= p->max_inst);
|
||||
}
|
||||
|
||||
|
||||
|
@ -165,8 +165,8 @@ static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT,
|
|||
inst.inst.rB = rB;
|
||||
inst.inst.rA = rA;
|
||||
inst.inst.rC = rC;
|
||||
*p->csr = inst.bits;
|
||||
p->csr++;
|
||||
p->store[p->num_inst++] = inst.bits;
|
||||
assert(p->num_inst <= p->max_inst);
|
||||
}
|
||||
|
||||
|
||||
|
@ -178,8 +178,8 @@ static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT,
|
|||
inst.inst.i7 = imm;
|
||||
inst.inst.rA = rA;
|
||||
inst.inst.rT = rT;
|
||||
*p->csr = inst.bits;
|
||||
p->csr++;
|
||||
p->store[p->num_inst++] = inst.bits;
|
||||
assert(p->num_inst <= p->max_inst);
|
||||
}
|
||||
|
||||
|
||||
|
@ -192,8 +192,8 @@ static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT,
|
|||
inst.inst.i8 = imm;
|
||||
inst.inst.rA = rA;
|
||||
inst.inst.rT = rT;
|
||||
*p->csr = inst.bits;
|
||||
p->csr++;
|
||||
p->store[p->num_inst++] = inst.bits;
|
||||
assert(p->num_inst <= p->max_inst);
|
||||
}
|
||||
|
||||
|
||||
|
@ -206,8 +206,8 @@ static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT,
|
|||
inst.inst.i10 = imm;
|
||||
inst.inst.rA = rA;
|
||||
inst.inst.rT = rT;
|
||||
*p->csr = inst.bits;
|
||||
p->csr++;
|
||||
p->store[p->num_inst++] = inst.bits;
|
||||
assert(p->num_inst <= p->max_inst);
|
||||
}
|
||||
|
||||
|
||||
|
@ -218,8 +218,8 @@ static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT,
|
|||
inst.inst.op = op;
|
||||
inst.inst.i16 = imm;
|
||||
inst.inst.rT = rT;
|
||||
*p->csr = inst.bits;
|
||||
p->csr++;
|
||||
p->store[p->num_inst++] = inst.bits;
|
||||
assert(p->num_inst <= p->max_inst);
|
||||
}
|
||||
|
||||
|
||||
|
@ -230,8 +230,8 @@ static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT,
|
|||
inst.inst.op = op;
|
||||
inst.inst.i18 = imm;
|
||||
inst.inst.rT = rT;
|
||||
*p->csr = inst.bits;
|
||||
p->csr++;
|
||||
p->store[p->num_inst++] = inst.bits;
|
||||
assert(p->num_inst <= p->max_inst);
|
||||
}
|
||||
|
||||
|
||||
|
@ -300,13 +300,16 @@ void _name (struct spe_function *p, int imm) \
|
|||
#include "rtasm_ppc_spe.h"
|
||||
|
||||
|
||||
/*
|
||||
/**
|
||||
* Initialize an spe_function.
|
||||
* \param code_size size of instruction buffer to allocate, in bytes.
|
||||
*/
|
||||
void spe_init_func(struct spe_function *p, unsigned code_size)
|
||||
{
|
||||
p->store = align_malloc(code_size, 16);
|
||||
p->csr = p->store;
|
||||
|
||||
p->num_inst = 0;
|
||||
p->max_inst = code_size / SPE_INST_SIZE;
|
||||
|
||||
/* Conservatively treat R0 - R2 and R80 - R127 as non-volatile.
|
||||
*/
|
||||
p->regs[0] = ~7;
|
||||
|
@ -316,21 +319,26 @@ void spe_init_func(struct spe_function *p, unsigned code_size)
|
|||
|
||||
void spe_release_func(struct spe_function *p)
|
||||
{
|
||||
assert(p->num_inst <= p->max_inst);
|
||||
if (p->store != NULL) {
|
||||
align_free(p->store);
|
||||
}
|
||||
p->store = NULL;
|
||||
p->csr = NULL;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Allocate an SPE register.
|
||||
* \return register index or -1 if none left.
|
||||
*/
|
||||
int spe_allocate_available_register(struct spe_function *p)
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < 128; i++) {
|
||||
for (i = 0; i < SPE_NUM_REGS; i++) {
|
||||
const uint64_t mask = (1ULL << (i % 64));
|
||||
const unsigned idx = i / 64;
|
||||
|
||||
assert(idx < 2);
|
||||
if ((p->regs[idx] & mask) != 0) {
|
||||
p->regs[idx] &= ~mask;
|
||||
return i;
|
||||
|
@ -341,11 +349,15 @@ int spe_allocate_available_register(struct spe_function *p)
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Mark the given SPE register as "allocated".
|
||||
*/
|
||||
int spe_allocate_register(struct spe_function *p, int reg)
|
||||
{
|
||||
const unsigned idx = reg / 64;
|
||||
const unsigned bit = reg % 64;
|
||||
|
||||
assert(reg < SPE_NUM_REGS);
|
||||
assert((p->regs[idx] & (1ULL << bit)) != 0);
|
||||
|
||||
p->regs[idx] &= ~(1ULL << bit);
|
||||
|
@ -353,57 +365,75 @@ int spe_allocate_register(struct spe_function *p, int reg)
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Mark the given SPE register as "unallocated".
|
||||
*/
|
||||
void spe_release_register(struct spe_function *p, int reg)
|
||||
{
|
||||
const unsigned idx = reg / 64;
|
||||
const unsigned bit = reg % 64;
|
||||
|
||||
assert(idx < 2);
|
||||
|
||||
assert(reg < SPE_NUM_REGS);
|
||||
assert((p->regs[idx] & (1ULL << bit)) == 0);
|
||||
|
||||
p->regs[idx] |= (1ULL << bit);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* For branch instructions:
|
||||
* \param d if 1, disable interrupts if branch is taken
|
||||
* \param e if 1, enable interrupts if branch is taken
|
||||
* If d and e are both zero, don't change interrupt status (right?)
|
||||
*/
|
||||
|
||||
|
||||
/**
 * Branch Indirect to address in rA.
 *
 * \param d  if 1, disable interrupts if branch is taken
 * \param e  if 1, enable interrupts if branch is taken
 */
void spe_bi(struct spe_function *p, unsigned rA, int d, int e)
{
   /* d and e occupy bits 5 and 4 of the 7-bit immediate field. */
   emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4));
}
|
||||
|
||||
/**
 * Interrupt Return.
 *
 * \param d  if 1, disable interrupts if branch is taken
 * \param e  if 1, enable interrupts if branch is taken
 */
void spe_iret(struct spe_function *p, unsigned rA, int d, int e)
{
   /* d and e occupy bits 5 and 4 of the 7-bit immediate field. */
   emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4));
}
|
||||
|
||||
/**
 * Branch indirect and set link on external data.
 *
 * \param d  if 1, disable interrupts if branch is taken
 * \param e  if 1, enable interrupts if branch is taken
 */
void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, int d,
                int e)
{
   /* d and e occupy bits 5 and 4 of the 7-bit immediate field. */
   emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4));
}
|
||||
|
||||
/**
 * Branch indirect and set link.  Save PC in rT, jump to rA.
 *
 * \param d  if 1, disable interrupts if branch is taken
 * \param e  if 1, enable interrupts if branch is taken
 */
void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, int d,
              int e)
{
   /* d and e occupy bits 5 and 4 of the 7-bit immediate field. */
   emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4));
}
|
||||
|
||||
/**
 * Branch indirect if zero word.  If rT.word[0]==0, jump to rA.
 *
 * \param d  if 1, disable interrupts if branch is taken
 * \param e  if 1, enable interrupts if branch is taken
 */
void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
{
   /* d and e occupy bits 5 and 4 of the 7-bit immediate field. */
   emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4));
}
|
||||
|
||||
/**
 * Branch indirect if non-zero word.  If rT.word[0]!=0, jump to rA.
 *
 * \param d  if 1, disable interrupts if branch is taken
 * \param e  if 1, enable interrupts if branch is taken
 */
void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
{
   /* d and e occupy bits 5 and 4 of the 7-bit immediate field. */
   emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4));
}
|
||||
|
||||
/**
 * Branch indirect if zero halfword.  If rT.halfword[1]==0, jump to rA.
 *
 * \param d  if 1, disable interrupts if branch is taken
 * \param e  if 1, enable interrupts if branch is taken
 */
void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
{
   /* d and e occupy bits 5 and 4 of the 7-bit immediate field. */
   emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4));
}
|
||||
|
||||
/** Branch indirect if non-zero halfword. If rT.halfword[1]!=0, jump to rA. */
|
||||
void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
|
||||
{
|
||||
emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4));
|
||||
|
@ -432,4 +462,81 @@ EMIT_R (spe_mfspr, 0x00c);
|
|||
EMIT_R (spe_mtspr, 0x10c);
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
** Helper / "macro" instructions.
|
||||
** Use somewhat verbose names as a reminder that these aren't native
|
||||
** SPE instructions.
|
||||
**/
|
||||
|
||||
|
||||
/**
 * Emit code that loads the float constant \p x into register \p rT.
 *
 * The choice of instructions is made on the IEEE bit pattern rather than by
 * comparing floats, so -0.0f keeps its sign bit instead of being treated as
 * +0.0f.  A pattern whose low 16 bits are zero (e.g. 0.5, 1.0, -1.0) needs
 * only the upper-halfword load.
 */
void
spe_load_float(struct spe_function *p, unsigned rT, float x)
{
   union {
      float f;
      uint32_t u;
   } bits;
   uint32_t hi, lo;

   bits.f = x;
   hi = bits.u >> 16;
   lo = bits.u & 0xffff;

   if (bits.u == 0) {
      spe_il(p, rT, 0x0);
   }
   else {
      spe_ilhu(p, rT, hi);
      if (lo != 0) {
         /* OR in the low halfword only when it contributes any bits. */
         spe_iohl(p, rT, lo);
      }
   }
}
|
||||
|
||||
|
||||
/**
 * Emit code that loads the immediate integer i into register rT.
 *
 * Values that fit in a signed 16-bit field are emitted as a single "il".
 * Anything else is emitted as "ilhu" (upper 16 bits) followed by "iohl"
 * (lower 16 bits).
 */
void
spe_load_int(struct spe_function *p, unsigned rT, int i)
{
   const int fits_in_16_bits = (i >= -32768 && i <= 32767);

   if (fits_in_16_bits) {
      spe_il(p, rT, i);
      return;
   }

   spe_ilhu(p, rT, i >> 16);
   spe_iohl(p, rT, i & 0xffff);
}
|
||||
|
||||
|
||||
/**
 * Replicate word 0 of rA across rT.
 *
 * rT is first loaded with the immediate 0x010203 and then used as the
 * pattern operand of a shuffle-bytes instruction whose two sources are
 * both rA.
 */
void
spe_splat(struct spe_function *p, unsigned rT, unsigned rA)
{
   /* same value as decimal 66051 */
   const int shuffle_pattern = 0x010203;

   spe_ila(p, rT, shuffle_pattern);
   spe_shufb(p, rT, rA, rA, rT);
}
|
||||
|
||||
|
||||
/**
 * Complement/invert all bits in rT.
 * Implemented as a NOR of the register with itself: rT = ~(rT | rT).
 */
void spe_complement(struct spe_function *p, unsigned rT)
{
   const unsigned reg = rT;

   spe_nor(p, reg, reg, reg);
}
|
||||
|
||||
|
||||
/**
 * Register-to-register move: rT = rA.
 * Implemented as an OR-immediate with zero.
 */
void spe_move(struct spe_function *p, unsigned rT, unsigned rA)
{
   const int no_bits = 0;

   spe_ori(p, rT, rA, no_bits);
}
|
||||
|
||||
|
||||
/**
 * Clear a register: rT = {0,0,0,0}.
 * Implemented as an XOR of the register with itself.
 */
void spe_zero(struct spe_function *p, unsigned rT)
{
   const unsigned reg = rT;

   spe_xor(p, reg, reg, reg);
}
|
||||
|
||||
|
||||
#endif /* GALLIUM_CELL */
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
/**
|
||||
* \file
|
||||
* Real-time assembly generation interface for Cell B.E. SPEs.
|
||||
* For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf
|
||||
*
|
||||
* \author Ian Romanick <idr@us.ibm.com>
|
||||
*/
|
||||
|
@ -32,13 +33,24 @@
|
|||
#ifndef RTASM_PPC_SPE_H
|
||||
#define RTASM_PPC_SPE_H
|
||||
|
||||
struct spe_function {
|
||||
/**
|
||||
*
|
||||
*/
|
||||
uint32_t *store;
|
||||
uint32_t *csr;
|
||||
const char *fn;
|
||||
/** 4 bytes per instruction */
|
||||
#define SPE_INST_SIZE 4
|
||||
|
||||
/** number of general-purpose SIMD registers */
|
||||
#define SPE_NUM_REGS 128
|
||||
|
||||
/** Return Address register */
|
||||
#define SPE_REG_RA 0
|
||||
|
||||
/** Stack Pointer register */
|
||||
#define SPE_REG_SP 1
|
||||
|
||||
|
||||
struct spe_function
|
||||
{
|
||||
uint32_t *store; /**< instruction buffer */
|
||||
uint num_inst;
|
||||
uint max_inst;
|
||||
|
||||
/**
|
||||
* Mask of used / unused registers
|
||||
|
@ -50,7 +62,7 @@ struct spe_function {
|
|||
* spe_allocate_register, spe_allocate_available_register,
|
||||
* spe_release_register
|
||||
*/
|
||||
uint64_t regs[2];
|
||||
uint64_t regs[SPE_NUM_REGS / 64];
|
||||
};
|
||||
|
||||
extern void spe_init_func(struct spe_function *p, unsigned code_size);
|
||||
|
@ -119,7 +131,8 @@ EMIT_RI16(spe_ilhu, 0x082);
|
|||
EMIT_RI16(spe_il, 0x081);
|
||||
EMIT_RI18(spe_ila, 0x021);
|
||||
EMIT_RI16(spe_iohl, 0x0c1);
|
||||
EMIT_RI16(spe_fsmbi, 0x0c5);
|
||||
EMIT_RI16(spe_fsmbi, 0x065);
|
||||
|
||||
|
||||
|
||||
/* Integer and logical instructions
|
||||
|
@ -271,6 +284,31 @@ extern void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA,
|
|||
int d, int e);
|
||||
|
||||
|
||||
/** Load/splat immediate float into rT. */
|
||||
extern void
|
||||
spe_load_float(struct spe_function *p, unsigned rT, float x);
|
||||
|
||||
/** Load/splat immediate int into rT. */
|
||||
extern void
|
||||
spe_load_int(struct spe_function *p, unsigned rT, int i);
|
||||
|
||||
/** Replicate word 0 of rA across rT. */
|
||||
extern void
|
||||
spe_splat(struct spe_function *p, unsigned rT, unsigned rA);
|
||||
|
||||
/** Complement/invert all bits in rT. */
|
||||
extern void
|
||||
spe_complement(struct spe_function *p, unsigned rT);
|
||||
|
||||
/** rT = rA. */
|
||||
extern void
|
||||
spe_move(struct spe_function *p, unsigned rT, unsigned rA);
|
||||
|
||||
/** rT = {0,0,0,0}. */
|
||||
extern void
|
||||
spe_zero(struct spe_function *p, unsigned rT);
|
||||
|
||||
|
||||
/* Floating-point instructions
|
||||
*/
|
||||
EMIT_RR (spe_fa, 0x2c4);
|
||||
|
|
|
@ -84,7 +84,7 @@
|
|||
#define CELL_CMD_BATCH 5
|
||||
#define CELL_CMD_RELEASE_VERTS 6
|
||||
#define CELL_CMD_STATE_FRAMEBUFFER 10
|
||||
#define CELL_CMD_STATE_DEPTH_STENCIL 11
|
||||
#define CELL_CMD_STATE_FRAGMENT_OPS 11
|
||||
#define CELL_CMD_STATE_SAMPLER 12
|
||||
#define CELL_CMD_STATE_TEXTURE 13
|
||||
#define CELL_CMD_STATE_VERTEX_INFO 14
|
||||
|
@ -92,9 +92,8 @@
|
|||
#define CELL_CMD_STATE_UNIFORMS 16
|
||||
#define CELL_CMD_STATE_VS_ARRAY_INFO 17
|
||||
#define CELL_CMD_STATE_BIND_VS 18
|
||||
#define CELL_CMD_STATE_BLEND 19
|
||||
#define CELL_CMD_STATE_FRAGMENT_PROGRAM 19
|
||||
#define CELL_CMD_STATE_ATTRIB_FETCH 20
|
||||
#define CELL_CMD_STATE_LOGICOP 21
|
||||
#define CELL_CMD_VS_EXECUTE 22
|
||||
#define CELL_CMD_FLUSH_BUFFER_RANGE 23
|
||||
|
||||
|
@ -110,29 +109,34 @@
|
|||
#define CELL_DEBUG_SYNC (1 << 1)
|
||||
|
||||
|
||||
/**
|
||||
*/
|
||||
struct cell_command_depth_stencil_alpha_test {
|
||||
uint64_t base; /**< Effective address of code start. */
|
||||
unsigned size; /**< Size in bytes of SPE code. */
|
||||
unsigned read_depth; /**< Flag: should depth be read? */
|
||||
unsigned read_stencil; /**< Flag: should stencil be read? */
|
||||
};
|
||||
|
||||
/** Max instructions for doing per-fragment operations */
|
||||
#define SPU_MAX_FRAGMENT_OPS_INSTS 64
|
||||
|
||||
|
||||
/**
|
||||
* Upload code to perform framebuffer blend operation
|
||||
* Command to specify per-fragment operations state and generated code.
|
||||
*/
|
||||
struct cell_command_blend {
|
||||
uint64_t base; /**< Effective address of code start. */
|
||||
unsigned size; /**< Size in bytes of SPE code. */
|
||||
unsigned read_fb; /**< Flag: should framebuffer be read? */
|
||||
struct cell_command_fragment_ops
|
||||
{
|
||||
uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */
|
||||
struct pipe_depth_stencil_alpha_state dsa;
|
||||
struct pipe_blend_state blend;
|
||||
unsigned code[SPU_MAX_FRAGMENT_OPS_INSTS];
|
||||
};
|
||||
|
||||
|
||||
struct cell_command_logicop {
|
||||
uint64_t base; /**< Effective address of code start. */
|
||||
unsigned size; /**< Size in bytes of SPE code. */
|
||||
/** Max instructions for fragment programs */
|
||||
#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 128
|
||||
|
||||
/**
|
||||
* Command to send a fragment progra to SPUs.
|
||||
*/
|
||||
struct cell_command_fragment_program
|
||||
{
|
||||
uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */
|
||||
uint num_inst; /**< Number of instructions */
|
||||
unsigned code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
|
||||
};
|
||||
|
||||
|
||||
|
@ -172,13 +176,15 @@ struct cell_array_info
|
|||
};
|
||||
|
||||
|
||||
struct cell_attribute_fetch_code {
|
||||
struct cell_attribute_fetch_code
|
||||
{
|
||||
uint64_t base;
|
||||
uint size;
|
||||
};
|
||||
|
||||
|
||||
struct cell_buffer_range {
|
||||
/**
 * A range of buffer memory described by a 64-bit base and a size.
 * NOTE(review): presumably base is an effective address and size is in
 * bytes -- confirm against the CELL_CMD_FLUSH_BUFFER_RANGE handler.
 */
struct cell_buffer_range
{
   uint64_t base;
   unsigned size;
};
|
||||
|
|
|
@ -25,9 +25,10 @@ SOURCES = \
|
|||
cell_context.c \
|
||||
cell_draw_arrays.c \
|
||||
cell_flush.c \
|
||||
cell_gen_fragment.c \
|
||||
cell_gen_fp.c \
|
||||
cell_state_derived.c \
|
||||
cell_state_emit.c \
|
||||
cell_state_per_fragment.c \
|
||||
cell_state_shader.c \
|
||||
cell_pipe_state.c \
|
||||
cell_screen.c \
|
||||
|
|
|
@ -61,6 +61,7 @@ struct cell_fragment_shader_state
|
|||
{
|
||||
struct pipe_shader_state shader;
|
||||
struct tgsi_shader_info info;
|
||||
struct spe_function code;
|
||||
void *data;
|
||||
};
|
||||
|
||||
|
|
|
@ -0,0 +1,523 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Generate SPU fragment program/shader code.
|
||||
*
|
||||
* Note that we generate SOA-style code here. So each TGSI instruction
|
||||
* operates on four pixels (and is translated into four SPU instructions,
|
||||
* generally speaking).
|
||||
*
|
||||
* \author Brian Paul
|
||||
*/
|
||||
|
||||
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "pipe/p_shader_tokens.h"
|
||||
#include "tgsi/tgsi_parse.h"
|
||||
#include "tgsi/tgsi_util.h"
|
||||
#include "tgsi/tgsi_exec.h"
|
||||
#include "tgsi/tgsi_dump.h"
|
||||
#include "rtasm/rtasm_ppc_spe.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "cell_context.h"
|
||||
#include "cell_gen_fp.h"
|
||||
|
||||
|
||||
/** Set to 1 to enable debug/disassembly printfs */
|
||||
#define DISASSEM 01
|
||||
|
||||
|
||||
/**
|
||||
* Context needed during code generation.
|
||||
*/
|
||||
struct codegen
|
||||
{
|
||||
int inputs_reg; /**< 1st function parameter */
|
||||
int outputs_reg; /**< 2nd function parameter */
|
||||
int constants_reg; /**< 3rd function parameter */
|
||||
int temp_regs[8][4]; /**< maps TGSI temps to SPE registers */
|
||||
|
||||
int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */
|
||||
|
||||
/** Per-instruction temps / intermediate temps */
|
||||
int num_itemps;
|
||||
int itemps[3];
|
||||
|
||||
struct spe_function *f;
|
||||
boolean error;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Allocate an intermediate temporary register.
|
||||
*/
|
||||
static int
|
||||
get_itemp(struct codegen *gen)
|
||||
{
|
||||
int t = spe_allocate_available_register(gen->f);
|
||||
assert(gen->num_itemps < Elements(gen->itemps));
|
||||
gen->itemps[gen->num_itemps++] = t;
|
||||
return t;
|
||||
}
|
||||
|
||||
/**
|
||||
* Free all intermediate temporary registers. To be called after each
|
||||
* instruction has been emitted.
|
||||
*/
|
||||
static void
|
||||
free_itemps(struct codegen *gen)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < gen->num_itemps; i++) {
|
||||
spe_release_register(gen->f, gen->itemps[i]);
|
||||
}
|
||||
gen->num_itemps = 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
|
||||
* The register is allocated and initialized upon the first call.
|
||||
*/
|
||||
static int
|
||||
get_const_one_reg(struct codegen *gen)
|
||||
{
|
||||
if (gen->one_reg <= 0) {
|
||||
gen->one_reg = spe_allocate_available_register(gen->f);
|
||||
}
|
||||
|
||||
/* one = {1.0, 1.0, 1.0, 1.0} */
|
||||
spe_load_float(gen->f, gen->one_reg, 1.0f);
|
||||
#if DISASSEM
|
||||
printf("il\tr%d, 1.0f\n", gen->one_reg);
|
||||
#endif
|
||||
|
||||
return gen->one_reg;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return the index of the SPU temporary containing the named TGSI
|
||||
* source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
|
||||
* just return the corresponding SPE register. If the TGIS register
|
||||
* is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
|
||||
* and emit an SPE load instruction.
|
||||
*/
|
||||
static int
|
||||
get_src_reg(struct codegen *gen,
|
||||
int channel,
|
||||
const struct tgsi_full_src_register *src)
|
||||
{
|
||||
int reg;
|
||||
|
||||
/* XXX need to examine src swizzle info here.
|
||||
* That will involve changing the channel var...
|
||||
*/
|
||||
|
||||
|
||||
switch (src->SrcRegister.File) {
|
||||
case TGSI_FILE_TEMPORARY:
|
||||
reg = gen->temp_regs[src->SrcRegister.Index][channel];
|
||||
break;
|
||||
case TGSI_FILE_INPUT:
|
||||
{
|
||||
/* offset is measured in quadwords, not bytes */
|
||||
int offset = src->SrcRegister.Index * 4 + channel;
|
||||
reg = get_itemp(gen);
|
||||
/* Load: reg = memory[(machine_reg) + offset] */
|
||||
spe_lqd(gen->f, reg, gen->inputs_reg, offset);
|
||||
#if DISASSEM
|
||||
printf("lqd\tr%d, r%d + %d\n", reg, gen->inputs_reg, offset);
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
case TGSI_FILE_IMMEDIATE:
|
||||
/* xxx fall-through for now / fix */
|
||||
case TGSI_FILE_CONSTANT:
|
||||
/* xxx fall-through for now / fix */
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
return reg;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return the index of an SPE register to use for the given TGSI register.
|
||||
* If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the
|
||||
* corresponding SPE register is returned. If the TGSI register is
|
||||
* TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
|
||||
* See store_dest_reg() below...
|
||||
*/
|
||||
static int
|
||||
get_dst_reg(struct codegen *gen,
|
||||
int channel,
|
||||
const struct tgsi_full_dst_register *dest)
|
||||
{
|
||||
int reg;
|
||||
|
||||
switch (dest->DstRegister.File) {
|
||||
case TGSI_FILE_TEMPORARY:
|
||||
reg = gen->temp_regs[dest->DstRegister.Index][channel];
|
||||
break;
|
||||
case TGSI_FILE_OUTPUT:
|
||||
reg = get_itemp(gen);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
return reg;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* When a TGSI instruction is writing to an output register, this
|
||||
* function emits the SPE store instruction to store the value_reg.
|
||||
* \param value_reg the SPE register containing the value to store.
|
||||
* This would have been returned by get_dst_reg().
|
||||
*/
|
||||
static void
|
||||
store_dest_reg(struct codegen *gen,
|
||||
int value_reg, int channel,
|
||||
const struct tgsi_full_dst_register *dest)
|
||||
{
|
||||
switch (dest->DstRegister.File) {
|
||||
case TGSI_FILE_TEMPORARY:
|
||||
/* no-op */
|
||||
break;
|
||||
case TGSI_FILE_OUTPUT:
|
||||
{
|
||||
/* offset is measured in quadwords, not bytes */
|
||||
int offset = dest->DstRegister.Index * 4 + channel;
|
||||
/* Store: memory[(machine_reg) + offset] = reg */
|
||||
spe_stqd(gen->f, value_reg, gen->outputs_reg, offset);
|
||||
#if DISASSEM
|
||||
printf("stqd\tr%d, r%d + %d\n", value_reg, gen->outputs_reg, offset);
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static boolean
|
||||
emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
int ch;
|
||||
for (ch = 0; ch < 4; ch++) {
|
||||
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
|
||||
int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
|
||||
int dst_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
|
||||
/* XXX we don't always need to actually emit a mov instruction here */
|
||||
spe_move(gen->f, dst_reg, src_reg);
|
||||
#if DISASSEM
|
||||
printf("mov\tr%d, r%d\n", dst_reg, src_reg);
|
||||
#endif
|
||||
store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]);
|
||||
free_itemps(gen);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
|
||||
* becomes (up to) four SPU "fa" instructions because we're doing SOA
|
||||
* processing.
|
||||
*/
|
||||
static boolean
|
||||
emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
int ch;
|
||||
/* Loop over Red/Green/Blue/Alpha channels */
|
||||
for (ch = 0; ch < 4; ch++) {
|
||||
/* If the dest R, G, B or A writemask is enabled... */
|
||||
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
|
||||
/* get indexes of the two src, one dest SPE registers */
|
||||
int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
|
||||
int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
|
||||
int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
|
||||
|
||||
/* Emit actual SPE instruction: d = s1 + s2 */
|
||||
spe_fa(gen->f, d_reg, s1_reg, s2_reg);
|
||||
#if DISASSEM
|
||||
printf("fa\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
|
||||
#endif
|
||||
|
||||
/* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
|
||||
store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
|
||||
/* Free any intermediate temps we allocated */
|
||||
free_itemps(gen);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit multiply. See emit_ADD for comments.
|
||||
*/
|
||||
static boolean
|
||||
emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
int ch;
|
||||
for (ch = 0; ch < 4; ch++) {
|
||||
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
|
||||
int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
|
||||
int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
|
||||
int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
|
||||
/* d = s1 * s2 */
|
||||
spe_fm(gen->f, d_reg, s1_reg, s2_reg);
|
||||
#if DISASSEM
|
||||
printf("fm\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
|
||||
#endif
|
||||
store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
|
||||
free_itemps(gen);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit set-if-greater-than.
|
||||
* Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
|
||||
* the result but OpenGL/TGSI needs 0.0 and 1.0 results.
|
||||
* We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
|
||||
*/
|
||||
static boolean
|
||||
emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
int ch;
|
||||
|
||||
for (ch = 0; ch < 4; ch++) {
|
||||
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
|
||||
int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
|
||||
int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
|
||||
int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
|
||||
|
||||
/* d = (s1 > s2) */
|
||||
spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
|
||||
#if DISASSEM
|
||||
printf("fcgt\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
|
||||
#endif
|
||||
|
||||
/* convert d from 0x0/0xffffffff to 0.0/1.0 */
|
||||
/* d = d & one_reg */
|
||||
spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
|
||||
#if DISASSEM
|
||||
printf("and\tr%d, r%d, r%d\n", d_reg, d_reg, get_const_one_reg(gen));
|
||||
#endif
|
||||
|
||||
store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
|
||||
free_itemps(gen);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit END instruction.
|
||||
* We just return from the shader function at this point.
|
||||
*
|
||||
* Note that there may be more code after this that would be
|
||||
* called by TGSI_OPCODE_CALL.
|
||||
*/
|
||||
static boolean
|
||||
emit_END(struct codegen *gen)
|
||||
{
|
||||
/* return from function call */
|
||||
spe_bi(gen->f, SPE_REG_RA, 0, 0);
|
||||
#if DISASSEM
|
||||
printf("bi\trRA\n");
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit code for the given instruction. Just a big switch stmt.
|
||||
*/
|
||||
static boolean
|
||||
emit_instruction(struct codegen *gen,
|
||||
const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
switch (inst->Instruction.Opcode) {
|
||||
case TGSI_OPCODE_MOV:
|
||||
return emit_MOV(gen, inst);
|
||||
case TGSI_OPCODE_MUL:
|
||||
return emit_MUL(gen, inst);
|
||||
case TGSI_OPCODE_ADD:
|
||||
return emit_ADD(gen, inst);
|
||||
case TGSI_OPCODE_SGT:
|
||||
return emit_SGT(gen, inst);
|
||||
case TGSI_OPCODE_END:
|
||||
return emit_END(gen);
|
||||
|
||||
/* XXX lots more cases to do... */
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Emit "code" for a TGSI declaration.
|
||||
* We only care about TGSI TEMPORARY register declarations at this time.
|
||||
* For each TGSI TEMPORARY we allocate four SPE registers.
|
||||
*/
|
||||
static void
|
||||
emit_declaration(struct codegen *gen, const struct tgsi_full_declaration *decl)
|
||||
{
|
||||
int i, ch;
|
||||
|
||||
switch (decl->Declaration.File) {
|
||||
case TGSI_FILE_TEMPORARY:
|
||||
#if DISASSEM
|
||||
printf("Declare temp reg %d .. %d\n",
|
||||
decl->DeclarationRange.First,
|
||||
decl->DeclarationRange.Last);
|
||||
#endif
|
||||
for (i = decl->DeclarationRange.First;
|
||||
i <= decl->DeclarationRange.Last;
|
||||
i++) {
|
||||
for (ch = 0; ch < 4; ch++) {
|
||||
gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
|
||||
}
|
||||
|
||||
/* XXX if we run out of SPE registers, we need to spill
|
||||
* to SPU memory. someday...
|
||||
*/
|
||||
|
||||
#if DISASSEM
|
||||
printf(" SPE regs: %d %d %d %d\n",
|
||||
gen->temp_regs[i][0],
|
||||
gen->temp_regs[i][1],
|
||||
gen->temp_regs[i][2],
|
||||
gen->temp_regs[i][3]);
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
default:
|
||||
; /* ignore */
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Translate TGSI shader code to SPE instructions. This is done when
|
||||
* the state tracker gives us a new shader (via pipe->create_fs_state()).
|
||||
*
|
||||
* \param cell the rendering context (in)
|
||||
* \param tokens the TGSI shader (in)
|
||||
* \param f the generated function (out)
|
||||
*/
|
||||
boolean
|
||||
cell_gen_fragment_program(struct cell_context *cell,
|
||||
const struct tgsi_token *tokens,
|
||||
struct spe_function *f)
|
||||
{
|
||||
struct tgsi_parse_context parse;
|
||||
struct codegen gen;
|
||||
|
||||
memset(&gen, 0, sizeof(gen));
|
||||
gen.f = f;
|
||||
|
||||
/* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
|
||||
gen.inputs_reg = 3; /* pointer to inputs array */
|
||||
gen.outputs_reg = 4; /* pointer to outputs array */
|
||||
gen.constants_reg = 5; /* pointer to constants array */
|
||||
|
||||
spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
|
||||
spe_allocate_register(f, gen.inputs_reg);
|
||||
spe_allocate_register(f, gen.outputs_reg);
|
||||
spe_allocate_register(f, gen.constants_reg);
|
||||
|
||||
#if DISASSEM
|
||||
printf("Begin %s\n", __FUNCTION__);
|
||||
tgsi_dump(tokens, 0);
|
||||
#endif
|
||||
|
||||
tgsi_parse_init(&parse, tokens);
|
||||
|
||||
while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
|
||||
tgsi_parse_token(&parse);
|
||||
|
||||
switch (parse.FullToken.Token.Type) {
|
||||
case TGSI_TOKEN_TYPE_IMMEDIATE:
|
||||
#if 0
|
||||
if (!note_immediate(&gen, &parse.FullToken.FullImmediate ))
|
||||
goto fail;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case TGSI_TOKEN_TYPE_DECLARATION:
|
||||
emit_declaration(&gen, &parse.FullToken.FullDeclaration);
|
||||
break;
|
||||
|
||||
case TGSI_TOKEN_TYPE_INSTRUCTION:
|
||||
if (!emit_instruction(&gen, &parse.FullToken.FullInstruction )) {
|
||||
gen.error = true;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(0);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (gen.error) {
|
||||
/* terminate the SPE code */
|
||||
return emit_END(&gen);
|
||||
}
|
||||
|
||||
#if DISASSEM
|
||||
printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
|
||||
printf("End %s\n", __FUNCTION__);
|
||||
#endif
|
||||
|
||||
tgsi_parse_free( &parse );
|
||||
|
||||
return !gen.error;
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
|
||||
|
||||
#ifndef CELL_GEN_FP_H
|
||||
#define CELL_GEN_FP_H
|
||||
|
||||
|
||||
|
||||
extern boolean
|
||||
cell_gen_fragment_program(struct cell_context *cell,
|
||||
const struct tgsi_token *tokens,
|
||||
struct spe_function *f);
|
||||
|
||||
|
||||
#endif /* CELL_GEN_FP_H */
|
||||
|
|
@ -0,0 +1,870 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Generate SPU per-fragment code (actually per-quad code).
|
||||
* \author Brian Paul
|
||||
*/
|
||||
|
||||
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "rtasm/rtasm_ppc_spe.h"
|
||||
#include "cell_context.h"
|
||||
#include "cell_gen_fragment.h"
|
||||
|
||||
|
||||
|
||||
/** Do extra optimizations? */
|
||||
#define OPTIMIZATIONS 1
|
||||
|
||||
|
||||
/**
|
||||
* Generate SPE code to perform Z/depth testing.
|
||||
*
|
||||
* \param dsa Gallium depth/stencil/alpha state to gen code for
|
||||
* \param f SPE function to append instruction onto.
|
||||
* \param mask_reg register containing quad/pixel "alive" mask (in/out)
|
||||
* \param ifragZ_reg register containing integer fragment Z values (in)
|
||||
* \param ifbZ_reg register containing integer frame buffer Z values (in/out)
|
||||
* \param zmask_reg register containing result of Z test/comparison (out)
|
||||
*/
|
||||
static void
|
||||
gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa,
|
||||
struct spe_function *f,
|
||||
int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg)
|
||||
{
|
||||
ASSERT(dsa->depth.enabled);
|
||||
|
||||
switch (dsa->depth.func) {
|
||||
case PIPE_FUNC_EQUAL:
|
||||
/* zmask = (ifragZ == ref) */
|
||||
spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
|
||||
/* mask = (mask & zmask) */
|
||||
spe_and(f, mask_reg, mask_reg, zmask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_NOTEQUAL:
|
||||
/* zmask = (ifragZ == ref) */
|
||||
spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
|
||||
/* mask = (mask & ~zmask) */
|
||||
spe_andc(f, mask_reg, mask_reg, zmask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_GREATER:
|
||||
/* zmask = (ifragZ > ref) */
|
||||
spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
|
||||
/* mask = (mask & zmask) */
|
||||
spe_and(f, mask_reg, mask_reg, zmask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_LESS:
|
||||
/* zmask = (ref > ifragZ) */
|
||||
spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
|
||||
/* mask = (mask & zmask) */
|
||||
spe_and(f, mask_reg, mask_reg, zmask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_LEQUAL:
|
||||
/* zmask = (ifragZ > ref) */
|
||||
spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
|
||||
/* mask = (mask & ~zmask) */
|
||||
spe_andc(f, mask_reg, mask_reg, zmask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_GEQUAL:
|
||||
/* zmask = (ref > ifragZ) */
|
||||
spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
|
||||
/* mask = (mask & ~zmask) */
|
||||
spe_andc(f, mask_reg, mask_reg, zmask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_NEVER:
|
||||
spe_il(f, mask_reg, 0); /* mask = {0,0,0,0} */
|
||||
spe_move(f, zmask_reg, mask_reg); /* zmask = mask */
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_ALWAYS:
|
||||
/* mask unchanged */
|
||||
spe_il(f, zmask_reg, ~0); /* zmask = {~0,~0,~0,~0} */
|
||||
break;
|
||||
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
|
||||
if (dsa->depth.writemask) {
|
||||
/*
|
||||
* If (ztest passed) {
|
||||
* framebufferZ = fragmentZ;
|
||||
* }
|
||||
* OR,
|
||||
* framebufferZ = (ztest_passed ? fragmentZ : framebufferZ;
|
||||
*/
|
||||
spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Generate SPE code to perform alpha testing.
|
||||
*
|
||||
* \param dsa Gallium depth/stencil/alpha state to gen code for
|
||||
* \param f SPE function to append instruction onto.
|
||||
* \param mask_reg register containing quad/pixel "alive" mask (in/out)
|
||||
* \param fragA_reg register containing four fragment alpha values (in)
|
||||
*/
|
||||
static void
|
||||
gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa,
|
||||
struct spe_function *f, int mask_reg, int fragA_reg)
|
||||
{
|
||||
int ref_reg = spe_allocate_available_register(f);
|
||||
int amask_reg = spe_allocate_available_register(f);
|
||||
|
||||
ASSERT(dsa->alpha.enabled);
|
||||
|
||||
if ((dsa->alpha.func != PIPE_FUNC_NEVER) &&
|
||||
(dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
|
||||
/* load/splat the alpha reference float value */
|
||||
spe_load_float(f, ref_reg, dsa->alpha.ref);
|
||||
}
|
||||
|
||||
/* emit code to do the alpha comparison, updating 'mask' */
|
||||
switch (dsa->alpha.func) {
|
||||
case PIPE_FUNC_EQUAL:
|
||||
/* amask = (fragA == ref) */
|
||||
spe_fceq(f, amask_reg, fragA_reg, ref_reg);
|
||||
/* mask = (mask & amask) */
|
||||
spe_and(f, mask_reg, mask_reg, amask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_NOTEQUAL:
|
||||
/* amask = (fragA == ref) */
|
||||
spe_fceq(f, amask_reg, fragA_reg, ref_reg);
|
||||
/* mask = (mask & ~amask) */
|
||||
spe_andc(f, mask_reg, mask_reg, amask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_GREATER:
|
||||
/* amask = (fragA > ref) */
|
||||
spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
|
||||
/* mask = (mask & amask) */
|
||||
spe_and(f, mask_reg, mask_reg, amask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_LESS:
|
||||
/* amask = (ref > fragA) */
|
||||
spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
|
||||
/* mask = (mask & amask) */
|
||||
spe_and(f, mask_reg, mask_reg, amask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_LEQUAL:
|
||||
/* amask = (fragA > ref) */
|
||||
spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
|
||||
/* mask = (mask & ~amask) */
|
||||
spe_andc(f, mask_reg, mask_reg, amask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_GEQUAL:
|
||||
/* amask = (ref > fragA) */
|
||||
spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
|
||||
/* mask = (mask & ~amask) */
|
||||
spe_andc(f, mask_reg, mask_reg, amask_reg);
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_NEVER:
|
||||
spe_il(f, mask_reg, 0); /* mask = [0,0,0,0] */
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_ALWAYS:
|
||||
/* no-op, mask unchanged */
|
||||
break;
|
||||
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
|
||||
#if OPTIMIZATIONS
|
||||
/* if mask == {0,0,0,0} we're all done, return */
|
||||
{
|
||||
/* re-use amask reg here */
|
||||
int tmp_reg = amask_reg;
|
||||
/* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */
|
||||
spe_orx(f, tmp_reg, mask_reg);
|
||||
/* if tmp[0] == 0 then return from function call */
|
||||
spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
spe_release_register(f, ref_reg);
|
||||
spe_release_register(f, amask_reg);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Generate SPE code to implement the given blend mode for a quad of pixels.
|
||||
* \param f SPE function to append instruction onto.
|
||||
* \param fragR_reg register with fragment red values (float) (in/out)
|
||||
* \param fragG_reg register with fragment green values (float) (in/out)
|
||||
* \param fragB_reg register with fragment blue values (float) (in/out)
|
||||
* \param fragA_reg register with fragment alpha values (float) (in/out)
|
||||
* \param fbRGBA_reg register with packed framebuffer colors (integer) (in)
|
||||
*/
|
||||
static void
|
||||
gen_blend(const struct pipe_blend_state *blend,
|
||||
struct spe_function *f,
|
||||
enum pipe_format color_format,
|
||||
int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg,
|
||||
int fbRGBA_reg)
|
||||
{
|
||||
int term1R_reg = spe_allocate_available_register(f);
|
||||
int term1G_reg = spe_allocate_available_register(f);
|
||||
int term1B_reg = spe_allocate_available_register(f);
|
||||
int term1A_reg = spe_allocate_available_register(f);
|
||||
|
||||
int term2R_reg = spe_allocate_available_register(f);
|
||||
int term2G_reg = spe_allocate_available_register(f);
|
||||
int term2B_reg = spe_allocate_available_register(f);
|
||||
int term2A_reg = spe_allocate_available_register(f);
|
||||
|
||||
int fbR_reg = spe_allocate_available_register(f);
|
||||
int fbG_reg = spe_allocate_available_register(f);
|
||||
int fbB_reg = spe_allocate_available_register(f);
|
||||
int fbA_reg = spe_allocate_available_register(f);
|
||||
|
||||
int one_reg = spe_allocate_available_register(f);
|
||||
int tmp_reg = spe_allocate_available_register(f);
|
||||
|
||||
boolean one_reg_set = false; /* avoid setting one_reg more than once */
|
||||
|
||||
ASSERT(blend->blend_enable);
|
||||
|
||||
/* Unpack/convert framebuffer colors from four 32-bit packed colors
|
||||
* (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA).
|
||||
* Each 8-bit color component is expanded into a float in [0.0, 1.0].
|
||||
*/
|
||||
{
|
||||
int mask_reg = spe_allocate_available_register(f);
|
||||
|
||||
/* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */
|
||||
spe_load_int(f, mask_reg, 0xff);
|
||||
|
||||
/* XXX there may be more clever ways to implement the following code */
|
||||
switch (color_format) {
|
||||
case PIPE_FORMAT_A8R8G8B8_UNORM:
|
||||
/* fbB = fbB & mask */
|
||||
spe_and(f, fbB_reg, fbRGBA_reg, mask_reg);
|
||||
/* mask = mask << 8 */
|
||||
spe_roti(f, mask_reg, mask_reg, 8);
|
||||
|
||||
/* fbG = fbRGBA & mask */
|
||||
spe_and(f, fbG_reg, fbRGBA_reg, mask_reg);
|
||||
/* fbG = fbG >> 8 */
|
||||
spe_roti(f, fbG_reg, fbG_reg, -8);
|
||||
/* mask = mask << 8 */
|
||||
spe_roti(f, mask_reg, mask_reg, 8);
|
||||
|
||||
/* fbR = fbRGBA & mask */
|
||||
spe_and(f, fbR_reg, fbRGBA_reg, mask_reg);
|
||||
/* fbR = fbR >> 16 */
|
||||
spe_roti(f, fbR_reg, fbR_reg, -16);
|
||||
/* mask = mask << 8 */
|
||||
spe_roti(f, mask_reg, mask_reg, 8);
|
||||
|
||||
/* fbA = fbRGBA & mask */
|
||||
spe_and(f, fbA_reg, fbRGBA_reg, mask_reg);
|
||||
/* fbA = fbA >> 24 */
|
||||
spe_roti(f, fbA_reg, fbA_reg, -24);
|
||||
break;
|
||||
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
/* fbA = fbA & mask */
|
||||
spe_and(f, fbA_reg, fbRGBA_reg, mask_reg);
|
||||
/* mask = mask << 8 */
|
||||
spe_roti(f, mask_reg, mask_reg, 8);
|
||||
|
||||
/* fbR = fbRGBA & mask */
|
||||
spe_and(f, fbR_reg, fbRGBA_reg, mask_reg);
|
||||
/* fbR = fbR >> 8 */
|
||||
spe_roti(f, fbR_reg, fbR_reg, -8);
|
||||
/* mask = mask << 8 */
|
||||
spe_roti(f, mask_reg, mask_reg, 8);
|
||||
|
||||
/* fbG = fbRGBA & mask */
|
||||
spe_and(f, fbG_reg, fbRGBA_reg, mask_reg);
|
||||
/* fbG = fbG >> 16 */
|
||||
spe_roti(f, fbG_reg, fbG_reg, -16);
|
||||
/* mask = mask << 8 */
|
||||
spe_roti(f, mask_reg, mask_reg, 8);
|
||||
|
||||
/* fbB = fbRGBA & mask */
|
||||
spe_and(f, fbB_reg, fbRGBA_reg, mask_reg);
|
||||
/* fbB = fbB >> 24 */
|
||||
spe_roti(f, fbB_reg, fbB_reg, -24);
|
||||
break;
|
||||
|
||||
default:
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
/* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */
|
||||
spe_cuflt(f, fbR_reg, fbR_reg, 8);
|
||||
spe_cuflt(f, fbG_reg, fbG_reg, 8);
|
||||
spe_cuflt(f, fbB_reg, fbB_reg, 8);
|
||||
spe_cuflt(f, fbA_reg, fbA_reg, 8);
|
||||
|
||||
spe_release_register(f, mask_reg);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Compute Src RGB terms
|
||||
*/
|
||||
switch (blend->rgb_src_factor) {
|
||||
case PIPE_BLENDFACTOR_ONE:
|
||||
spe_move(f, term1R_reg, fragR_reg);
|
||||
spe_move(f, term1G_reg, fragG_reg);
|
||||
spe_move(f, term1B_reg, fragB_reg);
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_ZERO:
|
||||
spe_zero(f, term1R_reg);
|
||||
spe_zero(f, term1G_reg);
|
||||
spe_zero(f, term1B_reg);
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_SRC_COLOR:
|
||||
spe_fm(f, term1R_reg, fragR_reg, fragR_reg);
|
||||
spe_fm(f, term1G_reg, fragG_reg, fragG_reg);
|
||||
spe_fm(f, term1B_reg, fragB_reg, fragB_reg);
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_SRC_ALPHA:
|
||||
spe_fm(f, term1R_reg, fragR_reg, fragA_reg);
|
||||
spe_fm(f, term1G_reg, fragG_reg, fragA_reg);
|
||||
spe_fm(f, term1B_reg, fragB_reg, fragA_reg);
|
||||
break;
|
||||
/* XXX more cases */
|
||||
default:
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute Src Alpha term
|
||||
*/
|
||||
switch (blend->alpha_src_factor) {
|
||||
case PIPE_BLENDFACTOR_ONE:
|
||||
spe_move(f, term1A_reg, fragA_reg);
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_SRC_COLOR:
|
||||
spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_SRC_ALPHA:
|
||||
spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
|
||||
break;
|
||||
/* XXX more cases */
|
||||
default:
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute Dest RGB terms
|
||||
*/
|
||||
switch (blend->rgb_dst_factor) {
|
||||
case PIPE_BLENDFACTOR_ONE:
|
||||
spe_move(f, term2R_reg, fbR_reg);
|
||||
spe_move(f, term2G_reg, fbG_reg);
|
||||
spe_move(f, term2B_reg, fbB_reg);
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_ZERO:
|
||||
spe_zero(f, term2R_reg);
|
||||
spe_zero(f, term2G_reg);
|
||||
spe_zero(f, term2B_reg);
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_SRC_COLOR:
|
||||
spe_fm(f, term2R_reg, fbR_reg, fragR_reg);
|
||||
spe_fm(f, term2G_reg, fbG_reg, fragG_reg);
|
||||
spe_fm(f, term2B_reg, fbB_reg, fragB_reg);
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_SRC_ALPHA:
|
||||
spe_fm(f, term2R_reg, fbR_reg, fragA_reg);
|
||||
spe_fm(f, term2G_reg, fbG_reg, fragA_reg);
|
||||
spe_fm(f, term2B_reg, fbB_reg, fragA_reg);
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
|
||||
/* one = {1.0, 1.0, 1.0, 1.0} */
|
||||
if (!one_reg_set) {
|
||||
spe_load_float(f, one_reg, 1.0f);
|
||||
one_reg_set = true;
|
||||
}
|
||||
/* tmp = one - fragA */
|
||||
spe_fs(f, tmp_reg, one_reg, fragA_reg);
|
||||
/* term = fb * tmp */
|
||||
spe_fm(f, term2R_reg, fbR_reg, tmp_reg);
|
||||
spe_fm(f, term2G_reg, fbG_reg, tmp_reg);
|
||||
spe_fm(f, term2B_reg, fbB_reg, tmp_reg);
|
||||
break;
|
||||
/* XXX more cases */
|
||||
default:
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute Dest Alpha term
|
||||
*/
|
||||
switch (blend->alpha_dst_factor) {
|
||||
case PIPE_BLENDFACTOR_ONE:
|
||||
spe_move(f, term2A_reg, fbA_reg);
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_ZERO:
|
||||
spe_zero(f, term2A_reg);
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_SRC_ALPHA:
|
||||
spe_fm(f, term2A_reg, fbA_reg, fragA_reg);
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
|
||||
/* one = {1.0, 1.0, 1.0, 1.0} */
|
||||
if (!one_reg_set) {
|
||||
spe_load_float(f, one_reg, 1.0f);
|
||||
one_reg_set = true;
|
||||
}
|
||||
/* tmp = one - fragA */
|
||||
spe_fs(f, tmp_reg, one_reg, fragA_reg);
|
||||
/* termA = fbA * tmp */
|
||||
spe_fm(f, term2A_reg, fbA_reg, tmp_reg);
|
||||
break;
|
||||
/* XXX more cases */
|
||||
default:
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Combine Src/Dest RGB terms
|
||||
*/
|
||||
switch (blend->rgb_func) {
|
||||
case PIPE_BLEND_ADD:
|
||||
spe_fa(f, fragR_reg, term1R_reg, term2R_reg);
|
||||
spe_fa(f, fragG_reg, term1G_reg, term2G_reg);
|
||||
spe_fa(f, fragB_reg, term1B_reg, term2B_reg);
|
||||
break;
|
||||
case PIPE_BLEND_SUBTRACT:
|
||||
spe_fs(f, fragR_reg, term1R_reg, term2R_reg);
|
||||
spe_fs(f, fragG_reg, term1G_reg, term2G_reg);
|
||||
spe_fs(f, fragB_reg, term1B_reg, term2B_reg);
|
||||
break;
|
||||
/* XXX more cases */
|
||||
default:
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Combine Src/Dest A term
|
||||
*/
|
||||
switch (blend->alpha_func) {
|
||||
case PIPE_BLEND_ADD:
|
||||
spe_fa(f, fragA_reg, term1A_reg, term2A_reg);
|
||||
break;
|
||||
case PIPE_BLEND_SUBTRACT:
|
||||
spe_fs(f, fragA_reg, term1A_reg, term2A_reg);
|
||||
break;
|
||||
/* XXX more cases */
|
||||
default:
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
spe_release_register(f, term1R_reg);
|
||||
spe_release_register(f, term1G_reg);
|
||||
spe_release_register(f, term1B_reg);
|
||||
spe_release_register(f, term1A_reg);
|
||||
|
||||
spe_release_register(f, term2R_reg);
|
||||
spe_release_register(f, term2G_reg);
|
||||
spe_release_register(f, term2B_reg);
|
||||
spe_release_register(f, term2A_reg);
|
||||
|
||||
spe_release_register(f, fbR_reg);
|
||||
spe_release_register(f, fbG_reg);
|
||||
spe_release_register(f, fbB_reg);
|
||||
spe_release_register(f, fbA_reg);
|
||||
|
||||
spe_release_register(f, one_reg);
|
||||
spe_release_register(f, tmp_reg);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Placeholder for logic-op code generation.
 * Nothing is emitted yet; the eventual implementation is meant to work on
 * the 32-bit packed pixels rather than on float colors.
 *
 * \param blend         blend state carrying the logic-op settings (unused)
 * \param f             SPE function to append instructions onto (unused)
 * \param fragRGBA_reg  register with packed fragment colors (unused)
 * \param fbRGBA_reg    register with packed framebuffer colors (unused)
 */
static void
gen_logicop(const struct pipe_blend_state *blend,
            struct spe_function *f,
            int fragRGBA_reg, int fbRGBA_reg)
{
   /* XXX to-do: operate on 32-bit packed pixels, not float colors */
   (void) blend;
   (void) f;
   (void) fragRGBA_reg;
   (void) fbRGBA_reg;
}
|
||||
|
||||
|
||||
static void
|
||||
gen_colormask(uint colormask,
|
||||
struct spe_function *f,
|
||||
int fragRGBA_reg, int fbRGBA_reg)
|
||||
{
|
||||
/* XXX to-do */
|
||||
/* operate on 32-bit packed pixels, not float colors */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Generate code to pack a quad of float colors into a four 32-bit integers.
|
||||
*
|
||||
* \param f SPE function to append instruction onto.
|
||||
* \param color_format the dest color packing format
|
||||
* \param r_reg register containing four red values (in/clobbered)
|
||||
* \param g_reg register containing four green values (in/clobbered)
|
||||
* \param b_reg register containing four blue values (in/clobbered)
|
||||
* \param a_reg register containing four alpha values (in/clobbered)
|
||||
* \param rgba_reg register to store the packed RGBA colors (out)
|
||||
*/
|
||||
static void
|
||||
gen_pack_colors(struct spe_function *f,
|
||||
enum pipe_format color_format,
|
||||
int r_reg, int g_reg, int b_reg, int a_reg,
|
||||
int rgba_reg)
|
||||
{
|
||||
/* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */
|
||||
spe_cfltu(f, r_reg, r_reg, 32);
|
||||
spe_cfltu(f, g_reg, g_reg, 32);
|
||||
spe_cfltu(f, b_reg, b_reg, 32);
|
||||
spe_cfltu(f, a_reg, a_reg, 32);
|
||||
|
||||
/* Shift the most significant bytes to least the significant positions.
|
||||
* I.e.: reg = reg >> 24
|
||||
*/
|
||||
spe_rotmi(f, r_reg, r_reg, -24);
|
||||
spe_rotmi(f, g_reg, g_reg, -24);
|
||||
spe_rotmi(f, b_reg, b_reg, -24);
|
||||
spe_rotmi(f, a_reg, a_reg, -24);
|
||||
|
||||
/* Shift the color bytes according to the surface format */
|
||||
if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) {
|
||||
spe_roti(f, g_reg, g_reg, 8); /* green <<= 8 */
|
||||
spe_roti(f, r_reg, r_reg, 16); /* red <<= 16 */
|
||||
spe_roti(f, a_reg, a_reg, 24); /* alpha <<= 24 */
|
||||
}
|
||||
else if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) {
|
||||
spe_roti(f, r_reg, r_reg, 8); /* red <<= 8 */
|
||||
spe_roti(f, g_reg, g_reg, 16); /* green <<= 16 */
|
||||
spe_roti(f, b_reg, b_reg, 24); /* blue <<= 24 */
|
||||
}
|
||||
else {
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
/* Merge red, green, blue, alpha registers to make packed RGBA colors.
|
||||
* Eg: after shifting according to color_format we might have:
|
||||
* R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000}
|
||||
* G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600}
|
||||
* B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099}
|
||||
* A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000}
|
||||
* OR-ing all those together gives us four packed colors:
|
||||
* RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699}
|
||||
*/
|
||||
spe_or(f, rgba_reg, r_reg, g_reg);
|
||||
spe_or(f, rgba_reg, rgba_reg, b_reg);
|
||||
spe_or(f, rgba_reg, rgba_reg, a_reg);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Generate SPE code to implement the fragment operations (alpha test,
|
||||
* depth test, stencil test, blending, colormask, and final
|
||||
* framebuffer write) as specified by the current context state.
|
||||
*
|
||||
* Logically, this code will be called after running the fragment
|
||||
* shader. But under some circumstances we could run some of this
|
||||
* code before the fragment shader to cull fragments/quads that are
|
||||
* totally occluded/discarded.
|
||||
*
|
||||
* XXX we only support PIPE_FORMAT_Z24S8_UNORM z/stencil buffer right now.
|
||||
*
|
||||
* See the spu_default_fragment_ops() function to see how the per-fragment
|
||||
* operations would be done with ordinary C code.
|
||||
* The code we generate here though has no branches, is SIMD, etc and
|
||||
* should be much faster.
|
||||
*
|
||||
* \param cell the rendering context (in)
|
||||
* \param f the generated function (out)
|
||||
*/
|
||||
void
|
||||
cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
|
||||
{
|
||||
const struct pipe_depth_stencil_alpha_state *dsa =
|
||||
&cell->depth_stencil->base;
|
||||
const struct pipe_blend_state *blend = &cell->blend->base;
|
||||
const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format;
|
||||
|
||||
/* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
|
||||
const int x_reg = 3; /* uint */
|
||||
const int y_reg = 4; /* uint */
|
||||
const int color_tile_reg = 5; /* tile_t * */
|
||||
const int depth_tile_reg = 6; /* tile_t * */
|
||||
const int fragZ_reg = 7; /* vector float */
|
||||
const int fragR_reg = 8; /* vector float */
|
||||
const int fragG_reg = 9; /* vector float */
|
||||
const int fragB_reg = 10; /* vector float */
|
||||
const int fragA_reg = 11; /* vector float */
|
||||
const int mask_reg = 12; /* vector uint */
|
||||
|
||||
/* offset of quad from start of tile
|
||||
* XXX assuming 4-byte pixels for color AND Z/stencil!!!!
|
||||
*/
|
||||
int quad_offset_reg;
|
||||
|
||||
int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */
|
||||
int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */
|
||||
|
||||
spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
|
||||
spe_allocate_register(f, x_reg);
|
||||
spe_allocate_register(f, y_reg);
|
||||
spe_allocate_register(f, color_tile_reg);
|
||||
spe_allocate_register(f, depth_tile_reg);
|
||||
spe_allocate_register(f, fragZ_reg);
|
||||
spe_allocate_register(f, fragR_reg);
|
||||
spe_allocate_register(f, fragG_reg);
|
||||
spe_allocate_register(f, fragB_reg);
|
||||
spe_allocate_register(f, fragA_reg);
|
||||
spe_allocate_register(f, mask_reg);
|
||||
|
||||
quad_offset_reg = spe_allocate_available_register(f);
|
||||
fbRGBA_reg = spe_allocate_available_register(f);
|
||||
fbZS_reg = spe_allocate_available_register(f);
|
||||
|
||||
/* compute offset of quad from start of tile, in bytes */
|
||||
{
|
||||
int x2_reg = spe_allocate_available_register(f);
|
||||
int y2_reg = spe_allocate_available_register(f);
|
||||
|
||||
ASSERT(TILE_SIZE == 32);
|
||||
|
||||
spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */
|
||||
spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */
|
||||
spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */
|
||||
spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */
|
||||
spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */
|
||||
|
||||
spe_release_register(f, x2_reg);
|
||||
spe_release_register(f, y2_reg);
|
||||
}
|
||||
|
||||
|
||||
if (dsa->alpha.enabled) {
|
||||
gen_alpha_test(dsa, f, mask_reg, fragA_reg);
|
||||
}
|
||||
|
||||
if (dsa->depth.enabled || dsa->stencil[0].enabled) {
|
||||
const enum pipe_format zs_format = cell->framebuffer.zsbuf->format;
|
||||
boolean write_depth_stencil;
|
||||
|
||||
int fbZ_reg = spe_allocate_available_register(f); /* Z values */
|
||||
int fbS_reg = spe_allocate_available_register(f); /* Stencil values */
|
||||
|
||||
/* fetch quad of depth/stencil values from tile at (x,y) */
|
||||
/* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
|
||||
spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
|
||||
|
||||
if (dsa->depth.enabled) {
|
||||
/* Extract Z bits from fbZS_reg into fbZ_reg */
|
||||
if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
|
||||
zs_format == PIPE_FORMAT_X8Z24_UNORM) {
|
||||
int mask_reg = spe_allocate_available_register(f);
|
||||
spe_fsmbi(f, mask_reg, 0x7777); /* mask[0,1,2,3] = 0x00ffffff */
|
||||
spe_and(f, fbZ_reg, fbZS_reg, mask_reg); /* fbZ = fbZS & mask */
|
||||
spe_release_register(f, mask_reg);
|
||||
/* OK, fbZ_reg has four 24-bit Z values now */
|
||||
}
|
||||
else {
|
||||
/* XXX handle other z/stencil formats */
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
/* Convert fragZ values from float[4] to uint[4] */
|
||||
if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
|
||||
zs_format == PIPE_FORMAT_X8Z24_UNORM ||
|
||||
zs_format == PIPE_FORMAT_Z24S8_UNORM ||
|
||||
zs_format == PIPE_FORMAT_Z24X8_UNORM) {
|
||||
/* 24-bit Z values */
|
||||
int scale_reg = spe_allocate_available_register(f);
|
||||
|
||||
/* scale_reg[0,1,2,3] = float(2^24-1) */
|
||||
spe_load_float(f, scale_reg, (float) 0xffffff);
|
||||
|
||||
/* XXX these two instructions might be combined */
|
||||
spe_fm(f, fragZ_reg, fragZ_reg, scale_reg); /* fragZ *= scale */
|
||||
spe_cfltu(f, fragZ_reg, fragZ_reg, 0); /* fragZ = (int) fragZ */
|
||||
|
||||
spe_release_register(f, scale_reg);
|
||||
}
|
||||
else {
|
||||
/* XXX handle 16-bit Z format */
|
||||
ASSERT(0);
|
||||
}
|
||||
}
|
||||
|
||||
if (dsa->stencil[0].enabled) {
|
||||
/* Extract Stencil bit sfrom fbZS_reg into fbS_reg */
|
||||
if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
|
||||
zs_format == PIPE_FORMAT_X8Z24_UNORM) {
|
||||
/* XXX extract with a shift */
|
||||
ASSERT(0);
|
||||
}
|
||||
else if (zs_format == PIPE_FORMAT_Z24S8_UNORM ||
|
||||
zs_format == PIPE_FORMAT_Z24X8_UNORM) {
|
||||
/* XXX extract with a mask */
|
||||
ASSERT(0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (dsa->stencil[0].enabled) {
|
||||
/* XXX this may involve depth testing too */
|
||||
// gen_stencil_test(dsa, f, ... );
|
||||
ASSERT(0);
|
||||
}
|
||||
else if (dsa->depth.enabled) {
|
||||
int zmask_reg = spe_allocate_available_register(f);
|
||||
gen_depth_test(dsa, f, mask_reg, fragZ_reg, fbZ_reg, zmask_reg);
|
||||
spe_release_register(f, zmask_reg);
|
||||
}
|
||||
|
||||
/* do we need to write Z and/or Stencil back into framebuffer? */
|
||||
write_depth_stencil = (dsa->depth.writemask |
|
||||
dsa->stencil[0].write_mask |
|
||||
dsa->stencil[1].write_mask);
|
||||
|
||||
if (write_depth_stencil) {
|
||||
/* Merge latest Z and Stencil values into fbZS_reg.
|
||||
* fbZ_reg has four Z vals in bits [23..0] or bits [15..0].
|
||||
* fbS_reg has four 8-bit Z values in bits [7..0].
|
||||
*/
|
||||
if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
|
||||
zs_format == PIPE_FORMAT_X8Z24_UNORM) {
|
||||
spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */
|
||||
spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
|
||||
}
|
||||
else if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
|
||||
zs_format == PIPE_FORMAT_X8Z24_UNORM) {
|
||||
/* XXX to do */
|
||||
ASSERT(0);
|
||||
}
|
||||
else if (zs_format == PIPE_FORMAT_Z16_UNORM) {
|
||||
/* XXX to do */
|
||||
ASSERT(0);
|
||||
}
|
||||
else if (zs_format == PIPE_FORMAT_S8_UNORM) {
|
||||
/* XXX to do */
|
||||
ASSERT(0);
|
||||
}
|
||||
else {
|
||||
/* bad zs_format */
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
/* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */
|
||||
spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
|
||||
}
|
||||
|
||||
spe_release_register(f, fbZ_reg);
|
||||
spe_release_register(f, fbS_reg);
|
||||
}
|
||||
|
||||
|
||||
/* Get framebuffer quad/colors. We'll need these for blending,
|
||||
* color masking, and to obey the quad/pixel mask.
|
||||
* Load: fbRGBA_reg = memory[color_tile + quad_offset]
|
||||
* Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking
|
||||
* we could skip this load.
|
||||
*/
|
||||
spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg);
|
||||
|
||||
|
||||
if (blend->blend_enable) {
|
||||
gen_blend(blend, f, color_format,
|
||||
fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg);
|
||||
}
|
||||
|
||||
/*
|
||||
* Write fragment colors to framebuffer/tile.
|
||||
* This involves converting the fragment colors from float[4] to the
|
||||
* tile's specific format and obeying the quad/pixel mask.
|
||||
*/
|
||||
{
|
||||
int rgba_reg = spe_allocate_available_register(f);
|
||||
|
||||
/* Pack four float colors as four 32-bit int colors */
|
||||
gen_pack_colors(f, color_format,
|
||||
fragR_reg, fragG_reg, fragB_reg, fragA_reg,
|
||||
rgba_reg);
|
||||
|
||||
if (blend->logicop_enable) {
|
||||
gen_logicop(blend, f, rgba_reg, fbRGBA_reg);
|
||||
}
|
||||
|
||||
if (blend->colormask != 0xf) {
|
||||
gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg);
|
||||
}
|
||||
|
||||
|
||||
/* Mix fragment colors with framebuffer colors using the quad/pixel mask:
|
||||
* if (mask[i])
|
||||
* rgba[i] = rgba[i];
|
||||
* else
|
||||
* rgba[i] = framebuffer[i];
|
||||
*/
|
||||
spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg);
|
||||
|
||||
/* Store updated quad in tile:
|
||||
* memory[color_tile + quad_offset] = rgba_reg;
|
||||
*/
|
||||
spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg);
|
||||
|
||||
spe_release_register(f, rgba_reg);
|
||||
}
|
||||
|
||||
//printf("gen_fragment_ops nr instructions: %u\n", f->num_inst);
|
||||
|
||||
spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */
|
||||
|
||||
|
||||
spe_release_register(f, fbRGBA_reg);
|
||||
spe_release_register(f, fbZS_reg);
|
||||
spe_release_register(f, quad_offset_reg);
|
||||
}
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
|
||||
#ifndef CELL_GEN_FRAGMENT_H
#define CELL_GEN_FRAGMENT_H


/* Forward declarations: only pointers to these types are needed here, and
 * declaring them at file scope keeps this header usable regardless of the
 * order in which it is included.
 */
struct cell_context;
struct spe_function;


/**
 * Generate the SPE code implementing the per-fragment operations for the
 * context's current state.
 *
 * \param cell  the rendering context (in)
 * \param f     the generated function (out)
 */
extern void
cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f);


#endif /* CELL_GEN_FRAGMENT_H */
|
||||
|
|
@ -27,6 +27,7 @@
|
|||
|
||||
#include "util/u_memory.h"
|
||||
#include "cell_context.h"
|
||||
#include "cell_gen_fragment.h"
|
||||
#include "cell_state.h"
|
||||
#include "cell_state_emit.h"
|
||||
#include "cell_state_per_fragment.h"
|
||||
|
@ -54,23 +55,6 @@ emit_state_cmd(struct cell_context *cell, uint cmd,
|
|||
void
|
||||
cell_emit_state(struct cell_context *cell)
|
||||
{
|
||||
if (cell->dirty & (CELL_NEW_FRAMEBUFFER | CELL_NEW_BLEND)) {
|
||||
struct cell_command_logicop logicop;
|
||||
|
||||
if (cell->logic_op.store != NULL) {
|
||||
spe_release_func(& cell->logic_op);
|
||||
}
|
||||
|
||||
cell_generate_logic_op(& cell->logic_op,
|
||||
& cell->blend->base,
|
||||
cell->framebuffer.cbufs[0]);
|
||||
|
||||
logicop.base = (intptr_t) cell->logic_op.store;
|
||||
logicop.size = 64 * 4;
|
||||
emit_state_cmd(cell, CELL_CMD_STATE_LOGICOP, &logicop,
|
||||
sizeof(logicop));
|
||||
}
|
||||
|
||||
if (cell->dirty & CELL_NEW_FRAMEBUFFER) {
|
||||
struct pipe_surface *cbuf = cell->framebuffer.cbufs[0];
|
||||
struct pipe_surface *zbuf = cell->framebuffer.zsbuf;
|
||||
|
@ -83,44 +67,49 @@ cell_emit_state(struct cell_context *cell)
|
|||
fb->depth_format = zbuf ? zbuf->format : PIPE_FORMAT_NONE;
|
||||
fb->width = cell->framebuffer.width;
|
||||
fb->height = cell->framebuffer.height;
|
||||
#if 0
|
||||
printf("EMIT color format %s\n", pf_name(fb->color_format));
|
||||
printf("EMIT depth format %s\n", pf_name(fb->depth_format));
|
||||
#endif
|
||||
}
|
||||
|
||||
if (cell->dirty & CELL_NEW_BLEND) {
|
||||
struct cell_command_blend blend;
|
||||
|
||||
if (cell->blend != NULL) {
|
||||
blend.base = (intptr_t) cell->blend->code.store;
|
||||
blend.size = (char *) cell->blend->code.csr
|
||||
- (char *) cell->blend->code.store;
|
||||
blend.read_fb = TRUE;
|
||||
if (cell->dirty & (CELL_NEW_FS)) {
|
||||
/* Send new fragment program to SPUs */
|
||||
struct cell_command_fragment_program *fp
|
||||
= cell_batch_alloc(cell, sizeof(*fp));
|
||||
fp->opcode = CELL_CMD_STATE_FRAGMENT_PROGRAM;
|
||||
fp->num_inst = cell->fs->code.num_inst;
|
||||
memcpy(&fp->code, cell->fs->code.store,
|
||||
SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
|
||||
if (0) {
|
||||
int i;
|
||||
printf("PPU Emit CELL_CMD_STATE_FRAGMENT_PROGRAM:\n");
|
||||
for (i = 0; i < fp->num_inst; i++) {
|
||||
printf(" %3d: 0x%08x\n", i, fp->code[i]);
|
||||
}
|
||||
}
|
||||
else {
|
||||
blend.base = 0;
|
||||
blend.size = 0;
|
||||
blend.read_fb = FALSE;
|
||||
}
|
||||
|
||||
emit_state_cmd(cell, CELL_CMD_STATE_BLEND, &blend, sizeof(blend));
|
||||
}
|
||||
|
||||
if (cell->dirty & CELL_NEW_DEPTH_STENCIL) {
|
||||
struct cell_command_depth_stencil_alpha_test dsat;
|
||||
if (cell->dirty & (CELL_NEW_FRAMEBUFFER |
|
||||
CELL_NEW_DEPTH_STENCIL |
|
||||
CELL_NEW_BLEND)) {
|
||||
/* XXX we don't want to always do codegen here. We should have
|
||||
* a hash/lookup table to cache previous results...
|
||||
*/
|
||||
struct cell_command_fragment_ops *fops
|
||||
= cell_batch_alloc(cell, sizeof(*fops));
|
||||
struct spe_function spe_code;
|
||||
|
||||
if (cell->depth_stencil != NULL) {
|
||||
dsat.base = (intptr_t) cell->depth_stencil->code.store;
|
||||
dsat.size = (char *) cell->depth_stencil->code.csr
|
||||
- (char *) cell->depth_stencil->code.store;
|
||||
dsat.read_depth = TRUE;
|
||||
dsat.read_stencil = FALSE;
|
||||
}
|
||||
else {
|
||||
dsat.base = 0;
|
||||
dsat.size = 0;
|
||||
dsat.read_depth = FALSE;
|
||||
dsat.read_stencil = FALSE;
|
||||
}
|
||||
|
||||
emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, &dsat, sizeof(dsat));
|
||||
/* generate new code */
|
||||
cell_gen_fragment_function(cell, &spe_code);
|
||||
/* put the new code into the batch buffer */
|
||||
fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS;
|
||||
memcpy(&fops->code, spe_code.store,
|
||||
SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
|
||||
fops->dsa = cell->depth_stencil->base;
|
||||
fops->blend = cell->blend->base;
|
||||
/* free codegen buffer */
|
||||
spe_release_func(&spe_code);
|
||||
}
|
||||
|
||||
if (cell->dirty & CELL_NEW_SAMPLER) {
|
||||
|
@ -160,7 +149,8 @@ cell_emit_state(struct cell_context *cell)
|
|||
emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO,
|
||||
&cell->vertex_info, sizeof(struct vertex_info));
|
||||
}
|
||||
|
||||
|
||||
#if 0
|
||||
if (cell->dirty & CELL_NEW_VS) {
|
||||
const struct draw_context *const draw = cell->draw;
|
||||
struct cell_shader_info info;
|
||||
|
@ -175,4 +165,5 @@ cell_emit_state(struct cell_context *cell)
|
|||
|
||||
emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, &info, sizeof(info));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -132,9 +132,9 @@ emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa,
|
|||
|
||||
|
||||
/**
|
||||
* Generate code to perform Z testing. Four Z values are tested at once.
|
||||
* \param dsa Current depth-test state
|
||||
* \param f Function to which code should be appended
|
||||
* \param m Mask of allocated / free SPE registers
|
||||
* \param mask Index of register to contain depth-pass mask
|
||||
* \param stored Index of register containing values from depth buffer
|
||||
* \param calculated Index of register containing per-fragment depth values
|
||||
|
@ -198,6 +198,7 @@ emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa,
|
|||
|
||||
|
||||
/**
|
||||
* Generate code to apply the stencil operation (after testing).
|
||||
* \note Emits a maximum of 5 instructions.
|
||||
*
|
||||
* \warning
|
||||
|
@ -222,9 +223,13 @@ emit_stencil_op(struct spe_function *f,
|
|||
spe_il(f, result, ref);
|
||||
break;
|
||||
case PIPE_STENCIL_OP_INCR:
|
||||
/* clamp = [0xff, 0xff, 0xff, 0xff] */
|
||||
spe_il(f, clamp, 0x0ff);
|
||||
/* result[i] = in[i] + 1 */
|
||||
spe_ai(f, result, in, 1);
|
||||
/* clamp_mask[i] = (result[i] > 0xff) */
|
||||
spe_clgti(f, clamp_mask, result, 0x0ff);
|
||||
/* result[i] = clamp_mask[i] ? clamp[i] : result[i] */
|
||||
spe_selb(f, result, result, clamp, clamp_mask);
|
||||
break;
|
||||
case PIPE_STENCIL_OP_DECR:
|
||||
|
@ -259,10 +264,10 @@ emit_stencil_op(struct spe_function *f,
|
|||
|
||||
|
||||
/**
|
||||
* Generate code to do stencil test. Four pixels are tested at once.
|
||||
* \param dsa Depth / stencil test state
|
||||
* \param face 0 for front face, 1 for back face
|
||||
* \param f Function to append instructions to
|
||||
* \param reg_mask Mask of allocated registers
|
||||
* \param mask Register containing mask of fragments passing the
|
||||
* alpha test
|
||||
* \param depth_mask Register containing mask of fragments passing the
|
||||
|
@ -310,13 +315,14 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
|
|||
|
||||
switch (dsa->stencil[face].func) {
|
||||
case PIPE_FUNC_NEVER:
|
||||
spe_il(f, stencil_mask, 0);
|
||||
spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */
|
||||
break;
|
||||
|
||||
case PIPE_FUNC_NOTEQUAL:
|
||||
complement = TRUE;
|
||||
/* FALLTHROUGH */
|
||||
case PIPE_FUNC_EQUAL:
|
||||
/* stencil_mask[i] = (stored[i] == ref) */
|
||||
spe_ceqi(f, stencil_mask, stored, ref);
|
||||
break;
|
||||
|
||||
|
@ -324,6 +330,8 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
|
|||
complement = TRUE;
|
||||
/* FALLTHROUGH */
|
||||
case PIPE_FUNC_GREATER:
|
||||
complement = TRUE;
|
||||
/* stencil_mask[i] = (stored[i] > ref) */
|
||||
spe_clgti(f, stencil_mask, stored, ref);
|
||||
break;
|
||||
|
||||
|
@ -331,8 +339,11 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
|
|||
complement = TRUE;
|
||||
/* FALLTHROUGH */
|
||||
case PIPE_FUNC_GEQUAL:
|
||||
/* stencil_mask[i] = (stored[i] > ref) */
|
||||
spe_clgti(f, stencil_mask, stored, ref);
|
||||
/* tmp[i] = (stored[i] == ref) */
|
||||
spe_ceqi(f, tmp, stored, ref);
|
||||
/* stencil_mask[i] = stencil_mask[i] | tmp[i] */
|
||||
spe_or(f, stencil_mask, stencil_mask, tmp);
|
||||
break;
|
||||
|
||||
|
@ -461,7 +472,7 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa)
|
|||
* + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round
|
||||
* up to 64 to make it a happy power-of-two.
|
||||
*/
|
||||
spe_init_func(f, 4 * 64);
|
||||
spe_init_func(f, SPE_INST_SIZE * 64);
|
||||
|
||||
|
||||
/* Allocate registers for the function's input parameters. Cleverly (and
|
||||
|
@ -540,7 +551,7 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa)
|
|||
spe_selb(f, depth, depth, zvals, mask);
|
||||
}
|
||||
|
||||
spe_bi(f, 0, 0, 0);
|
||||
spe_bi(f, 0, 0, 0); /* return from function call */
|
||||
|
||||
|
||||
#if 0
|
||||
|
@ -956,7 +967,7 @@ cell_generate_alpha_blend(struct cell_blend_state *cb)
|
|||
* + 4 (fragment mask) + 1 (return) = 55 instructions. Round up to 64 to
|
||||
* make it a happy power-of-two.
|
||||
*/
|
||||
spe_init_func(f, 4 * 64);
|
||||
spe_init_func(f, SPE_INST_SIZE * 64);
|
||||
|
||||
|
||||
const int frag[4] = {
|
||||
|
@ -1144,9 +1155,10 @@ cell_generate_alpha_blend(struct cell_blend_state *cb)
|
|||
}
|
||||
|
||||
|
||||
int PC_OFFSET(const struct spe_function *f, const void *d)
|
||||
static int
|
||||
PC_OFFSET(const struct spe_function *f, const void *d)
|
||||
{
|
||||
const intptr_t pc = (intptr_t) f->csr;
|
||||
const intptr_t pc = (intptr_t) &f->store[f->num_inst];
|
||||
const intptr_t ea = ~0x0f & (intptr_t) d;
|
||||
|
||||
return (ea - pc) >> 2;
|
||||
|
@ -1178,7 +1190,7 @@ cell_generate_logic_op(struct spe_function *f,
|
|||
* bytes (equiv. to 8 instructions) are needed for data storage. Round up
|
||||
* to 64 to make it a happy power-of-two.
|
||||
*/
|
||||
spe_init_func(f, 4 * 64);
|
||||
spe_init_func(f, SPE_INST_SIZE * 64);
|
||||
|
||||
|
||||
/* Pixel colors in framebuffer format in AoS layout.
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
|
||||
#include "cell_context.h"
|
||||
#include "cell_state.h"
|
||||
|
||||
#include "cell_gen_fp.h"
|
||||
|
||||
|
||||
/** cast wrapper */
|
||||
|
@ -61,7 +61,7 @@ static void *
|
|||
cell_create_fs_state(struct pipe_context *pipe,
|
||||
const struct pipe_shader_state *templ)
|
||||
{
|
||||
/*struct cell_context *cell = cell_context(pipe);*/
|
||||
struct cell_context *cell = cell_context(pipe);
|
||||
struct cell_fragment_shader_state *cfs;
|
||||
|
||||
cfs = CALLOC_STRUCT(cell_fragment_shader_state);
|
||||
|
@ -76,6 +76,8 @@ cell_create_fs_state(struct pipe_context *pipe,
|
|||
|
||||
tgsi_scan_shader(templ->tokens, &cfs->info);
|
||||
|
||||
cell_gen_fragment_program(cell, cfs->shader.tokens, &cfs->code);
|
||||
|
||||
return cfs;
|
||||
}
|
||||
|
||||
|
@ -102,6 +104,8 @@ cell_delete_fs_state(struct pipe_context *pipe, void *fs)
|
|||
{
|
||||
struct cell_fragment_shader_state *cfs = cell_fragment_shader_state(fs);
|
||||
|
||||
spe_release_func(&cfs->code);
|
||||
|
||||
FREE((void *) cfs->shader.tokens);
|
||||
FREE(cfs);
|
||||
}
|
||||
|
|
|
@ -297,10 +297,9 @@ void cell_update_vertex_fetch(struct draw_context *draw)
|
|||
|
||||
|
||||
/* Each fetch function can be a maximum of 34 instructions (note: this is
|
||||
* actually a slight over-estimate). That means (34 * 4) = 136 bytes
|
||||
* each maximum.
|
||||
* actually a slight over-estimate).
|
||||
*/
|
||||
spe_init_func(p, 136 * unique_attr_formats);
|
||||
spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats);
|
||||
|
||||
|
||||
/* Allocate registers for the function's input parameters.
|
||||
|
|
|
@ -22,12 +22,15 @@ SOURCES = \
|
|||
spu_render.c \
|
||||
spu_texture.c \
|
||||
spu_tile.c \
|
||||
spu_tri.c \
|
||||
spu_tri.c
|
||||
|
||||
OLD_SOURCES = \
|
||||
spu_exec.c \
|
||||
spu_util.c \
|
||||
spu_vertex_fetch.c \
|
||||
spu_vertex_shader.c
|
||||
|
||||
|
||||
SPU_OBJECTS = $(SOURCES:.c=.o) \
|
||||
|
||||
SPU_ASM_OUT = $(SOURCES:.c=.s) \
|
||||
|
@ -43,7 +46,7 @@ INCLUDE_DIRS = \
|
|||
$(SPU_CC) $(SPU_CFLAGS) -c $<
|
||||
|
||||
.c.s:
|
||||
$(SPU_CC) $(SPU_CFLAGS) -S $<
|
||||
$(SPU_CC) $(SPU_CFLAGS) -O3 -S $<
|
||||
|
||||
|
||||
# The .a file will be linked into the main/PPU executable
|
||||
|
|
|
@ -79,14 +79,14 @@ spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle)
|
|||
|
||||
|
||||
static INLINE vector float
|
||||
spu_unpack_color(uint color)
|
||||
spu_unpack_B8G8R8A8(uint color)
|
||||
{
|
||||
vector unsigned int color_u4 = spu_splats(color);
|
||||
color_u4 = spu_shuffle(color_u4, color_u4,
|
||||
((vector unsigned char) {
|
||||
0, 0, 0, 0,
|
||||
5, 5, 5, 5,
|
||||
10, 10, 10, 10,
|
||||
5, 5, 5, 5,
|
||||
0, 0, 0, 0,
|
||||
15, 15, 15, 15}) );
|
||||
return spu_convtf(color_u4, 32);
|
||||
}
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
|
||||
#include "spu_main.h"
|
||||
#include "spu_render.h"
|
||||
#include "spu_per_fragment_op.h"
|
||||
#include "spu_texture.h"
|
||||
#include "spu_tile.h"
|
||||
//#include "spu_test.h"
|
||||
|
@ -46,7 +47,7 @@
|
|||
/*
|
||||
helpful headers:
|
||||
/usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h
|
||||
/opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h
|
||||
/opt/cell/sdk/usr/include/libmisc.h
|
||||
*/
|
||||
|
||||
boolean Debug = FALSE;
|
||||
|
@ -62,14 +63,6 @@ struct spu_vs_context draw;
|
|||
static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]
|
||||
ALIGN16_ATTRIB;
|
||||
|
||||
static unsigned char depth_stencil_code_buffer[4 * 64]
|
||||
ALIGN16_ATTRIB;
|
||||
|
||||
static unsigned char fb_blend_code_buffer[4 * 64]
|
||||
ALIGN16_ATTRIB;
|
||||
|
||||
static unsigned char logicop_code_buffer[4 * 64]
|
||||
ALIGN16_ATTRIB;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -226,6 +219,46 @@ cmd_release_verts(const struct cell_command_release_verts *release)
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Process a CELL_CMD_STATE_FRAGMENT_OPS command.
|
||||
* This involves installing new fragment ops SPU code.
|
||||
* If this function is never called, we'll use a regular C fallback function
|
||||
* for fragment processing.
|
||||
*/
|
||||
static void
|
||||
cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
|
||||
{
|
||||
if (Debug)
|
||||
printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id);
|
||||
/* Copy SPU code from batch buffer to spu buffer */
|
||||
memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
|
||||
/* Copy state info (for fallback case only) */
|
||||
memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
|
||||
memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
|
||||
|
||||
/* Point function pointer at new code */
|
||||
spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code;
|
||||
|
||||
spu.read_depth = spu.depth_stencil_alpha.depth.enabled;
|
||||
spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
|
||||
{
|
||||
if (Debug)
|
||||
printf("SPU %u: CMD_STATE_FRAGMENT_PROGRAM\n", spu.init.id);
|
||||
/* Copy SPU code from batch buffer to spu buffer */
|
||||
memcpy(spu.fragment_program_code, fp->code,
|
||||
SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4);
|
||||
#if 01
|
||||
/* Point function pointer at new code */
|
||||
spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
|
||||
{
|
||||
|
@ -252,102 +285,24 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
|
|||
|
||||
switch (spu.fb.depth_format) {
|
||||
case PIPE_FORMAT_Z32_UNORM:
|
||||
spu.fb.zsize = 4;
|
||||
spu.fb.zscale = (float) 0xffffffffu;
|
||||
break;
|
||||
case PIPE_FORMAT_Z24S8_UNORM:
|
||||
case PIPE_FORMAT_S8Z24_UNORM:
|
||||
case PIPE_FORMAT_Z24X8_UNORM:
|
||||
case PIPE_FORMAT_X8Z24_UNORM:
|
||||
spu.fb.zsize = 4;
|
||||
spu.fb.zscale = (float) 0x00ffffffu;
|
||||
break;
|
||||
case PIPE_FORMAT_Z16_UNORM:
|
||||
spu.fb.zsize = 2;
|
||||
spu.fb.zscale = (float) 0xffffu;
|
||||
break;
|
||||
default:
|
||||
spu.fb.zsize = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
if (spu.fb.color_format == PIPE_FORMAT_A8R8G8B8_UNORM)
|
||||
spu.color_shuffle = ((vector unsigned char) {
|
||||
12, 0, 4, 8, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0});
|
||||
else if (spu.fb.color_format == PIPE_FORMAT_B8G8R8A8_UNORM)
|
||||
spu.color_shuffle = ((vector unsigned char) {
|
||||
8, 4, 0, 12, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0});
|
||||
else
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
cmd_state_blend(const struct cell_command_blend *state)
|
||||
{
|
||||
if (Debug)
|
||||
printf("SPU %u: BLEND: enabled %d\n",
|
||||
spu.init.id,
|
||||
(state->size != 0));
|
||||
|
||||
ASSERT_ALIGN16(state->base);
|
||||
|
||||
if (state->size != 0) {
|
||||
mfc_get(fb_blend_code_buffer,
|
||||
(unsigned int) state->base, /* src */
|
||||
ROUNDUP16(state->size),
|
||||
TAG_BATCH_BUFFER,
|
||||
0, /* tid */
|
||||
0 /* rid */);
|
||||
wait_on_mask(1 << TAG_BATCH_BUFFER);
|
||||
spu.blend = (blend_func) fb_blend_code_buffer;
|
||||
spu.read_fb = state->read_fb;
|
||||
} else {
|
||||
spu.read_fb = FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *state)
|
||||
{
|
||||
if (Debug)
|
||||
printf("SPU %u: DEPTH_STENCIL: ztest %d\n",
|
||||
spu.init.id,
|
||||
state->read_depth);
|
||||
|
||||
ASSERT_ALIGN16(state->base);
|
||||
|
||||
if (state->size != 0) {
|
||||
mfc_get(depth_stencil_code_buffer,
|
||||
(unsigned int) state->base, /* src */
|
||||
ROUNDUP16(state->size),
|
||||
TAG_BATCH_BUFFER,
|
||||
0, /* tid */
|
||||
0 /* rid */);
|
||||
wait_on_mask(1 << TAG_BATCH_BUFFER);
|
||||
} else {
|
||||
/* If there is no code, emit a return instruction.
|
||||
*/
|
||||
depth_stencil_code_buffer[0] = 0x35;
|
||||
depth_stencil_code_buffer[1] = 0x00;
|
||||
depth_stencil_code_buffer[2] = 0x00;
|
||||
depth_stencil_code_buffer[3] = 0x00;
|
||||
}
|
||||
|
||||
spu.frag_test = (frag_test_func) depth_stencil_code_buffer;
|
||||
spu.read_depth = state->read_depth;
|
||||
spu.read_stencil = state->read_stencil;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
cmd_state_logicop(const struct cell_command_logicop * code)
|
||||
{
|
||||
mfc_get(logicop_code_buffer,
|
||||
(unsigned int) code->base, /* src */
|
||||
code->size,
|
||||
TAG_BATCH_BUFFER,
|
||||
0, /* tid */
|
||||
0 /* rid */);
|
||||
wait_on_mask(1 << TAG_BATCH_BUFFER);
|
||||
|
||||
spu.logicop = (logicop_func) logicop_code_buffer;
|
||||
}
|
||||
|
||||
|
||||
|
@ -450,7 +405,9 @@ cmd_finish(void)
|
|||
|
||||
|
||||
/**
|
||||
* Execute a batch of commands
|
||||
* Execute a batch of commands which was sent to us by the PPU.
|
||||
* See the cell_emit_state.c code to see where the commands come from.
|
||||
*
|
||||
* The opcode param encodes the location of the buffer and its size.
|
||||
*/
|
||||
static void
|
||||
|
@ -487,16 +444,14 @@ cmd_batch(uint opcode)
|
|||
printf("SPU %u: release batch buf %u\n", spu.init.id, buf);
|
||||
release_buffer(buf);
|
||||
|
||||
/*
|
||||
* Loop over commands in the batch buffer
|
||||
*/
|
||||
for (pos = 0; pos < usize; /* no incr */) {
|
||||
switch (buffer[pos]) {
|
||||
case CELL_CMD_STATE_FRAMEBUFFER:
|
||||
{
|
||||
struct cell_command_framebuffer *fb
|
||||
= (struct cell_command_framebuffer *) &buffer[pos];
|
||||
cmd_state_framebuffer(fb);
|
||||
pos += sizeof(*fb) / 8;
|
||||
}
|
||||
break;
|
||||
/*
|
||||
* rendering commands
|
||||
*/
|
||||
case CELL_CMD_CLEAR_SURFACE:
|
||||
{
|
||||
struct cell_command_clear_surface *clr
|
||||
|
@ -514,26 +469,32 @@ cmd_batch(uint opcode)
|
|||
pos += pos_incr;
|
||||
}
|
||||
break;
|
||||
case CELL_CMD_RELEASE_VERTS:
|
||||
/*
|
||||
* state-update commands
|
||||
*/
|
||||
case CELL_CMD_STATE_FRAMEBUFFER:
|
||||
{
|
||||
struct cell_command_release_verts *release
|
||||
= (struct cell_command_release_verts *) &buffer[pos];
|
||||
cmd_release_verts(release);
|
||||
pos += sizeof(*release) / 8;
|
||||
struct cell_command_framebuffer *fb
|
||||
= (struct cell_command_framebuffer *) &buffer[pos];
|
||||
cmd_state_framebuffer(fb);
|
||||
pos += sizeof(*fb) / 8;
|
||||
}
|
||||
break;
|
||||
case CELL_CMD_FINISH:
|
||||
cmd_finish();
|
||||
pos += 1;
|
||||
case CELL_CMD_STATE_FRAGMENT_OPS:
|
||||
{
|
||||
struct cell_command_fragment_ops *fops
|
||||
= (struct cell_command_fragment_ops *) &buffer[pos];
|
||||
cmd_state_fragment_ops(fops);
|
||||
pos += sizeof(*fops) / 8;
|
||||
}
|
||||
break;
|
||||
case CELL_CMD_STATE_BLEND:
|
||||
cmd_state_blend((struct cell_command_blend *) &buffer[pos+1]);
|
||||
pos += (1 + ROUNDUP8(sizeof(struct cell_command_blend)) / 8);
|
||||
break;
|
||||
case CELL_CMD_STATE_DEPTH_STENCIL:
|
||||
cmd_state_depth_stencil((struct cell_command_depth_stencil_alpha_test *)
|
||||
&buffer[pos+1]);
|
||||
pos += (1 + ROUNDUP8(sizeof(struct cell_command_depth_stencil_alpha_test)) / 8);
|
||||
case CELL_CMD_STATE_FRAGMENT_PROGRAM:
|
||||
{
|
||||
struct cell_command_fragment_program *fp
|
||||
= (struct cell_command_fragment_program *) &buffer[pos];
|
||||
cmd_state_fragment_program(fp);
|
||||
pos += sizeof(*fp) / 8;
|
||||
}
|
||||
break;
|
||||
case CELL_CMD_STATE_SAMPLER:
|
||||
{
|
||||
|
@ -569,8 +530,10 @@ cmd_batch(uint opcode)
|
|||
pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8);
|
||||
break;
|
||||
case CELL_CMD_STATE_BIND_VS:
|
||||
#if 0
|
||||
spu_bind_vertex_shader(&draw,
|
||||
(struct cell_shader_info *) &buffer[pos+1]);
|
||||
#endif
|
||||
pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8);
|
||||
break;
|
||||
case CELL_CMD_STATE_ATTRIB_FETCH:
|
||||
|
@ -578,9 +541,20 @@ cmd_batch(uint opcode)
|
|||
&buffer[pos+1]);
|
||||
pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8);
|
||||
break;
|
||||
case CELL_CMD_STATE_LOGICOP:
|
||||
cmd_state_logicop((struct cell_command_logicop *) &buffer[pos+1]);
|
||||
pos += (1 + ROUNDUP8(sizeof(struct cell_command_logicop)) / 8);
|
||||
/*
|
||||
* misc commands
|
||||
*/
|
||||
case CELL_CMD_FINISH:
|
||||
cmd_finish();
|
||||
pos += 1;
|
||||
break;
|
||||
case CELL_CMD_RELEASE_VERTS:
|
||||
{
|
||||
struct cell_command_release_verts *release
|
||||
= (struct cell_command_release_verts *) &buffer[pos];
|
||||
cmd_release_verts(release);
|
||||
pos += sizeof(*release) / 8;
|
||||
}
|
||||
break;
|
||||
case CELL_CMD_FLUSH_BUFFER_RANGE: {
|
||||
struct cell_buffer_range *br = (struct cell_buffer_range *)
|
||||
|
@ -650,7 +624,9 @@ main_loop(void)
|
|||
exitFlag = 1;
|
||||
break;
|
||||
case CELL_CMD_VS_EXECUTE:
|
||||
#if 0
|
||||
spu_execute_vertex_shader(&draw, &cmd.vs);
|
||||
#endif
|
||||
break;
|
||||
case CELL_CMD_BATCH:
|
||||
cmd_batch(opcode);
|
||||
|
@ -675,6 +651,11 @@ one_time_init(void)
|
|||
memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status));
|
||||
memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status));
|
||||
invalidate_tex_cache();
|
||||
|
||||
/* Install default/fallback fragment processing function.
|
||||
* This will normally be overridden by a code-gen'd function.
|
||||
*/
|
||||
spu.fragment_ops = spu_fallback_fragment_ops;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -41,6 +41,10 @@
|
|||
#define MAX_HEIGHT 1024
|
||||
|
||||
|
||||
/**
|
||||
* A tile is basically a TILE_SIZE x TILE_SIZE block of 4-byte pixels.
|
||||
* The data may be addressed through several different types.
|
||||
*/
|
||||
typedef union {
|
||||
ushort us[TILE_SIZE][TILE_SIZE];
|
||||
uint ui[TILE_SIZE][TILE_SIZE];
|
||||
|
@ -56,38 +60,29 @@ typedef union {
|
|||
#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */
|
||||
|
||||
|
||||
struct spu_frag_test_results {
|
||||
qword mask;
|
||||
qword depth;
|
||||
qword stencil;
|
||||
};
|
||||
/** Function for sampling textures */
|
||||
typedef vector float (*spu_sample_texture_func)(uint unit,
|
||||
vector float texcoord);
|
||||
|
||||
typedef struct spu_frag_test_results (*frag_test_func)(qword frag_mask,
|
||||
qword pixel_depth, qword pixel_stencil, qword frag_depth,
|
||||
qword frag_alpha, qword facing);
|
||||
/** Function for performing per-fragment ops */
|
||||
typedef void (*spu_fragment_ops_func)(uint x, uint y,
|
||||
tile_t *colorTile,
|
||||
tile_t *depthStencilTile,
|
||||
vector float fragZ,
|
||||
vector float fragRed,
|
||||
vector float fragGreen,
|
||||
vector float fragBlue,
|
||||
vector float fragAlpha,
|
||||
vector unsigned int mask);
|
||||
|
||||
/** Function for running fragment program */
|
||||
typedef void (*spu_fragment_program_func)(vector float *inputs,
|
||||
vector float *outputs,
|
||||
vector float *constants);
|
||||
|
||||
|
||||
struct spu_blend_results {
|
||||
qword r;
|
||||
qword g;
|
||||
qword b;
|
||||
qword a;
|
||||
};
|
||||
|
||||
typedef struct spu_blend_results (*blend_func)(
|
||||
qword frag_r, qword frag_g, qword frag_b, qword frag_a,
|
||||
qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a,
|
||||
qword const_r, qword const_g, qword const_b, qword const_a);
|
||||
|
||||
typedef struct spu_blend_results (*logicop_func)(
|
||||
qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a,
|
||||
qword frag_r, qword frag_g, qword frag_b, qword frag_a,
|
||||
qword frag_mask);
|
||||
|
||||
|
||||
typedef vector float (*sample_texture_func)(uint unit, vector float texcoord);
|
||||
|
||||
struct spu_framebuffer {
|
||||
struct spu_framebuffer
|
||||
{
|
||||
void *color_start; /**< addr of color surface in main memory */
|
||||
void *depth_start; /**< addr of depth surface in main memory */
|
||||
enum pipe_format color_format;
|
||||
|
@ -99,6 +94,7 @@ struct spu_framebuffer {
|
|||
uint depth_clear_value;
|
||||
|
||||
uint zsize; /**< 0, 2 or 4 bytes per Z */
|
||||
float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */
|
||||
} ALIGN16_ATTRIB;
|
||||
|
||||
|
||||
|
@ -115,35 +111,31 @@ struct spu_texture
|
|||
|
||||
|
||||
/**
|
||||
* All SPU global/context state will be in singleton object of this type:
|
||||
* All SPU global/context state will be in a singleton object of this type:
|
||||
*/
|
||||
struct spu_global
|
||||
{
|
||||
/** One-time init/constant info */
|
||||
struct cell_init_info init;
|
||||
|
||||
/*
|
||||
* Current state
|
||||
*/
|
||||
struct spu_framebuffer fb;
|
||||
boolean read_depth;
|
||||
boolean read_stencil;
|
||||
frag_test_func frag_test; /**< Current depth/stencil test code */
|
||||
|
||||
boolean read_fb; /**< Does current blend mode require framebuffer read? */
|
||||
blend_func blend; /**< Current blend code */
|
||||
qword const_blend_color[4] ALIGN16_ATTRIB;
|
||||
|
||||
logicop_func logicop; /**< Current logicop code **/
|
||||
|
||||
struct pipe_depth_stencil_alpha_state depth_stencil_alpha;
|
||||
struct pipe_blend_state blend;
|
||||
struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS];
|
||||
struct spu_texture texture[PIPE_MAX_SAMPLERS];
|
||||
|
||||
struct vertex_info vertex_info;
|
||||
|
||||
/* XXX more state to come */
|
||||
|
||||
|
||||
/** current color and Z tiles */
|
||||
/** Current color and Z tiles */
|
||||
tile_t ctile ALIGN16_ATTRIB;
|
||||
tile_t ztile ALIGN16_ATTRIB;
|
||||
|
||||
/** Read depth/stencil tiles? */
|
||||
boolean read_depth;
|
||||
boolean read_stencil;
|
||||
|
||||
/** Current tiles' status */
|
||||
ubyte cur_ctile_status, cur_ztile_status;
|
||||
|
||||
|
@ -151,11 +143,22 @@ struct spu_global
|
|||
ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
|
||||
ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
|
||||
|
||||
/** Current fragment ops machine code */
|
||||
uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS];
|
||||
/** Current fragment ops function */
|
||||
spu_fragment_ops_func fragment_ops;
|
||||
|
||||
/** for converting RGBA to PIPE_FORMAT_x colors */
|
||||
vector unsigned char color_shuffle;
|
||||
/** Current fragment program machine code */
|
||||
uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
|
||||
/** Current fragment program function */
|
||||
spu_fragment_program_func fragment_program;
|
||||
|
||||
sample_texture_func sample_texture[CELL_MAX_SAMPLERS];
|
||||
/** Current texture sampler function */
|
||||
spu_sample_texture_func sample_texture[CELL_MAX_SAMPLERS];
|
||||
|
||||
/** Fragment program constants (XXX preliminary/used) */
|
||||
#define MAX_CONSTANTS 32
|
||||
vector float constants[MAX_CONSTANTS];
|
||||
|
||||
} ALIGN16_ATTRIB;
|
||||
|
||||
|
|
|
@ -1,211 +1,475 @@
|
|||
/*
|
||||
* (C) Copyright IBM Corporation 2008
|
||||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* \file spu_per_fragment_op.c
|
||||
* SPU implementation of various per-fragment operations.
|
||||
*
|
||||
* \author Ian Romanick <idr@us.ibm.com>
|
||||
* \author Brian Paul
|
||||
*/
|
||||
|
||||
|
||||
#include <transpose_matrix4x4.h>
|
||||
#include "pipe/p_format.h"
|
||||
#include "spu_main.h"
|
||||
#include "spu_colorpack.h"
|
||||
#include "spu_per_fragment_op.h"
|
||||
|
||||
#define ZERO 0x80
|
||||
|
||||
static void
|
||||
read_ds_quad(tile_t *buffer, unsigned x, unsigned y,
|
||||
enum pipe_format depth_format, qword *depth,
|
||||
qword *stencil)
|
||||
#define LINEAR_QUAD_LAYOUT 1
|
||||
|
||||
|
||||
/**
|
||||
* Called by rasterizer for each quad after the shader has run. Do
|
||||
* all the per-fragment operations including alpha test, z test,
|
||||
* stencil test, blend, colormask and logicops. This is a
|
||||
* fallback/debug function. In reality we'll use a generated function
|
||||
* produced by the PPU. But this function is useful for
|
||||
* debug/validation.
|
||||
*/
|
||||
void
|
||||
spu_fallback_fragment_ops(uint x, uint y,
|
||||
tile_t *colorTile,
|
||||
tile_t *depthStencilTile,
|
||||
vector float fragZ,
|
||||
vector float fragR,
|
||||
vector float fragG,
|
||||
vector float fragB,
|
||||
vector float fragA,
|
||||
vector unsigned int mask)
|
||||
{
|
||||
const int ix = x / 2;
|
||||
const int iy = y / 2;
|
||||
vector float frag_aos[4];
|
||||
unsigned int c0, c1, c2, c3;
|
||||
|
||||
switch (depth_format) {
|
||||
case PIPE_FORMAT_Z16_UNORM: {
|
||||
qword *ptr = (qword *) &buffer->us8[iy][ix / 2];
|
||||
/* do alpha test */
|
||||
if (spu.depth_stencil_alpha.alpha.enabled) {
|
||||
vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref);
|
||||
vector unsigned int amask;
|
||||
|
||||
const qword shuf_vec = (qword) {
|
||||
ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3,
|
||||
ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7
|
||||
};
|
||||
switch (spu.depth_stencil_alpha.alpha.func) {
|
||||
case PIPE_FUNC_LESS:
|
||||
amask = spu_cmpgt(ref, fragA); /* mask = (fragA < ref) */
|
||||
break;
|
||||
case PIPE_FUNC_GREATER:
|
||||
amask = spu_cmpgt(fragA, ref); /* mask = (fragA > ref) */
|
||||
break;
|
||||
case PIPE_FUNC_GEQUAL:
|
||||
amask = spu_cmpgt(ref, fragA);
|
||||
amask = spu_nor(amask, amask);
|
||||
break;
|
||||
case PIPE_FUNC_LEQUAL:
|
||||
amask = spu_cmpgt(fragA, ref);
|
||||
amask = spu_nor(amask, amask);
|
||||
break;
|
||||
case PIPE_FUNC_EQUAL:
|
||||
amask = spu_cmpeq(ref, fragA);
|
||||
break;
|
||||
case PIPE_FUNC_NOTEQUAL:
|
||||
amask = spu_cmpeq(ref, fragA);
|
||||
amask = spu_nor(amask, amask);
|
||||
break;
|
||||
case PIPE_FUNC_ALWAYS:
|
||||
amask = spu_splats(0xffffffffU);
|
||||
break;
|
||||
case PIPE_FUNC_NEVER:
|
||||
amask = spu_splats( 0x0U);
|
||||
break;
|
||||
default:
|
||||
;
|
||||
}
|
||||
|
||||
mask = spu_and(mask, amask);
|
||||
}
|
||||
|
||||
/* At even X values we want the first 4 shorts, and at odd X values we
|
||||
* want the second 4 shorts.
|
||||
/* Z and/or stencil testing... */
|
||||
if (spu.depth_stencil_alpha.depth.enabled ||
|
||||
spu.depth_stencil_alpha.stencil[0].enabled) {
|
||||
|
||||
/* get four Z/Stencil values from tile */
|
||||
vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU);
|
||||
vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2];
|
||||
vector unsigned int ifbZ = spu_and(ifbZS, mask24);
|
||||
vector unsigned int ifbS = spu_andc(ifbZS, mask24);
|
||||
|
||||
if (spu.depth_stencil_alpha.stencil[0].enabled) {
|
||||
/* do stencil test */
|
||||
ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM);
|
||||
|
||||
}
|
||||
else if (spu.depth_stencil_alpha.depth.enabled) {
|
||||
/* do depth test */
|
||||
|
||||
ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM ||
|
||||
spu.fb.depth_format == PIPE_FORMAT_X8Z24_UNORM);
|
||||
|
||||
vector unsigned int ifragZ;
|
||||
vector unsigned int zmask;
|
||||
|
||||
/* convert four fragZ from float to uint */
|
||||
fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff));
|
||||
ifragZ = spu_convtu(fragZ, 0);
|
||||
|
||||
/* do depth comparison, setting zmask with results */
|
||||
switch (spu.depth_stencil_alpha.depth.func) {
|
||||
case PIPE_FUNC_LESS:
|
||||
zmask = spu_cmpgt(ifbZ, ifragZ); /* mask = (ifragZ < ifbZ) */
|
||||
break;
|
||||
case PIPE_FUNC_GREATER:
|
||||
zmask = spu_cmpgt(ifragZ, ifbZ); /* mask = (ifragZ > ifbZ) */
|
||||
break;
|
||||
case PIPE_FUNC_GEQUAL:
|
||||
zmask = spu_cmpgt(ifbZ, ifragZ);
|
||||
zmask = spu_nor(zmask, zmask);
|
||||
break;
|
||||
case PIPE_FUNC_LEQUAL:
|
||||
zmask = spu_cmpgt(ifragZ, ifbZ);
|
||||
zmask = spu_nor(zmask, zmask);
|
||||
break;
|
||||
case PIPE_FUNC_EQUAL:
|
||||
zmask = spu_cmpeq(ifbZ, ifragZ);
|
||||
break;
|
||||
case PIPE_FUNC_NOTEQUAL:
|
||||
zmask = spu_cmpeq(ifbZ, ifragZ);
|
||||
zmask = spu_nor(zmask, zmask);
|
||||
break;
|
||||
case PIPE_FUNC_ALWAYS:
|
||||
zmask = spu_splats(0xffffffffU);
|
||||
break;
|
||||
case PIPE_FUNC_NEVER:
|
||||
zmask = spu_splats( 0x0U);
|
||||
break;
|
||||
default:
|
||||
;
|
||||
}
|
||||
|
||||
mask = spu_and(mask, zmask);
|
||||
|
||||
/* merge framebuffer Z and fragment Z according to the mask */
|
||||
ifbZ = spu_or(spu_and(ifragZ, mask),
|
||||
spu_andc(ifbZ, mask));
|
||||
}
|
||||
|
||||
if (spu_extract(spu_orx(mask), 0)) {
|
||||
/* put new fragment Z/Stencil values back into Z/Stencil tile */
|
||||
depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS);
|
||||
|
||||
spu.cur_ztile_status = TILE_STATUS_DIRTY;
|
||||
}
|
||||
}
|
||||
|
||||
if (spu.blend.blend_enable) {
|
||||
/* blending terms, misc regs */
|
||||
vector float term1r, term1g, term1b, term1a;
|
||||
vector float term2r, term2g, term2b, term2a;
|
||||
vector float one, tmp;
|
||||
|
||||
vector float fbRGBA[4]; /* current framebuffer colors */
|
||||
|
||||
/* get colors from framebuffer/tile */
|
||||
{
|
||||
vector float fc[4];
|
||||
uint c0, c1, c2, c3;
|
||||
|
||||
#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */
|
||||
c0 = colorTile->ui[y][x*2+0];
|
||||
c1 = colorTile->ui[y][x*2+1];
|
||||
c2 = colorTile->ui[y][x*2+2];
|
||||
c3 = colorTile->ui[y][x*2+3];
|
||||
#else
|
||||
c0 = colorTile->ui[y+0][x+0];
|
||||
c1 = colorTile->ui[y+0][x+1];
|
||||
c2 = colorTile->ui[y+1][x+0];
|
||||
c3 = colorTile->ui[y+1][x+1];
|
||||
#endif
|
||||
switch (spu.fb.color_format) {
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
fc[0] = spu_unpack_B8G8R8A8(c0);
|
||||
fc[1] = spu_unpack_B8G8R8A8(c1);
|
||||
fc[2] = spu_unpack_B8G8R8A8(c2);
|
||||
fc[3] = spu_unpack_B8G8R8A8(c3);
|
||||
break;
|
||||
case PIPE_FORMAT_A8R8G8B8_UNORM:
|
||||
fc[0] = spu_unpack_A8R8G8B8(c0);
|
||||
fc[1] = spu_unpack_A8R8G8B8(c1);
|
||||
fc[2] = spu_unpack_A8R8G8B8(c2);
|
||||
fc[3] = spu_unpack_A8R8G8B8(c3);
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
}
|
||||
_transpose_matrix4x4(fbRGBA, fc);
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute Src RGB terms
|
||||
*/
|
||||
qword bias = (qword) spu_splats((unsigned char) ((ix & 0x01) << 3));
|
||||
qword bias_mask = si_fsmbi(0x3333);
|
||||
qword sv = si_a(shuf_vec, si_and(bias_mask, bias));
|
||||
switch (spu.blend.rgb_src_factor) {
|
||||
case PIPE_BLENDFACTOR_ONE:
|
||||
term1r = fragR;
|
||||
term1g = fragG;
|
||||
term1b = fragB;
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_ZERO:
|
||||
term1r =
|
||||
term1g =
|
||||
term1b = spu_splats(0.0f);
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_SRC_COLOR:
|
||||
term1r = spu_mul(fragR, fragR);
|
||||
term1g = spu_mul(fragG, fragG);
|
||||
term1b = spu_mul(fragB, fragB);
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_SRC_ALPHA:
|
||||
term1r = spu_mul(fragR, fragA);
|
||||
term1g = spu_mul(fragG, fragA);
|
||||
term1b = spu_mul(fragB, fragA);
|
||||
break;
|
||||
/* XXX more cases */
|
||||
default:
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
*depth = si_shufb(*ptr, *ptr, sv);
|
||||
*stencil = si_il(0);
|
||||
break;
|
||||
/*
|
||||
* Compute Src Alpha term
|
||||
*/
|
||||
switch (spu.blend.alpha_src_factor) {
|
||||
case PIPE_BLENDFACTOR_ONE:
|
||||
term1a = fragA;
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_SRC_COLOR:
|
||||
term1a = spu_splats(0.0f);
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_SRC_ALPHA:
|
||||
term1a = spu_mul(fragA, fragA);
|
||||
break;
|
||||
/* XXX more cases */
|
||||
default:
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute Dest RGB terms
|
||||
*/
|
||||
switch (spu.blend.rgb_dst_factor) {
|
||||
case PIPE_BLENDFACTOR_ONE:
|
||||
term2r = fragR;
|
||||
term2g = fragG;
|
||||
term2b = fragB;
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_ZERO:
|
||||
term2r =
|
||||
term2g =
|
||||
term2b = spu_splats(0.0f);
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_SRC_COLOR:
|
||||
term2r = spu_mul(fbRGBA[0], fragR);
|
||||
term2g = spu_mul(fbRGBA[1], fragG);
|
||||
term2b = spu_mul(fbRGBA[2], fragB);
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_SRC_ALPHA:
|
||||
term2r = spu_mul(fbRGBA[0], fragA);
|
||||
term2g = spu_mul(fbRGBA[1], fragA);
|
||||
term2b = spu_mul(fbRGBA[2], fragA);
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
|
||||
one = spu_splats(1.0f);
|
||||
tmp = spu_sub(one, fragA);
|
||||
term2r = spu_mul(fbRGBA[0], tmp);
|
||||
term2g = spu_mul(fbRGBA[1], tmp);
|
||||
term2b = spu_mul(fbRGBA[2], tmp);
|
||||
break;
|
||||
/* XXX more cases */
|
||||
default:
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute Dest Alpha term
|
||||
*/
|
||||
switch (spu.blend.alpha_dst_factor) {
|
||||
case PIPE_BLENDFACTOR_ONE:
|
||||
term2a = fragA;
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_SRC_COLOR:
|
||||
term2a = spu_splats(0.0f);
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_SRC_ALPHA:
|
||||
term2a = spu_mul(fbRGBA[3], fragA);
|
||||
break;
|
||||
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
|
||||
one = spu_splats(1.0f);
|
||||
tmp = spu_sub(one, fragA);
|
||||
term2a = spu_mul(fbRGBA[3], tmp);
|
||||
break;
|
||||
/* XXX more cases */
|
||||
default:
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Combine Src/Dest RGB terms
|
||||
*/
|
||||
switch (spu.blend.rgb_func) {
|
||||
case PIPE_BLEND_ADD:
|
||||
fragR = spu_add(term1r, term2r);
|
||||
fragG = spu_add(term1g, term2g);
|
||||
fragB = spu_add(term1b, term2b);
|
||||
break;
|
||||
case PIPE_BLEND_SUBTRACT:
|
||||
fragR = spu_sub(term1r, term2r);
|
||||
fragG = spu_sub(term1g, term2g);
|
||||
fragB = spu_sub(term1b, term2b);
|
||||
break;
|
||||
/* XXX more cases */
|
||||
default:
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Combine Src/Dest A term
|
||||
*/
|
||||
switch (spu.blend.alpha_func) {
|
||||
case PIPE_BLEND_ADD:
|
||||
fragA = spu_add(term1a, term2a);
|
||||
break;
|
||||
case PIPE_BLEND_SUBTRACT:
|
||||
fragA = spu_sub(term1a, term2a);
|
||||
break;
|
||||
/* XXX more cases */
|
||||
default:
|
||||
ASSERT(0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
case PIPE_FORMAT_Z32_UNORM: {
|
||||
qword *ptr = (qword *) &buffer->ui4[iy][ix];
|
||||
|
||||
*depth = *ptr;
|
||||
*stencil = si_il(0);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
case PIPE_FORMAT_Z24S8_UNORM: {
|
||||
qword *ptr = (qword *) &buffer->ui4[iy][ix];
|
||||
qword mask = si_fsmbi(0xEEEE);
|
||||
|
||||
*depth = si_rotmai(si_and(*ptr, mask), -8);
|
||||
*stencil = si_andc(*ptr, mask);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
case PIPE_FORMAT_S8Z24_UNORM: {
|
||||
qword *ptr = (qword *) &buffer->ui4[iy][ix];
|
||||
|
||||
*depth = si_and(*ptr, si_fsmbi(0x7777));
|
||||
*stencil = si_andi(si_roti(*ptr, 8), 0x0ff);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
write_ds_quad(tile_t *buffer, unsigned x, unsigned y,
|
||||
enum pipe_format depth_format,
|
||||
qword depth, qword stencil)
|
||||
{
|
||||
const int ix = x / 2;
|
||||
const int iy = y / 2;
|
||||
|
||||
(void) stencil;
|
||||
|
||||
switch (depth_format) {
|
||||
case PIPE_FORMAT_Z16_UNORM: {
|
||||
qword *ptr = (qword *) &buffer->us8[iy][ix / 2];
|
||||
|
||||
qword sv = ((ix & 0x01) == 0)
|
||||
? (qword) { 2, 3, 6, 7, 10, 11, 14, 15,
|
||||
24, 25, 26, 27, 28, 29, 30, 31 }
|
||||
: (qword) { 16, 17, 18, 19, 20 , 21, 22, 23,
|
||||
2, 3, 6, 7, 10, 11, 14, 15 };
|
||||
*ptr = si_shufb(depth, *ptr, sv);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
case PIPE_FORMAT_Z32_UNORM: {
|
||||
qword *ptr = (qword *) &buffer->ui4[iy][ix];
|
||||
*ptr = depth;
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
case PIPE_FORMAT_Z24S8_UNORM: {
|
||||
qword *ptr = (qword *) &buffer->ui4[iy][ix];
|
||||
qword mask = si_fsmbi(0xEEEE);
|
||||
|
||||
depth = si_shli(depth, 8);
|
||||
*ptr = si_selb(stencil, depth, mask);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
case PIPE_FORMAT_S8Z24_UNORM: {
|
||||
qword *ptr = (qword *) &buffer->ui4[iy][ix];
|
||||
qword mask = si_fsmbi(0x7777);
|
||||
|
||||
stencil = si_shli(stencil, 24);
|
||||
*ptr = si_selb(stencil, depth, mask);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
qword
|
||||
spu_do_depth_stencil(int x, int y,
|
||||
qword frag_mask, qword frag_depth, qword frag_alpha,
|
||||
qword facing)
|
||||
{
|
||||
struct spu_frag_test_results result;
|
||||
qword pixel_depth;
|
||||
qword pixel_stencil;
|
||||
|
||||
/* All of this preamble code (everything before the call to frag_test) should
|
||||
* be generated on the PPU and uploaded to the SPU.
|
||||
/*
|
||||
* Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA.
|
||||
*/
|
||||
if (spu.read_depth || spu.read_stencil) {
|
||||
read_ds_quad(&spu.ztile, x, y, spu.fb.depth_format,
|
||||
&pixel_depth, &pixel_stencil);
|
||||
#if 0
|
||||
/* original code */
|
||||
{
|
||||
vector float frag_soa[4];
|
||||
frag_soa[0] = fragR;
|
||||
frag_soa[1] = fragG;
|
||||
frag_soa[2] = fragB;
|
||||
frag_soa[3] = fragA;
|
||||
_transpose_matrix4x4(frag_aos, frag_soa);
|
||||
}
|
||||
|
||||
switch (spu.fb.depth_format) {
|
||||
case PIPE_FORMAT_Z16_UNORM:
|
||||
frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x0000ffffu)));
|
||||
frag_depth = si_cfltu(frag_depth, 0);
|
||||
#else
|
||||
/* short-cut relying on function parameter layout: */
|
||||
_transpose_matrix4x4(frag_aos, &fragR);
|
||||
(void) fragG;
|
||||
(void) fragB;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Pack float colors into 32-bit RGBA words.
|
||||
*/
|
||||
switch (spu.fb.color_format) {
|
||||
case PIPE_FORMAT_A8R8G8B8_UNORM:
|
||||
c0 = spu_pack_A8R8G8B8(frag_aos[0]);
|
||||
c1 = spu_pack_A8R8G8B8(frag_aos[1]);
|
||||
c2 = spu_pack_A8R8G8B8(frag_aos[2]);
|
||||
c3 = spu_pack_A8R8G8B8(frag_aos[3]);
|
||||
break;
|
||||
case PIPE_FORMAT_Z32_UNORM:
|
||||
frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0xffffffffu)));
|
||||
frag_depth = si_cfltu(frag_depth, 0);
|
||||
break;
|
||||
case PIPE_FORMAT_Z24S8_UNORM:
|
||||
case PIPE_FORMAT_S8Z24_UNORM:
|
||||
frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x00ffffffu)));
|
||||
frag_depth = si_cfltu(frag_depth, 0);
|
||||
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
c0 = spu_pack_B8G8R8A8(frag_aos[0]);
|
||||
c1 = spu_pack_B8G8R8A8(frag_aos[1]);
|
||||
c2 = spu_pack_B8G8R8A8(frag_aos[2]);
|
||||
c3 = spu_pack_B8G8R8A8(frag_aos[3]);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n");
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
|
||||
result = (*spu.frag_test)(frag_mask, pixel_depth, pixel_stencil,
|
||||
frag_depth, frag_alpha, facing);
|
||||
|
||||
|
||||
/* This code (everything after the call to frag_test) should
|
||||
* be generated on the PPU and uploaded to the SPU.
|
||||
/*
|
||||
* Color masking
|
||||
*/
|
||||
if (spu.read_depth || spu.read_stencil) {
|
||||
write_ds_quad(&spu.ztile, x, y, spu.fb.depth_format,
|
||||
result.depth, result.stencil);
|
||||
if (spu.blend.colormask != 0xf) {
|
||||
/* XXX to do */
|
||||
/* apply color mask to 32-bit packed colors */
|
||||
}
|
||||
|
||||
return result.mask;
|
||||
|
||||
/*
|
||||
* Logic Ops
|
||||
*/
|
||||
if (spu.blend.logicop_enable) {
|
||||
/* XXX to do */
|
||||
/* apply logicop to 32-bit packed colors */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* If mask is non-zero, mark tile as dirty.
|
||||
*/
|
||||
if (spu_extract(spu_orx(mask), 0)) {
|
||||
spu.cur_ctile_status = TILE_STATUS_DIRTY;
|
||||
}
|
||||
else {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Write new quad colors to the framebuffer/tile.
|
||||
* Only write pixels where the corresponding mask word is set.
|
||||
*/
|
||||
#if LINEAR_QUAD_LAYOUT
|
||||
/*
|
||||
* Quad layout:
|
||||
* +--+--+--+--+
|
||||
* |p0|p1|p2|p3|
|
||||
* +--+--+--+--+
|
||||
*/
|
||||
if (spu_extract(mask, 0))
|
||||
colorTile->ui[y][x*2] = c0;
|
||||
if (spu_extract(mask, 1))
|
||||
colorTile->ui[y][x*2+1] = c1;
|
||||
if (spu_extract(mask, 2))
|
||||
colorTile->ui[y][x*2+2] = c2;
|
||||
if (spu_extract(mask, 3))
|
||||
colorTile->ui[y][x*2+3] = c3;
|
||||
#else
|
||||
/*
|
||||
* Quad layout:
|
||||
* +--+--+
|
||||
* |p0|p1|
|
||||
* +--+--+
|
||||
* |p2|p3|
|
||||
* +--+--+
|
||||
*/
|
||||
if (spu_extract(mask, 0))
|
||||
colorTile->ui[y+0][x+0] = c0;
|
||||
if (spu_extract(mask, 1))
|
||||
colorTile->ui[y+0][x+1] = c1;
|
||||
if (spu_extract(mask, 2))
|
||||
colorTile->ui[y+1][x+0] = c2;
|
||||
if (spu_extract(mask, 3))
|
||||
colorTile->ui[y+1][x+1] = c3;
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -1,32 +1,44 @@
|
|||
/*
|
||||
* (C) Copyright IBM Corporation 2008
|
||||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
#ifndef SPU_PER_FRAGMENT_OP
|
||||
#define SPU_PER_FRAGMENT_OP
|
||||
|
||||
extern qword
|
||||
spu_do_depth_stencil(int x, int y, qword frag_mask, qword frag_depth,
|
||||
qword frag_alpha, qword facing);
|
||||
|
||||
extern void
|
||||
spu_fallback_fragment_ops(uint x, uint y,
|
||||
tile_t *colorTile,
|
||||
tile_t *depthStencilTile,
|
||||
vector float fragZ,
|
||||
vector float fragRed,
|
||||
vector float fragGreen,
|
||||
vector float fragBlue,
|
||||
vector float fragAlpha,
|
||||
vector unsigned int mask);
|
||||
|
||||
|
||||
#endif /* SPU_PER_FRAGMENT_OP */
|
||||
|
|
|
@ -97,7 +97,7 @@ get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels)
|
|||
const qword offset_y = si_andi((qword) y, 0x1f);
|
||||
|
||||
const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row);
|
||||
const qword tile_size = (qword) spu_splats(sizeof(tile_t));
|
||||
const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t));
|
||||
|
||||
qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x);
|
||||
tile_offset = si_mpy((qword) tile_offset, tile_size);
|
||||
|
|
|
@ -38,7 +38,6 @@
|
|||
#include "spu_texture.h"
|
||||
#include "spu_tile.h"
|
||||
#include "spu_tri.h"
|
||||
#include "spu_per_fragment_op.h"
|
||||
|
||||
|
||||
/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
|
||||
|
@ -209,7 +208,7 @@ clip_emit_quad(struct setup_stage *setup)
|
|||
/**
|
||||
* Evaluate attribute coefficients (plane equations) to compute
|
||||
* attribute values for the four fragments in a quad.
|
||||
* Eg: four colors will be compute.
|
||||
* Eg: four colors will be computed (in AoS format).
|
||||
*/
|
||||
static INLINE void
|
||||
eval_coeff(uint slot, float x, float y, vector float result[4])
|
||||
|
@ -255,31 +254,6 @@ eval_z(float x, float y)
|
|||
}
|
||||
|
||||
|
||||
static INLINE mask_t
|
||||
do_depth_test(int x, int y, mask_t quadmask)
|
||||
{
|
||||
float4 zvals;
|
||||
mask_t mask;
|
||||
|
||||
if (spu.fb.depth_format == PIPE_FORMAT_NONE)
|
||||
return quadmask;
|
||||
|
||||
zvals.v = eval_z((float) x, (float) y);
|
||||
|
||||
mask = (mask_t) spu_do_depth_stencil(x - setup.cliprect_minx,
|
||||
y - setup.cliprect_miny,
|
||||
(qword) quadmask,
|
||||
(qword) zvals.v,
|
||||
(qword) spu_splats((unsigned char) 0x0ffu),
|
||||
(qword) spu_splats((unsigned int) 0x01u));
|
||||
|
||||
if (spu_extract(spu_orx(mask), 0))
|
||||
spu.cur_ztile_status = TILE_STATUS_DIRTY;
|
||||
|
||||
return mask;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit a quad (pass to next stage). No clipping is done.
|
||||
* Note: about 1/5 to 1/7 of the time, mask is zero and this function
|
||||
|
@ -289,18 +263,6 @@ do_depth_test(int x, int y, mask_t quadmask)
|
|||
static INLINE void
|
||||
emit_quad( int x, int y, mask_t mask )
|
||||
{
|
||||
#if 0
|
||||
struct softpipe_context *sp = setup.softpipe;
|
||||
setup.quad.x0 = x;
|
||||
setup.quad.y0 = y;
|
||||
setup.quad.mask = mask;
|
||||
sp->quad.first->run(sp->quad.first, &setup.quad);
|
||||
#else
|
||||
|
||||
if (spu.read_depth) {
|
||||
mask = do_depth_test(x, y, mask);
|
||||
}
|
||||
|
||||
/* If any bits in mask are set... */
|
||||
if (spu_extract(spu_orx(mask), 0)) {
|
||||
const int ix = x - setup.cliprect_minx;
|
||||
|
@ -308,6 +270,7 @@ emit_quad( int x, int y, mask_t mask )
|
|||
vector float colors[4];
|
||||
|
||||
spu.cur_ctile_status = TILE_STATUS_DIRTY;
|
||||
spu.cur_ztile_status = TILE_STATUS_DIRTY;
|
||||
|
||||
if (spu.texture[0].start) {
|
||||
/* texture mapping */
|
||||
|
@ -351,59 +314,68 @@ emit_quad( int x, int y, mask_t mask )
|
|||
}
|
||||
else {
|
||||
/* simple shading */
|
||||
#if 0
|
||||
eval_coeff(1, (float) x, (float) y, colors);
|
||||
}
|
||||
|
||||
#else
|
||||
/* XXX new fragment program code */
|
||||
|
||||
/* Convert fragment data from AoS to SoA format.
|
||||
*/
|
||||
qword soa_frag[4];
|
||||
_transpose_matrix4x4((vec_float4 *) soa_frag, colors);
|
||||
if (spu.fragment_program) {
|
||||
vector float inputs[4*4], outputs[2*4];
|
||||
|
||||
/* Read the current framebuffer values.
|
||||
*/
|
||||
const qword pix[4] = {
|
||||
(qword) spu_splats(spu.ctile.ui[iy+0][ix+0]),
|
||||
(qword) spu_splats(spu.ctile.ui[iy+0][ix+1]),
|
||||
(qword) spu_splats(spu.ctile.ui[iy+1][ix+0]),
|
||||
(qword) spu_splats(spu.ctile.ui[iy+1][ix+1]),
|
||||
};
|
||||
/* setup inputs */
|
||||
eval_coeff(1, (float) x, (float) y, inputs);
|
||||
|
||||
qword soa_pix[4];
|
||||
/* Execute the current fragment program */
|
||||
spu.fragment_program(inputs, outputs, spu.constants);
|
||||
|
||||
if (spu.read_fb) {
|
||||
/* Convert pixel data from AoS to SoA format.
|
||||
*/
|
||||
vec_float4 aos_pix[4] = {
|
||||
spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+0]),
|
||||
spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+1]),
|
||||
spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+0]),
|
||||
spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+1]),
|
||||
};
|
||||
/* Copy outputs */
|
||||
colors[0] = outputs[0*4+0];
|
||||
colors[1] = outputs[0*4+1];
|
||||
colors[2] = outputs[0*4+2];
|
||||
colors[3] = outputs[0*4+3];
|
||||
|
||||
_transpose_matrix4x4((vec_float4 *) soa_pix, aos_pix);
|
||||
}
|
||||
if (0 && spu.init.id==0 && y == 48) {
|
||||
printf("colors[0] = %f %f %f %f\n",
|
||||
spu_extract(colors[0], 0),
|
||||
spu_extract(colors[0], 1),
|
||||
spu_extract(colors[0], 2),
|
||||
spu_extract(colors[0], 3));
|
||||
printf("colors[1] = %f %f %f %f\n",
|
||||
spu_extract(colors[1], 0),
|
||||
spu_extract(colors[1], 1),
|
||||
spu_extract(colors[1], 2),
|
||||
spu_extract(colors[1], 3));
|
||||
}
|
||||
|
||||
|
||||
struct spu_blend_results result =
|
||||
(*spu.blend)(soa_frag[0], soa_frag[1], soa_frag[2], soa_frag[3],
|
||||
soa_pix[0], soa_pix[1], soa_pix[2], soa_pix[3],
|
||||
spu.const_blend_color[0], spu.const_blend_color[1],
|
||||
spu.const_blend_color[2], spu.const_blend_color[3]);
|
||||
|
||||
|
||||
/* Convert final pixel data from SoA to AoS format.
|
||||
*/
|
||||
result = (*spu.logicop)(pix[0], pix[1], pix[2], pix[3],
|
||||
result.r, result.g, result.b, result.a,
|
||||
(qword) mask);
|
||||
|
||||
spu.ctile.ui[iy+0][ix+0] = spu_extract((vec_uint4) result.r, 0);
|
||||
spu.ctile.ui[iy+0][ix+1] = spu_extract((vec_uint4) result.g, 0);
|
||||
spu.ctile.ui[iy+1][ix+0] = spu_extract((vec_uint4) result.b, 0);
|
||||
spu.ctile.ui[iy+1][ix+1] = spu_extract((vec_uint4) result.a, 0);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
/* Convert fragment data from AoS to SoA format.
|
||||
* I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA)
|
||||
* This is temporary!
|
||||
*/
|
||||
vector float soa_frag[4];
|
||||
_transpose_matrix4x4(soa_frag, colors);
|
||||
|
||||
float4 fragZ;
|
||||
|
||||
fragZ.v = eval_z((float) x, (float) y);
|
||||
|
||||
/* Do all per-fragment/quad operations here, including:
|
||||
* alpha test, z test, stencil test, blend and framebuffer writing.
|
||||
*/
|
||||
spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile,
|
||||
fragZ.v,
|
||||
soa_frag[0], soa_frag[1],
|
||||
soa_frag[2], soa_frag[3],
|
||||
mask);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -349,12 +349,17 @@ create_xmesa_buffer(XMesaDrawable d, BufferType type,
|
|||
|
||||
if (vis->mesa_visual.depthBits == 0)
|
||||
depthFormat = PIPE_FORMAT_NONE;
|
||||
#ifdef GALLIUM_CELL /* XXX temporary for Cell! */
|
||||
else
|
||||
depthFormat = PIPE_FORMAT_S8Z24_UNORM;
|
||||
#else
|
||||
else if (vis->mesa_visual.depthBits <= 16)
|
||||
depthFormat = PIPE_FORMAT_Z16_UNORM;
|
||||
else if (vis->mesa_visual.depthBits <= 24)
|
||||
depthFormat = PIPE_FORMAT_S8Z24_UNORM;
|
||||
else
|
||||
depthFormat = PIPE_FORMAT_Z32_UNORM;
|
||||
#endif
|
||||
|
||||
if (vis->mesa_visual.stencilBits == 8) {
|
||||
if (depthFormat == PIPE_FORMAT_S8Z24_UNORM)
|
||||
|
|
|
@ -275,6 +275,37 @@ xm_buffer_destroy(struct pipe_winsys *pws,
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* For Cell. Basically, rearrange the pixels/quads from this layout:
|
||||
* +--+--+--+--+
|
||||
* |p0|p1|p2|p3|....
|
||||
* +--+--+--+--+
|
||||
*
|
||||
* to this layout:
|
||||
* +--+--+
|
||||
* |p0|p1|....
|
||||
* +--+--+
|
||||
* |p2|p3|
|
||||
* +--+--+
|
||||
*/
|
||||
static void
|
||||
twiddle_tile(const uint *tileIn, uint *tileOut)
|
||||
{
|
||||
int y, x;
|
||||
|
||||
for (y = 0; y < TILE_SIZE; y+=2) {
|
||||
for (x = 0; x < TILE_SIZE; x+=2) {
|
||||
int k = 4 * (y/2 * TILE_SIZE/2 + x/2);
|
||||
tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k];
|
||||
tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1];
|
||||
tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2];
|
||||
tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Display a surface that's in a tiled configuration. That is, all the
|
||||
* pixels for a TILE_SIZExTILE_SIZE block are contiguous in memory.
|
||||
|
@ -306,6 +337,7 @@ xmesa_display_surface_tiled(XMesaBuffer b, const struct pipe_surface *surf)
|
|||
|
||||
for (y = 0; y < surf->height; y += TILE_SIZE) {
|
||||
for (x = 0; x < surf->width; x += TILE_SIZE) {
|
||||
uint tmpTile[TILE_SIZE * TILE_SIZE];
|
||||
int tx = x / TILE_SIZE;
|
||||
int ty = y / TILE_SIZE;
|
||||
int offset = ty * tilesPerRow + tx;
|
||||
|
@ -319,7 +351,9 @@ xmesa_display_surface_tiled(XMesaBuffer b, const struct pipe_surface *surf)
|
|||
|
||||
offset *= 4 * TILE_SIZE * TILE_SIZE;
|
||||
|
||||
ximage->data = (char *) xm_buf->data + offset;
|
||||
twiddle_tile((uint *) ((char *) xm_buf->data + offset),
|
||||
tmpTile);
|
||||
ximage->data = (char*) tmpTile;
|
||||
|
||||
if (XSHM_ENABLED(xm_buf)) {
|
||||
#if defined(USE_XSHM) && !defined(XFree86Server)
|
||||
|
|
|
@ -1317,7 +1317,12 @@ processWindowWorkList(GLUTwindow * window)
|
|||
is where the finish work gets queued for indirect
|
||||
contexts. */
|
||||
__glutSetWindow(window);
|
||||
glFinish();
|
||||
#if !defined(_WIN32)
|
||||
if (!window->isDirect)
|
||||
#endif
|
||||
{
|
||||
glFinish();
|
||||
}
|
||||
}
|
||||
if (workMask & GLUT_DEBUG_WORK) {
|
||||
__glutSetWindow(window);
|
||||
|
|
Loading…
Reference in New Issue