Merge remote branch 'upstream/gallium-0.1' into nouveau-gallium-0.1

This commit is contained in:
Ben Skeggs 2008-09-12 20:33:59 +10:00
commit 81335d0f17
38 changed files with 2873 additions and 653 deletions

View File

@ -23,6 +23,10 @@ CFLAGS = -O
CXXFLAGS = -O
GLU_CFLAGS =
# Compiler for building demos/tests/etc
APP_CC = $(CC)
APP_CXX = $(CXX)
# Misc tools and flags
MKLIB_OPTIONS =
MKDEP = makedepend

View File

@ -12,6 +12,8 @@ GALLIUM_DRIVER_DIRS += cell
CC = ppu32-gcc
CXX = ppu32-g++
HOST_CC = gcc
APP_CC = gcc
APP_CXX = g++
OPT_FLAGS = -O3
@ -19,7 +21,7 @@ OPT_FLAGS = -O3
## For SDK 2.1: (plus, remove -DSPU_MAIN_PARAM_LONG_LONG below)
#SDK = /opt/ibm/cell-sdk/prototype/sysroot/usr
## For SDK 3.0:
SDK = /opt/cell/sdk/usr/
SDK = /opt/cell/sdk/usr
CFLAGS = $(OPT_FLAGS) -Wall -Winline -fPIC -m32 -mabi=altivec -maltivec \

View File

@ -74,7 +74,7 @@ PROGS = \
# make executable from .c file:
.c: $(LIB_DEP) readtex.o
$(CC) -I$(INCDIR) $(CFLAGS) $< readtex.o $(APP_LIB_DEPS) -o $@
$(APP_CC) -I$(INCDIR) $(CFLAGS) $< readtex.o $(APP_LIB_DEPS) -o $@
##### TARGETS #####
@ -90,7 +90,7 @@ readtex.h: $(TOP)/progs/util/readtex.h
cp $< .
readtex.o: readtex.c readtex.h
$(CC) -c -I$(INCDIR) $(CFLAGS) readtex.c
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) readtex.c
showbuffer.c: $(TOP)/progs/util/showbuffer.c
@ -100,7 +100,7 @@ showbuffer.h: $(TOP)/progs/util/showbuffer.h
cp $< .
showbuffer.o: showbuffer.c showbuffer.h
$(CC) -c -I$(INCDIR) $(CFLAGS) showbuffer.c
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) showbuffer.c
trackball.c: $(TOP)/progs/util/trackball.c
@ -110,7 +110,7 @@ trackball.h: $(TOP)/progs/util/trackball.h
cp $< .
trackball.o: trackball.c trackball.h
$(CC) -c -I$(INCDIR) $(CFLAGS) trackball.c
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) trackball.c
extfuncs.h: $(TOP)/progs/util/extfuncs.h
@ -118,38 +118,38 @@ extfuncs.h: $(TOP)/progs/util/extfuncs.h
reflect: reflect.o showbuffer.o readtex.o
$(CC) reflect.o showbuffer.o readtex.o $(APP_LIB_DEPS) $(ARCH_FLAGS) -o $@
$(APP_CC) reflect.o showbuffer.o readtex.o $(APP_LIB_DEPS) $(ARCH_FLAGS) -o $@
reflect.o: reflect.c showbuffer.h
$(CC) -c -I$(INCDIR) $(CFLAGS) reflect.c
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) reflect.c
shadowtex: shadowtex.o showbuffer.o
$(CC) shadowtex.o showbuffer.o $(APP_LIB_DEPS) $(ARCH_FLAGS) -o $@
$(APP_CC) shadowtex.o showbuffer.o $(APP_LIB_DEPS) $(ARCH_FLAGS) -o $@
shadowtex.o: shadowtex.c showbuffer.h
$(CC) -c -I$(INCDIR) $(CFLAGS) shadowtex.c
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) shadowtex.c
gloss: gloss.o trackball.o readtex.o
$(CC) gloss.o trackball.o readtex.o $(APP_LIB_DEPS) $(ARCH_FLAGS) -o $@
$(APP_CC) gloss.o trackball.o readtex.o $(APP_LIB_DEPS) $(ARCH_FLAGS) -o $@
gloss.o: gloss.c trackball.h
$(CC) -c -I$(INCDIR) $(CFLAGS) gloss.c
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) gloss.c
engine: engine.o trackball.o readtex.o
$(CC) engine.o trackball.o readtex.o $(APP_LIB_DEPS) $(ARCH_FLAGS) -o $@
$(APP_CC) engine.o trackball.o readtex.o $(APP_LIB_DEPS) $(ARCH_FLAGS) -o $@
engine.o: engine.c trackball.h
$(CC) -c -I$(INCDIR) $(CFLAGS) engine.c
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) engine.c
fslight: fslight.o
$(CC) fslight.o $(APP_LIB_DEPS) $(ARCH_FLAGS) -o $@
$(APP_CC) fslight.o $(APP_LIB_DEPS) $(ARCH_FLAGS) -o $@
fslight.o: fslight.c extfuncs.h
$(CC) -c -I$(INCDIR) $(CFLAGS) fslight.c
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) fslight.c

View File

@ -40,13 +40,13 @@ UTIL_FILES = readtex.h readtex.c
.SUFFIXES: .c
.c:
$(CC) $(INCLUDES) $(CFLAGS) $< $(LIBS) -o $@
$(APP_CC) $(INCLUDES) $(CFLAGS) $< $(LIBS) -o $@
.c.o:
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
.S.o:
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
##### TARGETS #####
@ -67,19 +67,19 @@ getproclist.h: $(TOP)/src/mesa/glapi/gl_API.xml getprocaddress.c getprocaddress.
texrect: texrect.o readtex.o
$(CC) texrect.o readtex.o $(LIBS) -o $@
$(APP_CC) texrect.o readtex.o $(LIBS) -o $@
texrect.o: texrect.c readtex.h
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
invert: invert.o readtex.o
$(CC) invert.o readtex.o $(LIBS) -o $@
$(APP_CC) invert.o readtex.o $(LIBS) -o $@
invert.o: invert.c readtex.h
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
readtex.o: readtex.c
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
readtex.h: $(TOP)/progs/util/readtex.h

View File

@ -26,7 +26,7 @@ PROGS = \
# make executable from .c file:
.c: $(LIB_DEP)
$(CC) -I$(INCDIR) $(CFLAGS) $< $(APP_LIB_DEPS) -o $@
$(APP_CC) -I$(INCDIR) $(CFLAGS) $< $(APP_LIB_DEPS) -o $@
##### TARGETS #####
@ -47,7 +47,7 @@ readtex.h: $(TOP)/progs/util/readtex.h
cp $< .
readtex.o: readtex.c readtex.h
$(CC) -c -I$(INCDIR) $(CFLAGS) readtex.c
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) readtex.c
brick.c: extfuncs.h
@ -58,16 +58,16 @@ mandelbrot.c: extfuncs.h
toyball.c: extfuncs.h
texdemo1: texdemo1.o readtex.o
$(CC) -I$(INCDIR) $(CFLAGS) texdemo1.o readtex.o $(APP_LIB_DEPS) -o $@
$(APP_CC) -I$(INCDIR) $(CFLAGS) texdemo1.o readtex.o $(APP_LIB_DEPS) -o $@
texdemo1.o: texdemo1.c readtex.h extfuncs.h
$(CC) -c -I$(INCDIR) $(CFLAGS) texdemo1.c
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) texdemo1.c
convolutions: convolutions.o readtex.o
$(CC) -I$(INCDIR) $(CFLAGS) convolutions.o readtex.o $(APP_LIB_DEPS) -o $@
$(APP_CC) -I$(INCDIR) $(CFLAGS) convolutions.o readtex.o $(APP_LIB_DEPS) -o $@
convolutions.o: convolutions.c readtex.h
$(CC) -c -I$(INCDIR) $(CFLAGS) convolutions.c
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) convolutions.c
clean:

View File

@ -24,7 +24,7 @@ PROGS = aaindex aapoly aargb accanti accpersp alpha alpha3D anti \
.SUFFIXES: .c
.c: $(LIB_DEP)
$(CC) -I$(INCDIR) $(CFLAGS) $< $(APP_LIB_DEPS) -o $@
$(APP_CC) -I$(INCDIR) $(CFLAGS) $< $(APP_LIB_DEPS) -o $@

View File

@ -18,7 +18,7 @@ PROGS = accum bitmap1 bitmap2 blendeq blendxor copy cursor depth eval fog \
.SUFFIXES: .c
.c: $(LIB_DEP)
$(CC) -I$(INCDIR) $(CFLAGS) $< $(APP_LIB_DEPS) -o $@
$(APP_CC) -I$(INCDIR) $(CFLAGS) $< $(APP_LIB_DEPS) -o $@
##### TARGETS #####
@ -27,10 +27,10 @@ default: $(PROGS)
sphere: sphere.o readtex.o
$(CC) -I$(INCDIR) $(CFLAGS) sphere.o readtex.o $(APP_LIB_DEPS) -o $@
$(APP_CC) -I$(INCDIR) $(CFLAGS) sphere.o readtex.o $(APP_LIB_DEPS) -o $@
sphere.o: sphere.c readtex.h
$(CC) -c -I$(INCDIR) $(CFLAGS) sphere.c
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) sphere.c
readtex.c: $(TOP)/progs/util/readtex.c
cp $< .
@ -39,7 +39,7 @@ readtex.h: $(TOP)/progs/util/readtex.h
cp $< .
readtex.o: readtex.c readtex.h
$(CC) -c -I$(INCDIR) $(CFLAGS) $< -o $@
$(APP_CC) -c -I$(INCDIR) $(CFLAGS) $< -o $@
clean:

View File

@ -60,6 +60,7 @@ SOURCES = \
pbo.c \
prog_parameter.c \
projtex.c \
quads.c \
random.c \
readrate.c \
seccolor.c \
@ -102,13 +103,13 @@ UTIL_FILES = readtex.h readtex.c
.SUFFIXES: .c
.c:
$(CC) $(INCLUDES) $(CFLAGS) $< $(LIBS) -o $@
$(APP_CC) $(INCLUDES) $(CFLAGS) $< $(LIBS) -o $@
.c.o:
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
.S.o:
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
##### TARGETS #####
@ -128,58 +129,58 @@ getproclist.h: $(TOP)/src/mesa/glapi/gl_API.xml getprocaddress.c getprocaddress.
python getprocaddress.py > getproclist.h
arraytexture: arraytexture.o readtex.o
$(CC) $(CFLAGS) arraytexture.o readtex.o $(LIBS) -o $@
$(APP_CC) $(CFLAGS) arraytexture.o readtex.o $(LIBS) -o $@
arraytexture.o: arraytexture.c readtex.h
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
afsmultiarb: afsmultiarb.o readtex.o
$(CC) $(CFLAGS) afsmultiarb.o readtex.o $(LIBS) -o $@
$(APP_CC) $(CFLAGS) afsmultiarb.o readtex.o $(LIBS) -o $@
afsmultiarb.o: afsmultiarb.c readtex.h
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
drawbuffers: drawbuffers.o
$(CC) $(CFLAGS) drawbuffers.o $(LIBS) -o $@
$(APP_CC) $(CFLAGS) drawbuffers.o $(LIBS) -o $@
drawbuffers.o: drawbuffers.c extfuncs.h
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
texrect: texrect.o readtex.o
$(CC) $(CFLAGS) texrect.o readtex.o $(LIBS) -o $@
$(APP_CC) $(CFLAGS) texrect.o readtex.o $(LIBS) -o $@
texrect.o: texrect.c readtex.h
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
bug_3195: bug_3195.o readtex.o
$(CC) $(CFLAGS) bug_3195.o readtex.o $(LIBS) -o $@
$(APP_CC) $(CFLAGS) bug_3195.o readtex.o $(LIBS) -o $@
bug_3195.o: bug_3195.c readtex.h
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
invert: invert.o readtex.o
$(CC) $(CFLAGS) invert.o readtex.o $(LIBS) -o $@
$(APP_CC) $(CFLAGS) invert.o readtex.o $(LIBS) -o $@
invert.o: invert.c readtex.h
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
mipmap_view: mipmap_view.o readtex.o
$(CC) $(CFLAGS) mipmap_view.o readtex.o $(LIBS) -o $@
$(APP_CC) $(CFLAGS) mipmap_view.o readtex.o $(LIBS) -o $@
mipmap_view.o: mipmap_view.c readtex.h
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
fillrate: fillrate.o readtex.o
$(CC) $(CFLAGS) fillrate.o readtex.o $(LIBS) -o $@
$(APP_CC) $(CFLAGS) fillrate.o readtex.o $(LIBS) -o $@
fillrate.o: fillrate.c readtex.h
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
readtex.o: readtex.c
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
readtex.h: $(TOP)/progs/util/readtex.h

258
progs/tests/quads.c Normal file
View File

@ -0,0 +1,258 @@
/**
* Draw colored quads.
*/
#define GL_GLEXT_PROTOTYPES
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <GL/glut.h>
#define NUM_QUADS 20
static int Win;
static GLfloat Xrot = 40, Yrot = 0, Zrot = 0;
static GLboolean Anim = GL_TRUE;
static GLuint Vbuffer = 0;
static GLfloat buf[NUM_QUADS * 6 * 4];
static GLboolean SwapBuffers = GL_TRUE;
static GLint Frames = 0, T0 = 0;
/**
 * GLUT idle callback.
 * Advances the rotation angle about each axis by a fixed per-call
 * increment and asks GLUT to redraw the window.
 */
static void
Idle(void)
{
   Xrot = Xrot + 3.0;
   Yrot = Yrot + 4.0;
   Zrot = Zrot + 2.0;

   glutPostRedisplay();
}
static void
Draw(void)
{
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glPushMatrix();
glRotatef(Xrot, 1, 0, 0);
glRotatef(Yrot, 0, 1, 0);
glRotatef(Zrot, 0, 0, 1);
glDrawArrays(GL_QUADS, 0, NUM_QUADS*4);
glPopMatrix();
if (SwapBuffers)
glutSwapBuffers();
/*
else
glFinish();
*/
{
GLint t = glutGet(GLUT_ELAPSED_TIME);
Frames++;
if (t - T0 >= 5000) {
GLfloat seconds = (t - T0) / 1000.0;
GLfloat fps = Frames / seconds;
printf("%d frames in %6.3f seconds = %6.3f FPS\n",
Frames, seconds, fps);
T0 = t;
Frames = 0;
}
}
}
static void
Reshape(int width, int height)
{
glViewport(0, 0, width, height);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glFrustum(-1.0, 1.0, -1.0, 1.0, 5.0, 25.0);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glTranslatef(0.0, 0.0, -8.0);
}
/**
 * GLUT keyboard callback.
 *   's'      toggle buffer swapping
 *   'a'      toggle animation (installs or removes the idle callback)
 *   'z'/'Z'  decrease/increase the Z rotation by one step
 *   ESC (27) destroy the window and exit
 * Every key press, handled or not, requests a redraw.
 *
 * \param key  ASCII code of the pressed key
 * \param x    unused
 * \param y    unused
 */
static void
Key(unsigned char key, int x, int y)
{
   const GLfloat step = 3.0;

   (void) x;
   (void) y;

   if (key == 's') {
      SwapBuffers = !SwapBuffers;
   }
   else if (key == 'a') {
      Anim = !Anim;
      glutIdleFunc(Anim ? Idle : NULL);
   }
   else if (key == 'z') {
      Zrot -= step;
   }
   else if (key == 'Z') {
      Zrot += step;
   }
   else if (key == 27) {
      glutDestroyWindow(Win);
      exit(0);
   }

   glutPostRedisplay();
}
/**
 * GLUT special-key callback.
 * Up/Down arrows decrease/increase the X rotation and Left/Right arrows
 * decrease/increase the Y rotation, one step per key press.  A redraw
 * is requested for every key, handled or not.
 *
 * \param key  GLUT_KEY_* code of the pressed key
 * \param x    unused
 * \param y    unused
 */
static void
SpecialKey(int key, int x, int y)
{
   const GLfloat step = 3.0;

   (void) x;
   (void) y;

   if (key == GLUT_KEY_UP) {
      Xrot -= step;
   }
   else if (key == GLUT_KEY_DOWN) {
      Xrot += step;
   }
   else if (key == GLUT_KEY_LEFT) {
      Yrot -= step;
   }
   else if (key == GLUT_KEY_RIGHT) {
      Yrot += step;
   }

   glutPostRedisplay();
}
/**
 * Write one interleaved corner (3 color floats followed by 3 position
 * floats) to dst.  Each color channel is the matching coordinate scaled
 * by 0.5 and offset by 0.5.
 */
static void
quad_emit_corner(float *dst, float cx, float cy, float cz)
{
   /* color */
   dst[0] = cx * 0.5 + 0.5;
   dst[1] = cy * 0.5 + 0.5;
   dst[2] = cz * 0.5 + 0.5;
   /* vert */
   dst[3] = cx;
   dst[4] = cy;
   dst[5] = cz;
}

/**
 * Fill v with the four corners of one quad: 24 floats laid out as
 * color(3), vertex(3) per corner.
 * The corners are, in order: (x, y, z), (-x, -y, z), (-x, -y, -z) and
 * (x, y, -z).
 *
 * \param x,y,z  coordinates of the first corner
 * \param v      destination; must have room for 24 floats
 */
static void
quad(float x, float y, float z, float *v)
{
   quad_emit_corner(v +  0,  x,  y,  z);
   quad_emit_corner(v +  6, -x, -y,  z);
   quad_emit_corner(v + 12, -x, -y, -z);
   quad_emit_corner(v + 18,  x,  y, -z);
}
/**
 * Generate the interleaved color/vertex data for all NUM_QUADS quads.
 * Quad i is built by quad() from a point on a circle of radius 1 at
 * angle i/NUM_QUADS * pi, with z fixed at 1.10.
 *
 * \param buf  destination; receives 24 floats per quad
 */
static void
gen_quads(GLfloat *buf)
{
   const float radius = 1.0;
   const int dump_values = 0;   /* flip to 1 to print the generated data */
   float *out = buf;
   int i;

   for (i = 0; i < NUM_QUADS; i++, out += 24) {
      const float angle = i / (float) NUM_QUADS * M_PI;
      const float x = radius * cos(angle);
      const float y = radius * sin(angle);
      const float z = 1.10;

      quad(x, y, z, out);
   }

   if (dump_values) {
      /* One line per float triple: colors and positions alternate. */
      const float *triple = buf;

      for (i = 0; i < NUM_QUADS * 4 * 2; i++, triple += 3) {
         printf("%d: %f %f %f\n", i, triple[0], triple[1], triple[2]);
      }
   }
}
static void
Init(void)
{
int bytes = NUM_QUADS * 4 * 2 * 3 * sizeof(float);
GLfloat *f;
#if 1
glGenBuffers(1, &Vbuffer);
glBindBuffer(GL_ARRAY_BUFFER, Vbuffer);
glBufferData(GL_ARRAY_BUFFER_ARB, bytes, NULL, GL_STATIC_DRAW_ARB);
f = (float *) glMapBuffer(GL_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB);
gen_quads(f);
glUnmapBuffer(GL_ARRAY_BUFFER_ARB);
glColorPointer(3, GL_FLOAT, 6*sizeof(float), (void *) 0);
glVertexPointer(3, GL_FLOAT, 6*sizeof(float), (void *) 12);
#else
f = buf;
gen_quads(f);
glColorPointer(3, GL_FLOAT, 6*sizeof(float), buf);
glVertexPointer(3, GL_FLOAT, 6*sizeof(float), buf + 3);
#endif
glEnableClientState(GL_COLOR_ARRAY);
glEnableClientState(GL_VERTEX_ARRAY);
glEnable(GL_DEPTH_TEST);
glClearColor(0.5, 0.5, 0.5, 0.0);
}
/**
 * Program entry point: creates a 600x600 double-buffered RGB window
 * with a depth buffer, registers the GLUT callbacks, performs the GL
 * setup and enters the GLUT main loop.
 */
int
main(int argc, char *argv[])
{
   glutInit(&argc, argv);

   /* Window configuration. */
   glutInitWindowPosition(0, 0);
   glutInitWindowSize(600, 600);
   glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH);
   Win = glutCreateWindow(argv[0]);

   /* Event callbacks. */
   glutReshapeFunc(Reshape);
   glutKeyboardFunc(Key);
   glutSpecialFunc(SpecialKey);
   glutDisplayFunc(Draw);
   if (Anim) {
      glutIdleFunc(Idle);
   }

   /* Init() issues GL calls, so it runs only after the window exists. */
   Init();

   glutMainLoop();
   return 0;
}

View File

@ -139,13 +139,13 @@ UTIL_FILES = readtex.h readtex.c
.SUFFIXES: .c
.c:
$(CC) $(INCLUDES) $(CFLAGS) $< $(LIBS) -o $@
$(APP_CC) $(INCLUDES) $(CFLAGS) $< $(LIBS) -o $@
.c.o:
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
.S.o:
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
##### TARGETS #####
@ -166,19 +166,19 @@ getproclist.h: $(TOP)/src/mesa/glapi/gl_API.xml getprocaddress.c getprocaddress.
texrect: texrect.o readtex.o
$(CC) texrect.o readtex.o $(LIBS) -o $@
$(APP_CC) texrect.o readtex.o $(LIBS) -o $@
texrect.o: texrect.c readtex.h
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
invert: invert.o readtex.o
$(CC) invert.o readtex.o $(LIBS) -o $@
$(APP_CC) invert.o readtex.o $(LIBS) -o $@
invert.o: invert.c readtex.h
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
readtex.o: readtex.c
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(APP_CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
readtex.h: $(TOP)/progs/util/readtex.h

View File

@ -37,9 +37,36 @@
#include <GL/glut.h>
#include <stdlib.h>
#include <stdio.h>
static int leftFirst = GL_TRUE;
/**
 * Depth comparison functions paired with their printable names.
 * The table is indexed by curFunc: display() passes the 'func' member to
 * glDepthFunc() and prints the 'str' member.
 */
static struct { GLenum func; const char *str; } funcs[] =
{
{ GL_LESS, "GL_LESS" },
{ GL_LEQUAL, "GL_LEQUAL" },
{ GL_GREATER, "GL_GREATER" },
{ GL_GEQUAL, "GL_GEQUAL" },
{ GL_EQUAL, "GL_EQUAL" },
{ GL_NOTEQUAL, "GL_NOTEQUAL" },
{ GL_ALWAYS, "GL_ALWAYS" },
{ GL_NEVER, "GL_NEVER" },
};
#define NUM_FUNCS (sizeof(funcs) / sizeof(funcs[0]))
static int curFunc = 0;
static double clearVal = 1.0;
/** Print the list of keyboard controls to stdout. */
static void usage(void)
{
   fputs("t - toggle rendering order of triangles\n"
         "c - toggle Z clear value between 0, 1\n"
         "f - cycle through depth test functions\n",
         stdout);
}
static void init(void)
{
glEnable(GL_DEPTH_TEST);
@ -70,6 +97,11 @@ static void drawRightTriangle(void)
void display(void)
{
printf("GL_CLEAR_DEPTH = %f GL_DEPTH_FUNC = %s\n",
clearVal, funcs[curFunc].str);
glClearDepth(clearVal);
glDepthFunc(funcs[curFunc].func);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
if (leftFirst) {
@ -99,6 +131,16 @@ void reshape(int w, int h)
void keyboard(unsigned char key, int x, int y)
{
switch (key) {
case 'c':
case 'C':
clearVal = 1.0 - clearVal;
glutPostRedisplay();
break;
case 'f':
case 'F':
curFunc = (curFunc + 1) % NUM_FUNCS;
glutPostRedisplay();
break;
case 't':
case 'T':
leftFirst = !leftFirst;
@ -122,10 +164,11 @@ int main(int argc, char** argv)
glutInitDisplayMode (GLUT_SINGLE | GLUT_RGB | GLUT_DEPTH);
glutInitWindowSize (200, 200);
glutCreateWindow (argv[0]);
init();
glutReshapeFunc (reshape);
glutKeyboardFunc (keyboard);
glutDisplayFunc (display);
init();
usage();
glutMainLoop();
return 0;
}

View File

@ -39,7 +39,7 @@ PROGS = glthreads \
.SUFFIXES: .c
.c: $(LIB_DEP)
$(CC) -I$(INCDIR) $(CFLAGS) $< $(APP_LIB_DEPS) -o $@
$(APP_CC) -I$(INCDIR) $(CFLAGS) $< $(APP_LIB_DEPS) -o $@
##### TARGETS #####
@ -54,32 +54,32 @@ clean:
# special cases
pbinfo: pbinfo.o pbutil.o
$(CC) -I$(INCDIR) $(CFLAGS) pbinfo.o pbutil.o $(APP_LIB_DEPS) -o $@
$(APP_CC) -I$(INCDIR) $(CFLAGS) pbinfo.o pbutil.o $(APP_LIB_DEPS) -o $@
pbdemo: pbdemo.o pbutil.o
$(CC) -I$(INCDIR) $(CFLAGS) pbdemo.o pbutil.o $(APP_LIB_DEPS) -o $@
$(APP_CC) -I$(INCDIR) $(CFLAGS) pbdemo.o pbutil.o $(APP_LIB_DEPS) -o $@
pbinfo.o: pbinfo.c pbutil.h
$(CC) -c -I. -I$(INCDIR) $(CFLAGS) pbinfo.c
$(APP_CC) -c -I. -I$(INCDIR) $(CFLAGS) pbinfo.c
pbdemo.o: pbdemo.c pbutil.h
$(CC) -c -I. -I$(INCDIR) $(CFLAGS) pbdemo.c
$(APP_CC) -c -I. -I$(INCDIR) $(CFLAGS) pbdemo.c
pbutil.o: pbutil.c pbutil.h
$(CC) -c -I. -I$(INCDIR) $(CFLAGS) pbutil.c
$(APP_CC) -c -I. -I$(INCDIR) $(CFLAGS) pbutil.c
glxgears_fbconfig: glxgears_fbconfig.o pbutil.o
$(CC) -I$(INCDIR) $(CFLAGS) glxgears_fbconfig.o pbutil.o $(APP_LIB_DEPS) -o $@
$(APP_CC) -I$(INCDIR) $(CFLAGS) glxgears_fbconfig.o pbutil.o $(APP_LIB_DEPS) -o $@
glxgears_fbconfig.o: glxgears_fbconfig.c pbutil.h
$(CC) -I$(INCDIR) $(CFLAGS) -c -I. $(CFLAGS) glxgears_fbconfig.c
$(APP_CC) -I$(INCDIR) $(CFLAGS) -c -I. $(CFLAGS) glxgears_fbconfig.c
xrotfontdemo: xrotfontdemo.o xuserotfont.o
$(CC) -I$(INCDIR) $(CFLAGS) xrotfontdemo.o xuserotfont.o $(APP_LIB_DEPS) -o $@
$(APP_CC) -I$(INCDIR) $(CFLAGS) xrotfontdemo.o xuserotfont.o $(APP_LIB_DEPS) -o $@
xuserotfont.o: xuserotfont.c xuserotfont.h
$(CC) -c -I. -I$(INCDIR) $(CFLAGS) xuserotfont.c
$(APP_CC) -c -I. -I$(INCDIR) $(CFLAGS) xuserotfont.c
xrotfontdemo.o: xrotfontdemo.c xuserotfont.h
$(CC) -c -I. -I$(INCDIR) $(CFLAGS) xrotfontdemo.c
$(APP_CC) -c -I. -I$(INCDIR) $(CFLAGS) xrotfontdemo.c

View File

@ -8,7 +8,7 @@ include $(TOP)/configs/current
INCLUDE_DIRS = \
-I. \
-I/usr/include \
-I/usr/include/drm \
$(shell pkg-config --cflags-only-I libdrm) \
-I$(TOP)/include \
-I$(TOP)/include/GL/internal \
-I$(TOP)/src/mesa \

View File

@ -13,7 +13,7 @@ DRIVER_NAME = egl_xdri.so
INCLUDE_DIRS = \
-I. \
-I/usr/include \
-I/usr/include/drm \
$(shell pkg-config --cflags-only-I libdrm) \
-I$(TOP)/include \
-I$(TOP)/include/GL/internal \
-I$(TOP)/src/mesa/glapi \
@ -48,6 +48,7 @@ $(TOP)/$(LIB_DIR)/$(DRIVER_NAME): $(OBJECTS)
$(TOP)/bin/mklib -o $(DRIVER_NAME) \
-noprefix \
-major 1 -minor 0 \
-L $(TOP)/$(LIB_DIR) \
-install $(TOP)/$(LIB_DIR) \
$(OBJECTS) $(DRM_LIB) $(MISC_LIBS)

View File

@ -151,8 +151,8 @@ static void emit_RR(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.rB = rB;
inst.inst.rA = rA;
inst.inst.rT = rT;
*p->csr = inst.bits;
p->csr++;
p->store[p->num_inst++] = inst.bits;
assert(p->num_inst <= p->max_inst);
}
@ -165,8 +165,8 @@ static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.rB = rB;
inst.inst.rA = rA;
inst.inst.rC = rC;
*p->csr = inst.bits;
p->csr++;
p->store[p->num_inst++] = inst.bits;
assert(p->num_inst <= p->max_inst);
}
@ -178,8 +178,8 @@ static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.i7 = imm;
inst.inst.rA = rA;
inst.inst.rT = rT;
*p->csr = inst.bits;
p->csr++;
p->store[p->num_inst++] = inst.bits;
assert(p->num_inst <= p->max_inst);
}
@ -192,8 +192,8 @@ static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.i8 = imm;
inst.inst.rA = rA;
inst.inst.rT = rT;
*p->csr = inst.bits;
p->csr++;
p->store[p->num_inst++] = inst.bits;
assert(p->num_inst <= p->max_inst);
}
@ -206,8 +206,8 @@ static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.i10 = imm;
inst.inst.rA = rA;
inst.inst.rT = rT;
*p->csr = inst.bits;
p->csr++;
p->store[p->num_inst++] = inst.bits;
assert(p->num_inst <= p->max_inst);
}
@ -218,8 +218,8 @@ static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.op = op;
inst.inst.i16 = imm;
inst.inst.rT = rT;
*p->csr = inst.bits;
p->csr++;
p->store[p->num_inst++] = inst.bits;
assert(p->num_inst <= p->max_inst);
}
@ -230,8 +230,8 @@ static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.op = op;
inst.inst.i18 = imm;
inst.inst.rT = rT;
*p->csr = inst.bits;
p->csr++;
p->store[p->num_inst++] = inst.bits;
assert(p->num_inst <= p->max_inst);
}
@ -300,13 +300,16 @@ void _name (struct spe_function *p, int imm) \
#include "rtasm_ppc_spe.h"
/*
/**
* Initialize an spe_function.
* \param code_size size of instruction buffer to allocate, in bytes.
*/
void spe_init_func(struct spe_function *p, unsigned code_size)
{
p->store = align_malloc(code_size, 16);
p->csr = p->store;
p->num_inst = 0;
p->max_inst = code_size / SPE_INST_SIZE;
/* Conservatively treat R0 - R2 and R80 - R127 as non-volatile.
*/
p->regs[0] = ~7;
@ -316,21 +319,26 @@ void spe_init_func(struct spe_function *p, unsigned code_size)
void spe_release_func(struct spe_function *p)
{
assert(p->num_inst <= p->max_inst);
if (p->store != NULL) {
align_free(p->store);
}
p->store = NULL;
p->csr = NULL;
}
/**
* Allocate an SPE register.
* \return register index or -1 if none left.
*/
int spe_allocate_available_register(struct spe_function *p)
{
unsigned i;
for (i = 0; i < 128; i++) {
for (i = 0; i < SPE_NUM_REGS; i++) {
const uint64_t mask = (1ULL << (i % 64));
const unsigned idx = i / 64;
assert(idx < 2);
if ((p->regs[idx] & mask) != 0) {
p->regs[idx] &= ~mask;
return i;
@ -341,11 +349,15 @@ int spe_allocate_available_register(struct spe_function *p)
}
/**
* Mark the given SPE register as "allocated".
*/
int spe_allocate_register(struct spe_function *p, int reg)
{
const unsigned idx = reg / 64;
const unsigned bit = reg % 64;
assert(reg < SPE_NUM_REGS);
assert((p->regs[idx] & (1ULL << bit)) != 0);
p->regs[idx] &= ~(1ULL << bit);
@ -353,57 +365,75 @@ int spe_allocate_register(struct spe_function *p, int reg)
}
/**
* Mark the given SPE register as "unallocated".
*/
void spe_release_register(struct spe_function *p, int reg)
{
const unsigned idx = reg / 64;
const unsigned bit = reg % 64;
assert(idx < 2);
assert(reg < SPE_NUM_REGS);
assert((p->regs[idx] & (1ULL << bit)) == 0);
p->regs[idx] |= (1ULL << bit);
}
/**
* For branch instructions:
* \param d if 1, disable interrupts if branch is taken
* \param e if 1, enable interrupts if branch is taken
* If d and e are both zero, don't change interrupt status (right?)
*/
/**
 * Branch Indirect to address in rA.
 * \param d  if 1, disable interrupts if branch is taken
 * \param e  if 1, enable interrupts if branch is taken
 */
void spe_bi(struct spe_function *p, unsigned rA, int d, int e)
{
   /* d goes to bit 5 and e to bit 4 of the 7-bit immediate field. */
   const int irq_bits = (d << 5) | (e << 4);

   emit_RI7(p, 0x1a8, 0, rA, irq_bits);
}
/**
 * Interrupt Return.
 * \param d  if 1, disable interrupts if branch is taken
 * \param e  if 1, enable interrupts if branch is taken
 */
void spe_iret(struct spe_function *p, unsigned rA, int d, int e)
{
   /* d goes to bit 5 and e to bit 4 of the 7-bit immediate field. */
   const int irq_bits = (d << 5) | (e << 4);

   emit_RI7(p, 0x1aa, 0, rA, irq_bits);
}
/**
 * Branch indirect and set link on external data.
 * \param d  if 1, disable interrupts if branch is taken
 * \param e  if 1, enable interrupts if branch is taken
 */
void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, int d,
                int e)
{
   /* d goes to bit 5 and e to bit 4 of the 7-bit immediate field. */
   const int irq_bits = (d << 5) | (e << 4);

   emit_RI7(p, 0x1ab, rT, rA, irq_bits);
}
/**
 * Branch indirect and set link.  Save PC in rT, jump to rA.
 * \param d  if 1, disable interrupts if branch is taken
 * \param e  if 1, enable interrupts if branch is taken
 */
void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, int d,
              int e)
{
   /* d goes to bit 5 and e to bit 4 of the 7-bit immediate field. */
   const int irq_bits = (d << 5) | (e << 4);

   emit_RI7(p, 0x1a9, rT, rA, irq_bits);
}
void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d,
int e)
/** Branch indirect if zero word. If rT.word[0]==0, jump to rA. */
void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
{
emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4));
}
/**
 * Branch indirect if non-zero word.  If rT.word[0]!=0, jump to rA.
 * \param d  if 1, disable interrupts if branch is taken
 * \param e  if 1, enable interrupts if branch is taken
 */
void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
{
   /* d goes to bit 5 and e to bit 4 of the 7-bit immediate field. */
   const int irq_bits = (d << 5) | (e << 4);

   emit_RI7(p, 0x129, rT, rA, irq_bits);
}
/**
 * Branch indirect if zero halfword.  If rT.halfword[1]==0, jump to rA.
 * \param d  if 1, disable interrupts if branch is taken
 * \param e  if 1, enable interrupts if branch is taken
 */
void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
{
   /* d goes to bit 5 and e to bit 4 of the 7-bit immediate field. */
   const int irq_bits = (d << 5) | (e << 4);

   emit_RI7(p, 0x12a, rT, rA, irq_bits);
}
/** Branch indirect if non-zero halfword. If rT.halfword[1]!=0, jump to rA. */
void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
{
emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4));
@ -432,4 +462,81 @@ EMIT_R (spe_mfspr, 0x00c);
EMIT_R (spe_mtspr, 0x10c);
#endif
/**
** Helper / "macro" instructions.
** Use somewhat verbose names as a reminder that these aren't native
** SPE instructions.
**/
/**
 * Emit code that loads the immediate float x into rT.
 *
 * The choice of instructions is made on the bit pattern of x rather
 * than on its numeric value:
 *  - all bits zero (+0.0f): a single "il rT, 0";
 *  - low 16 bits zero (e.g. 0.5f, 1.0f, -1.0f, -0.0f): a single "ilhu"
 *    with the upper halfword;
 *  - anything else: "ilhu" with the upper halfword followed by "iohl"
 *    with the lower halfword.
 *
 * Comparing bits instead of values keeps the sign of -0.0f, which a
 * numeric "x == 0.0f" test would treat as +0.0f.
 *
 * NOTE(review): like the original code, this assumes float and unsigned
 * are both 32 bits wide.
 *
 * \param p   function being generated
 * \param rT  destination register
 * \param x   value to load
 */
void
spe_load_float(struct spe_function *p, unsigned rT, float x)
{
   union {
      float f;
      unsigned u;
   } bits;

   bits.f = x;

   if (bits.u == 0) {
      spe_il(p, rT, 0x0);
   }
   else {
      const unsigned lo = bits.u & 0xffff;

      spe_ilhu(p, rT, bits.u >> 16);
      if (lo != 0) {
         /* Only OR in the low halfword when it has bits set. */
         spe_iohl(p, rT, lo);
      }
   }
}
/**
 * Emit code that loads the immediate integer i into rT.
 * Values that fit in a signed 16-bit immediate use a single "il";
 * anything else is built with "ilhu" (upper halfword) followed by
 * "iohl" (lower halfword).
 *
 * \param p   function being generated
 * \param rT  destination register
 * \param i   value to load
 */
void
spe_load_int(struct spe_function *p, unsigned rT, int i)
{
   const int fits_in_i16 = (i >= -32768) && (i <= 32767);

   if (fits_in_i16) {
      spe_il(p, rT, i);
      return;
   }

   spe_ilhu(p, rT, i >> 16);
   spe_iohl(p, rT, i & 0xffff);
}
/**
 * Emit a two-instruction splat of rA into rT: rT is first loaded with
 * a shuffle pattern and then used as the control operand of shufb with
 * rA as both sources.
 *
 * \param p   function being generated
 * \param rT  destination register (also holds the temporary pattern)
 * \param rA  source register
 */
void
spe_splat(struct spe_function *p, unsigned rT, unsigned rA)
{
   /* 0x00010203 == 66051.  NOTE(review): presumably the byte selectors
    * 00 01 02 03, i.e. "take bytes 0..3 of the source" for every word;
    * confirm against the SPU ISA description of shufb.
    */
   const int shuffle_pattern = 0x00010203;

   spe_ila(p, rT, shuffle_pattern);
   spe_shufb(p, rT, rA, rA, rT);
}
/**
 * Emit code that inverts rT in place, implemented as a NOR of rT with
 * itself.
 *
 * \param p   function being generated
 * \param rT  register to complement (source and destination)
 */
void
spe_complement(struct spe_function *p, unsigned rT)
{
   const unsigned src = rT;

   spe_nor(p, rT, src, src);
}
/**
 * Emit code for rT = rA, implemented as an OR-immediate of rA with 0.
 *
 * \param p   function being generated
 * \param rT  destination register
 * \param rA  source register
 */
void
spe_move(struct spe_function *p, unsigned rT, unsigned rA)
{
   const int no_bits = 0;   /* OR-ing with zero leaves rA unchanged */

   spe_ori(p, rT, rA, no_bits);
}
/**
 * Emit code that clears rT, implemented as an XOR of rT with itself.
 *
 * \param p   function being generated
 * \param rT  register to clear (source and destination)
 */
void
spe_zero(struct spe_function *p, unsigned rT)
{
   const unsigned src = rT;

   spe_xor(p, rT, src, src);
}
#endif /* GALLIUM_CELL */

View File

@ -25,6 +25,7 @@
/**
* \file
* Real-time assembly generation interface for Cell B.E. SPEs.
* For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf
*
* \author Ian Romanick <idr@us.ibm.com>
*/
@ -32,13 +33,24 @@
#ifndef RTASM_PPC_SPE_H
#define RTASM_PPC_SPE_H
struct spe_function {
/**
*
*/
uint32_t *store;
uint32_t *csr;
const char *fn;
/** 4 bytes per instruction */
#define SPE_INST_SIZE 4
/** number of general-purpose SIMD registers */
#define SPE_NUM_REGS 128
/** Return Address register */
#define SPE_REG_RA 0
/** Stack Pointer register */
#define SPE_REG_SP 1
struct spe_function
{
uint32_t *store; /**< instruction buffer */
uint num_inst;
uint max_inst;
/**
* Mask of used / unused registers
@ -50,7 +62,7 @@ struct spe_function {
* spe_allocate_register, spe_allocate_available_register,
* spe_release_register
*/
uint64_t regs[2];
uint64_t regs[SPE_NUM_REGS / 64];
};
extern void spe_init_func(struct spe_function *p, unsigned code_size);
@ -119,7 +131,8 @@ EMIT_RI16(spe_ilhu, 0x082);
EMIT_RI16(spe_il, 0x081);
EMIT_RI18(spe_ila, 0x021);
EMIT_RI16(spe_iohl, 0x0c1);
EMIT_RI16(spe_fsmbi, 0x0c5);
EMIT_RI16(spe_fsmbi, 0x065);
/* Integer and logical instructions
@ -271,6 +284,31 @@ extern void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA,
int d, int e);
/** Load/splat immediate float into rT. */
extern void
spe_load_float(struct spe_function *p, unsigned rT, float x);
/** Load/splat immediate int into rT. */
extern void
spe_load_int(struct spe_function *p, unsigned rT, int i);
/** Replicate word 0 of rA across rT. */
extern void
spe_splat(struct spe_function *p, unsigned rT, unsigned rA);
/** Complement/invert all bits in rT. */
extern void
spe_complement(struct spe_function *p, unsigned rT);
/** rT = rA. */
extern void
spe_move(struct spe_function *p, unsigned rT, unsigned rA);
/** rT = {0,0,0,0}. */
extern void
spe_zero(struct spe_function *p, unsigned rT);
/* Floating-point instructions
*/
EMIT_RR (spe_fa, 0x2c4);

View File

@ -84,7 +84,7 @@
#define CELL_CMD_BATCH 5
#define CELL_CMD_RELEASE_VERTS 6
#define CELL_CMD_STATE_FRAMEBUFFER 10
#define CELL_CMD_STATE_DEPTH_STENCIL 11
#define CELL_CMD_STATE_FRAGMENT_OPS 11
#define CELL_CMD_STATE_SAMPLER 12
#define CELL_CMD_STATE_TEXTURE 13
#define CELL_CMD_STATE_VERTEX_INFO 14
@ -92,9 +92,8 @@
#define CELL_CMD_STATE_UNIFORMS 16
#define CELL_CMD_STATE_VS_ARRAY_INFO 17
#define CELL_CMD_STATE_BIND_VS 18
#define CELL_CMD_STATE_BLEND 19
#define CELL_CMD_STATE_FRAGMENT_PROGRAM 19
#define CELL_CMD_STATE_ATTRIB_FETCH 20
#define CELL_CMD_STATE_LOGICOP 21
#define CELL_CMD_VS_EXECUTE 22
#define CELL_CMD_FLUSH_BUFFER_RANGE 23
@ -110,29 +109,34 @@
#define CELL_DEBUG_SYNC (1 << 1)
/**
*/
struct cell_command_depth_stencil_alpha_test {
uint64_t base; /**< Effective address of code start. */
unsigned size; /**< Size in bytes of SPE code. */
unsigned read_depth; /**< Flag: should depth be read? */
unsigned read_stencil; /**< Flag: should stencil be read? */
};
/** Max instructions for doing per-fragment operations */
#define SPU_MAX_FRAGMENT_OPS_INSTS 64
/**
* Upload code to perform framebuffer blend operation
* Command to specify per-fragment operations state and generated code.
*/
struct cell_command_blend {
uint64_t base; /**< Effective address of code start. */
unsigned size; /**< Size in bytes of SPE code. */
unsigned read_fb; /**< Flag: should framebuffer be read? */
struct cell_command_fragment_ops
{
uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */
/* Per-fragment state carried along with the command. */
struct pipe_depth_stencil_alpha_state dsa;
struct pipe_blend_state blend;
/* Generated per-fragment-ops code; the array holds at most
 * SPU_MAX_FRAGMENT_OPS_INSTS entries.
 * NOTE(review): presumably one SPE instruction word per element - confirm.
 */
unsigned code[SPU_MAX_FRAGMENT_OPS_INSTS];
};
struct cell_command_logicop {
uint64_t base; /**< Effective address of code start. */
unsigned size; /**< Size in bytes of SPE code. */
/** Max instructions for fragment programs */
#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 128
/**
* Command to send a fragment program to SPUs.
*/
struct cell_command_fragment_program
{
uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */
uint num_inst; /**< Number of instructions */
/* Program code; the array holds at most SPU_MAX_FRAGMENT_PROGRAM_INSTS
 * entries.
 * NOTE(review): presumably only the first num_inst entries are valid - confirm.
 */
unsigned code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
};
@ -172,13 +176,15 @@ struct cell_array_info
};
struct cell_attribute_fetch_code {
struct cell_attribute_fetch_code
{
uint64_t base;
uint size;
};
struct cell_buffer_range {
struct cell_buffer_range
{
uint64_t base;
unsigned size;
};

View File

@ -25,9 +25,10 @@ SOURCES = \
cell_context.c \
cell_draw_arrays.c \
cell_flush.c \
cell_gen_fragment.c \
cell_gen_fp.c \
cell_state_derived.c \
cell_state_emit.c \
cell_state_per_fragment.c \
cell_state_shader.c \
cell_pipe_state.c \
cell_screen.c \

View File

@ -61,6 +61,7 @@ struct cell_fragment_shader_state
{
struct pipe_shader_state shader;
struct tgsi_shader_info info;
struct spe_function code;
void *data;
};

View File

@ -0,0 +1,523 @@
/**************************************************************************
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/**
* Generate SPU fragment program/shader code.
*
* Note that we generate SOA-style code here. So each TGSI instruction
* operates on four pixels (and is translated into four SPU instructions,
* generally speaking).
*
* \author Brian Paul
*/
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_exec.h"
#include "tgsi/tgsi_dump.h"
#include "rtasm/rtasm_ppc_spe.h"
#include "util/u_memory.h"
#include "cell_context.h"
#include "cell_gen_fp.h"
/**
 * Set to 1 to enable debug/disassembly printfs (the "#if DISASSEM"
 * sections throughout this file).
 * NOTE(review): the value 01 (octal one) leaves those printfs enabled;
 * set to 0 to silence them.
 */
#define DISASSEM 01
/**
 * Context needed during code generation.
 * One instance is zero-initialized by cell_gen_fragment_program() and
 * threaded through all of the emit_*() helpers.
 */
struct codegen
{
int inputs_reg; /**< 1st function parameter: base register for input loads */
int outputs_reg; /**< 2nd function parameter: base register for output stores */
int constants_reg; /**< 3rd function parameter: pointer to constants array */
int temp_regs[8][4]; /**< maps TGSI temps to SPE registers, [temp index][channel] */
int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0}; <= 0 means not allocated yet */
/** Per-instruction temps / intermediate temps */
int num_itemps; /**< number of valid entries in itemps[] */
int itemps[3]; /**< registers handed out by get_itemp(), released by free_itemps() */
struct spe_function *f; /**< the SPE function that instructions are appended to */
boolean error; /**< set when an instruction could not be translated */
};
/**
 * Allocate an intermediate (per-instruction) temporary register.
 * The register is remembered in gen->itemps[] so that free_itemps() can
 * release it once the current instruction has been emitted.
 *
 * \return index of the newly allocated SPE register
 */
static int
get_itemp(struct codegen *gen)
{
   const int reg = spe_allocate_available_register(gen->f);

   /* there must still be a free slot to record this register in */
   assert(gen->num_itemps < Elements(gen->itemps));

   gen->itemps[gen->num_itemps] = reg;
   gen->num_itemps += 1;

   return reg;
}
/**
 * Release every intermediate temporary register handed out by get_itemp()
 * and reset the bookkeeping.  Meant to be called once an instruction has
 * been fully emitted.
 */
static void
free_itemps(struct codegen *gen)
{
   int idx = 0;

   /* release in the same order the registers were allocated */
   while (idx < gen->num_itemps) {
      spe_release_register(gen->f, gen->itemps[idx]);
      idx++;
   }

   gen->num_itemps = 0;
}
/**
 * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
 * The register is allocated and initialized upon the first call only;
 * later calls just return the cached register index and emit no code.
 *
 * gen->one_reg <= 0 means "not allocated yet" (the codegen struct is
 * zero-initialized before code generation starts).
 */
static int
get_const_one_reg(struct codegen *gen)
{
   if (gen->one_reg <= 0) {
      gen->one_reg = spe_allocate_available_register(gen->f);

      /* one = {1.0, 1.0, 1.0, 1.0}
       * Emit the load exactly once; re-emitting it on every call would
       * add a redundant instruction to the shader at each use.
       */
      spe_load_float(gen->f, gen->one_reg, 1.0f);
#if DISASSEM
      printf("il\tr%d, 1.0f\n", gen->one_reg);
#endif
   }

   return gen->one_reg;
}
/**
* Return the index of the SPU temporary containing the named TGSI
* source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
* just return the corresponding SPE register. If the TGIS register
* is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
* and emit an SPE load instruction.
*/
static int
get_src_reg(struct codegen *gen,
int channel,
const struct tgsi_full_src_register *src)
{
int reg;
/* XXX need to examine src swizzle info here.
* That will involve changing the channel var...
*/
switch (src->SrcRegister.File) {
case TGSI_FILE_TEMPORARY:
reg = gen->temp_regs[src->SrcRegister.Index][channel];
break;
case TGSI_FILE_INPUT:
{
/* offset is measured in quadwords, not bytes */
int offset = src->SrcRegister.Index * 4 + channel;
reg = get_itemp(gen);
/* Load: reg = memory[(machine_reg) + offset] */
spe_lqd(gen->f, reg, gen->inputs_reg, offset);
#if DISASSEM
printf("lqd\tr%d, r%d + %d\n", reg, gen->inputs_reg, offset);
#endif
}
break;
case TGSI_FILE_IMMEDIATE:
/* xxx fall-through for now / fix */
case TGSI_FILE_CONSTANT:
/* xxx fall-through for now / fix */
default:
assert(0);
}
return reg;
}
/**
* Return the index of an SPE register to use for the given TGSI register.
* If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the
* corresponding SPE register is returned. If the TGSI register is
* TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
* See store_dest_reg() below...
*/
static int
get_dst_reg(struct codegen *gen,
int channel,
const struct tgsi_full_dst_register *dest)
{
int reg;
switch (dest->DstRegister.File) {
case TGSI_FILE_TEMPORARY:
reg = gen->temp_regs[dest->DstRegister.Index][channel];
break;
case TGSI_FILE_OUTPUT:
reg = get_itemp(gen);
break;
default:
assert(0);
}
return reg;
}
/**
 * Finish a write to a TGSI destination register.
 * Temporaries live in SPE registers, so nothing needs to be done for them.
 * For TGSI_FILE_OUTPUT an SPE store instruction is emitted that writes
 * value_reg to the outputs array.
 *
 * \param gen  code generation context
 * \param value_reg  the SPE register containing the value to store.
 *                   This would have been returned by get_dst_reg().
 * \param channel  which channel (0..3) of the destination is written
 * \param dest  the TGSI destination register
 */
static void
store_dest_reg(struct codegen *gen,
               int value_reg, int channel,
               const struct tgsi_full_dst_register *dest)
{
   const unsigned file = dest->DstRegister.File;

   if (file == TGSI_FILE_TEMPORARY) {
      /* value already lives in its final register */
      return;
   }

   if (file == TGSI_FILE_OUTPUT) {
      /* quadword (not byte) offset of this channel in the outputs array */
      const int qw_offset = dest->DstRegister.Index * 4 + channel;

      /* memory[(outputs_reg) + qw_offset] = value_reg */
      spe_stqd(gen->f, value_reg, gen->outputs_reg, qw_offset);
#if DISASSEM
      printf("stqd\tr%d, r%d + %d\n", value_reg, gen->outputs_reg, qw_offset);
#endif
      return;
   }

   /* no other destination files are handled */
   assert(0);
}
static boolean
emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
int ch;
for (ch = 0; ch < 4; ch++) {
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
int dst_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
/* XXX we don't always need to actually emit a mov instruction here */
spe_move(gen->f, dst_reg, src_reg);
#if DISASSEM
printf("mov\tr%d, r%d\n", dst_reg, src_reg);
#endif
store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]);
free_itemps(gen);
}
}
return true;
}
/**
* Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
* becomes (up to) four SPU "fa" instructions because we're doing SOA
* processing.
*/
static boolean
emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
int ch;
/* Loop over Red/Green/Blue/Alpha channels */
for (ch = 0; ch < 4; ch++) {
/* If the dest R, G, B or A writemask is enabled... */
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
/* get indexes of the two src, one dest SPE registers */
int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
/* Emit actual SPE instruction: d = s1 + s2 */
spe_fa(gen->f, d_reg, s1_reg, s2_reg);
#if DISASSEM
printf("fa\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
#endif
/* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
/* Free any intermediate temps we allocated */
free_itemps(gen);
}
}
return true;
}
/**
* Emit multiply. See emit_ADD for comments.
*/
static boolean
emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
int ch;
for (ch = 0; ch < 4; ch++) {
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
/* d = s1 * s2 */
spe_fm(gen->f, d_reg, s1_reg, s2_reg);
#if DISASSEM
printf("fm\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
#endif
store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
free_itemps(gen);
}
}
return true;
}
/**
 * Emit set-if-greater-than.
 * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
 * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
 * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND
 * against a register holding {1.0, 1.0, 1.0, 1.0}.
 *
 * The "one" register index is fetched once per channel and reused for the
 * debug printout: get_const_one_reg() may emit SPE code, so it must not be
 * called from inside a DISASSEM-only printf (that would make the generated
 * shader depend on whether debug output is enabled).
 *
 * \return true (always)
 */
static boolean
emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
   int ch;

   for (ch = 0; ch < 4; ch++) {
      if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
         int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
         int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
         int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
         int one_reg;

         /* d = (s1 > s2) */
         spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
#if DISASSEM
         printf("fcgt\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg);
#endif

         /* convert d from 0x0/0xffffffff to 0.0/1.0 */
         /* d = d & one_reg */
         one_reg = get_const_one_reg(gen);
         spe_and(gen->f, d_reg, d_reg, one_reg);
#if DISASSEM
         printf("and\tr%d, r%d, r%d\n", d_reg, d_reg, one_reg);
#endif

         store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
         free_itemps(gen);
      }
   }

   return true;
}
/**
 * Emit code for TGSI_OPCODE_END.
 * The shader is a function call on the SPU, so ending it simply means
 * branching back through the return-address register.
 *
 * More code may follow this point; it would only be reached through
 * TGSI_OPCODE_CALL.
 *
 * \return true (always)
 */
static boolean
emit_END(struct codegen *gen)
{
   /* branch indirect to the return address, i.e. "return" */
   spe_bi(gen->f, SPE_REG_RA, 0, 0);

#if DISASSEM
   printf("bi\trRA\n");
#endif

   return true;
}
/**
 * Dispatch one TGSI instruction to the matching emit_*() helper.
 *
 * \return the helper's result, or false if the opcode isn't supported
 */
static boolean
emit_instruction(struct codegen *gen,
                 const struct tgsi_full_instruction *inst)
{
   boolean ok;

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_MOV:
      ok = emit_MOV(gen, inst);
      break;
   case TGSI_OPCODE_MUL:
      ok = emit_MUL(gen, inst);
      break;
   case TGSI_OPCODE_ADD:
      ok = emit_ADD(gen, inst);
      break;
   case TGSI_OPCODE_SGT:
      ok = emit_SGT(gen, inst);
      break;
   case TGSI_OPCODE_END:
      ok = emit_END(gen);
      break;
   /* XXX lots more cases to do... */
   default:
      /* opcode not implemented yet */
      ok = false;
      break;
   }

   return ok;
}
/**
* Emit "code" for a TGSI declaration.
* We only care about TGSI TEMPORARY register declarations at this time.
* For each TGSI TEMPORARY we allocate four SPE registers.
*/
static void
emit_declaration(struct codegen *gen, const struct tgsi_full_declaration *decl)
{
int i, ch;
switch (decl->Declaration.File) {
case TGSI_FILE_TEMPORARY:
#if DISASSEM
printf("Declare temp reg %d .. %d\n",
decl->DeclarationRange.First,
decl->DeclarationRange.Last);
#endif
for (i = decl->DeclarationRange.First;
i <= decl->DeclarationRange.Last;
i++) {
for (ch = 0; ch < 4; ch++) {
gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
}
/* XXX if we run out of SPE registers, we need to spill
* to SPU memory. someday...
*/
#if DISASSEM
printf(" SPE regs: %d %d %d %d\n",
gen->temp_regs[i][0],
gen->temp_regs[i][1],
gen->temp_regs[i][2],
gen->temp_regs[i][3]);
#endif
}
break;
default:
; /* ignore */
}
}
/**
 * Translate TGSI shader code to SPE instructions.  This is done when
 * the state tracker gives us a new shader (via pipe->create_fs_state()).
 *
 * On failure (e.g. an unsupported TGSI opcode) the SPE code emitted so
 * far is still terminated with a return instruction, the parser is freed,
 * and false is returned.
 *
 * \param cell  the rendering context (in)
 * \param tokens  the TGSI shader (in)
 * \param f  the generated function (out)
 * \return true if the whole shader was translated, false otherwise
 */
boolean
cell_gen_fragment_program(struct cell_context *cell,
                          const struct tgsi_token *tokens,
                          struct spe_function *f)
{
   struct tgsi_parse_context parse;
   struct codegen gen;

   memset(&gen, 0, sizeof(gen));
   gen.f = f;

   /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
   gen.inputs_reg = 3;     /* pointer to inputs array */
   gen.outputs_reg = 4;    /* pointer to outputs array */
   gen.constants_reg = 5;  /* pointer to constants array */

   spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);

   /* the parameter registers are in use; keep the allocator away from them */
   spe_allocate_register(f, gen.inputs_reg);
   spe_allocate_register(f, gen.outputs_reg);
   spe_allocate_register(f, gen.constants_reg);

#if DISASSEM
   printf("Begin %s\n", __FUNCTION__);
   tgsi_dump(tokens, 0);
#endif

   tgsi_parse_init(&parse, tokens);

   while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
      tgsi_parse_token(&parse);

      switch (parse.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_IMMEDIATE:
#if 0
         if (!note_immediate(&gen, &parse.FullToken.FullImmediate ))
            goto fail;
#endif
         break;

      case TGSI_TOKEN_TYPE_DECLARATION:
         emit_declaration(&gen, &parse.FullToken.FullDeclaration);
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         if (!emit_instruction(&gen, &parse.FullToken.FullInstruction )) {
            gen.error = true;
         }
         break;

      default:
         assert(0);
      }
   }

   if (gen.error) {
      /* terminate the SPE code.  The result of emit_END() is deliberately
       * not returned: it is always true and would report success.
       */
      (void) emit_END(&gen);
   }

#if DISASSEM
   printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
   printf("End %s\n", __FUNCTION__);
#endif

   /* free the parser on both the success and the failure path */
   tgsi_parse_free( &parse );

   return !gen.error;
}

View File

@ -0,0 +1,42 @@
/**************************************************************************
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#ifndef CELL_GEN_FP_H
#define CELL_GEN_FP_H
/**
 * Translate a TGSI fragment shader into SPE instructions.
 *
 * \param cell  the rendering context (in)
 * \param tokens  the TGSI shader (in)
 * \param f  the generated function (out)
 * \return boolean status; see the function definition for exact semantics
 */
extern boolean
cell_gen_fragment_program(struct cell_context *cell,
const struct tgsi_token *tokens,
struct spe_function *f);
#endif /* CELL_GEN_FP_H */

View File

@ -0,0 +1,870 @@
/**************************************************************************
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/**
* Generate SPU per-fragment code (actually per-quad code).
* \author Brian Paul
*/
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "rtasm/rtasm_ppc_spe.h"
#include "cell_context.h"
#include "cell_gen_fragment.h"
/**
 * Do extra optimizations?
 * When nonzero, gen_alpha_test() appends code that returns from the
 * generated function early if the quad's "alive" mask is all zero.
 */
#define OPTIMIZATIONS 1
/**
 * Generate SPE code to perform Z/depth testing.
 *
 * The comparison result is written to zmask_reg and then folded into
 * mask_reg.  Note that for NOTEQUAL, LEQUAL and GEQUAL the opposite
 * comparison is computed (so zmask holds the inverse of the test) and
 * mask is updated with an and-with-complement.
 *
 * \param dsa  Gallium depth/stencil/alpha state to gen code for
 * \param f  SPE function to append instruction onto.
 * \param mask_reg  register containing quad/pixel "alive" mask (in/out)
 * \param ifragZ_reg  register containing integer fragment Z values (in)
 * \param ifbZ_reg  register containing integer frame buffer Z values (in/out)
 * \param zmask_reg  register containing result of Z test/comparison (out)
 */
static void
gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa,
               struct spe_function *f,
               int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg)
{
   const unsigned func = dsa->depth.func;

   ASSERT(dsa->depth.enabled);

   switch (func) {
   case PIPE_FUNC_NEVER:
      /* every pixel fails: mask = {0,0,0,0}, zmask = mask */
      spe_il(f, mask_reg, 0);
      spe_move(f, zmask_reg, mask_reg);
      break;

   case PIPE_FUNC_ALWAYS:
      /* every pixel passes: mask unchanged, zmask = {~0,~0,~0,~0} */
      spe_il(f, zmask_reg, ~0);
      break;

   case PIPE_FUNC_EQUAL:
   case PIPE_FUNC_NOTEQUAL:
      /* zmask = (ifragZ == ifbZ) */
      spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
      if (func == PIPE_FUNC_EQUAL) {
         /* mask = (mask & zmask) */
         spe_and(f, mask_reg, mask_reg, zmask_reg);
      }
      else {
         /* mask = (mask & ~zmask) */
         spe_andc(f, mask_reg, mask_reg, zmask_reg);
      }
      break;

   case PIPE_FUNC_GREATER:
   case PIPE_FUNC_LEQUAL:
      /* zmask = (ifragZ > ifbZ) */
      spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
      if (func == PIPE_FUNC_GREATER) {
         /* mask = (mask & zmask) */
         spe_and(f, mask_reg, mask_reg, zmask_reg);
      }
      else {
         /* LEQUAL is "not greater": mask = (mask & ~zmask) */
         spe_andc(f, mask_reg, mask_reg, zmask_reg);
      }
      break;

   case PIPE_FUNC_LESS:
   case PIPE_FUNC_GEQUAL:
      /* zmask = (ifbZ > ifragZ) */
      spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
      if (func == PIPE_FUNC_LESS) {
         /* mask = (mask & zmask) */
         spe_and(f, mask_reg, mask_reg, zmask_reg);
      }
      else {
         /* GEQUAL is "not less": mask = (mask & ~zmask) */
         spe_andc(f, mask_reg, mask_reg, zmask_reg);
      }
      break;

   default:
      ASSERT(0);
      break;
   }

   if (dsa->depth.writemask) {
      /*
       * Conditionally update the framebuffer Z values:
       *   framebufferZ = (pixel alive ? fragmentZ : framebufferZ)
       * using a per-bit select on the alive mask.
       */
      spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg);
   }
}
/**
 * Generate SPE code to perform alpha testing.
 *
 * The fragment alpha values are compared against the reference alpha and
 * the result is folded into the "alive" mask.  When OPTIMIZATIONS is
 * enabled, code is appended that returns from the generated function if
 * no pixel of the quad is still alive.
 *
 * \param dsa  Gallium depth/stencil/alpha state to gen code for
 * \param f  SPE function to append instruction onto.
 * \param mask_reg  register containing quad/pixel "alive" mask (in/out)
 * \param fragA_reg  register containing four fragment alpha values (in)
 */
static void
gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa,
               struct spe_function *f, int mask_reg, int fragA_reg)
{
   int ref_reg = spe_allocate_available_register(f);
   int amask_reg = spe_allocate_available_register(f);
   const unsigned func = dsa->alpha.func;

   ASSERT(dsa->alpha.enabled);

   /* NEVER and ALWAYS don't look at the reference value at all */
   if (!(func == PIPE_FUNC_NEVER || func == PIPE_FUNC_ALWAYS)) {
      /* splat the reference alpha across all four slots */
      spe_load_float(f, ref_reg, dsa->alpha.ref);
   }

   /* compare and update 'mask' */
   switch (func) {
   case PIPE_FUNC_NEVER:
      /* mask = [0,0,0,0] */
      spe_il(f, mask_reg, 0);
      break;

   case PIPE_FUNC_ALWAYS:
      /* mask stays as it is */
      break;

   case PIPE_FUNC_EQUAL:
   case PIPE_FUNC_NOTEQUAL:
      /* amask = (fragA == ref) */
      spe_fceq(f, amask_reg, fragA_reg, ref_reg);
      if (func == PIPE_FUNC_EQUAL) {
         /* mask = (mask & amask) */
         spe_and(f, mask_reg, mask_reg, amask_reg);
      }
      else {
         /* mask = (mask & ~amask) */
         spe_andc(f, mask_reg, mask_reg, amask_reg);
      }
      break;

   case PIPE_FUNC_GREATER:
   case PIPE_FUNC_LEQUAL:
      /* amask = (fragA > ref) */
      spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
      if (func == PIPE_FUNC_GREATER) {
         /* mask = (mask & amask) */
         spe_and(f, mask_reg, mask_reg, amask_reg);
      }
      else {
         /* mask = (mask & ~amask) */
         spe_andc(f, mask_reg, mask_reg, amask_reg);
      }
      break;

   case PIPE_FUNC_LESS:
   case PIPE_FUNC_GEQUAL:
      /* amask = (ref > fragA) */
      spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
      if (func == PIPE_FUNC_LESS) {
         /* mask = (mask & amask) */
         spe_and(f, mask_reg, mask_reg, amask_reg);
      }
      else {
         /* mask = (mask & ~amask) */
         spe_andc(f, mask_reg, mask_reg, amask_reg);
      }
      break;

   default:
      ASSERT(0);
      break;
   }

#if OPTIMIZATIONS
   /* early out: return if the whole quad has been killed */
   {
      /* amask is no longer needed, so borrow its register */
      int any_alive_reg = amask_reg;
      /* slot 0 = (mask[0] | mask[1] | mask[2] | mask[3]) */
      spe_orx(f, any_alive_reg, mask_reg);
      /* branch to the return address if slot 0 is zero */
      spe_biz(f, any_alive_reg, SPE_REG_RA, 0, 0);
   }
#endif

   spe_release_register(f, ref_reg);
   spe_release_register(f, amask_reg);
}
/**
 * Generate SPE code to implement the given blend mode for a quad of pixels.
 *
 * The packed framebuffer colors are first unpacked into per-channel float
 * vectors.  Then the source terms (term1*) and destination terms (term2*)
 * are computed from the blend factors and finally combined with the blend
 * function; the results overwrite the fragment color registers.
 *
 * Only a subset of blend factors and functions is implemented; anything
 * else hits ASSERT(0).
 *
 * \param blend  Gallium blend state to gen code for
 * \param f  SPE function to append instruction onto.
 * \param color_format  format of the packed framebuffer colors
 * \param fragR_reg  register with fragment red values (float) (in/out)
 * \param fragG_reg  register with fragment green values (float) (in/out)
 * \param fragB_reg  register with fragment blue values (float) (in/out)
 * \param fragA_reg  register with fragment alpha values (float) (in/out)
 * \param fbRGBA_reg  register with packed framebuffer colors (integer) (in)
 */
static void
gen_blend(const struct pipe_blend_state *blend,
          struct spe_function *f,
          enum pipe_format color_format,
          int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg,
          int fbRGBA_reg)
{
   int term1R_reg = spe_allocate_available_register(f);
   int term1G_reg = spe_allocate_available_register(f);
   int term1B_reg = spe_allocate_available_register(f);
   int term1A_reg = spe_allocate_available_register(f);

   int term2R_reg = spe_allocate_available_register(f);
   int term2G_reg = spe_allocate_available_register(f);
   int term2B_reg = spe_allocate_available_register(f);
   int term2A_reg = spe_allocate_available_register(f);

   int fbR_reg = spe_allocate_available_register(f);
   int fbG_reg = spe_allocate_available_register(f);
   int fbB_reg = spe_allocate_available_register(f);
   int fbA_reg = spe_allocate_available_register(f);

   int one_reg = spe_allocate_available_register(f);
   int tmp_reg = spe_allocate_available_register(f);

   boolean one_reg_set = false; /* avoid setting one_reg more than once */

   ASSERT(blend->blend_enable);

   /* Unpack/convert framebuffer colors from four 32-bit packed colors
    * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA).
    * Each 8-bit color component is expanded into a float in [0.0, 1.0].
    */
   {
      int mask_reg = spe_allocate_available_register(f);

      /* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */
      spe_load_int(f, mask_reg, 0xff);

      /* XXX there may be more clever ways to implement the following code */
      switch (color_format) {
      case PIPE_FORMAT_A8R8G8B8_UNORM:
         /* fbB = fbRGBA & mask */
         spe_and(f, fbB_reg, fbRGBA_reg, mask_reg);
         /* mask = mask << 8 */
         spe_roti(f, mask_reg, mask_reg, 8);

         /* fbG = fbRGBA & mask */
         spe_and(f, fbG_reg, fbRGBA_reg, mask_reg);
         /* fbG = fbG >> 8 */
         spe_roti(f, fbG_reg, fbG_reg, -8);
         /* mask = mask << 8 */
         spe_roti(f, mask_reg, mask_reg, 8);

         /* fbR = fbRGBA & mask */
         spe_and(f, fbR_reg, fbRGBA_reg, mask_reg);
         /* fbR = fbR >> 16 */
         spe_roti(f, fbR_reg, fbR_reg, -16);
         /* mask = mask << 8 */
         spe_roti(f, mask_reg, mask_reg, 8);

         /* fbA = fbRGBA & mask */
         spe_and(f, fbA_reg, fbRGBA_reg, mask_reg);
         /* fbA = fbA >> 24 */
         spe_roti(f, fbA_reg, fbA_reg, -24);
         break;

      case PIPE_FORMAT_B8G8R8A8_UNORM:
         /* fbA = fbRGBA & mask */
         spe_and(f, fbA_reg, fbRGBA_reg, mask_reg);
         /* mask = mask << 8 */
         spe_roti(f, mask_reg, mask_reg, 8);

         /* fbR = fbRGBA & mask */
         spe_and(f, fbR_reg, fbRGBA_reg, mask_reg);
         /* fbR = fbR >> 8 */
         spe_roti(f, fbR_reg, fbR_reg, -8);
         /* mask = mask << 8 */
         spe_roti(f, mask_reg, mask_reg, 8);

         /* fbG = fbRGBA & mask */
         spe_and(f, fbG_reg, fbRGBA_reg, mask_reg);
         /* fbG = fbG >> 16 */
         spe_roti(f, fbG_reg, fbG_reg, -16);
         /* mask = mask << 8 */
         spe_roti(f, mask_reg, mask_reg, 8);

         /* fbB = fbRGBA & mask */
         spe_and(f, fbB_reg, fbRGBA_reg, mask_reg);
         /* fbB = fbB >> 24 */
         spe_roti(f, fbB_reg, fbB_reg, -24);
         break;

      default:
         ASSERT(0);
      }

      /* convert int[4] in [0,255] to float[4] in [0.0, 1.0]
       * NOTE(review): a scale of 8 presumably divides by 256, so 255 would
       * map to 255/256 rather than exactly 1.0 -- confirm this is intended.
       */
      spe_cuflt(f, fbR_reg, fbR_reg, 8);
      spe_cuflt(f, fbG_reg, fbG_reg, 8);
      spe_cuflt(f, fbB_reg, fbB_reg, 8);
      spe_cuflt(f, fbA_reg, fbA_reg, 8);

      spe_release_register(f, mask_reg);
   }

   /*
    * Compute Src RGB terms
    */
   switch (blend->rgb_src_factor) {
   case PIPE_BLENDFACTOR_ONE:
      spe_move(f, term1R_reg, fragR_reg);
      spe_move(f, term1G_reg, fragG_reg);
      spe_move(f, term1B_reg, fragB_reg);
      break;
   case PIPE_BLENDFACTOR_ZERO:
      spe_zero(f, term1R_reg);
      spe_zero(f, term1G_reg);
      spe_zero(f, term1B_reg);
      break;
   case PIPE_BLENDFACTOR_SRC_COLOR:
      spe_fm(f, term1R_reg, fragR_reg, fragR_reg);
      spe_fm(f, term1G_reg, fragG_reg, fragG_reg);
      spe_fm(f, term1B_reg, fragB_reg, fragB_reg);
      break;
   case PIPE_BLENDFACTOR_SRC_ALPHA:
      spe_fm(f, term1R_reg, fragR_reg, fragA_reg);
      spe_fm(f, term1G_reg, fragG_reg, fragA_reg);
      spe_fm(f, term1B_reg, fragB_reg, fragA_reg);
      break;
   /* XXX more cases */
   default:
      ASSERT(0);
   }

   /*
    * Compute Src Alpha term
    */
   switch (blend->alpha_src_factor) {
   case PIPE_BLENDFACTOR_ONE:
      spe_move(f, term1A_reg, fragA_reg);
      break;
   case PIPE_BLENDFACTOR_ZERO:
      /* handled for the other three terms as well */
      spe_zero(f, term1A_reg);
      break;
   case PIPE_BLENDFACTOR_SRC_COLOR:
      /* for the alpha channel the "source color" is the source alpha */
      spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
      break;
   case PIPE_BLENDFACTOR_SRC_ALPHA:
      spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
      break;
   /* XXX more cases */
   default:
      ASSERT(0);
   }

   /*
    * Compute Dest RGB terms
    */
   switch (blend->rgb_dst_factor) {
   case PIPE_BLENDFACTOR_ONE:
      spe_move(f, term2R_reg, fbR_reg);
      spe_move(f, term2G_reg, fbG_reg);
      spe_move(f, term2B_reg, fbB_reg);
      break;
   case PIPE_BLENDFACTOR_ZERO:
      spe_zero(f, term2R_reg);
      spe_zero(f, term2G_reg);
      spe_zero(f, term2B_reg);
      break;
   case PIPE_BLENDFACTOR_SRC_COLOR:
      spe_fm(f, term2R_reg, fbR_reg, fragR_reg);
      spe_fm(f, term2G_reg, fbG_reg, fragG_reg);
      spe_fm(f, term2B_reg, fbB_reg, fragB_reg);
      break;
   case PIPE_BLENDFACTOR_SRC_ALPHA:
      spe_fm(f, term2R_reg, fbR_reg, fragA_reg);
      spe_fm(f, term2G_reg, fbG_reg, fragA_reg);
      spe_fm(f, term2B_reg, fbB_reg, fragA_reg);
      break;
   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
      /* one = {1.0, 1.0, 1.0, 1.0} */
      if (!one_reg_set) {
         spe_load_float(f, one_reg, 1.0f);
         one_reg_set = true;
      }
      /* tmp = one - fragA */
      spe_fs(f, tmp_reg, one_reg, fragA_reg);
      /* term = fb * tmp */
      spe_fm(f, term2R_reg, fbR_reg, tmp_reg);
      spe_fm(f, term2G_reg, fbG_reg, tmp_reg);
      spe_fm(f, term2B_reg, fbB_reg, tmp_reg);
      break;
   /* XXX more cases */
   default:
      ASSERT(0);
   }

   /*
    * Compute Dest Alpha term
    */
   switch (blend->alpha_dst_factor) {
   case PIPE_BLENDFACTOR_ONE:
      spe_move(f, term2A_reg, fbA_reg);
      break;
   case PIPE_BLENDFACTOR_ZERO:
      spe_zero(f, term2A_reg);
      break;
   case PIPE_BLENDFACTOR_SRC_COLOR:
      /* for the alpha channel the "source color" is the source alpha,
       * same convention as the Src Alpha term above
       */
      spe_fm(f, term2A_reg, fbA_reg, fragA_reg);
      break;
   case PIPE_BLENDFACTOR_SRC_ALPHA:
      spe_fm(f, term2A_reg, fbA_reg, fragA_reg);
      break;
   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
      /* one = {1.0, 1.0, 1.0, 1.0} */
      if (!one_reg_set) {
         spe_load_float(f, one_reg, 1.0f);
         one_reg_set = true;
      }
      /* tmp = one - fragA */
      spe_fs(f, tmp_reg, one_reg, fragA_reg);
      /* termA = fbA * tmp */
      spe_fm(f, term2A_reg, fbA_reg, tmp_reg);
      break;
   /* XXX more cases */
   default:
      ASSERT(0);
   }

   /*
    * Combine Src/Dest RGB terms
    */
   switch (blend->rgb_func) {
   case PIPE_BLEND_ADD:
      spe_fa(f, fragR_reg, term1R_reg, term2R_reg);
      spe_fa(f, fragG_reg, term1G_reg, term2G_reg);
      spe_fa(f, fragB_reg, term1B_reg, term2B_reg);
      break;
   case PIPE_BLEND_SUBTRACT:
      spe_fs(f, fragR_reg, term1R_reg, term2R_reg);
      spe_fs(f, fragG_reg, term1G_reg, term2G_reg);
      spe_fs(f, fragB_reg, term1B_reg, term2B_reg);
      break;
   /* XXX more cases */
   default:
      ASSERT(0);
   }

   /*
    * Combine Src/Dest A term
    */
   switch (blend->alpha_func) {
   case PIPE_BLEND_ADD:
      spe_fa(f, fragA_reg, term1A_reg, term2A_reg);
      break;
   case PIPE_BLEND_SUBTRACT:
      spe_fs(f, fragA_reg, term1A_reg, term2A_reg);
      break;
   /* XXX more cases */
   default:
      ASSERT(0);
   }

   spe_release_register(f, term1R_reg);
   spe_release_register(f, term1G_reg);
   spe_release_register(f, term1B_reg);
   spe_release_register(f, term1A_reg);

   spe_release_register(f, term2R_reg);
   spe_release_register(f, term2G_reg);
   spe_release_register(f, term2B_reg);
   spe_release_register(f, term2A_reg);

   spe_release_register(f, fbR_reg);
   spe_release_register(f, fbG_reg);
   spe_release_register(f, fbB_reg);
   spe_release_register(f, fbA_reg);

   spe_release_register(f, one_reg);
   spe_release_register(f, tmp_reg);
}
/**
 * Placeholder for generating SPE code for logic ops.
 * Not implemented yet: the body is empty and no instructions are emitted.
 * Per the note below, the eventual implementation is meant to work on
 * 32-bit packed pixels rather than float colors.
 */
static void
gen_logicop(const struct pipe_blend_state *blend,
struct spe_function *f,
int fragRGBA_reg, int fbRGBA_reg)
{
/* XXX to-do */
/* operate on 32-bit packed pixels, not float colors */
}
/**
 * Placeholder for generating SPE code for color masking.
 * Not implemented yet: the body is empty and no instructions are emitted.
 * Per the note below, the eventual implementation is meant to work on
 * 32-bit packed pixels rather than float colors.
 */
static void
gen_colormask(uint colormask,
struct spe_function *f,
int fragRGBA_reg, int fbRGBA_reg)
{
/* XXX to-do */
/* operate on 32-bit packed pixels, not float colors */
}
/**
 * Generate code to pack a quad of float colors into four 32-bit integers.
 *
 * \param f  SPE function to append instruction onto.
 * \param color_format  the dest color packing format
 * \param r_reg  register containing four red values (in/clobbered)
 * \param g_reg  register containing four green values (in/clobbered)
 * \param b_reg  register containing four blue values (in/clobbered)
 * \param a_reg  register containing four alpha values (in/clobbered)
 * \param rgba_reg  register to store the packed RGBA colors (out)
 */
static void
gen_pack_colors(struct spe_function *f,
                enum pipe_format color_format,
                int r_reg, int g_reg, int b_reg, int a_reg,
                int rgba_reg)
{
   /* the four channel registers, in red/green/blue/alpha order */
   const int chan_reg[4] = { r_reg, g_reg, b_reg, a_reg };
   int c;

   /* float[4] in [0.0,1.0] -> int[4] in [0,~0], with clamping */
   for (c = 0; c < 4; c++) {
      spe_cfltu(f, chan_reg[c], chan_reg[c], 32);
   }

   /* Move each value's most significant byte down to the least
    * significant byte position, i.e. reg = reg >> 24
    */
   for (c = 0; c < 4; c++) {
      spe_rotmi(f, chan_reg[c], chan_reg[c], -24);
   }

   /* Position each color byte where the surface format wants it */
   switch (color_format) {
   case PIPE_FORMAT_A8R8G8B8_UNORM:
      spe_roti(f, g_reg, g_reg, 8);   /* green <<= 8 */
      spe_roti(f, r_reg, r_reg, 16);  /* red <<= 16 */
      spe_roti(f, a_reg, a_reg, 24);  /* alpha <<= 24 */
      break;
   case PIPE_FORMAT_B8G8R8A8_UNORM:
      spe_roti(f, r_reg, r_reg, 8);   /* red <<= 8 */
      spe_roti(f, g_reg, g_reg, 16);  /* green <<= 16 */
      spe_roti(f, b_reg, b_reg, 24);  /* blue <<= 24 */
      break;
   default:
      ASSERT(0);
      break;
   }

   /* OR the four shifted channels together to form the packed colors.
    * For example, with A8R8G8B8 we might now have:
    *    R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000}
    *    G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600}
    *    B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099}
    *    A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000}
    * which combine into:
    *    RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699}
    */
   spe_or(f, rgba_reg, r_reg, g_reg);
   spe_or(f, rgba_reg, rgba_reg, b_reg);
   spe_or(f, rgba_reg, rgba_reg, a_reg);
}
/**
* Generate SPE code to implement the fragment operations (alpha test,
* depth test, stencil test, blending, colormask, and final
* framebuffer write) as specified by the current context state.
*
* Logically, this code will be called after running the fragment
* shader. But under some circumstances we could run some of this
* code before the fragment shader to cull fragments/quads that are
* totally occluded/discarded.
*
* XXX we only support PIPE_FORMAT_Z24S8_UNORM z/stencil buffer right now.
*
* See the spu_default_fragment_ops() function to see how the per-fragment
* operations would be done with ordinary C code.
* The code we generate here though has no branches, is SIMD, etc and
* should be much faster.
*
* \param cell the rendering context (in)
* \param f the generated function (out)
*/
void
cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
{
const struct pipe_depth_stencil_alpha_state *dsa =
&cell->depth_stencil->base;
const struct pipe_blend_state *blend = &cell->blend->base;
const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format;
/* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
const int x_reg = 3; /* uint */
const int y_reg = 4; /* uint */
const int color_tile_reg = 5; /* tile_t * */
const int depth_tile_reg = 6; /* tile_t * */
const int fragZ_reg = 7; /* vector float */
const int fragR_reg = 8; /* vector float */
const int fragG_reg = 9; /* vector float */
const int fragB_reg = 10; /* vector float */
const int fragA_reg = 11; /* vector float */
const int mask_reg = 12; /* vector uint */
/* offset of quad from start of tile
* XXX assuming 4-byte pixels for color AND Z/stencil!!!!
*/
int quad_offset_reg;
int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */
int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */
spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
spe_allocate_register(f, x_reg);
spe_allocate_register(f, y_reg);
spe_allocate_register(f, color_tile_reg);
spe_allocate_register(f, depth_tile_reg);
spe_allocate_register(f, fragZ_reg);
spe_allocate_register(f, fragR_reg);
spe_allocate_register(f, fragG_reg);
spe_allocate_register(f, fragB_reg);
spe_allocate_register(f, fragA_reg);
spe_allocate_register(f, mask_reg);
quad_offset_reg = spe_allocate_available_register(f);
fbRGBA_reg = spe_allocate_available_register(f);
fbZS_reg = spe_allocate_available_register(f);
/* compute offset of quad from start of tile, in bytes */
{
int x2_reg = spe_allocate_available_register(f);
int y2_reg = spe_allocate_available_register(f);
ASSERT(TILE_SIZE == 32);
spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */
spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */
spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */
spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */
spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */
spe_release_register(f, x2_reg);
spe_release_register(f, y2_reg);
}
if (dsa->alpha.enabled) {
gen_alpha_test(dsa, f, mask_reg, fragA_reg);
}
if (dsa->depth.enabled || dsa->stencil[0].enabled) {
const enum pipe_format zs_format = cell->framebuffer.zsbuf->format;
boolean write_depth_stencil;
int fbZ_reg = spe_allocate_available_register(f); /* Z values */
int fbS_reg = spe_allocate_available_register(f); /* Stencil values */
/* fetch quad of depth/stencil values from tile at (x,y) */
/* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
if (dsa->depth.enabled) {
/* Extract Z bits from fbZS_reg into fbZ_reg */
if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
zs_format == PIPE_FORMAT_X8Z24_UNORM) {
int mask_reg = spe_allocate_available_register(f);
spe_fsmbi(f, mask_reg, 0x7777); /* mask[0,1,2,3] = 0x00ffffff */
spe_and(f, fbZ_reg, fbZS_reg, mask_reg); /* fbZ = fbZS & mask */
spe_release_register(f, mask_reg);
/* OK, fbZ_reg has four 24-bit Z values now */
}
else {
/* XXX handle other z/stencil formats */
ASSERT(0);
}
/* Convert fragZ values from float[4] to uint[4] */
if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
zs_format == PIPE_FORMAT_X8Z24_UNORM ||
zs_format == PIPE_FORMAT_Z24S8_UNORM ||
zs_format == PIPE_FORMAT_Z24X8_UNORM) {
/* 24-bit Z values */
int scale_reg = spe_allocate_available_register(f);
/* scale_reg[0,1,2,3] = float(2^24-1) */
spe_load_float(f, scale_reg, (float) 0xffffff);
/* XXX these two instructions might be combined */
spe_fm(f, fragZ_reg, fragZ_reg, scale_reg); /* fragZ *= scale */
spe_cfltu(f, fragZ_reg, fragZ_reg, 0); /* fragZ = (int) fragZ */
spe_release_register(f, scale_reg);
}
else {
/* XXX handle 16-bit Z format */
ASSERT(0);
}
}
if (dsa->stencil[0].enabled) {
/* Extract Stencil bit sfrom fbZS_reg into fbS_reg */
if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
zs_format == PIPE_FORMAT_X8Z24_UNORM) {
/* XXX extract with a shift */
ASSERT(0);
}
else if (zs_format == PIPE_FORMAT_Z24S8_UNORM ||
zs_format == PIPE_FORMAT_Z24X8_UNORM) {
/* XXX extract with a mask */
ASSERT(0);
}
}
if (dsa->stencil[0].enabled) {
/* XXX this may involve depth testing too */
// gen_stencil_test(dsa, f, ... );
ASSERT(0);
}
else if (dsa->depth.enabled) {
int zmask_reg = spe_allocate_available_register(f);
gen_depth_test(dsa, f, mask_reg, fragZ_reg, fbZ_reg, zmask_reg);
spe_release_register(f, zmask_reg);
}
/* do we need to write Z and/or Stencil back into framebuffer? */
write_depth_stencil = (dsa->depth.writemask |
dsa->stencil[0].write_mask |
dsa->stencil[1].write_mask);
if (write_depth_stencil) {
/* Merge latest Z and Stencil values into fbZS_reg.
* fbZ_reg has four Z vals in bits [23..0] or bits [15..0].
* fbS_reg has four 8-bit Z values in bits [7..0].
*/
if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
zs_format == PIPE_FORMAT_X8Z24_UNORM) {
spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */
spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
}
else if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
zs_format == PIPE_FORMAT_X8Z24_UNORM) {
/* XXX to do */
ASSERT(0);
}
else if (zs_format == PIPE_FORMAT_Z16_UNORM) {
/* XXX to do */
ASSERT(0);
}
else if (zs_format == PIPE_FORMAT_S8_UNORM) {
/* XXX to do */
ASSERT(0);
}
else {
/* bad zs_format */
ASSERT(0);
}
/* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */
spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
}
spe_release_register(f, fbZ_reg);
spe_release_register(f, fbS_reg);
}
/* Get framebuffer quad/colors. We'll need these for blending,
* color masking, and to obey the quad/pixel mask.
* Load: fbRGBA_reg = memory[color_tile + quad_offset]
* Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking
* we could skip this load.
*/
spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg);
if (blend->blend_enable) {
gen_blend(blend, f, color_format,
fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg);
}
/*
* Write fragment colors to framebuffer/tile.
* This involves converting the fragment colors from float[4] to the
* tile's specific format and obeying the quad/pixel mask.
*/
{
int rgba_reg = spe_allocate_available_register(f);
/* Pack four float colors as four 32-bit int colors */
gen_pack_colors(f, color_format,
fragR_reg, fragG_reg, fragB_reg, fragA_reg,
rgba_reg);
if (blend->logicop_enable) {
gen_logicop(blend, f, rgba_reg, fbRGBA_reg);
}
if (blend->colormask != 0xf) {
gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg);
}
/* Mix fragment colors with framebuffer colors using the quad/pixel mask:
* if (mask[i])
* rgba[i] = rgba[i];
* else
* rgba[i] = framebuffer[i];
*/
spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg);
/* Store updated quad in tile:
* memory[color_tile + quad_offset] = rgba_reg;
*/
spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg);
spe_release_register(f, rgba_reg);
}
//printf("gen_fragment_ops nr instructions: %u\n", f->num_inst);
spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */
spe_release_register(f, fbRGBA_reg);
spe_release_register(f, fbZS_reg);
spe_release_register(f, quad_offset_reg);
}

View File

@ -0,0 +1,38 @@
/**************************************************************************
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#ifndef CELL_GEN_FRAGMENT_H
#define CELL_GEN_FRAGMENT_H
/* Forward declarations so this header can be included on its own;
 * only pointers to these types are used here.
 */
struct cell_context;
struct spe_function;

/**
 * Generate the SPE function for per-fragment operations.
 *
 * \param cell  the rendering context (in)
 * \param f     the generated function (out)
 */
extern void
cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f);
#endif /* CELL_GEN_FRAGMENT_H */

View File

@ -27,6 +27,7 @@
#include "util/u_memory.h"
#include "cell_context.h"
#include "cell_gen_fragment.h"
#include "cell_state.h"
#include "cell_state_emit.h"
#include "cell_state_per_fragment.h"
@ -54,23 +55,6 @@ emit_state_cmd(struct cell_context *cell, uint cmd,
void
cell_emit_state(struct cell_context *cell)
{
if (cell->dirty & (CELL_NEW_FRAMEBUFFER | CELL_NEW_BLEND)) {
struct cell_command_logicop logicop;
if (cell->logic_op.store != NULL) {
spe_release_func(& cell->logic_op);
}
cell_generate_logic_op(& cell->logic_op,
& cell->blend->base,
cell->framebuffer.cbufs[0]);
logicop.base = (intptr_t) cell->logic_op.store;
logicop.size = 64 * 4;
emit_state_cmd(cell, CELL_CMD_STATE_LOGICOP, &logicop,
sizeof(logicop));
}
if (cell->dirty & CELL_NEW_FRAMEBUFFER) {
struct pipe_surface *cbuf = cell->framebuffer.cbufs[0];
struct pipe_surface *zbuf = cell->framebuffer.zsbuf;
@ -83,44 +67,49 @@ cell_emit_state(struct cell_context *cell)
fb->depth_format = zbuf ? zbuf->format : PIPE_FORMAT_NONE;
fb->width = cell->framebuffer.width;
fb->height = cell->framebuffer.height;
#if 0
printf("EMIT color format %s\n", pf_name(fb->color_format));
printf("EMIT depth format %s\n", pf_name(fb->depth_format));
#endif
}
if (cell->dirty & CELL_NEW_BLEND) {
struct cell_command_blend blend;
if (cell->blend != NULL) {
blend.base = (intptr_t) cell->blend->code.store;
blend.size = (char *) cell->blend->code.csr
- (char *) cell->blend->code.store;
blend.read_fb = TRUE;
if (cell->dirty & (CELL_NEW_FS)) {
/* Send new fragment program to SPUs */
struct cell_command_fragment_program *fp
= cell_batch_alloc(cell, sizeof(*fp));
fp->opcode = CELL_CMD_STATE_FRAGMENT_PROGRAM;
fp->num_inst = cell->fs->code.num_inst;
memcpy(&fp->code, cell->fs->code.store,
SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
if (0) {
int i;
printf("PPU Emit CELL_CMD_STATE_FRAGMENT_PROGRAM:\n");
for (i = 0; i < fp->num_inst; i++) {
printf(" %3d: 0x%08x\n", i, fp->code[i]);
}
}
else {
blend.base = 0;
blend.size = 0;
blend.read_fb = FALSE;
}
emit_state_cmd(cell, CELL_CMD_STATE_BLEND, &blend, sizeof(blend));
}
if (cell->dirty & CELL_NEW_DEPTH_STENCIL) {
struct cell_command_depth_stencil_alpha_test dsat;
if (cell->dirty & (CELL_NEW_FRAMEBUFFER |
CELL_NEW_DEPTH_STENCIL |
CELL_NEW_BLEND)) {
/* XXX we don't want to always do codegen here. We should have
* a hash/lookup table to cache previous results...
*/
struct cell_command_fragment_ops *fops
= cell_batch_alloc(cell, sizeof(*fops));
struct spe_function spe_code;
if (cell->depth_stencil != NULL) {
dsat.base = (intptr_t) cell->depth_stencil->code.store;
dsat.size = (char *) cell->depth_stencil->code.csr
- (char *) cell->depth_stencil->code.store;
dsat.read_depth = TRUE;
dsat.read_stencil = FALSE;
}
else {
dsat.base = 0;
dsat.size = 0;
dsat.read_depth = FALSE;
dsat.read_stencil = FALSE;
}
emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, &dsat, sizeof(dsat));
/* generate new code */
cell_gen_fragment_function(cell, &spe_code);
/* put the new code into the batch buffer */
fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS;
memcpy(&fops->code, spe_code.store,
SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
fops->dsa = cell->depth_stencil->base;
fops->blend = cell->blend->base;
/* free codegen buffer */
spe_release_func(&spe_code);
}
if (cell->dirty & CELL_NEW_SAMPLER) {
@ -160,7 +149,8 @@ cell_emit_state(struct cell_context *cell)
emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO,
&cell->vertex_info, sizeof(struct vertex_info));
}
#if 0
if (cell->dirty & CELL_NEW_VS) {
const struct draw_context *const draw = cell->draw;
struct cell_shader_info info;
@ -175,4 +165,5 @@ cell_emit_state(struct cell_context *cell)
emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, &info, sizeof(info));
}
#endif
}

View File

@ -132,9 +132,9 @@ emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa,
/**
* Generate code to perform Z testing. Four Z values are tested at once.
* \param dsa Current depth-test state
* \param f Function to which code should be appended
* \param m Mask of allocated / free SPE registers
* \param mask Index of register to contain depth-pass mask
* \param stored Index of register containing values from depth buffer
* \param calculated Index of register containing per-fragment depth values
@ -198,6 +198,7 @@ emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa,
/**
* Generate code to apply the stencil operation (after testing).
* \note Emits a maximum of 5 instructions.
*
* \warning
@ -222,9 +223,13 @@ emit_stencil_op(struct spe_function *f,
spe_il(f, result, ref);
break;
case PIPE_STENCIL_OP_INCR:
/* clamp = [0xff, 0xff, 0xff, 0xff] */
spe_il(f, clamp, 0x0ff);
/* result[i] = in[i] + 1 */
spe_ai(f, result, in, 1);
/* clamp_mask[i] = (result[i] > 0xff) */
spe_clgti(f, clamp_mask, result, 0x0ff);
/* result[i] = clamp_mask[i] ? clamp[i] : result[i] */
spe_selb(f, result, result, clamp, clamp_mask);
break;
case PIPE_STENCIL_OP_DECR:
@ -259,10 +264,10 @@ emit_stencil_op(struct spe_function *f,
/**
* Generate code to do stencil test. Four pixels are tested at once.
* \param dsa Depth / stencil test state
* \param face 0 for front face, 1 for back face
* \param f Function to append instructions to
* \param reg_mask Mask of allocated registers
* \param mask Register containing mask of fragments passing the
* alpha test
* \param depth_mask Register containing mask of fragments passing the
@ -310,13 +315,14 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
switch (dsa->stencil[face].func) {
case PIPE_FUNC_NEVER:
spe_il(f, stencil_mask, 0);
spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */
break;
case PIPE_FUNC_NOTEQUAL:
complement = TRUE;
/* FALLTHROUGH */
case PIPE_FUNC_EQUAL:
/* stencil_mask[i] = (stored[i] == ref) */
spe_ceqi(f, stencil_mask, stored, ref);
break;
@ -324,6 +330,8 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
complement = TRUE;
/* FALLTHROUGH */
case PIPE_FUNC_GREATER:
complement = TRUE;
/* stencil_mask[i] = (stored[i] > ref) */
spe_clgti(f, stencil_mask, stored, ref);
break;
@ -331,8 +339,11 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
complement = TRUE;
/* FALLTHROUGH */
case PIPE_FUNC_GEQUAL:
/* stencil_mask[i] = (stored[i] > ref) */
spe_clgti(f, stencil_mask, stored, ref);
/* tmp[i] = (stored[i] == ref) */
spe_ceqi(f, tmp, stored, ref);
/* stencil_mask[i] = stencil_mask[i] | tmp[i] */
spe_or(f, stencil_mask, stencil_mask, tmp);
break;
@ -461,7 +472,7 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa)
* + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round
* up to 64 to make it a happy power-of-two.
*/
spe_init_func(f, 4 * 64);
spe_init_func(f, SPE_INST_SIZE * 64);
/* Allocate registers for the function's input parameters. Cleverly (and
@ -540,7 +551,7 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa)
spe_selb(f, depth, depth, zvals, mask);
}
spe_bi(f, 0, 0, 0);
spe_bi(f, 0, 0, 0); /* return from function call */
#if 0
@ -956,7 +967,7 @@ cell_generate_alpha_blend(struct cell_blend_state *cb)
* + 4 (fragment mask) + 1 (return) = 55 instructions. Round up to 64 to
* make it a happy power-of-two.
*/
spe_init_func(f, 4 * 64);
spe_init_func(f, SPE_INST_SIZE * 64);
const int frag[4] = {
@ -1144,9 +1155,10 @@ cell_generate_alpha_blend(struct cell_blend_state *cb)
}
int PC_OFFSET(const struct spe_function *f, const void *d)
static int
PC_OFFSET(const struct spe_function *f, const void *d)
{
const intptr_t pc = (intptr_t) f->csr;
const intptr_t pc = (intptr_t) &f->store[f->num_inst];
const intptr_t ea = ~0x0f & (intptr_t) d;
return (ea - pc) >> 2;
@ -1178,7 +1190,7 @@ cell_generate_logic_op(struct spe_function *f,
* bytes (equiv. to 8 instructions) are needed for data storage. Round up
* to 64 to make it a happy power-of-two.
*/
spe_init_func(f, 4 * 64);
spe_init_func(f, SPE_INST_SIZE * 64);
/* Pixel colors in framebuffer format in AoS layout.

View File

@ -34,7 +34,7 @@
#include "cell_context.h"
#include "cell_state.h"
#include "cell_gen_fp.h"
/** cast wrapper */
@ -61,7 +61,7 @@ static void *
cell_create_fs_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ)
{
/*struct cell_context *cell = cell_context(pipe);*/
struct cell_context *cell = cell_context(pipe);
struct cell_fragment_shader_state *cfs;
cfs = CALLOC_STRUCT(cell_fragment_shader_state);
@ -76,6 +76,8 @@ cell_create_fs_state(struct pipe_context *pipe,
tgsi_scan_shader(templ->tokens, &cfs->info);
cell_gen_fragment_program(cell, cfs->shader.tokens, &cfs->code);
return cfs;
}
@ -102,6 +104,8 @@ cell_delete_fs_state(struct pipe_context *pipe, void *fs)
{
struct cell_fragment_shader_state *cfs = cell_fragment_shader_state(fs);
spe_release_func(&cfs->code);
FREE((void *) cfs->shader.tokens);
FREE(cfs);
}

View File

@ -297,10 +297,9 @@ void cell_update_vertex_fetch(struct draw_context *draw)
/* Each fetch function can be a maximum of 34 instructions (note: this is
* actually a slight over-estimate). That means (34 * 4) = 136 bytes
* each maximum.
* actually a slight over-estimate).
*/
spe_init_func(p, 136 * unique_attr_formats);
spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats);
/* Allocate registers for the function's input parameters.

View File

@ -22,12 +22,15 @@ SOURCES = \
spu_render.c \
spu_texture.c \
spu_tile.c \
spu_tri.c \
spu_tri.c
OLD_SOURCES = \
spu_exec.c \
spu_util.c \
spu_vertex_fetch.c \
spu_vertex_shader.c
SPU_OBJECTS = $(SOURCES:.c=.o) \
SPU_ASM_OUT = $(SOURCES:.c=.s) \
@ -43,7 +46,7 @@ INCLUDE_DIRS = \
$(SPU_CC) $(SPU_CFLAGS) -c $<
.c.s:
$(SPU_CC) $(SPU_CFLAGS) -S $<
$(SPU_CC) $(SPU_CFLAGS) -O3 -S $<
# The .a file will be linked into the main/PPU executable

View File

@ -79,14 +79,14 @@ spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle)
static INLINE vector float
spu_unpack_color(uint color)
spu_unpack_B8G8R8A8(uint color)
{
vector unsigned int color_u4 = spu_splats(color);
color_u4 = spu_shuffle(color_u4, color_u4,
((vector unsigned char) {
0, 0, 0, 0,
5, 5, 5, 5,
10, 10, 10, 10,
5, 5, 5, 5,
0, 0, 0, 0,
15, 15, 15, 15}) );
return spu_convtf(color_u4, 32);
}

View File

@ -34,6 +34,7 @@
#include "spu_main.h"
#include "spu_render.h"
#include "spu_per_fragment_op.h"
#include "spu_texture.h"
#include "spu_tile.h"
//#include "spu_test.h"
@ -46,7 +47,7 @@
/*
helpful headers:
/usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h
/opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h
/opt/cell/sdk/usr/include/libmisc.h
*/
boolean Debug = FALSE;
@ -62,14 +63,6 @@ struct spu_vs_context draw;
static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]
ALIGN16_ATTRIB;
static unsigned char depth_stencil_code_buffer[4 * 64]
ALIGN16_ATTRIB;
static unsigned char fb_blend_code_buffer[4 * 64]
ALIGN16_ATTRIB;
static unsigned char logicop_code_buffer[4 * 64]
ALIGN16_ATTRIB;
/**
@ -226,6 +219,46 @@ cmd_release_verts(const struct cell_command_release_verts *release)
}
/**
* Process a CELL_CMD_STATE_FRAGMENT_OPS command.
* This involves installing new fragment ops SPU code.
* If this function is never called, we'll use a regular C fallback function
* for fragment processing.
*/
static void
cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
{
   if (Debug) {
      printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id);
   }

   /* Keep local copies of the blend and depth/stencil/alpha state
    * (for fallback case only).
    */
   memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
   memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));

   /* The depth/stencil read flags follow from the state just copied */
   spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled;
   spu.read_depth = spu.depth_stencil_alpha.depth.enabled;

   /* Copy the SPU code out of the batch buffer into the SPU-side buffer,
    * then point the function pointer at the new code.
    */
   memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
   spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code;
}
/**
 * Process a CELL_CMD_STATE_FRAGMENT_PROGRAM command.
 * Installs new fragment program SPU code from the batch buffer.
 */
static void
cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
{
   if (Debug) {
      printf("SPU %u: CMD_STATE_FRAGMENT_PROGRAM\n", spu.init.id);
   }

   /* Bring the program's machine code from the batch buffer into the
    * SPU-side code buffer.
    */
   memcpy(spu.fragment_program_code, fp->code,
          SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4);

#if 1
   /* Make the copied code the current fragment program */
   spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code;
#endif
}
static void
cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
{
@ -252,102 +285,24 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
switch (spu.fb.depth_format) {
case PIPE_FORMAT_Z32_UNORM:
spu.fb.zsize = 4;
spu.fb.zscale = (float) 0xffffffffu;
break;
case PIPE_FORMAT_Z24S8_UNORM:
case PIPE_FORMAT_S8Z24_UNORM:
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
spu.fb.zsize = 4;
spu.fb.zscale = (float) 0x00ffffffu;
break;
case PIPE_FORMAT_Z16_UNORM:
spu.fb.zsize = 2;
spu.fb.zscale = (float) 0xffffu;
break;
default:
spu.fb.zsize = 0;
break;
}
if (spu.fb.color_format == PIPE_FORMAT_A8R8G8B8_UNORM)
spu.color_shuffle = ((vector unsigned char) {
12, 0, 4, 8, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0});
else if (spu.fb.color_format == PIPE_FORMAT_B8G8R8A8_UNORM)
spu.color_shuffle = ((vector unsigned char) {
8, 4, 0, 12, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0});
else
ASSERT(0);
}
/**
 * Handle a CELL_CMD_STATE_BLEND command.
 *
 * When the command carries code (state->size != 0), the code is fetched
 * from state->base into fb_blend_code_buffer with mfc_get(), spu.blend is
 * pointed at that buffer and spu.read_fb is taken from the command.
 * When there is no code, only spu.read_fb is cleared; spu.blend is left
 * as it was.
 *
 * \param state  the blend command; state->base must be 16-byte aligned
 */
static void
cmd_state_blend(const struct cell_command_blend *state)
{
if (Debug)
printf("SPU %u: BLEND: enabled %d\n",
spu.init.id,
(state->size != 0));
ASSERT_ALIGN16(state->base);
if (state->size != 0) {
/* transfer size is rounded up to a multiple of 16 bytes */
mfc_get(fb_blend_code_buffer,
(unsigned int) state->base, /* src */
ROUNDUP16(state->size),
TAG_BATCH_BUFFER,
0, /* tid */
0 /* rid */);
/* presumably blocks until the transfer tagged above has completed */
wait_on_mask(1 << TAG_BATCH_BUFFER);
spu.blend = (blend_func) fb_blend_code_buffer;
spu.read_fb = state->read_fb;
} else {
spu.read_fb = FALSE;
}
}
/**
 * Handle a CELL_CMD_STATE_DEPTH_STENCIL command.
 *
 * When the command carries code (state->size != 0), the code is fetched
 * from state->base into depth_stencil_code_buffer with mfc_get().
 * Otherwise a single return instruction is written at the start of the
 * buffer.  In both cases spu.frag_test is pointed at the buffer and the
 * read_depth / read_stencil flags are taken from the command.
 *
 * \param state  the depth/stencil/alpha command; state->base must be
 *               16-byte aligned
 */
static void
cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *state)
{
if (Debug)
printf("SPU %u: DEPTH_STENCIL: ztest %d\n",
spu.init.id,
state->read_depth);
ASSERT_ALIGN16(state->base);
if (state->size != 0) {
/* transfer size is rounded up to a multiple of 16 bytes */
mfc_get(depth_stencil_code_buffer,
(unsigned int) state->base, /* src */
ROUNDUP16(state->size),
TAG_BATCH_BUFFER,
0, /* tid */
0 /* rid */);
/* presumably blocks until the transfer tagged above has completed */
wait_on_mask(1 << TAG_BATCH_BUFFER);
} else {
/* If there is no code, emit a return instruction.
*/
depth_stencil_code_buffer[0] = 0x35;
depth_stencil_code_buffer[1] = 0x00;
depth_stencil_code_buffer[2] = 0x00;
depth_stencil_code_buffer[3] = 0x00;
}
spu.frag_test = (frag_test_func) depth_stencil_code_buffer;
spu.read_depth = state->read_depth;
spu.read_stencil = state->read_stencil;
}
/**
 * Handle a CELL_CMD_STATE_LOGICOP command.
 *
 * Fetches code->size bytes from code->base into logicop_code_buffer with
 * mfc_get() and points spu.logicop at that buffer.
 * NOTE(review): unlike the blend and depth/stencil handlers, the size is
 * passed through without ROUNDUP16 and the base address is not checked
 * with ASSERT_ALIGN16 -- confirm the sender guarantees both.
 *
 * \param code  the logicop command (source address and size of the code)
 */
static void
cmd_state_logicop(const struct cell_command_logicop * code)
{
mfc_get(logicop_code_buffer,
(unsigned int) code->base, /* src */
code->size,
TAG_BATCH_BUFFER,
0, /* tid */
0 /* rid */);
/* presumably blocks until the transfer tagged above has completed */
wait_on_mask(1 << TAG_BATCH_BUFFER);
spu.logicop = (logicop_func) logicop_code_buffer;
}
@ -450,7 +405,9 @@ cmd_finish(void)
/**
* Execute a batch of commands
* Execute a batch of commands which was sent to us by the PPU.
* See the cell_emit_state.c code to see where the commands come from.
*
* The opcode param encodes the location of the buffer and its size.
*/
static void
@ -487,16 +444,14 @@ cmd_batch(uint opcode)
printf("SPU %u: release batch buf %u\n", spu.init.id, buf);
release_buffer(buf);
/*
* Loop over commands in the batch buffer
*/
for (pos = 0; pos < usize; /* no incr */) {
switch (buffer[pos]) {
case CELL_CMD_STATE_FRAMEBUFFER:
{
struct cell_command_framebuffer *fb
= (struct cell_command_framebuffer *) &buffer[pos];
cmd_state_framebuffer(fb);
pos += sizeof(*fb) / 8;
}
break;
/*
* rendering commands
*/
case CELL_CMD_CLEAR_SURFACE:
{
struct cell_command_clear_surface *clr
@ -514,26 +469,32 @@ cmd_batch(uint opcode)
pos += pos_incr;
}
break;
case CELL_CMD_RELEASE_VERTS:
/*
* state-update commands
*/
case CELL_CMD_STATE_FRAMEBUFFER:
{
struct cell_command_release_verts *release
= (struct cell_command_release_verts *) &buffer[pos];
cmd_release_verts(release);
pos += sizeof(*release) / 8;
struct cell_command_framebuffer *fb
= (struct cell_command_framebuffer *) &buffer[pos];
cmd_state_framebuffer(fb);
pos += sizeof(*fb) / 8;
}
break;
case CELL_CMD_FINISH:
cmd_finish();
pos += 1;
case CELL_CMD_STATE_FRAGMENT_OPS:
{
struct cell_command_fragment_ops *fops
= (struct cell_command_fragment_ops *) &buffer[pos];
cmd_state_fragment_ops(fops);
pos += sizeof(*fops) / 8;
}
break;
case CELL_CMD_STATE_BLEND:
cmd_state_blend((struct cell_command_blend *) &buffer[pos+1]);
pos += (1 + ROUNDUP8(sizeof(struct cell_command_blend)) / 8);
break;
case CELL_CMD_STATE_DEPTH_STENCIL:
cmd_state_depth_stencil((struct cell_command_depth_stencil_alpha_test *)
&buffer[pos+1]);
pos += (1 + ROUNDUP8(sizeof(struct cell_command_depth_stencil_alpha_test)) / 8);
case CELL_CMD_STATE_FRAGMENT_PROGRAM:
{
struct cell_command_fragment_program *fp
= (struct cell_command_fragment_program *) &buffer[pos];
cmd_state_fragment_program(fp);
pos += sizeof(*fp) / 8;
}
break;
case CELL_CMD_STATE_SAMPLER:
{
@ -569,8 +530,10 @@ cmd_batch(uint opcode)
pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8);
break;
case CELL_CMD_STATE_BIND_VS:
#if 0
spu_bind_vertex_shader(&draw,
(struct cell_shader_info *) &buffer[pos+1]);
#endif
pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8);
break;
case CELL_CMD_STATE_ATTRIB_FETCH:
@ -578,9 +541,20 @@ cmd_batch(uint opcode)
&buffer[pos+1]);
pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8);
break;
case CELL_CMD_STATE_LOGICOP:
cmd_state_logicop((struct cell_command_logicop *) &buffer[pos+1]);
pos += (1 + ROUNDUP8(sizeof(struct cell_command_logicop)) / 8);
/*
* misc commands
*/
case CELL_CMD_FINISH:
cmd_finish();
pos += 1;
break;
case CELL_CMD_RELEASE_VERTS:
{
struct cell_command_release_verts *release
= (struct cell_command_release_verts *) &buffer[pos];
cmd_release_verts(release);
pos += sizeof(*release) / 8;
}
break;
case CELL_CMD_FLUSH_BUFFER_RANGE: {
struct cell_buffer_range *br = (struct cell_buffer_range *)
@ -650,7 +624,9 @@ main_loop(void)
exitFlag = 1;
break;
case CELL_CMD_VS_EXECUTE:
#if 0
spu_execute_vertex_shader(&draw, &cmd.vs);
#endif
break;
case CELL_CMD_BATCH:
cmd_batch(opcode);
@ -675,6 +651,11 @@ one_time_init(void)
memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status));
memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status));
invalidate_tex_cache();
/* Install default/fallback fragment processing function.
* This will normally be overridden by a code-gen'd function.
*/
spu.fragment_ops = spu_fallback_fragment_ops;
}

View File

@ -41,6 +41,10 @@
#define MAX_HEIGHT 1024
/**
* A tile is basically a TILE_SIZE x TILE_SIZE block of 4-byte pixels.
* The data may be addressed through several different types.
*/
typedef union {
ushort us[TILE_SIZE][TILE_SIZE];
uint ui[TILE_SIZE][TILE_SIZE];
@ -56,38 +60,29 @@ typedef union {
#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */
struct spu_frag_test_results {
qword mask;
qword depth;
qword stencil;
};
/** Function for sampling textures */
typedef vector float (*spu_sample_texture_func)(uint unit,
vector float texcoord);
typedef struct spu_frag_test_results (*frag_test_func)(qword frag_mask,
qword pixel_depth, qword pixel_stencil, qword frag_depth,
qword frag_alpha, qword facing);
/** Function for performing per-fragment ops */
typedef void (*spu_fragment_ops_func)(uint x, uint y,
tile_t *colorTile,
tile_t *depthStencilTile,
vector float fragZ,
vector float fragRed,
vector float fragGreen,
vector float fragBlue,
vector float fragAlpha,
vector unsigned int mask);
/** Function for running fragment program */
typedef void (*spu_fragment_program_func)(vector float *inputs,
vector float *outputs,
vector float *constants);
struct spu_blend_results {
qword r;
qword g;
qword b;
qword a;
};
typedef struct spu_blend_results (*blend_func)(
qword frag_r, qword frag_g, qword frag_b, qword frag_a,
qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a,
qword const_r, qword const_g, qword const_b, qword const_a);
typedef struct spu_blend_results (*logicop_func)(
qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a,
qword frag_r, qword frag_g, qword frag_b, qword frag_a,
qword frag_mask);
typedef vector float (*sample_texture_func)(uint unit, vector float texcoord);
struct spu_framebuffer {
struct spu_framebuffer
{
void *color_start; /**< addr of color surface in main memory */
void *depth_start; /**< addr of depth surface in main memory */
enum pipe_format color_format;
@ -99,6 +94,7 @@ struct spu_framebuffer {
uint depth_clear_value;
uint zsize; /**< 0, 2 or 4 bytes per Z */
float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */
} ALIGN16_ATTRIB;
@ -115,35 +111,31 @@ struct spu_texture
/**
* All SPU global/context state will be in singleton object of this type:
* All SPU global/context state will be in a singleton object of this type:
*/
struct spu_global
{
/** One-time init/constant info */
struct cell_init_info init;
/*
* Current state
*/
struct spu_framebuffer fb;
boolean read_depth;
boolean read_stencil;
frag_test_func frag_test; /**< Current depth/stencil test code */
boolean read_fb; /**< Does current blend mode require framebuffer read? */
blend_func blend; /**< Current blend code */
qword const_blend_color[4] ALIGN16_ATTRIB;
logicop_func logicop; /**< Current logicop code **/
struct pipe_depth_stencil_alpha_state depth_stencil_alpha;
struct pipe_blend_state blend;
struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS];
struct spu_texture texture[PIPE_MAX_SAMPLERS];
struct vertex_info vertex_info;
/* XXX more state to come */
/** current color and Z tiles */
/** Current color and Z tiles */
tile_t ctile ALIGN16_ATTRIB;
tile_t ztile ALIGN16_ATTRIB;
/** Read depth/stencil tiles? */
boolean read_depth;
boolean read_stencil;
/** Current tiles' status */
ubyte cur_ctile_status, cur_ztile_status;
@ -151,11 +143,22 @@ struct spu_global
ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
/** Current fragment ops machine code */
uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS];
/** Current fragment ops function */
spu_fragment_ops_func fragment_ops;
/** for converting RGBA to PIPE_FORMAT_x colors */
vector unsigned char color_shuffle;
/** Current fragment program machine code */
uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
/** Current fragment program function */
spu_fragment_program_func fragment_program;
sample_texture_func sample_texture[CELL_MAX_SAMPLERS];
/** Current texture sampler function */
spu_sample_texture_func sample_texture[CELL_MAX_SAMPLERS];
/** Fragment program constants (XXX preliminary/used) */
#define MAX_CONSTANTS 32
vector float constants[MAX_CONSTANTS];
} ALIGN16_ATTRIB;

View File

@ -1,211 +1,475 @@
/*
* (C) Copyright IBM Corporation 2008
/**************************************************************************
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/**
* \file spu_per_fragment_op.c
* SPU implementation of various per-fragment operations.
*
* \author Ian Romanick <idr@us.ibm.com>
* \author Brian Paul
*/
#include <transpose_matrix4x4.h>
#include "pipe/p_format.h"
#include "spu_main.h"
#include "spu_colorpack.h"
#include "spu_per_fragment_op.h"
#define ZERO 0x80
static void
read_ds_quad(tile_t *buffer, unsigned x, unsigned y,
enum pipe_format depth_format, qword *depth,
qword *stencil)
#define LINEAR_QUAD_LAYOUT 1
/**
* Called by rasterizer for each quad after the shader has run. Do
* all the per-fragment operations including alpha test, z test,
* stencil test, blend, colormask and logicops. This is a
* fallback/debug function. In reality we'll use a generated function
* produced by the PPU. But this function is useful for
* debug/validation.
*/
void
spu_fallback_fragment_ops(uint x, uint y,
tile_t *colorTile,
tile_t *depthStencilTile,
vector float fragZ,
vector float fragR,
vector float fragG,
vector float fragB,
vector float fragA,
vector unsigned int mask)
{
const int ix = x / 2;
const int iy = y / 2;
vector float frag_aos[4];
unsigned int c0, c1, c2, c3;
switch (depth_format) {
case PIPE_FORMAT_Z16_UNORM: {
qword *ptr = (qword *) &buffer->us8[iy][ix / 2];
/* do alpha test */
if (spu.depth_stencil_alpha.alpha.enabled) {
vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref);
vector unsigned int amask;
const qword shuf_vec = (qword) {
ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3,
ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7
};
switch (spu.depth_stencil_alpha.alpha.func) {
case PIPE_FUNC_LESS:
amask = spu_cmpgt(ref, fragA); /* mask = (fragA < ref) */
break;
case PIPE_FUNC_GREATER:
amask = spu_cmpgt(fragA, ref); /* mask = (fragA > ref) */
break;
case PIPE_FUNC_GEQUAL:
amask = spu_cmpgt(ref, fragA);
amask = spu_nor(amask, amask);
break;
case PIPE_FUNC_LEQUAL:
amask = spu_cmpgt(fragA, ref);
amask = spu_nor(amask, amask);
break;
case PIPE_FUNC_EQUAL:
amask = spu_cmpeq(ref, fragA);
break;
case PIPE_FUNC_NOTEQUAL:
amask = spu_cmpeq(ref, fragA);
amask = spu_nor(amask, amask);
break;
case PIPE_FUNC_ALWAYS:
amask = spu_splats(0xffffffffU);
break;
case PIPE_FUNC_NEVER:
amask = spu_splats( 0x0U);
break;
default:
;
}
mask = spu_and(mask, amask);
}
/* At even X values we want the first 4 shorts, and at odd X values we
* want the second 4 shorts.
/* Z and/or stencil testing... */
if (spu.depth_stencil_alpha.depth.enabled ||
spu.depth_stencil_alpha.stencil[0].enabled) {
/* get four Z/Stencil values from tile */
vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU);
vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2];
vector unsigned int ifbZ = spu_and(ifbZS, mask24);
vector unsigned int ifbS = spu_andc(ifbZS, mask24);
if (spu.depth_stencil_alpha.stencil[0].enabled) {
/* do stencil test */
ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM);
}
else if (spu.depth_stencil_alpha.depth.enabled) {
/* do depth test */
ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM ||
spu.fb.depth_format == PIPE_FORMAT_X8Z24_UNORM);
vector unsigned int ifragZ;
vector unsigned int zmask;
/* convert four fragZ from float to uint */
fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff));
ifragZ = spu_convtu(fragZ, 0);
/* do depth comparison, setting zmask with results */
switch (spu.depth_stencil_alpha.depth.func) {
case PIPE_FUNC_LESS:
zmask = spu_cmpgt(ifbZ, ifragZ); /* mask = (ifragZ < ifbZ) */
break;
case PIPE_FUNC_GREATER:
zmask = spu_cmpgt(ifragZ, ifbZ); /* mask = (ifragZ > ifbZ) */
break;
case PIPE_FUNC_GEQUAL:
zmask = spu_cmpgt(ifbZ, ifragZ);
zmask = spu_nor(zmask, zmask);
break;
case PIPE_FUNC_LEQUAL:
zmask = spu_cmpgt(ifragZ, ifbZ);
zmask = spu_nor(zmask, zmask);
break;
case PIPE_FUNC_EQUAL:
zmask = spu_cmpeq(ifbZ, ifragZ);
break;
case PIPE_FUNC_NOTEQUAL:
zmask = spu_cmpeq(ifbZ, ifragZ);
zmask = spu_nor(zmask, zmask);
break;
case PIPE_FUNC_ALWAYS:
zmask = spu_splats(0xffffffffU);
break;
case PIPE_FUNC_NEVER:
zmask = spu_splats( 0x0U);
break;
default:
;
}
mask = spu_and(mask, zmask);
/* merge framebuffer Z and fragment Z according to the mask */
ifbZ = spu_or(spu_and(ifragZ, mask),
spu_andc(ifbZ, mask));
}
if (spu_extract(spu_orx(mask), 0)) {
/* put new fragment Z/Stencil values back into Z/Stencil tile */
depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS);
spu.cur_ztile_status = TILE_STATUS_DIRTY;
}
}
if (spu.blend.blend_enable) {
/* blending terms, misc regs */
vector float term1r, term1g, term1b, term1a;
vector float term2r, term2g, term2b, term2a;
vector float one, tmp;
vector float fbRGBA[4]; /* current framebuffer colors */
/* get colors from framebuffer/tile */
{
vector float fc[4];
uint c0, c1, c2, c3;
#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */
c0 = colorTile->ui[y][x*2+0];
c1 = colorTile->ui[y][x*2+1];
c2 = colorTile->ui[y][x*2+2];
c3 = colorTile->ui[y][x*2+3];
#else
c0 = colorTile->ui[y+0][x+0];
c1 = colorTile->ui[y+0][x+1];
c2 = colorTile->ui[y+1][x+0];
c3 = colorTile->ui[y+1][x+1];
#endif
switch (spu.fb.color_format) {
case PIPE_FORMAT_B8G8R8A8_UNORM:
fc[0] = spu_unpack_B8G8R8A8(c0);
fc[1] = spu_unpack_B8G8R8A8(c1);
fc[2] = spu_unpack_B8G8R8A8(c2);
fc[3] = spu_unpack_B8G8R8A8(c3);
break;
case PIPE_FORMAT_A8R8G8B8_UNORM:
fc[0] = spu_unpack_A8R8G8B8(c0);
fc[1] = spu_unpack_A8R8G8B8(c1);
fc[2] = spu_unpack_A8R8G8B8(c2);
fc[3] = spu_unpack_A8R8G8B8(c3);
break;
default:
ASSERT(0);
}
_transpose_matrix4x4(fbRGBA, fc);
}
/*
* Compute Src RGB terms
*/
qword bias = (qword) spu_splats((unsigned char) ((ix & 0x01) << 3));
qword bias_mask = si_fsmbi(0x3333);
qword sv = si_a(shuf_vec, si_and(bias_mask, bias));
switch (spu.blend.rgb_src_factor) {
case PIPE_BLENDFACTOR_ONE:
term1r = fragR;
term1g = fragG;
term1b = fragB;
break;
case PIPE_BLENDFACTOR_ZERO:
term1r =
term1g =
term1b = spu_splats(0.0f);
break;
case PIPE_BLENDFACTOR_SRC_COLOR:
term1r = spu_mul(fragR, fragR);
term1g = spu_mul(fragG, fragG);
term1b = spu_mul(fragB, fragB);
break;
case PIPE_BLENDFACTOR_SRC_ALPHA:
term1r = spu_mul(fragR, fragA);
term1g = spu_mul(fragG, fragA);
term1b = spu_mul(fragB, fragA);
break;
/* XXX more cases */
default:
ASSERT(0);
}
*depth = si_shufb(*ptr, *ptr, sv);
*stencil = si_il(0);
break;
/*
* Compute Src Alpha term
*/
switch (spu.blend.alpha_src_factor) {
case PIPE_BLENDFACTOR_ONE:
term1a = fragA;
break;
case PIPE_BLENDFACTOR_SRC_COLOR:
term1a = spu_splats(0.0f);
break;
case PIPE_BLENDFACTOR_SRC_ALPHA:
term1a = spu_mul(fragA, fragA);
break;
/* XXX more cases */
default:
ASSERT(0);
}
/*
* Compute Dest RGB terms
*/
switch (spu.blend.rgb_dst_factor) {
case PIPE_BLENDFACTOR_ONE:
term2r = fragR;
term2g = fragG;
term2b = fragB;
break;
case PIPE_BLENDFACTOR_ZERO:
term2r =
term2g =
term2b = spu_splats(0.0f);
break;
case PIPE_BLENDFACTOR_SRC_COLOR:
term2r = spu_mul(fbRGBA[0], fragR);
term2g = spu_mul(fbRGBA[1], fragG);
term2b = spu_mul(fbRGBA[2], fragB);
break;
case PIPE_BLENDFACTOR_SRC_ALPHA:
term2r = spu_mul(fbRGBA[0], fragA);
term2g = spu_mul(fbRGBA[1], fragA);
term2b = spu_mul(fbRGBA[2], fragA);
break;
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
one = spu_splats(1.0f);
tmp = spu_sub(one, fragA);
term2r = spu_mul(fbRGBA[0], tmp);
term2g = spu_mul(fbRGBA[1], tmp);
term2b = spu_mul(fbRGBA[2], tmp);
break;
/* XXX more cases */
default:
ASSERT(0);
}
/*
* Compute Dest Alpha term
*/
switch (spu.blend.alpha_dst_factor) {
case PIPE_BLENDFACTOR_ONE:
term2a = fragA;
break;
case PIPE_BLENDFACTOR_SRC_COLOR:
term2a = spu_splats(0.0f);
break;
case PIPE_BLENDFACTOR_SRC_ALPHA:
term2a = spu_mul(fbRGBA[3], fragA);
break;
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
one = spu_splats(1.0f);
tmp = spu_sub(one, fragA);
term2a = spu_mul(fbRGBA[3], tmp);
break;
/* XXX more cases */
default:
ASSERT(0);
}
/*
* Combine Src/Dest RGB terms
*/
switch (spu.blend.rgb_func) {
case PIPE_BLEND_ADD:
fragR = spu_add(term1r, term2r);
fragG = spu_add(term1g, term2g);
fragB = spu_add(term1b, term2b);
break;
case PIPE_BLEND_SUBTRACT:
fragR = spu_sub(term1r, term2r);
fragG = spu_sub(term1g, term2g);
fragB = spu_sub(term1b, term2b);
break;
/* XXX more cases */
default:
ASSERT(0);
}
/*
* Combine Src/Dest A term
*/
switch (spu.blend.alpha_func) {
case PIPE_BLEND_ADD:
fragA = spu_add(term1a, term2a);
break;
case PIPE_BLEND_SUBTRACT:
fragA = spu_sub(term1a, term2a);
break;
/* XXX more cases */
default:
ASSERT(0);
}
}
case PIPE_FORMAT_Z32_UNORM: {
qword *ptr = (qword *) &buffer->ui4[iy][ix];
*depth = *ptr;
*stencil = si_il(0);
break;
}
case PIPE_FORMAT_Z24S8_UNORM: {
qword *ptr = (qword *) &buffer->ui4[iy][ix];
qword mask = si_fsmbi(0xEEEE);
*depth = si_rotmai(si_and(*ptr, mask), -8);
*stencil = si_andc(*ptr, mask);
break;
}
case PIPE_FORMAT_S8Z24_UNORM: {
qword *ptr = (qword *) &buffer->ui4[iy][ix];
*depth = si_and(*ptr, si_fsmbi(0x7777));
*stencil = si_andi(si_roti(*ptr, 8), 0x0ff);
break;
}
default:
ASSERT(0);
break;
}
}
/**
 * Store one quad's depth (and, for the packed formats, stencil) values
 * into a depth/stencil tile.
 *
 * \param buffer        tile that receives the values
 * \param x, y          quad position; both are halved to index the tile
 * \param depth_format  layout of the values held in the tile
 * \param depth         depth values for the quad
 * \param stencil       stencil values for the quad; only consulted for
 *                      the Z24S8 and S8Z24 formats
 */
static void
write_ds_quad(tile_t *buffer, unsigned x, unsigned y,
              enum pipe_format depth_format,
              qword depth, qword stencil)
{
   const int col = x / 2;
   const int row = y / 2;

   switch (depth_format) {
   case PIPE_FORMAT_Z16_UNORM: {
      /* One quadword of the tile is shared by two horizontally adjacent
       * quads, hence the extra halving of the column.
       */
      qword *dst = (qword *) &buffer->us8[row][col / 2];
      qword pattern;

      if ((col & 0x01) == 0) {
         /* Even column: bytes 2-3 of each word of 'depth' fill the first
          * eight bytes, the last eight bytes of the tile data are kept.
          */
         pattern = (qword) { 2, 3, 6, 7, 10, 11, 14, 15,
                             24, 25, 26, 27, 28, 29, 30, 31 };
      }
      else {
         /* Odd column: the first eight bytes of the tile data are kept,
          * bytes 2-3 of each word of 'depth' fill the last eight bytes.
          */
         pattern = (qword) { 16, 17, 18, 19, 20 , 21, 22, 23,
                             2, 3, 6, 7, 10, 11, 14, 15 };
      }

      *dst = si_shufb(depth, *dst, pattern);
      break;
   }
   case PIPE_FORMAT_Z32_UNORM: {
      /* Depth-only format: the quadword is stored unchanged. */
      qword *dst = (qword *) &buffer->ui4[row][col];

      *dst = depth;
      break;
   }
   case PIPE_FORMAT_Z24S8_UNORM: {
      /* Depth is shifted up by 8 bits and selected through the 0xEEEE
       * byte mask; stencil supplies the remaining bits.
       */
      qword *dst = (qword *) &buffer->ui4[row][col];
      const qword depth_bits = si_fsmbi(0xEEEE);

      *dst = si_selb(stencil, si_shli(depth, 8), depth_bits);
      break;
   }
   case PIPE_FORMAT_S8Z24_UNORM: {
      /* Stencil is shifted up by 24 bits; depth is selected through the
       * 0x7777 byte mask.
       */
      qword *dst = (qword *) &buffer->ui4[row][col];
      const qword depth_bits = si_fsmbi(0x7777);

      *dst = si_selb(si_shli(stencil, 24), depth, depth_bits);
      break;
   }
   default:
      ASSERT(0);
      break;
   }
}
qword
spu_do_depth_stencil(int x, int y,
qword frag_mask, qword frag_depth, qword frag_alpha,
qword facing)
{
struct spu_frag_test_results result;
qword pixel_depth;
qword pixel_stencil;
/* All of this preamble code (everything before the call to frag_test) should
* be generated on the PPU and uploaded to the SPU.
/*
* Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA.
*/
if (spu.read_depth || spu.read_stencil) {
read_ds_quad(&spu.ztile, x, y, spu.fb.depth_format,
&pixel_depth, &pixel_stencil);
#if 0
/* original code */
{
vector float frag_soa[4];
frag_soa[0] = fragR;
frag_soa[1] = fragG;
frag_soa[2] = fragB;
frag_soa[3] = fragA;
_transpose_matrix4x4(frag_aos, frag_soa);
}
switch (spu.fb.depth_format) {
case PIPE_FORMAT_Z16_UNORM:
frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x0000ffffu)));
frag_depth = si_cfltu(frag_depth, 0);
#else
/* short-cut relying on function parameter layout: */
_transpose_matrix4x4(frag_aos, &fragR);
(void) fragG;
(void) fragB;
#endif
/*
* Pack float colors into 32-bit RGBA words.
*/
switch (spu.fb.color_format) {
case PIPE_FORMAT_A8R8G8B8_UNORM:
c0 = spu_pack_A8R8G8B8(frag_aos[0]);
c1 = spu_pack_A8R8G8B8(frag_aos[1]);
c2 = spu_pack_A8R8G8B8(frag_aos[2]);
c3 = spu_pack_A8R8G8B8(frag_aos[3]);
break;
case PIPE_FORMAT_Z32_UNORM:
frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0xffffffffu)));
frag_depth = si_cfltu(frag_depth, 0);
break;
case PIPE_FORMAT_Z24S8_UNORM:
case PIPE_FORMAT_S8Z24_UNORM:
frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x00ffffffu)));
frag_depth = si_cfltu(frag_depth, 0);
case PIPE_FORMAT_B8G8R8A8_UNORM:
c0 = spu_pack_B8G8R8A8(frag_aos[0]);
c1 = spu_pack_B8G8R8A8(frag_aos[1]);
c2 = spu_pack_B8G8R8A8(frag_aos[2]);
c3 = spu_pack_B8G8R8A8(frag_aos[3]);
break;
default:
fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n");
ASSERT(0);
break;
}
result = (*spu.frag_test)(frag_mask, pixel_depth, pixel_stencil,
frag_depth, frag_alpha, facing);
/* This code (everything after the call to frag_test) should
* be generated on the PPU and uploaded to the SPU.
/*
* Color masking
*/
if (spu.read_depth || spu.read_stencil) {
write_ds_quad(&spu.ztile, x, y, spu.fb.depth_format,
result.depth, result.stencil);
if (spu.blend.colormask != 0xf) {
/* XXX to do */
/* apply color mask to 32-bit packed colors */
}
return result.mask;
/*
* Logic Ops
*/
if (spu.blend.logicop_enable) {
/* XXX to do */
/* apply logicop to 32-bit packed colors */
}
/*
* If mask is non-zero, mark tile as dirty.
*/
if (spu_extract(spu_orx(mask), 0)) {
spu.cur_ctile_status = TILE_STATUS_DIRTY;
}
else {
return;
}
/*
* Write new quad colors to the framebuffer/tile.
* Only write pixels where the corresponding mask word is set.
*/
#if LINEAR_QUAD_LAYOUT
/*
* Quad layout:
* +--+--+--+--+
* |p0|p1|p2|p3|
* +--+--+--+--+
*/
if (spu_extract(mask, 0))
colorTile->ui[y][x*2] = c0;
if (spu_extract(mask, 1))
colorTile->ui[y][x*2+1] = c1;
if (spu_extract(mask, 2))
colorTile->ui[y][x*2+2] = c2;
if (spu_extract(mask, 3))
colorTile->ui[y][x*2+3] = c3;
#else
/*
* Quad layout:
* +--+--+
* |p0|p1|
* +--+--+
* |p2|p3|
* +--+--+
*/
if (spu_extract(mask, 0))
colorTile->ui[y+0][x+0] = c0;
if (spu_extract(mask, 1))
colorTile->ui[y+0][x+1] = c1;
if (spu_extract(mask, 2))
colorTile->ui[y+1][x+0] = c2;
if (spu_extract(mask, 3))
colorTile->ui[y+1][x+1] = c3;
#endif
}

View File

@ -1,32 +1,44 @@
/*
* (C) Copyright IBM Corporation 2008
/**************************************************************************
*
* Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/* Prototypes for the SPU per-fragment operation entry points. */
#ifndef SPU_PER_FRAGMENT_OP
#define SPU_PER_FRAGMENT_OP
/**
 * Depth/stencil handling for the quad at (x, y).
 * Takes the incoming fragment mask, the fragment depth and alpha values
 * and a facing value, and returns a mask.
 * NOTE(review): the returned mask appears to be the one produced by the
 * spu.frag_test callback -- confirm against the definition.
 */
extern qword
spu_do_depth_stencil(int x, int y, qword frag_mask, qword frag_depth,
qword frag_alpha, qword facing);
/**
 * Fallback/debug implementation of the per-fragment operations for one
 * quad: alpha test, Z test, stencil test, blend, colormask and logicops.
 * fragZ and the four color arguments each carry one component for the
 * quad's fragments; mask gates which fragments get written to the tiles.
 */
extern void
spu_fallback_fragment_ops(uint x, uint y,
tile_t *colorTile,
tile_t *depthStencilTile,
vector float fragZ,
vector float fragRed,
vector float fragGreen,
vector float fragBlue,
vector float fragAlpha,
vector unsigned int mask);
#endif /* SPU_PER_FRAGMENT_OP */

View File

@ -97,7 +97,7 @@ get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels)
const qword offset_y = si_andi((qword) y, 0x1f);
const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row);
const qword tile_size = (qword) spu_splats(sizeof(tile_t));
const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t));
qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x);
tile_offset = si_mpy((qword) tile_offset, tile_size);

View File

@ -38,7 +38,6 @@
#include "spu_texture.h"
#include "spu_tile.h"
#include "spu_tri.h"
#include "spu_per_fragment_op.h"
/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
@ -209,7 +208,7 @@ clip_emit_quad(struct setup_stage *setup)
/**
* Evaluate attribute coefficients (plane equations) to compute
* attribute values for the four fragments in a quad.
* Eg: four colors will be compute.
* Eg: four colors will be computed (in AoS format).
*/
static INLINE void
eval_coeff(uint slot, float x, float y, vector float result[4])
@ -255,31 +254,6 @@ eval_z(float x, float y)
}
/**
 * Run the depth/stencil test for the quad at window position (x, y).
 *
 * \param x, y      quad position; converted to tile-relative coordinates
 *                  by subtracting the clip rectangle origin
 * \param quadmask  mask of the fragments entering the test
 * \return quadmask unchanged when no depth buffer format is set,
 *         otherwise the mask returned by spu_do_depth_stencil()
 */
static INLINE mask_t
do_depth_test(int x, int y, mask_t quadmask)
{
   if (spu.fb.depth_format != PIPE_FORMAT_NONE) {
      /* Fixed values passed as the frag_alpha and facing arguments. */
      const qword alpha = (qword) spu_splats((unsigned char) 0x0ffu);
      const qword facing = (qword) spu_splats((unsigned int) 0x01u);
      mask_t passed;
      float4 quad_z;

      quad_z.v = eval_z((float) x, (float) y);

      passed = (mask_t) spu_do_depth_stencil(x - setup.cliprect_minx,
                                             y - setup.cliprect_miny,
                                             (qword) quadmask,
                                             (qword) quad_z.v,
                                             alpha,
                                             facing);

      /* Any set bit in the result marks the Z tile as modified. */
      if (spu_extract(spu_orx(passed), 0)) {
         spu.cur_ztile_status = TILE_STATUS_DIRTY;
      }

      return passed;
   }

   return quadmask;
}
/**
* Emit a quad (pass to next stage). No clipping is done.
* Note: about 1/5 to 1/7 of the time, mask is zero and this function
@ -289,18 +263,6 @@ do_depth_test(int x, int y, mask_t quadmask)
static INLINE void
emit_quad( int x, int y, mask_t mask )
{
#if 0
struct softpipe_context *sp = setup.softpipe;
setup.quad.x0 = x;
setup.quad.y0 = y;
setup.quad.mask = mask;
sp->quad.first->run(sp->quad.first, &setup.quad);
#else
if (spu.read_depth) {
mask = do_depth_test(x, y, mask);
}
/* If any bits in mask are set... */
if (spu_extract(spu_orx(mask), 0)) {
const int ix = x - setup.cliprect_minx;
@ -308,6 +270,7 @@ emit_quad( int x, int y, mask_t mask )
vector float colors[4];
spu.cur_ctile_status = TILE_STATUS_DIRTY;
spu.cur_ztile_status = TILE_STATUS_DIRTY;
if (spu.texture[0].start) {
/* texture mapping */
@ -351,59 +314,68 @@ emit_quad( int x, int y, mask_t mask )
}
else {
/* simple shading */
#if 0
eval_coeff(1, (float) x, (float) y, colors);
}
#else
/* XXX new fragment program code */
/* Convert fragment data from AoS to SoA format.
*/
qword soa_frag[4];
_transpose_matrix4x4((vec_float4 *) soa_frag, colors);
if (spu.fragment_program) {
vector float inputs[4*4], outputs[2*4];
/* Read the current framebuffer values.
*/
const qword pix[4] = {
(qword) spu_splats(spu.ctile.ui[iy+0][ix+0]),
(qword) spu_splats(spu.ctile.ui[iy+0][ix+1]),
(qword) spu_splats(spu.ctile.ui[iy+1][ix+0]),
(qword) spu_splats(spu.ctile.ui[iy+1][ix+1]),
};
/* setup inputs */
eval_coeff(1, (float) x, (float) y, inputs);
qword soa_pix[4];
/* Execute the current fragment program */
spu.fragment_program(inputs, outputs, spu.constants);
if (spu.read_fb) {
/* Convert pixel data from AoS to SoA format.
*/
vec_float4 aos_pix[4] = {
spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+0]),
spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+1]),
spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+0]),
spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+1]),
};
/* Copy outputs */
colors[0] = outputs[0*4+0];
colors[1] = outputs[0*4+1];
colors[2] = outputs[0*4+2];
colors[3] = outputs[0*4+3];
_transpose_matrix4x4((vec_float4 *) soa_pix, aos_pix);
}
if (0 && spu.init.id==0 && y == 48) {
printf("colors[0] = %f %f %f %f\n",
spu_extract(colors[0], 0),
spu_extract(colors[0], 1),
spu_extract(colors[0], 2),
spu_extract(colors[0], 3));
printf("colors[1] = %f %f %f %f\n",
spu_extract(colors[1], 0),
spu_extract(colors[1], 1),
spu_extract(colors[1], 2),
spu_extract(colors[1], 3));
}
struct spu_blend_results result =
(*spu.blend)(soa_frag[0], soa_frag[1], soa_frag[2], soa_frag[3],
soa_pix[0], soa_pix[1], soa_pix[2], soa_pix[3],
spu.const_blend_color[0], spu.const_blend_color[1],
spu.const_blend_color[2], spu.const_blend_color[3]);
/* Convert final pixel data from SoA to AoS format.
*/
result = (*spu.logicop)(pix[0], pix[1], pix[2], pix[3],
result.r, result.g, result.b, result.a,
(qword) mask);
spu.ctile.ui[iy+0][ix+0] = spu_extract((vec_uint4) result.r, 0);
spu.ctile.ui[iy+0][ix+1] = spu_extract((vec_uint4) result.g, 0);
spu.ctile.ui[iy+1][ix+0] = spu_extract((vec_uint4) result.b, 0);
spu.ctile.ui[iy+1][ix+1] = spu_extract((vec_uint4) result.a, 0);
}
}
#endif
}
{
/* Convert fragment data from AoS to SoA format.
* I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA)
* This is temporary!
*/
vector float soa_frag[4];
_transpose_matrix4x4(soa_frag, colors);
float4 fragZ;
fragZ.v = eval_z((float) x, (float) y);
/* Do all per-fragment/quad operations here, including:
* alpha test, z test, stencil test, blend and framebuffer writing.
*/
spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile,
fragZ.v,
soa_frag[0], soa_frag[1],
soa_frag[2], soa_frag[3],
mask);
}
}
}

View File

@ -349,12 +349,17 @@ create_xmesa_buffer(XMesaDrawable d, BufferType type,
if (vis->mesa_visual.depthBits == 0)
depthFormat = PIPE_FORMAT_NONE;
#ifdef GALLIUM_CELL /* XXX temporary for Cell! */
else
depthFormat = PIPE_FORMAT_S8Z24_UNORM;
#else
else if (vis->mesa_visual.depthBits <= 16)
depthFormat = PIPE_FORMAT_Z16_UNORM;
else if (vis->mesa_visual.depthBits <= 24)
depthFormat = PIPE_FORMAT_S8Z24_UNORM;
else
depthFormat = PIPE_FORMAT_Z32_UNORM;
#endif
if (vis->mesa_visual.stencilBits == 8) {
if (depthFormat == PIPE_FORMAT_S8Z24_UNORM)

View File

@ -275,6 +275,37 @@ xm_buffer_destroy(struct pipe_winsys *pws,
}
/**
 * For Cell.  Convert a tile whose quads are stored as four consecutive
 * pixels:
 *    +--+--+--+--+
 *    |p0|p1|p2|p3|....
 *    +--+--+--+--+
 *
 * into ordinary row-major order, where each quad occupies a 2x2 block:
 *    +--+--+
 *    |p0|p1|....
 *    +--+--+
 *    |p2|p3|
 *    +--+--+
 *
 * \param tileIn   source tile, TILE_SIZE * TILE_SIZE pixels, quad-linear
 * \param tileOut  destination tile, TILE_SIZE * TILE_SIZE pixels
 */
static void
twiddle_tile(const uint *tileIn, uint *tileOut)
{
   int row, col;

   for (row = 0; row < TILE_SIZE; row += 2) {
      /* The two output rows covered by this row of quads. */
      uint *upper = tileOut + row * TILE_SIZE;
      uint *lower = tileOut + (row + 1) * TILE_SIZE;

      for (col = 0; col < TILE_SIZE; col += 2) {
         /* Four consecutive source pixels make up one quad. */
         const uint *quad = tileIn + 4 * (row/2 * TILE_SIZE/2 + col/2);

         upper[col + 0] = quad[0];
         upper[col + 1] = quad[1];
         lower[col + 0] = quad[2];
         lower[col + 1] = quad[3];
      }
   }
}
/**
* Display a surface that's in a tiled configuration. That is, all the
* pixels for a TILE_SIZExTILE_SIZE block are contiguous in memory.
@ -306,6 +337,7 @@ xmesa_display_surface_tiled(XMesaBuffer b, const struct pipe_surface *surf)
for (y = 0; y < surf->height; y += TILE_SIZE) {
for (x = 0; x < surf->width; x += TILE_SIZE) {
uint tmpTile[TILE_SIZE * TILE_SIZE];
int tx = x / TILE_SIZE;
int ty = y / TILE_SIZE;
int offset = ty * tilesPerRow + tx;
@ -319,7 +351,9 @@ xmesa_display_surface_tiled(XMesaBuffer b, const struct pipe_surface *surf)
offset *= 4 * TILE_SIZE * TILE_SIZE;
ximage->data = (char *) xm_buf->data + offset;
twiddle_tile((uint *) ((char *) xm_buf->data + offset),
tmpTile);
ximage->data = (char*) tmpTile;
if (XSHM_ENABLED(xm_buf)) {
#if defined(USE_XSHM) && !defined(XFree86Server)

View File

@ -1317,7 +1317,12 @@ processWindowWorkList(GLUTwindow * window)
is where the finish works gets queued for indirect
contexts. */
__glutSetWindow(window);
glFinish();
#if !defined(_WIN32)
if (!window->isDirect)
#endif
{
glFinish();
}
}
if (workMask & GLUT_DEBUG_WORK) {
__glutSetWindow(window);