Consolidation of asm code in 3.5

This commit is contained in:
Gareth Hughes 2001-03-29 06:46:15 +00:00
parent 8e48a232fe
commit 1b2fef5c28
25 changed files with 7028 additions and 338 deletions

View File

@ -1,4 +1,4 @@
# $Id: Make-config,v 1.40 2001/03/29 03:41:39 gareth Exp $
# $Id: Make-config,v 1.41 2001/03/29 06:46:15 gareth Exp $
MESA_MAJOR=3
MESA_MINOR=5
@ -220,9 +220,7 @@ freebsd-386:
"MAKELIB = ../bin/mklib.freebsd" \
"APP_LIB_DEPS = -L/usr/X11R6/lib -lXext -lXmu -lXi -lX11 -lm" \
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
X86/x86_cliptest.S"
gcc:
@ -556,26 +554,22 @@ linux-x86:
"OSMESA_LIB = libOSMesa.so" \
"CC = gcc -malign-loops=2 -malign-jumps=2 -malign-functions=2" \
"CPLUSPLUS = g++" \
"CFLAGS = -Wall -O3 -ansi -pedantic -fPIC -ffast-math -funroll-loops -fomit-frame-pointer -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_XSHM -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_KATMAI_ASM -DPTHREADS -I/usr/X11R6/include" \
"CFLAGS = -Wall -O3 -ansi -pedantic -fPIC -ffast-math -funroll-loops -fomit-frame-pointer -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_XSHM -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM -DPTHREADS -I/usr/X11R6/include" \
"MAKELIB = ../bin/mklib.linux" \
"GL_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXext -lm -lm -lpthread" \
"GLU_LIB_DEPS = -L../lib -lGL -lm" \
"GLUT_LIB_DEPS = -L../lib -lGLU -lGL -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm" \
"APP_LIB_DEPS = -lm" \
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
X86/x86_cliptest.S X86/x86_vertex.S \
X86/mmx_blend.S \
X86/3dnow_xform_raw2.S X86/3dnow_xform_masked2.S \
X86/3dnow_xform_raw3.S X86/3dnow_xform_masked3.S \
X86/3dnow_xform_raw4.S X86/3dnow_xform_masked4.S \
X86/3dnow_norm_raw.S X86/3dnow_vertex.S \
X86/katmai_xform_raw2.S X86/katmai_xform_masked2.S \
X86/katmai_xform_raw3.S X86/katmai_xform_masked3.S \
X86/katmai_xform_raw4.S X86/katmai_xform_masked4.S \
X86/katmai_norm_raw.S X86/katmai_vertex.S"
X86/3dnow_xform1.S X86/3dnow_xform2.S \
X86/3dnow_xform3.S X86/3dnow_xform4.S \
X86/3dnow_normal.S X86/3dnow_vertex.S \
X86/sse_xform1.S X86/sse_xform2.S \
X86/sse_xform3.S X86/sse_xform4.S \
X86/sse_normal.S X86/sse_vertex.S"
linux-x86-static:
$(MAKE) $(MFLAGS) -f Makefile.X11 targets \
@ -586,26 +580,22 @@ linux-x86-static:
"OSMESA_LIB = libOSMesa.a" \
"CC = gcc -malign-loops=2 -malign-jumps=2 -malign-functions=2" \
"CPLUSPLUS = g++" \
"CFLAGS = -Wall -O3 -ansi -pedantic -fPIC -ffast-math -funroll-loops -fomit-frame-pointer -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_XSHM -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_KATMAI_ASM -DPTHREADS -I/usr/X11R6/include" \
"CFLAGS = -Wall -O3 -ansi -pedantic -fPIC -ffast-math -funroll-loops -fomit-frame-pointer -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_XSHM -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM -DPTHREADS -I/usr/X11R6/include" \
"MAKELIB = ../bin/mklib.ar-ruv" \
"GL_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXext -lm -lm -lpthread" \
"GLU_LIB_DEPS = -L../lib -lGL -lm" \
"GLUT_LIB_DEPS = -L../lib -lGLU -lGL -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm" \
"APP_LIB_DEPS = -lm" \
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
X86/x86_cliptest.S X86/x86_vertex.S \
X86/mmx_blend.S \
X86/3dnow_xform_raw2.S X86/3dnow_xform_masked2.S \
X86/3dnow_xform_raw3.S X86/3dnow_xform_masked3.S \
X86/3dnow_xform_raw4.S X86/3dnow_xform_masked4.S \
X86/3dnow_norm_raw.S X86/3dnow_vertex.S \
X86/katmai_xform_raw2.S X86/katmai_xform_masked2.S \
X86/katmai_xform_raw3.S X86/katmai_xform_masked3.S \
X86/katmai_xform_raw4.S X86/katmai_xform_masked4.S \
X86/katmai_norm_raw.S X86/katmai_vertex.S"
X86/3dnow_xform1.S X86/3dnow_xform2.S \
X86/3dnow_xform3.S X86/3dnow_xform4.S \
X86/3dnow_normal.S X86/3dnow_vertex.S \
X86/sse_xform1.S X86/sse_xform2.S \
X86/sse_xform3.S X86/sse_xform4.S \
X86/sse_normal.S X86/sse_vertex.S"
# Contributed by Uwe_Maurer@t-online.de
linux-ggi:
@ -637,9 +627,7 @@ linux-386-ggi:
"MAKELIB = ../bin/mklib.ggi" \
"APP_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXext -lXmu -lXt -lXi -lggi -lm" \
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
X86/x86_xform_raw2.S X86/x86_xform_masked3.S \
X86/x86_xform_raw3.S X86/x86_xform_masked2.S \
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
X86/x86_cliptest.S X86/x86_vertex.S"
linux-glide:
@ -676,9 +664,7 @@ linux-386-glide:
"GLUT_LIB_DEPS = -L../lib -lGLU -lGL -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm" \
"APP_LIB_DEPS = -lm" \
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
X86/x86_cliptest.S X86/x86_vertex.S"
linux-386-opt-glide:
@ -698,9 +684,7 @@ linux-386-opt-glide:
"GLUT_LIB_DEPS = -L../lib -lGLU -lGL -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm" \
"APP_LIB_DEPS = -lm" \
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
X86/x86_cliptest.S X86/x86_vertex.S"
linux-386-opt-V2-glide:
@ -720,9 +704,7 @@ linux-386-opt-V2-glide:
"GLUT_LIB_DEPS = -L../lib -lGLU -lGL -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm" \
"APP_LIB_DEPS = -lm" \
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
X86/x86_cliptest.S X86/x86_vertex.S"
linux-x86-glide:
@ -734,27 +716,23 @@ linux-x86-glide:
"OSMESA_LIB = libOSMesa.so" \
"CC = gcc -malign-loops=2 -malign-jumps=2 -malign-functions=2" \
"CPLUSPLUS = g++" \
"CFLAGS = -O3 -ansi -pedantic -fPIC -ffast-math -funroll-loops -fomit-frame-pointer -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_KATMAI_ASM -DUSE_XSHM -DFX -DPTHREADS -I/usr/X11R6/include -I/usr/include/glide -I/usr/local/glide/include -I/usr/src/mesa-glx/src/FX/X86" \
"CFLAGS = -O3 -ansi -pedantic -fPIC -ffast-math -funroll-loops -fomit-frame-pointer -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM -DUSE_XSHM -DFX -DPTHREADS -I/usr/X11R6/include -I/usr/include/glide -I/usr/local/glide/include -I/usr/src/mesa-glx/src/FX/X86" \
"MAKELIB = ../bin/mklib.linux" \
"GL_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXext -lm -L/usr/local/glide/lib -lglide2x -lm -lpthread" \
"GLU_LIB_DEPS = -L../lib -lGL -lm" \
"GLUT_LIB_DEPS = -L../lib -lGLU -lGL -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm" \
"APP_LIB_DEPS = -lm" \
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
X86/x86_cliptest.S X86/x86_vertex.S \
X86/mmx_blend.S \
X86/3dnow_xform_raw2.S X86/3dnow_xform_raw2.S \
X86/3dnow_xform_raw3.S X86/3dnow_xform_raw3.S \
X86/3dnow_xform_raw4.S X86/3dnow_xform_raw4.S \
X86/3dnow_norm_raw.S X86/3dnow_vertex.S \
X86/3dnow_xform1.S X86/3dnow_xform2.S \
X86/3dnow_xform3.S X86/3dnow_xform4.S \
X86/3dnow_normal.S X86/3dnow_vertex.S \
FX/X86/fx_3dnow_fastpath.S \
X86/katmai_xform_raw2.S X86/katmai_xform_masked2.S \
X86/katmai_xform_raw3.S X86/katmai_xform_masked3.S \
X86/katmai_xform_raw4.S X86/katmai_xform_masked4.S \
X86/katmai_norm_raw.S X86/katmai_vertex.S"
X86/sse_xform1.S X86/sse_xform2.S \
X86/sse_xform3.S X86/sse_xform4.S \
X86/sse_normal.S X86/sse_vertex.S"
linux-alpha:
$(MAKE) $(MFLAGS) -f Makefile.X11 targets \
@ -999,9 +977,7 @@ os2-x11:
"MAKELIB = ..\\bin\\mklib-emx.cmd " \
"APP_LIB_DEPS = -Zmt -Zcrtdll -Zexe -L$(X11ROOT)/XFree86/lib -lXt -lX11 -lm" \
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
X86/x86_cliptest.S X86/x86_vertex.S"
osf1:
@ -1503,26 +1479,22 @@ linux-x86-debug:
"OSMESA_LIB = libOSMesa.so" \
"CC = gcc -malign-loops=2 -malign-jumps=2 -malign-functions=2" \
"CPLUSPLUS = g++" \
"CFLAGS = -O2 -g -ansi -pedantic -Wall -Wmissing-prototypes -fPIC -ffast-math -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_XSHM -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_KATMAI_ASM -DPTHREADS -I/usr/X11R6/include -DDEBUG -DMESA_DEBUG" \
"CFLAGS = -O2 -g -ansi -pedantic -Wall -Wmissing-prototypes -fPIC -ffast-math -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_XSHM -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM -DPTHREADS -I/usr/X11R6/include -DDEBUG -DMESA_DEBUG" \
"MAKELIB = ../bin/mklib.linux" \
"GL_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXext -lm -lm -lpthread" \
"GLU_LIB_DEPS = -L../lib -lGL -lm" \
"GLUT_LIB_DEPS = -L../lib -lGLU -lGL -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm" \
"APP_LIB_DEPS = -lm" \
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
X86/x86_cliptest.S X86/x86_vertex.S \
X86/mmx_blend.S \
X86/3dnow_xform_raw2.S X86/3dnow_xform_masked2.S \
X86/3dnow_xform_raw3.S X86/3dnow_xform_masked3.S \
X86/3dnow_xform_raw4.S X86/3dnow_xform_masked4.S \
X86/3dnow_norm_raw.S X86/3dnow_vertex.S \
X86/katmai_xform_raw2.S X86/katmai_xform_masked2.S \
X86/katmai_xform_raw3.S X86/katmai_xform_masked3.S \
X86/katmai_xform_raw4.S X86/katmai_xform_masked4.S \
X86/katmai_norm_raw.S X86/katmai_vertex.S"
X86/3dnow_xform1.S X86/3dnow_xform2.S \
X86/3dnow_xform3.S X86/3dnow_xform4.S \
X86/3dnow_normal.S X86/3dnow_vertex.S \
X86/sse_xform1.S X86/sse_xform2.S \
X86/sse_xform3.S X86/sse_xform4.S \
X86/sse_normal.S X86/sse_vertex.S"
linux-glide-debug:
$(MAKE) $(MFLAGS) -f Makefile.X11 targets \
@ -1548,25 +1520,21 @@ linux-prof:
"OSMESA_LIB = libOSMesa.a" \
"CC = gcc" \
"CPLUSPLUS = g++" \
"CFLAGS = -Wall -O2 -pg -ansi -pedantic -ffast-math -DUSE_XSHM -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_KATMAI_ASM" \
"CFLAGS = -Wall -O2 -pg -ansi -pedantic -ffast-math -DUSE_XSHM -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM" \
"CCFLAGS = $(CFLAGS)" \
"MAKELIB = ../bin/mklib.ar-ruv" \
"APP_LIB_DEPS = -L/usr/X11/lib -lX11 -lXext -lXmu -lXt -lXi -lSM -lICE -lm" \
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
X86/x86_xform_raw2.S X86/x86_xform_masked3.S \
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
X86/x86_cliptest.S X86/x86_vertex.S \
X86/mmx_blend.S \
X86/3dnow_xform_raw2.S X86/3dnow_xform_masked2.S \
X86/3dnow_xform_raw3.S X86/3dnow_xform_masked3.S \
X86/3dnow_xform_raw4.S X86/3dnow_xform_masked4.S \
X86/3dnow_norm_raw.S X86/3dnow_vertex.S \
X86/3dnow_xform1.S X86/3dnow_xform2.S \
X86/3dnow_xform3.S X86/3dnow_xform4.S \
X86/3dnow_normal.S X86/3dnow_vertex.S \
FX/X86/fx_3dnow_fastpath.S \
X86/katmai_xform_raw2.S X86/katmai_xform_masked2.S \
X86/katmai_xform_raw3.S X86/katmai_xform_masked3.S \
X86/katmai_xform_raw4.S X86/katmai_xform_masked4.S \
X86/katmai_norm_raw.S X86/katmai_vertex.S"
X86/sse_xform1.S X86/sse_xform2.S \
X86/sse_xform3.S X86/sse_xform4.S \
X86/sse_normal.S X86/sse_vertex.S"
linux-glide-prof:
$(MAKE) $(MFLAGS) -f Makefile.X11 targets \
@ -1577,22 +1545,18 @@ linux-glide-prof:
"OSMESA_LIB = libOSMesa.a" \
"CC = gcc" \
"CPLUSPLUS = g++" \
"CFLAGS = -O2 -pg -ansi -pedantic -Wall -DUSE_XSHM -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_KATMAI_ASM -I/usr/include/glide -I/usr/local/glide/include" \
"CFLAGS = -O2 -pg -ansi -pedantic -Wall -DUSE_XSHM -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM -I/usr/include/glide -I/usr/local/glide/include" \
"CCFLAGS = $(CFLAGS)" \
"MAKELIB = ../bin/mklib.ar-ruv" \
"APP_LIB_DEPS = -L/usr/local/glide/lib -lglide2x -L/usr/X11/lib -lX11 -lXext -lXmu -lXt -lXi -lSM -lICE -lm" \
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
X86/x86_cliptest.S X86/x86_vertex.S \
X86/mmx_blend.S \
X86/3dnow_xform_raw2.S X86/3dnow_xform_masked2.S \
X86/3dnow_xform_raw3.S X86/3dnow_xform_masked3.S \
X86/3dnow_xform_raw4.S X86/3dnow_xform_masked4.S \
X86/3dnow_norm_raw.S X86/3dnow_vertex.S \
X86/3dnow_xform1.S X86/3dnow_xform2.S \
X86/3dnow_xform3.S X86/3dnow_xform4.S \
X86/3dnow_normal.S X86/3dnow_vertex.S \
FX/X86/fx_3dnow_fastpath.S \
X86/katmai_xform_raw2.S X86/katmai_xform_masked2.S \
X86/katmai_xform_raw3.S X86/katmai_xform_masked3.S \
X86/katmai_xform_raw4.S X86/katmai_xform_masked4.S \
X86/katmai_norm_raw.S X86/katmai_vertex.S"
X86/sse_xform1.S X86/sse_xform2.S \
X86/sse_xform3.S X86/sse_xform4.S \
X86/sse_normal.S X86/sse_vertex.S"

View File

@ -1,4 +1,4 @@
# $Id: Makefile.X11,v 1.50 2001/03/29 03:41:40 gareth Exp $
# $Id: Makefile.X11,v 1.51 2001/03/29 06:46:15 gareth Exp $
# Mesa 3-D graphics library
# Version: 3.5
@ -108,7 +108,7 @@ CORE_SOURCES = \
X86/x86.c \
X86/common_x86.c \
X86/3dnow.c \
X86/katmai.c \
X86/sse.c \
math/m_debug_norm.c \
math/m_debug_vertex.c \
math/m_debug_xform.c \
@ -280,7 +280,7 @@ X86/common_x86.o: X86/common_x86.c
X86/common_x86_asm.o: X86/common_x86_asm.S X86/matypes.h
X86/3dnow.o: X86/3dnow.c
$(CC) -c -I. -I$(INCDIR) $(CFLAGS) $< -o $@
X86/katmai.o: X86/katmai.c
X86/sse.o: X86/sse.c
$(CC) -c -I. -I$(INCDIR) $(CFLAGS) $< -o $@
X86/matypes.h: mtypes.h tnl/t_context.h X86/gen_matypes.c
$(CC) -I. -I$(INCDIR) $(CFLAGS) X86/gen_matypes.c -o X86/gen_matypes

View File

@ -1,4 +1,4 @@
# $Id: Makefile.X11,v 1.50 2001/03/29 03:41:40 gareth Exp $
# $Id: Makefile.X11,v 1.51 2001/03/29 06:46:15 gareth Exp $
# Mesa 3-D graphics library
# Version: 3.5
@ -108,7 +108,7 @@ CORE_SOURCES = \
X86/x86.c \
X86/common_x86.c \
X86/3dnow.c \
X86/katmai.c \
X86/sse.c \
math/m_debug_norm.c \
math/m_debug_vertex.c \
math/m_debug_xform.c \
@ -280,7 +280,7 @@ X86/common_x86.o: X86/common_x86.c
X86/common_x86_asm.o: X86/common_x86_asm.S X86/matypes.h
X86/3dnow.o: X86/3dnow.c
$(CC) -c -I. -I$(INCDIR) $(CFLAGS) $< -o $@
X86/katmai.o: X86/katmai.c
X86/sse.o: X86/sse.c
$(CC) -c -I. -I$(INCDIR) $(CFLAGS) $< -o $@
X86/matypes.h: mtypes.h tnl/t_context.h X86/gen_matypes.c
$(CC) -I. -I$(INCDIR) $(CFLAGS) X86/gen_matypes.c -o X86/gen_matypes

View File

@ -1,4 +1,4 @@
/* $Id: m_debug_norm.c,v 1.5 2001/03/12 00:48:41 gareth Exp $ */
/* $Id: m_debug_norm.c,v 1.6 2001/03/29 06:46:27 gareth Exp $ */
/*
* Mesa 3-D graphics library
@ -186,8 +186,7 @@ static void ref_norm_transform_normalize( const GLmatrix *mat,
* Normal transformation tests
*/
static int test_norm_function( normal_func func, int mtype,
int masked, long *cycles )
static int test_norm_function( normal_func func, int mtype, long *cycles )
{
GLvector3f source[1], dest[1], dest2[1], ref[1], ref2[1];
GLmatrix mat[1];
@ -195,7 +194,6 @@ static int test_norm_function( normal_func func, int mtype,
GLfloat d2[TEST_COUNT][3], r2[TEST_COUNT][3], length[TEST_COUNT];
GLfloat scale;
GLfloat *m;
GLubyte mask[TEST_COUNT];
int i, j;
#ifdef RUN_DEBUG_BENCHMARK
int cycle_i; /* the counter for the benchmarks we run */
@ -231,7 +229,6 @@ static int test_norm_function( normal_func func, int mtype,
}
for ( i = 0 ; i < TEST_COUNT ; i++ ) {
mask[i] = i % 2; /* mask every 2nd element */
ASSIGN_3V( d[i], 0.0, 0.0, 0.0 );
ASSIGN_3V( s[i], 0.0, 0.0, 0.0 );
ASSIGN_3V( d2[i], 0.0, 0.0, 0.0 );
@ -278,31 +275,16 @@ static int test_norm_function( normal_func func, int mtype,
}
if ( mesa_profile ) {
if ( masked ) {
BEGIN_RACE( *cycles );
func( mat, scale, source, NULL, mask, dest );
END_RACE( *cycles );
func( mat, scale, source, length, mask, dest2 );
} else {
BEGIN_RACE( *cycles );
func( mat, scale, source, NULL, NULL, dest );
END_RACE( *cycles );
func( mat, scale, source, length, NULL, dest2 );
}
BEGIN_RACE( *cycles );
func( mat, scale, source, NULL, NULL, dest );
END_RACE( *cycles );
func( mat, scale, source, length, NULL, dest2 );
} else {
if ( masked ) {
func( mat, scale, source, NULL, mask, dest );
func( mat, scale, source, length, mask, dest2 );
} else {
func( mat, scale, source, NULL, NULL, dest );
func( mat, scale, source, length, NULL, dest2 );
}
func( mat, scale, source, NULL, NULL, dest );
func( mat, scale, source, length, NULL, dest2 );
}
for ( i = 0 ; i < TEST_COUNT ; i++ ) {
if ( masked && !(mask[i] & 1) )
continue;
for ( j = 0 ; j < 3 ; j++ ) {
if ( significand_match( d[i][j], r[i][j] ) < REQUIRED_PRECISION ) {
printf( "-----------------------------\n" );
@ -344,7 +326,6 @@ static int test_norm_function( normal_func func, int mtype,
void _math_test_all_normal_transform_functions( char *description )
{
int masked;
int mtype;
long benchmark_tab[0xf][0x4];
static int first_time = 1;
@ -362,46 +343,33 @@ void _math_test_all_normal_transform_functions( char *description )
}
printf( "normal transform results after hooking in %s functions:\n",
description );
printf( "\n-------------------------------------------------------\n" );
}
#endif
for ( masked = 0 ; masked <= 1 ; masked++ ) {
int cma = masked ? 1 : 0;
char *cmastring = masked ? "CULL_MASK_ACTIVE" : "0";
for ( mtype = 0 ; mtype < 8 ; mtype++ ) {
normal_func func = _mesa_normal_tab[norm_types[mtype]][0];
long *cycles = &(benchmark_tab[mtype][0]);
if ( test_norm_function( func, mtype, cycles ) == 0 ) {
char buf[100];
sprintf( buf, "_mesa_normal_tab[0][%s] failed test (%s)",
norm_strings[mtype], description );
_mesa_problem( NULL, buf );
}
#ifdef RUN_DEBUG_BENCHMARK
if ( mesa_profile ) {
printf( "\n culling: %s \n", masked ? "CULL_MASK_ACTIVE" : "0" );
printf( "\n-------------------------------------------------------\n" );
}
#endif
for ( mtype = 0 ; mtype < 8 ; mtype++ ) {
normal_func func = _mesa_normal_tab[norm_types[mtype]][cma];
long *cycles = &(benchmark_tab[mtype][cma]);
if ( test_norm_function( func, mtype, masked, cycles ) == 0 ) {
char buf[100];
sprintf( buf, "_mesa_normal_tab[%s][%s] failed test (%s)",
cmastring, norm_strings[mtype], description );
_mesa_problem( NULL, buf );
}
#ifdef RUN_DEBUG_BENCHMARK
if ( mesa_profile ) {
printf( " %li\t", benchmark_tab[mtype][cma] );
printf( " | [%s]\n", norm_strings[mtype] );
}
}
if ( mesa_profile )
printf( "\n" );
#else
printf( " %li\t", benchmark_tab[mtype][0] );
printf( " | [%s]\n", norm_strings[mtype] );
}
#endif
}
#ifdef RUN_DEBUG_BENCHMARK
if ( mesa_profile )
if ( mesa_profile ) {
printf( "\n" );
fflush( stdout );
}
#endif
}

View File

@ -1,4 +1,4 @@
/* $Id: m_debug_xform.c,v 1.6 2001/03/12 02:02:36 gareth Exp $ */
/* $Id: m_debug_xform.c,v 1.7 2001/03/29 06:46:27 gareth Exp $ */
/*
* Mesa 3-D graphics library
@ -159,8 +159,8 @@ static GLfloat s[TEST_COUNT][4] ALIGN16;
static GLfloat d[TEST_COUNT][4] ALIGN16;
static GLfloat r[TEST_COUNT][4] ALIGN16;
static int test_transform_function( transform_func func, int psize, int mtype,
int masked, long *cycles )
static int test_transform_function( transform_func func, int psize,
int mtype, long *cycles )
{
GLvector4f source[1], dest[1], ref[1];
GLmatrix mat[1];
@ -238,28 +238,15 @@ static int test_transform_function( transform_func func, int psize, int mtype,
ref_transform( ref, mat, source, NULL, 0 );
if ( mesa_profile ) {
if ( masked ) {
BEGIN_RACE( *cycles );
func( dest, mat->m, source, mask, 1 );
END_RACE( *cycles );
} else {
BEGIN_RACE( *cycles );
func( dest, mat->m, source, NULL, 0 );
END_RACE( *cycles );
}
BEGIN_RACE( *cycles );
func( dest, mat->m, source, NULL, 0 );
END_RACE( *cycles );
}
else {
if ( masked ) {
func( dest, mat->m, source, mask, 1 );
} else {
func( dest, mat->m, source, NULL, 0 );
}
func( dest, mat->m, source, NULL, 0 );
}
for ( i = 0 ; i < TEST_COUNT ; i++ ) {
if ( masked && (mask[i] & 1) )
continue;
for ( j = 0 ; j < 4 ; j++ ) {
if ( significand_match( d[i][j], r[i][j] ) < REQUIRED_PRECISION ) {
printf( "-----------------------------\n" );
@ -287,7 +274,7 @@ static int test_transform_function( transform_func func, int psize, int mtype,
void _math_test_all_transform_functions( char *description )
{
int masked, psize, mtype;
int psize, mtype;
long benchmark_tab[2][4][7];
static int first_time = 1;
@ -306,47 +293,41 @@ void _math_test_all_transform_functions( char *description )
}
#endif
for ( masked = 0 ; masked <= 1 ; masked++ ) {
int cma = masked ? 1 : 0;
char *cmastring = masked ? "CULL_MASK_ACTIVE" : "0";
#ifdef RUN_DEBUG_BENCHMARK
if ( mesa_profile ) {
printf( "\n culling: %s \n", masked ? "CULL_MASK_ACTIVE" : "0" );
for ( psize = 1 ; psize <= 4 ; psize++ ) {
printf( " p%d\t", psize );
}
printf( "\n--------------------------------------------------------\n" );
if ( mesa_profile ) {
printf( "\n" );
for ( psize = 1 ; psize <= 4 ; psize++ ) {
printf( " p%d\t", psize );
}
printf( "\n--------------------------------------------------------\n" );
}
#endif
for ( mtype = 0 ; mtype < 7 ; mtype++ ) {
for ( psize = 1 ; psize <= 4 ; psize++ ) {
transform_func func = _mesa_transform_tab[cma][psize][mtypes[mtype]];
long *cycles = &(benchmark_tab[cma][psize-1][mtype]);
for ( mtype = 0 ; mtype < 7 ; mtype++ ) {
for ( psize = 1 ; psize <= 4 ; psize++ ) {
transform_func func = _mesa_transform_tab[0][psize][mtypes[mtype]];
long *cycles = &(benchmark_tab[0][psize-1][mtype]);
if ( test_transform_function( func, psize, mtype,
masked, cycles ) == 0 ) {
char buf[100];
sprintf( buf, "_mesa_transform_tab[%s][%d][%s] failed test (%s)",
cmastring, psize, mstrings[mtype], description );
_mesa_problem( NULL, buf );
}
if ( test_transform_function( func, psize, mtype, cycles ) == 0 ) {
char buf[100];
sprintf( buf, "_mesa_transform_tab[0][%d][%s] failed test (%s)",
psize, mstrings[mtype], description );
_mesa_problem( NULL, buf );
}
#ifdef RUN_DEBUG_BENCHMARK
if ( mesa_profile )
printf( " %li\t", benchmark_tab[cma][psize-1][mtype] );
#endif
}
#ifdef RUN_DEBUG_BENCHMARK
if ( mesa_profile )
printf( " | [%s]\n", mstrings[mtype] );
if ( mesa_profile )
printf( " %li\t", benchmark_tab[0][psize-1][mtype] );
#endif
}
#ifdef RUN_DEBUG_BENCHMARK
if ( mesa_profile )
printf( "\n" );
printf( " | [%s]\n", mstrings[mtype] );
#endif
}
#ifdef RUN_DEBUG_BENCHMARK
if ( mesa_profile )
printf( "\n" );
#endif
}

View File

@ -1,4 +1,4 @@
/* $Id: 3dnow.c,v 1.17 2001/03/28 20:44:43 gareth Exp $ */
/* $Id: 3dnow.c,v 1.18 2001/03/29 06:46:15 gareth Exp $ */
/*
* Mesa 3-D graphics library
@ -51,31 +51,31 @@
const GLubyte flag
#define DECLARE_XFORM_GROUP( pfx, sz, masked ) \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_general_##masked( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_identity_##masked( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d_no_rot_##masked( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_perspective_##masked( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d_##masked( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d_no_rot_##masked( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d_##masked( XFORM_ARGS );
/*
 * Declare the assembly point-transform entry points for one CPU back end.
 *
 *   pfx - back-end prefix pasted into the symbol name (e.g. 3dnow)
 *   sz  - point size pasted into the symbol name (e.g. 2, 3, 4)
 *
 * Expands to seven extern prototypes named
 * _mesa_<pfx>_transform_points<sz>_<kind>, one per matrix kind (general,
 * identity, 3d_no_rot, perspective, 2d, 2d_no_rot, 3d).  Every routine
 * takes XFORM_ARGS and uses the _ASMAPI calling convention.
 */
#define DECLARE_XFORM_GROUP( pfx, sz ) \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_general( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_identity( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d_no_rot( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_perspective( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d_no_rot( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d( XFORM_ARGS );
#define ASSIGN_XFORM_GROUP( pfx, cma, sz, masked ) \
_mesa_transform_tab[cma][sz][MATRIX_GENERAL] = \
_mesa_##pfx##_transform_points##sz##_general_##masked; \
_mesa_transform_tab[cma][sz][MATRIX_IDENTITY] = \
_mesa_##pfx##_transform_points##sz##_identity_##masked; \
_mesa_transform_tab[cma][sz][MATRIX_3D_NO_ROT] = \
_mesa_##pfx##_transform_points##sz##_3d_no_rot_##masked; \
_mesa_transform_tab[cma][sz][MATRIX_PERSPECTIVE] = \
_mesa_##pfx##_transform_points##sz##_perspective_##masked; \
_mesa_transform_tab[cma][sz][MATRIX_2D] = \
_mesa_##pfx##_transform_points##sz##_2d_##masked; \
_mesa_transform_tab[cma][sz][MATRIX_2D_NO_ROT] = \
_mesa_##pfx##_transform_points##sz##_2d_no_rot_##masked; \
_mesa_transform_tab[cma][sz][MATRIX_3D] = \
_mesa_##pfx##_transform_points##sz##_3d_##masked;
/*
 * Hook one back end's point-transform routines into _mesa_transform_tab.
 *
 *   pfx - back-end prefix pasted into the routine names (e.g. 3dnow)
 *   sz  - point size; used both as the second table index and as part of
 *         the routine name
 *
 * Expands to seven assignment statements, one per MATRIX_* kind.  The first
 * table index is hard-coded to 0.
 * NOTE(review): index 0 is presumably the "no cull mask" slot - confirm
 * against the definition of _mesa_transform_tab.
 */
#define ASSIGN_XFORM_GROUP( pfx, sz ) \
_mesa_transform_tab[0][sz][MATRIX_GENERAL] = \
_mesa_##pfx##_transform_points##sz##_general; \
_mesa_transform_tab[0][sz][MATRIX_IDENTITY] = \
_mesa_##pfx##_transform_points##sz##_identity; \
_mesa_transform_tab[0][sz][MATRIX_3D_NO_ROT] = \
_mesa_##pfx##_transform_points##sz##_3d_no_rot; \
_mesa_transform_tab[0][sz][MATRIX_PERSPECTIVE] = \
_mesa_##pfx##_transform_points##sz##_perspective; \
_mesa_transform_tab[0][sz][MATRIX_2D] = \
_mesa_##pfx##_transform_points##sz##_2d; \
_mesa_transform_tab[0][sz][MATRIX_2D_NO_ROT] = \
_mesa_##pfx##_transform_points##sz##_2d_no_rot; \
_mesa_transform_tab[0][sz][MATRIX_3D] = \
_mesa_##pfx##_transform_points##sz##_3d;
@ -87,47 +87,42 @@ extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d_##masked( XFORM_ARGS
GLvector3f *dest
#define DECLARE_NORM_GROUP( pfx, masked ) \
extern void _ASMAPI _mesa_##pfx##_rescale_normals_##masked( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_normalize_normals_##masked( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_normals_##masked( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_normals_no_rot_##masked( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals_##masked( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals_no_rot_##masked( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals_##masked( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals_no_rot_##masked( NORM_ARGS );
/*
 * Declare the assembly normal-processing entry points for one CPU back end.
 *
 *   pfx - back-end prefix pasted into the symbol name (e.g. 3dnow)
 *
 * Expands to eight extern prototypes named _mesa_<pfx>_<operation>, covering
 * rescale, normalize, transform, and the transform+rescale /
 * transform+normalize combinations, with a _no_rot variant of each
 * transforming routine.  Every routine takes NORM_ARGS and uses the _ASMAPI
 * calling convention.
 */
#define DECLARE_NORM_GROUP( pfx ) \
extern void _ASMAPI _mesa_##pfx##_rescale_normals( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_normalize_normals( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_normals( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_normals_no_rot( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals_no_rot( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals_no_rot( NORM_ARGS );
#define ASSIGN_NORM_GROUP( pfx, cma, masked ) \
_mesa_normal_tab[NORM_RESCALE][cma] = \
_mesa_##pfx##_rescale_normals_##masked; \
_mesa_normal_tab[NORM_NORMALIZE][cma] = \
_mesa_##pfx##_normalize_normals_##masked; \
_mesa_normal_tab[NORM_TRANSFORM][cma] = \
_mesa_##pfx##_transform_normals_##masked; \
_mesa_normal_tab[NORM_TRANSFORM_NO_ROT][cma] = \
_mesa_##pfx##_transform_normals_no_rot_##masked; \
_mesa_normal_tab[NORM_TRANSFORM | NORM_RESCALE][cma] = \
_mesa_##pfx##_transform_rescale_normals_##masked; \
_mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE][cma] = \
_mesa_##pfx##_transform_rescale_normals_no_rot_##masked; \
_mesa_normal_tab[NORM_TRANSFORM | NORM_NORMALIZE][cma] = \
_mesa_##pfx##_transform_normalize_normals_##masked; \
_mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE][cma] = \
_mesa_##pfx##_transform_normalize_normals_no_rot_##masked;
/*
 * Hook one back end's normal-processing routines into _mesa_normal_tab.
 *
 *   pfx - back-end prefix pasted into the routine names (e.g. 3dnow)
 *
 * Expands to eight assignment statements.  The first table index is a
 * NORM_* flag or an OR-combination of two of them; the second index is
 * hard-coded to 0.
 * NOTE(review): index 0 is presumably the "no cull mask" slot - confirm
 * against the definition of _mesa_normal_tab.
 */
#define ASSIGN_NORM_GROUP( pfx ) \
_mesa_normal_tab[NORM_RESCALE][0] = \
_mesa_##pfx##_rescale_normals; \
_mesa_normal_tab[NORM_NORMALIZE][0] = \
_mesa_##pfx##_normalize_normals; \
_mesa_normal_tab[NORM_TRANSFORM][0] = \
_mesa_##pfx##_transform_normals; \
_mesa_normal_tab[NORM_TRANSFORM_NO_ROT][0] = \
_mesa_##pfx##_transform_normals_no_rot; \
_mesa_normal_tab[NORM_TRANSFORM|NORM_RESCALE][0] = \
_mesa_##pfx##_transform_rescale_normals; \
_mesa_normal_tab[NORM_TRANSFORM_NO_ROT|NORM_RESCALE][0] = \
_mesa_##pfx##_transform_rescale_normals_no_rot; \
_mesa_normal_tab[NORM_TRANSFORM|NORM_NORMALIZE][0] = \
_mesa_##pfx##_transform_normalize_normals; \
_mesa_normal_tab[NORM_TRANSFORM_NO_ROT|NORM_NORMALIZE][0] = \
_mesa_##pfx##_transform_normalize_normals_no_rot;
#ifdef USE_3DNOW_ASM
DECLARE_XFORM_GROUP( 3dnow, 2, raw )
DECLARE_XFORM_GROUP( 3dnow, 3, raw )
DECLARE_XFORM_GROUP( 3dnow, 4, raw )
DECLARE_XFORM_GROUP( 3dnow, 2 )
DECLARE_XFORM_GROUP( 3dnow, 3 )
DECLARE_XFORM_GROUP( 3dnow, 4 )
DECLARE_XFORM_GROUP( 3dnow, 2, masked )
DECLARE_XFORM_GROUP( 3dnow, 3, masked )
DECLARE_XFORM_GROUP( 3dnow, 4, masked )
DECLARE_NORM_GROUP( 3dnow, raw )
/*DECLARE_NORM_GROUP( 3dnow, masked )*/
DECLARE_NORM_GROUP( 3dnow )
extern void _ASMAPI
@ -155,16 +150,11 @@ _mesa_3dnow_project_clipped_vertices( GLfloat *first,
void _mesa_init_3dnow_transform_asm( void )
{
#ifdef USE_3DNOW_ASM
ASSIGN_XFORM_GROUP( 3dnow, 0, 2, raw );
ASSIGN_XFORM_GROUP( 3dnow, 0, 3, raw );
ASSIGN_XFORM_GROUP( 3dnow, 0, 4, raw );
ASSIGN_XFORM_GROUP( 3dnow, 2 );
ASSIGN_XFORM_GROUP( 3dnow, 3 );
ASSIGN_XFORM_GROUP( 3dnow, 4 );
/* ASSIGN_XFORM_GROUP( 3dnow, CULL_MASK_ACTIVE, 2, masked ); */
/* ASSIGN_XFORM_GROUP( 3dnow, CULL_MASK_ACTIVE, 3, masked ); */
/* ASSIGN_XFORM_GROUP( 3dnow, CULL_MASK_ACTIVE, 4, masked ); */
ASSIGN_NORM_GROUP( 3dnow, 0, raw );
/* ASSIGN_NORM_GROUP( 3dnow, CULL_MASK_ACTIVE, masked ); */
ASSIGN_NORM_GROUP( 3dnow );
#ifdef DEBUG
_math_test_all_transform_functions( "3DNow!" );
@ -177,6 +167,7 @@ void _mesa_init_3dnow_vertex_asm( void )
{
#ifdef USE_3DNOW_ASM
_mesa_xform_points3_v16_general = _mesa_v16_3dnow_general_xform;
_mesa_project_v16 = _mesa_3dnow_project_vertices;
_mesa_project_clipped_v16 = _mesa_3dnow_project_clipped_vertices;

858
src/mesa/x86/3dnow_normal.S Normal file
View File

@ -0,0 +1,858 @@
/* $Id: 3dnow_normal.S,v 1.1 2001/03/29 06:46:16 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* 3Dnow assembly code by Holger Waechtler
*/
#include "matypes.h"
#include "norm_args.h"
SEG_TEXT
/* M(i): memory operand for the i-th 4-byte element of the matrix that ECX
 * points at.  The routines using it load mat->inv into ECX first. */
#define M(i) REGOFF(i * 4, ECX)
/* STRIDE: memory operand at byte offset 12 from ESI.  Every routine in this
 * file loads ARG_IN into ESI and adds this value to its input cursor.
 * NOTE(review): 12 is presumably the offset of the stride field of the input
 * vector; confirm against matypes.h and prefer a named constant. */
#define STRIDE REGOFF(12, ESI)
/*
 * _mesa_3dnow_transform_normalize_normals
 *
 * Arguments are read from the stack through the ARG_* macros, which are
 * relative to FRAME_OFFSET (bytes pushed so far).
 *
 * Pass 1 (transform): for each of in->count normals x, store into dest
 *     r0 = x0*m0 + x1*m1 + x2*m2
 *     r1 = x0*m4 + x1*m5 + x2*m6
 *     r2 = x0*m8 + x1*m9 + x2*m10
 *   with m = mat->inv.  Output elements are 12 bytes apart; the input
 *   cursor advances by STRIDE.
 * Pass 2 (normalize, in place on dest): multiply each result either by
 *   lengths[i] (lengths != NULL) or by a computed 1/sqrt(r.r).
 *
 * Register roles: EAX = dest cursor, EDX = input cursor, ECX = mat->inv,
 * EBP = remaining count, EDI = lengths, ESI = in (needed by STRIDE).
 * EDI/ESI/EBP are saved and restored; EAX/ECX/EDX and the MMX state are
 * clobbered (FEMMS is issued on exit).
 *
 * Each loop preloads the next element before testing the counter, so one
 * element past the last is read (never written).
 *
 * NOTE(review): the matrix is pre-multiplied by scale only when
 * lengths == NULL; with precomputed lengths the unscaled matrix is used.
 * Confirm this matches the reference C implementation.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
GLNAME(_mesa_3dnow_transform_normalize_normals):

#define FRAME_OFFSET 12
    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP )   /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )   /* in->start */
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )   /* dest->start */
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )  /* mat->inv */

    CMP_L      ( CONST(0), EBP )                 /* nothing to do for count == 0 */
    JE         ( LLBL (G3TN_end) )

    FEMMS

    /* Keep count and both start pointers for the normalize pass. */
    PUSH_L     ( EBP )
    PUSH_L     ( EAX )
    PUSH_L     ( EDX )
#undef FRAME_OFFSET
#define FRAME_OFFSET 24

    MOVQ       ( M(0), MM3 )                     /* m1 | m0 */
    MOVQ       ( M(4), MM4 )                     /* m5 | m4 */
    MOVD       ( M(2), MM5 )                     /*    | m2 */
    PUNPCKLDQ  ( M(6), MM5 )                     /* m6 | m2 */
    MOVQ       ( M(8), MM6 )                     /* m9 | m8 */
    MOVD       ( M(10), MM7 )                    /*  0 | m10 (only the low half is used) */

    CMP_L      ( CONST(0), EDI )                 /* lengths supplied? then skip scaling */
    JNE        ( LLBL (G3TN_scale_end) )

    MOVD       ( ARG_SCALE, MM0 )                /*       | scale */
    PUNPCKLDQ  ( MM0, MM0 )                      /* scale | scale */
    PFMUL      ( MM0, MM3 )                      /* scale*m1 | scale*m0 */
    PFMUL      ( MM0, MM4 )                      /* scale*m5 | scale*m4 */
    PFMUL      ( MM0, MM5 )                      /* scale*m6 | scale*m2 */
    PFMUL      ( MM0, MM6 )                      /* scale*m9 | scale*m8 */
    PFMUL      ( MM0, MM7 )                      /*          | scale*m10 */

LLBL (G3TN_scale_end):
    MOVQ       ( REGIND (EDX), MM0 )             /* x1 | x0 */
    MOVD       ( REGOFF (8, EDX), MM2 )          /*    | x2 */

ALIGNTEXT32
LLBL (G3TN_transform):
    MOVQ       ( MM0, MM1 )                      /* x1 | x0 */
    PUNPCKLDQ  ( MM2, MM2 )                      /* x2 | x2 */
    PFMUL      ( MM3, MM0 )                      /* x1*m1 | x0*m0 */
    ADD_L      ( CONST(12), EAX )                /* next r */
    PREFETCHW  ( REGIND(EAX) )
    PFMUL      ( MM4, MM1 )                      /* x1*m5 | x0*m4 */
    PFACC      ( MM1, MM0 )                      /* x0*m4+x1*m5 | x0*m0+x1*m1 */
    PFMUL      ( MM5, MM2 )                      /* x2*m6 | x2*m2 */
    PFADD      ( MM2, MM0 )                      /* r1 | r0 */
    MOVQ       ( REGIND (EDX), MM1 )             /* x1 | x0 */
    MOVQ       ( MM0, REGOFF(-12, EAX) )         /* write r0, r1 */
    PFMUL      ( MM6, MM1 )                      /* x1*m9 | x0*m8 */
    MOVD       ( REGOFF (8, EDX), MM2 )          /*    | x2 */
    PFMUL      ( MM7, MM2 )                      /*    | x2*m10 */
    PFACC      ( MM1, MM1 )                      /* unused | x0*m8+x1*m9 */
    PFADD      ( MM2, MM1 )                      /* unused | r2 */
    ADD_L      ( STRIDE, EDX )                   /* next normal */
    PREFETCH   ( REGIND(EDX) )
    MOVD       ( MM1, REGOFF(-4, EAX) )          /* write r2 */
    MOVQ       ( REGIND (EDX), MM0 )             /* preload next x1 | x0 */
    MOVD       ( REGOFF (8, EDX), MM2 )          /* preload next x2 */
    DEC_L      ( EBP )
    /* DEC does not modify CF, so test ZF only (JA would also consult the
     * stale carry left by the ADD above). */
    JNZ        ( LLBL (G3TN_transform) )

    /* End of transform pass; restore the saved state and normalize dest. */
    POP_L      ( EDX )
    POP_L      ( EAX )
    POP_L      ( EBP )

    MOVQ       ( REGIND(EAX), MM0 )              /* x1 | x0 */
    MOVD       ( REGOFF(8, EAX), MM1 )           /*    | x2 */

    CMP_L      ( CONST(0), EDI )                 /* lengths == NULL -> compute them */
    JE         ( LLBL (G3TN_norm) )

ALIGNTEXT32
LLBL (G3TN_norm_w_lengths):
    PREFETCHW  ( REGOFF(12,EAX) )
    MOVD       ( REGIND (EDI), MM3 )             /*        | length */
    PFMUL      ( MM3, MM1 )                      /*        | x2 * length */
    PUNPCKLDQ  ( MM3, MM3 )                      /* length | length */
    PFMUL      ( MM3, MM0 )                      /* x1 * length | x0 * length */
    ADD_L      ( CONST(4), EDI )                 /* next length */
    PREFETCH   ( REGIND(EDI) )
    MOVQ       ( MM0, REGIND(EAX) )              /* write new x0, x1 */
    MOVD       ( MM1, REGOFF(8, EAX) )           /* write new x2 */
    ADD_L      ( CONST(12), EAX )                /* next r */
    DEC_L      ( EBP )
    MOVQ       ( REGIND(EAX), MM0 )              /* preload next x1 | x0 */
    MOVD       ( REGOFF(8, EAX), MM1 )           /* preload next x2 */
    JNZ        ( LLBL (G3TN_norm_w_lengths) )    /* ZF still from DEC: MOVQ/MOVD keep flags */
    JMP        ( LLBL (G3TN_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TN_norm):
    PREFETCHW  ( REGIND(EAX) )
    MOVQ       ( MM0, MM3 )                      /* x1 | x0 */
    MOVQ       ( MM1, MM4 )                      /*    | x2 */
    PFMUL      ( MM0, MM3 )                      /* x1*x1 | x0*x0 */
    ADD_L      ( CONST(12), EAX )                /* next r */
    PFMUL      ( MM1, MM4 )                      /*       | x2*x2 */
    PFADD      ( MM4, MM3 )                      /* x1*x1 | x0*x0+x2*x2 */
    PFACC      ( MM3, MM3 )                      /* sum of squares in both halves */
    PFRSQRT    ( MM3, MM5 )                      /* first estimate of 1/sqrt(sum) */
    MOVQ       ( MM5, MM4 )
    PUNPCKLDQ  ( MM3, MM3 )
    DEC_L      ( EBP )                           /* counter; flags survive the 3DNow! ops */
    PFMUL      ( MM5, MM5 )
    PFRSQIT1   ( MM3, MM5 )                      /* refinement step 1 */
    PFRCPIT2   ( MM4, MM5 )                      /* refinement step 2 */
    PFMUL      ( MM5, MM0 )                      /* x1, x0 normalized */
    MOVQ       ( MM0, REGOFF(-12, EAX) )         /* write new x0, x1 */
    PFMUL      ( MM5, MM1 )                      /* x2 normalized */
    MOVD       ( MM1, REGOFF(-4, EAX) )          /* write new x2 */
    MOVQ       ( REGIND (EAX), MM0 )             /* preload next x1 | x0 */
    MOVD       ( REGOFF(8, EAX), MM1 )           /* preload next x2 */
    JNZ        ( LLBL (G3TN_norm) )

LLBL (G3TN_exit_3dnow):
    FEMMS

LLBL (G3TN_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
/*
 * _mesa_3dnow_transform_normalize_normals_no_rot
 *
 * Single pass over in->count normals using only the diagonal elements
 * m0, m5, m10 of mat->inv:
 *     t = (x0*m0, x1*m5, x2*m10)
 * followed by normalization, either with lengths[i] (lengths != NULL) or
 * with a computed 1/sqrt(t.t).  Results go to dest, 12 bytes per element;
 * the input cursor advances by STRIDE.
 *
 * Register roles: EAX = dest cursor, EDX = input cursor, ECX = mat->inv,
 * EBP = remaining count, EDI = lengths, ESI = in (needed by STRIDE).
 * EDI/ESI/EBP are saved and restored; EAX/ECX/EDX and the MMX state are
 * clobbered.
 *
 * The loops preload the next element (and next length) before testing the
 * counter, so one element past the end is read but never written.
 *
 * NOTE(review): scale is folded into the matrix only when lengths == NULL;
 * confirm this matches the reference C implementation.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12
    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP )   /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )   /* dest->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )  /* mat->inv */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )   /* in->start */

    CMP_L      ( CONST(0), EBP )                 /* nothing to do for count == 0 */
    JE         ( LLBL (G3TNNR_end) )

    FEMMS

    MOVD       ( M(0), MM0 )                     /*     | m0 */
    PUNPCKLDQ  ( M(5), MM0 )                     /* m5  | m0 */
    MOVD       ( M(10), MM2 )                    /*     | m10 */
    PUNPCKLDQ  ( MM2, MM2 )                      /* m10 | m10 */

    CMP_L      ( CONST(0), EDI )                 /* lengths supplied? then skip scaling */
    JNE        ( LLBL (G3TNNR_scale_end) )

    MOVD       ( ARG_SCALE, MM7 )                /*       | scale */
    PUNPCKLDQ  ( MM7, MM7 )                      /* scale | scale */
    PFMUL      ( MM7, MM0 )                      /* scale*m5  | scale*m0 */
    PFMUL      ( MM7, MM2 )                      /* scale*m10 | scale*m10 */

ALIGNTEXT32
LLBL (G3TNNR_scale_end):
    MOVQ       ( REGIND(EDX), MM6 )              /* x1 | x0 */
    MOVD       ( REGOFF(8, EDX), MM7 )           /*    | x2 */

    CMP_L      ( CONST(0), EDI )                 /* lengths == NULL -> compute them */
    JE         ( LLBL (G3TNNR_norm) )

    MOVD       ( REGIND(EDI), MM3 )              /*    | first length */

ALIGNTEXT32
LLBL (G3TNNR_norm_w_lengths):                    /* use precalculated lengths */
    PREFETCHW  ( REGIND(EAX) )
    PFMUL      ( MM0, MM6 )                      /* x1*m5 | x0*m0 */
    ADD_L      ( STRIDE, EDX )                   /* next normal */
    PREFETCH   ( REGIND(EDX) )
    PFMUL      ( MM2, MM7 )                      /*       | x2*m10 */
    ADD_L      ( CONST(12), EAX )                /* next r */
    PFMUL      ( MM3, MM7 )                      /*       | x2 normalized */
    PUNPCKLDQ  ( MM3, MM3 )                      /* length | length */
    ADD_L      ( CONST(4), EDI )                 /* next length */
    PFMUL      ( MM3, MM6 )                      /* x1, x0 normalized */
    DEC_L      ( EBP )
    MOVQ       ( MM6, REGOFF(-12, EAX) )         /* write r0, r1 */
    MOVD       ( MM7, REGOFF(-4, EAX) )          /* write r2 */
    MOVD       ( REGIND(EDI), MM3 )              /* preload next length */
    MOVQ       ( REGIND(EDX), MM6 )              /* preload next x1 | x0 */
    MOVD       ( REGOFF(8, EDX), MM7 )           /* preload next x2 */
    /* DEC leaves CF untouched; branch on ZF alone instead of JA, which
     * would also depend on the carry of the pointer ADD above. */
    JNZ        ( LLBL (G3TNNR_norm_w_lengths) )
    JMP        ( LLBL (G3TNNR_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TNNR_norm):                              /* need to calculate lengths */
    PREFETCHW  ( REGIND(EAX) )
    PFMUL      ( MM0, MM6 )                      /* x1*m5 | x0*m0 */
    ADD_L      ( CONST(12), EAX )                /* next r */
    PFMUL      ( MM2, MM7 )                      /*       | x2*m10 */
    MOVQ       ( MM6, MM3 )                      /* t1 | t0 */
    MOVQ       ( MM7, MM4 )                      /*    | t2 */
    PFMUL      ( MM6, MM3 )                      /* t1*t1 | t0*t0 */
    PFMUL      ( MM7, MM4 )                      /*       | t2*t2 */
    PFACC      ( MM3, MM3 )                      /* unused | t0*t0+t1*t1 */
    PFADD      ( MM4, MM3 )                      /* unused | t.t */
    ADD_L      ( STRIDE, EDX )                   /* next normal */
    PREFETCH   ( REGIND(EDX) )
    PFRSQRT    ( MM3, MM5 )                      /* first estimate of 1/sqrt(t.t) */
    MOVQ       ( MM5, MM4 )
    PUNPCKLDQ  ( MM3, MM3 )
    PFMUL      ( MM5, MM5 )
    PFRSQIT1   ( MM3, MM5 )                      /* refinement step 1 */
    DEC_L      ( EBP )                           /* counter; flags survive the 3DNow! ops */
    PFRCPIT2   ( MM4, MM5 )                      /* refinement step 2 */
    PFMUL      ( MM5, MM6 )                      /* t1, t0 normalized */
    MOVQ       ( MM6, REGOFF(-12, EAX) )         /* write r0, r1 */
    PFMUL      ( MM5, MM7 )                      /* t2 normalized */
    MOVD       ( MM7, REGOFF(-4, EAX) )          /* write r2 */
    MOVQ       ( REGIND(EDX), MM6 )              /* preload next x1 | x0 */
    MOVD       ( REGOFF(8, EDX), MM7 )           /* preload next x2 */
    JNZ        ( LLBL (G3TNNR_norm) )

LLBL (G3TNNR_exit_3dnow):
    FEMMS

LLBL (G3TNNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
/*
 * _mesa_3dnow_transform_rescale_normals_no_rot
 *
 * For each of in->count normals x, writes to dest
 *     r = (x0*scale*m0, x1*scale*m5, x2*scale*m10)
 * with m = mat->inv.  Output elements are 12 bytes apart; the input cursor
 * advances by STRIDE.
 *
 * Register roles: EAX = dest cursor, EDX = input cursor, ECX = mat->inv,
 * EBP = remaining count, ESI = in (needed by STRIDE).  EDI is saved and
 * restored but not otherwise used.  EAX/ECX/EDX and the MMX state are
 * clobbered.
 *
 * The loop preloads the next element before testing the counter, so one
 * element past the end is read but never written.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12
    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EDX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP )   /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EDX) )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )  /* mat->inv */
    MOV_L      ( REGOFF(V3F_START, EDX), EAX )   /* dest->start */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )   /* in->start */

    CMP_L      ( CONST(0), EBP )                 /* nothing to do for count == 0 */
    JE         ( LLBL (G3TRNR_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM6 )                /*       | scale */
    PUNPCKLDQ  ( MM6, MM6 )                      /* scale | scale */
    MOVD       ( REGIND(ECX), MM0 )              /*       | m0 */
    PUNPCKLDQ  ( REGOFF(20, ECX), MM0 )          /* m5    | m0 */
    PFMUL      ( MM6, MM0 )                      /* scale*m5 | scale*m0 */
    MOVD       ( REGOFF(40, ECX), MM2 )          /*       | m10 */
    MOVQ       ( REGIND(EDX), MM4 )              /* x1 | x0 */
    PFMUL      ( MM6, MM2 )                      /*       | scale*m10 */
    MOVD       ( REGOFF(8, EDX), MM5 )           /*       | x2 */

ALIGNTEXT32
LLBL (G3TRNR_rescale):
    PREFETCHW  ( REGIND(EAX) )
    PFMUL      ( MM0, MM4 )                      /* x1*m5 | x0*m0 (scaled) */
    ADD_L      ( STRIDE, EDX )                   /* next normal */
    PREFETCH   ( REGIND(EDX) )
    PFMUL      ( MM2, MM5 )                      /*       | x2*m10 (scaled) */
    ADD_L      ( CONST(12), EAX )                /* next r */
    DEC_L      ( EBP )
    MOVQ       ( MM4, REGOFF(-12, EAX) )         /* write r0, r1 */
    MOVD       ( MM5, REGOFF(-4, EAX) )          /* write r2 */
    MOVQ       ( REGIND(EDX), MM4 )              /* preload next x1 | x0 */
    MOVD       ( REGOFF(8, EDX), MM5 )           /* preload next x2 */
    /* DEC leaves CF untouched; branch on ZF alone instead of JA, which
     * would also depend on the carry of the pointer ADD above. */
    JNZ        ( LLBL (G3TRNR_rescale) )

    FEMMS

LLBL (G3TRNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
/*
 * _mesa_3dnow_transform_rescale_normals
 *
 * For each of in->count normals x, writes to dest
 *     r0 = scale * (x0*m0 + x1*m1 + x2*m2)
 *     r1 = scale * (x0*m4 + x1*m5 + x2*m6)
 *     r2 = scale * (x0*m8 + x1*m9 + x2*m10)
 * with m = mat->inv; scale is folded into the matrix once before the loop.
 * Output elements are 12 bytes apart; the input cursor advances by STRIDE.
 *
 * Register roles: EAX = dest cursor, EDX = input cursor, ECX = mat->inv,
 * EDI = remaining count, ESI = in (needed by STRIDE).  EDI/ESI are saved
 * and restored; EAX/ECX/EDX and the MMX state are clobbered.
 *
 * The loop preloads the next element before testing the counter, so one
 * element past the end is read but never written.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
GLNAME(_mesa_3dnow_transform_rescale_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8
    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDI )   /* dest->count = in->count */
    MOV_L      ( EDI, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )   /* dest->start */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )   /* in->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )  /* mat->inv */

    CMP_L      ( CONST(0), EDI )                 /* nothing to do for count == 0 */
    JE         ( LLBL (G3TR_end) )

    FEMMS

    MOVQ       ( REGIND(ECX), MM3 )              /* m1 | m0 */
    MOVQ       ( REGOFF(16,ECX), MM4 )           /* m5 | m4 */
    MOVD       ( ARG_SCALE, MM0 )                /*    | scale */
    MOVD       ( REGOFF(8,ECX), MM5 )            /*    | m2 */
    PUNPCKLDQ  ( MM0, MM0 )                      /* scale | scale */
    PUNPCKLDQ  ( REGOFF(24, ECX), MM5 )          /* m6 | m2 */
    PFMUL      ( MM0, MM3 )                      /* scale*m1 | scale*m0 */
    MOVQ       ( REGOFF(32, ECX), MM6 )          /* m9 | m8 */
    PFMUL      ( MM0, MM4 )                      /* scale*m5 | scale*m4 */
    MOVD       ( REGOFF(40, ECX), MM7 )          /*    | m10 */
    PFMUL      ( MM0, MM5 )                      /* scale*m6 | scale*m2 */
    PFMUL      ( MM0, MM6 )                      /* scale*m9 | scale*m8 */
    MOVD       ( REGOFF(8, EDX), MM2 )           /*    | x2 */
    PFMUL      ( MM0, MM7 )                      /*    | scale*m10 */
    MOVQ       ( REGIND(EDX), MM0 )              /* x1 | x0 (MM0 no longer holds scale) */

ALIGNTEXT32
LLBL (G3TR_rescale):
    PREFETCHW  ( REGIND(EAX) )
    MOVQ       ( MM0, MM1 )                      /* x1 | x0 */
    PUNPCKLDQ  ( MM2, MM2 )                      /* x2 | x2 */
    PFMUL      ( MM3, MM0 )                      /* x1*m1 | x0*m0 */
    ADD_L      ( CONST(12), EAX )                /* next r */
    PFMUL      ( MM4, MM1 )                      /* x1*m5 | x0*m4 */
    PFACC      ( MM1, MM0 )                      /* x0*m4+x1*m5 | x0*m0+x1*m1 */
    MOVQ       ( REGIND(EDX), MM1 )              /* x1 | x0 */
    PFMUL      ( MM5, MM2 )                      /* x2*m6 | x2*m2 */
    PFADD      ( MM2, MM0 )                      /* r1 | r0 */
    MOVD       ( REGOFF(8, EDX), MM2 )           /*    | x2 */
    ADD_L      ( STRIDE, EDX )                   /* next normal */
    PREFETCH   ( REGIND(EDX) )
    MOVQ       ( MM0, REGOFF(-12, EAX) )         /* write r0, r1 */
    PFMUL      ( MM6, MM1 )                      /* x1*m9 | x0*m8 */
    PFMUL      ( MM7, MM2 )                      /*    | x2*m10 */
    PFACC      ( MM1, MM1 )                      /* unused | x0*m8+x1*m9 */
    PFADD      ( MM2, MM1 )                      /* unused | r2 */
    MOVD       ( MM1, REGOFF(-4, EAX) )          /* write r2 */
    MOVQ       ( REGIND(EDX), MM0 )              /* preload next x1 | x0 */
    MOVD       ( REGOFF(8, EDX), MM2 )           /* preload next x2 */
    DEC_L      ( EDI )
    /* DEC leaves CF untouched; branch on ZF alone instead of JA, which
     * would also depend on the carry of the pointer ADD above. */
    JNZ        ( LLBL (G3TR_rescale) )

    FEMMS

LLBL (G3TR_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
/*
 * _mesa_3dnow_transform_normals_no_rot
 *
 * For each of in->count normals x, writes to dest
 *     r = (x0*m0, x1*m5, x2*m10)
 * with m = mat->inv.  Output elements are 12 bytes apart; the input cursor
 * advances by STRIDE.
 *
 * Register roles: EAX = dest cursor, EDX = input cursor, ECX = mat->inv,
 * EDI = remaining count, ESI = in (needed by STRIDE).  EDI/ESI are saved
 * and restored; EAX/ECX/EDX and the MMX state are clobbered.
 *
 * The loop preloads the next element before testing the counter, so one
 * element past the end is read but never written.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8
    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDI )   /* dest->count = in->count */
    MOV_L      ( EDI, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )   /* dest->start */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )   /* in->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )  /* mat->inv */

    CMP_L      ( CONST(0), EDI )                 /* nothing to do for count == 0 */
    JE         ( LLBL (G3TNR_end) )

    FEMMS

    MOVD       ( REGIND(ECX), MM0 )              /*     | m0 */
    PUNPCKLDQ  ( REGOFF(20, ECX), MM0 )          /* m5  | m0 */
    MOVD       ( REGOFF(40, ECX), MM2 )          /*     | m10 */
    PUNPCKLDQ  ( MM2, MM2 )                      /* m10 | m10 */
    MOVQ       ( REGIND(EDX), MM4 )              /* x1 | x0 */
    MOVD       ( REGOFF(8, EDX), MM5 )           /*    | x2 */

ALIGNTEXT32
LLBL (G3TNR_transform):
    PREFETCHW  ( REGIND(EAX) )
    PFMUL      ( MM0, MM4 )                      /* x1*m5 | x0*m0 */
    ADD_L      ( STRIDE, EDX)                    /* next normal */
    PREFETCH   ( REGIND(EDX) )
    PFMUL      ( MM2, MM5 )                      /*       | x2*m10 */
    ADD_L      ( CONST(12), EAX )                /* next r */
    DEC_L      ( EDI )
    MOVQ       ( MM4, REGOFF(-12, EAX) )         /* write r0, r1 */
    MOVD       ( MM5, REGOFF(-4, EAX) )          /* write r2 */
    MOVQ       ( REGIND(EDX), MM4 )              /* preload next x1 | x0 */
    MOVD       ( REGOFF(8, EDX), MM5 )           /* preload next x2 */
    /* DEC leaves CF untouched; branch on ZF alone instead of JA, which
     * would also depend on the carry of the pointer ADD above. */
    JNZ        ( LLBL (G3TNR_transform) )

    FEMMS

LLBL (G3TNR_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
/*
 * _mesa_3dnow_transform_normals
 *
 * For each of in->count normals x, writes to dest
 *     r0 = x0*m0 + x1*m1 + x2*m2
 *     r1 = x0*m4 + x1*m5 + x2*m6
 *     r2 = x0*m8 + x1*m9 + x2*m10
 * with m = mat->inv.  Output elements are 12 bytes apart; the input cursor
 * advances by STRIDE.
 *
 * Register roles: EAX = dest cursor, EDX = input cursor, ECX = mat->inv,
 * EDI = remaining count, ESI = in (needed by STRIDE).  EDI/ESI are saved
 * and restored; EAX/ECX/EDX and the MMX state are clobbered.
 *
 * The loop preloads the next element before testing the counter, so one
 * element past the end is read but never written.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals)
GLNAME(_mesa_3dnow_transform_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8
    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDI )   /* dest->count = in->count */
    MOV_L      ( EDI, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )   /* dest->start */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )   /* in->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )  /* mat->inv */

    CMP_L      ( CONST(0), EDI )                 /* nothing to do for count == 0 */
    JE         ( LLBL (G3T_end) )

    FEMMS

    MOVQ       ( REGIND(ECX), MM3 )              /* m1 | m0 */
    MOVQ       ( REGOFF(16, ECX), MM4 )          /* m5 | m4 */
    MOVD       ( REGOFF(8, ECX), MM5 )           /*    | m2 */
    PUNPCKLDQ  ( REGOFF(24, ECX), MM5 )          /* m6 | m2 */
    MOVQ       ( REGOFF(32, ECX), MM6 )          /* m9 | m8 */
    MOVD       ( REGOFF(40, ECX), MM7 )          /*    | m10 */
    MOVQ       ( REGIND(EDX), MM0 )              /* x1 | x0 */
    MOVD       ( REGOFF(8, EDX), MM2 )           /*    | x2 */

ALIGNTEXT32
LLBL (G3T_transform):
    PREFETCHW  ( REGIND(EAX) )
    MOVQ       ( MM0, MM1 )                      /* x1 | x0 */
    PUNPCKLDQ  ( MM2, MM2 )                      /* x2 | x2 */
    PFMUL      ( MM3, MM0 )                      /* x1*m1 | x0*m0 */
    ADD_L      ( CONST(12), EAX )                /* next r */
    PFMUL      ( MM4, MM1 )                      /* x1*m5 | x0*m4 */
    PFACC      ( MM1, MM0 )                      /* x0*m4+x1*m5 | x0*m0+x1*m1 */
    PFMUL      ( MM5, MM2 )                      /* x2*m6 | x2*m2 */
    PFADD      ( MM2, MM0 )                      /* r1 | r0 */
    MOVQ       ( REGIND(EDX), MM1 )              /* x1 | x0 */
    MOVQ       ( MM0, REGOFF(-12, EAX) )         /* write r0, r1 */
    PFMUL      ( MM6, MM1 )                      /* x1*m9 | x0*m8 */
    MOVD       ( REGOFF(8, EDX), MM2 )           /*    | x2 */
    PFMUL      ( MM7, MM2 )                      /*    | x2*m10 */
    ADD_L      ( STRIDE, EDX )                   /* next normal */
    PREFETCH   ( REGIND(EDX) )
    PFACC      ( MM1, MM1 )                      /* unused | x0*m8+x1*m9 */
    PFADD      ( MM2, MM1 )                      /* unused | r2 */
    MOVD       ( MM1, REGOFF(-4, EAX) )          /* write r2 */
    MOVQ       ( REGIND(EDX), MM0 )              /* preload next x1 | x0 */
    MOVD       ( REGOFF(8, EDX), MM2 )           /* preload next x2 */
    DEC_L      ( EDI )
    /* DEC leaves CF untouched; branch on ZF alone instead of JA, which
     * would also depend on the carry of the pointer ADD above. */
    JNZ        ( LLBL (G3T_transform) )

    FEMMS

LLBL (G3T_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
/*
 * _mesa_3dnow_normalize_normals
 *
 * For each of in->count normals x, writes the normalized vector to dest:
 * either x * lengths[i] (lengths != NULL) or x * 1/sqrt(x.x) computed with
 * the 3DNow! reciprocal-square-root sequence.  Output elements are 12 bytes
 * apart; the input cursor advances by STRIDE.
 *
 * Register roles: EAX = dest cursor, ECX = input cursor, EDX = lengths,
 * EBP = remaining count, ESI = in (needed by STRIDE).  EDI is saved and
 * restored but not otherwise used.  EAX/ECX/EDX and the MMX state are
 * clobbered.
 *
 * The loops preload the next element before testing the counter, so one
 * element past the end is read but never written.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_normalize_normals)
GLNAME(_mesa_3dnow_normalize_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12
    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP )   /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )   /* dest->start */
    MOV_L      ( REGOFF(V3F_START, ESI), ECX )   /* in->start */
    MOV_L      ( ARG_LENGTHS, EDX )

    CMP_L      ( CONST(0), EBP )                 /* nothing to do for count == 0 */
    JE         ( LLBL (G3N_end) )

    FEMMS

    MOVQ       ( REGIND(ECX), MM0 )              /* x1 | x0 */
    MOVD       ( REGOFF(8, ECX), MM1 )           /*    | x2 */

    CMP_L      ( CONST(0), EDX )                 /* lengths == NULL -> compute them */
    JE         ( LLBL (G3N_norm2) )

ALIGNTEXT32
LLBL (G3N_norm1):                                /* use precalculated lengths */
    PREFETCHW  ( REGIND(EAX) )                   /* dest is written: same hint as the other loops */
    MOVD       ( REGIND(EDX), MM3 )              /*        | length */
    PFMUL      ( MM3, MM1 )                      /*        | x2 normalized */
    PUNPCKLDQ  ( MM3, MM3 )                      /* length | length */
    ADD_L      ( STRIDE, ECX )                   /* next normal */
    PREFETCH   ( REGIND(ECX) )
    PFMUL      ( MM3, MM0 )                      /* x1, x0 normalized */
    MOVQ       ( MM0, REGIND(EAX) )              /* write new x0, x1 */
    MOVD       ( MM1, REGOFF(8, EAX) )           /* write new x2 */
    ADD_L      ( CONST(12), EAX )                /* next r */
    ADD_L      ( CONST(4), EDX )                 /* next length */
    DEC_L      ( EBP )
    MOVQ       ( REGIND(ECX), MM0 )              /* preload next x1 | x0 */
    MOVD       ( REGOFF(8, ECX), MM1 )           /* preload next x2 */
    /* DEC leaves CF untouched; branch on ZF alone instead of JA, which
     * would also depend on the carry of the pointer ADD above. */
    JNZ        ( LLBL (G3N_norm1) )
    JMP        ( LLBL (G3N_end1) )

ALIGNTEXT32
LLBL (G3N_norm2):                                /* need to calculate lengths */
    PREFETCHW  ( REGIND(EAX) )
    MOVQ       ( MM0, MM3 )                      /* x1 | x0 */
    ADD_L      ( STRIDE, ECX )                   /* next normal */
    PREFETCH   ( REGIND(ECX) )
    PFMUL      ( MM0, MM3 )                      /* x1*x1 | x0*x0 */
    MOVQ       ( MM1, MM4 )                      /*       | x2 */
    ADD_L      ( CONST(12), EAX )                /* next r */
    PFMUL      ( MM1, MM4 )                      /*       | x2*x2 */
    PFADD      ( MM4, MM3 )                      /* x1*x1 | x0*x0+x2*x2 */
    PFACC      ( MM3, MM3 )                      /* x.x in both halves */
    PFRSQRT    ( MM3, MM5 )                      /* first estimate of 1/sqrt(x.x) */
    MOVQ       ( MM5, MM4 )
    PUNPCKLDQ  ( MM3, MM3 )
    PFMUL      ( MM5, MM5 )
    PFRSQIT1   ( MM3, MM5 )                      /* refinement step 1 */
    DEC_L      ( EBP )                           /* counter; flags survive the 3DNow! ops */
    PFRCPIT2   ( MM4, MM5 )                      /* refinement step 2 */
    PFMUL      ( MM5, MM0 )                      /* x1, x0 normalized */
    MOVQ       ( MM0, REGOFF(-12, EAX) )         /* write new x0, x1 */
    PFMUL      ( MM5, MM1 )                      /* x2 normalized */
    MOVD       ( MM1, REGOFF(-4, EAX) )          /* write new x2 */
    MOVQ       ( REGIND(ECX), MM0 )              /* preload next x1 | x0 */
    MOVD       ( REGOFF(8, ECX), MM1 )           /* preload next x2 */
    JNZ        ( LLBL (G3N_norm2) )

LLBL (G3N_end1):
    FEMMS

LLBL (G3N_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
/*
 * _mesa_3dnow_rescale_normals
 *
 * For each of in->count normals x, writes x * scale to dest.  Output
 * elements are 12 bytes apart; the input cursor advances by STRIDE.
 *
 * Register roles: EAX = dest cursor, ECX = input cursor, EDX = remaining
 * count, ESI = in (needed by STRIDE).  EDI is saved and restored but not
 * otherwise used.  EAX/ECX/EDX and the MMX state are clobbered.
 *
 * The loop preloads the next element before testing the counter, so one
 * element past the end is read but never written.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_rescale_normals)
GLNAME(_mesa_3dnow_rescale_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8
    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDX )   /* dest->count = in->count */
    MOV_L      ( EDX, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )   /* dest->start */
    MOV_L      ( REGOFF(V3F_START, ESI), ECX )   /* in->start */

    CMP_L      ( CONST(0), EDX )                 /* nothing to do for count == 0 */
    JE         ( LLBL (G3R_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM0 )                /*       | scale */
    PUNPCKLDQ  ( MM0, MM0 )                      /* scale | scale */
    MOVQ       ( REGIND(ECX), MM1 )              /* x1 | x0 */
    MOVD       ( REGOFF(8, ECX), MM2 )           /*    | x2 */

ALIGNTEXT32
LLBL (G3R_rescale):
    PREFETCHW  ( REGIND(EAX) )
    PFMUL      ( MM0, MM1 )                      /* x1*scale | x0*scale */
    ADD_L      ( STRIDE, ECX )                   /* next normal */
    PREFETCH   ( REGIND(ECX) )
    PFMUL      ( MM0, MM2 )                      /*          | x2*scale */
    ADD_L      ( CONST(12), EAX )                /* next r */
    MOVQ       ( MM1, REGOFF(-12, EAX) )         /* write r0, r1 */
    MOVD       ( MM2, REGOFF(-4, EAX) )          /* write r2 */
    DEC_L      ( EDX )
    MOVQ       ( REGIND(ECX), MM1 )              /* preload next x1 | x0 */
    MOVD       ( REGOFF(8, ECX), MM2 )           /* preload next x2 */
    /* DEC leaves CF untouched; branch on ZF alone instead of JA, which
     * would also depend on the carry of the pointer ADD above. */
    JNZ        ( LLBL (G3R_rescale) )

    FEMMS

LLBL (G3R_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET

423
src/mesa/x86/3dnow_xform1.S Normal file
View File

@ -0,0 +1,423 @@
/* $Id: 3dnow_xform1.S,v 1.1 2001/03/29 06:46:16 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
#define FRAME_OFFSET 4
/*
 * _mesa_3dnow_transform_points1_general
 *
 * Transforms source->count one-component points x0 by a general matrix:
 *     r0 = x0*m00 + m30      r1 = x0*m01 + m31
 *     r2 = x0*m02 + m32      r3 = x0*m03 + m33
 * Sets dest->size = 4, ORs VEC_SIZE_4 into dest->flags and copies the
 * count.  Output elements are 16 bytes apart; the input cursor advances by
 * source->stride.
 *
 * Arguments are read through the ARG_* macros with FRAME_OFFSET 4, so they
 * must all be fetched after the first push and before the second.
 * In the loop: EAX = input cursor, EDX = dest cursor, ECX = matrix,
 * ESI = remaining count, EDI = stride.  ESI/EDI are saved and restored.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_general )
GLNAME( _mesa_3dnow_transform_points1_general ):

    PUSH_L     ( ESI )

    MOV_L      ( ARG_DEST, ECX )
    MOV_L      ( ARG_MATRIX, ESI )
    MOV_L      ( ARG_SOURCE, EAX )
    MOV_L      ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B       ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L      ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L      ( EDX, REGOFF(V4F_COUNT, ECX) )   /* dest->count = source->count */

    PUSH_L     ( EDI )

    MOV_L      ( REGOFF(V4F_START, ECX), EDX )   /* dest->start (was a bare offset 4) */
    MOV_L      ( ESI, ECX )                      /* matrix */
    MOV_L      ( REGOFF(V4F_COUNT, EAX), ESI )
    MOV_L      ( REGOFF(V4F_STRIDE, EAX), EDI )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )   /* source->start */

    TEST_L     ( ESI, ESI )                      /* empty input: skip the loop */
    JZ         ( LLBL( G3TPGR_3 ) )

    MOVQ       ( REGIND(ECX), MM0 )              /* m01 | m00 */
    MOVQ       ( REGOFF(8, ECX), MM1 )           /* m03 | m02 */
    MOVQ       ( REGOFF(48, ECX), MM2 )          /* m31 | m30 */
    MOVQ       ( REGOFF(56, ECX), MM3 )          /* m33 | m32 */

ALIGNTEXT16
LLBL( G3TPGR_2 ):
    MOVD       ( REGIND(EAX), MM4 )              /*    | x0 */
    PUNPCKLDQ  ( MM4, MM4 )                      /* x0 | x0 */
    MOVQ       ( MM4, MM5 )                      /* x0 | x0 */
    PFMUL      ( MM0, MM4 )                      /* x0*m01 | x0*m00 */
    PFMUL      ( MM1, MM5 )                      /* x0*m03 | x0*m02 */
    PFADD      ( MM2, MM4 )                      /* r1 | r0 */
    PFADD      ( MM3, MM5 )                      /* r3 | r2 */
    MOVQ       ( MM4, REGIND(EDX) )              /* write r1, r0 */
    MOVQ       ( MM5, REGOFF(8, EDX) )           /* write r3, r2 */
    ADD_L      ( EDI, EAX )                      /* next vertex */
    ADD_L      ( CONST(16), EDX )                /* next r */
    DEC_L      ( ESI )
    JNZ        ( LLBL( G3TPGR_2 ) )

LLBL( G3TPGR_3 ):
    FEMMS
    POP_L      ( EDI )
    POP_L      ( ESI )
    RET
/*
 * _mesa_3dnow_transform_points1_identity
 *
 * Copies the single component x0 of each of source->count points to dest
 * (r0 = x0).  Sets dest->size = 1, ORs VEC_SIZE_1 into dest->flags and
 * copies the count.  Output elements are 16 bytes apart; the input cursor
 * advances by source->stride.  The matrix argument is loaded but never read.
 *
 * Arguments are read through the ARG_* macros with FRAME_OFFSET 4, so they
 * must all be fetched after the first push and before the second.
 * In the loop: EAX = input cursor, EDX = dest cursor, ESI = remaining
 * count, EDI = stride.  ESI/EDI are saved and restored.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_identity )
GLNAME( _mesa_3dnow_transform_points1_identity ):

    PUSH_L     ( ESI )

    MOV_L      ( ARG_DEST, ECX )
    MOV_L      ( ARG_MATRIX, ESI )
    MOV_L      ( ARG_SOURCE, EAX )
    MOV_L      ( CONST(1), REGOFF(V4F_SIZE, ECX) )
    OR_B       ( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, ECX) )
    MOV_L      ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L      ( EDX, REGOFF(V4F_COUNT, ECX) )   /* dest->count = source->count */

    PUSH_L     ( EDI )

    MOV_L      ( REGOFF(V4F_START, ECX), EDX )   /* dest->start (was a bare offset 4) */
    MOV_L      ( ESI, ECX )                      /* matrix (unused below) */
    MOV_L      ( REGOFF(V4F_COUNT, EAX), ESI )
    MOV_L      ( REGOFF(V4F_STRIDE, EAX), EDI )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )   /* source->start */

    TEST_L     ( ESI, ESI )                      /* empty input: skip the loop */
    JZ         ( LLBL( G3TPIR_4) )

ALIGNTEXT16
LLBL( G3TPIR_3 ):
    MOVD       ( REGIND(EAX), MM0 )              /*    | x0 */
    ADD_L      ( EDI, EAX )                      /* next vertex */
    MOVD       ( MM0, REGIND(EDX) )              /* write r0 */
    ADD_L      ( CONST(16), EDX )                /* next r */
    DEC_L      ( ESI )
    JNZ        ( LLBL( G3TPIR_3 ) )

LLBL( G3TPIR_4 ):
    FEMMS
    POP_L      ( EDI )
    POP_L      ( ESI )
    RET
/*
 * _mesa_3dnow_transform_points1_3d_no_rot
 *
 * Transforms source->count one-component points x0:
 *     r0 = x0*m00 + m30      r1 = m31      r2 = m32
 * Sets dest->size = 3, ORs VEC_SIZE_3 into dest->flags and copies the
 * count.  Output elements are 16 bytes apart; the input cursor advances by
 * source->stride.
 *
 * Arguments are read through the ARG_* macros with FRAME_OFFSET 4, so they
 * must all be fetched after the first push and before the second.
 * In the loop: EAX = input cursor, EDX = dest cursor, ECX = matrix,
 * ESI = remaining count, EDI = stride.  ESI/EDI are saved and restored.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_3d_no_rot )
GLNAME( _mesa_3dnow_transform_points1_3d_no_rot ):

    PUSH_L     ( ESI )

    MOV_L      ( ARG_DEST, ECX )
    MOV_L      ( ARG_MATRIX, ESI )
    MOV_L      ( ARG_SOURCE, EAX )
    MOV_L      ( CONST(3), REGOFF(V4F_SIZE, ECX) )
    OR_B       ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
    MOV_L      ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L      ( EDX, REGOFF(V4F_COUNT, ECX) )   /* dest->count = source->count */

    PUSH_L     ( EDI )

    MOV_L      ( REGOFF(V4F_START, ECX), EDX )   /* dest->start (was a bare offset 4) */
    MOV_L      ( ESI, ECX )                      /* matrix */
    MOV_L      ( REGOFF(V4F_COUNT, EAX), ESI )
    MOV_L      ( REGOFF(V4F_STRIDE, EAX), EDI )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )   /* source->start */

    TEST_L     ( ESI, ESI )                      /* empty input: skip the loop */
    JZ         ( LLBL( G3TP3NRR_3 ) )

    MOVD       ( REGIND(ECX), MM0 )              /*     | m00 */
    MOVQ       ( REGOFF(48, ECX), MM2 )          /* m31 | m30 */
    MOVD       ( REGOFF(56, ECX), MM3 )          /*     | m32 */

ALIGNTEXT16
LLBL( G3TP3NRR_2 ):
    MOVD       ( REGIND(EAX), MM4 )              /*  0  | x0 */
    PFMUL      ( MM0, MM4 )                      /*  0  | x0*m00 */
    PFADD      ( MM2, MM4 )                      /* m31 | x0*m00+m30 */
    MOVQ       ( MM4, REGIND(EDX) )              /* write r1, r0 */
    MOVD       ( MM3, REGOFF(8, EDX) )           /* write r2 (constant m32) */
    ADD_L      ( EDI, EAX )                      /* next vertex */
    ADD_L      ( CONST(16), EDX )                /* next r */
    DEC_L      ( ESI )
    JNZ        ( LLBL( G3TP3NRR_2 ) )

LLBL( G3TP3NRR_3 ):
    FEMMS
    POP_L      ( EDI )
    POP_L      ( ESI )
    RET
/*
 * _mesa_3dnow_transform_points1_perspective
 *
 * Transforms source->count one-component points x0:
 *     r0 = x0*m00      r1 = 0      r2 = m32      r3 = 0
 * Sets dest->size = 4, ORs VEC_SIZE_4 into dest->flags and copies the
 * count.  Output elements are 16 bytes apart; the input cursor advances by
 * source->stride.
 *
 * Arguments are read through the ARG_* macros with FRAME_OFFSET 4, so they
 * must all be fetched after the first push and before the second.
 * In the loop: EAX = input cursor, EDX = dest cursor, ECX = matrix,
 * ESI = remaining count, EDI = stride.  ESI/EDI are saved and restored.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_perspective )
GLNAME( _mesa_3dnow_transform_points1_perspective ):

    PUSH_L     ( ESI )

    MOV_L      ( ARG_DEST, ECX )
    MOV_L      ( ARG_MATRIX, ESI )
    MOV_L      ( ARG_SOURCE, EAX )
    MOV_L      ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B       ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L      ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L      ( EDX, REGOFF(V4F_COUNT, ECX) )   /* dest->count = source->count */

    PUSH_L     ( EDI )

    MOV_L      ( REGOFF(V4F_START, ECX), EDX )   /* dest->start (was a bare offset 4) */
    MOV_L      ( ESI, ECX )                      /* matrix */
    MOV_L      ( REGOFF(V4F_COUNT, EAX), ESI )
    MOV_L      ( REGOFF(V4F_STRIDE, EAX), EDI )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )   /* source->start */

    TEST_L     ( ESI, ESI )                      /* empty input: skip the loop */
    JZ         ( LLBL( G3TPPR_3 ) )

    MOVD       ( REGIND(ECX), MM0 )              /* 0 | m00 */
    MOVD       ( REGOFF(56, ECX), MM3 )          /* 0 | m32 */

ALIGNTEXT16
LLBL( G3TPPR_2 ):
    MOVD       ( REGIND(EAX), MM4 )              /* 0 | x0 */
    PFMUL      ( MM0, MM4 )                      /* 0 | x0*m00 */
    MOVQ       ( MM4, REGIND(EDX) )              /* write r1 (=0), r0 */
    MOVQ       ( MM3, REGOFF(8, EDX) )           /* write r3 (=0), r2 (=m32) */
    ADD_L      ( EDI, EAX )                      /* next vertex */
    ADD_L      ( CONST(16), EDX )                /* next r */
    DEC_L      ( ESI )
    JNZ        ( LLBL( G3TPPR_2 ) )

LLBL( G3TPPR_3 ):
    FEMMS
    POP_L      ( EDI )
    POP_L      ( ESI )
    RET
/*
 * _mesa_3dnow_transform_points1_2d
 *
 * Transforms source->count one-component points x0:
 *     r0 = x0*m00 + m30      r1 = x0*m01 + m31
 * Sets dest->size = 2, ORs VEC_SIZE_2 into dest->flags and copies the
 * count.  Output elements are 16 bytes apart; the input cursor advances by
 * source->stride.
 *
 * Arguments are read through the ARG_* macros with FRAME_OFFSET 4, so they
 * must all be fetched after the first push and before the second.
 * In the loop: EAX = input cursor, EDX = dest cursor, ECX = matrix,
 * ESI = remaining count, EDI = stride.  ESI/EDI are saved and restored.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_2d )
GLNAME( _mesa_3dnow_transform_points1_2d ):

    PUSH_L     ( ESI )

    MOV_L      ( ARG_DEST, ECX )
    MOV_L      ( ARG_MATRIX, ESI )
    MOV_L      ( ARG_SOURCE, EAX )
    MOV_L      ( CONST(2), REGOFF(V4F_SIZE, ECX) )
    OR_B       ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
    MOV_L      ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L      ( EDX, REGOFF(V4F_COUNT, ECX) )   /* dest->count = source->count */

    PUSH_L     ( EDI )

    MOV_L      ( REGOFF(V4F_START, ECX), EDX )   /* dest->start (was a bare offset 4) */
    MOV_L      ( ESI, ECX )                      /* matrix */
    MOV_L      ( REGOFF(V4F_COUNT, EAX), ESI )
    MOV_L      ( REGOFF(V4F_STRIDE, EAX), EDI )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )   /* source->start */

    TEST_L     ( ESI, ESI )                      /* empty input: skip the loop */
    JZ         ( LLBL( G3TP2R_3 ) )

    MOVQ       ( REGIND(ECX), MM0 )              /* m01 | m00 */
    MOVQ       ( REGOFF(48, ECX), MM2 )          /* m31 | m30 */

ALIGNTEXT16
LLBL( G3TP2R_2 ):
    MOVD       ( REGIND(EAX), MM4 )              /*    | x0 */
    PUNPCKLDQ  ( MM4, MM4 )                      /* x0 | x0 */
    PFMUL      ( MM0, MM4 )                      /* x0*m01 | x0*m00 */
    PFADD      ( MM2, MM4 )                      /* r1 | r0 */
    MOVQ       ( MM4, REGIND(EDX) )              /* write r1, r0 */
    ADD_L      ( EDI, EAX )                      /* next vertex */
    ADD_L      ( CONST(16), EDX )                /* next r */
    DEC_L      ( ESI )
    JNZ        ( LLBL( G3TP2R_2 ) )

LLBL( G3TP2R_3 ):
    FEMMS
    POP_L      ( EDI )
    POP_L      ( ESI )
    RET
/*
 * _mesa_3dnow_transform_points1_2d_no_rot
 *
 * Transforms source->count one-component points x0:
 *     r0 = x0*m00 + m30      r1 = m31
 * Sets dest->size = 2, ORs VEC_SIZE_2 into dest->flags and copies the
 * count.  Output elements are 16 bytes apart; the input cursor advances by
 * source->stride.
 *
 * Arguments are read through the ARG_* macros with FRAME_OFFSET 4, so they
 * must all be fetched after the first push and before the second.
 * In the loop: EAX = input cursor, EDX = dest cursor, ECX = matrix,
 * ESI = remaining count, EDI = stride.  ESI/EDI are saved and restored.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_2d_no_rot )
GLNAME( _mesa_3dnow_transform_points1_2d_no_rot ):

    PUSH_L     ( ESI )

    MOV_L      ( ARG_DEST, ECX )
    MOV_L      ( ARG_MATRIX, ESI )
    MOV_L      ( ARG_SOURCE, EAX )
    MOV_L      ( CONST(2), REGOFF(V4F_SIZE, ECX) )
    OR_B       ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
    MOV_L      ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L      ( EDX, REGOFF(V4F_COUNT, ECX) )   /* dest->count = source->count */

    PUSH_L     ( EDI )

    MOV_L      ( REGOFF(V4F_START, ECX), EDX )   /* dest->start (was a bare offset 4) */
    MOV_L      ( ESI, ECX )                      /* matrix */
    MOV_L      ( REGOFF(V4F_COUNT, EAX), ESI )
    MOV_L      ( REGOFF(V4F_STRIDE, EAX), EDI )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )   /* source->start */

    TEST_L     ( ESI, ESI )                      /* empty input: skip the loop */
    JZ         ( LLBL( G3TP2NRR_3 ) )

    MOVD       ( REGIND(ECX), MM0 )              /*     | m00 */
    MOVQ       ( REGOFF(48, ECX), MM2 )          /* m31 | m30 */

ALIGNTEXT16
LLBL( G3TP2NRR_2 ):
    MOVD       ( REGIND(EAX), MM4 )              /*  0  | x0 */
    ADD_L      ( EDI, EAX )                      /* next vertex */
    PFMUL      ( MM0, MM4 )                      /*  0  | x0*m00 */
    PFADD      ( MM2, MM4 )                      /* m31 | x0*m00+m30 */
    MOVQ       ( MM4, REGIND(EDX) )              /* write r1, r0 */
    ADD_L      ( CONST(16), EDX )                /* next r */
    DEC_L      ( ESI )
    JNZ        ( LLBL( G3TP2NRR_2 ) )

LLBL( G3TP2NRR_3 ):
    FEMMS
    POP_L      ( EDI )
    POP_L      ( ESI )
    RET
/*
 * _mesa_3dnow_transform_points1_3d
 *
 * Transforms source->count one-component points x0:
 *     r0 = x0*m00 + m30      r1 = x0*m01 + m31      r2 = x0*m02 + m32
 * Sets dest->size = 3, ORs VEC_SIZE_3 into dest->flags and copies the
 * count.  Output elements are 16 bytes apart; the input cursor advances by
 * source->stride.
 *
 * Arguments are read through the ARG_* macros with FRAME_OFFSET 4, so they
 * must all be fetched after the first push and before the second.
 * In the loop: EAX = input cursor, EDX = dest cursor, ECX = matrix,
 * ESI = remaining count, EDI = stride.  ESI/EDI are saved and restored.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_3d )
GLNAME( _mesa_3dnow_transform_points1_3d ):

    PUSH_L     ( ESI )

    MOV_L      ( ARG_DEST, ECX )
    MOV_L      ( ARG_MATRIX, ESI )
    MOV_L      ( ARG_SOURCE, EAX )
    MOV_L      ( CONST(3), REGOFF(V4F_SIZE, ECX) )
    OR_B       ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
    MOV_L      ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L      ( EDX, REGOFF(V4F_COUNT, ECX) )   /* dest->count = source->count */

    PUSH_L     ( EDI )

    MOV_L      ( REGOFF(V4F_START, ECX), EDX )   /* dest->start (was a bare offset 4) */
    MOV_L      ( ESI, ECX )                      /* matrix */
    MOV_L      ( REGOFF(V4F_COUNT, EAX), ESI )
    MOV_L      ( REGOFF(V4F_STRIDE, EAX), EDI )
    MOV_L      ( REGOFF(V4F_START, EAX), EAX )   /* source->start */

    TEST_L     ( ESI, ESI )                      /* empty input: skip the loop */
    JZ         ( LLBL( G3TP3R_3 ) )

    MOVQ       ( REGIND(ECX), MM0 )              /* m01 | m00 */
    MOVD       ( REGOFF(8, ECX), MM1 )           /*  0  | m02 */
    MOVQ       ( REGOFF(48, ECX), MM2 )          /* m31 | m30 */
    MOVD       ( REGOFF(56, ECX), MM3 )          /*  0  | m32 */

ALIGNTEXT16
LLBL( G3TP3R_2 ):
    MOVD       ( REGIND(EAX), MM4 )              /*    | x0 */
    PUNPCKLDQ  ( MM4, MM4 )                      /* x0 | x0 */
    MOVQ       ( MM4, MM5 )                      /* x0 | x0 (only the low half matters) */
    PFMUL      ( MM0, MM4 )                      /* x0*m01 | x0*m00 */
    PFMUL      ( MM1, MM5 )                      /*        | x0*m02 */
    PFADD      ( MM2, MM4 )                      /* r1 | r0 */
    PFADD      ( MM3, MM5 )                      /*    | r2 */
    MOVQ       ( MM4, REGIND(EDX) )              /* write r1, r0 */
    MOVD       ( MM5, REGOFF(8, EDX) )           /* write r2 */
    ADD_L      ( EDI, EAX )                      /* next vertex */
    ADD_L      ( CONST(16), EDX )                /* next r */
    DEC_L      ( ESI )
    JNZ        ( LLBL( G3TP3R_2 ) )

LLBL( G3TP3R_3 ):
    FEMMS
    POP_L      ( EDI )
    POP_L      ( ESI )
    RET

464
src/mesa/x86/3dnow_xform2.S Normal file
View File

@ -0,0 +1,464 @@
/* $Id: 3dnow_xform2.S,v 1.1 2001/03/29 06:46:16 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
#define FRAME_OFFSET 4
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points2_general
 *
 * Transforms an array of 2-component points by a general 4x4 matrix and
 * writes 4-component results.  Only x0 and x1 are read from each source
 * vertex; the m3x row is added unscaled:
 *   r0 = x0*m00 + x1*m10 + m30      r2 = x0*m02 + x1*m12 + m32
 *   r1 = x0*m01 + x1*m11 + m31      r3 = x0*m03 + x1*m13 + m33
 * (mIJ is the float at byte offset 16*I + 4*J of the matrix.)
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * (see xform_args.h).  The dest size is set to 4, VEC_SIZE_4 is ORed into
 * the dest flags and the source count is copied to the dest.
 *
 * Loop registers:
 *   EAX = current source vertex     EDI = source stride in bytes
 *   EDX = current dest vertex       ESI = vertices remaining
 *   ECX = matrix
 * Dest vertices are written 16 bytes apart.  ESI and EDI are preserved.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points2_general )
GLNAME( _mesa_3dnow_transform_points2_general ):
PUSH_L ( ESI ) /* preserve ESI for the caller */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer, parked in ESI for now */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* mark dest as size 4 */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* preserve EDI for the caller */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first dest vertex */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPGR_3 ) ) /* empty array -> epilogue */
/* Load the matrix column-paired so one PFMUL handles x0 and x1 together. */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */
MOVD ( REGOFF(8, ECX), MM2 ) /* | m02 */
PUNPCKLDQ ( REGOFF(24, ECX), MM2 ) /* m12 | m02 */
MOVD ( REGOFF(12, ECX), MM3 ) /* | m03 */
PUNPCKLDQ ( REGOFF(28, ECX), MM3 ) /* m13 | m03 */
MOVQ ( REGOFF(48, ECX), MM4 ) /* m31 | m30 */
MOVQ ( REGOFF(56, ECX), MM5 ) /* m33 | m32 */
ALIGNTEXT16
LLBL( G3TPGR_2 ):
/* r0, r1 */
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */
MOVQ ( MM6, MM7 ) /* x1 | x0 */
PFMUL ( MM0, MM6 ) /* x1*m10 | x0*m00 */
PFMUL ( MM1, MM7 ) /* x1*m11 | x0*m01 */
PFACC ( MM7, MM6 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */
PFADD ( MM4, MM6 ) /* x0*...*m11+m31 | x0*...*m10+m30 */
MOVQ ( MM6, REGIND(EDX) ) /* write r1, r0 */
/* r2, r3: the source vertex is reloaded because MM6/MM7 were consumed */
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */
MOVQ ( MM6, MM7 ) /* x1 | x0 */
PFMUL ( MM2, MM6 ) /* x1*m12 | x0*m02 */
PFMUL ( MM3, MM7 ) /* x1*m13 | x0*m03 */
ADD_L ( EDI, EAX ) /* next vertex */
PFACC ( MM7, MM6 ) /* x0*m03+x1*m13 | x0*m02+x1*m12 */
PFADD ( MM5, MM6 ) /* x0*...*m13+m33 | x0*...*m12+m32 */
MOVQ ( MM6, REGOFF(8, EDX) ) /* write r3, r2 */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPGR_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPGR_3 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points2_perspective
 *
 * Transforms an array of 2-component points using only the m00, m11 and
 * m32 matrix entries, writing 4-component results:
 *   r0 = x0*m00      r1 = x1*m11      r2 = m32      r3 = 0
 * (mIJ is the float at byte offset 16*I + 4*J of the matrix.)
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * (see xform_args.h).  The dest size is set to 4, VEC_SIZE_4 is ORed into
 * the dest flags and the source count is copied to the dest.
 *
 * Loop registers:
 *   EAX = current source vertex     EDI = source stride in bytes
 *   EDX = current dest vertex       ESI = vertices remaining
 * Dest vertices are written 16 bytes apart.  ESI and EDI are preserved.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points2_perspective )
GLNAME( _mesa_3dnow_transform_points2_perspective ):
PUSH_L ( ESI ) /* preserve ESI for the caller */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer, parked in ESI for now */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* mark dest as size 4 */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* preserve EDI for the caller */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first dest vertex */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPPR_3 ) ) /* empty array -> epilogue */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
MOVD ( REGOFF(56, ECX), MM3 ) /* 0 | m32 (MOVD clears the high half) */
ALIGNTEXT16
LLBL( G3TPPR_2 ):
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
MOVQ ( MM3, REGOFF(8, EDX) ) /* write r2 (=m32), r3 (=0) */
ADD_L ( EDI, EAX ) /* next vertex */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPPR_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPPR_3 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points2_3d
 *
 * Transforms an array of 2-component points by the upper three columns of
 * the matrix, writing 3-component results:
 *   r0 = x0*m00 + x1*m10 + m30
 *   r1 = x0*m01 + x1*m11 + m31
 *   r2 = x0*m02 + x1*m12 + m32
 * (mIJ is the float at byte offset 16*I + 4*J of the matrix.)
 * The fourth float of each dest vertex is not written.
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * (see xform_args.h).  The dest size is set to 3, VEC_SIZE_3 is ORed into
 * the dest flags and the source count is copied to the dest.
 *
 * Loop registers:
 *   EAX = current source vertex     EDI = source stride in bytes
 *   EDX = current dest vertex       ESI = vertices remaining
 * Dest vertices are written 16 bytes apart.  ESI and EDI are preserved.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points2_3d )
GLNAME( _mesa_3dnow_transform_points2_3d ):
PUSH_L ( ESI ) /* preserve ESI for the caller */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer, parked in ESI for now */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */
OR_B ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) ) /* mark dest as size 3 */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* preserve EDI for the caller */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first dest vertex */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3R_3 ) ) /* empty array -> epilogue */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */
MOVD ( REGOFF(8, ECX), MM2 ) /* | m02 */
PUNPCKLDQ ( REGOFF(24, ECX), MM2 ) /* m12 | m02 */
MOVQ ( REGOFF(48, ECX), MM4 ) /* m31 | m30 */
MOVD ( REGOFF(56, ECX), MM5 ) /* | m32 */
ALIGNTEXT16
LLBL( G3TP3R_2 ):
/* r0, r1 */
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */
MOVQ ( MM6, MM7 ) /* x1 | x0 */
PFMUL ( MM0, MM6 ) /* x1*m10 | x0*m00 */
PFMUL ( MM1, MM7 ) /* x1*m11 | x0*m01 */
PFACC ( MM7, MM6 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */
PFADD ( MM4, MM6 ) /* x0*...*m11+m31 | x0*...*m10+m30 */
MOVQ ( MM6, REGIND(EDX) ) /* write r1, r0 */
/* r2: only the low half of the accumulate below is stored */
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */
MOVQ ( MM6, MM7 ) /* x1 | x0 (only feeds the discarded high half) */
PFMUL ( MM2, MM6 ) /* x1*m12 | x0*m02 */
PFACC ( MM7, MM6 ) /* ***trash*** | x0*m02+x1*m12 */
PFADD ( MM5, MM6 ) /* ***trash*** | x0*...*m12+m32 */
MOVD ( MM6, REGOFF(8, EDX) ) /* write r2 */
ADD_L ( EDI, EAX ) /* next vertex */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP3R_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP3R_3 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points2_3d_no_rot
 *
 * Scales and translates an array of 2-component points, writing
 * 3-component results:
 *   r0 = x0*m00 + m30      r1 = x1*m11 + m31      r2 = m32
 * (mIJ is the float at byte offset 16*I + 4*J of the matrix.)
 * The fourth float of each dest vertex is not written.
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * (see xform_args.h).  The dest size is set to 3, VEC_SIZE_3 is ORed into
 * the dest flags and the source count is copied to the dest.
 *
 * Loop registers:
 *   EAX = current source vertex     EDI = source stride in bytes
 *   EDX = current dest vertex       ESI = vertices remaining
 * Dest vertices are written 16 bytes apart.  ESI and EDI are preserved.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points2_3d_no_rot )
GLNAME( _mesa_3dnow_transform_points2_3d_no_rot ):
PUSH_L ( ESI ) /* preserve ESI for the caller */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer, parked in ESI for now */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */
OR_B ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) ) /* mark dest as size 3 */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* preserve EDI for the caller */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first dest vertex */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3NRR_3 ) ) /* empty array -> epilogue */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */
ALIGNTEXT16
LLBL( G3TP3NRR_2 ):
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
PFADD ( MM2, MM4 ) /* x1*m11+m31 | x0*m00+m30 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
MOVD ( MM3, REGOFF(8, EDX) ) /* write r2 (=m32, same for every vertex) */
ADD_L ( EDI, EAX ) /* next vertex */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP3NRR_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP3NRR_3 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points2_2d
 *
 * Transforms an array of 2-component points by the 2D part of the matrix,
 * writing 2-component results:
 *   r0 = x0*m00 + x1*m10 + m30
 *   r1 = x0*m01 + x1*m11 + m31
 * (mIJ is the float at byte offset 16*I + 4*J of the matrix.)
 * The third and fourth floats of each dest vertex are not written.
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * (see xform_args.h).  The dest size is set to 2, VEC_SIZE_2 is ORed into
 * the dest flags and the source count is copied to the dest.
 *
 * Loop registers:
 *   EAX = current source vertex     EDI = source stride in bytes
 *   EDX = current dest vertex       ESI = vertices remaining
 * Dest vertices are written 16 bytes apart.  ESI and EDI are preserved.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points2_2d )
GLNAME( _mesa_3dnow_transform_points2_2d ):
PUSH_L ( ESI ) /* preserve ESI for the caller */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer, parked in ESI for now */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest size = 2 */
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) /* mark dest as size 2 */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* preserve EDI for the caller */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first dest vertex */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2R_3 ) ) /* empty array -> epilogue */
/* Matrix rows are used as stored; the inputs are broadcast instead. */
MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */
MOVQ ( REGOFF(16, ECX), MM1 ) /* m11 | m10 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
ALIGNTEXT16
LLBL( G3TP2R_2 ):
MOVD ( REGIND(EAX), MM4 ) /* | x0 */
MOVD ( REGOFF(4, EAX), MM5 ) /* | x1 */
PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */
ADD_L ( EDI, EAX ) /* next vertex */
PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */
PUNPCKLDQ ( MM5, MM5 ) /* x1 | x1 */
PFMUL ( MM1, MM5 ) /* x1*m11 | x1*m10 */
PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */
PFADD ( MM5, MM4 ) /* x0*m01+x1*m11+m31 | x0*m00+x1*m10+m30 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP2R_3 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points2_2d_no_rot
 *
 * Scales and translates an array of 2-component points, writing
 * 2-component results:
 *   r0 = x0*m00 + m30      r1 = x1*m11 + m31
 * (mIJ is the float at byte offset 16*I + 4*J of the matrix.)
 * The third and fourth floats of each dest vertex are not written.
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * (see xform_args.h).  The dest size is set to 2, VEC_SIZE_2 is ORed into
 * the dest flags and the source count is copied to the dest.
 *
 * Loop registers:
 *   EAX = current source vertex     EDI = source stride in bytes
 *   EDX = current dest vertex       ESI = vertices remaining
 * Dest vertices are written 16 bytes apart.  ESI and EDI are preserved.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points2_2d_no_rot )
GLNAME( _mesa_3dnow_transform_points2_2d_no_rot ):
PUSH_L ( ESI ) /* preserve ESI for the caller */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer, parked in ESI for now */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest size = 2 */
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) /* mark dest as size 2 */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* preserve EDI for the caller */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first dest vertex */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2NRR_3 ) ) /* empty array -> epilogue */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
ALIGNTEXT16
LLBL( G3TP2NRR_2 ):
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
ADD_L ( EDI, EAX ) /* next vertex */
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
PFADD ( MM2, MM4 ) /* x1*m11+m31 | x0*m00+m30 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP2NRR_3 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points2_identity
 *
 * Copies the first two floats of every source vertex to the dest array
 * (r0 = x0, r1 = x1).  The matrix pointer is loaded but never read.
 * The third and fourth floats of each dest vertex are not written.
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * (see xform_args.h).  The dest size is set to 2, VEC_SIZE_2 is ORed into
 * the dest flags and the source count is copied to the dest.
 *
 * Loop registers:
 *   EAX = current source vertex     EDI = source stride in bytes
 *   EDX = current dest vertex       ESI = vertices remaining
 * Dest vertices are written 16 bytes apart.  ESI and EDI are preserved.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points2_identity )
GLNAME( _mesa_3dnow_transform_points2_identity ):
PUSH_L ( ESI ) /* preserve ESI for the caller */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer (unused by this routine) */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest size = 2 */
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) /* mark dest as size 2 */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* preserve EDI for the caller */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first dest vertex */
MOV_L ( ESI, ECX )
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
TEST_L ( ESI, ESI )
/* An empty array must skip the loop entirely: entering it with ESI == 0
 * would wrap the counter on the first DEC and copy far past both buffers. */
JZ ( LLBL( G3TPIR_4 ) )
ALIGNTEXT16
LLBL( G3TPIR_3 ):
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
ADD_L ( EDI, EAX ) /* next vertex */
MOVQ ( MM0, REGIND(EDX) ) /* r1 | r0 */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPIR_3 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPIR_4 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET

570
src/mesa/x86/3dnow_xform3.S Normal file
View File

@ -0,0 +1,570 @@
/* $Id: 3dnow_xform3.S,v 1.1 2001/03/29 06:46:16 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
#define FRAME_OFFSET 4
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points3_general
 *
 * Transforms an array of 3-component points by a general 4x4 matrix and
 * writes 4-component results.  With mN the float at byte offset 4*N of
 * the matrix:
 *   r0 = x0*m0 + x1*m4 + x2*m8  + m12
 *   r1 = x0*m1 + x1*m5 + x2*m9  + m13
 *   r2 = x0*m2 + x1*m6 + x2*m10 + m14
 *   r3 = x0*m3 + x1*m7 + x2*m11 + m15
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * (see xform_args.h).  The dest size is set to 4, VEC_SIZE_4 is ORed into
 * the dest flags and the source count is copied to the dest.
 *
 * Loop registers:
 *   EAX = next source vertex        EDI = source stride in bytes
 *   EDX = current dest vertex       ESI = vertices remaining
 *   ECX = matrix (operands are read from memory each iteration)
 * Dest vertices are written 16 bytes apart.  ESI and EDI are preserved.
 *
 * NOTE(review): the loop is software pipelined - the next vertex is
 * loaded before the counter is tested, so 12 bytes at one stride past the
 * last vertex are read (and discarded) on the final iteration.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points3_general )
GLNAME( _mesa_3dnow_transform_points3_general ):
PUSH_L ( ESI ) /* preserve ESI for the caller */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer, parked in ESI for now */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* mark dest as size 4 */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* preserve EDI for the caller */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first dest vertex */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPGR_2 ) ) /* empty array -> epilogue */
PREFETCHW ( REGIND(EDX) )
/* Prime the pipeline with vertex 0. */
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM2 ) /* | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
ALIGNTEXT16
LLBL( G3TPGR_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
PREFETCH ( REGIND(EAX) )
MOVQ ( MM0, MM1 ) /* x1 | x0 */
PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */
PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */
MOVQ ( MM2, MM5 ) /* x2 | x2 */
PUNPCKHDQ ( MM1, MM1 ) /* x1 | x1 */
PFMUL ( REGOFF(32, ECX), MM2 ) /* x2*m9 | x2*m8 */
MOVQ ( MM0, MM3 ) /* x0 | x0 */
PFMUL ( REGOFF(40, ECX), MM5 ) /* x2*m11 | x2*m10 */
MOVQ ( MM1, MM4 ) /* x1 | x1 */
PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */
PFADD ( REGOFF(48, ECX), MM2 ) /* x2*m9+m13 | x2*m8+m12 */
PFMUL ( REGOFF(16, ECX), MM1 ) /* x1*m5 | x1*m4 */
PFADD ( REGOFF(56, ECX), MM5 ) /* x2*m11+m15 | x2*m10+m14 */
PFADD ( MM0, MM1 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */
PFMUL ( REGOFF(8, ECX), MM3 ) /* x0*m3 | x0*m2 */
PFADD ( MM1, MM2 ) /* r1 | r0 */
PFMUL ( REGOFF(24, ECX), MM4 ) /* x1*m7 | x1*m6 */
ADD_L ( CONST(16), EDX ) /* next output vertex */
PFADD ( MM3, MM4 ) /* x0*m3+x1*m7 | x0*m2+x1*m6 */
MOVQ ( MM2, REGOFF(-16, EDX) ) /* write r0, r1 */
PFADD ( MM4, MM5 ) /* r3 | r2 */
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
/* Fetch the vertex for the next iteration (also done on the last pass). */
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM2 ) /* | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPGR_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPGR_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points3_perspective
 *
 * Transforms an array of 3-component points using the m00, m11, m20, m21,
 * m22 and m32 matrix entries, writing 4-component results:
 *   r0 = x0*m00 + x2*m20
 *   r1 = x1*m11 + x2*m21
 *   r2 = x2*m22 + m32
 *   r3 = -x2
 * (mIJ is the float at byte offset 16*I + 4*J of the matrix.)
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * (see xform_args.h).  The dest size is set to 4, VEC_SIZE_4 is ORed into
 * the dest flags and the source count is copied to the dest.
 *
 * Loop registers:
 *   EAX = next source vertex        EDI = source stride in bytes
 *   EDX = current dest vertex       ESI = vertices remaining
 * Dest vertices are written 16 bytes apart.  ESI and EDI are preserved.
 *
 * NOTE(review): the loop is software pipelined - the next vertex is
 * loaded before the counter is tested, so 12 bytes at one stride past the
 * last vertex are read (and discarded) on the final iteration.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points3_perspective )
GLNAME( _mesa_3dnow_transform_points3_perspective ):
PUSH_L ( ESI ) /* preserve ESI for the caller */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer, parked in ESI for now */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* mark dest as size 4 */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* preserve EDI for the caller */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first dest vertex */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPPR_2 ) ) /* empty array -> epilogue */
PREFETCH ( REGIND(EAX) )
PREFETCHW ( REGIND(EDX) )
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
MOVQ ( REGOFF(32, ECX), MM1 ) /* m21 | m20 */
MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */
/* Prime the pipeline with vertex 0. */
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
ADD_L ( EDI, EAX ) /* next vertex */
ALIGNTEXT16
LLBL( G3TPPR_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
PREFETCH ( REGIND(EAX) )
PXOR ( MM7, MM7 ) /* 0 | 0 */
MOVQ ( MM5, MM6 ) /* | x2 */
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
PFSUB ( MM5, MM7 ) /* | -x2 (0 - x2) */
PFMUL ( MM2, MM6 ) /* | x2*m22 */
PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */
ADD_L ( CONST(16), EDX ) /* next r */
PFMUL ( MM1, MM5 ) /* x2*m21 | x2*m20 */
PFADD ( MM3, MM6 ) /* | x2*m22+m32 */
PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */
MOVQ ( MM5, REGOFF(-16, EDX) ) /* write r0, r1 */
MOVD ( MM6, REGOFF(-8, EDX) ) /* write r2 */
MOVD ( MM7, REGOFF(-4, EDX) ) /* write r3 */
/* Fetch the vertex for the next iteration (also done on the last pass). */
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
ADD_L ( EDI, EAX ) /* next vertex */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPPR_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPPR_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points3_3d
 *
 * Transforms an array of 3-component points by the upper three columns of
 * the matrix, writing 3-component results.  With mN the float at byte
 * offset 4*N of the matrix:
 *   r0 = x0*m0 + x1*m4 + x2*m8  + m12
 *   r1 = x0*m1 + x1*m5 + x2*m9  + m13
 *   r2 = x0*m2 + x1*m6 + x2*m10 + m14
 * The fourth float of each dest vertex is not written.
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * (see xform_args.h).  The dest size is set to 3, VEC_SIZE_3 is ORed into
 * the dest flags and the source count is copied to the dest.
 *
 * Loop registers:
 *   EAX = next source vertex        EDI = source stride in bytes
 *   EDX = current dest vertex       ESI = vertices remaining
 *   ECX = matrix                    MM7 = m6 | m2 (kept across iterations)
 * Dest vertices are written 16 bytes apart.  ESI and EDI are preserved.
 *
 * NOTE(review): the loop is software pipelined - the next vertex is
 * loaded before the counter is tested, so 12 bytes at one stride past the
 * last vertex are read (and discarded) on the final iteration.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points3_3d )
GLNAME( _mesa_3dnow_transform_points3_3d ):
PUSH_L ( ESI ) /* preserve ESI for the caller */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer, parked in ESI for now */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* mark dest as size 3 */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* preserve EDI for the caller */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first dest vertex */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3R_2 ) ) /* empty array -> epilogue */
PREFETCH ( REGIND(EAX) ) /* source is only read */
PREFETCHW ( REGIND(EDX) ) /* dest is about to be written */
MOVD ( REGOFF(8, ECX), MM7 ) /* | m2 */
PUNPCKLDQ ( REGOFF(24, ECX), MM7 ) /* m6 | m2 */
/* Prime the pipeline with vertex 0. */
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
ALIGNTEXT16
LLBL( G3TP3R_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
PREFETCH ( REGIND(EAX) )
MOVQ ( MM0, MM2 ) /* x1 | x0 */
ADD_L ( CONST(16), EDX ) /* next r */
PUNPCKLDQ ( MM2, MM2 ) /* x0 | x0 */
MOVQ ( MM0, MM3 ) /* x1 | x0 */
PFMUL ( REGIND(ECX), MM2 ) /* x0*m1 | x0*m0 */
PUNPCKHDQ ( MM3, MM3 ) /* x1 | x1 */
MOVQ ( MM1, MM4 ) /* | x2 */
PFMUL ( REGOFF(16, ECX), MM3 ) /* x1*m5 | x1*m4 */
PUNPCKLDQ ( MM4, MM4 ) /* x2 | x2 */
PFADD ( MM2, MM3 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */
PFMUL ( REGOFF(32, ECX), MM4 ) /* x2*m9 | x2*m8 */
PFADD ( REGOFF(48, ECX), MM3 ) /* x0*m1+...+m13 | x0*m0+x1*m4+m12 */
PFMUL ( MM7, MM0 ) /* x1*m6 | x0*m2 */
PFADD ( MM4, MM3 ) /* r1 | r0 */
PFMUL ( REGOFF(40, ECX), MM1 ) /* | x2*m10 */
PUNPCKLDQ ( REGOFF(56, ECX), MM1 ) /* m14 | x2*m10 */
PFACC ( MM0, MM1 ) /* x0*m2+x1*m6 | x2*m10+m14 */
MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */
PFACC ( MM1, MM1 ) /* | r2 */
MOVD ( MM1, REGOFF(-8, EDX) ) /* write r2 */
/* Fetch the vertex for the next iteration (also done on the last pass). */
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP3R_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP3R_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points3_3d_no_rot
 *
 * Scales and translates an array of 3-component points, writing
 * 3-component results:
 *   r0 = x0*m00 + m30      r1 = x1*m11 + m31      r2 = x2*m22 + m32
 * (mIJ is the float at byte offset 16*I + 4*J of the matrix.)
 * The fourth float of each dest vertex is not written.
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * (see xform_args.h).  The dest size is set to 3, VEC_SIZE_3 is ORed into
 * the dest flags and the source count is copied to the dest.
 *
 * Loop registers:
 *   EAX = source vertex pointer     EDI = source stride in bytes
 *   EDX = dest vertex pointer       ESI = vertices remaining
 * Dest vertices are written 16 bytes apart.  ESI and EDI are preserved.
 *
 * NOTE(review): the loop is software pipelined - the next vertex is
 * loaded before the counter is tested, so 12 bytes at one stride past the
 * last vertex are read (and discarded) on the final iteration.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points3_3d_no_rot )
GLNAME( _mesa_3dnow_transform_points3_3d_no_rot ):
PUSH_L ( ESI ) /* preserve ESI for the caller */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer, parked in ESI for now */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* mark dest as size 3 */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* preserve EDI for the caller */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first dest vertex */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3NRR_2 ) ) /* empty array -> epilogue */
PREFETCH ( REGIND(EAX) )
PREFETCHW ( REGIND(EDX) )
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */
PUNPCKLDQ ( MM2, MM2 ) /* m22 | m22 */
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */
PUNPCKLDQ ( MM3, MM3 ) /* m32 | m32 */
/* Prime the pipeline with vertex 0; EAX is advanced at the loop top. */
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
ALIGNTEXT16
LLBL( G3TP3NRR_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGIND(EAX) ) /* source is only read: plain prefetch */
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */
PFMUL ( MM2, MM5 ) /* | x2*m22 */
PFADD ( MM3, MM5 ) /* | x2*m22+m32 */
MOVQ ( MM4, REGIND(EDX) ) /* write r0, r1 */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
/* The MMX moves below leave EFLAGS alone, so JNZ still sees DEC's result. */
MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 */
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
JNZ ( LLBL( G3TP3NRR_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP3NRR_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points3_2d
 *
 * Transforms x0/x1 of an array of 3-component points by the 2D part of
 * the matrix and passes x2 through, writing 3-component results:
 *   r0 = x0*m00 + x1*m10 + m30
 *   r1 = x0*m01 + x1*m11 + m31
 *   r2 = x2
 * (mIJ is the float at byte offset 16*I + 4*J of the matrix.)
 * The fourth float of each dest vertex is not written.
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * (see xform_args.h).  The dest size is set to 3, VEC_SIZE_3 is ORed into
 * the dest flags and the source count is copied to the dest.
 *
 * Loop registers:
 *   EAX = next source vertex        EDI = source stride in bytes
 *   EDX = current dest vertex       ESI = vertices remaining
 * Dest vertices are written 16 bytes apart.  ESI and EDI are preserved.
 *
 * NOTE(review): the loop is software pipelined - the next vertex is
 * loaded before the counter is tested, so 12 bytes at one stride past the
 * last vertex are read (and discarded) on the final iteration.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points3_2d )
GLNAME( _mesa_3dnow_transform_points3_2d ):
PUSH_L ( ESI ) /* preserve ESI for the caller */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer, parked in ESI for now */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* mark dest as size 3 */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* preserve EDI for the caller */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first dest vertex */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2R_3) ) /* empty array -> epilogue */
PREFETCH ( REGIND(EAX) )
PREFETCHW ( REGIND(EDX) )
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
/* Prime the pipeline with vertex 0. */
MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
ALIGNTEXT16
LLBL( G3TP2R_2 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
PREFETCH ( REGIND(EAX) )
MOVQ ( MM3, MM4 ) /* x1 | x0 */
PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */
ADD_L ( CONST(16), EDX ) /* next r */
PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */
PFACC ( MM4, MM3 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */
MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (=x2) */
PFADD ( MM2, MM3 ) /* x0*...*m11+m31 | x0*...*m10+m30 */
MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */
/* Fetch the vertex for the next iteration (also done on the last pass). */
MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP2R_3 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points3_2d_no_rot
 *
 * Scales and translates x0/x1 of an array of 3-component points and
 * passes x2 through, writing 3-component results:
 *   r0 = x0*m00 + m30      r1 = x1*m11 + m31      r2 = x2
 * (mIJ is the float at byte offset 16*I + 4*J of the matrix.)
 * The fourth float of each dest vertex is not written.
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * (see xform_args.h).  The dest size is set to 3, VEC_SIZE_3 is ORed into
 * the dest flags and the source count is copied to the dest.
 *
 * Loop registers:
 *   EAX = next source vertex        EDI = source stride in bytes
 *   EDX = current dest vertex       ESI = vertices remaining
 * Dest vertices are written 16 bytes apart.  ESI and EDI are preserved.
 *
 * NOTE(review): the loop is software pipelined - the next vertex is
 * loaded before the counter is tested, so 12 bytes at one stride past the
 * last vertex are read (and discarded) on the final iteration.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points3_2d_no_rot )
GLNAME( _mesa_3dnow_transform_points3_2d_no_rot ):
PUSH_L ( ESI ) /* preserve ESI for the caller */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer, parked in ESI for now */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* mark dest as size 3 */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* preserve EDI for the caller */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first dest vertex */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2NRR_2 ) ) /* empty array -> epilogue */
PREFETCH ( REGIND(EAX) )
PREFETCHW ( REGIND(EDX) )
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
/* Prime the pipeline with vertex 0. */
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
ALIGNTEXT16
LLBL( G3TP2NRR_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
PREFETCH ( REGIND(EAX) )
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
ADD_L ( CONST(16), EDX ) /* next r */
PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */
MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */
MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (=x2) */
/* Fetch the vertex for the next iteration (also done on the last pass). */
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP2NRR_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP2NRR_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points3_identity
 *
 * Copies the first three floats of every source vertex to the dest array
 * (r0 = x0, r1 = x1, r2 = x2).  The matrix pointer is loaded but never
 * read.  The fourth float of each dest vertex is not written.
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * (see xform_args.h).  The dest size is set to 3, VEC_SIZE_3 is ORed into
 * the dest flags and the source count is copied to the dest.
 *
 * Loop registers:
 *   EAX = source vertex pointer     EDI = source stride in bytes
 *   EDX = dest vertex pointer       ESI = vertices remaining
 * Dest vertices are written 16 bytes apart.  ESI and EDI are preserved.
 *
 * NOTE(review): the loop is software pipelined - the next vertex is
 * loaded before the counter is tested, so 12 bytes at one stride past the
 * last vertex are read (and discarded) on the final iteration.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points3_identity )
GLNAME( _mesa_3dnow_transform_points3_identity ):
PUSH_L ( ESI ) /* preserve ESI for the caller */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer (unused by this routine) */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* mark dest as size 3 */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* preserve EDI for the caller */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first dest vertex */
MOV_L ( ESI, ECX )
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPIR_2 ) ) /* empty array -> epilogue */
PREFETCHW ( REGIND(EDX) )
/* Prime the pipeline with vertex 0; EAX is advanced at the loop top. */
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
ALIGNTEXT16
LLBL( G3TPIR_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* dest, 2 vertices ahead */
ADD_L ( EDI, EAX ) /* next vertex */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
/* The MMX moves below leave EFLAGS alone, so JNZ still sees DEC's result. */
MOVQ ( MM0, REGOFF(-16, EDX) ) /* r1 | r0 */
MOVD ( MM1, REGOFF(-8, EDX) ) /* | r2 */
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
JNZ ( LLBL( G3TPIR_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPIR_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET

588
src/mesa/x86/3dnow_xform4.S Normal file
View File

@ -0,0 +1,588 @@
/* $Id: 3dnow_xform4.S,v 1.1 2001/03/29 06:46:16 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
#define FRAME_OFFSET 4
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points4_general
 *
 * Transforms an array of 4-component points by a general 4x4 matrix and
 * writes 4-component results.  With mN the float at byte offset 4*N of
 * the matrix:
 *   r0 = x0*m0 + x1*m4 + x2*m8  + x3*m12
 *   r1 = x0*m1 + x1*m5 + x2*m9  + x3*m13
 *   r2 = x0*m2 + x1*m6 + x2*m10 + x3*m14
 *   r3 = x0*m3 + x1*m7 + x2*m11 + x3*m15
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * (see xform_args.h).  The dest size is set to 4, VEC_SIZE_4 is ORed into
 * the dest flags and the source count is copied to the dest.
 *
 * Loop registers:
 *   EAX = next source vertex        EDI = source stride in bytes
 *   EDX = current dest vertex       ESI = vertices remaining
 *   ECX = matrix (operands are read from memory each iteration)
 * Dest vertices are written 16 bytes apart.  ESI and EDI are preserved.
 *
 * NOTE(review): the loop is software pipelined - the next vertex is
 * loaded before the counter is tested, so 16 bytes at one stride past the
 * last vertex are read (and discarded) on the final iteration.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points4_general )
GLNAME( _mesa_3dnow_transform_points4_general ):
PUSH_L ( ESI ) /* preserve ESI for the caller */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer, parked in ESI for now */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* mark dest as size 4 */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* preserve EDI for the caller */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first dest vertex */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPGR_2 ) ) /* empty array -> epilogue */
PREFETCHW ( REGIND(EDX) )
/* Prime the pipeline with vertex 0. */
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM4 ) /* x3 | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
ALIGNTEXT16
LLBL( G3TPGR_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
PREFETCH ( REGIND(EAX) )
MOVQ ( MM0, MM2 ) /* x1 | x0 */
MOVQ ( MM4, MM6 ) /* x3 | x2 */
PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */
PUNPCKHDQ ( MM2, MM2 ) /* x1 | x1 */
MOVQ ( MM0, MM1 ) /* x0 | x0 */
ADD_L ( CONST(16), EDX ) /* next r */
PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */
MOVQ ( MM2, MM3 ) /* x1 | x1 */
PFMUL ( REGOFF(8, ECX), MM1 ) /* x0*m3 | x0*m2 */
PUNPCKLDQ ( MM4, MM4 ) /* x2 | x2 */
PFMUL ( REGOFF(16, ECX), MM2 ) /* x1*m5 | x1*m4 */
MOVQ ( MM4, MM5 ) /* x2 | x2 */
PFMUL ( REGOFF(24, ECX), MM3 ) /* x1*m7 | x1*m6 */
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
PFMUL ( REGOFF(32, ECX), MM4 ) /* x2*m9 | x2*m8 */
MOVQ ( MM6, MM7 ) /* x3 | x3 */
PFMUL ( REGOFF(40, ECX), MM5 ) /* x2*m11 | x2*m10 */
PFADD ( MM0, MM2 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */
PFMUL ( REGOFF(48, ECX), MM6 ) /* x3*m13 | x3*m12 */
PFADD ( MM1, MM3 ) /* x0*m3+x1*m7 | x0*m2+x1*m6 */
PFMUL ( REGOFF(56, ECX), MM7 ) /* x3*m15 | x3*m14 */
PFADD ( MM4, MM6 ) /* x2*m9+x3*m13 | x2*m8+x3*m12 */
PFADD ( MM5, MM7 ) /* x2*m11+x3*m15 | x2*m10+x3*m14 */
PFADD ( MM2, MM6 ) /* r1 | r0 */
PFADD ( MM3, MM7 ) /* r3 | r2 */
MOVQ ( MM6, REGOFF(-16, EDX) ) /* write r0, r1 */
MOVQ ( MM7, REGOFF(-8, EDX) ) /* write r2, r3 */
/* Fetch the vertex for the next iteration (also done on the last pass). */
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM4 ) /* x3 | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPGR_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPGR_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points4_perspective
 *
 * Transforms an array of 4-component points using the m00, m11, m20, m21,
 * m22 and m32 matrix entries, writing 4-component results:
 *   r0 = x0*m00 + x2*m20
 *   r1 = x1*m11 + x2*m21
 *   r2 = x2*m22 + x3*m32
 *   r3 = -x2
 * (mIJ is the float at byte offset 16*I + 4*J of the matrix.)
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * (see xform_args.h).  The dest size is set to 4, VEC_SIZE_4 is ORed into
 * the dest flags and the source count is copied to the dest.
 *
 * Loop registers:
 *   EAX = next source vertex        EDI = source stride in bytes
 *   EDX = current dest vertex       ESI = vertices remaining
 *   MM7 = 0 | 0 (kept across iterations)
 * Dest vertices are written 16 bytes apart.  ESI and EDI are preserved.
 *
 * NOTE(review): the loop is software pipelined - the next vertex is
 * loaded before the counter is tested, so 16 bytes at one stride past the
 * last vertex are read (and discarded) on the final iteration.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points4_perspective )
GLNAME( _mesa_3dnow_transform_points4_perspective ):
PUSH_L ( ESI ) /* preserve ESI for the caller */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer, parked in ESI for now */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* mark dest as size 4 */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* preserve EDI for the caller */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first dest vertex */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPPR_2 ) ) /* empty array -> epilogue */
PREFETCH ( REGIND(EAX) )
PREFETCHW ( REGIND(EDX) )
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
MOVD ( REGOFF(40, ECX), MM1 ) /* | m22 */
PUNPCKLDQ ( REGOFF(56, ECX), MM1 ) /* m32 | m22 */
MOVQ ( REGOFF(32, ECX), MM2 ) /* m21 | m20 */
PXOR ( MM7, MM7 ) /* 0 | 0 */
/* Prime the pipeline with vertex 0. */
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
MOVD ( REGOFF(8, EAX), MM3 ) /* | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
ALIGNTEXT16
LLBL( G3TPPR_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
PREFETCH ( REGOFF(32, EAX) ) /* fixed 32-byte lookahead; lands on a vertex only for a 16-byte stride */
MOVQ ( MM5, MM6 ) /* x3 | x2 */
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */
ADD_L ( CONST(16), EDX ) /* next r */
PFMUL ( MM2, MM5 ) /* x2*m21 | x2*m20 */
PFSUBR ( MM7, MM3 ) /* 0 | -x2 (reverse subtract: MM7 - MM3) */
PFMUL ( MM1, MM6 ) /* x3*m32 | x2*m22 */
PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */
PFACC ( MM3, MM6 ) /* -x2 | x2*m22+x3*m32 */
MOVQ ( MM5, REGOFF(-16, EDX) ) /* write r0, r1 */
MOVQ ( MM6, REGOFF(-8, EDX) ) /* write r2, r3 */
/* Fetch the vertex for the next iteration (also done on the last pass). */
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
MOVD ( REGOFF(8, EAX), MM3 ) /* | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPPR_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPPR_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points4_3d
 *
 * Transforms an array of 4-component points by the upper three columns of
 * the matrix and passes x3 through, writing 4-component results.  With mN
 * the float at byte offset 4*N of the matrix:
 *   r0 = x0*m0 + x1*m4 + x2*m8  + x3*m12
 *   r1 = x0*m1 + x1*m5 + x2*m9  + x3*m13
 *   r2 = x0*m2 + x1*m6 + x2*m10 + x3*m14
 *   r3 = x3
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * (see xform_args.h).  The dest size is set to 4, VEC_SIZE_4 is ORed into
 * the dest flags and the source count is copied to the dest.
 *
 * Loop registers:
 *   EAX = current source vertex     EDI = source stride in bytes
 *   EDX = current dest vertex       ESI = vertices remaining
 *   ECX = matrix     MM6 = m6 | m2     MM7 = m14 | m10
 * Dest vertices are written 16 bytes apart.  ESI and EDI are preserved.
 *
 * NOTE(review): the loop is software pipelined - the next vertex is
 * loaded before the counter is tested, so 16 bytes at one stride past the
 * last vertex are read (and discarded) on the final iteration.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d )
GLNAME( _mesa_3dnow_transform_points4_3d ):
PUSH_L ( ESI ) /* preserve ESI for the caller */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer, parked in ESI for now */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* mark dest as size 4 */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* preserve EDI for the caller */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first dest vertex */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3R_2 ) ) /* empty array -> epilogue */
MOVD ( REGOFF(8, ECX), MM6 ) /* | m2 */
PUNPCKLDQ ( REGOFF(24, ECX), MM6 ) /* m6 | m2 */
MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */
PUNPCKLDQ ( REGOFF(56, ECX), MM7 ) /* m14 | m10 */
/* Prime the pipeline with vertex 0; EAX is advanced inside the loop
 * because x3 is re-read from the current vertex at offset 12. */
MOVQ ( REGIND(EAX), MM2 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM3 ) /* x3 | x2 */
ALIGNTEXT16
LLBL( G3TP3R_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
PREFETCH ( REGOFF(32, EAX) ) /* hopefully array is tightly packed */
MOVQ ( MM2, MM0 ) /* x1 | x0 */
MOVQ ( MM3, MM4 ) /* x3 | x2 */
MOVQ ( MM0, MM1 ) /* x1 | x0 */
MOVQ ( MM4, MM5 ) /* x3 | x2 */
PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */
PUNPCKHDQ ( MM1, MM1 ) /* x1 | x1 */
PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */
PUNPCKLDQ ( MM3, MM3 ) /* x2 | x2 */
PFMUL ( REGOFF(16, ECX), MM1 ) /* x1*m5 | x1*m4 */
PUNPCKHDQ ( MM4, MM4 ) /* x3 | x3 */
PFMUL ( MM6, MM2 ) /* x1*m6 | x0*m2 */
PFADD ( MM0, MM1 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */
PFMUL ( REGOFF(32, ECX), MM3 ) /* x2*m9 | x2*m8 */
ADD_L ( CONST(16), EDX ) /* next r */
PFMUL ( REGOFF(48, ECX), MM4 ) /* x3*m13 | x3*m12 */
PFADD ( MM1, MM3 ) /* x0*m1+..+x2*m9 | x0*m0+...+x2*m8 */
PFMUL ( MM7, MM5 ) /* x3*m14 | x2*m10 */
PFADD ( MM3, MM4 ) /* r1 | r0 */
PFACC ( MM2, MM5 ) /* x0*m2+x1*m6 | x2*m10+x3*m14 */
MOVD ( REGOFF(12, EAX), MM0 ) /* 0 | x3 (MOVD clears the high half) */
ADD_L ( EDI, EAX ) /* next vertex */
PFACC ( MM0, MM5 ) /* r3 (= x3 + 0) | r2 */
MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
/* Fetch the vertex for the next iteration (also done on the last pass). */
MOVQ ( REGIND(EAX), MM2 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM3 ) /* x3 | x2 */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP3R_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP3R_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points4_3d_no_rot
 *
 * Scales and translates an array of 4-component points and passes x3
 * through, writing 4-component results:
 *   r0 = x0*m00 + x3*m30
 *   r1 = x1*m11 + x3*m31
 *   r2 = x2*m22 + x3*m32
 *   r3 = x3
 * (mIJ is the float at byte offset 16*I + 4*J of the matrix.)
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE
 * (see xform_args.h).  The dest size is set to 4, VEC_SIZE_4 is ORed into
 * the dest flags and the source count is copied to the dest.
 *
 * Loop registers:
 *   EAX = next source vertex        EDI = source stride in bytes
 *   EDX = current dest vertex       ESI = vertices remaining
 * Dest vertices are written 16 bytes apart.  ESI and EDI are preserved.
 *
 * NOTE(review): the loop is software pipelined - the next vertex is
 * loaded before the counter is tested, so 16 bytes at one stride past the
 * last vertex are read (and discarded) on the final iteration.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d_no_rot )
GLNAME( _mesa_3dnow_transform_points4_3d_no_rot ):
PUSH_L ( ESI ) /* preserve ESI for the caller */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* matrix pointer, parked in ESI for now */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* mark dest as size 4 */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* preserve EDI for the caller */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first dest vertex */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3NRR_2 ) ) /* empty array -> epilogue */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */
PUNPCKLDQ ( REGOFF(56, ECX), MM2 ) /* m32 | m22 */
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
/* Prime the pipeline with vertex 0. */
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
MOVD ( REGOFF(12, EAX), MM7 ) /* 0 | x3 (MOVD clears the high half) */
ADD_L ( EDI, EAX ) /* next vertex */
ALIGNTEXT16
LLBL( G3TP3NRR_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
PREFETCH ( REGOFF(32, EAX) ) /* fixed 32-byte lookahead; lands on a vertex only for a 16-byte stride */
MOVQ ( MM5, MM6 ) /* x3 | x2 */
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
PFMUL ( MM2, MM5 ) /* x3*m32 | x2*m22 */
PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */
PFACC ( MM7, MM5 ) /* x3 | x2*m22+x3*m32 */
PFADD ( MM6, MM4 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */
ADD_L ( CONST(16), EDX ) /* next r */
MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
/* Fetch the vertex for the next iteration (also done on the last pass). */
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
MOVD ( REGOFF(12, EAX), MM7 ) /* | x3 */
ADD_L ( EDI, EAX ) /* next vertex */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP3NRR_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP3NRR_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * 3DNow! transform of 4-component points by the 2D part of the matrix.
 * With m[i] denoting the float at byte offset 4*i from the matrix pointer,
 * each source vertex (x0, x1, x2, x3) produces:
 *
 *    r0 = x0*m[0] + x1*m[4] + x3*m[12]
 *    r1 = x0*m[1] + x1*m[5] + x3*m[13]
 *    r2 = x2
 *    r3 = x3
 *
 * Arguments are fetched through the ARG_DEST / ARG_MATRIX / ARG_SOURCE
 * macros, which are defined outside this function.  The dest vector is
 * marked as size 4 and receives the source count before the count is
 * tested.
 *
 * Register roles once set-up is complete:
 *    EAX = current source vertex     EDX = current dest vertex
 *    ECX = matrix                    ESI = vertices remaining
 *    EDI = source stride in bytes
 *
 * NOTE(review): the next vertex is loaded before the counter is tested,
 * so after the final vertex the code still reads 16 bytes located one
 * stride past the last source element.  Confirm the source storage
 * tolerates that read.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d )
GLNAME( _mesa_3dnow_transform_points4_2d ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector (temporarily) */
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (temporarily) */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* flag dest as 4-component */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* pushed only after all ARG_* loads */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first dest vertex */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2R_2 ) ) /* nothing to transform */
/* Loop-invariant matrix terms. */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
/* Preload the first vertex. */
MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
ALIGNTEXT16
LLBL( G3TP2R_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
PREFETCH ( REGIND(EAX) ) /* prefetch the vertex loaded at the end of this pass */
MOVQ ( MM3, MM4 ) /* x1 | x0 */
MOVQ ( MM5, MM6 ) /* x3 | x2 */
PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */
ADD_L ( CONST(16), EDX ) /* next r */
PFACC ( MM4, MM3 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */
PFMUL ( MM2, MM6 ) /* x3*m31 | x3*m30 */
PFADD ( MM6, MM3 ) /* r1 | r0 */
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 (x2, x3 copied unchanged) */
MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */
/* Load the vertex for the next pass (also executed after the last one). */
MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP2R_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP2R_2 ):
FEMMS /* leave MMX/3DNow! state */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * 3DNow! transform of 4-component points using only the x/y scale and
 * translation terms of the matrix.  With m[i] denoting the float at byte
 * offset 4*i from the matrix pointer, each source vertex (x0, x1, x2, x3)
 * produces:
 *
 *    r0 = x0*m[0] + x3*m[12]
 *    r1 = x1*m[5] + x3*m[13]
 *    r2 = x2
 *    r3 = x3
 *
 * Arguments are fetched through the ARG_DEST / ARG_MATRIX / ARG_SOURCE
 * macros, which are defined outside this function.  The dest vector is
 * marked as size 4 and receives the source count before the count is
 * tested.
 *
 * Register roles once set-up is complete:
 *    EAX = current source vertex     EDX = current dest vertex
 *    ECX = matrix                    ESI = vertices remaining
 *    EDI = source stride in bytes
 *
 * NOTE(review): the next vertex is loaded before the counter is tested,
 * so after the final vertex the code still reads 16 bytes located one
 * stride past the last source element.  Confirm the source storage
 * tolerates that read.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d_no_rot )
GLNAME( _mesa_3dnow_transform_points4_2d_no_rot ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector (temporarily) */
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (temporarily) */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* flag dest as 4-component */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* pushed only after all ARG_* loads */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first dest vertex */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2NRR_3 ) ) /* nothing to transform */
/* Loop-invariant matrix terms. */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
/* Preload the first vertex. */
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
ALIGNTEXT16
LLBL( G3TP2NRR_2 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
PREFETCH ( REGIND(EAX) ) /* prefetch the vertex loaded at the end of this pass */
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
MOVQ ( MM5, MM6 ) /* x3 | x2 */
ADD_L ( CONST(16), EDX ) /* next r */
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */
PFADD ( MM4, MM6 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */
MOVQ ( MM6, REGOFF(-16, EDX) ) /* write r0, r1 */
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 (x2, x3 copied unchanged) */
/* Load the vertex for the next pass (also executed after the last one). */
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP2NRR_3 ):
FEMMS /* leave MMX/3DNow! state */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * Identity "transform" of 4-component points: every 16-byte source vertex
 * is copied unchanged to the dest, using two 8-byte MMX moves.  The matrix
 * argument is loaded into ECX but never dereferenced.
 *
 * Arguments are fetched through the ARG_DEST / ARG_MATRIX / ARG_SOURCE
 * macros, which are defined outside this function.  The dest vector is
 * marked as size 4 and receives the source count before the count is
 * tested.
 *
 * Register roles once set-up is complete:
 *    EAX = current source vertex     EDX = current dest vertex
 *    ESI = vertices remaining        EDI = source stride in bytes
 *
 * NOTE(review): the next vertex is loaded before the counter is tested,
 * so after the final vertex the code still reads 16 bytes located one
 * stride past the last source element.  Confirm the source storage
 * tolerates that read.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points4_identity )
GLNAME( _mesa_3dnow_transform_points4_identity ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector (temporarily) */
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (temporarily) */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* flag dest as 4-component */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* pushed only after all ARG_* loads */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first dest vertex */
MOV_L ( ESI, ECX ) /* ECX = matrix (unused below) */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex count */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPIR_2 ) ) /* nothing to copy */
/* Preload the first vertex. */
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM1 ) /* x3 | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
ALIGNTEXT16
LLBL( G3TPIR_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
PREFETCH ( REGIND(EAX) ) /* prefetch the vertex loaded at the end of this pass */
ADD_L ( CONST(16), EDX ) /* next r */
MOVQ ( MM0, REGOFF(-16, EDX) ) /* r1 | r0 */
MOVQ ( MM1, REGOFF(-8, EDX) ) /* r3 | r2 */
/* Load the vertex for the next pass (also executed after the last one). */
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM1 ) /* x3 | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPIR_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPIR_2 ):
FEMMS /* leave MMX/3DNow! state */
POP_L ( EDI )
POP_L ( ESI )
RET

View File

@ -1,4 +1,4 @@
/* $Id: common_x86.c,v 1.14 2001/03/28 20:44:43 gareth Exp $ */
/* $Id: common_x86.c,v 1.15 2001/03/29 06:46:16 gareth Exp $ */
/*
* Mesa 3-D graphics library
@ -35,7 +35,7 @@
#include <stdlib.h>
#include <stdio.h>
#if defined(USE_KATMAI_ASM) && defined(__linux__)
#if defined(USE_SSE_ASM) && defined(__linux__)
#include <signal.h>
#endif
@ -67,7 +67,7 @@ static void message( const char *msg )
}
}
#if defined(USE_KATMAI_ASM)
#if defined(USE_SSE_ASM)
/*
* We must verify that the Streaming SIMD Extensions are truly supported
* on this processor before we go ahead and hook out the optimized code.
@ -84,8 +84,8 @@ static void message( const char *msg )
* not good.
*/
extern void _mesa_test_os_katmai_support( void );
extern void _mesa_test_os_katmai_exception_support( void );
extern void _mesa_test_os_sse_support( void );
extern void _mesa_test_os_sse_exception_support( void );
#if defined(__linux__) && defined(_POSIX_SOURCE)
static void sigill_handler( int signal, struct sigcontext sc )
@ -135,7 +135,7 @@ static void sigfpe_handler( int signal, struct sigcontext sc )
*
* GH: Isn't this just awful?
*/
static void check_os_katmai_support( void )
static void check_os_sse_support( void )
{
#if defined(__linux__)
#if defined(_POSIX_SOURCE)
@ -159,7 +159,7 @@ static void check_os_katmai_support( void )
if ( cpu_has_xmm ) {
message( "Testing OS support for SSE... " );
_mesa_test_os_katmai_support();
_mesa_test_os_sse_support();
if ( cpu_has_xmm ) {
message( "yes.\n" );
@ -184,7 +184,7 @@ static void check_os_katmai_support( void )
if ( cpu_has_xmm ) {
message( "Testing OS support for SSE unmasked exceptions... " );
_mesa_test_os_katmai_exception_support();
_mesa_test_os_sse_exception_support();
if ( cpu_has_xmm ) {
message( "yes.\n" );
@ -220,7 +220,7 @@ static void check_os_katmai_support( void )
#endif /* __linux__ */
}
#endif /* USE_KATMAI_ASM */
#endif /* USE_SSE_ASM */
void _mesa_init_all_x86_transform_asm( void )
@ -257,14 +257,14 @@ void _mesa_init_all_x86_transform_asm( void )
}
#endif
#ifdef USE_KATMAI_ASM
if ( cpu_has_xmm && getenv( "MESA_FORCE_KATMAI" ) == 0 ) {
check_os_katmai_support();
#ifdef USE_SSE_ASM
if ( cpu_has_xmm && getenv( "MESA_FORCE_SSE" ) == 0 ) {
check_os_sse_support();
}
if ( cpu_has_xmm ) {
if ( getenv( "MESA_NO_KATMAI" ) == 0 ) {
message( "Katmai cpu detected.\n" );
_mesa_init_katmai_transform_asm();
if ( getenv( "MESA_NO_SSE" ) == 0 ) {
message( "SSE cpu detected.\n" );
_mesa_init_sse_transform_asm();
} else {
_mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
}
@ -289,9 +289,9 @@ void _mesa_init_all_x86_vertex_asm( void )
}
#endif
#ifdef USE_KATMAI_ASM
if ( cpu_has_xmm && getenv( "MESA_NO_KATMAI" ) == 0 ) {
_mesa_init_katmai_vertex_asm();
#ifdef USE_SSE_ASM
if ( cpu_has_xmm && getenv( "MESA_NO_SSE" ) == 0 ) {
_mesa_init_sse_vertex_asm();
}
#endif
#endif

View File

@ -1,4 +1,4 @@
/* $Id: common_x86_asm.S,v 1.6 2001/03/28 20:44:43 gareth Exp $ */
/* $Id: common_x86_asm.S,v 1.7 2001/03/29 06:46:16 gareth Exp $ */
/*
* Mesa 3-D graphics library
@ -58,8 +58,8 @@
GLNAME( found_intel ): STRING( "Genuine Intel processor found\n\0" )
GLNAME( found_amd ): STRING( "Authentic AMD processor found\n\0" )
#ifdef USE_KATMAI_ASM
GLNAME( katmai_test_dummy ):
#ifdef USE_SSE_ASM
GLNAME( sse_test_dummy ):
D_LONG 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
#endif
@ -157,15 +157,15 @@ LLBL ( cpuid_done ):
RET
#ifdef USE_KATMAI_ASM
#ifdef USE_SSE_ASM
/* Execute an SSE instruction to see if the operating system correctly
* supports SSE. A signal handler for SIGILL should have been set
* before calling this function, otherwise this could kill the client
* application.
*/
ALIGNTEXT4
GLOBL GLNAME( _mesa_test_os_katmai_support )
GLNAME( _mesa_test_os_katmai_support ):
GLOBL GLNAME( _mesa_test_os_sse_support )
GLNAME( _mesa_test_os_sse_support ):
XORPS ( XMM0, XMM0 )
@ -178,8 +178,8 @@ GLNAME( _mesa_test_os_katmai_support ):
* otherwise this could kill the client application.
*/
ALIGNTEXT4
GLOBL GLNAME( _mesa_test_os_katmai_exception_support )
GLNAME( _mesa_test_os_katmai_exception_support ):
GLOBL GLNAME( _mesa_test_os_sse_exception_support )
GLNAME( _mesa_test_os_sse_exception_support ):
PUSH_L ( EBP )
MOV_L ( ESP, EBP )
@ -196,7 +196,7 @@ GLNAME( _mesa_test_os_katmai_exception_support ):
LDMXCSR ( REGOFF( -8, EBP ) )
XORPS ( XMM0, XMM0 )
MOVUPS ( GLNAME( katmai_test_dummy ), XMM1 )
MOVUPS ( GLNAME( sse_test_dummy ), XMM1 )
DIVPS ( XMM0, XMM1 )

View File

@ -1,4 +1,4 @@
/* $Id: common_x86_asm.h,v 1.7 2001/03/28 20:44:44 gareth Exp $ */
/* $Id: common_x86_asm.h,v 1.8 2001/03/29 06:46:16 gareth Exp $ */
/*
* Mesa 3-D graphics library
@ -52,8 +52,8 @@
#ifdef USE_3DNOW_ASM
#include "3dnow.h"
#endif
#ifdef USE_KATMAI_ASM
#include "katmai.h"
#ifdef USE_SSE_ASM
#include "sse.h"
#endif
#endif

208
src/mesa/x86/sse.c Normal file
View File

@ -0,0 +1,208 @@
/* $Id: sse.c,v 1.1 2001/03/29 06:46:16 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* PentiumIII-SIMD (SSE) optimizations contributed by
* Andre Werthmann <wertmann@cs.uni-potsdam.de>
*/
#include "glheader.h"
#include "context.h"
#include "mtypes.h"
#include "sse.h"
#include "math/m_vertices.h"
#include "math/m_xform.h"
#include "tnl/t_context.h"
#ifdef DEBUG
#include "math/m_debug.h"
#endif
/* Parameter list shared by every point-transform prototype declared in
 * this file: destination vector, 16-float matrix, source vector, and a
 * per-vertex mask together with the flag value tested against it.
 */
#define XFORM_ARGS GLvector4f *to_vec, \
const GLfloat m[16], \
const GLvector4f *from_vec, \
const GLubyte *mask, \
const GLubyte flag
/* Declare the seven external transform routines
 * _mesa_<pfx>_transform_points<sz>_<matrix kind> for one point size.
 * The expansion already ends in ';', so invoke it without one.
 */
#define DECLARE_XFORM_GROUP( pfx, sz ) \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_general( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_identity( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d_no_rot( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_perspective( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d_no_rot( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d( XFORM_ARGS );
/* Hook the seven _mesa_<pfx>_transform_points<sz>_* routines into row 0 of
 * _mesa_transform_tab for point size <sz>, one entry per matrix kind.
 *
 * The assignments are wrapped in do { } while (0) so the macro expands to
 * exactly one statement and stays correct inside an unbraced if/else.
 * Invoke it with a trailing ';'.
 */
#define ASSIGN_XFORM_GROUP( pfx, sz ) \
do { \
   _mesa_transform_tab[0][sz][MATRIX_GENERAL] = \
      _mesa_##pfx##_transform_points##sz##_general; \
   _mesa_transform_tab[0][sz][MATRIX_IDENTITY] = \
      _mesa_##pfx##_transform_points##sz##_identity; \
   _mesa_transform_tab[0][sz][MATRIX_3D_NO_ROT] = \
      _mesa_##pfx##_transform_points##sz##_3d_no_rot; \
   _mesa_transform_tab[0][sz][MATRIX_PERSPECTIVE] = \
      _mesa_##pfx##_transform_points##sz##_perspective; \
   _mesa_transform_tab[0][sz][MATRIX_2D] = \
      _mesa_##pfx##_transform_points##sz##_2d; \
   _mesa_transform_tab[0][sz][MATRIX_2D_NO_ROT] = \
      _mesa_##pfx##_transform_points##sz##_2d_no_rot; \
   _mesa_transform_tab[0][sz][MATRIX_3D] = \
      _mesa_##pfx##_transform_points##sz##_3d; \
} while (0)
/* Parameter list shared by every normal-transform prototype declared in
 * this file.
 */
#define NORM_ARGS const GLmatrix *mat, \
GLfloat scale, \
const GLvector3f *in, \
const GLfloat *lengths, \
const GLubyte mask[], \
GLvector3f *dest
/* Declare the eight external normal-transform routines _mesa_<pfx>_*normals*.
 * The expansion already ends in ';', so invoke it without one.
 */
#define DECLARE_NORM_GROUP( pfx ) \
extern void _ASMAPI _mesa_##pfx##_rescale_normals( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_normalize_normals( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_normals( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_normals_no_rot( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals_no_rot( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals_no_rot( NORM_ARGS );
/* Hook the eight _mesa_<pfx>_*normals* routines into column 0 of
 * _mesa_normal_tab, one entry per NORM_* flag combination.
 *
 * The assignments are wrapped in do { } while (0) so the macro expands to
 * exactly one statement and stays correct inside an unbraced if/else.
 * Invoke it with a trailing ';'.
 */
#define ASSIGN_NORM_GROUP( pfx ) \
do { \
   _mesa_normal_tab[NORM_RESCALE][0] = \
      _mesa_##pfx##_rescale_normals; \
   _mesa_normal_tab[NORM_NORMALIZE][0] = \
      _mesa_##pfx##_normalize_normals; \
   _mesa_normal_tab[NORM_TRANSFORM][0] = \
      _mesa_##pfx##_transform_normals; \
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT][0] = \
      _mesa_##pfx##_transform_normals_no_rot; \
   _mesa_normal_tab[NORM_TRANSFORM|NORM_RESCALE][0] = \
      _mesa_##pfx##_transform_rescale_normals; \
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT|NORM_RESCALE][0] = \
      _mesa_##pfx##_transform_rescale_normals_no_rot; \
   _mesa_normal_tab[NORM_TRANSFORM|NORM_NORMALIZE][0] = \
      _mesa_##pfx##_transform_normalize_normals; \
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT|NORM_NORMALIZE][0] = \
      _mesa_##pfx##_transform_normalize_normals_no_rot; \
} while (0)
#ifdef USE_SSE_ASM
/* Complete transform groups for 2- and 3-component points. */
DECLARE_XFORM_GROUP( sse, 2 )
DECLARE_XFORM_GROUP( sse, 3 )

#if 1
/* Only the routines below are declared individually: the remaining normal
 * and points4 variants are not written in SSE assembly, because the FPU
 * ones are faster.
 *
 * These are assembly entry points, so they carry _ASMAPI exactly like the
 * declarations produced by DECLARE_XFORM_GROUP / DECLARE_NORM_GROUP.
 */
extern void _ASMAPI _mesa_sse_transform_normals_no_rot( NORM_ARGS );
extern void _ASMAPI _mesa_sse_transform_rescale_normals( NORM_ARGS );
extern void _ASMAPI _mesa_sse_transform_rescale_normals_no_rot( NORM_ARGS );

extern void _ASMAPI _mesa_sse_transform_points4_general( XFORM_ARGS );
extern void _ASMAPI _mesa_sse_transform_points4_3d( XFORM_ARGS );
extern void _ASMAPI _mesa_sse_transform_points4_identity( XFORM_ARGS );
#else
DECLARE_NORM_GROUP( sse )
#endif

/* Vertex-path routines referenced by _mesa_init_sse_vertex_asm(). */
extern void _ASMAPI
_mesa_v16_sse_general_xform( GLfloat *first_vert,
                             const GLfloat *m,
                             const GLfloat *src,
                             GLuint src_stride,
                             GLuint count );

extern void _ASMAPI
_mesa_sse_project_vertices( GLfloat *first,
                            GLfloat *last,
                            const GLfloat *m,
                            GLuint stride );

extern void _ASMAPI
_mesa_sse_project_clipped_vertices( GLfloat *first,
                                    GLfloat *last,
                                    const GLfloat *m,
                                    GLuint stride,
                                    const GLubyte *clipmask );
#endif
void _mesa_init_sse_transform_asm( void )
{
#ifdef USE_SSE_ASM
   /* Points of size 2 and 3 have a complete set of SSE routines. */
   ASSIGN_XFORM_GROUP( sse, 2 );
   ASSIGN_XFORM_GROUP( sse, 3 );

#if 1
   /* TODO: Finish these off.
    *
    * Until then only the entries that have an SSE implementation are
    * replaced; every other table slot keeps its current handler.
    */

   /* 4-component points. */
   _mesa_transform_tab[0][4][MATRIX_GENERAL] =
      _mesa_sse_transform_points4_general;
   _mesa_transform_tab[0][4][MATRIX_3D] =
      _mesa_sse_transform_points4_3d;
   _mesa_transform_tab[0][4][MATRIX_IDENTITY] =
      _mesa_sse_transform_points4_identity;

   /* Normals. */
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT][0] =
      _mesa_sse_transform_normals_no_rot;
   _mesa_normal_tab[NORM_TRANSFORM|NORM_RESCALE][0] =
      _mesa_sse_transform_rescale_normals;
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT|NORM_RESCALE][0] =
      _mesa_sse_transform_rescale_normals_no_rot;
#else
   ASSIGN_NORM_GROUP( sse );
#endif

#ifdef DEBUG
   /* Debug builds run the math test routines on the freshly installed
    * entries.
    */
   _math_test_all_transform_functions( "SSE" );
   _math_test_all_normal_transform_functions( "SSE" );
#endif
#endif
}
void _mesa_init_sse_vertex_asm( void )
{
#ifdef USE_SSE_ASM
   /* The general v16 point transform is the only vertex hook installed. */
   _mesa_xform_points3_v16_general = _mesa_v16_sse_general_xform;

#if 0
   /* GH: These are broken. I'm fixing them now.
    */
   _mesa_project_v16 = _mesa_sse_project_vertices;
   _mesa_project_clipped_v16 = _mesa_sse_project_clipped_vertices;
#endif

#ifdef DEBUG_NOT
   /* Compiled only when DEBUG_NOT is defined. */
   _math_test_all_vertex_functions( "SSE" );
#endif
#endif
}

40
src/mesa/x86/sse.h Normal file
View File

@ -0,0 +1,40 @@
/* $Id: sse.h,v 1.1 2001/03/29 06:46:16 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* PentiumIII-SIMD (SSE) optimizations contributed by
* Andre Werthmann <wertmann@cs.uni-potsdam.de>
*/
#ifndef __SSE_H__
#define __SSE_H__
#include "math/m_xform.h"
/* Install the SSE point- and normal-transform routines into the transform
 * tables.  Does nothing unless USE_SSE_ASM is defined.
 */
void _mesa_init_sse_transform_asm( void );
/* Install the SSE vertex-path routine(s).  Does nothing unless USE_SSE_ASM
 * is defined.
 */
void _mesa_init_sse_vertex_asm( void );
#endif

252
src/mesa/x86/sse_normal.S Normal file
View File

@ -0,0 +1,252 @@
/* $Id: sse_normal.S,v 1.1 2001/03/29 06:46:16 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/** TODO:
* - insert PREFETCH instructions to avoid cache-misses !
* - some more optimizations are possible...
* - for 40-50% more performance in the SSE-functions, the
* data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned !
*/
#include "matypes.h"
#include "norm_args.h"
SEG_TEXT
/* Operand shorthands: the i-th float (byte offset i*4) relative to
 * EDX (matrix), ESI (source) and EDI (dest) respectively.
 */
#define M(i) REGOFF(i * 4, EDX)
#define S(i) REGOFF(i * 4, ESI)
#define D(i) REGOFF(i * 4, EDI)
/* Byte offset 12 of the source vector structure, read as the source stride
 * while ESI still points at that structure.
 * NOTE(review): hard-coded offset; presumably matypes.h offers a named
 * constant for it -- confirm and use it instead.
 */
#define STRIDE REGOFF(12, ESI)
ALIGNTEXT16
/*
 * Transform and rescale normals with a no-rotation matrix.
 *
 * With m = the float array read from MATRIX_INV of the matrix argument
 * and s = ARG_SCALE, every source normal (ux, uy, uz) yields:
 *
 *    D(0) = ux * m[0]  * s
 *    D(1) = uy * m[5]  * s
 *    D(2) = uz * m[10] * s
 *
 * Dest elements are written 12 bytes apart; the source pointer advances
 * by the source stride.
 *
 * The dest count is stored before the zero-count test, so an empty input
 * leaves the dest vector with count 0 instead of a stale value (the same
 * order the 3DNow! transform routines use).
 *
 * Registers inside the loop:
 *    ESI = source normal     EDI = dest normal     EAX = source stride
 *    ECX = end-of-dest pointer (loop bound)
 *    XMM1 = m5*s | m0*s      XMM0 (low float) = m10*s
 */
GLOBL GLNAME(_mesa_sse_transform_rescale_normals_no_rot)
GLNAME(_mesa_sse_transform_rescale_normals_no_rot):

/* Two registers are pushed below before any ARG_* macro is used. */
#define FRAME_OFFSET 8
    PUSH_L  ( ESI )
    PUSH_L  ( EDI )

    MOV_L   ( ARG_IN, ESI )                     /* source GLvector3f */
    MOV_L   ( ARG_DEST, EDI )                   /* dest GLvector3f */
    MOV_L   ( ARG_MAT, EDX )                    /* matrix */
    MOV_L   ( REGOFF(MATRIX_INV, EDX), EDX )    /* EDX = matrix->inv */

    MOV_L   ( REGOFF(V3F_COUNT, ESI), ECX )     /* ECX = source count */
    MOV_L   ( ECX, REGOFF(V3F_COUNT, EDI) )     /* dest count, even when 0 */
    TEST_L  ( ECX, ECX )
    JZ      ( LLBL(K_G3TRNNRR_finish) )         /* nothing to transform */

    MOV_L   ( STRIDE, EAX )                     /* EAX = source stride */
    IMUL_L  ( CONST(12), ECX )                  /* count * 12 bytes per dest normal */
    MOV_L   ( REGOFF(V3F_START, ESI), ESI )     /* first source normal */
    MOV_L   ( REGOFF(V3F_START, EDI), EDI )     /* first dest normal */
    ADD_L   ( EDI, ECX )                        /* ECX = end of dest data */

ALIGNTEXT32
    /* Loop-invariant factors, pre-multiplied by the scale. */
    MOVSS   ( M(0), XMM1 )                      /* m0 */
    MOVSS   ( M(5), XMM2 )                      /* m5 */
    UNPCKLPS( XMM2, XMM1 )                      /* m5 | m0 */
    MOVSS   ( ARG_SCALE, XMM0 )                 /* scale */
    SHUFPS  ( CONST(0x0), XMM0, XMM0 )          /* scale | scale */
    MULPS   ( XMM0, XMM1 )                      /* m5*scale | m0*scale */
    MULSS   ( M(10), XMM0 )                     /* m10*scale */

ALIGNTEXT32
LLBL(K_G3TRNNRR_top):
    MOVLPS  ( S(0), XMM2 )                      /* uy | ux */
    MULPS   ( XMM1, XMM2 )                      /* uy*m5*scale | ux*m0*scale */
    MOVLPS  ( XMM2, D(0) )                      /* ->D(1) | D(0) */

    MOVSS   ( S(2), XMM2 )                      /* uz */
    MULSS   ( XMM0, XMM2 )                      /* uz*m10*scale */
    MOVSS   ( XMM2, D(2) )                      /* ->D(2) */

LLBL(K_G3TRNNRR_skip):
    ADD_L   ( CONST(12), EDI )                  /* next dest normal */
    ADD_L   ( EAX, ESI )                        /* next source normal */
    CMP_L   ( ECX, EDI )
    JNE     ( LLBL(K_G3TRNNRR_top) )            /* until dest end reached */

LLBL(K_G3TRNNRR_finish):
    POP_L   ( EDI )
    POP_L   ( ESI )
    RET
#undef FRAME_OFFSET
ALIGNTEXT16
/*
 * Transform and rescale normals with a general 3x3 matrix.
 *
 * With m = the float array read from MATRIX_INV of the matrix argument
 * and s = ARG_SCALE, every source normal (ux, uy, uz) yields:
 *
 *    D(0) = s * (ux*m[0] + uy*m[1] + uz*m[2])
 *    D(1) = s * (ux*m[4] + uy*m[5] + uz*m[6])
 *    D(2) = s * (ux*m[8] + uy*m[9] + uz*m[10])
 *
 * Dest elements are written 12 bytes apart; the source pointer advances
 * by the source stride.
 *
 * The dest count is stored before the zero-count test, so an empty input
 * leaves the dest vector with count 0 instead of a stale value (the same
 * order the 3DNow! transform routines use).
 *
 * Registers inside the loop:
 *    ESI = source normal     EDI = dest normal     EAX = source stride
 *    ECX = end-of-dest pointer (loop bound)
 *    XMM0 = m4*s | m0*s      XMM1 = m5*s | m1*s    XMM2 = m6*s | m2*s
 *    XMM6 = m8*s             XMM7 = m9*s           XMM3-XMM5 = scratch
 *
 * NOTE(review): D(0)/D(1) are stored before S(0)/S(1) are read again for
 * the z component, so the result is wrong if source and dest storage
 * overlap -- confirm callers never transform in place.
 */
GLOBL GLNAME(_mesa_sse_transform_rescale_normals)
GLNAME(_mesa_sse_transform_rescale_normals):

/* Two registers are pushed below before any ARG_* macro is used. */
#define FRAME_OFFSET 8
    PUSH_L  ( ESI )
    PUSH_L  ( EDI )

    MOV_L   ( ARG_IN, ESI )                     /* source GLvector3f */
    MOV_L   ( ARG_DEST, EDI )                   /* dest GLvector3f */
    MOV_L   ( ARG_MAT, EDX )                    /* matrix */
    MOV_L   ( REGOFF(MATRIX_INV, EDX), EDX )    /* EDX = matrix->inv */

    MOV_L   ( REGOFF(V3F_COUNT, ESI), ECX )     /* ECX = source count */
    MOV_L   ( ECX, REGOFF(V3F_COUNT, EDI) )     /* dest count, even when 0 */
    TEST_L  ( ECX, ECX )
    JZ      ( LLBL(K_G3TRNR_finish) )           /* nothing to transform */

    MOV_L   ( STRIDE, EAX )                     /* EAX = source stride */
    IMUL_L  ( CONST(12), ECX )                  /* count * 12 bytes per dest normal */
    MOV_L   ( REGOFF(V3F_START, ESI), ESI )     /* first source normal */
    MOV_L   ( REGOFF(V3F_START, EDI), EDI )     /* first dest normal */
    ADD_L   ( EDI, ECX )                        /* ECX = end of dest data */

ALIGNTEXT32
    /* Loop-invariant factors, pre-multiplied by the scale. */
    MOVSS   ( M(0), XMM0 )                      /* m0 */
    MOVSS   ( M(4), XMM1 )                      /* m4 */
    UNPCKLPS( XMM1, XMM0 )                      /* m4 | m0 */

    MOVSS   ( ARG_SCALE, XMM4 )                 /* scale */
    SHUFPS  ( CONST(0x0), XMM4, XMM4 )          /* scale | scale */

    MULPS   ( XMM4, XMM0 )                      /* m4*scale | m0*scale */
    MOVSS   ( M(1), XMM1 )                      /* m1 */
    MOVSS   ( M(5), XMM2 )                      /* m5 */
    UNPCKLPS( XMM2, XMM1 )                      /* m5 | m1 */
    MULPS   ( XMM4, XMM1 )                      /* m5*scale | m1*scale */
    MOVSS   ( M(2), XMM2 )                      /* m2 */
    MOVSS   ( M(6), XMM3 )                      /* m6 */
    UNPCKLPS( XMM3, XMM2 )                      /* m6 | m2 */
    MULPS   ( XMM4, XMM2 )                      /* m6*scale | m2*scale */

    MOVSS   ( M(8), XMM6 )                      /* m8 */
    MULSS   ( ARG_SCALE, XMM6 )                 /* m8*scale */
    MOVSS   ( M(9), XMM7 )                      /* m9 */
    MULSS   ( ARG_SCALE, XMM7 )                 /* m9*scale */

ALIGNTEXT32
LLBL(K_G3TRNR_top):
    /* x and y components, two lanes at a time. */
    MOVSS   ( S(0), XMM3 )                      /* ux */
    SHUFPS  ( CONST(0x0), XMM3, XMM3 )          /* ux | ux */
    MULPS   ( XMM0, XMM3 )                      /* ux*m4 | ux*m0 */
    MOVSS   ( S(1), XMM4 )                      /* uy */
    SHUFPS  ( CONST(0x0), XMM4, XMM4 )          /* uy | uy */
    MULPS   ( XMM1, XMM4 )                      /* uy*m5 | uy*m1 */
    MOVSS   ( S(2), XMM5 )                      /* uz */
    SHUFPS  ( CONST(0x0), XMM5, XMM5 )          /* uz | uz */
    MULPS   ( XMM2, XMM5 )                      /* uz*m6 | uz*m2 */

    ADDPS   ( XMM4, XMM3 )
    ADDPS   ( XMM5, XMM3 )
    MOVLPS  ( XMM3, D(0) )                      /* ->D(1) | D(0) */

    /* z component; m10*scale is recomputed here because all eight XMM
     * registers are already in use. */
    MOVSS   ( M(10), XMM3 )                     /* m10 */
    MULSS   ( ARG_SCALE, XMM3 )                 /* m10*scale */
    MULSS   ( S(2), XMM3 )                      /* m10*scale*uz */
    MOVSS   ( S(1), XMM4 )                      /* uy */
    MULSS   ( XMM7, XMM4 )                      /* uy*m9*scale */
    MOVSS   ( S(0), XMM5 )                      /* ux */
    MULSS   ( XMM6, XMM5 )                      /* ux*m8*scale */

    ADDSS   ( XMM4, XMM3 )
    ADDSS   ( XMM5, XMM3 )
    MOVSS   ( XMM3, D(2) )                      /* ->D(2) */

LLBL(K_G3TRNR_skip):
    ADD_L   ( CONST(12), EDI )                  /* next dest normal */
    ADD_L   ( EAX, ESI )                        /* next source normal */
    CMP_L   ( ECX, EDI )
    JNE     ( LLBL(K_G3TRNR_top) )              /* until dest end reached */

LLBL(K_G3TRNR_finish):
    POP_L   ( EDI )
    POP_L   ( ESI )
    RET
#undef FRAME_OFFSET
ALIGNTEXT16
/*
 * Transform normals with a no-rotation matrix (no rescaling).
 *
 * With m = the float array read from MATRIX_INV of the matrix argument,
 * every source normal (ux, uy, uz) yields:
 *
 *    D(0) = ux * m[0]
 *    D(1) = uy * m[5]
 *    D(2) = uz * m[10]
 *
 * Dest elements are written 12 bytes apart; the source pointer advances
 * by the source stride.
 *
 * The dest count is stored before the zero-count test, so an empty input
 * leaves the dest vector with count 0 instead of a stale value (the same
 * order the 3DNow! transform routines use).
 *
 * Registers inside the loop:
 *    ESI = source normal     EDI = dest normal     EAX = source stride
 *    ECX = end-of-dest pointer (loop bound)
 *    XMM0 = m5 | m0          XMM1 (low float) = m10
 */
GLOBL GLNAME(_mesa_sse_transform_normals_no_rot)
GLNAME(_mesa_sse_transform_normals_no_rot):

/* Two registers are pushed below before any ARG_* macro is used. */
#define FRAME_OFFSET 8
    PUSH_L  ( ESI )
    PUSH_L  ( EDI )

    MOV_L   ( ARG_IN, ESI )                     /* source GLvector3f */
    MOV_L   ( ARG_DEST, EDI )                   /* dest GLvector3f */
    MOV_L   ( ARG_MAT, EDX )                    /* matrix */
    MOV_L   ( REGOFF(MATRIX_INV, EDX), EDX )    /* EDX = matrix->inv */

    MOV_L   ( REGOFF(V3F_COUNT, ESI), ECX )     /* ECX = source count */
    MOV_L   ( ECX, REGOFF(V3F_COUNT, EDI) )     /* dest count, even when 0 */
    TEST_L  ( ECX, ECX )
    JZ      ( LLBL(K_G3TNNRR_finish) )          /* nothing to transform */

    MOV_L   ( STRIDE, EAX )                     /* EAX = source stride */
    IMUL_L  ( CONST(12), ECX )                  /* count * 12 bytes per dest normal */
    MOV_L   ( REGOFF(V3F_START, ESI), ESI )     /* first source normal */
    MOV_L   ( REGOFF(V3F_START, EDI), EDI )     /* first dest normal */
    ADD_L   ( EDI, ECX )                        /* ECX = end of dest data */

ALIGNTEXT32
    /* Loop-invariant diagonal terms. */
    MOVSS   ( M(0), XMM0 )                      /* m0 */
    MOVSS   ( M(5), XMM1 )                      /* m5 */
    UNPCKLPS( XMM1, XMM0 )                      /* m5 | m0 */
    MOVSS   ( M(10), XMM1 )                     /* m10 */

ALIGNTEXT32
LLBL(K_G3TNNRR_top):
    MOVLPS  ( S(0), XMM2 )                      /* uy | ux */
    MULPS   ( XMM0, XMM2 )                      /* uy*m5 | ux*m0 */
    MOVLPS  ( XMM2, D(0) )                      /* ->D(1) | D(0) */

    MOVSS   ( S(2), XMM2 )                      /* uz */
    MULSS   ( XMM1, XMM2 )                      /* uz*m10 */
    MOVSS   ( XMM2, D(2) )                      /* ->D(2) */

LLBL(K_G3TNNRR_skip):
    ADD_L   ( CONST(12), EDI )                  /* next dest normal */
    ADD_L   ( EAX, ESI )                        /* next source normal */
    CMP_L   ( ECX, EDI )
    JNE     ( LLBL(K_G3TNNRR_top) )             /* until dest end reached */

LLBL(K_G3TNNRR_finish):
    POP_L   ( EDI )
    POP_L   ( ESI )
    RET
#undef FRAME_OFFSET

433
src/mesa/x86/sse_xform1.S Normal file
View File

@ -0,0 +1,433 @@
/* $Id: sse_xform1.S,v 1.1 2001/03/29 06:46:27 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/** TODO:
* - insert PREFETCH instructions to avoid cache-misses !
* - some more optimizations are possible...
* - for 40-50% more performance in the SSE-functions, the
* data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned !
*/
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
/* Operand shorthands: the i-th float (byte offset i*4) relative to
 * ESI (source vertex), EDI (dest vertex) and EDX (matrix) respectively.
 */
#define S(i) REGOFF(i * 4, ESI)
#define D(i) REGOFF(i * 4, EDI)
#define M(i) REGOFF(i * 4, EDX)
ALIGNTEXT4
/*
 * Transform 1-component points by a general matrix.
 *
 * With m[i] the i-th float of the matrix, every source value ox yields
 * the four dest floats:
 *
 *    D(i) = ox * m[i] + m[12 + i]      for i = 0..3
 *
 * Dest vertices are written 16 bytes apart; the source pointer advances
 * by the source stride.
 *
 * The dest flags, count and size are stored before the zero-count test,
 * so an empty input still leaves the dest vector described correctly (the
 * same order the 3DNow! transform routines use).
 *
 * Registers inside the loop:
 *    ESI = source vertex     EDI = dest vertex     EAX = source stride
 *    ECX = end-of-dest pointer (loop bound)
 *    XMM0 = m3 | m2 | m1 | m0          XMM1 = m15 | m14 | m13 | m12
 *
 * NOTE(review): the two matrix rows are fetched with MOVAPS, which faults
 * unless the address is 16-byte aligned -- confirm every matrix passed in
 * is allocated with that alignment.
 */
GLOBL GLNAME(_mesa_sse_transform_points1_general)
GLNAME( _mesa_sse_transform_points1_general ):

/* Two registers are pushed below; the "+8" in the argument loads matches. */
#define FRAME_OFFSET 8
    PUSH_L  ( ESI )
    PUSH_L  ( EDI )

    MOV_L   ( REGOFF(OFFSET_SOURCE+8, ESP), ESI )   /* source GLvector4f */
    MOV_L   ( REGOFF(OFFSET_DEST+8, ESP), EDI )     /* dest GLvector4f */
    MOV_L   ( ARG_MATRIX, EDX )                     /* matrix */

    MOV_L   ( REGOFF(V4F_COUNT, ESI), ECX )         /* ECX = source count */

    /* Describe the dest vector first, so it is valid even when empty. */
    OR_L    ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )   /* dest flags */
    MOV_L   ( ECX, REGOFF(V4F_COUNT, EDI) )         /* dest count */
    MOV_L   ( CONST(4), REGOFF(V4F_SIZE, EDI) )     /* dest size */

    TEST_L  ( ECX, ECX )
    JZ      ( LLBL(K_GTP1GR_finish) )               /* nothing to transform */

    MOV_L   ( REGOFF(V4F_STRIDE, ESI), EAX )        /* EAX = source stride */
    SHL_L   ( CONST(4), ECX )                       /* count * 16 bytes per dest vertex */
    MOV_L   ( REGOFF(V4F_START, ESI), ESI )         /* first source vertex */
    MOV_L   ( REGOFF(V4F_START, EDI), EDI )         /* first dest vertex */
    ADD_L   ( EDI, ECX )                            /* ECX = end of dest data */

ALIGNTEXT32
    /* Loop-invariant matrix rows. */
    MOVAPS  ( M(0), XMM0 )                          /* m3 | m2 | m1 | m0 */
    MOVAPS  ( M(12), XMM1 )                         /* m15 | m14 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP1GR_top):
    MOVSS   ( S(0), XMM2 )                          /* ox */
    SHUFPS  ( CONST(0x0), XMM2, XMM2 )              /* ox | ox | ox | ox */
    MULPS   ( XMM0, XMM2 )                          /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
    ADDPS   ( XMM1, XMM2 )                          /* + | + | + | + */
    MOVUPS  ( XMM2, D(0) )                          /* unaligned store of all four */

LLBL(K_GTP1GR_skip):
    ADD_L   ( CONST(16), EDI )                      /* next dest vertex */
    ADD_L   ( EAX, ESI )                            /* next source vertex */
    CMP_L   ( ECX, EDI )
    JNE     ( LLBL(K_GTP1GR_top) )                  /* until dest end reached */

LLBL(K_GTP1GR_finish):
    POP_L   ( EDI )
    POP_L   ( ESI )
    RET
#undef FRAME_OFFSET
ALIGNTEXT4
/*
 * Identity "transform" of 1-component points: the first 32-bit word of
 * every source vertex is copied to the matching dest vertex through EDX.
 * The copy loop is skipped entirely when the source and dest data
 * pointers are equal.
 *
 * Dest vertices are written 16 bytes apart; the source pointer advances
 * by the source stride.
 *
 * The dest flags, count and size are stored before the zero-count test,
 * so an empty input still leaves the dest vector described correctly (the
 * same order the 3DNow! transform routines use).
 *
 * Registers inside the loop:
 *    ESI = source vertex     EDI = dest vertex     EAX = source stride
 *    ECX = end-of-dest pointer (loop bound)        EDX = copy scratch
 */
GLOBL GLNAME(_mesa_sse_transform_points1_identity)
GLNAME( _mesa_sse_transform_points1_identity ):

/* Two registers are pushed below; the "+8" in the argument loads matches. */
#define FRAME_OFFSET 8
    PUSH_L  ( ESI )
    PUSH_L  ( EDI )

    MOV_L   ( REGOFF(OFFSET_SOURCE+8, ESP), ESI )   /* source GLvector4f */
    MOV_L   ( REGOFF(OFFSET_DEST+8, ESP), EDI )     /* dest GLvector4f */

    MOV_L   ( REGOFF(V4F_COUNT, ESI), ECX )         /* ECX = source count */

    /* Describe the dest vector first, so it is valid even when empty. */
    OR_L    ( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, EDI) )   /* dest flags */
    MOV_L   ( ECX, REGOFF(V4F_COUNT, EDI) )         /* dest count */
    MOV_L   ( CONST(1), REGOFF(V4F_SIZE, EDI) )     /* dest size */

    TEST_L  ( ECX, ECX )
    JZ      ( LLBL(K_GTP1IR_finish) )               /* nothing to copy */

    MOV_L   ( REGOFF(V4F_STRIDE, ESI), EAX )        /* EAX = source stride */
    SHL_L   ( CONST(4), ECX )                       /* count * 16 bytes per dest vertex */
    MOV_L   ( REGOFF(V4F_START, ESI), ESI )         /* first source vertex */
    MOV_L   ( REGOFF(V4F_START, EDI), EDI )         /* first dest vertex */
    ADD_L   ( EDI, ECX )                            /* ECX = end of dest data */

    CMP_L   ( ESI, EDI )
    JE      ( LLBL(K_GTP1IR_finish) )               /* same storage: no copy needed */

ALIGNTEXT32
LLBL(K_GTP1IR_top):
    MOV_L   ( S(0), EDX )                           /* x */
    MOV_L   ( EDX, D(0) )                           /* ->D(0) */

LLBL(K_GTP1IR_skip):
    ADD_L   ( CONST(16), EDI )                      /* next dest vertex */
    ADD_L   ( EAX, ESI )                            /* next source vertex */
    CMP_L   ( ECX, EDI )
    JNE     ( LLBL(K_GTP1IR_top) )                  /* until dest end reached */

LLBL(K_GTP1IR_finish):
    POP_L   ( EDI )
    POP_L   ( ESI )
    RET
#undef FRAME_OFFSET
ALIGNTEXT4
/*
 * Transform 1-component points by a 3D no-rotation matrix.
 *
 * With m[i] the i-th float of the matrix, every source value ox yields:
 *
 *    D(0) = ox * m[0] + m[12]
 *    D(1) = m[13]
 *    D(2) = m[14]
 *
 * D(3) is not written; the dest vector is marked as size 3.  Dest
 * vertices are 16 bytes apart; the source pointer advances by the source
 * stride.
 *
 * The dest flags, count and size are stored before the zero-count test,
 * so an empty input still leaves the dest vector described correctly (the
 * same order the 3DNow! transform routines use).
 *
 * Registers inside the loop:
 *    ESI = source vertex     EDI = dest vertex     EAX = source stride
 *    ECX = end-of-dest pointer (loop bound)
 *    XMM0 = m0   XMM1 = m12   XMM2 = m13   XMM3 = m14   XMM4 = scratch
 */
GLOBL GLNAME(_mesa_sse_transform_points1_3d_no_rot)
GLNAME(_mesa_sse_transform_points1_3d_no_rot):

/* Two registers are pushed below; the "+8" in the argument loads matches. */
#define FRAME_OFFSET 8
    PUSH_L  ( ESI )
    PUSH_L  ( EDI )

    MOV_L   ( REGOFF(OFFSET_SOURCE+8, ESP), ESI )   /* source GLvector4f */
    MOV_L   ( REGOFF(OFFSET_DEST+8, ESP), EDI )     /* dest GLvector4f */
    MOV_L   ( ARG_MATRIX, EDX )                     /* matrix */

    MOV_L   ( REGOFF(V4F_COUNT, ESI), ECX )         /* ECX = source count */

    /* Describe the dest vector first, so it is valid even when empty. */
    OR_L    ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )   /* dest flags */
    MOV_L   ( ECX, REGOFF(V4F_COUNT, EDI) )         /* dest count */
    MOV_L   ( CONST(3), REGOFF(V4F_SIZE, EDI) )     /* dest size */

    TEST_L  ( ECX, ECX )
    JZ      ( LLBL(K_GTP13DNRR_finish) )            /* nothing to transform */

    MOV_L   ( REGOFF(V4F_STRIDE, ESI), EAX )        /* EAX = source stride */
    SHL_L   ( CONST(4), ECX )                       /* count * 16 bytes per dest vertex */
    MOV_L   ( REGOFF(V4F_START, ESI), ESI )         /* first source vertex */
    MOV_L   ( REGOFF(V4F_START, EDI), EDI )         /* first dest vertex */
    ADD_L   ( EDI, ECX )                            /* ECX = end of dest data */

ALIGNTEXT32
    /* Loop-invariant matrix terms. */
    MOVSS   ( M(0), XMM0 )                          /* m0 */
    MOVSS   ( M(12), XMM1 )                         /* m12 */
    MOVSS   ( M(13), XMM2 )                         /* m13 */
    MOVSS   ( M(14), XMM3 )                         /* m14 */

ALIGNTEXT32
LLBL(K_GTP13DNRR_top):
    MOVSS   ( S(0), XMM4 )                          /* ox */
    MULSS   ( XMM0, XMM4 )                          /* ox*m0 */
    ADDSS   ( XMM1, XMM4 )                          /* ox*m0+m12 */
    MOVSS   ( XMM4, D(0) )                          /* ->D(0) */

    MOVSS   ( XMM2, D(1) )                          /* m13->D(1) */
    MOVSS   ( XMM3, D(2) )                          /* m14->D(2) */

LLBL(K_GTP13DNRR_skip):
    ADD_L   ( CONST(16), EDI )                      /* next dest vertex */
    ADD_L   ( EAX, ESI )                            /* next source vertex */
    CMP_L   ( ECX, EDI )
    JNE     ( LLBL(K_GTP13DNRR_top) )               /* until dest end reached */

LLBL(K_GTP13DNRR_finish):
    POP_L   ( EDI )
    POP_L   ( ESI )
    RET
#undef FRAME_OFFSET
ALIGNTEXT4
/*
 * Transform 1-component points by a perspective matrix.
 *
 * With m[i] the i-th float of the matrix, every source value ox yields:
 *
 *    D(0) = ox * m[0]
 *    D(1) = 0
 *    D(2) = m[14]
 *    D(3) = 0
 *
 * Dest vertices are written 16 bytes apart; the source pointer advances
 * by the source stride.
 *
 * The dest flags, count and size are stored before the zero-count test,
 * so an empty input still leaves the dest vector described correctly (the
 * same order the 3DNow! transform routines use).
 *
 * Registers inside the loop:
 *    ESI = source vertex     EDI = dest vertex     EAX = source stride
 *    ECX = end-of-dest pointer (loop bound)
 *    XMM0 = 0.0   XMM1 = m0   XMM2 = m14   XMM3 = scratch
 */
GLOBL GLNAME(_mesa_sse_transform_points1_perspective)
GLNAME(_mesa_sse_transform_points1_perspective):

/* Two registers are pushed below; the "+8" in the argument loads matches. */
#define FRAME_OFFSET 8
    PUSH_L  ( ESI )
    PUSH_L  ( EDI )

    MOV_L   ( REGOFF(OFFSET_SOURCE+8, ESP), ESI )   /* source GLvector4f */
    MOV_L   ( REGOFF(OFFSET_DEST+8, ESP), EDI )     /* dest GLvector4f */
    MOV_L   ( ARG_MATRIX, EDX )                     /* matrix */

    MOV_L   ( REGOFF(V4F_COUNT, ESI), ECX )         /* ECX = source count */

    /* Describe the dest vector first, so it is valid even when empty. */
    OR_L    ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )   /* dest flags */
    MOV_L   ( ECX, REGOFF(V4F_COUNT, EDI) )         /* dest count */
    MOV_L   ( CONST(4), REGOFF(V4F_SIZE, EDI) )     /* dest size */

    TEST_L  ( ECX, ECX )
    JZ      ( LLBL(K_GTP13PR_finish) )              /* nothing to transform */

    MOV_L   ( REGOFF(V4F_STRIDE, ESI), EAX )        /* EAX = source stride */
    SHL_L   ( CONST(4), ECX )                       /* count * 16 bytes per dest vertex */
    MOV_L   ( REGOFF(V4F_START, ESI), ESI )         /* first source vertex */
    MOV_L   ( REGOFF(V4F_START, EDI), EDI )         /* first dest vertex */
    ADD_L   ( EDI, ECX )                            /* ECX = end of dest data */

ALIGNTEXT32
    /* Loop-invariant terms. */
    XORPS   ( XMM0, XMM0 )                          /* 0 | 0 | 0 | 0 */
    MOVSS   ( M(0), XMM1 )                          /* m0 */
    MOVSS   ( M(14), XMM2 )                         /* m14 */

ALIGNTEXT32
LLBL(K_GTP13PR_top):
    MOVSS   ( S(0), XMM3 )                          /* ox */
    MULSS   ( XMM1, XMM3 )                          /* ox*m0 */
    MOVSS   ( XMM3, D(0) )                          /* ox*m0->D(0) */
    MOVSS   ( XMM2, D(2) )                          /* m14->D(2) */

    MOVSS   ( XMM0, D(1) )                          /* 0->D(1) */
    MOVSS   ( XMM0, D(3) )                          /* 0->D(3) */

LLBL(K_GTP13PR_skip):
    ADD_L   ( CONST(16), EDI )                      /* next dest vertex */
    ADD_L   ( EAX, ESI )                            /* next source vertex */
    CMP_L   ( ECX, EDI )
    JNE     ( LLBL(K_GTP13PR_top) )                 /* until dest end reached */

LLBL(K_GTP13PR_finish):
    POP_L   ( EDI )
    POP_L   ( ESI )
    RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points1_2d
 *
 * Reads only S(0) (x) of every source vertex and writes two floats per
 * dest vertex:
 *     D(0) = x*m0 + m12
 *     D(1) = x*m1 + m13
 * D(2) and D(3) are left untouched.
 *
 * Loop registers:
 *     ESI = current source vertex (advanced by the source stride in EAX)
 *     EDI = current dest vertex   (advanced by 16 bytes)
 *     ECX = address one past the last dest vertex
 *     EDX = matrix base
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_2d)
GLNAME(_mesa_sse_transform_points1_2d):
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    /* FRAME_OFFSET accounts for the two registers pushed above. */
    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ESI = source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* EDI = dest GLvector4f */
    MOV_L( ARG_MATRIX, EDX )                               /* EDX = matrix */

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )                   /* ECX = number of source vertices */
    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP13P2DR_finish) )                         /* nothing to transform */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )                  /* EAX = source stride (bytes) */
    OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )      /* dest flags |= VEC_SIZE_2 */
    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )                   /* dest count = source count */
    MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )               /* dest size = 2 */

    SHL_L( CONST(4), ECX )                                 /* ECX = count * 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )                   /* ESI = first source vertex */
    MOV_L( REGOFF(V4F_START, EDI), EDI )                   /* EDI = first dest vertex */
    ADD_L( EDI, ECX )                                      /* ECX = end of dest data */

ALIGNTEXT32
    /* Loop-invariant matrix pairs; only the two low lanes are loaded. */
    MOVLPS( M(0), XMM0 )                                   /* XMM0 = - | - | m1  | m0  */
    MOVLPS( M(12), XMM1 )                                  /* XMM1 = - | - | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP13P2DR_top):
    MOVSS( S(0), XMM2 )                                    /* x */
    SHUFPS( CONST(0x0), XMM2, XMM2 )                       /* x | x | x | x */
    MULPS( XMM0, XMM2 )                                    /* - | - | x*m1 | x*m0 */
    ADDPS( XMM1, XMM2 )                                    /* - | - | +m13 | +m12 */
    MOVLPS( XMM2, D(0) )                                   /* store D(0), D(1) */

LLBL(K_GTP13P2DR_skip):
    ADD_L( CONST(16), EDI )                                /* next dest vertex */
    ADD_L( EAX, ESI )                                      /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP13P2DR_top) )                           /* loop until dest end reached */

LLBL(K_GTP13P2DR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points1_2d_no_rot
 *
 * Reads only S(0) (x) of every source vertex and writes two floats per
 * dest vertex:
 *     D(0) = x*m0 + m12
 *     D(1) = m13
 * D(2) and D(3) are left untouched.
 *
 * Loop registers:
 *     ESI = current source vertex (advanced by the source stride in EAX)
 *     EDI = current dest vertex   (advanced by 16 bytes)
 *     ECX = address one past the last dest vertex
 *     EDX = matrix base
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_2d_no_rot)
GLNAME(_mesa_sse_transform_points1_2d_no_rot):
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    /* FRAME_OFFSET accounts for the two registers pushed above. */
    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ESI = source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* EDI = dest GLvector4f */
    MOV_L( ARG_MATRIX, EDX )                               /* EDX = matrix */

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )                   /* ECX = number of source vertices */
    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP13P2DNRR_finish) )                       /* nothing to transform */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )                  /* EAX = source stride (bytes) */
    OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )      /* dest flags |= VEC_SIZE_2 */
    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )                   /* dest count = source count */
    MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )               /* dest size = 2 */

    SHL_L( CONST(4), ECX )                                 /* ECX = count * 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )                   /* ESI = first source vertex */
    MOV_L( REGOFF(V4F_START, EDI), EDI )                   /* EDI = first dest vertex */
    ADD_L( EDI, ECX )                                      /* ECX = end of dest data */

ALIGNTEXT32
    /* Loop-invariant matrix elements. */
    MOVSS( M(0), XMM0 )                                    /* XMM0 = m0 */
    MOVSS( M(12), XMM1 )                                   /* XMM1 = m12 */
    MOVSS( M(13), XMM2 )                                   /* XMM2 = m13 */

ALIGNTEXT32
LLBL(K_GTP13P2DNRR_top):
    MOVSS( S(0), XMM3 )                                    /* x */
    MULSS( XMM0, XMM3 )                                    /* x*m0 */
    ADDSS( XMM1, XMM3 )                                    /* x*m0 + m12 */
    MOVSS( XMM3, D(0) )
    MOVSS( XMM2, D(1) )                                    /* constant m13 */

LLBL(K_GTP13P2DNRR_skip):
    ADD_L( CONST(16), EDI )                                /* next dest vertex */
    ADD_L( EAX, ESI )                                      /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP13P2DNRR_top) )                         /* loop until dest end reached */

LLBL(K_GTP13P2DNRR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points1_3d
 *
 * Reads only S(0) (x) of every source vertex and writes three floats
 * per dest vertex:
 *     D(0) = x*m0 + m12
 *     D(1) = x*m1 + m13
 *     D(2) = x*m2 + m14
 * D(3) is left untouched.
 *
 * The matrix rows are fetched with MOVAPS, which requires the memory
 * operands at M(0) and M(12) to be 16-byte aligned.
 *
 * Loop registers:
 *     ESI = current source vertex (advanced by the source stride in EAX)
 *     EDI = current dest vertex   (advanced by 16 bytes)
 *     ECX = address one past the last dest vertex
 *     EDX = matrix base
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_3d)
GLNAME(_mesa_sse_transform_points1_3d):
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    /* FRAME_OFFSET accounts for the two registers pushed above. */
    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ESI = source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* EDI = dest GLvector4f */
    MOV_L( ARG_MATRIX, EDX )                               /* EDX = matrix */

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )                   /* ECX = number of source vertices */
    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP13P3DR_finish) )                         /* nothing to transform */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )                  /* EAX = source stride (bytes) */
    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )      /* dest flags |= VEC_SIZE_3 */
    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )                   /* dest count = source count */
    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )               /* dest size = 3 */

    SHL_L( CONST(4), ECX )                                 /* ECX = count * 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )                   /* ESI = first source vertex */
    MOV_L( REGOFF(V4F_START, EDI), EDI )                   /* EDI = first dest vertex */
    ADD_L( EDI, ECX )                                      /* ECX = end of dest data */

ALIGNTEXT32
    /* Loop-invariant matrix rows. */
    MOVAPS( M(0), XMM0 )                                   /* XMM0 = m3  | m2  | m1  | m0  */
    MOVAPS( M(12), XMM1 )                                  /* XMM1 = m15 | m14 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP13P3DR_top):
    MOVSS( S(0), XMM2 )                                    /* x */
    SHUFPS( CONST(0x0), XMM2, XMM2 )                       /* x | x | x | x */
    MULPS( XMM0, XMM2 )                                    /* x*m3 | x*m2 | x*m1 | x*m0 */
    ADDPS( XMM1, XMM2 )                                    /* +m15 | +m14 | +m13 | +m12 */
    MOVLPS( XMM2, D(0) )                                   /* store D(0), D(1) */
    UNPCKHPS( XMM2, XMM2 )                                 /* bring lane 2 (x*m2+m14) down to lane 0 */
    MOVSS( XMM2, D(2) )                                    /* store D(2) */

LLBL(K_GTP13P3DR_skip):
    ADD_L( CONST(16), EDI )                                /* next dest vertex */
    ADD_L( EAX, ESI )                                      /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP13P3DR_top) )                           /* loop until dest end reached */

LLBL(K_GTP13P3DR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET

452
src/mesa/x86/sse_xform2.S Normal file
View File

@ -0,0 +1,452 @@
/* $Id: sse_xform2.S,v 1.1 2001/03/29 06:46:27 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/** TODO:
* - insert PREFETCH instructions to avoid cache-misses !
* - some more optimizations are possible...
* - for 40-50% more performance in the SSE-functions, the
* data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned !
*/
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
/*
 * Memory-operand shorthands used by every routine in this file: element i
 * (byte offset i*4, i.e. 32-bit elements) relative to
 *     S(i) -> ESI, which the routines point at the current source vertex
 *     D(i) -> EDI, which the routines point at the current dest vertex
 *     M(i) -> EDX, which the routines load with the matrix pointer
 * The argument is not parenthesised in the expansion, so pass a plain
 * constant rather than an expression such as a+1.
 */
#define S(i) REGOFF(i * 4, ESI)
#define D(i) REGOFF(i * 4, EDI)
#define M(i) REGOFF(i * 4, EDX)
/*
 * _mesa_sse_transform_points2_general
 *
 * Reads S(0), S(1) (x, y) of every source vertex and writes four floats
 * per dest vertex:
 *     D(0) = x*m0 + y*m4 + m12
 *     D(1) = x*m1 + y*m5 + m13
 *     D(2) = x*m2 + y*m6 + m14
 *     D(3) = x*m3 + y*m7 + m15
 *
 * MOVAPS is used for the matrix rows and for the store to D(0), so those
 * memory operands must be 16-byte aligned.
 *
 * Loop registers:
 *     ESI = current source vertex (advanced by the source stride in EAX)
 *     EDI = current dest vertex   (advanced by 16 bytes)
 *     ECX = address one past the last dest vertex
 *     EDX = matrix base
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_general)
GLNAME( _mesa_sse_transform_points2_general ):
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    /* FRAME_OFFSET accounts for the two registers pushed above. */
    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ESI = source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* EDI = dest GLvector4f */
    MOV_L( ARG_MATRIX, EDX )                               /* EDX = matrix */

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )                   /* ECX = number of source vertices */
    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP2GR_finish) )                            /* nothing to transform */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )                  /* EAX = source stride (bytes) */
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )      /* dest flags |= VEC_SIZE_4 */
    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )                   /* dest count = source count */
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )               /* dest size = 4 */

    SHL_L( CONST(4), ECX )                                 /* ECX = count * 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )                   /* ESI = first source vertex */
    MOV_L( REGOFF(V4F_START, EDI), EDI )                   /* EDI = first dest vertex */
    ADD_L( EDI, ECX )                                      /* ECX = end of dest data */

ALIGNTEXT32
    /* Loop-invariant matrix rows. */
    MOVAPS( M(0), XMM0 )                                   /* XMM0 = m3  | m2  | m1  | m0  */
    MOVAPS( M(4), XMM1 )                                   /* XMM1 = m7  | m6  | m5  | m4  */
    MOVAPS( M(12), XMM2 )                                  /* XMM2 = m15 | m14 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP2GR_top):
    MOVSS( S(0), XMM3 )                                    /* x */
    SHUFPS( CONST(0x0), XMM3, XMM3 )                       /* x | x | x | x */
    MULPS( XMM0, XMM3 )                                    /* x*m3 | x*m2 | x*m1 | x*m0 */

    MOVSS( S(1), XMM4 )                                    /* y */
    SHUFPS( CONST(0x0), XMM4, XMM4 )                       /* y | y | y | y */
    MULPS( XMM1, XMM4 )                                    /* y*m7 | y*m6 | y*m5 | y*m4 */

    ADDPS( XMM4, XMM3 )                                    /* x-terms + y-terms */
    ADDPS( XMM2, XMM3 )                                    /* + m15 | m14 | m13 | m12 */
    MOVAPS( XMM3, D(0) )                                   /* store D(0)..D(3) */

LLBL(K_GTP2GR_skip):
    ADD_L( CONST(16), EDI )                                /* next dest vertex */
    ADD_L( EAX, ESI )                                      /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP2GR_top) )                              /* loop until dest end reached */

LLBL(K_GTP2GR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points2_identity
 *
 * Copies S(0), S(1) of every source vertex to D(0), D(1) of the matching
 * dest vertex using 32-bit integer moves; the matrix argument is never
 * read.  D(2) and D(3) are left untouched.
 *
 * The dest header (flags, count, size) is updated first.  If the source
 * and dest data pointers are then found to be equal, the copy loop is
 * skipped entirely.
 *
 * Loop registers:
 *     ESI = current source vertex (advanced by the source stride in EAX)
 *     EDI = current dest vertex   (advanced by 16 bytes)
 *     ECX = address one past the last dest vertex
 *     EDX = scratch for the element being copied
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_identity)
GLNAME( _mesa_sse_transform_points2_identity ):
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    /* FRAME_OFFSET accounts for the two registers pushed above. */
    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ESI = source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* EDI = dest GLvector4f */

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )                   /* ECX = number of source vertices */
    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP2IR_finish) )                            /* nothing to copy */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )                  /* EAX = source stride (bytes) */
    OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )      /* dest flags |= VEC_SIZE_2 */
    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )                   /* dest count = source count */
    MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )               /* dest size = 2 */

    SHL_L( CONST(4), ECX )                                 /* ECX = count * 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )                   /* ESI = first source vertex */
    MOV_L( REGOFF(V4F_START, EDI), EDI )                   /* EDI = first dest vertex */
    ADD_L( EDI, ECX )                                      /* ECX = end of dest data */

    CMP_L( ESI, EDI )
    JE( LLBL(K_GTP2IR_finish) )                            /* same data pointer: no copy needed */

ALIGNTEXT32
LLBL(K_GTP2IR_top):
    MOV_L( S(0), EDX )                                     /* copy element 0 */
    MOV_L( EDX, D(0) )
    MOV_L( S(1), EDX )                                     /* copy element 1 */
    MOV_L( EDX, D(1) )

LLBL(K_GTP2IR_skip):
    ADD_L( CONST(16), EDI )                                /* next dest vertex */
    ADD_L( EAX, ESI )                                      /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP2IR_top) )                              /* loop until dest end reached */

LLBL(K_GTP2IR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points2_3d_no_rot
 *
 * Reads S(0), S(1) (x, y) of every source vertex and writes three floats
 * per dest vertex:
 *     D(0) = x*m0 + m12
 *     D(1) = y*m5 + m13
 *     D(2) = m14
 * D(3) is left untouched.
 *
 * Only the two low lanes of the packed registers carry meaningful data;
 * the upper lanes are never stored.
 *
 * Loop registers:
 *     ESI = current source vertex (advanced by the source stride in EAX)
 *     EDI = current dest vertex   (advanced by 16 bytes)
 *     ECX = address one past the last dest vertex
 *     EDX = matrix base
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_3d_no_rot)
GLNAME(_mesa_sse_transform_points2_3d_no_rot):
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    /* FRAME_OFFSET accounts for the two registers pushed above. */
    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ESI = source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* EDI = dest GLvector4f */
    MOV_L( ARG_MATRIX, EDX )                               /* EDX = matrix */

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )                   /* ECX = number of source vertices */
    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP23DNRR_finish) )                         /* nothing to transform */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )                  /* EAX = source stride (bytes) */
    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )      /* dest flags |= VEC_SIZE_3 */
    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )                   /* dest count = source count */
    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )               /* dest size = 3 */

    SHL_L( CONST(4), ECX )                                 /* ECX = count * 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )                   /* ESI = first source vertex */
    MOV_L( REGOFF(V4F_START, EDI), EDI )                   /* EDI = first dest vertex */
    ADD_L( EDI, ECX )                                      /* ECX = end of dest data */

ALIGNTEXT32
    /* Loop-invariant values: scale pair, translation pair, constant z. */
    MOVSS( M(0), XMM1 )                                    /* XMM1 = - | - | -   | m0  */
    MOVSS( M(5), XMM2 )                                    /* XMM2 = - | - | -   | m5  */
    UNPCKLPS( XMM2, XMM1 )                                 /* XMM1 = - | - | m5  | m0  */
    MOVLPS( M(12), XMM2 )                                  /* XMM2 = - | - | m13 | m12 */
    MOVSS( M(14), XMM3 )                                   /* XMM3 = - | - | -   | m14 */

ALIGNTEXT32
LLBL(K_GTP23DNRR_top):
    MOVLPS( S(0), XMM0 )                                   /* - | - | y    | x    */
    MULPS( XMM1, XMM0 )                                    /* - | - | y*m5 | x*m0 */
    ADDPS( XMM2, XMM0 )                                    /* - | - | +m13 | +m12 */
    MOVLPS( XMM0, D(0) )                                   /* store D(0), D(1) */
    MOVSS( XMM3, D(2) )                                    /* D(2) = m14 */

LLBL(K_GTP23DNRR_skip):
    ADD_L( CONST(16), EDI )                                /* next dest vertex */
    ADD_L( EAX, ESI )                                      /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP23DNRR_top) )                           /* loop until dest end reached */

LLBL(K_GTP23DNRR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points2_perspective
 *
 * Reads S(0), S(1) (x, y) of every source vertex and writes four floats
 * per dest vertex:
 *     D(0) = x*m0
 *     D(1) = y*m5
 *     D(2) = m14
 *     D(3) = 0
 *
 * Loop registers:
 *     ESI = current source vertex (advanced by the source stride in EAX)
 *     EDI = current dest vertex   (advanced by 16 bytes)
 *     ECX = address one past the last dest vertex
 *     EDX = matrix base
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_perspective)
GLNAME(_mesa_sse_transform_points2_perspective):
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    /* FRAME_OFFSET accounts for the two registers pushed above. */
    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ESI = source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* EDI = dest GLvector4f */
    MOV_L( ARG_MATRIX, EDX )                               /* EDX = matrix */

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )                   /* ECX = number of source vertices */
    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP23PR_finish) )                           /* nothing to transform */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )                  /* EAX = source stride (bytes) */
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )      /* dest flags |= VEC_SIZE_4 */
    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )                   /* dest count = source count */
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )               /* dest size = 4 */

    SHL_L( CONST(4), ECX )                                 /* ECX = count * 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )                   /* ESI = first source vertex */
    MOV_L( REGOFF(V4F_START, EDI), EDI )                   /* EDI = first dest vertex */
    ADD_L( EDI, ECX )                                      /* ECX = end of dest data */

ALIGNTEXT32
    /* Loop-invariant values. */
    MOVSS( M(0), XMM1 )                                    /* XMM1 = - | - | -  | m0 */
    MOVSS( M(5), XMM2 )                                    /* XMM2 = - | - | -  | m5 */
    UNPCKLPS( XMM2, XMM1 )                                 /* XMM1 = - | - | m5 | m0 */
    MOVSS( M(14), XMM3 )                                   /* XMM3 = m14 */
    XORPS( XMM0, XMM0 )                                    /* XMM0 = 0 | 0 | 0 | 0 */

ALIGNTEXT32
LLBL(K_GTP23PR_top):
    MOVLPS( S(0), XMM4 )                                   /* - | - | y    | x    */
    MULPS( XMM1, XMM4 )                                    /* - | - | y*m5 | x*m0 */
    MOVLPS( XMM4, D(0) )                                   /* store D(0), D(1) */
    MOVSS( XMM3, D(2) )                                    /* D(2) = m14 */
    MOVSS( XMM0, D(3) )                                    /* D(3) = 0 */

LLBL(K_GTP23PR_skip):
    ADD_L( CONST(16), EDI )                                /* next dest vertex */
    ADD_L( EAX, ESI )                                      /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP23PR_top) )                             /* loop until dest end reached */

LLBL(K_GTP23PR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points2_2d
 *
 * Reads S(0), S(1) (x, y) of every source vertex and writes two floats
 * per dest vertex:
 *     D(0) = x*m0 + y*m4 + m12
 *     D(1) = x*m1 + y*m5 + m13
 * D(2) and D(3) are left untouched.
 *
 * Only the two low lanes of the packed registers carry meaningful data;
 * the upper lanes are never stored.
 *
 * Loop registers:
 *     ESI = current source vertex (advanced by the source stride in EAX)
 *     EDI = current dest vertex   (advanced by 16 bytes)
 *     ECX = address one past the last dest vertex
 *     EDX = matrix base
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_2d)
GLNAME(_mesa_sse_transform_points2_2d):
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    /* FRAME_OFFSET accounts for the two registers pushed above. */
    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ESI = source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* EDI = dest GLvector4f */
    MOV_L( ARG_MATRIX, EDX )                               /* EDX = matrix */

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )                   /* ECX = number of source vertices */
    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP23P2DR_finish) )                         /* nothing to transform */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )                  /* EAX = source stride (bytes) */
    OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )      /* dest flags |= VEC_SIZE_2 */
    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )                   /* dest count = source count */
    MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )               /* dest size = 2 */

    SHL_L( CONST(4), ECX )                                 /* ECX = count * 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )                   /* ESI = first source vertex */
    MOV_L( REGOFF(V4F_START, EDI), EDI )                   /* EDI = first dest vertex */
    ADD_L( EDI, ECX )                                      /* ECX = end of dest data */

ALIGNTEXT32
    /* Loop-invariant matrix pairs. */
    MOVLPS( M(0), XMM0 )                                   /* XMM0 = - | - | m1  | m0  */
    MOVLPS( M(4), XMM1 )                                   /* XMM1 = - | - | m5  | m4  */
    MOVLPS( M(12), XMM2 )                                  /* XMM2 = - | - | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP23P2DR_top):
    MOVSS( S(0), XMM3 )                                    /* x */
    SHUFPS( CONST(0x0), XMM3, XMM3 )                       /* x | x | x | x */
    MULPS( XMM0, XMM3 )                                    /* - | - | x*m1 | x*m0 */

    MOVSS( S(1), XMM4 )                                    /* y */
    SHUFPS( CONST(0x0), XMM4, XMM4 )                       /* y | y | y | y */
    MULPS( XMM1, XMM4 )                                    /* - | - | y*m5 | y*m4 */

    ADDPS( XMM4, XMM3 )                                    /* x-terms + y-terms */
    ADDPS( XMM2, XMM3 )                                    /* - | - | +m13 | +m12 */
    MOVLPS( XMM3, D(0) )                                   /* store D(0), D(1) */

LLBL(K_GTP23P2DR_skip):
    ADD_L( CONST(16), EDI )                                /* next dest vertex */
    ADD_L( EAX, ESI )                                      /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP23P2DR_top) )                           /* loop until dest end reached */

LLBL(K_GTP23P2DR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points2_2d_no_rot
 *
 * Reads S(0), S(1) (x, y) of every source vertex and writes two floats
 * per dest vertex:
 *     D(0) = x*m0 + m12
 *     D(1) = y*m5 + m13
 * D(2) and D(3) are left untouched.
 *
 * Only the two low lanes of the packed registers carry meaningful data;
 * the upper lanes are never stored.
 *
 * Loop registers:
 *     ESI = current source vertex (advanced by the source stride in EAX)
 *     EDI = current dest vertex   (advanced by 16 bytes)
 *     ECX = address one past the last dest vertex
 *     EDX = matrix base
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_2d_no_rot)
GLNAME(_mesa_sse_transform_points2_2d_no_rot):
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    /* FRAME_OFFSET accounts for the two registers pushed above. */
    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ESI = source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* EDI = dest GLvector4f */
    MOV_L( ARG_MATRIX, EDX )                               /* EDX = matrix */

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )                   /* ECX = number of source vertices */
    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP23P2DNRR_finish) )                       /* nothing to transform */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )                  /* EAX = source stride (bytes) */
    OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )      /* dest flags |= VEC_SIZE_2 */
    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )                   /* dest count = source count */
    MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )               /* dest size = 2 */

    SHL_L( CONST(4), ECX )                                 /* ECX = count * 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )                   /* ESI = first source vertex */
    MOV_L( REGOFF(V4F_START, EDI), EDI )                   /* EDI = first dest vertex */
    ADD_L( EDI, ECX )                                      /* ECX = end of dest data */

ALIGNTEXT32
    /* Loop-invariant values: scale pair and translation pair. */
    MOVSS( M(0), XMM1 )                                    /* XMM1 = - | - | -   | m0  */
    MOVSS( M(5), XMM2 )                                    /* XMM2 = - | - | -   | m5  */
    UNPCKLPS( XMM2, XMM1 )                                 /* XMM1 = - | - | m5  | m0  */
    MOVLPS( M(12), XMM2 )                                  /* XMM2 = - | - | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP23P2DNRR_top):
    MOVLPS( S(0), XMM0 )                                   /* - | - | y    | x    */
    MULPS( XMM1, XMM0 )                                    /* - | - | y*m5 | x*m0 */
    ADDPS( XMM2, XMM0 )                                    /* - | - | +m13 | +m12 */
    MOVLPS( XMM0, D(0) )                                   /* store D(0), D(1) */

LLBL(K_GTP23P2DNRR_skip):
    ADD_L( CONST(16), EDI )                                /* next dest vertex */
    ADD_L( EAX, ESI )                                      /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP23P2DNRR_top) )                         /* loop until dest end reached */

LLBL(K_GTP23P2DNRR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points2_3d
 *
 * Reads S(0), S(1) (x, y) of every source vertex and writes three floats
 * per dest vertex:
 *     D(0) = x*m0 + y*m4 + m12
 *     D(1) = x*m1 + y*m5 + m13
 *     D(2) = x*m2 + y*m6 + m14
 * D(3) is left untouched (lane 3 is computed but never stored).
 *
 * The matrix rows are fetched with MOVAPS, which requires the memory
 * operands at M(0), M(4) and M(12) to be 16-byte aligned.
 *
 * Loop registers:
 *     ESI = current source vertex (advanced by the source stride in EAX)
 *     EDI = current dest vertex   (advanced by 16 bytes)
 *     ECX = address one past the last dest vertex
 *     EDX = matrix base
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_3d)
GLNAME(_mesa_sse_transform_points2_3d):
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    /* FRAME_OFFSET accounts for the two registers pushed above. */
    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ESI = source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* EDI = dest GLvector4f */
    MOV_L( ARG_MATRIX, EDX )                               /* EDX = matrix */

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )                   /* ECX = number of source vertices */
    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP23P3DR_finish) )                         /* nothing to transform */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )                  /* EAX = source stride (bytes) */
    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )      /* dest flags |= VEC_SIZE_3 */
    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )                   /* dest count = source count */
    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )               /* dest size = 3 */

    SHL_L( CONST(4), ECX )                                 /* ECX = count * 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )                   /* ESI = first source vertex */
    MOV_L( REGOFF(V4F_START, EDI), EDI )                   /* EDI = first dest vertex */
    ADD_L( EDI, ECX )                                      /* ECX = end of dest data */

ALIGNTEXT32
    /* Loop-invariant matrix rows. */
    MOVAPS( M(0), XMM0 )                                   /* XMM0 = m3  | m2  | m1  | m0  */
    MOVAPS( M(4), XMM1 )                                   /* XMM1 = m7  | m6  | m5  | m4  */
    MOVAPS( M(12), XMM2 )                                  /* XMM2 = m15 | m14 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP23P3DR_top):
    MOVSS( S(0), XMM3 )                                    /* x */
    SHUFPS( CONST(0x0), XMM3, XMM3 )                       /* x | x | x | x */
    MULPS( XMM0, XMM3 )                                    /* x*m3 | x*m2 | x*m1 | x*m0 */

    MOVSS( S(1), XMM4 )                                    /* y */
    SHUFPS( CONST(0x0), XMM4, XMM4 )                       /* y | y | y | y */
    MULPS( XMM1, XMM4 )                                    /* y*m7 | y*m6 | y*m5 | y*m4 */

    ADDPS( XMM4, XMM3 )                                    /* x-terms + y-terms */
    ADDPS( XMM2, XMM3 )                                    /* +m15 | +m14 | +m13 | +m12 */
    MOVLPS( XMM3, D(0) )                                   /* store D(0), D(1) */
    UNPCKHPS( XMM3, XMM3 )                                 /* bring lane 2 down to lane 0 */
    MOVSS( XMM3, D(2) )                                    /* store D(2) */

LLBL(K_GTP23P3DR_skip):
    ADD_L( CONST(16), EDI )                                /* next dest vertex */
    ADD_L( EAX, ESI )                                      /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP23P3DR_top) )                           /* loop until dest end reached */

LLBL(K_GTP23P3DR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET

498
src/mesa/x86/sse_xform3.S Normal file
View File

@ -0,0 +1,498 @@
/* $Id: sse_xform3.S,v 1.1 2001/03/29 06:46:27 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/** TODO:
* - insert PREFETCH instructions to avoid cache-misses !
* - some more optimizations are possible...
* - for 40-50% more performance in the SSE-functions, the
* data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned !
*/
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
/*
 * Memory-operand shorthands used by the routines in this file: element i
 * (byte offset i*4, i.e. 32-bit elements) relative to
 *     S(i) -> ESI, which the routines point at the current source vertex
 *     D(i) -> EDI, which the routines point at the current dest vertex
 *     M(i) -> EDX, which the routines load with the matrix pointer
 * The argument is not parenthesised in the expansion, so pass a plain
 * constant rather than an expression such as a+1.
 */
#define S(i) REGOFF(i * 4, ESI)
#define D(i) REGOFF(i * 4, EDI)
#define M(i) REGOFF(i * 4, EDX)
/*
 * _mesa_sse_transform_points3_general
 *
 * Reads S(0), S(1), S(2) (x, y, z) of every source vertex and writes
 * four floats per dest vertex:
 *     D(0) = x*m0 + y*m4 + z*m8  + m12
 *     D(1) = x*m1 + y*m5 + z*m9  + m13
 *     D(2) = x*m2 + y*m6 + z*m10 + m14
 *     D(3) = x*m3 + y*m7 + z*m11 + m15
 *
 * MOVAPS is used for the four matrix rows and for the store to D(0), so
 * those memory operands must be 16-byte aligned.
 *
 * Loop registers:
 *     ESI = current source vertex (advanced by the source stride in EAX)
 *     EDI = current dest vertex   (advanced by 16 bytes)
 *     ECX = address one past the last dest vertex
 *     EDX = matrix base
 *
 * Lane comments below are written high lane first, low lane last, the
 * same way as in the other routines of this file.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_general)
GLNAME( _mesa_sse_transform_points3_general ):
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    /* FRAME_OFFSET accounts for the two registers pushed above. */
    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ESI = source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* EDI = dest GLvector4f */
    MOV_L( ARG_MATRIX, EDX )                               /* EDX = matrix */

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )                   /* ECX = number of source vertices */
    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTPGR_finish) )                             /* nothing to transform */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )                  /* EAX = source stride (bytes) */
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )      /* dest flags |= VEC_SIZE_4 */
    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )                   /* dest count = source count */
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )               /* dest size = 4 */

    SHL_L( CONST(4), ECX )                                 /* ECX = count * 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )                   /* ESI = first source vertex */
    MOV_L( REGOFF(V4F_START, EDI), EDI )                   /* EDI = first dest vertex */
    ADD_L( EDI, ECX )                                      /* ECX = end of dest data */

ALIGNTEXT32
    /* Loop-invariant matrix rows. */
    MOVAPS( M(0), XMM0 )                                   /* XMM0 = m3  | m2  | m1  | m0  */
    MOVAPS( M(4), XMM1 )                                   /* XMM1 = m7  | m6  | m5  | m4  */
    MOVAPS( M(8), XMM2 )                                   /* XMM2 = m11 | m10 | m9  | m8  */
    MOVAPS( M(12), XMM3 )                                  /* XMM3 = m15 | m14 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTPGR_top):
    /* Broadcast each source component across a full register. */
    MOVSS( S(0), XMM4 )                                    /* x */
    SHUFPS( CONST(0x0), XMM4, XMM4 )                       /* x | x | x | x */
    MOVSS( S(1), XMM5 )                                    /* y */
    SHUFPS( CONST(0x0), XMM5, XMM5 )                       /* y | y | y | y */
    MOVSS( S(2), XMM6 )                                    /* z */
    SHUFPS( CONST(0x0), XMM6, XMM6 )                       /* z | z | z | z */

    MULPS( XMM0, XMM4 )                                    /* x*m3  | x*m2  | x*m1 | x*m0 */
    MULPS( XMM1, XMM5 )                                    /* y*m7  | y*m6  | y*m5 | y*m4 */
    MULPS( XMM2, XMM6 )                                    /* z*m11 | z*m10 | z*m9 | z*m8 */

    ADDPS( XMM5, XMM4 )                                    /* x-terms + y-terms */
    ADDPS( XMM6, XMM4 )                                    /* + z-terms */
    ADDPS( XMM3, XMM4 )                                    /* +m15 | +m14 | +m13 | +m12 */
    MOVAPS( XMM4, D(0) )                                   /* store D(0)..D(3) */

LLBL(K_GTPGR_skip):
    ADD_L( CONST(16), EDI )                                /* next dest vertex */
    ADD_L( EAX, ESI )                                      /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTPGR_top) )                               /* loop until dest end reached */

LLBL(K_GTPGR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points3_identity
 *
 * Copies S(0), S(1), S(2) of every source vertex to D(0), D(1), D(2) of
 * the matching dest vertex (one 64-bit MOVLPS plus one MOVSS); the matrix
 * argument is never read.  D(3) is left untouched.
 *
 * The dest header (flags, count, size) is updated first.  If the source
 * and dest data pointers are then found to be equal, the copy loop is
 * skipped entirely.
 *
 * Loop registers:
 *     ESI = current source vertex (advanced by the source stride in EAX)
 *     EDI = current dest vertex   (advanced by 16 bytes)
 *     ECX = address one past the last dest vertex
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_identity)
GLNAME( _mesa_sse_transform_points3_identity ):
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    /* FRAME_OFFSET accounts for the two registers pushed above. */
    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ESI = source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* EDI = dest GLvector4f */

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )                   /* ECX = number of source vertices */
    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTPIR_finish) )                             /* nothing to copy */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )                  /* EAX = source stride (bytes) */
    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )      /* dest flags |= VEC_SIZE_3 */
    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )                   /* dest count = source count */
    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )               /* dest size = 3 */

    SHL_L( CONST(4), ECX )                                 /* ECX = count * 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )                   /* ESI = first source vertex */
    MOV_L( REGOFF(V4F_START, EDI), EDI )                   /* EDI = first dest vertex */
    ADD_L( EDI, ECX )                                      /* ECX = end of dest data */

    CMP_L( ESI, EDI )
    JE( LLBL(K_GTPIR_finish) )                             /* same data pointer: no copy needed */

ALIGNTEXT32
LLBL(K_GTPIR_top):
    MOVLPS( S(0), XMM0 )                                   /* fetch elements 0 and 1 */
    MOVLPS( XMM0, D(0) )                                   /* store D(0), D(1) */
    MOVSS( S(2), XMM0 )                                    /* fetch element 2 */
    MOVSS( XMM0, D(2) )                                    /* store D(2) */

LLBL(K_GTPIR_skip):
    ADD_L( CONST(16), EDI )                                /* next dest vertex */
    ADD_L( EAX, ESI )                                      /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTPIR_top) )                               /* loop until dest end reached */

LLBL(K_GTPIR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points3_3d_no_rot
 *
 * Reads S(0), S(1), S(2) (x, y, z) of every source vertex and writes
 * three floats per dest vertex:
 *     D(0) = x*m0  + m12
 *     D(1) = y*m5  + m13
 *     D(2) = z*m10 + m14
 * D(3) is left untouched.
 *
 * x and y are processed together in the two low packed lanes; z is
 * handled with scalar instructions.
 *
 * Loop registers:
 *     ESI = current source vertex (advanced by the source stride in EAX)
 *     EDI = current dest vertex   (advanced by 16 bytes)
 *     ECX = address one past the last dest vertex
 *     EDX = matrix base
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_3d_no_rot)
GLNAME(_mesa_sse_transform_points3_3d_no_rot):
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    /* FRAME_OFFSET accounts for the two registers pushed above. */
    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ESI = source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* EDI = dest GLvector4f */
    MOV_L( ARG_MATRIX, EDX )                               /* EDX = matrix */

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )                   /* ECX = number of source vertices */
    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP3DNRR_finish) )                          /* nothing to transform */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )                  /* EAX = source stride (bytes) */
    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )      /* dest flags |= VEC_SIZE_3 */
    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )                   /* dest count = source count */
    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )               /* dest size = 3 */

    SHL_L( CONST(4), ECX )                                 /* ECX = count * 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )                   /* ESI = first source vertex */
    MOV_L( REGOFF(V4F_START, EDI), EDI )                   /* EDI = first dest vertex */
    ADD_L( EDI, ECX )                                      /* ECX = end of dest data */

ALIGNTEXT32
    /* Loop-invariant values. */
    MOVSS( M(0), XMM1 )                                    /* XMM1 = - | - | -   | m0  */
    MOVSS( M(5), XMM2 )                                    /* XMM2 = - | - | -   | m5  */
    UNPCKLPS( XMM2, XMM1 )                                 /* XMM1 = - | - | m5  | m0  */
    MOVLPS( M(12), XMM2 )                                  /* XMM2 = - | - | m13 | m12 */
    MOVSS( M(10), XMM3 )                                   /* XMM3 = - | - | -   | m10 */
    MOVSS( M(14), XMM4 )                                   /* XMM4 = - | - | -   | m14 */

ALIGNTEXT32
LLBL(K_GTP3DNRR_top):
    /* x and y in the two low lanes. */
    MOVLPS( S(0), XMM0 )                                   /* - | - | y    | x    */
    MULPS( XMM1, XMM0 )                                    /* - | - | y*m5 | x*m0 */
    ADDPS( XMM2, XMM0 )                                    /* - | - | +m13 | +m12 */
    MOVLPS( XMM0, D(0) )                                   /* store D(0), D(1) */

    /* z as a scalar. */
    MOVSS( S(2), XMM0 )                                    /* z */
    MULSS( XMM3, XMM0 )                                    /* z*m10 */
    ADDSS( XMM4, XMM0 )                                    /* z*m10 + m14 */
    MOVSS( XMM0, D(2) )                                    /* store D(2) */

LLBL(K_GTP3DNRR_skip):
    ADD_L( CONST(16), EDI )                                /* next dest vertex */
    ADD_L( EAX, ESI )                                      /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP3DNRR_top) )                            /* loop until dest end reached */

LLBL(K_GTP3DNRR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points3_perspective
 *
 * Reads S(0), S(1), S(2) (x, y, z) of every source vertex and writes
 * four floats per dest vertex:
 *     D(0) = x*m0 + z*m8
 *     D(1) = y*m5 + z*m9
 *     D(2) = z*m10 + m14
 *     D(3) = -z
 *
 * z is re-read from S(2) for each of the three results that use it; the
 * last two reads happen after D(0)..D(2) have already been stored.
 *
 * Loop registers:
 *     ESI = current source vertex (advanced by the source stride in EAX)
 *     EDI = current dest vertex   (advanced by 16 bytes)
 *     ECX = address one past the last dest vertex
 *     EDX = matrix base
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_perspective)
GLNAME(_mesa_sse_transform_points3_perspective):
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    /* FRAME_OFFSET accounts for the two registers pushed above. */
    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ESI = source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* EDI = dest GLvector4f */
    MOV_L( ARG_MATRIX, EDX )                               /* EDX = matrix */

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )                   /* ECX = number of source vertices */
    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP3PR_finish) )                            /* nothing to transform */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )                  /* EAX = source stride (bytes) */
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )      /* dest flags |= VEC_SIZE_4 */
    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )                   /* dest count = source count */
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )               /* dest size = 4 */

    SHL_L( CONST(4), ECX )                                 /* ECX = count * 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )                   /* ESI = first source vertex */
    MOV_L( REGOFF(V4F_START, EDI), EDI )                   /* EDI = first dest vertex */
    ADD_L( EDI, ECX )                                      /* ECX = end of dest data */

ALIGNTEXT32
    /* Loop-invariant values. */
    MOVSS( M(0), XMM1 )                                    /* XMM1 = - | - | -  | m0 */
    MOVSS( M(5), XMM2 )                                    /* XMM2 = - | - | -  | m5 */
    UNPCKLPS( XMM2, XMM1 )                                 /* XMM1 = - | - | m5 | m0 */
    MOVLPS( M(8), XMM2 )                                   /* XMM2 = - | - | m9 | m8 */
    MOVSS( M(10), XMM3 )                                   /* XMM3 = m10 */
    MOVSS( M(14), XMM4 )                                   /* XMM4 = m14 */
    XORPS( XMM6, XMM6 )                                    /* XMM6 = 0 | 0 | 0 | 0 */

ALIGNTEXT32
LLBL(K_GTP3PR_top):
    /* D(0), D(1): scaled x/y plus the z contribution. */
    MOVLPS( S(0), XMM0 )                                   /* - | - | y    | x    */
    MULPS( XMM1, XMM0 )                                    /* - | - | y*m5 | x*m0 */
    MOVSS( S(2), XMM5 )                                    /* z */
    SHUFPS( CONST(0x0), XMM5, XMM5 )                       /* z | z | z | z */
    MULPS( XMM2, XMM5 )                                    /* - | - | z*m9 | z*m8 */
    ADDPS( XMM5, XMM0 )                                    /* - | - | y*m5+z*m9 | x*m0+z*m8 */
    MOVLPS( XMM0, D(0) )                                   /* store D(0), D(1) */

    /* D(2) = z*m10 + m14 */
    MOVSS( S(2), XMM0 )                                    /* z */
    MULSS( XMM3, XMM0 )                                    /* z*m10 */
    ADDSS( XMM4, XMM0 )                                    /* z*m10 + m14 */
    MOVSS( XMM0, D(2) )                                    /* store D(2) */

    /* D(3) = 0 - z */
    MOVSS( S(2), XMM0 )                                    /* z */
    MOVSS( XMM6, XMM5 )                                    /* low lane of XMM5 = 0 */
    SUBPS( XMM0, XMM5 )                                    /* packed subtract; only lane 0 (0 - z) is used */
    MOVSS( XMM5, D(3) )                                    /* store D(3) */

LLBL(K_GTP3PR_skip):
    ADD_L( CONST(16), EDI )                                /* next dest vertex */
    ADD_L( EAX, ESI )                                      /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP3PR_top) )                              /* loop until dest end reached */

LLBL(K_GTP3PR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points3_2d
 *
 * Reads S(0), S(1), S(2) (x, y, z) of every source vertex and writes
 * three floats per dest vertex:
 *     D(0) = x*m0 + y*m4 + m12
 *     D(1) = x*m1 + y*m5 + m13
 *     D(2) = z                      (copied unchanged)
 * D(3) is left untouched.
 *
 * Only the two low lanes of the packed registers carry meaningful data;
 * the upper lanes are never stored.
 *
 * Loop registers:
 *     ESI = current source vertex (advanced by the source stride in EAX)
 *     EDI = current dest vertex   (advanced by 16 bytes)
 *     ECX = address one past the last dest vertex
 *     EDX = matrix base
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_2d)
GLNAME(_mesa_sse_transform_points3_2d):
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    /* FRAME_OFFSET accounts for the two registers pushed above. */
    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ESI = source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* EDI = dest GLvector4f */
    MOV_L( ARG_MATRIX, EDX )                               /* EDX = matrix */

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )                   /* ECX = number of source vertices */
    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP3P2DR_finish) )                          /* nothing to transform */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )                  /* EAX = source stride (bytes) */
    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )      /* dest flags |= VEC_SIZE_3 */
    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )                   /* dest count = source count */
    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )               /* dest size = 3 */

    SHL_L( CONST(4), ECX )                                 /* ECX = count * 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )                   /* ESI = first source vertex */
    MOV_L( REGOFF(V4F_START, EDI), EDI )                   /* EDI = first dest vertex */
    ADD_L( EDI, ECX )                                      /* ECX = end of dest data */

ALIGNTEXT32
    /* Loop-invariant matrix pairs. */
    MOVLPS( M(0), XMM0 )                                   /* XMM0 = - | - | m1  | m0  */
    MOVLPS( M(4), XMM1 )                                   /* XMM1 = - | - | m5  | m4  */
    MOVLPS( M(12), XMM2 )                                  /* XMM2 = - | - | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP3P2DR_top):
    MOVSS( S(0), XMM3 )                                    /* x */
    SHUFPS( CONST(0x0), XMM3, XMM3 )                       /* x | x | x | x */
    MULPS( XMM0, XMM3 )                                    /* - | - | x*m1 | x*m0 */

    MOVSS( S(1), XMM4 )                                    /* y */
    SHUFPS( CONST(0x0), XMM4, XMM4 )                       /* y | y | y | y */
    MULPS( XMM1, XMM4 )                                    /* - | - | y*m5 | y*m4 */

    ADDPS( XMM4, XMM3 )                                    /* x-terms + y-terms */
    ADDPS( XMM2, XMM3 )                                    /* - | - | +m13 | +m12 */
    MOVLPS( XMM3, D(0) )                                   /* store D(0), D(1) */

    MOVSS( S(2), XMM3 )                                    /* z passes through */
    MOVSS( XMM3, D(2) )                                    /* store D(2) */

LLBL(K_GTP3P2DR_skip):
    ADD_L( CONST(16), EDI )                                /* next dest vertex */
    ADD_L( EAX, ESI )                                      /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP3P2DR_top) )                            /* loop until dest end reached */

LLBL(K_GTP3P2DR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points3_2d_no_rot
 *
 * Reads S(0), S(1), S(2) (x, y, z) of every source vertex and writes
 * three floats per dest vertex:
 *     D(0) = x*m0 + m12
 *     D(1) = y*m5 + m13
 *     D(2) = z                      (copied unchanged)
 * D(3) is left untouched.
 *
 * Only the two low lanes of the packed registers carry meaningful data;
 * the upper lanes are never stored.
 *
 * Loop registers:
 *     ESI = current source vertex (advanced by the source stride in EAX)
 *     EDI = current dest vertex   (advanced by 16 bytes)
 *     ECX = address one past the last dest vertex
 *     EDX = matrix base
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_2d_no_rot)
GLNAME(_mesa_sse_transform_points3_2d_no_rot):
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    /* FRAME_OFFSET accounts for the two registers pushed above. */
    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ESI = source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* EDI = dest GLvector4f */
    MOV_L( ARG_MATRIX, EDX )                               /* EDX = matrix */

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )                   /* ECX = number of source vertices */
    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP3P2DNRR_finish) )                        /* nothing to transform */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )                  /* EAX = source stride (bytes) */
    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )      /* dest flags |= VEC_SIZE_3 */
    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )                   /* dest count = source count */
    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )               /* dest size = 3 */

    SHL_L( CONST(4), ECX )                                 /* ECX = count * 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )                   /* ESI = first source vertex */
    MOV_L( REGOFF(V4F_START, EDI), EDI )                   /* EDI = first dest vertex */
    ADD_L( EDI, ECX )                                      /* ECX = end of dest data */

ALIGNTEXT32
    /* Loop-invariant values: scale pair and translation pair. */
    MOVSS( M(0), XMM1 )                                    /* XMM1 = - | - | -   | m0  */
    MOVSS( M(5), XMM2 )                                    /* XMM2 = - | - | -   | m5  */
    UNPCKLPS( XMM2, XMM1 )                                 /* XMM1 = - | - | m5  | m0  */
    MOVLPS( M(12), XMM2 )                                  /* XMM2 = - | - | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP3P2DNRR_top):
    MOVLPS( S(0), XMM0 )                                   /* - | - | y    | x    */
    MULPS( XMM1, XMM0 )                                    /* - | - | y*m5 | x*m0 */
    ADDPS( XMM2, XMM0 )                                    /* - | - | +m13 | +m12 */
    MOVLPS( XMM0, D(0) )                                   /* store D(0), D(1) */

    MOVSS( S(2), XMM0 )                                    /* z passes through */
    MOVSS( XMM0, D(2) )                                    /* store D(2) */

LLBL(K_GTP3P2DNRR_skip):
    ADD_L( CONST(16), EDI )                                /* next dest vertex */
    ADD_L( EAX, ESI )                                      /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP3P2DNRR_top) )                          /* loop until dest end reached */

LLBL(K_GTP3P2DNRR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points3_3d
 *
 * Reads S(0), S(1), S(2) (x, y, z) of every source vertex and writes
 * three floats per dest vertex:
 *     D(0) = x*m0 + y*m4 + z*m8  + m12
 *     D(1) = x*m1 + y*m5 + z*m9  + m13
 *     D(2) = x*m2 + y*m6 + z*m10 + m14
 * D(3) is left untouched (lane 3 is computed but never stored).
 *
 * The matrix rows are fetched with MOVAPS, which requires the memory
 * operands at M(0), M(4), M(8) and M(12) to be 16-byte aligned.
 *
 * Loop registers:
 *     ESI = current source vertex (advanced by the source stride in EAX)
 *     EDI = current dest vertex   (advanced by 16 bytes)
 *     ECX = address one past the last dest vertex
 *     EDX = matrix base
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_3d)
GLNAME(_mesa_sse_transform_points3_3d):
#define FRAME_OFFSET 8
    PUSH_L( ESI )
    PUSH_L( EDI )

    /* FRAME_OFFSET accounts for the two registers pushed above. */
    MOV_L( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )  /* ESI = source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )    /* EDI = dest GLvector4f */
    MOV_L( ARG_MATRIX, EDX )                               /* EDX = matrix */

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX )                   /* ECX = number of source vertices */
    TEST_L( ECX, ECX )
    JZ( LLBL(K_GTP3P3DR_finish) )                          /* nothing to transform */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )                  /* EAX = source stride (bytes) */
    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )      /* dest flags |= VEC_SIZE_3 */
    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )                   /* dest count = source count */
    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )               /* dest size = 3 */

    SHL_L( CONST(4), ECX )                                 /* ECX = count * 16 */
    MOV_L( REGOFF(V4F_START, ESI), ESI )                   /* ESI = first source vertex */
    MOV_L( REGOFF(V4F_START, EDI), EDI )                   /* EDI = first dest vertex */
    ADD_L( EDI, ECX )                                      /* ECX = end of dest data */

ALIGNTEXT32
    /* Loop-invariant matrix rows. */
    MOVAPS( M(0), XMM0 )                                   /* XMM0 = m3  | m2  | m1  | m0  */
    MOVAPS( M(4), XMM1 )                                   /* XMM1 = m7  | m6  | m5  | m4  */
    MOVAPS( M(8), XMM2 )                                   /* XMM2 = m11 | m10 | m9  | m8  */
    MOVAPS( M(12), XMM3 )                                  /* XMM3 = m15 | m14 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP3P3DR_top):
    MOVSS( S(0), XMM4 )                                    /* x */
    SHUFPS( CONST(0x0), XMM4, XMM4 )                       /* x | x | x | x */
    MULPS( XMM0, XMM4 )                                    /* x*m3  | x*m2  | x*m1 | x*m0 */

    MOVSS( S(1), XMM5 )                                    /* y */
    SHUFPS( CONST(0x0), XMM5, XMM5 )                       /* y | y | y | y */
    MULPS( XMM1, XMM5 )                                    /* y*m7  | y*m6  | y*m5 | y*m4 */

    MOVSS( S(2), XMM6 )                                    /* z */
    SHUFPS( CONST(0x0), XMM6, XMM6 )                       /* z | z | z | z */
    MULPS( XMM2, XMM6 )                                    /* z*m11 | z*m10 | z*m9 | z*m8 */

    ADDPS( XMM5, XMM4 )                                    /* x-terms + y-terms */
    ADDPS( XMM6, XMM4 )                                    /* + z-terms */
    ADDPS( XMM3, XMM4 )                                    /* +m15 | +m14 | +m13 | +m12 */
    MOVLPS( XMM4, D(0) )                                   /* store D(0), D(1) */
    UNPCKHPS( XMM4, XMM4 )                                 /* bring lane 2 down to lane 0 */
    MOVSS( XMM4, D(2) )                                    /* store D(2) */

LLBL(K_GTP3P3DR_skip):
    ADD_L( CONST(16), EDI )                                /* next dest vertex */
    ADD_L( EAX, ESI )                                      /* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP3P3DR_top) )                            /* loop until dest end reached */

LLBL(K_GTP3P3DR_finish):
    POP_L( EDI )
    POP_L( ESI )
    RET
#undef FRAME_OFFSET

226
src/mesa/x86/sse_xform4.S Normal file
View File

@ -0,0 +1,226 @@
/* $Id: sse_xform4.S,v 1.1 2001/03/29 06:46:27 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
/* Every function in this file pushes ESI and EDI (8 bytes) before it reads
 * its arguments; presumably the ARG_* macros add FRAME_OFFSET to skip them
 * (confirm in xform_args.h). */
#define FRAME_OFFSET 8
/* Element i (4 bytes each) of the current source vertex, the current dest
 * vertex and the matrix, addressed through ESI, EDI and EDX respectively. */
#define SRC(i) REGOFF(i * 4, ESI)
#define DST(i) REGOFF(i * 4, EDI)
#define MAT(i) REGOFF(i * 4, EDX)
/* Packs four values into one immediate as base-4 digits, r0 most significant
 * (presumably a SHUFPS lane selector).  Not referenced by the functions
 * in this file. */
#define SELECT(r0, r1, r2, r3) CONST( r0 * 64 + r1 * 16 + r2 * 4 + r3 )
/*
 * _mesa_sse_transform_points4_general
 *
 * SSE transform of 4-component points by a general 4x4 matrix.  For each
 * source vertex (ox, oy, oz, ow) and k = 0..3:
 *   dest[k] = ox*m(k) + oy*m(4+k) + oz*m(8+k) + ow*m(12+k)
 * The dest vector is tagged VEC_SIZE_4 and its size field is set to 4.
 *
 * Registers: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 *            EAX = source stride in bytes, ECX = vertices left.
 * ESI and EDI are saved on entry and restored on exit.
 * The matrix loads and the dest store use MOVAPS, so both the matrix and
 * every dest vertex (16 bytes apart) must be 16-byte aligned.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_sse_transform_points4_general )
GLNAME( _mesa_sse_transform_points4_general ):
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )			/* source GLvector4f */
	MOV_L( ARG_DEST, EDI )				/* dest GLvector4f */
	MOV_L( ARG_MATRIX, EDX )			/* matrix */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )				/* nothing to do for an empty vector */
	JZ( LLBL( sse_p4_general_exit ) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )		/* bytes between source vertices */

	/* Describe the result in the dest vector header. */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	/* From here on ESI/EDI walk the vertex data itself. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )
	PREFETCHT0( REGIND(ESI) )

	/* The four matrix columns stay resident in XMM4..XMM7. */
	MOVAPS( MAT(0), XMM4 )				/* m3  | m2  | m1  | m0  */
	MOVAPS( MAT(4), XMM5 )				/* m7  | m6  | m5  | m4  */
	MOVAPS( MAT(8), XMM6 )				/* m11 | m10 | m9  | m8  */
	MOVAPS( MAT(12), XMM7 )				/* m15 | m14 | m13 | m12 */

ALIGNTEXT16
LLBL( sse_p4_general_next ):
	/* Fetch the four source components ... */
	MOVSS( SRC(0), XMM0 )				/* ox */
	MOVSS( SRC(1), XMM1 )				/* oy */
	MOVSS( SRC(2), XMM2 )				/* oz */
	MOVSS( SRC(3), XMM3 )				/* ow */

	/* ... replicate each one across all four lanes ... */
	SHUFPS( CONST(0x0), XMM0, XMM0 )
	SHUFPS( CONST(0x0), XMM1, XMM1 )
	SHUFPS( CONST(0x0), XMM2, XMM2 )
	SHUFPS( CONST(0x0), XMM3, XMM3 )

	/* ... scale the matching matrix column ... */
	MULPS( XMM4, XMM0 )				/* ox * column 0 */
	MULPS( XMM5, XMM1 )				/* oy * column 1 */
	MULPS( XMM6, XMM2 )				/* oz * column 2 */
	MULPS( XMM7, XMM3 )				/* ow * column 3 */

	/* ... and accumulate in x, y, z, w order. */
	ADDPS( XMM1, XMM0 )
	ADDPS( XMM2, XMM0 )
	ADDPS( XMM3, XMM0 )

	MOVAPS( XMM0, DST(0) )				/* write all four results */

	ADD_L( CONST(16), EDI )				/* next dest vertex */
	ADD_L( EAX, ESI )				/* next source vertex */
	DEC_L( ECX )
	JNZ( LLBL( sse_p4_general_next ) )

LLBL( sse_p4_general_exit ):
	POP_L( EDI )
	POP_L( ESI )
	RET
/*
 * _mesa_sse_transform_points4_3d
 *
 * SSE transform of 4-component points by a 3D matrix.  For each source
 * vertex (ox, oy, oz, ow) the full product
 *   ox*m(k) + oy*m(4+k) + oz*m(8+k) + ow*m(12+k),  k = 0..3
 * is stored to the dest vertex, after which component 3 is overwritten
 * with the value read back from source component 3.
 *
 * NOTE(review): all four dest components are written, yet the dest vector
 * is tagged VEC_SIZE_3 with size 3 -- confirm against the C reference
 * whether size 4 was intended.
 *
 * Registers: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 *            EAX = source stride in bytes, ECX = one past the last dest
 *            vertex (dest vertices are 16 bytes apart).
 * ESI and EDI are saved on entry and restored on exit.
 * MOVAPS is used for the matrix loads and the dest store, so both must be
 * 16-byte aligned.
 */
ALIGNTEXT4
GLOBL GLNAME( _mesa_sse_transform_points4_3d )
GLNAME( _mesa_sse_transform_points4_3d ):
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )			/* source GLvector4f */
	MOV_L( ARG_DEST, EDI )				/* dest GLvector4f */
	MOV_L( ARG_MATRIX, EDX )			/* matrix */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )				/* empty vector: nothing to do */
	JE( LLBL( sse_p4_3d_exit ) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )		/* bytes between source vertices */

	/* Describe the result in the dest vector header. */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	/* Turn the count into an end pointer: ECX = dest start + count * 16. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

	/* The four matrix columns stay resident in XMM0..XMM3. */
	MOVAPS( MAT(0), XMM0 )				/* m3  | m2  | m1  | m0  */
	MOVAPS( MAT(4), XMM1 )				/* m7  | m6  | m5  | m4  */
	MOVAPS( MAT(8), XMM2 )				/* m11 | m10 | m9  | m8  */
	MOVAPS( MAT(12), XMM3 )				/* m15 | m14 | m13 | m12 */

ALIGNTEXT32
LLBL( sse_p4_3d_next ):
	/* Fetch the four source components ... */
	MOVSS( SRC(0), XMM4 )				/* ox */
	MOVSS( SRC(1), XMM5 )				/* oy */
	MOVSS( SRC(2), XMM6 )				/* oz */
	MOVSS( SRC(3), XMM7 )				/* ow */

	/* ... replicate each one across all four lanes ... */
	SHUFPS( CONST(0x0), XMM4, XMM4 )
	SHUFPS( CONST(0x0), XMM5, XMM5 )
	SHUFPS( CONST(0x0), XMM6, XMM6 )
	SHUFPS( CONST(0x0), XMM7, XMM7 )

	/* ... scale the matching matrix column ... */
	MULPS( XMM0, XMM4 )				/* ox * column 0 */
	MULPS( XMM1, XMM5 )				/* oy * column 1 */
	MULPS( XMM2, XMM6 )				/* oz * column 2 */
	MULPS( XMM3, XMM7 )				/* ow * column 3 */

	/* ... and accumulate in x, y, z, w order. */
	ADDPS( XMM5, XMM4 )
	ADDPS( XMM6, XMM4 )
	ADDPS( XMM7, XMM4 )

	MOVAPS( XMM4, DST(0) )				/* write all four results */

	/* Re-read source component 3 only after the store above and copy it
	 * into dest component 3; the ordering matters when source and dest
	 * share memory. */
	MOVSS( SRC(3), XMM4 )
	MOVSS( XMM4, DST(3) )

	ADD_L( CONST(16), EDI )				/* next dest vertex */
	ADD_L( EAX, ESI )				/* next source vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL( sse_p4_3d_next ) )

LLBL( sse_p4_3d_exit ):
	POP_L( EDI )
	POP_L( ESI )
	RET
/*
 * _mesa_sse_transform_points4_identity
 *
 * Identity transform of 4-component points: every source vertex (16 bytes)
 * is copied unchanged to the next dest vertex.  The dest vector is tagged
 * VEC_SIZE_4 and its size field is set to 4.  The matrix argument is not
 * needed and is never read.
 *
 * Registers: ESI = source vertex, EDI = dest vertex,
 *            EAX = source stride in bytes, ECX = vertices left.
 * ESI and EDI are saved on entry and restored on exit.
 *
 * The copy is done as two 8-byte MOVLPS transfers instead of one MOVAPS.
 * MOVAPS faults on an address that is not 16-byte aligned, and the source
 * pointer is advanced by an arbitrary stride, so its alignment cannot be
 * relied upon; MOVLPS has no alignment requirement.  Both halves are loaded
 * before either is stored, exactly like the single 16-byte load did.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_sse_transform_points4_identity )
GLNAME( _mesa_sse_transform_points4_identity ):
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX ) /* verify non-zero count */
JE( LLBL( sse_identity_done ) )
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ALIGNTEXT16
LLBL( sse_identity_loop ):
PREFETCHNTA( REGOFF(32, ESI) )
MOVLPS( SRC(0), XMM0 ) /* oy | ox */
MOVLPS( SRC(2), XMM1 ) /* ow | oz */
ADD_L( EAX, ESI )
MOVLPS( XMM0, DST(0) ) /* ->D(1) | ->D(0) */
MOVLPS( XMM1, DST(2) ) /* ->D(3) | ->D(2) */
ADD_L( CONST(16), EDI )
DEC_L( ECX )
JNZ( LLBL( sse_identity_loop ) )
LLBL( sse_identity_done ):
POP_L( EDI )
POP_L( ESI )
RET

View File

@ -1,4 +1,4 @@
/* $Id: x86.c,v 1.19 2001/03/28 20:44:44 gareth Exp $ */
/* $Id: x86.c,v 1.20 2001/03/29 06:46:27 gareth Exp $ */
/*
* Mesa 3-D graphics library
@ -50,40 +50,37 @@
const GLubyte flag
#define DECLARE_XFORM_GROUP( pfx, sz, masked ) \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_general_##masked( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_identity_##masked( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d_no_rot_##masked( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_perspective_##masked( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d_##masked( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d_no_rot_##masked( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d_##masked( XFORM_ARGS );
#define DECLARE_XFORM_GROUP( pfx, sz ) \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_general( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_identity( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d_no_rot( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_perspective( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d_no_rot( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d( XFORM_ARGS );
#define ASSIGN_XFORM_GROUP( pfx, cma, sz, masked ) \
_mesa_transform_tab[cma][sz][MATRIX_GENERAL] = \
_mesa_##pfx##_transform_points##sz##_general_##masked; \
_mesa_transform_tab[cma][sz][MATRIX_IDENTITY] = \
_mesa_##pfx##_transform_points##sz##_identity_##masked; \
_mesa_transform_tab[cma][sz][MATRIX_3D_NO_ROT] = \
_mesa_##pfx##_transform_points##sz##_3d_no_rot_##masked; \
_mesa_transform_tab[cma][sz][MATRIX_PERSPECTIVE] = \
_mesa_##pfx##_transform_points##sz##_perspective_##masked; \
_mesa_transform_tab[cma][sz][MATRIX_2D] = \
_mesa_##pfx##_transform_points##sz##_2d_##masked; \
_mesa_transform_tab[cma][sz][MATRIX_2D_NO_ROT] = \
_mesa_##pfx##_transform_points##sz##_2d_no_rot_##masked; \
_mesa_transform_tab[cma][sz][MATRIX_3D] = \
_mesa_##pfx##_transform_points##sz##_3d_##masked;
#define ASSIGN_XFORM_GROUP( pfx, sz ) \
_mesa_transform_tab[0][sz][MATRIX_GENERAL] = \
_mesa_##pfx##_transform_points##sz##_general; \
_mesa_transform_tab[0][sz][MATRIX_IDENTITY] = \
_mesa_##pfx##_transform_points##sz##_identity; \
_mesa_transform_tab[0][sz][MATRIX_3D_NO_ROT] = \
_mesa_##pfx##_transform_points##sz##_3d_no_rot; \
_mesa_transform_tab[0][sz][MATRIX_PERSPECTIVE] = \
_mesa_##pfx##_transform_points##sz##_perspective; \
_mesa_transform_tab[0][sz][MATRIX_2D] = \
_mesa_##pfx##_transform_points##sz##_2d; \
_mesa_transform_tab[0][sz][MATRIX_2D_NO_ROT] = \
_mesa_##pfx##_transform_points##sz##_2d_no_rot; \
_mesa_transform_tab[0][sz][MATRIX_3D] = \
_mesa_##pfx##_transform_points##sz##_3d;
#ifdef USE_X86_ASM
DECLARE_XFORM_GROUP( x86, 2, raw )
DECLARE_XFORM_GROUP( x86, 3, raw )
DECLARE_XFORM_GROUP( x86, 4, raw )
DECLARE_XFORM_GROUP( x86, 2, masked )
DECLARE_XFORM_GROUP( x86, 3, masked )
DECLARE_XFORM_GROUP( x86, 4, masked )
DECLARE_XFORM_GROUP( x86, 2 )
DECLARE_XFORM_GROUP( x86, 3 )
DECLARE_XFORM_GROUP( x86, 4 )
extern GLvector4f * _ASMAPI
@ -119,13 +116,9 @@ _mesa_v16_x86_general_xform( GLfloat *dest,
void _mesa_init_x86_transform_asm( void )
{
#ifdef USE_X86_ASM
ASSIGN_XFORM_GROUP( x86, 0, 2, raw );
ASSIGN_XFORM_GROUP( x86, 0, 3, raw );
ASSIGN_XFORM_GROUP( x86, 0, 4, raw );
/* ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 2, masked ); */
/* ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 3, masked ); */
/* ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 4, masked ); */
ASSIGN_XFORM_GROUP( x86, 2 );
ASSIGN_XFORM_GROUP( x86, 3 );
ASSIGN_XFORM_GROUP( x86, 4 );
/* XXX this function has been found to cause FP overflow exceptions */
_mesa_clip_tab[4] = _mesa_x86_cliptest_points4;

536
src/mesa/x86/x86_xform2.S Normal file
View File

@ -0,0 +1,536 @@
/* $Id: x86_xform2.S,v 1.1 2001/03/29 06:46:27 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
/* IEEE-754 single-precision bit patterns of 1.0 (0x3f800000) and 0.0, for
 * writing float constants with integer moves. */
#define FP_ONE 1065353216
#define FP_ZERO 0
/* Element i (4 bytes each) of the current source vertex, the current dest
 * vertex and the matrix, addressed through ESI, EDI and EDX respectively. */
#define SRC(i) REGOFF(i * 4, ESI)
#define DST(i) REGOFF(i * 4, EDI)
#define MAT(i) REGOFF(i * 4, EDX)
/*
 * _mesa_x86_transform_points2_general
 *
 * x87 transform of 2-component points (ox, oy) by a general 4x4 matrix.
 * For each vertex:
 *   dest[0] = ox*m0 + oy*m4 + m12
 *   dest[1] = ox*m1 + oy*m5 + m13
 *   dest[2] = ox*m2 + oy*m6 + m14
 *   dest[3] = ox*m3 + oy*m7 + m15
 * The dest vector is tagged VEC_SIZE_4 and its size field is set to 4.
 *
 * Registers: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 *            EAX = source stride in bytes, ECX = one past the last dest
 *            vertex (dest vertices are 16 bytes apart).
 * ESI and EDI are saved on entry and restored on exit; FRAME_OFFSET is the
 * number of bytes pushed (presumably consumed by the ARG_* macros).
 * The trailing comments list the x87 stack with ST(0) first; F4..F7 are the
 * four running results, F0..F3 the oy products being added into them.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_general )
GLNAME( _mesa_x86_transform_points2_general ):
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Empty source vector: leave dest untouched. */
TEST_L( ECX, ECX )
JZ( LLBL( x86_p2_gr_done ) )
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16, the loop end pointer. */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
ALIGNTEXT16
LLBL( x86_p2_gr_loop ):
/* ox times matrix elements 0..3. */
FLD_S( SRC(0) ) /* F4 */
FMUL_S( MAT(0) )
FLD_S( SRC(0) ) /* F5 F4 */
FMUL_S( MAT(1) )
FLD_S( SRC(0) ) /* F6 F5 F4 */
FMUL_S( MAT(2) )
FLD_S( SRC(0) ) /* F7 F6 F5 F4 */
FMUL_S( MAT(3) )
/* oy times matrix elements 4..7. */
FLD_S( SRC(1) ) /* F0 F7 F6 F5 F4 */
FMUL_S( MAT(4) )
FLD_S( SRC(1) ) /* F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT(5) )
FLD_S( SRC(1) ) /* F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT(6) )
FLD_S( SRC(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT(7) )
/* Fold each oy product into its result: F4+=F0, F5+=F1, F6+=F2, F7+=F3. */
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */
/* Add matrix elements 12..15 to the four results. */
FXCH( ST(3) ) /* F4 F6 F5 F7 */
FADD_S( MAT(12) )
FXCH( ST(2) ) /* F5 F6 F4 F7 */
FADD_S( MAT(13) )
FXCH( ST(1) ) /* F6 F5 F4 F7 */
FADD_S( MAT(14) )
FXCH( ST(3) ) /* F7 F5 F4 F6 */
FADD_S( MAT(15) )
/* Store the results; the x87 stack is empty again afterwards. */
FXCH( ST(2) ) /* F4 F5 F7 F6 */
FSTP_S( DST(0) ) /* F5 F7 F6 */
FSTP_S( DST(1) ) /* F7 F6 */
FXCH( ST(1) ) /* F6 F7 */
FSTP_S( DST(2) ) /* F7 */
FSTP_S( DST(3) ) /* */
LLBL( x86_p2_gr_skip ):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL( x86_p2_gr_loop ) )
LLBL( x86_p2_gr_done ):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points2_perspective
 *
 * x87 transform of 2-component points (ox, oy) by a perspective matrix.
 * For each vertex:
 *   dest[0] = ox*m0
 *   dest[1] = oy*m5
 *   dest[2] = m14          (copied as raw bits)
 *   dest[3] = 0.0          (FP_ZERO)
 * The dest vector is tagged VEC_SIZE_4 and its size field is set to 4.
 *
 * Registers: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 *            EAX = source stride in bytes, ECX = one past the last dest
 *            vertex (dest vertices are 16 bytes apart), EBX = bits of m14.
 * ESI, EDI and EBX are saved on entry and restored on exit; FRAME_OFFSET is
 * the number of bytes pushed (presumably consumed by the ARG_* macros).
 * The trailing comments list the x87 stack with ST(0) first.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_perspective )
GLNAME( _mesa_x86_transform_points2_perspective ):
#define FRAME_OFFSET 12
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Empty source vector: leave dest untouched. */
TEST_L( ECX, ECX )
JZ( LLBL( x86_p2_pr_done ) )
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16, the loop end pointer. */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
/* m14 is the same for every vertex, so fetch it once as an integer. */
MOV_L( MAT(14), EBX )
ALIGNTEXT16
LLBL( x86_p2_pr_loop ):
FLD_S( SRC(0) ) /* F4 */
FMUL_S( MAT(0) )
FLD_S( SRC(1) ) /* F1 F4 */
FMUL_S( MAT(5) )
FXCH( ST(1) ) /* F4 F1 */
FSTP_S( DST(0) ) /* F1 */
FSTP_S( DST(1) ) /* */
/* z and w are constants, written with plain integer stores. */
MOV_L( EBX, DST(2) )
MOV_L( CONST(FP_ZERO), DST(3) )
LLBL( x86_p2_pr_skip ):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL( x86_p2_pr_loop ) )
LLBL( x86_p2_pr_done ):
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points2_3d
 *
 * x87 transform of 2-component points (ox, oy) by a 3D matrix.
 * For each vertex:
 *   dest[0] = ox*m0 + oy*m4 + m12
 *   dest[1] = ox*m1 + oy*m5 + m13
 *   dest[2] = ox*m2 + oy*m6 + m14
 * dest[3] is not written.  The dest vector is tagged VEC_SIZE_3 and its
 * size field is set to 3.
 *
 * Registers: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 *            EAX = source stride in bytes, ECX = one past the last dest
 *            vertex (dest vertices are 16 bytes apart).
 * ESI and EDI are saved on entry and restored on exit; FRAME_OFFSET is the
 * number of bytes pushed (presumably consumed by the ARG_* macros).
 * The trailing comments list the x87 stack with ST(0) first; F4..F6 are the
 * running results, F0..F2 the oy products being added into them.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_3d )
GLNAME( _mesa_x86_transform_points2_3d ):
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Empty source vector: leave dest untouched. */
TEST_L( ECX, ECX )
JZ( LLBL( x86_p2_3dr_done ) )
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16, the loop end pointer. */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
ALIGNTEXT16
LLBL( x86_p2_3dr_loop ):
/* ox times matrix elements 0..2. */
FLD_S( SRC(0) ) /* F4 */
FMUL_S( MAT(0) )
FLD_S( SRC(0) ) /* F5 F4 */
FMUL_S( MAT(1) )
FLD_S( SRC(0) ) /* F6 F5 F4 */
FMUL_S( MAT(2) )
/* oy times matrix elements 4..6. */
FLD_S( SRC(1) ) /* F0 F6 F5 F4 */
FMUL_S( MAT(4) )
FLD_S( SRC(1) ) /* F1 F0 F6 F5 F4 */
FMUL_S( MAT(5) )
FLD_S( SRC(1) ) /* F2 F1 F0 F6 F5 F4 */
FMUL_S( MAT(6) )
/* Fold each oy product into its result: F4+=F0, F5+=F1, F6+=F2. */
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */
FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */
FADDP( ST(0), ST(1) ) /* F6 F5 F4 */
/* Add matrix elements 12..14 to the three results. */
FXCH( ST(2) ) /* F4 F5 F6 */
FADD_S( MAT(12) )
FXCH( ST(1) ) /* F5 F4 F6 */
FADD_S( MAT(13) )
FXCH( ST(2) ) /* F6 F4 F5 */
FADD_S( MAT(14) )
/* Store the results; the x87 stack is empty again afterwards. */
FXCH( ST(1) ) /* F4 F6 F5 */
FSTP_S( DST(0) ) /* F6 F5 */
FXCH( ST(1) ) /* F5 F6 */
FSTP_S( DST(1) ) /* F6 */
FSTP_S( DST(2) ) /* */
LLBL( x86_p2_3dr_skip ):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL( x86_p2_3dr_loop ) )
LLBL( x86_p2_3dr_done ):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points2_3d_no_rot
 *
 * x87 transform of 2-component points (ox, oy) by a 3D matrix without
 * rotation (scale and translate only).  For each vertex:
 *   dest[0] = ox*m0 + m12
 *   dest[1] = oy*m5 + m13
 *   dest[2] = m14          (copied as raw bits)
 * dest[3] is not written.  The dest vector is tagged VEC_SIZE_3 and its
 * size field is set to 3.
 *
 * Registers: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 *            EAX = source stride in bytes, ECX = one past the last dest
 *            vertex (dest vertices are 16 bytes apart), EBX = bits of m14.
 * ESI, EDI and EBX are saved on entry and restored on exit; FRAME_OFFSET is
 * the number of bytes pushed (presumably consumed by the ARG_* macros).
 * The trailing comments list the x87 stack with ST(0) first.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_3d_no_rot )
GLNAME( _mesa_x86_transform_points2_3d_no_rot ):
#define FRAME_OFFSET 12
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Empty source vector: leave dest untouched. */
TEST_L( ECX, ECX )
JZ( LLBL( x86_p2_3dnrr_done ) )
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16, the loop end pointer. */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
/* m14 is the same for every vertex, so fetch it once as an integer. */
MOV_L( MAT(14), EBX )
ALIGNTEXT16
LLBL( x86_p2_3dnrr_loop ):
FLD_S( SRC(0) ) /* F4 */
FMUL_S( MAT(0) )
FLD_S( SRC(1) ) /* F1 F4 */
FMUL_S( MAT(5) )
FXCH( ST(1) ) /* F4 F1 */
FADD_S( MAT(12) )
/* F5 starts as m13 and then receives the oy*m5 product (F1). */
FLD_S( MAT(13) ) /* F5 F4 F1 */
FXCH( ST(2) ) /* F1 F4 F5 */
FADDP( ST(0), ST(2) ) /* F4 F5 */
FSTP_S( DST(0) ) /* F5 */
FSTP_S( DST(1) ) /* */
MOV_L( EBX, DST(2) )
LLBL( x86_p2_3dnrr_skip ):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL( x86_p2_3dnrr_loop ) )
LLBL( x86_p2_3dnrr_done ):
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points2_2d
 *
 * x87 transform of 2-component points (ox, oy) by a 2D matrix.
 * For each vertex:
 *   dest[0] = ox*m0 + oy*m4 + m12
 *   dest[1] = ox*m1 + oy*m5 + m13
 * dest[2] and dest[3] are not written.  The dest vector is tagged
 * VEC_SIZE_2 and its size field is set to 2.
 *
 * Registers: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 *            EAX = source stride in bytes, ECX = one past the last dest
 *            vertex (dest vertices are 16 bytes apart).
 * ESI and EDI are saved on entry and restored on exit; FRAME_OFFSET is the
 * number of bytes pushed (presumably consumed by the ARG_* macros).
 * The trailing comments list the x87 stack with ST(0) first.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_2d )
GLNAME( _mesa_x86_transform_points2_2d ):
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Empty source vector: leave dest untouched. */
TEST_L( ECX, ECX )
JZ( LLBL( x86_p2_2dr_done ) )
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16, the loop end pointer. */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
ALIGNTEXT16
LLBL( x86_p2_2dr_loop ):
FLD_S( SRC(0) ) /* F4 */
FMUL_S( MAT(0) )
FLD_S( SRC(0) ) /* F5 F4 */
FMUL_S( MAT(1) )
FLD_S( SRC(1) ) /* F0 F5 F4 */
FMUL_S( MAT(4) )
FLD_S( SRC(1) ) /* F1 F0 F5 F4 */
FMUL_S( MAT(5) )
/* Fold the oy products into the results: F4+=F0, F5+=F1. */
FXCH( ST(1) ) /* F0 F1 F5 F4 */
FADDP( ST(0), ST(3) ) /* F1 F5 F4 */
FADDP( ST(0), ST(1) ) /* F5 F4 */
/* Add the translation m12, m13. */
FXCH( ST(1) ) /* F4 F5 */
FADD_S( MAT(12) )
FXCH( ST(1) ) /* F5 F4 */
FADD_S( MAT(13) )
FXCH( ST(1) ) /* F4 F5 */
FSTP_S( DST(0) ) /* F5 */
FSTP_S( DST(1) ) /* */
LLBL( x86_p2_2dr_skip ):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL( x86_p2_2dr_loop ) )
LLBL( x86_p2_2dr_done ):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points2_2d_no_rot
 *
 * x87 transform of 2-component points (ox, oy) by a 2D matrix without
 * rotation (scale and translate only).  For each vertex:
 *   dest[0] = ox*m0 + m12
 *   dest[1] = oy*m5 + m13
 * dest[2] and dest[3] are not written.  The dest vector is tagged
 * VEC_SIZE_2 and its size field is set to 2.
 *
 * Registers: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 *            EAX = source stride in bytes, ECX = one past the last dest
 *            vertex (dest vertices are 16 bytes apart).
 * ESI and EDI are saved on entry and restored on exit; FRAME_OFFSET is the
 * number of bytes pushed (presumably consumed by the ARG_* macros).
 * The trailing comments list the x87 stack with ST(0) first.
 * NOTE(review): the entry point uses ALIGNTEXT4 where the other functions
 * in this file use ALIGNTEXT16 -- confirm whether that is deliberate.
 */
ALIGNTEXT4
GLOBL GLNAME( _mesa_x86_transform_points2_2d_no_rot )
GLNAME( _mesa_x86_transform_points2_2d_no_rot ):
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Empty source vector: leave dest untouched. */
TEST_L( ECX, ECX )
JZ( LLBL( x86_p2_2dnrr_done ) )
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16, the loop end pointer. */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
ALIGNTEXT16
LLBL( x86_p2_2dnrr_loop ):
FLD_S( SRC(0) ) /* F4 */
FMUL_S( MAT(0) )
FLD_S( SRC(1) ) /* F1 F4 */
FMUL_S( MAT(5) )
FXCH( ST(1) ) /* F4 F1 */
FADD_S( MAT(12) )
/* F5 starts as m13 and then receives the oy*m5 product (F1). */
FLD_S( MAT(13) ) /* F5 F4 F1 */
FXCH( ST(2) ) /* F1 F4 F5 */
FADDP( ST(0), ST(2) ) /* F4 F5 */
FSTP_S( DST(0) ) /* F5 */
FSTP_S( DST(1) ) /* */
LLBL( x86_p2_2dnrr_skip ):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL( x86_p2_2dnrr_loop ) )
LLBL( x86_p2_2dnrr_done ):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points2_identity
 *
 * Identity transform of 2-component points: components 0 and 1 of every
 * source vertex are copied, as raw 32-bit words, to the matching dest
 * vertex.  Components 2 and 3 of dest are not written.  The dest vector is
 * tagged VEC_SIZE_2 and its size field is set to 2.  When the source and
 * dest data start at the same address the copy loop is skipped.
 *
 * Registers: ESI = source vertex, EDI = dest vertex,
 *            EAX = source stride in bytes, ECX = one past the last dest
 *            vertex (dest vertices are 16 bytes apart),
 *            EBX / EDX = scratch for the two words being copied.
 * ESI, EDI and EBX are saved on entry and restored on exit; FRAME_OFFSET is
 * the number of bytes pushed (presumably consumed by the ARG_* macros).
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_identity )
GLNAME( _mesa_x86_transform_points2_identity ):
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )			/* source GLvector4f */
	MOV_L( ARG_DEST, EDI )				/* dest GLvector4f */
	MOV_L( ARG_MATRIX, EDX )			/* matrix (EDX is reused as scratch below) */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )				/* empty vector: nothing to do */
	JE( LLBL( x86_p2_id_exit ) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )		/* bytes between source vertices */

	/* Describe the result in the dest vector header. */
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )

	/* Turn the count into an end pointer: ECX = dest start + count * 16. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

	/* Copying a buffer onto itself would change nothing. */
	CMP_L( ESI, EDI )
	JE( LLBL( x86_p2_id_exit ) )

ALIGNTEXT16
LLBL( x86_p2_id_copy ):
	MOV_L( SRC(0), EBX )				/* read both words first ... */
	MOV_L( SRC(1), EDX )
	MOV_L( EBX, DST(0) )				/* ... then write them */
	MOV_L( EDX, DST(1) )

	ADD_L( CONST(16), EDI )				/* next dest vertex */
	ADD_L( EAX, ESI )				/* next source vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL( x86_p2_id_copy ) )

LLBL( x86_p2_id_exit ):
	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET

606
src/mesa/x86/x86_xform3.S Normal file
View File

@ -0,0 +1,606 @@
/* $Id: x86_xform3.S,v 1.1 2001/03/29 06:46:27 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
/* IEEE-754 single-precision bit patterns of 1.0 (0x3f800000) and 0.0, for
 * writing float constants with integer moves. */
#define FP_ONE 1065353216
#define FP_ZERO 0
/* Element i (4 bytes each) of the current source vertex, the current dest
 * vertex and the matrix, addressed through ESI, EDI and EDX respectively. */
#define SRC(i) REGOFF(i * 4, ESI)
#define DST(i) REGOFF(i * 4, EDI)
#define MAT(i) REGOFF(i * 4, EDX)
/*
 * _mesa_x86_transform_points3_general
 *
 * x87 transform of 3-component points (ox, oy, oz) by a general 4x4 matrix.
 * For each vertex and k = 0..3:
 *   dest[k] = ox*m(k) + oy*m(4+k) + oz*m(8+k) + m(12+k)
 * The dest vector is tagged VEC_SIZE_4 and its size field is set to 4.
 *
 * Registers: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 *            EAX = source stride in bytes, ECX = one past the last dest
 *            vertex (dest vertices are 16 bytes apart).
 * ESI and EDI are saved on entry and restored on exit; FRAME_OFFSET is the
 * number of bytes pushed (presumably consumed by the ARG_* macros).
 * The trailing comments list the x87 stack with ST(0) first; F4..F7 are the
 * four running results, F0..F3 the products being added into them.  All
 * eight x87 registers are in use at the deepest point.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_general )
GLNAME( _mesa_x86_transform_points3_general ):
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Empty source vector: leave dest untouched. */
TEST_L( ECX, ECX )
JZ( LLBL( x86_p3_gr_done ) )
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16, the loop end pointer. */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
ALIGNTEXT16
LLBL( x86_p3_gr_loop ):
/* ox times matrix elements 0..3. */
FLD_S( SRC(0) ) /* F4 */
FMUL_S( MAT(0) )
FLD_S( SRC(0) ) /* F5 F4 */
FMUL_S( MAT(1) )
FLD_S( SRC(0) ) /* F6 F5 F4 */
FMUL_S( MAT(2) )
FLD_S( SRC(0) ) /* F7 F6 F5 F4 */
FMUL_S( MAT(3) )
/* oy times matrix elements 4..7. */
FLD_S( SRC(1) ) /* F0 F7 F6 F5 F4 */
FMUL_S( MAT(4) )
FLD_S( SRC(1) ) /* F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT(5) )
FLD_S( SRC(1) ) /* F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT(6) )
FLD_S( SRC(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT(7) )
/* Fold each oy product into its result: F4+=F0, F5+=F1, F6+=F2, F7+=F3. */
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */
/* oz times matrix elements 8..11, folded in the same way. */
FLD_S( SRC(2) ) /* F0 F7 F6 F5 F4 */
FMUL_S( MAT(8) )
FLD_S( SRC(2) ) /* F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT(9) )
FLD_S( SRC(2) ) /* F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT(10) )
FLD_S( SRC(2) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT(11) )
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */
/* Add matrix elements 12..15 to the four results. */
FXCH( ST(3) ) /* F4 F6 F5 F7 */
FADD_S( MAT(12) )
FXCH( ST(2) ) /* F5 F6 F4 F7 */
FADD_S( MAT(13) )
FXCH( ST(1) ) /* F6 F5 F4 F7 */
FADD_S( MAT(14) )
FXCH( ST(3) ) /* F7 F5 F4 F6 */
FADD_S( MAT(15) )
/* Store the results; the x87 stack is empty again afterwards. */
FXCH( ST(2) ) /* F4 F5 F7 F6 */
FSTP_S( DST(0) ) /* F5 F7 F6 */
FSTP_S( DST(1) ) /* F7 F6 */
FXCH( ST(1) ) /* F6 F7 */
FSTP_S( DST(2) ) /* F7 */
FSTP_S( DST(3) ) /* */
LLBL( x86_p3_gr_skip ):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL( x86_p3_gr_loop ) )
LLBL( x86_p3_gr_done ):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points3_perspective
 *
 * x87 transform of 3-component points (ox, oy, oz) by a perspective matrix.
 * For each vertex:
 *   dest[0] = ox*m0 + oz*m8
 *   dest[1] = oy*m5 + oz*m9
 *   dest[2] = oz*m10 + m14
 *   dest[3] = -oz          (sign bit of the raw word flipped)
 * The dest vector is tagged VEC_SIZE_4 and its size field is set to 4.
 *
 * Registers: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 *            EAX = source stride in bytes, ECX = one past the last dest
 *            vertex (dest vertices are 16 bytes apart), EBX = bits of -oz.
 * ESI, EDI and EBX are saved on entry and restored on exit; FRAME_OFFSET is
 * the number of bytes pushed (presumably consumed by the ARG_* macros).
 * The trailing comments list the x87 stack with ST(0) first.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_perspective )
GLNAME( _mesa_x86_transform_points3_perspective ):
#define FRAME_OFFSET 12
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Empty source vector: leave dest untouched. */
TEST_L( ECX, ECX )
JZ( LLBL( x86_p3_pr_done ) )
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16, the loop end pointer. */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
ALIGNTEXT16
LLBL( x86_p3_pr_loop ):
FLD_S( SRC(0) ) /* F4 */
FMUL_S( MAT(0) )
FLD_S( SRC(1) ) /* F5 F4 */
FMUL_S( MAT(5) )
/* oz times matrix elements 8..10. */
FLD_S( SRC(2) ) /* F0 F5 F4 */
FMUL_S( MAT(8) )
FLD_S( SRC(2) ) /* F1 F0 F5 F4 */
FMUL_S( MAT(9) )
FLD_S( SRC(2) ) /* F2 F1 F0 F5 F4 */
FMUL_S( MAT(10) )
/* F4+=F0 and F5+=F1; F6 starts as m14 and receives F2. */
FXCH( ST(2) ) /* F0 F1 F2 F5 F4 */
FADDP( ST(0), ST(4) ) /* F1 F2 F5 F4 */
FADDP( ST(0), ST(2) ) /* F2 F5 F4 */
FLD_S( MAT(14) ) /* F6 F2 F5 F4 */
FXCH( ST(1) ) /* F2 F6 F5 F4 */
FADDP( ST(0), ST(1) ) /* F6 F5 F4 */
/* Read oz as an integer before any dest store and negate it by toggling
 * bit 31, the IEEE sign bit. */
MOV_L( SRC(2), EBX )
XOR_L( CONST(-2147483648), EBX )/* change sign */
FXCH( ST(2) ) /* F4 F5 F6 */
FSTP_S( DST(0) ) /* F5 F6 */
FSTP_S( DST(1) ) /* F6 */
FSTP_S( DST(2) ) /* */
MOV_L( EBX, DST(3) )
LLBL( x86_p3_pr_skip ):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL( x86_p3_pr_loop ) )
LLBL( x86_p3_pr_done ):
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points3_3d
 *
 * x87 transform of 3-component points (ox, oy, oz) by a 3D matrix.
 * For each vertex and k = 0..2:
 *   dest[k] = ox*m(k) + oy*m(4+k) + oz*m(8+k) + m(12+k)
 * dest[3] is not written.  The dest vector is tagged VEC_SIZE_3 and its
 * size field is set to 3.
 *
 * Registers: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 *            EAX = source stride in bytes, ECX = one past the last dest
 *            vertex (dest vertices are 16 bytes apart).
 * ESI and EDI are saved on entry and restored on exit; FRAME_OFFSET is the
 * number of bytes pushed (presumably consumed by the ARG_* macros).
 * The trailing comments list the x87 stack with ST(0) first; F4..F6 are the
 * running results, F0..F2 the products being added into them.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_3d )
GLNAME( _mesa_x86_transform_points3_3d ):
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Empty source vector: leave dest untouched. */
TEST_L( ECX, ECX )
JZ( LLBL( x86_p3_3dr_done ) )
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16, the loop end pointer. */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
ALIGNTEXT16
LLBL( x86_p3_3dr_loop ):
/* ox times matrix elements 0..2. */
FLD_S( SRC(0) ) /* F4 */
FMUL_S( MAT(0) )
FLD_S( SRC(0) ) /* F5 F4 */
FMUL_S( MAT(1) )
FLD_S( SRC(0) ) /* F6 F5 F4 */
FMUL_S( MAT(2) )
/* oy times matrix elements 4..6. */
FLD_S( SRC(1) ) /* F0 F6 F5 F4 */
FMUL_S( MAT(4) )
FLD_S( SRC(1) ) /* F1 F0 F6 F5 F4 */
FMUL_S( MAT(5) )
FLD_S( SRC(1) ) /* F2 F1 F0 F6 F5 F4 */
FMUL_S( MAT(6) )
/* Fold each oy product into its result: F4+=F0, F5+=F1, F6+=F2. */
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */
FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */
FADDP( ST(0), ST(1) ) /* F6 F5 F4 */
/* oz times matrix elements 8..10, folded in the same way. */
FLD_S( SRC(2) ) /* F0 F6 F5 F4 */
FMUL_S( MAT(8) )
FLD_S( SRC(2) ) /* F1 F0 F6 F5 F4 */
FMUL_S( MAT(9) )
FLD_S( SRC(2) ) /* F2 F1 F0 F6 F5 F4 */
FMUL_S( MAT(10) )
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */
FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */
FADDP( ST(0), ST(1) ) /* F6 F5 F4 */
/* Add matrix elements 12..14 to the three results. */
FXCH( ST(2) ) /* F4 F5 F6 */
FADD_S( MAT(12) )
FXCH( ST(1) ) /* F5 F4 F6 */
FADD_S( MAT(13) )
FXCH( ST(2) ) /* F6 F4 F5 */
FADD_S( MAT(14) )
/* Store the results; the x87 stack is empty again afterwards. */
FXCH( ST(1) ) /* F4 F6 F5 */
FSTP_S( DST(0) ) /* F6 F5 */
FXCH( ST(1) ) /* F5 F6 */
FSTP_S( DST(1) ) /* F6 */
FSTP_S( DST(2) ) /* */
LLBL( x86_p3_3dr_skip ):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL( x86_p3_3dr_loop ) )
LLBL( x86_p3_3dr_done ):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points3_3d_no_rot
 *
 * x87 transform of 3-component points (ox, oy, oz) by a 3D matrix without
 * rotation (scale and translate only).  For each vertex:
 *   dest[0] = ox*m0  + m12
 *   dest[1] = oy*m5  + m13
 *   dest[2] = oz*m10 + m14
 * dest[3] is not written.  The dest vector is tagged VEC_SIZE_3 and its
 * size field is set to 3.
 *
 * Registers: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 *            EAX = source stride in bytes, ECX = one past the last dest
 *            vertex (dest vertices are 16 bytes apart).
 * ESI and EDI are saved on entry and restored on exit; FRAME_OFFSET is the
 * number of bytes pushed (presumably consumed by the ARG_* macros).
 * The trailing comments list the x87 stack with ST(0) first.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_3d_no_rot )
GLNAME( _mesa_x86_transform_points3_3d_no_rot ):
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Empty source vector: leave dest untouched. */
TEST_L( ECX, ECX )
JZ( LLBL( x86_p3_3dnrr_done ) )
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16, the loop end pointer. */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
ALIGNTEXT16
LLBL( x86_p3_3dnrr_loop ):
/* The three diagonal products. */
FLD_S( SRC(0) ) /* F4 */
FMUL_S( MAT(0) )
FLD_S( SRC(1) ) /* F1 F4 */
FMUL_S( MAT(5) )
FLD_S( SRC(2) ) /* F2 F1 F4 */
FMUL_S( MAT(10) )
FXCH( ST(2) ) /* F4 F1 F2 */
FADD_S( MAT(12) )
/* F5 starts as m13 and receives F1; F6 starts as m14 and receives F2. */
FLD_S( MAT(13) ) /* F5 F4 F1 F2 */
FXCH( ST(2) ) /* F1 F4 F5 F2 */
FADDP( ST(0), ST(2) ) /* F4 F5 F2 */
FLD_S( MAT(14) ) /* F6 F4 F5 F2 */
FXCH( ST(3) ) /* F2 F4 F5 F6 */
FADDP( ST(0), ST(3) ) /* F4 F5 F6 */
FSTP_S( DST(0) ) /* F5 F6 */
FSTP_S( DST(1) ) /* F6 */
FSTP_S( DST(2) ) /* */
LLBL( x86_p3_3dnrr_skip ):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL( x86_p3_3dnrr_loop ) )
LLBL( x86_p3_3dnrr_done ):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points3_2d
 *
 * x87 transform of 3-component points (ox, oy, oz) by a 2D matrix.
 * For each vertex:
 *   dest[0] = ox*m0 + oy*m4 + m12
 *   dest[1] = ox*m1 + oy*m5 + m13
 *   dest[2] = oz           (copied as a raw 32-bit word)
 * dest[3] is not written.  The dest vector is tagged VEC_SIZE_3 and its
 * size field is set to 3.
 *
 * Registers: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 *            EAX = source stride in bytes, ECX = one past the last dest
 *            vertex (dest vertices are 16 bytes apart), EBX = bits of oz.
 * ESI, EDI and EBX are saved on entry and restored on exit; FRAME_OFFSET is
 * the number of bytes pushed (presumably consumed by the ARG_* macros).
 * The trailing comments list the x87 stack with ST(0) first.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_2d )
GLNAME( _mesa_x86_transform_points3_2d ):
#define FRAME_OFFSET 12
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Empty source vector: leave dest untouched. */
TEST_L( ECX, ECX )
JZ( LLBL( x86_p3_2dr_done ) )
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16, the loop end pointer. */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
ALIGNTEXT16
LLBL( x86_p3_2dr_loop ):
FLD_S( SRC(0) ) /* F4 */
FMUL_S( MAT(0) )
FLD_S( SRC(0) ) /* F5 F4 */
FMUL_S( MAT(1) )
FLD_S( SRC(1) ) /* F0 F5 F4 */
FMUL_S( MAT(4) )
FLD_S( SRC(1) ) /* F1 F0 F5 F4 */
FMUL_S( MAT(5) )
/* Fold the oy products into the results: F4+=F0, F5+=F1. */
FXCH( ST(1) ) /* F0 F1 F5 F4 */
FADDP( ST(0), ST(3) ) /* F1 F5 F4 */
FADDP( ST(0), ST(1) ) /* F5 F4 */
/* Add the translation m12, m13. */
FXCH( ST(1) ) /* F4 F5 */
FADD_S( MAT(12) )
FXCH( ST(1) ) /* F5 F4 */
FADD_S( MAT(13) )
/* oz is fetched as an integer before any dest store is issued. */
MOV_L( SRC(2), EBX )
FXCH( ST(1) ) /* F4 F5 */
FSTP_S( DST(0) ) /* F5 */
FSTP_S( DST(1) ) /* */
MOV_L( EBX, DST(2) )
LLBL( x86_p3_2dr_skip ):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL( x86_p3_2dr_loop ) )
LLBL( x86_p3_2dr_done ):
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points3_2d_no_rot
 *
 * x87 transform of 3-component points (ox, oy, oz) by a 2D matrix without
 * rotation (scale and translate only).  For each vertex:
 *   dest[0] = ox*m0 + m12
 *   dest[1] = oy*m5 + m13
 *   dest[2] = oz           (copied as a raw 32-bit word)
 * dest[3] is not written.  The dest vector is tagged VEC_SIZE_3 and its
 * size field is set to 3.
 *
 * Registers: ESI = source vertex, EDI = dest vertex, EDX = matrix,
 *            EAX = source stride in bytes, ECX = one past the last dest
 *            vertex (dest vertices are 16 bytes apart), EBX = bits of oz.
 * ESI, EDI and EBX are saved on entry and restored on exit; FRAME_OFFSET is
 * the number of bytes pushed (presumably consumed by the ARG_* macros).
 * The trailing comments list the x87 stack with ST(0) first.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_2d_no_rot )
GLNAME( _mesa_x86_transform_points3_2d_no_rot ):
#define FRAME_OFFSET 12
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
/* Empty source vector: leave dest untouched. */
TEST_L( ECX, ECX )
JZ( LLBL( x86_p3_2dnrr_done ) )
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16, the loop end pointer. */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
ALIGNTEXT16
LLBL( x86_p3_2dnrr_loop ):
FLD_S( SRC(0) ) /* F4 */
FMUL_S( MAT(0) )
FLD_S( SRC(1) ) /* F1 F4 */
FMUL_S( MAT(5) )
FXCH( ST(1) ) /* F4 F1 */
FADD_S( MAT(12) )
/* F5 starts as m13 and then receives the oy*m5 product (F1). */
FLD_S( MAT(13) ) /* F5 F4 F1 */
FXCH( ST(2) ) /* F1 F4 F5 */
FADDP( ST(0), ST(2) ) /* F4 F5 */
/* oz is fetched as an integer before any dest store is issued. */
MOV_L( SRC(2), EBX )
FSTP_S( DST(0) ) /* F5 */
FSTP_S( DST(1) ) /* */
MOV_L( EBX, DST(2) )
LLBL( x86_p3_2dnrr_skip ):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL( x86_p3_2dnrr_loop ) )
LLBL( x86_p3_2dnrr_done ):
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points3_identity
 *
 * Copies the first three 32-bit components of every source element to
 * the destination unchanged and marks the destination vector as holding
 * 3 components.  No matrix element is read: EDX is loaded with the
 * matrix argument but is then reused as a scratch register in the loop.
 *
 * If the source and destination data pointers are equal, the copy loop
 * is skipped (the destination flags, count and size have already been
 * updated by then).
 *
 * ESI, EDI, EBX and EBP are saved on entry and restored on exit.
 *
 * NOTE(review): SRC(i) and DST(i) are defined outside this excerpt;
 * the description assumes they address the i-th 4-byte element off ESI
 * and EDI -- confirm.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_identity )
GLNAME(_mesa_x86_transform_points3_identity ):
/* Four 4-byte registers are pushed below.  FRAME_OFFSET is presumably
 * consumed by the ARG_* macros to step over them -- confirm in
 * xform_args.h. */
#define FRAME_OFFSET 16
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
PUSH_L( EBP )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX ) /* never dereferenced in this routine */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = number of source points */
TEST_L( ECX, ECX )
JZ( LLBL( x86_p3_ir_done ) ) /* count == 0: return without touching the destination */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride in bytes */
/* Publish the result layout on the destination vector. */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
SHL_L( CONST(4), ECX ) /* ECX = count * 16 bytes of output */
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX ) /* ECX = one past the last destination element */
/* Same data pointer on both sides: nothing to copy. */
CMP_L( ESI, EDI )
JE( LLBL( x86_p3_ir_done ) )
ALIGNTEXT16
LLBL( x86_p3_ir_loop ):
/* Enabled variant: move the three components as raw 32-bit integers,
 * all loads first, then all stores. */
#if 1
MOV_L( SRC(0), EBX )
MOV_L( SRC(1), EBP )
MOV_L( SRC(2), EDX )
MOV_L( EBX, DST(0) )
MOV_L( EBP, DST(1) )
MOV_L( EDX, DST(2) )
#else
/* Disabled variant: the same copy routed through the x87 stack.
 * NOTE(review): an x87 load/store round trip is not guaranteed to keep
 * every bit pattern intact (e.g. signalling NaNs), which may be why the
 * integer variant is the one enabled -- confirm before switching. */
FLD_S( SRC(0) )
FLD_S( SRC(1) )
FLD_S( SRC(2) )
FSTP_S( DST(2) )
FSTP_S( DST(1) )
FSTP_S( DST(0) )
#endif
/* This label is not the target of any jump in this routine. */
LLBL( x86_p3_ir_skip ):
ADD_L( CONST(16), EDI ) /* destination elements are 16 bytes apart */
ADD_L( EAX, ESI ) /* source advances by its own stride */
CMP_L( ECX, EDI )
JNE( LLBL( x86_p3_ir_loop ) ) /* loop until EDI reaches the end pointer */
LLBL( x86_p3_ir_done ):
POP_L( EBP )
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET

639
src/mesa/x86/x86_xform4.S Normal file
View File

@ -0,0 +1,639 @@
/* $Id: x86_xform4.S,v 1.1 2001/03/29 06:46:27 gareth Exp $ */
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/* NOTE(review): matypes.h presumably supplies the V4F_* field offsets and
 * VEC_SIZE_* flags, and xform_args.h the ARG_SOURCE / ARG_DEST /
 * ARG_MATRIX accessors used by every routine in this file -- neither
 * header is visible here, confirm. */
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
/* 1065353216 == 0x3F800000, the IEEE-754 single-precision bit pattern of
 * 1.0; 0 is the bit pattern of +0.0. */
#define FP_ONE 1065353216
#define FP_ZERO 0
/* Element accessors: the i-th 4-byte float of the current source element
 * (ESI), the current destination element (EDI) and the matrix (EDX).
 * The argument is not parenthesised in the expansion, so pass a plain
 * integer literal rather than an expression. */
#define SRC(i) REGOFF(i * 4, ESI)
#define DST(i) REGOFF(i * 4, EDI)
#define MAT(i) REGOFF(i * 4, EDX)
/*
 * _mesa_x86_transform_points4_general
 *
 * Full 4x4 transform of 4-component points.  For every source point and
 * for j = 0..3 this routine writes
 *     dst[j] = src[0] * m[j]     + src[1] * m[4 + j]
 *            + src[2] * m[8 + j] + src[3] * m[12 + j]
 * and marks the destination vector as holding 4 components.
 *
 * Register roles inside the loop:
 *     ESI = current source element, advanced by the source stride (EAX)
 *     EDI = current destination element, advanced by 16 bytes
 *     EDX = matrix base
 *     ECX = destination end pointer (start + count * 16)
 * Only ESI and EDI are saved and restored.
 *
 * The trailing comments on the FPU lines list the x87 stack, top of
 * stack first.  F4..F7 are the accumulators that end up in dst[0..3];
 * F0..F3 are the four partial products of the source component being
 * folded in.  All eight x87 registers are live at the peak of each
 * round, so the routine relies on the stack being empty on entry.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_general )
GLNAME( _mesa_x86_transform_points4_general ):
/* Two 4-byte registers are pushed below.  FRAME_OFFSET is presumably
 * consumed by the ARG_* macros to step over them -- confirm in
 * xform_args.h. */
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = number of source points */
TEST_L( ECX, ECX )
JZ( LLBL( x86_p4_gr_done ) ) /* count == 0: return without touching the destination */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride in bytes */
/* Publish the result layout on the destination vector: size-4 flag,
 * same count as the source, size field = 4. */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
SHL_L( CONST(4), ECX ) /* ECX = count * 16 bytes of output */
/* From here on ESI/EDI point at the element data, not the vector headers. */
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX ) /* ECX = one past the last destination element */
ALIGNTEXT16
LLBL( x86_p4_gr_loop ):
/* Seed the four accumulators with src[0] * m[0..3]. */
FLD_S( SRC(0) ) /* F4 */
FMUL_S( MAT(0) )
FLD_S( SRC(0) ) /* F5 F4 */
FMUL_S( MAT(1) )
FLD_S( SRC(0) ) /* F6 F5 F4 */
FMUL_S( MAT(2) )
FLD_S( SRC(0) ) /* F7 F6 F5 F4 */
FMUL_S( MAT(3) )
/* Partial products src[1] * m[4..7], then fold each into its accumulator. */
FLD_S( SRC(1) ) /* F0 F7 F6 F5 F4 */
FMUL_S( MAT(4) )
FLD_S( SRC(1) ) /* F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT(5) )
FLD_S( SRC(1) ) /* F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT(6) )
FLD_S( SRC(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT(7) )
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */
/* Same pattern for src[2] * m[8..11]. */
FLD_S( SRC(2) ) /* F0 F7 F6 F5 F4 */
FMUL_S( MAT(8) )
FLD_S( SRC(2) ) /* F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT(9) )
FLD_S( SRC(2) ) /* F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT(10) )
FLD_S( SRC(2) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT(11) )
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */
/* Same pattern for src[3] * m[12..15]. */
FLD_S( SRC(3) ) /* F0 F7 F6 F5 F4 */
FMUL_S( MAT(12) )
FLD_S( SRC(3) ) /* F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT(13) )
FLD_S( SRC(3) ) /* F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT(14) )
FLD_S( SRC(3) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT(15) )
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */
FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */
/* Reorder so the accumulators pop in dst[0], dst[1], dst[2], dst[3] order.
 * Every source read above happens before the first store below. */
FXCH( ST(3) ) /* F4 F6 F5 F7 */
FSTP_S( DST(0) ) /* F6 F5 F7 */
FXCH( ST(1) ) /* F5 F6 F7 */
FSTP_S( DST(1) ) /* F6 F7 */
FSTP_S( DST(2) ) /* F7 */
FSTP_S( DST(3) ) /* (empty) */
/* This label is not the target of any jump in this routine. */
LLBL( x86_p4_gr_skip ):
ADD_L( CONST(16), EDI ) /* destination elements are 16 bytes apart */
ADD_L( EAX, ESI ) /* source advances by its own stride */
CMP_L( ECX, EDI )
JNE( LLBL( x86_p4_gr_loop ) ) /* loop until EDI reaches the end pointer */
LLBL( x86_p4_gr_done ):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points4_perspective
 *
 * Transform of 4-component points that reads only matrix elements
 * 0, 5, 8, 9, 10 and 14.  For every source point this routine writes
 *     dst[0] = src[0] * m[0]  + src[2] * m[8]
 *     dst[1] = src[1] * m[5]  + src[2] * m[9]
 *     dst[2] = src[2] * m[10] + src[3] * m[14]
 *     dst[3] = -src[2]        (sign bit flipped in an integer register)
 * and marks the destination vector as holding 4 components.
 *
 * Register roles inside the loop:
 *     ESI = current source element, advanced by the source stride (EAX)
 *     EDI = current destination element, advanced by 16 bytes
 *     EDX = matrix base
 *     ECX = destination end pointer (start + count * 16)
 *     EBX = integer scratch holding the bits of -src[2]
 * ESI, EDI and EBX are saved on entry and restored on exit.
 *
 * The trailing comments on the FPU lines list the x87 stack, top of
 * stack first.  F4, F5 and F6 end up in dst[0], dst[1] and dst[2];
 * F0, F1 and F2 are partial products.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_perspective )
GLNAME( _mesa_x86_transform_points4_perspective ):
/* Three 4-byte registers are pushed below.  FRAME_OFFSET is presumably
 * consumed by the ARG_* macros to step over them -- confirm in
 * xform_args.h. */
#define FRAME_OFFSET 12
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = number of source points */
TEST_L( ECX, ECX )
JZ( LLBL( x86_p4_pr_done ) ) /* count == 0: return without touching the destination */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride in bytes */
/* Publish the result layout on the destination vector. */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
SHL_L( CONST(4), ECX ) /* ECX = count * 16 bytes of output */
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX ) /* ECX = one past the last destination element */
ALIGNTEXT16
LLBL( x86_p4_pr_loop ):
FLD_S( SRC(0) ) /* F4 */
FMUL_S( MAT(0) )
FLD_S( SRC(1) ) /* F5 F4 */
FMUL_S( MAT(5) )
/* Three products of src[2]: two partials and the dst[2] accumulator. */
FLD_S( SRC(2) ) /* F0 F5 F4 */
FMUL_S( MAT(8) )
FLD_S( SRC(2) ) /* F1 F0 F5 F4 */
FMUL_S( MAT(9) )
FLD_S( SRC(2) ) /* F6 F1 F0 F5 F4 */
FMUL_S( MAT(10) )
FXCH( ST(2) ) /* F0 F1 F6 F5 F4 */
FADDP( ST(0), ST(4) ) /* F1 F6 F5 F4 ; F4 += src[2] * m[8] */
FADDP( ST(0), ST(2) ) /* F6 F5 F4 ; F5 += src[2] * m[9] */
FLD_S( SRC(3) ) /* F2 F6 F5 F4 */
FMUL_S( MAT(14) )
FADDP( ST(0), ST(1) ) /* F6 F5 F4 ; F6 += src[3] * m[14] */
/* Build dst[3] without the FPU: load the raw bits of src[2] and toggle
 * bit 31 (-2147483648 == 0x80000000), the IEEE-754 sign bit.  The load
 * happens before any store to the destination. */
MOV_L( SRC(2), EBX )
XOR_L( CONST(-2147483648), EBX ) /* EBX = bits of -src[2] */
FXCH( ST(2) ) /* F4 F5 F6 */
FSTP_S( DST(0) ) /* F5 F6 */
FSTP_S( DST(1) ) /* F6 */
FSTP_S( DST(2) ) /* (empty) */
MOV_L( EBX, DST(3) )
/* This label is not the target of any jump in this routine. */
LLBL( x86_p4_pr_skip ):
ADD_L( CONST(16), EDI ) /* destination elements are 16 bytes apart */
ADD_L( EAX, ESI ) /* source advances by its own stride */
CMP_L( ECX, EDI )
JNE( LLBL( x86_p4_pr_loop ) ) /* loop until EDI reaches the end pointer */
LLBL( x86_p4_pr_done ):
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points4_3d
 *
 * Transform of 4-component points that reads matrix elements 0-2, 4-6,
 * 8-10 and 12-14 and never reads elements 3, 7, 11 and 15.  For every
 * source point and for j = 0..2 this routine writes
 *     dst[j] = src[0] * m[j]     + src[1] * m[4 + j]
 *            + src[2] * m[8 + j] + src[3] * m[12 + j]
 *     dst[3] = src[3]            (copied as a raw 32-bit word via EBX)
 * and marks the destination vector as holding 4 components.
 *
 * Register roles inside the loop:
 *     ESI = current source element, advanced by the source stride (EAX)
 *     EDI = current destination element, advanced by 16 bytes
 *     EDX = matrix base
 *     ECX = destination end pointer (start + count * 16)
 *     EBX = integer scratch for the pass-through component
 * ESI, EDI and EBX are saved on entry and restored on exit.
 *
 * The trailing comments on the FPU lines list the x87 stack, top of
 * stack first.  F4..F6 are the accumulators that end up in dst[0..2];
 * F0..F2 are the partial products of the component being folded in.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_3d )
GLNAME( _mesa_x86_transform_points4_3d ):
/* Three 4-byte registers are pushed below.  FRAME_OFFSET is presumably
 * consumed by the ARG_* macros to step over them -- confirm in
 * xform_args.h. */
#define FRAME_OFFSET 12
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = number of source points */
TEST_L( ECX, ECX )
JZ( LLBL( x86_p4_3dr_done ) ) /* count == 0: return without touching the destination */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride in bytes */
/* Publish the result layout on the destination vector. */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
SHL_L( CONST(4), ECX ) /* ECX = count * 16 bytes of output */
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX ) /* ECX = one past the last destination element */
ALIGNTEXT16
LLBL( x86_p4_3dr_loop ):
/* Seed the three accumulators with src[0] * m[0..2]. */
FLD_S( SRC(0) ) /* F4 */
FMUL_S( MAT(0) )
FLD_S( SRC(0) ) /* F5 F4 */
FMUL_S( MAT(1) )
FLD_S( SRC(0) ) /* F6 F5 F4 */
FMUL_S( MAT(2) )
/* Partial products src[1] * m[4..6], then fold each into its accumulator. */
FLD_S( SRC(1) ) /* F0 F6 F5 F4 */
FMUL_S( MAT(4) )
FLD_S( SRC(1) ) /* F1 F0 F6 F5 F4 */
FMUL_S( MAT(5) )
FLD_S( SRC(1) ) /* F2 F1 F0 F6 F5 F4 */
FMUL_S( MAT(6) )
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */
FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */
FADDP( ST(0), ST(1) ) /* F6 F5 F4 */
/* Same pattern for src[2] * m[8..10]. */
FLD_S( SRC(2) ) /* F0 F6 F5 F4 */
FMUL_S( MAT(8) )
FLD_S( SRC(2) ) /* F1 F0 F6 F5 F4 */
FMUL_S( MAT(9) )
FLD_S( SRC(2) ) /* F2 F1 F0 F6 F5 F4 */
FMUL_S( MAT(10) )
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */
FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */
FADDP( ST(0), ST(1) ) /* F6 F5 F4 */
/* Same pattern for src[3] * m[12..14]. */
FLD_S( SRC(3) ) /* F0 F6 F5 F4 */
FMUL_S( MAT(12) )
FLD_S( SRC(3) ) /* F1 F0 F6 F5 F4 */
FMUL_S( MAT(13) )
FLD_S( SRC(3) ) /* F2 F1 F0 F6 F5 F4 */
FMUL_S( MAT(14) )
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */
FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */
FADDP( ST(0), ST(1) ) /* F6 F5 F4 */
/* The fourth component is read before any store to the destination. */
MOV_L( SRC(3), EBX )
FXCH( ST(2) ) /* F4 F5 F6 */
FSTP_S( DST(0) ) /* F5 F6 */
FSTP_S( DST(1) ) /* F6 */
FSTP_S( DST(2) ) /* (empty) */
MOV_L( EBX, DST(3) )
/* This label is not the target of any jump in this routine. */
LLBL( x86_p4_3dr_skip ):
ADD_L( CONST(16), EDI ) /* destination elements are 16 bytes apart */
ADD_L( EAX, ESI ) /* source advances by its own stride */
CMP_L( ECX, EDI )
JNE( LLBL( x86_p4_3dr_loop ) ) /* loop until EDI reaches the end pointer */
LLBL( x86_p4_3dr_done ):
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points4_3d_no_rot
 *
 * Transform of 4-component points that reads only matrix elements
 * 0, 5, 10, 12, 13 and 14.  For every source point this routine writes
 *     dst[0] = src[0] * m[0]  + src[3] * m[12]
 *     dst[1] = src[1] * m[5]  + src[3] * m[13]
 *     dst[2] = src[2] * m[10] + src[3] * m[14]
 *     dst[3] = src[3]         (copied as a raw 32-bit word via EBX)
 * and marks the destination vector as holding 4 components.
 *
 * Register roles inside the loop:
 *     ESI = current source element, advanced by the source stride (EAX)
 *     EDI = current destination element, advanced by 16 bytes
 *     EDX = matrix base
 *     ECX = destination end pointer (start + count * 16)
 *     EBX = integer scratch for the pass-through component
 * ESI, EDI and EBX are saved on entry and restored on exit.
 *
 * The trailing comments on the FPU lines list the x87 stack, top of
 * stack first.  F4..F6 end up in dst[0..2]; F0..F2 are the three
 * products of src[3].
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_3d_no_rot)
GLNAME(_mesa_x86_transform_points4_3d_no_rot):
/* Three 4-byte registers are pushed below.  FRAME_OFFSET is presumably
 * consumed by the ARG_* macros to step over them -- confirm in
 * xform_args.h. */
#define FRAME_OFFSET 12
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = number of source points */
TEST_L( ECX, ECX )
JZ( LLBL( x86_p4_3dnrr_done ) ) /* count == 0: return without touching the destination */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride in bytes */
/* Publish the result layout on the destination vector. */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
SHL_L( CONST(4), ECX ) /* ECX = count * 16 bytes of output */
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX ) /* ECX = one past the last destination element */
ALIGNTEXT16
LLBL( x86_p4_3dnrr_loop ):
/* One product per output component: src[j] times m[0], m[5], m[10]. */
FLD_S( SRC(0) ) /* F4 */
FMUL_S( MAT(0) )
FLD_S( SRC(1) ) /* F5 F4 */
FMUL_S( MAT(5) )
FLD_S( SRC(2) ) /* F6 F5 F4 */
FMUL_S( MAT(10) )
/* Products src[3] * m[12..14], folded into F4, F5 and F6 respectively. */
FLD_S( SRC(3) ) /* F0 F6 F5 F4 */
FMUL_S( MAT(12) )
FLD_S( SRC(3) ) /* F1 F0 F6 F5 F4 */
FMUL_S( MAT(13) )
FLD_S( SRC(3) ) /* F2 F1 F0 F6 F5 F4 */
FMUL_S( MAT(14) )
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */
FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */
FADDP( ST(0), ST(1) ) /* F6 F5 F4 */
/* The fourth component is read before any store to the destination. */
MOV_L( SRC(3), EBX )
FXCH( ST(2) ) /* F4 F5 F6 */
FSTP_S( DST(0) ) /* F5 F6 */
FSTP_S( DST(1) ) /* F6 */
FSTP_S( DST(2) ) /* (empty) */
MOV_L( EBX, DST(3) )
/* This label is not the target of any jump in this routine. */
LLBL( x86_p4_3dnrr_skip ):
ADD_L( CONST(16), EDI ) /* destination elements are 16 bytes apart */
ADD_L( EAX, ESI ) /* source advances by its own stride */
CMP_L( ECX, EDI )
JNE( LLBL( x86_p4_3dnrr_loop ) ) /* loop until EDI reaches the end pointer */
LLBL( x86_p4_3dnrr_done ):
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points4_2d
 *
 * Transform of 4-component points that reads only matrix elements
 * 0, 1, 4, 5, 12 and 13.  For every source point this routine writes
 *     dst[0] = src[0] * m[0] + src[1] * m[4] + src[3] * m[12]
 *     dst[1] = src[0] * m[1] + src[1] * m[5] + src[3] * m[13]
 *     dst[2] = src[2]        (copied as a raw 32-bit word via EBX)
 *     dst[3] = src[3]        (copied as a raw 32-bit word via EBP)
 * and marks the destination vector as holding 4 components.
 *
 * Register roles inside the loop:
 *     ESI = current source element, advanced by the source stride (EAX)
 *     EDI = current destination element, advanced by 16 bytes
 *     EDX = matrix base
 *     ECX = destination end pointer (start + count * 16)
 *     EBX, EBP = integer scratch for the two pass-through components
 * ESI, EDI, EBX and EBP are saved on entry and restored on exit.
 *
 * The trailing comments on the FPU lines list the x87 stack, top of
 * stack first.  F4 and F5 end up in dst[0] and dst[1]; F0 and F1 are
 * partial products.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_2d )
GLNAME( _mesa_x86_transform_points4_2d ):
/* Four 4-byte registers are pushed below.  FRAME_OFFSET is presumably
 * consumed by the ARG_* macros to step over them -- confirm in
 * xform_args.h. */
#define FRAME_OFFSET 16
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
PUSH_L( EBP )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = number of source points */
TEST_L( ECX, ECX )
JZ( LLBL( x86_p4_2dr_done ) ) /* count == 0: return without touching the destination */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride in bytes */
/* Publish the result layout on the destination vector. */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
SHL_L( CONST(4), ECX ) /* ECX = count * 16 bytes of output */
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX ) /* ECX = one past the last destination element */
ALIGNTEXT16
LLBL( x86_p4_2dr_loop ):
/* Seed the two accumulators with src[0] * m[0] and src[0] * m[1]. */
FLD_S( SRC(0) ) /* F4 */
FMUL_S( MAT(0) )
FLD_S( SRC(0) ) /* F5 F4 */
FMUL_S( MAT(1) )
/* Fold in src[1] * m[4] and src[1] * m[5]. */
FLD_S( SRC(1) ) /* F0 F5 F4 */
FMUL_S( MAT(4) )
FLD_S( SRC(1) ) /* F1 F0 F5 F4 */
FMUL_S( MAT(5) )
FXCH( ST(1) ) /* F0 F1 F5 F4 */
FADDP( ST(0), ST(3) ) /* F1 F5 F4 */
FADDP( ST(0), ST(1) ) /* F5 F4 */
/* Fold in src[3] * m[12] and src[3] * m[13]. */
FLD_S( SRC(3) ) /* F0 F5 F4 */
FMUL_S( MAT(12) )
FLD_S( SRC(3) ) /* F1 F0 F5 F4 */
FMUL_S( MAT(13) )
FXCH( ST(1) ) /* F0 F1 F5 F4 */
FADDP( ST(0), ST(3) ) /* F1 F5 F4 */
FADDP( ST(0), ST(1) ) /* F5 F4 */
/* Both pass-through components are read before any store to the
 * destination. */
MOV_L( SRC(2), EBX )
MOV_L( SRC(3), EBP )
FXCH( ST(1) ) /* F4 F5 */
FSTP_S( DST(0) ) /* F5 */
FSTP_S( DST(1) ) /* (empty) */
MOV_L( EBX, DST(2) )
MOV_L( EBP, DST(3) )
/* This label is not the target of any jump in this routine. */
LLBL( x86_p4_2dr_skip ):
ADD_L( CONST(16), EDI ) /* destination elements are 16 bytes apart */
ADD_L( EAX, ESI ) /* source advances by its own stride */
CMP_L( ECX, EDI )
JNE( LLBL( x86_p4_2dr_loop ) ) /* loop until EDI reaches the end pointer */
LLBL( x86_p4_2dr_done ):
POP_L( EBP )
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points4_2d_no_rot
 *
 * Transform of 4-component points that reads only matrix elements
 * 0, 5, 12 and 13.  For every source point this routine writes
 *     dst[0] = src[0] * m[0] + src[3] * m[12]
 *     dst[1] = src[1] * m[5] + src[3] * m[13]
 *     dst[2] = src[2]        (copied as a raw 32-bit word via EBX)
 *     dst[3] = src[3]        (copied as a raw 32-bit word via EBP)
 * and marks the destination vector as holding 4 components.
 *
 * Register roles inside the loop:
 *     ESI = current source element, advanced by the source stride (EAX)
 *     EDI = current destination element, advanced by 16 bytes
 *     EDX = matrix base
 *     ECX = destination end pointer (start + count * 16)
 *     EBX, EBP = integer scratch for the two pass-through components
 * ESI, EDI, EBX and EBP are saved on entry and restored on exit.
 *
 * The trailing comments on the FPU lines list the x87 stack, top of
 * stack first.  F4 and F5 end up in dst[0] and dst[1]; F0 and F1 are
 * the two products of src[3].
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_2d_no_rot )
GLNAME( _mesa_x86_transform_points4_2d_no_rot ):
/* Four 4-byte registers are pushed below.  FRAME_OFFSET is presumably
 * consumed by the ARG_* macros to step over them -- confirm in
 * xform_args.h. */
#define FRAME_OFFSET 16
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
PUSH_L( EBP )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = number of source points */
TEST_L( ECX, ECX )
JZ( LLBL( x86_p4_2dnrr_done ) ) /* count == 0: return without touching the destination */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride in bytes */
/* Publish the result layout on the destination vector. */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
SHL_L( CONST(4), ECX ) /* ECX = count * 16 bytes of output */
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX ) /* ECX = one past the last destination element */
ALIGNTEXT16
LLBL( x86_p4_2dnrr_loop ):
FLD_S( SRC(0) ) /* F4 */
FMUL_S( MAT(0) )
FLD_S( SRC(1) ) /* F5 F4 */
FMUL_S( MAT(5) )
/* Fold in src[3] * m[12] and src[3] * m[13]. */
FLD_S( SRC(3) ) /* F0 F5 F4 */
FMUL_S( MAT(12) )
FLD_S( SRC(3) ) /* F1 F0 F5 F4 */
FMUL_S( MAT(13) )
FXCH( ST(1) ) /* F0 F1 F5 F4 */
FADDP( ST(0), ST(3) ) /* F1 F5 F4 */
FADDP( ST(0), ST(1) ) /* F5 F4 */
/* Both pass-through components are read before any store to the
 * destination. */
MOV_L( SRC(2), EBX )
MOV_L( SRC(3), EBP )
FXCH( ST(1) ) /* F4 F5 */
FSTP_S( DST(0) ) /* F5 */
FSTP_S( DST(1) ) /* (empty) */
MOV_L( EBX, DST(2) )
MOV_L( EBP, DST(3) )
/* This label is not the target of any jump in this routine. */
LLBL( x86_p4_2dnrr_skip ):
ADD_L( CONST(16), EDI ) /* destination elements are 16 bytes apart */
ADD_L( EAX, ESI ) /* source advances by its own stride */
CMP_L( ECX, EDI )
JNE( LLBL( x86_p4_2dnrr_loop ) ) /* loop until EDI reaches the end pointer */
LLBL( x86_p4_2dnrr_done ):
POP_L( EBP )
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points4_identity
 *
 * Copies all four 32-bit components of every source element to the
 * destination unchanged and marks the destination vector as holding
 * 4 components.  No matrix element is read: EDX is loaded with the
 * matrix argument but is then reused as a scratch register in the loop.
 *
 * If the source and destination data pointers are equal, the copy loop
 * is skipped (the destination flags, count and size have already been
 * updated by then).
 *
 * Register roles inside the loop:
 *     ESI = current source element, advanced by the source stride (EAX)
 *     EDI = current destination element, advanced by 16 bytes
 *     ECX = destination end pointer (start + count * 16)
 *     EBX, EDX = integer scratch for the words being copied
 * ESI, EDI and EBX are saved on entry and restored on exit.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_identity )
GLNAME( _mesa_x86_transform_points4_identity ):
/* Three 4-byte registers are pushed below.  FRAME_OFFSET is presumably
 * consumed by the ARG_* macros to step over them -- confirm in
 * xform_args.h. */
#define FRAME_OFFSET 12
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX ) /* never dereferenced in this routine */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = number of source points */
TEST_L( ECX, ECX )
JZ( LLBL( x86_p4_ir_done ) ) /* count == 0: return without touching the destination */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride in bytes */
/* Publish the result layout on the destination vector. */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
SHL_L( CONST(4), ECX ) /* ECX = count * 16 bytes of output */
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX ) /* ECX = one past the last destination element */
/* Same data pointer on both sides: nothing to copy. */
CMP_L( ESI, EDI )
JE( LLBL( x86_p4_ir_done ) )
ALIGNTEXT16
LLBL( x86_p4_ir_loop ):
/* Copy as raw integers, two words at a time: components 0 and 1 are
 * stored before components 2 and 3 are loaded. */
MOV_L( SRC(0), EBX )
MOV_L( SRC(1), EDX )
MOV_L( EBX, DST(0) )
MOV_L( EDX, DST(1) )
MOV_L( SRC(2), EBX )
MOV_L( SRC(3), EDX )
MOV_L( EBX, DST(2) )
MOV_L( EDX, DST(3) )
/* This label is not the target of any jump in this routine. */
LLBL( x86_p4_ir_skip ):
ADD_L( CONST(16), EDI ) /* destination elements are 16 bytes apart */
ADD_L( EAX, ESI ) /* source advances by its own stride */
CMP_L( ECX, EDI )
JNE( LLBL( x86_p4_ir_loop ) ) /* loop until EDI reaches the end pointer */
LLBL( x86_p4_ir_done ):
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET