Consolidation of asm code in 3.5
This commit is contained in:
parent
8e48a232fe
commit
1b2fef5c28
146
Make-config
146
Make-config
|
@ -1,4 +1,4 @@
|
|||
# $Id: Make-config,v 1.40 2001/03/29 03:41:39 gareth Exp $
|
||||
# $Id: Make-config,v 1.41 2001/03/29 06:46:15 gareth Exp $
|
||||
|
||||
MESA_MAJOR=3
|
||||
MESA_MINOR=5
|
||||
|
@ -220,9 +220,7 @@ freebsd-386:
|
|||
"MAKELIB = ../bin/mklib.freebsd" \
|
||||
"APP_LIB_DEPS = -L/usr/X11R6/lib -lXext -lXmu -lXi -lX11 -lm" \
|
||||
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
|
||||
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
|
||||
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
|
||||
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
|
||||
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
|
||||
X86/x86_cliptest.S"
|
||||
|
||||
gcc:
|
||||
|
@ -556,26 +554,22 @@ linux-x86:
|
|||
"OSMESA_LIB = libOSMesa.so" \
|
||||
"CC = gcc -malign-loops=2 -malign-jumps=2 -malign-functions=2" \
|
||||
"CPLUSPLUS = g++" \
|
||||
"CFLAGS = -Wall -O3 -ansi -pedantic -fPIC -ffast-math -funroll-loops -fomit-frame-pointer -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_XSHM -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_KATMAI_ASM -DPTHREADS -I/usr/X11R6/include" \
|
||||
"CFLAGS = -Wall -O3 -ansi -pedantic -fPIC -ffast-math -funroll-loops -fomit-frame-pointer -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_XSHM -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM -DPTHREADS -I/usr/X11R6/include" \
|
||||
"MAKELIB = ../bin/mklib.linux" \
|
||||
"GL_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXext -lm -lm -lpthread" \
|
||||
"GLU_LIB_DEPS = -L../lib -lGL -lm" \
|
||||
"GLUT_LIB_DEPS = -L../lib -lGLU -lGL -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm" \
|
||||
"APP_LIB_DEPS = -lm" \
|
||||
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
|
||||
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
|
||||
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
|
||||
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
|
||||
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
|
||||
X86/x86_cliptest.S X86/x86_vertex.S \
|
||||
X86/mmx_blend.S \
|
||||
X86/3dnow_xform_raw2.S X86/3dnow_xform_masked2.S \
|
||||
X86/3dnow_xform_raw3.S X86/3dnow_xform_masked3.S \
|
||||
X86/3dnow_xform_raw4.S X86/3dnow_xform_masked4.S \
|
||||
X86/3dnow_norm_raw.S X86/3dnow_vertex.S \
|
||||
X86/katmai_xform_raw2.S X86/katmai_xform_masked2.S \
|
||||
X86/katmai_xform_raw3.S X86/katmai_xform_masked3.S \
|
||||
X86/katmai_xform_raw4.S X86/katmai_xform_masked4.S \
|
||||
X86/katmai_norm_raw.S X86/katmai_vertex.S"
|
||||
X86/3dnow_xform1.S X86/3dnow_xform2.S \
|
||||
X86/3dnow_xform3.S X86/3dnow_xform4.S \
|
||||
X86/3dnow_normal.S X86/3dnow_vertex.S \
|
||||
X86/sse_xform1.S X86/sse_xform2.S \
|
||||
X86/sse_xform3.S X86/sse_xform4.S \
|
||||
X86/sse_normal.S X86/sse_vertex.S"
|
||||
|
||||
linux-x86-static:
|
||||
$(MAKE) $(MFLAGS) -f Makefile.X11 targets \
|
||||
|
@ -586,26 +580,22 @@ linux-x86-static:
|
|||
"OSMESA_LIB = libOSMesa.a" \
|
||||
"CC = gcc -malign-loops=2 -malign-jumps=2 -malign-functions=2" \
|
||||
"CPLUSPLUS = g++" \
|
||||
"CFLAGS = -Wall -O3 -ansi -pedantic -fPIC -ffast-math -funroll-loops -fomit-frame-pointer -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_XSHM -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_KATMAI_ASM -DPTHREADS -I/usr/X11R6/include" \
|
||||
"CFLAGS = -Wall -O3 -ansi -pedantic -fPIC -ffast-math -funroll-loops -fomit-frame-pointer -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_XSHM -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM -DPTHREADS -I/usr/X11R6/include" \
|
||||
"MAKELIB = ../bin/mklib.ar-ruv" \
|
||||
"GL_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXext -lm -lm -lpthread" \
|
||||
"GLU_LIB_DEPS = -L../lib -lGL -lm" \
|
||||
"GLUT_LIB_DEPS = -L../lib -lGLU -lGL -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm" \
|
||||
"APP_LIB_DEPS = -lm" \
|
||||
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
|
||||
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
|
||||
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
|
||||
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
|
||||
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
|
||||
X86/x86_cliptest.S X86/x86_vertex.S \
|
||||
X86/mmx_blend.S \
|
||||
X86/3dnow_xform_raw2.S X86/3dnow_xform_masked2.S \
|
||||
X86/3dnow_xform_raw3.S X86/3dnow_xform_masked3.S \
|
||||
X86/3dnow_xform_raw4.S X86/3dnow_xform_masked4.S \
|
||||
X86/3dnow_norm_raw.S X86/3dnow_vertex.S \
|
||||
X86/katmai_xform_raw2.S X86/katmai_xform_masked2.S \
|
||||
X86/katmai_xform_raw3.S X86/katmai_xform_masked3.S \
|
||||
X86/katmai_xform_raw4.S X86/katmai_xform_masked4.S \
|
||||
X86/katmai_norm_raw.S X86/katmai_vertex.S"
|
||||
X86/3dnow_xform1.S X86/3dnow_xform2.S \
|
||||
X86/3dnow_xform3.S X86/3dnow_xform4.S \
|
||||
X86/3dnow_normal.S X86/3dnow_vertex.S \
|
||||
X86/sse_xform1.S X86/sse_xform2.S \
|
||||
X86/sse_xform3.S X86/sse_xform4.S \
|
||||
X86/sse_normal.S X86/sse_vertex.S"
|
||||
|
||||
# Contributed by Uwe_Maurer@t-online.de
|
||||
linux-ggi:
|
||||
|
@ -637,9 +627,7 @@ linux-386-ggi:
|
|||
"MAKELIB = ../bin/mklib.ggi" \
|
||||
"APP_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXext -lXmu -lXt -lXi -lggi -lm" \
|
||||
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
|
||||
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
|
||||
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
|
||||
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
|
||||
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
|
||||
X86/x86_cliptest.S X86/x86_vertex.S"
|
||||
|
||||
linux-glide:
|
||||
|
@ -676,9 +664,7 @@ linux-386-glide:
|
|||
"GLUT_LIB_DEPS = -L../lib -lGLU -lGL -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm" \
|
||||
"APP_LIB_DEPS = -lm" \
|
||||
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
|
||||
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
|
||||
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
|
||||
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
|
||||
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
|
||||
X86/x86_cliptest.S X86/x86_vertex.S"
|
||||
|
||||
linux-386-opt-glide:
|
||||
|
@ -698,9 +684,7 @@ linux-386-opt-glide:
|
|||
"GLUT_LIB_DEPS = -L../lib -lGLU -lGL -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm" \
|
||||
"APP_LIB_DEPS = -lm" \
|
||||
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
|
||||
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
|
||||
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
|
||||
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
|
||||
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
|
||||
X86/x86_cliptest.S X86/x86_vertex.S"
|
||||
|
||||
linux-386-opt-V2-glide:
|
||||
|
@ -720,9 +704,7 @@ linux-386-opt-V2-glide:
|
|||
"GLUT_LIB_DEPS = -L../lib -lGLU -lGL -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm" \
|
||||
"APP_LIB_DEPS = -lm" \
|
||||
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
|
||||
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
|
||||
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
|
||||
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
|
||||
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
|
||||
X86/x86_cliptest.S X86/x86_vertex.S"
|
||||
|
||||
linux-x86-glide:
|
||||
|
@ -734,27 +716,23 @@ linux-x86-glide:
|
|||
"OSMESA_LIB = libOSMesa.so" \
|
||||
"CC = gcc -malign-loops=2 -malign-jumps=2 -malign-functions=2" \
|
||||
"CPLUSPLUS = g++" \
|
||||
"CFLAGS = -O3 -ansi -pedantic -fPIC -ffast-math -funroll-loops -fomit-frame-pointer -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_KATMAI_ASM -DUSE_XSHM -DFX -DPTHREADS -I/usr/X11R6/include -I/usr/include/glide -I/usr/local/glide/include -I/usr/src/mesa-glx/src/FX/X86" \
|
||||
"CFLAGS = -O3 -ansi -pedantic -fPIC -ffast-math -funroll-loops -fomit-frame-pointer -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM -DUSE_XSHM -DFX -DPTHREADS -I/usr/X11R6/include -I/usr/include/glide -I/usr/local/glide/include -I/usr/src/mesa-glx/src/FX/X86" \
|
||||
"MAKELIB = ../bin/mklib.linux" \
|
||||
"GL_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXext -lm -L/usr/local/glide/lib -lglide2x -lm -lpthread" \
|
||||
"GLU_LIB_DEPS = -L../lib -lGL -lm" \
|
||||
"GLUT_LIB_DEPS = -L../lib -lGLU -lGL -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm" \
|
||||
"APP_LIB_DEPS = -lm" \
|
||||
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
|
||||
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
|
||||
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
|
||||
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
|
||||
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
|
||||
X86/x86_cliptest.S X86/x86_vertex.S \
|
||||
X86/mmx_blend.S \
|
||||
X86/3dnow_xform_raw2.S X86/3dnow_xform_masked2.S \
|
||||
X86/3dnow_xform_raw3.S X86/3dnow_xform_masked3.S \
|
||||
X86/3dnow_xform_raw4.S X86/3dnow_xform_masked4.S \
|
||||
X86/3dnow_norm_raw.S X86/3dnow_vertex.S \
|
||||
X86/3dnow_xform1.S X86/3dnow_xform2.S \
|
||||
X86/3dnow_xform3.S X86/3dnow_xform4.S \
|
||||
X86/3dnow_normal.S X86/3dnow_vertex.S \
|
||||
FX/X86/fx_3dnow_fastpath.S \
|
||||
X86/katmai_xform_raw2.S X86/katmai_xform_masked2.S \
|
||||
X86/katmai_xform_raw3.S X86/katmai_xform_masked3.S \
|
||||
X86/katmai_xform_raw4.S X86/katmai_xform_masked4.S \
|
||||
X86/katmai_norm_raw.S X86/katmai_vertex.S"
|
||||
X86/sse_xform1.S X86/sse_xform2.S \
|
||||
X86/sse_xform3.S X86/sse_xform4.S \
|
||||
X86/sse_normal.S X86/sse_vertex.S"
|
||||
|
||||
linux-alpha:
|
||||
$(MAKE) $(MFLAGS) -f Makefile.X11 targets \
|
||||
|
@ -999,9 +977,7 @@ os2-x11:
|
|||
"MAKELIB = ..\\bin\\mklib-emx.cmd " \
|
||||
"APP_LIB_DEPS = -Zmt -Zcrtdll -Zexe -L$(X11ROOT)/XFree86/lib -lXt -lX11 -lm" \
|
||||
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
|
||||
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
|
||||
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
|
||||
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
|
||||
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
|
||||
X86/x86_cliptest.S X86/x86_vertex.S"
|
||||
|
||||
osf1:
|
||||
|
@ -1503,26 +1479,22 @@ linux-x86-debug:
|
|||
"OSMESA_LIB = libOSMesa.so" \
|
||||
"CC = gcc -malign-loops=2 -malign-jumps=2 -malign-functions=2" \
|
||||
"CPLUSPLUS = g++" \
|
||||
"CFLAGS = -O2 -g -ansi -pedantic -Wall -Wmissing-prototypes -fPIC -ffast-math -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_XSHM -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_KATMAI_ASM -DPTHREADS -I/usr/X11R6/include -DDEBUG -DMESA_DEBUG" \
|
||||
"CFLAGS = -O2 -g -ansi -pedantic -Wall -Wmissing-prototypes -fPIC -ffast-math -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199309L -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_XSHM -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM -DPTHREADS -I/usr/X11R6/include -DDEBUG -DMESA_DEBUG" \
|
||||
"MAKELIB = ../bin/mklib.linux" \
|
||||
"GL_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXext -lm -lm -lpthread" \
|
||||
"GLU_LIB_DEPS = -L../lib -lGL -lm" \
|
||||
"GLUT_LIB_DEPS = -L../lib -lGLU -lGL -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm" \
|
||||
"APP_LIB_DEPS = -lm" \
|
||||
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
|
||||
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
|
||||
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
|
||||
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
|
||||
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
|
||||
X86/x86_cliptest.S X86/x86_vertex.S \
|
||||
X86/mmx_blend.S \
|
||||
X86/3dnow_xform_raw2.S X86/3dnow_xform_masked2.S \
|
||||
X86/3dnow_xform_raw3.S X86/3dnow_xform_masked3.S \
|
||||
X86/3dnow_xform_raw4.S X86/3dnow_xform_masked4.S \
|
||||
X86/3dnow_norm_raw.S X86/3dnow_vertex.S \
|
||||
X86/katmai_xform_raw2.S X86/katmai_xform_masked2.S \
|
||||
X86/katmai_xform_raw3.S X86/katmai_xform_masked3.S \
|
||||
X86/katmai_xform_raw4.S X86/katmai_xform_masked4.S \
|
||||
X86/katmai_norm_raw.S X86/katmai_vertex.S"
|
||||
X86/3dnow_xform1.S X86/3dnow_xform2.S \
|
||||
X86/3dnow_xform3.S X86/3dnow_xform4.S \
|
||||
X86/3dnow_normal.S X86/3dnow_vertex.S \
|
||||
X86/sse_xform1.S X86/sse_xform2.S \
|
||||
X86/sse_xform3.S X86/sse_xform4.S \
|
||||
X86/sse_normal.S X86/sse_vertex.S"
|
||||
|
||||
linux-glide-debug:
|
||||
$(MAKE) $(MFLAGS) -f Makefile.X11 targets \
|
||||
|
@ -1548,25 +1520,21 @@ linux-prof:
|
|||
"OSMESA_LIB = libOSMesa.a" \
|
||||
"CC = gcc" \
|
||||
"CPLUSPLUS = g++" \
|
||||
"CFLAGS = -Wall -O2 -pg -ansi -pedantic -ffast-math -DUSE_XSHM -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_KATMAI_ASM" \
|
||||
"CFLAGS = -Wall -O2 -pg -ansi -pedantic -ffast-math -DUSE_XSHM -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM" \
|
||||
"CCFLAGS = $(CFLAGS)" \
|
||||
"MAKELIB = ../bin/mklib.ar-ruv" \
|
||||
"APP_LIB_DEPS = -L/usr/X11/lib -lX11 -lXext -lXmu -lXt -lXi -lSM -lICE -lm" \
|
||||
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
|
||||
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
|
||||
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
|
||||
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
|
||||
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
|
||||
X86/x86_cliptest.S X86/x86_vertex.S \
|
||||
X86/mmx_blend.S \
|
||||
X86/3dnow_xform_raw2.S X86/3dnow_xform_masked2.S \
|
||||
X86/3dnow_xform_raw3.S X86/3dnow_xform_masked3.S \
|
||||
X86/3dnow_xform_raw4.S X86/3dnow_xform_masked4.S \
|
||||
X86/3dnow_norm_raw.S X86/3dnow_vertex.S \
|
||||
X86/3dnow_xform1.S X86/3dnow_xform2.S \
|
||||
X86/3dnow_xform3.S X86/3dnow_xform4.S \
|
||||
X86/3dnow_normal.S X86/3dnow_vertex.S \
|
||||
FX/X86/fx_3dnow_fastpath.S \
|
||||
X86/katmai_xform_raw2.S X86/katmai_xform_masked2.S \
|
||||
X86/katmai_xform_raw3.S X86/katmai_xform_masked3.S \
|
||||
X86/katmai_xform_raw4.S X86/katmai_xform_masked4.S \
|
||||
X86/katmai_norm_raw.S X86/katmai_vertex.S"
|
||||
X86/sse_xform1.S X86/sse_xform2.S \
|
||||
X86/sse_xform3.S X86/sse_xform4.S \
|
||||
X86/sse_normal.S X86/sse_vertex.S"
|
||||
|
||||
linux-glide-prof:
|
||||
$(MAKE) $(MFLAGS) -f Makefile.X11 targets \
|
||||
|
@ -1577,22 +1545,18 @@ linux-glide-prof:
|
|||
"OSMESA_LIB = libOSMesa.a" \
|
||||
"CC = gcc" \
|
||||
"CPLUSPLUS = g++" \
|
||||
"CFLAGS = -O2 -pg -ansi -pedantic -Wall -DUSE_XSHM -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_KATMAI_ASM -I/usr/include/glide -I/usr/local/glide/include" \
|
||||
"CFLAGS = -O2 -pg -ansi -pedantic -Wall -DUSE_XSHM -D_SVID_SOURCE -D_BSD_SOURCE -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM -I/usr/include/glide -I/usr/local/glide/include" \
|
||||
"CCFLAGS = $(CFLAGS)" \
|
||||
"MAKELIB = ../bin/mklib.ar-ruv" \
|
||||
"APP_LIB_DEPS = -L/usr/local/glide/lib -lglide2x -L/usr/X11/lib -lX11 -lXext -lXmu -lXt -lXi -lSM -lICE -lm" \
|
||||
"ASM_SOURCES = X86/common_x86_asm.S X86/glapi_x86.S \
|
||||
X86/x86_xform_raw2.S X86/x86_xform_masked2.S \
|
||||
X86/x86_xform_raw3.S X86/x86_xform_masked3.S \
|
||||
X86/x86_xform_raw4.S X86/x86_xform_masked4.S \
|
||||
X86/x86_xform2.S X86/x86_xform3.S X86/x86_xform4.S \
|
||||
X86/x86_cliptest.S X86/x86_vertex.S \
|
||||
X86/mmx_blend.S \
|
||||
X86/3dnow_xform_raw2.S X86/3dnow_xform_masked2.S \
|
||||
X86/3dnow_xform_raw3.S X86/3dnow_xform_masked3.S \
|
||||
X86/3dnow_xform_raw4.S X86/3dnow_xform_masked4.S \
|
||||
X86/3dnow_norm_raw.S X86/3dnow_vertex.S \
|
||||
X86/3dnow_xform1.S X86/3dnow_xform2.S \
|
||||
X86/3dnow_xform3.S X86/3dnow_xform4.S \
|
||||
X86/3dnow_normal.S X86/3dnow_vertex.S \
|
||||
FX/X86/fx_3dnow_fastpath.S \
|
||||
X86/katmai_xform_raw2.S X86/katmai_xform_masked2.S \
|
||||
X86/katmai_xform_raw3.S X86/katmai_xform_masked3.S \
|
||||
X86/katmai_xform_raw4.S X86/katmai_xform_masked4.S \
|
||||
X86/katmai_norm_raw.S X86/katmai_vertex.S"
|
||||
X86/sse_xform1.S X86/sse_xform2.S \
|
||||
X86/sse_xform3.S X86/sse_xform4.S \
|
||||
X86/sse_normal.S X86/sse_vertex.S"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# $Id: Makefile.X11,v 1.50 2001/03/29 03:41:40 gareth Exp $
|
||||
# $Id: Makefile.X11,v 1.51 2001/03/29 06:46:15 gareth Exp $
|
||||
|
||||
# Mesa 3-D graphics library
|
||||
# Version: 3.5
|
||||
|
@ -108,7 +108,7 @@ CORE_SOURCES = \
|
|||
X86/x86.c \
|
||||
X86/common_x86.c \
|
||||
X86/3dnow.c \
|
||||
X86/katmai.c \
|
||||
X86/sse.c \
|
||||
math/m_debug_norm.c \
|
||||
math/m_debug_vertex.c \
|
||||
math/m_debug_xform.c \
|
||||
|
@ -280,7 +280,7 @@ X86/common_x86.o: X86/common_x86.c
|
|||
X86/common_x86_asm.o: X86/common_x86_asm.S X86/matypes.h
|
||||
X86/3dnow.o: X86/3dnow.c
|
||||
$(CC) -c -I. -I$(INCDIR) $(CFLAGS) $< -o $@
|
||||
X86/katmai.o: X86/katmai.c
|
||||
X86/sse.o: X86/sse.c
|
||||
$(CC) -c -I. -I$(INCDIR) $(CFLAGS) $< -o $@
|
||||
X86/matypes.h: mtypes.h tnl/t_context.h X86/gen_matypes.c
|
||||
$(CC) -I. -I$(INCDIR) $(CFLAGS) X86/gen_matypes.c -o X86/gen_matypes
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# $Id: Makefile.X11,v 1.50 2001/03/29 03:41:40 gareth Exp $
|
||||
# $Id: Makefile.X11,v 1.51 2001/03/29 06:46:15 gareth Exp $
|
||||
|
||||
# Mesa 3-D graphics library
|
||||
# Version: 3.5
|
||||
|
@ -108,7 +108,7 @@ CORE_SOURCES = \
|
|||
X86/x86.c \
|
||||
X86/common_x86.c \
|
||||
X86/3dnow.c \
|
||||
X86/katmai.c \
|
||||
X86/sse.c \
|
||||
math/m_debug_norm.c \
|
||||
math/m_debug_vertex.c \
|
||||
math/m_debug_xform.c \
|
||||
|
@ -280,7 +280,7 @@ X86/common_x86.o: X86/common_x86.c
|
|||
X86/common_x86_asm.o: X86/common_x86_asm.S X86/matypes.h
|
||||
X86/3dnow.o: X86/3dnow.c
|
||||
$(CC) -c -I. -I$(INCDIR) $(CFLAGS) $< -o $@
|
||||
X86/katmai.o: X86/katmai.c
|
||||
X86/sse.o: X86/sse.c
|
||||
$(CC) -c -I. -I$(INCDIR) $(CFLAGS) $< -o $@
|
||||
X86/matypes.h: mtypes.h tnl/t_context.h X86/gen_matypes.c
|
||||
$(CC) -I. -I$(INCDIR) $(CFLAGS) X86/gen_matypes.c -o X86/gen_matypes
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $Id: m_debug_norm.c,v 1.5 2001/03/12 00:48:41 gareth Exp $ */
|
||||
/* $Id: m_debug_norm.c,v 1.6 2001/03/29 06:46:27 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
|
@ -186,8 +186,7 @@ static void ref_norm_transform_normalize( const GLmatrix *mat,
|
|||
* Normal transformation tests
|
||||
*/
|
||||
|
||||
static int test_norm_function( normal_func func, int mtype,
|
||||
int masked, long *cycles )
|
||||
static int test_norm_function( normal_func func, int mtype, long *cycles )
|
||||
{
|
||||
GLvector3f source[1], dest[1], dest2[1], ref[1], ref2[1];
|
||||
GLmatrix mat[1];
|
||||
|
@ -195,7 +194,6 @@ static int test_norm_function( normal_func func, int mtype,
|
|||
GLfloat d2[TEST_COUNT][3], r2[TEST_COUNT][3], length[TEST_COUNT];
|
||||
GLfloat scale;
|
||||
GLfloat *m;
|
||||
GLubyte mask[TEST_COUNT];
|
||||
int i, j;
|
||||
#ifdef RUN_DEBUG_BENCHMARK
|
||||
int cycle_i; /* the counter for the benchmarks we run */
|
||||
|
@ -231,7 +229,6 @@ static int test_norm_function( normal_func func, int mtype,
|
|||
}
|
||||
|
||||
for ( i = 0 ; i < TEST_COUNT ; i++ ) {
|
||||
mask[i] = i % 2; /* mask every 2nd element */
|
||||
ASSIGN_3V( d[i], 0.0, 0.0, 0.0 );
|
||||
ASSIGN_3V( s[i], 0.0, 0.0, 0.0 );
|
||||
ASSIGN_3V( d2[i], 0.0, 0.0, 0.0 );
|
||||
|
@ -278,31 +275,16 @@ static int test_norm_function( normal_func func, int mtype,
|
|||
}
|
||||
|
||||
if ( mesa_profile ) {
|
||||
if ( masked ) {
|
||||
BEGIN_RACE( *cycles );
|
||||
func( mat, scale, source, NULL, mask, dest );
|
||||
END_RACE( *cycles );
|
||||
func( mat, scale, source, length, mask, dest2 );
|
||||
} else {
|
||||
BEGIN_RACE( *cycles );
|
||||
func( mat, scale, source, NULL, NULL, dest );
|
||||
END_RACE( *cycles );
|
||||
func( mat, scale, source, length, NULL, dest2 );
|
||||
}
|
||||
} else {
|
||||
if ( masked ) {
|
||||
func( mat, scale, source, NULL, mask, dest );
|
||||
func( mat, scale, source, length, mask, dest2 );
|
||||
} else {
|
||||
func( mat, scale, source, NULL, NULL, dest );
|
||||
func( mat, scale, source, length, NULL, dest2 );
|
||||
}
|
||||
}
|
||||
|
||||
for ( i = 0 ; i < TEST_COUNT ; i++ ) {
|
||||
if ( masked && !(mask[i] & 1) )
|
||||
continue;
|
||||
|
||||
for ( j = 0 ; j < 3 ; j++ ) {
|
||||
if ( significand_match( d[i][j], r[i][j] ) < REQUIRED_PRECISION ) {
|
||||
printf( "-----------------------------\n" );
|
||||
|
@ -344,7 +326,6 @@ static int test_norm_function( normal_func func, int mtype,
|
|||
|
||||
void _math_test_all_normal_transform_functions( char *description )
|
||||
{
|
||||
int masked;
|
||||
int mtype;
|
||||
long benchmark_tab[0xf][0x4];
|
||||
static int first_time = 1;
|
||||
|
@ -362,46 +343,33 @@ void _math_test_all_normal_transform_functions( char *description )
|
|||
}
|
||||
printf( "normal transform results after hooking in %s functions:\n",
|
||||
description );
|
||||
}
|
||||
#endif
|
||||
|
||||
for ( masked = 0 ; masked <= 1 ; masked++ ) {
|
||||
int cma = masked ? 1 : 0;
|
||||
char *cmastring = masked ? "CULL_MASK_ACTIVE" : "0";
|
||||
|
||||
#ifdef RUN_DEBUG_BENCHMARK
|
||||
if ( mesa_profile ) {
|
||||
printf( "\n culling: %s \n", masked ? "CULL_MASK_ACTIVE" : "0" );
|
||||
printf( "\n-------------------------------------------------------\n" );
|
||||
}
|
||||
#endif
|
||||
|
||||
for ( mtype = 0 ; mtype < 8 ; mtype++ ) {
|
||||
normal_func func = _mesa_normal_tab[norm_types[mtype]][cma];
|
||||
long *cycles = &(benchmark_tab[mtype][cma]);
|
||||
normal_func func = _mesa_normal_tab[norm_types[mtype]][0];
|
||||
long *cycles = &(benchmark_tab[mtype][0]);
|
||||
|
||||
if ( test_norm_function( func, mtype, masked, cycles ) == 0 ) {
|
||||
if ( test_norm_function( func, mtype, cycles ) == 0 ) {
|
||||
char buf[100];
|
||||
sprintf( buf, "_mesa_normal_tab[%s][%s] failed test (%s)",
|
||||
cmastring, norm_strings[mtype], description );
|
||||
sprintf( buf, "_mesa_normal_tab[0][%s] failed test (%s)",
|
||||
norm_strings[mtype], description );
|
||||
_mesa_problem( NULL, buf );
|
||||
}
|
||||
|
||||
#ifdef RUN_DEBUG_BENCHMARK
|
||||
if ( mesa_profile ) {
|
||||
printf( " %li\t", benchmark_tab[mtype][cma] );
|
||||
printf( " %li\t", benchmark_tab[mtype][0] );
|
||||
printf( " | [%s]\n", norm_strings[mtype] );
|
||||
}
|
||||
}
|
||||
if ( mesa_profile )
|
||||
printf( "\n" );
|
||||
#else
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#ifdef RUN_DEBUG_BENCHMARK
|
||||
if ( mesa_profile )
|
||||
if ( mesa_profile ) {
|
||||
printf( "\n" );
|
||||
fflush( stdout );
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $Id: m_debug_xform.c,v 1.6 2001/03/12 02:02:36 gareth Exp $ */
|
||||
/* $Id: m_debug_xform.c,v 1.7 2001/03/29 06:46:27 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
|
@ -159,8 +159,8 @@ static GLfloat s[TEST_COUNT][4] ALIGN16;
|
|||
static GLfloat d[TEST_COUNT][4] ALIGN16;
|
||||
static GLfloat r[TEST_COUNT][4] ALIGN16;
|
||||
|
||||
static int test_transform_function( transform_func func, int psize, int mtype,
|
||||
int masked, long *cycles )
|
||||
static int test_transform_function( transform_func func, int psize,
|
||||
int mtype, long *cycles )
|
||||
{
|
||||
GLvector4f source[1], dest[1], ref[1];
|
||||
GLmatrix mat[1];
|
||||
|
@ -238,28 +238,15 @@ static int test_transform_function( transform_func func, int psize, int mtype,
|
|||
ref_transform( ref, mat, source, NULL, 0 );
|
||||
|
||||
if ( mesa_profile ) {
|
||||
if ( masked ) {
|
||||
BEGIN_RACE( *cycles );
|
||||
func( dest, mat->m, source, mask, 1 );
|
||||
END_RACE( *cycles );
|
||||
} else {
|
||||
BEGIN_RACE( *cycles );
|
||||
func( dest, mat->m, source, NULL, 0 );
|
||||
END_RACE( *cycles );
|
||||
}
|
||||
}
|
||||
else {
|
||||
if ( masked ) {
|
||||
func( dest, mat->m, source, mask, 1 );
|
||||
} else {
|
||||
func( dest, mat->m, source, NULL, 0 );
|
||||
}
|
||||
}
|
||||
|
||||
for ( i = 0 ; i < TEST_COUNT ; i++ ) {
|
||||
if ( masked && (mask[i] & 1) )
|
||||
continue;
|
||||
|
||||
for ( j = 0 ; j < 4 ; j++ ) {
|
||||
if ( significand_match( d[i][j], r[i][j] ) < REQUIRED_PRECISION ) {
|
||||
printf( "-----------------------------\n" );
|
||||
|
@ -287,7 +274,7 @@ static int test_transform_function( transform_func func, int psize, int mtype,
|
|||
|
||||
void _math_test_all_transform_functions( char *description )
|
||||
{
|
||||
int masked, psize, mtype;
|
||||
int psize, mtype;
|
||||
long benchmark_tab[2][4][7];
|
||||
static int first_time = 1;
|
||||
|
||||
|
@ -306,13 +293,9 @@ void _math_test_all_transform_functions( char *description )
|
|||
}
|
||||
#endif
|
||||
|
||||
for ( masked = 0 ; masked <= 1 ; masked++ ) {
|
||||
int cma = masked ? 1 : 0;
|
||||
char *cmastring = masked ? "CULL_MASK_ACTIVE" : "0";
|
||||
|
||||
#ifdef RUN_DEBUG_BENCHMARK
|
||||
if ( mesa_profile ) {
|
||||
printf( "\n culling: %s \n", masked ? "CULL_MASK_ACTIVE" : "0" );
|
||||
printf( "\n" );
|
||||
for ( psize = 1 ; psize <= 4 ; psize++ ) {
|
||||
printf( " p%d\t", psize );
|
||||
}
|
||||
|
@ -322,19 +305,18 @@ void _math_test_all_transform_functions( char *description )
|
|||
|
||||
for ( mtype = 0 ; mtype < 7 ; mtype++ ) {
|
||||
for ( psize = 1 ; psize <= 4 ; psize++ ) {
|
||||
transform_func func = _mesa_transform_tab[cma][psize][mtypes[mtype]];
|
||||
long *cycles = &(benchmark_tab[cma][psize-1][mtype]);
|
||||
transform_func func = _mesa_transform_tab[0][psize][mtypes[mtype]];
|
||||
long *cycles = &(benchmark_tab[0][psize-1][mtype]);
|
||||
|
||||
if ( test_transform_function( func, psize, mtype,
|
||||
masked, cycles ) == 0 ) {
|
||||
if ( test_transform_function( func, psize, mtype, cycles ) == 0 ) {
|
||||
char buf[100];
|
||||
sprintf( buf, "_mesa_transform_tab[%s][%d][%s] failed test (%s)",
|
||||
cmastring, psize, mstrings[mtype], description );
|
||||
sprintf( buf, "_mesa_transform_tab[0][%d][%s] failed test (%s)",
|
||||
psize, mstrings[mtype], description );
|
||||
_mesa_problem( NULL, buf );
|
||||
}
|
||||
#ifdef RUN_DEBUG_BENCHMARK
|
||||
if ( mesa_profile )
|
||||
printf( " %li\t", benchmark_tab[cma][psize-1][mtype] );
|
||||
printf( " %li\t", benchmark_tab[0][psize-1][mtype] );
|
||||
#endif
|
||||
}
|
||||
#ifdef RUN_DEBUG_BENCHMARK
|
||||
|
@ -346,7 +328,6 @@ void _math_test_all_transform_functions( char *description )
|
|||
if ( mesa_profile )
|
||||
printf( "\n" );
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $Id: 3dnow.c,v 1.17 2001/03/28 20:44:43 gareth Exp $ */
|
||||
/* $Id: 3dnow.c,v 1.18 2001/03/29 06:46:15 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
|
@ -51,31 +51,31 @@
|
|||
const GLubyte flag
|
||||
|
||||
|
||||
#define DECLARE_XFORM_GROUP( pfx, sz, masked ) \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_general_##masked( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_identity_##masked( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d_no_rot_##masked( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_perspective_##masked( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d_##masked( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d_no_rot_##masked( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d_##masked( XFORM_ARGS );
|
||||
#define DECLARE_XFORM_GROUP( pfx, sz ) \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_general( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_identity( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d_no_rot( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_perspective( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d_no_rot( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d( XFORM_ARGS );
|
||||
|
||||
|
||||
#define ASSIGN_XFORM_GROUP( pfx, cma, sz, masked ) \
|
||||
_mesa_transform_tab[cma][sz][MATRIX_GENERAL] = \
|
||||
_mesa_##pfx##_transform_points##sz##_general_##masked; \
|
||||
_mesa_transform_tab[cma][sz][MATRIX_IDENTITY] = \
|
||||
_mesa_##pfx##_transform_points##sz##_identity_##masked; \
|
||||
_mesa_transform_tab[cma][sz][MATRIX_3D_NO_ROT] = \
|
||||
_mesa_##pfx##_transform_points##sz##_3d_no_rot_##masked; \
|
||||
_mesa_transform_tab[cma][sz][MATRIX_PERSPECTIVE] = \
|
||||
_mesa_##pfx##_transform_points##sz##_perspective_##masked; \
|
||||
_mesa_transform_tab[cma][sz][MATRIX_2D] = \
|
||||
_mesa_##pfx##_transform_points##sz##_2d_##masked; \
|
||||
_mesa_transform_tab[cma][sz][MATRIX_2D_NO_ROT] = \
|
||||
_mesa_##pfx##_transform_points##sz##_2d_no_rot_##masked; \
|
||||
_mesa_transform_tab[cma][sz][MATRIX_3D] = \
|
||||
_mesa_##pfx##_transform_points##sz##_3d_##masked;
|
||||
#define ASSIGN_XFORM_GROUP( pfx, sz ) \
|
||||
_mesa_transform_tab[0][sz][MATRIX_GENERAL] = \
|
||||
_mesa_##pfx##_transform_points##sz##_general; \
|
||||
_mesa_transform_tab[0][sz][MATRIX_IDENTITY] = \
|
||||
_mesa_##pfx##_transform_points##sz##_identity; \
|
||||
_mesa_transform_tab[0][sz][MATRIX_3D_NO_ROT] = \
|
||||
_mesa_##pfx##_transform_points##sz##_3d_no_rot; \
|
||||
_mesa_transform_tab[0][sz][MATRIX_PERSPECTIVE] = \
|
||||
_mesa_##pfx##_transform_points##sz##_perspective; \
|
||||
_mesa_transform_tab[0][sz][MATRIX_2D] = \
|
||||
_mesa_##pfx##_transform_points##sz##_2d; \
|
||||
_mesa_transform_tab[0][sz][MATRIX_2D_NO_ROT] = \
|
||||
_mesa_##pfx##_transform_points##sz##_2d_no_rot; \
|
||||
_mesa_transform_tab[0][sz][MATRIX_3D] = \
|
||||
_mesa_##pfx##_transform_points##sz##_3d;
|
||||
|
||||
|
||||
|
||||
|
@ -87,47 +87,42 @@ extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d_##masked( XFORM_ARGS
|
|||
GLvector3f *dest
|
||||
|
||||
|
||||
#define DECLARE_NORM_GROUP( pfx, masked ) \
|
||||
extern void _ASMAPI _mesa_##pfx##_rescale_normals_##masked( NORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_normalize_normals_##masked( NORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_normals_##masked( NORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_normals_no_rot_##masked( NORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals_##masked( NORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals_no_rot_##masked( NORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals_##masked( NORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals_no_rot_##masked( NORM_ARGS );
|
||||
#define DECLARE_NORM_GROUP( pfx ) \
|
||||
extern void _ASMAPI _mesa_##pfx##_rescale_normals( NORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_normalize_normals( NORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_normals( NORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_normals_no_rot( NORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals( NORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals_no_rot( NORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals( NORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals_no_rot( NORM_ARGS );
|
||||
|
||||
|
||||
#define ASSIGN_NORM_GROUP( pfx, cma, masked ) \
|
||||
_mesa_normal_tab[NORM_RESCALE][cma] = \
|
||||
_mesa_##pfx##_rescale_normals_##masked; \
|
||||
_mesa_normal_tab[NORM_NORMALIZE][cma] = \
|
||||
_mesa_##pfx##_normalize_normals_##masked; \
|
||||
_mesa_normal_tab[NORM_TRANSFORM][cma] = \
|
||||
_mesa_##pfx##_transform_normals_##masked; \
|
||||
_mesa_normal_tab[NORM_TRANSFORM_NO_ROT][cma] = \
|
||||
_mesa_##pfx##_transform_normals_no_rot_##masked; \
|
||||
_mesa_normal_tab[NORM_TRANSFORM | NORM_RESCALE][cma] = \
|
||||
_mesa_##pfx##_transform_rescale_normals_##masked; \
|
||||
_mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE][cma] = \
|
||||
_mesa_##pfx##_transform_rescale_normals_no_rot_##masked; \
|
||||
_mesa_normal_tab[NORM_TRANSFORM | NORM_NORMALIZE][cma] = \
|
||||
_mesa_##pfx##_transform_normalize_normals_##masked; \
|
||||
_mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE][cma] = \
|
||||
_mesa_##pfx##_transform_normalize_normals_no_rot_##masked;
|
||||
/*
 * Install the assembly normal routines for CPU prefix `pfx' into column 0
 * of _mesa_normal_tab, one entry per NORM_* flag combination.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and stays correct under an unbraced if/else.  Invoke it
 * with a trailing semicolon:  ASSIGN_NORM_GROUP( 3dnow );
 */
#define ASSIGN_NORM_GROUP( pfx )					\
do {									\
   _mesa_normal_tab[NORM_RESCALE][0] =					\
      _mesa_##pfx##_rescale_normals;					\
   _mesa_normal_tab[NORM_NORMALIZE][0] =				\
      _mesa_##pfx##_normalize_normals;					\
   _mesa_normal_tab[NORM_TRANSFORM][0] =				\
      _mesa_##pfx##_transform_normals;					\
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT][0] =				\
      _mesa_##pfx##_transform_normals_no_rot;				\
   _mesa_normal_tab[NORM_TRANSFORM|NORM_RESCALE][0] =			\
      _mesa_##pfx##_transform_rescale_normals;				\
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT|NORM_RESCALE][0] =		\
      _mesa_##pfx##_transform_rescale_normals_no_rot;			\
   _mesa_normal_tab[NORM_TRANSFORM|NORM_NORMALIZE][0] =			\
      _mesa_##pfx##_transform_normalize_normals;			\
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT|NORM_NORMALIZE][0] =		\
      _mesa_##pfx##_transform_normalize_normals_no_rot;			\
} while (0)
|
||||
|
||||
|
||||
#ifdef USE_3DNOW_ASM
|
||||
DECLARE_XFORM_GROUP( 3dnow, 2, raw )
|
||||
DECLARE_XFORM_GROUP( 3dnow, 3, raw )
|
||||
DECLARE_XFORM_GROUP( 3dnow, 4, raw )
|
||||
DECLARE_XFORM_GROUP( 3dnow, 2 )
|
||||
DECLARE_XFORM_GROUP( 3dnow, 3 )
|
||||
DECLARE_XFORM_GROUP( 3dnow, 4 )
|
||||
|
||||
DECLARE_XFORM_GROUP( 3dnow, 2, masked )
|
||||
DECLARE_XFORM_GROUP( 3dnow, 3, masked )
|
||||
DECLARE_XFORM_GROUP( 3dnow, 4, masked )
|
||||
|
||||
DECLARE_NORM_GROUP( 3dnow, raw )
|
||||
/*DECLARE_NORM_GROUP( 3dnow, masked )*/
|
||||
DECLARE_NORM_GROUP( 3dnow )
|
||||
|
||||
|
||||
extern void _ASMAPI
|
||||
|
@ -155,16 +150,11 @@ _mesa_3dnow_project_clipped_vertices( GLfloat *first,
|
|||
void _mesa_init_3dnow_transform_asm( void )
|
||||
{
|
||||
#ifdef USE_3DNOW_ASM
|
||||
ASSIGN_XFORM_GROUP( 3dnow, 0, 2, raw );
|
||||
ASSIGN_XFORM_GROUP( 3dnow, 0, 3, raw );
|
||||
ASSIGN_XFORM_GROUP( 3dnow, 0, 4, raw );
|
||||
ASSIGN_XFORM_GROUP( 3dnow, 2 );
|
||||
ASSIGN_XFORM_GROUP( 3dnow, 3 );
|
||||
ASSIGN_XFORM_GROUP( 3dnow, 4 );
|
||||
|
||||
/* ASSIGN_XFORM_GROUP( 3dnow, CULL_MASK_ACTIVE, 2, masked ); */
|
||||
/* ASSIGN_XFORM_GROUP( 3dnow, CULL_MASK_ACTIVE, 3, masked ); */
|
||||
/* ASSIGN_XFORM_GROUP( 3dnow, CULL_MASK_ACTIVE, 4, masked ); */
|
||||
|
||||
ASSIGN_NORM_GROUP( 3dnow, 0, raw );
|
||||
/* ASSIGN_NORM_GROUP( 3dnow, CULL_MASK_ACTIVE, masked ); */
|
||||
ASSIGN_NORM_GROUP( 3dnow );
|
||||
|
||||
#ifdef DEBUG
|
||||
_math_test_all_transform_functions( "3DNow!" );
|
||||
|
@ -177,6 +167,7 @@ void _mesa_init_3dnow_vertex_asm( void )
|
|||
{
|
||||
#ifdef USE_3DNOW_ASM
|
||||
_mesa_xform_points3_v16_general = _mesa_v16_3dnow_general_xform;
|
||||
|
||||
_mesa_project_v16 = _mesa_3dnow_project_vertices;
|
||||
_mesa_project_clipped_v16 = _mesa_3dnow_project_clipped_vertices;
|
||||
|
||||
|
|
|
@ -0,0 +1,858 @@
|
|||
/* $Id: 3dnow_normal.S,v 1.1 2001/03/29 06:46:16 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* 3DNow! assembly code by Holger Waechtler
|
||||
*/
|
||||
|
||||
#include "matypes.h"
|
||||
#include "norm_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
#define M(i) REGOFF(i * 4, ECX)
|
||||
#define STRIDE REGOFF(12, ESI)
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_normalize_normals( NORM_ARGS )
 *
 * Pass 1 (transform): for every input normal x, write
 *     r0 = x0*m0 + x1*m1 + x2*m2
 *     r1 = x0*m4 + x1*m5 + x2*m6
 *     r2 = x0*m8 + x1*m9 + x2*m10
 * to dest (12 bytes per result), with m taken from mat->inv.  When
 * lengths == NULL the matrix elements are first multiplied by scale.
 *
 * Pass 2 (normalize, in place on dest): when lengths != NULL each result
 * is multiplied by the matching lengths[] entry (presumably a precomputed
 * reciprocal length -- confirm with the caller); otherwise 1/sqrt(r.r) is
 * computed with PFRSQRT / PFRSQIT1 / PFRCPIT2.
 *
 * Registers: ESI = in, EDI = lengths, ECX = mat->inv, EDX = input cursor,
 *            EAX = output cursor, EBP = remaining count.
 * Arguments are fetched through the ARG_* macros of norm_args.h, which
 * depend on FRAME_OFFSET matching the number of bytes pushed so far.
 *
 * NOTE: each loop pre-loads the next element before testing the counter,
 * so the last iteration reads (but never uses) one element past the end.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
GLNAME(_mesa_3dnow_transform_normalize_normals):

#define FRAME_OFFSET 12

    PUSH_L    ( EDI )
    PUSH_L    ( ESI )
    PUSH_L    ( EBP )

    MOV_L     ( ARG_LENGTHS, EDI )
    MOV_L     ( ARG_IN, ESI )
    MOV_L     ( ARG_DEST, EAX )
    MOV_L     ( REGOFF(V3F_COUNT, ESI), EBP )   /* dest->count = in->count */
    MOV_L     ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L     ( REGOFF(V3F_START, ESI), EDX )   /* in->start */
    MOV_L     ( REGOFF(V3F_START, EAX), EAX )   /* dest->start */
    MOV_L     ( ARG_MAT, ECX )
    MOV_L     ( REGOFF(MATRIX_INV, ECX), ECX )  /* mat->inv */

    CMP_L     ( CONST(0), EBP )                 /* count > 0 ?? */
    JE        ( LLBL (G3TN_end) )

    MOV_L     ( REGOFF (V3F_COUNT, ESI), EBP )
    FEMMS

    PUSH_L    ( EBP )
    PUSH_L    ( EAX )
    PUSH_L    ( EDX )                           /* save counter & pointers for */
                                                /* the normalize pass          */
#undef FRAME_OFFSET
#define FRAME_OFFSET 24                         /* six dwords are now pushed   */

    MOVQ      ( M(0), MM3 )                     /* m1              | m0              */
    MOVQ      ( M(4), MM4 )                     /* m5              | m4              */

    MOVD      ( M(2), MM5 )                     /*                 | m2              */
    PUNPCKLDQ ( M(6), MM5 )                     /* m6              | m2              */

    MOVQ      ( M(8), MM6 )                     /* m9              | m8              */
    MOVD      ( M(10), MM7 )                    /*                 | m10             */
                                                /* (only the low lane is consumed)   */

    CMP_L     ( CONST(0), EDI )                 /* lengths == 0 ? */
    JNE       ( LLBL (G3TN_scale_end ) )

    MOVD      ( ARG_SCALE, MM0 )                /*                 | scale           */
    PUNPCKLDQ ( MM0, MM0 )                      /* scale           | scale           */

    PFMUL     ( MM0, MM3 )                      /* scale * m1      | scale * m0      */
    PFMUL     ( MM0, MM4 )                      /* scale * m5      | scale * m4      */
    PFMUL     ( MM0, MM5 )                      /* scale * m6      | scale * m2      */
    PFMUL     ( MM0, MM6 )                      /* scale * m9      | scale * m8      */
    PFMUL     ( MM0, MM7 )                      /*                 | scale * m10     */

LLBL (G3TN_scale_end):
    MOVQ      ( REGIND (EDX), MM0 )             /* x1              | x0              */
    MOVD      ( REGOFF (8, EDX), MM2 )          /*                 | x2              */

ALIGNTEXT32
LLBL (G3TN_transform):
    MOVQ      ( MM0, MM1 )                      /* x1              | x0              */
    PUNPCKLDQ ( MM2, MM2 )                      /* x2              | x2              */

    PFMUL     ( MM3, MM0 )                      /* x1*m1           | x0*m0           */
    ADD_L     ( CONST(12), EAX )                /* next r                            */

    PREFETCHW ( REGIND(EAX) )

    PFMUL     ( MM4, MM1 )                      /* x1*m5           | x0*m4           */
    PFACC     ( MM1, MM0 )                      /* x0*m4+x1*m5     | x0*m0+x1*m1     */

    PFMUL     ( MM5, MM2 )                      /* x2*m6           | x2*m2           */
    PFADD     ( MM2, MM0 )                      /* x0*m4+x1*m5+x2*m6 | x0*m0+x1*m1+x2*m2 */

    MOVQ      ( REGIND (EDX), MM1 )             /* x1              | x0              */
    MOVQ      ( MM0, REGOFF(-12, EAX) )         /* write r0, r1                      */

    PFMUL     ( MM6, MM1 )                      /* x1*m9           | x0*m8           */
    MOVD      ( REGOFF (8, EDX), MM2 )          /*                 | x2              */

    PFMUL     ( MM7, MM2 )                      /*                 | x2*m10          */
    PFACC     ( MM1, MM1 )                      /* *not used*      | x0*m8+x1*m9     */

    PFADD     ( MM2, MM1 )                      /* *not used*      | x0*m8+x1*m9+x2*m10 */
    ADD_L     ( STRIDE, EDX )                   /* next normal                       */

    PREFETCH  ( REGIND(EDX) )

    MOVD      ( MM1, REGOFF(-4, EAX) )          /* write r2                          */
    MOVQ      ( REGIND (EDX), MM0 )             /* x1              | x0              */

    MOVD      ( REGOFF (8, EDX), MM2 )          /*                 | x2              */
    DEC_L     ( EBP )                           /* decrement normal counter          */
    JNZ       ( LLBL (G3TN_transform) )         /* DEC leaves CF alone: test ZF only */


    POP_L     ( EDX )                           /* end of transform ---              */
    POP_L     ( EAX )                           /*    now normalizing ...            */
    POP_L     ( EBP )

    MOVQ      ( REGIND(EAX), MM0 )              /* x1              | x0              */
    MOVD      ( REGOFF(8, EAX), MM1 )           /*                 | x2              */

    CMP_L     ( CONST(0), EDI )                 /* lengths == 0 ? */
    JE        ( LLBL (G3TN_norm ) )             /* calculate lengths                 */


ALIGNTEXT32
LLBL (G3TN_norm_w_lengths):

    PREFETCHW ( REGOFF(12,EAX) )

    MOVD      ( REGIND (EDI), MM3 )             /*                 | length (x)      */
    PFMUL     ( MM3, MM1 )                      /*                 | x2 (normalized) */

    PUNPCKLDQ ( MM3, MM3 )                      /* length (x)      | length (x)      */
    PFMUL     ( MM3, MM0 )                      /* x1 (normalized) | x0 (normalized) */

    ADD_L     ( STRIDE, EDX )                   /* next normal                       */
    ADD_L     ( CONST(4), EDI )                 /* next length                       */

    PREFETCH  ( REGIND(EDI) )

    MOVQ      ( MM0, REGIND(EAX) )              /* write new x0, x1                  */
    MOVD      ( MM1, REGOFF(8, EAX) )           /* write new x2                      */

    ADD_L     ( CONST(12), EAX )                /* next r                            */
    DEC_L     ( EBP )                           /* decrement normal counter          */

    MOVQ      ( REGIND(EAX), MM0 )              /* x1              | x0              */
    MOVD      ( REGOFF(8, EAX), MM1 )           /*                 | x2              */
    JNZ       ( LLBL (G3TN_norm_w_lengths) )    /* ZF still from DEC (MOVs keep flags) */
    JMP       ( LLBL (G3TN_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TN_norm):

    PREFETCHW ( REGIND(EAX) )

    MOVQ      ( MM0, MM3 )                      /* x1              | x0              */
    MOVQ      ( MM1, MM4 )                      /*                 | x2              */

    PFMUL     ( MM0, MM3 )                      /* x1*x1           | x0*x0           */
    ADD_L     ( CONST(12), EAX )                /* next r                            */

    PFMUL     ( MM1, MM4 )                      /*                 | x2*x2           */
    PFADD     ( MM4, MM3 )                      /* x1*x1           | x0*x0+x2*x2     */

    PFACC     ( MM3, MM3 )                      /* **not used**    | x0*x0+x1*x1+x2*x2 */
    PFRSQRT   ( MM3, MM5 )                      /* approx 1/sqrt (x0*x0+x1*x1+x2*x2) */

    MOVQ      ( MM5, MM4 )
    PUNPCKLDQ ( MM3, MM3 )

    DEC_L     ( EBP )                           /* decrement normal counter          */
    PFMUL     ( MM5, MM5 )

    PFRSQIT1  ( MM3, MM5 )                      /* refine the reciprocal square root */
    PFRCPIT2  ( MM4, MM5 )

    PFMUL     ( MM5, MM0 )                      /* x1 (normalized) | x0 (normalized) */

    MOVQ      ( MM0, REGOFF(-12, EAX) )         /* write new x0, x1                  */
    PFMUL     ( MM5, MM1 )                      /*                 | x2 (normalized) */

    MOVD      ( MM1, REGOFF(-4, EAX) )          /* write new x2                      */
    MOVQ      ( REGIND (EAX), MM0 )             /* x1              | x0              */

    MOVD      ( REGOFF(8, EAX), MM1 )           /*                 | x2              */
    JNZ       ( LLBL (G3TN_norm) )              /* ZF from DEC; 3DNow!/MMX ops keep flags */

LLBL (G3TN_exit_3dnow):
    FEMMS

LLBL (G3TN_end):
    POP_L     ( EBP )
    POP_L     ( ESI )
    POP_L     ( EDI )
    RET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_normalize_normals_no_rot( NORM_ARGS )
 *
 * Single pass: scale each input normal by the diagonal of mat->inv
 *     r = ( x0*m0, x1*m5, x2*m10 )
 * and normalize it.  When lengths != NULL the result is multiplied by the
 * matching lengths[] entry (presumably a precomputed reciprocal length --
 * confirm with the caller).  When lengths == NULL the diagonal is first
 * multiplied by scale and 1/sqrt(r.r) is computed with
 * PFRSQRT / PFRSQIT1 / PFRCPIT2.
 *
 * Registers: ESI = in, EDI = lengths cursor, ECX = mat->inv,
 *            EDX = input cursor, EAX = output cursor, EBP = remaining count.
 *
 * NOTE: each loop pre-loads the next element before testing the counter,
 * so the last iteration reads (but never uses) one element past the end.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L    ( EDI )
    PUSH_L    ( ESI )
    PUSH_L    ( EBP )

    MOV_L     ( ARG_LENGTHS, EDI )
    MOV_L     ( ARG_IN, ESI )
    MOV_L     ( ARG_DEST, EAX )
    MOV_L     ( REGOFF(V3F_COUNT, ESI), EBP )   /* dest->count = in->count */
    MOV_L     ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L     ( ARG_MAT, ECX )
    MOV_L     ( REGOFF(V3F_START, EAX), EAX )   /* dest->start */
    MOV_L     ( REGOFF(MATRIX_INV, ECX), ECX )  /* mat->inv */
    MOV_L     ( REGOFF(V3F_START, ESI), EDX )   /* in->start */

    CMP_L     ( CONST(0), EBP )                 /* count > 0 ?? */
    JE        ( LLBL (G3TNNR_end) )

    FEMMS

    MOVD      ( M(0), MM0 )                     /*                 | m0              */
    PUNPCKLDQ ( M(5), MM0 )                     /* m5              | m0              */

    MOVD      ( M(10), MM2 )                    /*                 | m10             */
    PUNPCKLDQ ( MM2, MM2 )                      /* m10             | m10             */

    CMP_L     ( CONST(0), EDI )                 /* lengths == 0 ? */
    JNE       ( LLBL (G3TNNR_scale_end ) )

    MOVD      ( ARG_SCALE, MM7 )                /*                 | scale           */
    PUNPCKLDQ ( MM7, MM7 )                      /* scale           | scale           */

    PFMUL     ( MM7, MM0 )                      /* scale * m5      | scale * m0      */
    PFMUL     ( MM7, MM2 )                      /* scale * m10     | scale * m10     */

ALIGNTEXT32
LLBL (G3TNNR_scale_end):
    MOVQ      ( REGIND(EDX), MM6 )              /* x1              | x0              */
    MOVD      ( REGOFF(8, EDX), MM7 )           /*                 | x2              */

    CMP_L     ( CONST(0), EDI )                 /* lengths == 0 ? */
    JE        ( LLBL (G3TNNR_norm) )            /* need to calculate lengths         */

    MOVD      ( REGIND(EDI), MM3 )              /*                 | length (x)      */


ALIGNTEXT32
LLBL (G3TNNR_norm_w_lengths):                   /* use precalculated lengths         */

    PREFETCHW ( REGIND(EAX) )

    PFMUL     ( MM0, MM6 )                      /* x1*m5           | x0*m0           */
    ADD_L     ( STRIDE, EDX )                   /* next normal                       */

    PREFETCH  ( REGIND(EDX) )

    PFMUL     ( MM2, MM7 )                      /*                 | x2*m10          */
    ADD_L     ( CONST(12), EAX )                /* next r                            */

    PFMUL     ( MM3, MM7 )                      /*                 | x2 (normalized) */
    PUNPCKLDQ ( MM3, MM3 )                      /* length (x)      | length (x)      */

    ADD_L     ( CONST(4), EDI )                 /* next length                       */
    PFMUL     ( MM3, MM6 )                      /* x1 (normalized) | x0 (normalized) */

    DEC_L     ( EBP )                           /* decrement normal counter          */
    MOVQ      ( MM6, REGOFF(-12, EAX) )         /* write r0, r1                      */

    MOVD      ( MM7, REGOFF(-4, EAX) )          /* write r2                          */
    MOVD      ( REGIND(EDI), MM3 )              /*                 | length (x)      */

    MOVQ      ( REGIND(EDX), MM6 )              /* x1              | x0              */
    MOVD      ( REGOFF(8, EDX), MM7 )           /*                 | x2              */

    JNZ       ( LLBL (G3TNNR_norm_w_lengths) )  /* DEC leaves CF alone: test ZF only */
    JMP       ( LLBL (G3TNNR_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TNNR_norm):                             /* need to calculate lengths         */

    PREFETCHW ( REGIND(EAX) )

    PFMUL     ( MM0, MM6 )                      /* x1*m5           | x0*m0           */
    ADD_L     ( CONST(12), EAX )                /* next r                            */

    PFMUL     ( MM2, MM7 )                      /*                 | x2*m10          */
    MOVQ      ( MM6, MM3 )                      /* x1 (transformed)| x0 (transformed) */

    MOVQ      ( MM7, MM4 )                      /*                 | x2 (transformed) */
    PFMUL     ( MM6, MM3 )                      /* x1*x1           | x0*x0           */


    PFMUL     ( MM7, MM4 )                      /*                 | x2*x2           */
    PFACC     ( MM3, MM3 )                      /* **not used**    | x0*x0+x1*x1     */

    PFADD     ( MM4, MM3 )                      /*                 | x0*x0+x1*x1+x2*x2 */
    ADD_L     ( STRIDE, EDX )                   /* next normal                       */

    PREFETCH  ( REGIND(EDX) )

    PFRSQRT   ( MM3, MM5 )                      /* approx 1/sqrt (x0*x0+x1*x1+x2*x2) */
    MOVQ      ( MM5, MM4 )

    PUNPCKLDQ ( MM3, MM3 )
    PFMUL     ( MM5, MM5 )

    PFRSQIT1  ( MM3, MM5 )                      /* refine the reciprocal square root */
    DEC_L     ( EBP )                           /* decrement normal counter          */

    PFRCPIT2  ( MM4, MM5 )
    PFMUL     ( MM5, MM6 )                      /* x1 (normalized) | x0 (normalized) */

    MOVQ      ( MM6, REGOFF(-12, EAX) )         /* write r0, r1                      */
    PFMUL     ( MM5, MM7 )                      /*                 | x2 (normalized) */

    MOVD      ( MM7, REGOFF(-4, EAX) )          /* write r2                          */
    MOVQ      ( REGIND(EDX), MM6 )              /* x1              | x0              */

    MOVD      ( REGOFF(8, EDX), MM7 )           /*                 | x2              */
    JNZ       ( LLBL (G3TNNR_norm) )            /* ZF from DEC; 3DNow!/MMX ops keep flags */


LLBL (G3TNNR_exit_3dnow):
    FEMMS

LLBL (G3TNNR_end):
    POP_L     ( EBP )
    POP_L     ( ESI )
    POP_L     ( EDI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_rescale_normals_no_rot( NORM_ARGS )
 *
 * For every input normal x, write
 *     r = ( x0 * scale*m0, x1 * scale*m5, x2 * scale*m10 )
 * to dest (12 bytes per result), with m taken from mat->inv.
 *
 * Registers: ESI = in, ECX = mat->inv, EDX = input cursor,
 *            EAX = output cursor, EBP = remaining count.
 *
 * NOTE: the loop pre-loads the next normal before testing the counter,
 * so the last iteration reads (but never uses) one normal past the end.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L    ( EDI )
    PUSH_L    ( ESI )
    PUSH_L    ( EBP )

    MOV_L     ( ARG_IN, EAX )
    MOV_L     ( ARG_DEST, EDX )
    MOV_L     ( REGOFF(V3F_COUNT, EAX), EBP )   /* dest->count = in->count */
    MOV_L     ( EBP, REGOFF(V3F_COUNT, EDX) )
    MOV_L     ( ARG_IN, ESI )
    MOV_L     ( ARG_MAT, ECX )
    MOV_L     ( REGOFF(MATRIX_INV, ECX), ECX )  /* mat->inv */
    MOV_L     ( REGOFF(V3F_START, EDX), EAX )   /* dest->start */
    MOV_L     ( REGOFF(V3F_START, ESI), EDX )   /* in->start */

    CMP_L     ( CONST(0), EBP )                 /* count > 0 ?? */
    JE        ( LLBL (G3TRNR_end) )

    FEMMS

    MOVD      ( ARG_SCALE, MM6 )                /*                 | scale           */
    PUNPCKLDQ ( MM6, MM6 )                      /* scale           | scale           */

    MOVD      ( M(0), MM0 )                     /*                 | m0              */
    PUNPCKLDQ ( M(5), MM0 )                     /* m5              | m0              */

    PFMUL     ( MM6, MM0 )                      /* scale*m5        | scale*m0        */
    MOVD      ( M(10), MM2 )                    /*                 | m10             */

    MOVQ      ( REGIND(EDX), MM4 )              /* x1              | x0              */
    PFMUL     ( MM6, MM2 )                      /*                 | scale*m10       */

    MOVD      ( REGOFF(8, EDX), MM5 )           /*                 | x2              */

ALIGNTEXT32
LLBL (G3TRNR_rescale):

    PREFETCHW ( REGIND(EAX) )

    PFMUL     ( MM0, MM4 )                      /* x1*m5           | x0*m0           */
    ADD_L     ( STRIDE, EDX )                   /* next normal                       */

    PREFETCH  ( REGIND(EDX) )

    PFMUL     ( MM2, MM5 )                      /*                 | x2*m10          */
    ADD_L     ( CONST(12), EAX )                /* next r                            */

    DEC_L     ( EBP )                           /* decrement normal counter          */
    MOVQ      ( MM4, REGOFF(-12, EAX) )         /* write r0, r1                      */

    MOVD      ( MM5, REGOFF(-4, EAX) )          /* write r2                          */
    MOVQ      ( REGIND(EDX), MM4 )              /* x1              | x0              */

    MOVD      ( REGOFF(8, EDX), MM5 )           /*                 | x2              */
    JNZ       ( LLBL (G3TRNR_rescale) )         /* cnt > 0 ? -> process next normal  */
                                                /* (DEC leaves CF alone: test ZF)    */

    FEMMS

LLBL (G3TRNR_end):
    POP_L     ( EBP )
    POP_L     ( ESI )
    POP_L     ( EDI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_rescale_normals( NORM_ARGS )
 *
 * For every input normal x, write
 *     r0 = scale * ( x0*m0 + x1*m1 + x2*m2 )
 *     r1 = scale * ( x0*m4 + x1*m5 + x2*m6 )
 *     r2 = scale * ( x0*m8 + x1*m9 + x2*m10 )
 * to dest (12 bytes per result), with m taken from mat->inv.  The scale
 * factor is folded into the matrix elements once, before the loop.
 *
 * Registers: ESI = in, ECX = mat->inv, EDX = input cursor,
 *            EAX = output cursor, EDI = remaining count.
 *
 * NOTE: the loop pre-loads the next normal before testing the counter,
 * so the last iteration reads (but never uses) one normal past the end.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
GLNAME(_mesa_3dnow_transform_rescale_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L    ( EDI )
    PUSH_L    ( ESI )

    MOV_L     ( ARG_IN, ESI )
    MOV_L     ( ARG_DEST, EAX )
    MOV_L     ( ARG_MAT, ECX )
    MOV_L     ( REGOFF(V3F_COUNT, ESI), EDI )   /* dest->count = in->count */
    MOV_L     ( EDI, REGOFF(V3F_COUNT, EAX) )
    MOV_L     ( REGOFF(V3F_START, EAX), EAX )   /* dest->start */
    MOV_L     ( REGOFF(V3F_START, ESI), EDX )   /* in->start */
    MOV_L     ( REGOFF(MATRIX_INV, ECX), ECX )  /* mat->inv */

    CMP_L     ( CONST(0), EDI )                 /* count > 0 ?? */
    JE        ( LLBL (G3TR_end) )

    FEMMS

    MOVQ      ( M(0), MM3 )                     /* m1              | m0              */

    MOVQ      ( M(4), MM4 )                     /* m5              | m4              */
    MOVD      ( ARG_SCALE, MM0 )                /*                 | scale           */

    MOVD      ( M(2), MM5 )                     /*                 | m2              */
    PUNPCKLDQ ( MM0, MM0 )                      /* scale           | scale           */

    PUNPCKLDQ ( M(6), MM5 )                     /* m6              | m2              */
    PFMUL     ( MM0, MM3 )                      /* scale*m1        | scale*m0        */

    MOVQ      ( M(8), MM6 )                     /* m9              | m8              */
    PFMUL     ( MM0, MM4 )                      /* scale*m5        | scale*m4        */

    MOVD      ( M(10), MM7 )                    /*                 | m10             */
    PFMUL     ( MM0, MM5 )                      /* scale*m6        | scale*m2        */

    PFMUL     ( MM0, MM6 )                      /* scale*m9        | scale*m8        */
    MOVD      ( REGOFF(8, EDX), MM2 )           /*                 | x2              */

    PFMUL     ( MM0, MM7 )                      /*                 | scale*m10       */
    MOVQ      ( REGIND(EDX), MM0 )              /* x1              | x0              */

ALIGNTEXT32
LLBL (G3TR_rescale):

    PREFETCHW ( REGIND(EAX) )

    MOVQ      ( MM0, MM1 )                      /* x1              | x0              */
    PUNPCKLDQ ( MM2, MM2 )                      /* x2              | x2              */

    PFMUL     ( MM3, MM0 )                      /* x1*m1           | x0*m0           */
    ADD_L     ( CONST(12), EAX )                /* next r                            */

    PFMUL     ( MM4, MM1 )                      /* x1*m5           | x0*m4           */
    PFACC     ( MM1, MM0 )                      /* x0*m4+x1*m5     | x0*m0+x1*m1     */

    MOVQ      ( REGIND(EDX), MM1 )              /* x1              | x0              */

    PFMUL     ( MM5, MM2 )                      /* x2*m6           | x2*m2           */
    PFADD     ( MM2, MM0 )                      /* x0*m4...+x2*m6  | x0*m0+x1*m1+x2*m2 */

    MOVD      ( REGOFF(8, EDX), MM2 )           /*                 | x2              */
    ADD_L     ( STRIDE, EDX )                   /* next normal                       */

    PREFETCH  ( REGIND(EDX) )

    MOVQ      ( MM0, REGOFF(-12, EAX) )         /* write r0, r1                      */
    PFMUL     ( MM6, MM1 )                      /* x1*m9           | x0*m8           */

    PFMUL     ( MM7, MM2 )                      /*                 | x2*m10          */
    PFACC     ( MM1, MM1 )                      /* *not used*      | x0*m8+x1*m9     */

    PFADD     ( MM2, MM1 )                      /* *not used*      | x0*m8+x1*m9+x2*m10 */
    MOVD      ( MM1, REGOFF(-4, EAX) )          /* write r2                          */

    MOVQ      ( REGIND(EDX), MM0 )              /* x1              | x0              */
    MOVD      ( REGOFF(8, EDX), MM2 )           /*                 | x2              */

    DEC_L     ( EDI )                           /* decrement normal counter          */
    JNZ       ( LLBL (G3TR_rescale) )           /* DEC leaves CF alone: test ZF only */

    FEMMS

LLBL (G3TR_end):
    POP_L     ( ESI )
    POP_L     ( EDI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_normals_no_rot( NORM_ARGS )
 *
 * For every input normal x, write
 *     r = ( x0*m0, x1*m5, x2*m10 )
 * to dest (12 bytes per result), with m taken from mat->inv.
 *
 * Registers: ESI = in, ECX = mat->inv, EDX = input cursor,
 *            EAX = output cursor, EDI = remaining count.
 *
 * NOTE: the loop pre-loads the next normal before testing the counter,
 * so the last iteration reads (but never uses) one normal past the end.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L    ( EDI )
    PUSH_L    ( ESI )

    MOV_L     ( ARG_IN, ESI )
    MOV_L     ( ARG_DEST, EAX )
    MOV_L     ( ARG_MAT, ECX )
    MOV_L     ( REGOFF(V3F_COUNT, ESI), EDI )   /* dest->count = in->count */
    MOV_L     ( EDI, REGOFF(V3F_COUNT, EAX) )
    MOV_L     ( REGOFF(V3F_START, EAX), EAX )   /* dest->start */
    MOV_L     ( REGOFF(V3F_START, ESI), EDX )   /* in->start */
    MOV_L     ( REGOFF(MATRIX_INV, ECX), ECX )  /* mat->inv */

    CMP_L     ( CONST(0), EDI )                 /* count > 0 ?? */
    JE        ( LLBL (G3TNR_end) )

    FEMMS

    MOVD      ( M(0), MM0 )                     /*                 | m0              */
    PUNPCKLDQ ( M(5), MM0 )                     /* m5              | m0              */

    MOVD      ( M(10), MM2 )                    /*                 | m10             */
    PUNPCKLDQ ( MM2, MM2 )                      /* m10             | m10             */

    MOVQ      ( REGIND(EDX), MM4 )              /* x1              | x0              */
    MOVD      ( REGOFF(8, EDX), MM5 )           /*                 | x2              */

ALIGNTEXT32
LLBL (G3TNR_transform):

    PREFETCHW ( REGIND(EAX) )

    PFMUL     ( MM0, MM4 )                      /* x1*m5           | x0*m0           */
    ADD_L     ( STRIDE, EDX)                    /* next normal                       */

    PREFETCH  ( REGIND(EDX) )

    PFMUL     ( MM2, MM5 )                      /*                 | x2*m10          */
    ADD_L     ( CONST(12), EAX )                /* next r                            */

    DEC_L     ( EDI )                           /* decrement normal counter          */
    MOVQ      ( MM4, REGOFF(-12, EAX) )         /* write r0, r1                      */

    MOVD      ( MM5, REGOFF(-4, EAX) )          /* write r2                          */
    MOVQ      ( REGIND(EDX), MM4 )              /* x1              | x0              */

    MOVD      ( REGOFF(8, EDX), MM5 )           /*                 | x2              */
    JNZ       ( LLBL (G3TNR_transform) )        /* DEC leaves CF alone: test ZF only */

    FEMMS

LLBL (G3TNR_end):
    POP_L     ( ESI )
    POP_L     ( EDI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_normals( NORM_ARGS )
 *
 * For every input normal x, write
 *     r0 = x0*m0 + x1*m1 + x2*m2
 *     r1 = x0*m4 + x1*m5 + x2*m6
 *     r2 = x0*m8 + x1*m9 + x2*m10
 * to dest (12 bytes per result), with m taken from mat->inv.
 *
 * Registers: ESI = in, ECX = mat->inv, EDX = input cursor,
 *            EAX = output cursor, EDI = remaining count.
 *
 * NOTE: the loop pre-loads the next normal before testing the counter,
 * so the last iteration reads (but never uses) one normal past the end.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals)
GLNAME(_mesa_3dnow_transform_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L    ( EDI )
    PUSH_L    ( ESI )

    MOV_L     ( ARG_IN, ESI )
    MOV_L     ( ARG_DEST, EAX )
    MOV_L     ( ARG_MAT, ECX )
    MOV_L     ( REGOFF(V3F_COUNT, ESI), EDI )   /* dest->count = in->count */
    MOV_L     ( EDI, REGOFF(V3F_COUNT, EAX) )
    MOV_L     ( REGOFF(V3F_START, EAX), EAX )   /* dest->start */
    MOV_L     ( REGOFF(V3F_START, ESI), EDX )   /* in->start */
    MOV_L     ( REGOFF(MATRIX_INV, ECX), ECX )  /* mat->inv */

    CMP_L     ( CONST(0), EDI )                 /* count > 0 ?? */
    JE        ( LLBL (G3T_end) )

    FEMMS

    MOVQ      ( M(0), MM3 )                     /* m1              | m0              */
    MOVQ      ( M(4), MM4 )                     /* m5              | m4              */

    MOVD      ( M(2), MM5 )                     /*                 | m2              */
    PUNPCKLDQ ( M(6), MM5 )                     /* m6              | m2              */

    MOVQ      ( M(8), MM6 )                     /* m9              | m8              */
    MOVD      ( M(10), MM7 )                    /*                 | m10             */

    MOVQ      ( REGIND(EDX), MM0 )              /* x1              | x0              */
    MOVD      ( REGOFF(8, EDX), MM2 )           /*                 | x2              */

ALIGNTEXT32
LLBL (G3T_transform):

    PREFETCHW ( REGIND(EAX) )

    MOVQ      ( MM0, MM1 )                      /* x1              | x0              */
    PUNPCKLDQ ( MM2, MM2 )                      /* x2              | x2              */

    PFMUL     ( MM3, MM0 )                      /* x1*m1           | x0*m0           */
    ADD_L     ( CONST(12), EAX )                /* next r                            */

    PFMUL     ( MM4, MM1 )                      /* x1*m5           | x0*m4           */
    PFACC     ( MM1, MM0 )                      /* x0*m4+x1*m5     | x0*m0+x1*m1     */

    PFMUL     ( MM5, MM2 )                      /* x2*m6           | x2*m2           */
    PFADD     ( MM2, MM0 )                      /* x0*m4...+x2*m6  | x0*m0+x1*m1+x2*m2 */

    MOVQ      ( REGIND(EDX), MM1 )              /* x1              | x0              */
    MOVQ      ( MM0, REGOFF(-12, EAX) )         /* write r0, r1                      */

    PFMUL     ( MM6, MM1 )                      /* x1*m9           | x0*m8           */
    MOVD      ( REGOFF(8, EDX), MM2 )           /*                 | x2              */

    PFMUL     ( MM7, MM2 )                      /*                 | x2*m10          */
    ADD_L     ( STRIDE, EDX )                   /* next normal                       */

    PREFETCH  ( REGIND(EDX) )

    PFACC     ( MM1, MM1 )                      /* *not used*      | x0*m8+x1*m9     */
    PFADD     ( MM2, MM1 )                      /* *not used*      | x0*m8+x1*m9+x2*m10 */

    MOVD      ( MM1, REGOFF(-4, EAX) )          /* write r2                          */
    MOVQ      ( REGIND(EDX), MM0 )              /* x1              | x0              */

    MOVD      ( REGOFF(8, EDX), MM2 )           /*                 | x2              */
    DEC_L     ( EDI )                           /* decrement normal counter          */
    JNZ       ( LLBL (G3T_transform) )          /* DEC leaves CF alone: test ZF only */

    FEMMS

LLBL (G3T_end):
    POP_L     ( ESI )
    POP_L     ( EDI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_normalize_normals( NORM_ARGS )
 *
 * Copy every input normal to dest (12 bytes per result), normalized:
 *   - lengths != NULL: multiply the normal by the matching lengths[] entry
 *     (presumably a precomputed reciprocal length -- confirm with caller);
 *   - lengths == NULL: multiply by 1/sqrt(x.x), computed with
 *     PFRSQRT / PFRSQIT1 / PFRCPIT2.
 *
 * Registers: ESI = in, ECX = input cursor, EAX = output cursor,
 *            EDX = lengths cursor, EBP = remaining count.
 *
 * NOTE: each loop pre-loads the next normal before testing the counter,
 * so the last iteration reads (but never uses) one normal past the end.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_normalize_normals)
GLNAME(_mesa_3dnow_normalize_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L    ( EDI )
    PUSH_L    ( ESI )
    PUSH_L    ( EBP )

    MOV_L     ( ARG_IN, ESI )
    MOV_L     ( ARG_DEST, EAX )
    MOV_L     ( REGOFF(V3F_COUNT, ESI), EBP )   /* dest->count = in->count */
    MOV_L     ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L     ( REGOFF(V3F_START, EAX), EAX )   /* dest->start */
    MOV_L     ( REGOFF(V3F_START, ESI), ECX )   /* in->start */
    MOV_L     ( ARG_LENGTHS, EDX )

    CMP_L     ( CONST(0), EBP )                 /* count > 0 ?? */
    JE        ( LLBL (G3N_end) )

    FEMMS

    MOVQ      ( REGIND(ECX), MM0 )              /* x1              | x0              */
    MOVD      ( REGOFF(8, ECX), MM1 )           /*                 | x2              */

    CMP_L     ( CONST(0), EDX )                 /* lengths == 0 ? */
    JE        ( LLBL (G3N_norm2) )              /* calculate lengths                 */

ALIGNTEXT32
LLBL (G3N_norm1):                               /* use precalculated lengths         */

    PREFETCHW ( REGIND(EAX) )                   /* destination line is written below */

    MOVD      ( REGIND(EDX), MM3 )              /*                 | length (x)      */
    PFMUL     ( MM3, MM1 )                      /*                 | x2 (normalized) */

    PUNPCKLDQ ( MM3, MM3 )                      /* length (x)      | length (x)      */
    ADD_L     ( STRIDE, ECX )                   /* next normal                       */

    PREFETCH  ( REGIND(ECX) )

    PFMUL     ( MM3, MM0 )                      /* x1 (normalized) | x0 (normalized) */
    MOVQ      ( MM0, REGIND(EAX) )              /* write new x0, x1                  */

    MOVD      ( MM1, REGOFF(8, EAX) )           /* write new x2                      */
    ADD_L     ( CONST(12), EAX )                /* next r                            */

    ADD_L     ( CONST(4), EDX )                 /* next length                       */
    DEC_L     ( EBP )                           /* decrement normal counter          */

    MOVQ      ( REGIND(ECX), MM0 )              /* x1              | x0              */
    MOVD      ( REGOFF(8, ECX), MM1 )           /*                 | x2              */
    JNZ       ( LLBL (G3N_norm1) )              /* DEC leaves CF alone: test ZF only */

    JMP       ( LLBL (G3N_end1) )

ALIGNTEXT32
LLBL (G3N_norm2):                               /* need to calculate lengths         */

    PREFETCHW ( REGIND(EAX) )

    MOVQ      ( MM0, MM3 )                      /* x1              | x0              */
    ADD_L     ( STRIDE, ECX )                   /* next normal                       */

    PREFETCH  ( REGIND(ECX) )

    PFMUL     ( MM0, MM3 )                      /* x1*x1           | x0*x0           */
    MOVQ      ( MM1, MM4 )                      /*                 | x2              */

    ADD_L     ( CONST(12), EAX )                /* next r                            */
    PFMUL     ( MM1, MM4 )                      /*                 | x2*x2           */

    PFADD     ( MM4, MM3 )                      /* x1*x1           | x0*x0+x2*x2     */
    PFACC     ( MM3, MM3 )                      /* x0*x0+...+x2*x2 | x0*x0+x1*x1+x2*x2 */

    PFRSQRT   ( MM3, MM5 )                      /* approx 1/sqrt (x0*x0+x1*x1+x2*x2) */
    MOVQ      ( MM5, MM4 )

    PUNPCKLDQ ( MM3, MM3 )
    PFMUL     ( MM5, MM5 )

    PFRSQIT1  ( MM3, MM5 )                      /* refine the reciprocal square root */
    DEC_L     ( EBP )                           /* decrement normal counter          */

    PFRCPIT2  ( MM4, MM5 )

    PFMUL     ( MM5, MM0 )                      /* x1 (normalized) | x0 (normalized) */
    MOVQ      ( MM0, REGOFF(-12, EAX) )         /* write new x0, x1                  */

    PFMUL     ( MM5, MM1 )                      /*                 | x2 (normalized) */
    MOVD      ( MM1, REGOFF(-4, EAX) )          /* write new x2                      */

    MOVQ      ( REGIND(ECX), MM0 )              /* x1              | x0              */
    MOVD      ( REGOFF(8, ECX), MM1 )           /*                 | x2              */
    JNZ       ( LLBL (G3N_norm2) )              /* ZF from DEC; 3DNow!/MMX ops keep flags */

LLBL (G3N_end1):
    FEMMS

LLBL (G3N_end):
    POP_L     ( EBP )
    POP_L     ( ESI )
    POP_L     ( EDI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_rescale_normals( NORM_ARGS )
 *
 * For every input normal x, write  r = scale * x  to dest
 * (12 bytes per result).
 *
 * Registers: ESI = in, ECX = input cursor, EAX = output cursor,
 *            EDX = remaining count.
 *
 * NOTE: the loop pre-loads the next normal before testing the counter,
 * so the last iteration reads (but never uses) one normal past the end.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_rescale_normals)
GLNAME(_mesa_3dnow_rescale_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L    ( EDI )
    PUSH_L    ( ESI )

    MOV_L     ( ARG_IN, ESI )
    MOV_L     ( ARG_DEST, EAX )
    MOV_L     ( REGOFF(V3F_COUNT, ESI), EDX )   /* dest->count = in->count */
    MOV_L     ( EDX, REGOFF(V3F_COUNT, EAX) )
    MOV_L     ( REGOFF(V3F_START, EAX), EAX )   /* dest->start */
    MOV_L     ( REGOFF(V3F_START, ESI), ECX )   /* in->start */

    CMP_L     ( CONST(0), EDX )                 /* count > 0 ?? */
    JE        ( LLBL (G3R_end) )

    FEMMS

    MOVD      ( ARG_SCALE, MM0 )                /*                 | scale           */
    PUNPCKLDQ ( MM0, MM0 )                      /* scale           | scale           */

    MOVQ      ( REGIND(ECX), MM1 )              /* x1              | x0              */
    MOVD      ( REGOFF(8, ECX), MM2 )           /*                 | x2              */

ALIGNTEXT32
LLBL (G3R_rescale):

    PREFETCHW ( REGIND(EAX) )

    PFMUL     ( MM0, MM1 )                      /* x1*scale        | x0*scale        */
    ADD_L     ( STRIDE, ECX )                   /* next normal                       */

    PREFETCH  ( REGIND(ECX) )

    PFMUL     ( MM0, MM2 )                      /*                 | x2*scale        */
    ADD_L     ( CONST(12), EAX )                /* next r                            */

    MOVQ      ( MM1, REGOFF(-12, EAX) )         /* write r0, r1                      */
    MOVD      ( MM2, REGOFF(-4, EAX) )          /* write r2                          */

    DEC_L     ( EDX )                           /* decrement normal counter          */
    MOVQ      ( REGIND(ECX), MM1 )              /* x1              | x0              */

    MOVD      ( REGOFF(8, ECX), MM2 )           /*                 | x2              */
    JNZ       ( LLBL (G3R_rescale) )            /* DEC leaves CF alone: test ZF only */

    FEMMS

LLBL (G3R_end):
    POP_L     ( ESI )
    POP_L     ( EDI )
    RET
|
|
@ -0,0 +1,423 @@
|
|||
/* $Id: 3dnow_xform1.S,v 1.1 2001/03/29 06:46:16 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
#define FRAME_OFFSET 4
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_points1_general
 *
 * Transforms 1-component vertices (x0 only) by a general matrix and
 * writes 4-component results:
 *     r0 = x0*m00 + m30      r1 = x0*m01 + m31
 *     r2 = x0*m02 + m32      r3 = x0*m03 + m33
 * The dest vector gets size 4, the VEC_SIZE_4 flag bits and the source
 * count.  Results are written 16 bytes apart; the source is walked with
 * its own stride.
 *
 * NOTE(review): ARG_DEST / ARG_MATRIX / ARG_SOURCE come from xform_args.h
 * and presumably address the stack relative to ESP via FRAME_OFFSET, which
 * would be why they are only used between the first and second push -
 * confirm before moving either PUSH_L.
 *
 * Loop registers: EAX = source vertex, EDX = dest vertex, ECX = matrix,
 * ESI = vertices left, EDI = source stride.  ESI and EDI are saved and
 * restored; EAX, ECX, EDX and MM0-MM5 are overwritten.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_general )
GLNAME( _mesa_3dnow_transform_points1_general ):

    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )   /* dest count = source count */

    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* dest start, by named offset like the source below */
    MOV_L     ( ESI, ECX )                      /* ECX = matrix */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPGR_3 ) )              /* nothing to do for count == 0 */

    MOVQ      ( REGIND(ECX), MM0 )              /* m01             | m00              */
    MOVQ      ( REGOFF(8, ECX), MM1 )           /* m03             | m02              */

    MOVQ      ( REGOFF(48, ECX), MM2 )          /* m31             | m30              */
    MOVQ      ( REGOFF(56, ECX), MM3 )          /* m33             | m32              */

ALIGNTEXT16
LLBL( G3TPGR_2 ):

    MOVD      ( REGIND(EAX), MM4 )              /*                 | x0               */
    PUNPCKLDQ ( MM4, MM4 )                      /* x0              | x0               */

    MOVQ      ( MM4, MM5 )                      /* x0              | x0               */
    PFMUL     ( MM0, MM4 )                      /* x0*m01          | x0*m00           */

    PFMUL     ( MM1, MM5 )                      /* x0*m03          | x0*m02           */
    PFADD     ( MM2, MM4 )                      /* x0*m01+m31      | x0*m00+m30       */

    PFADD     ( MM3, MM5 )                      /* x0*m03+m33      | x0*m02+m32       */
    MOVQ      ( MM4, REGIND(EDX) )              /* write r1, r0                       */

    MOVQ      ( MM5, REGOFF(8, EDX) )           /* write r3, r2                       */
    ADD_L     ( EDI, EAX )                      /* next vertex                        */

    ADD_L     ( CONST(16), EDX )                /* next r                             */
    DEC_L     ( ESI )                           /* decrement vertex counter           */

    JNZ       ( LLBL( G3TPGR_2 ) )              /* cnt > 0 ? -> process next vertex   */

LLBL( G3TPGR_3 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_points1_identity
 *
 * Identity transform for 1-component vertices: copies x0 of every source
 * vertex to r0 of the matching dest vertex.  The dest vector gets size 1,
 * the VEC_SIZE_1 flag bits and the source count.  Dest vertices are 16
 * bytes apart; the source is walked with its own stride.
 *
 * The matrix argument is loaded but never read by the copy loop.
 *
 * NOTE(review): ARG_DEST / ARG_MATRIX / ARG_SOURCE come from xform_args.h
 * and presumably address the stack relative to ESP via FRAME_OFFSET, which
 * would be why they are only used between the first and second push -
 * confirm before moving either PUSH_L.
 *
 * Loop registers: EAX = source vertex, EDX = dest vertex, ESI = vertices
 * left, EDI = source stride.  ESI and EDI are saved and restored; EAX,
 * ECX, EDX and MM0 are overwritten.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_identity )
GLNAME( _mesa_3dnow_transform_points1_identity ):

    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(1), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )   /* dest count = source count */

    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* dest start, by named offset like the source below */
    MOV_L     ( ESI, ECX )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPIR_4 ) )              /* nothing to do for count == 0 */

ALIGNTEXT16
LLBL( G3TPIR_3 ):

    MOVD      ( REGIND(EAX), MM0 )              /*                 | x0               */
    ADD_L     ( EDI, EAX )                      /* next vertex                        */

    MOVD      ( MM0, REGIND(EDX) )              /*                 | r0               */
    ADD_L     ( CONST(16), EDX )                /* next r                             */

    DEC_L     ( ESI )                           /* decrement vertex counter           */
    JNZ       ( LLBL( G3TPIR_3 ) )              /* cnt > 0 ? -> process next vertex   */

LLBL( G3TPIR_4 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_points1_3d_no_rot
 *
 * Transforms 1-component vertices by a 3D matrix without rotation and
 * writes 3-component results:
 *     r0 = x0*m00 + m30      r1 = m31      r2 = m32
 * The dest vector gets size 3, the VEC_SIZE_3 flag bits and the source
 * count.  Dest vertices are 16 bytes apart; the source is walked with
 * its own stride.
 *
 * NOTE(review): ARG_DEST / ARG_MATRIX / ARG_SOURCE come from xform_args.h
 * and presumably address the stack relative to ESP via FRAME_OFFSET, which
 * would be why they are only used between the first and second push -
 * confirm before moving either PUSH_L.
 *
 * Loop registers: EAX = source vertex, EDX = dest vertex, ECX = matrix,
 * ESI = vertices left, EDI = source stride.  ESI and EDI are saved and
 * restored; EAX, ECX, EDX, MM0 and MM2-MM4 are overwritten.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_3d_no_rot )
GLNAME( _mesa_3dnow_transform_points1_3d_no_rot ):

    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )   /* dest count = source count */

    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* dest start, by named offset like the source below */
    MOV_L     ( ESI, ECX )                      /* ECX = matrix */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP3NRR_3 ) )            /* nothing to do for count == 0 */

    MOVD      ( REGIND(ECX), MM0 )              /*                 | m00              */
    MOVQ      ( REGOFF(48, ECX), MM2 )          /* m31             | m30              */

    MOVD      ( REGOFF(56, ECX), MM3 )          /*                 | m32              */

ALIGNTEXT16
LLBL( G3TP3NRR_2 ):

    MOVD      ( REGIND(EAX), MM4 )              /* 0               | x0               */
    PFMUL     ( MM0, MM4 )                      /* 0               | x0*m00           */

    PFADD     ( MM2, MM4 )                      /* m31             | x0*m00+m30       */
    MOVQ      ( MM4, REGIND(EDX) )              /* write r1, r0                       */

    MOVD      ( MM3, REGOFF(8, EDX) )           /* write r2 (= m32)                   */
    ADD_L     ( EDI, EAX )                      /* next vertex                        */

    ADD_L     ( CONST(16), EDX )                /* next r                             */
    DEC_L     ( ESI )                           /* decrement vertex counter           */

    JNZ       ( LLBL( G3TP3NRR_2 ) )            /* cnt > 0 ? -> process next vertex   */

LLBL( G3TP3NRR_3 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_points1_perspective
 *
 * Transforms 1-component vertices by a perspective matrix and writes
 * 4-component results:
 *     r0 = x0*m00      r1 = 0      r2 = m32      r3 = 0
 * The dest vector gets size 4, the VEC_SIZE_4 flag bits and the source
 * count.  Dest vertices are 16 bytes apart; the source is walked with
 * its own stride.
 *
 * NOTE(review): ARG_DEST / ARG_MATRIX / ARG_SOURCE come from xform_args.h
 * and presumably address the stack relative to ESP via FRAME_OFFSET, which
 * would be why they are only used between the first and second push -
 * confirm before moving either PUSH_L.
 *
 * Loop registers: EAX = source vertex, EDX = dest vertex, ECX = matrix,
 * ESI = vertices left, EDI = source stride.  ESI and EDI are saved and
 * restored; EAX, ECX, EDX, MM0, MM3 and MM4 are overwritten.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_perspective )
GLNAME( _mesa_3dnow_transform_points1_perspective ):

    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )   /* dest count = source count */

    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* dest start, by named offset like the source below */
    MOV_L     ( ESI, ECX )                      /* ECX = matrix */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPPR_3 ) )              /* nothing to do for count == 0 */

    MOVD      ( REGIND(ECX), MM0 )              /* 0               | m00              */
    MOVD      ( REGOFF(56, ECX), MM3 )          /* 0               | m32              */

ALIGNTEXT16
LLBL( G3TPPR_2 ):

    MOVD      ( REGIND(EAX), MM4 )              /* 0               | x0               */
    PFMUL     ( MM0, MM4 )                      /* 0               | x0*m00           */

    MOVQ      ( MM4, REGIND(EDX) )              /* write r1 (=0), r0                  */
    MOVQ      ( MM3, REGOFF(8, EDX) )           /* write r2 (=m32), r3 (=0)           */

    ADD_L     ( EDI, EAX )                      /* next vertex                        */
    ADD_L     ( CONST(16), EDX )                /* next r                             */

    DEC_L     ( ESI )                           /* decrement vertex counter           */
    JNZ       ( LLBL( G3TPPR_2 ) )              /* cnt > 0 ? -> process next vertex   */

LLBL( G3TPPR_3 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_points1_2d
 *
 * Transforms 1-component vertices by a 2D matrix and writes 2-component
 * results:
 *     r0 = x0*m00 + m30      r1 = x0*m01 + m31
 * The dest vector gets size 2, the VEC_SIZE_2 flag bits and the source
 * count.  Dest vertices are 16 bytes apart; the source is walked with
 * its own stride.
 *
 * NOTE(review): ARG_DEST / ARG_MATRIX / ARG_SOURCE come from xform_args.h
 * and presumably address the stack relative to ESP via FRAME_OFFSET, which
 * would be why they are only used between the first and second push -
 * confirm before moving either PUSH_L.
 *
 * Loop registers: EAX = source vertex, EDX = dest vertex, ECX = matrix,
 * ESI = vertices left, EDI = source stride.  ESI and EDI are saved and
 * restored; EAX, ECX, EDX, MM0, MM2 and MM4 are overwritten.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_2d )
GLNAME( _mesa_3dnow_transform_points1_2d ):

    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )   /* dest count = source count */

    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* dest start, by named offset like the source below */
    MOV_L     ( ESI, ECX )                      /* ECX = matrix */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP2R_3 ) )              /* nothing to do for count == 0 */

    MOVQ      ( REGIND(ECX), MM0 )              /* m01             | m00              */
    MOVQ      ( REGOFF(48, ECX), MM2 )          /* m31             | m30              */

ALIGNTEXT16
LLBL( G3TP2R_2 ):

    MOVD      ( REGIND(EAX), MM4 )              /*                 | x0               */
    PUNPCKLDQ ( MM4, MM4 )                      /* x0              | x0               */

    PFMUL     ( MM0, MM4 )                      /* x0*m01          | x0*m00           */
    PFADD     ( MM2, MM4 )                      /* x0*m01+m31      | x0*m00+m30       */

    MOVQ      ( MM4, REGIND(EDX) )              /* write r1, r0                       */
    ADD_L     ( EDI, EAX )                      /* next vertex                        */

    ADD_L     ( CONST(16), EDX )                /* next r                             */
    DEC_L     ( ESI )                           /* decrement vertex counter           */

    JNZ       ( LLBL( G3TP2R_2 ) )              /* cnt > 0 ? -> process next vertex   */

LLBL( G3TP2R_3 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_points1_2d_no_rot
 *
 * Transforms 1-component vertices by a 2D matrix without rotation and
 * writes 2-component results:
 *     r0 = x0*m00 + m30      r1 = m31
 * The dest vector gets size 2, the VEC_SIZE_2 flag bits and the source
 * count.  Dest vertices are 16 bytes apart; the source is walked with
 * its own stride.
 *
 * NOTE(review): ARG_DEST / ARG_MATRIX / ARG_SOURCE come from xform_args.h
 * and presumably address the stack relative to ESP via FRAME_OFFSET, which
 * would be why they are only used between the first and second push -
 * confirm before moving either PUSH_L.
 *
 * Loop registers: EAX = source vertex, EDX = dest vertex, ECX = matrix,
 * ESI = vertices left, EDI = source stride.  ESI and EDI are saved and
 * restored; EAX, ECX, EDX, MM0, MM2 and MM4 are overwritten.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_2d_no_rot )
GLNAME( _mesa_3dnow_transform_points1_2d_no_rot ):

    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )   /* dest count = source count */

    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* dest start, by named offset like the source below */
    MOV_L     ( ESI, ECX )                      /* ECX = matrix */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP2NRR_3 ) )            /* nothing to do for count == 0 */

    MOVD      ( REGIND(ECX), MM0 )              /* 0               | m00              */
    MOVQ      ( REGOFF(48, ECX), MM2 )          /* m31             | m30              */

ALIGNTEXT16
LLBL( G3TP2NRR_2 ):

    MOVD      ( REGIND(EAX), MM4 )              /* 0               | x0               */
    ADD_L     ( EDI, EAX )                      /* next vertex                        */

    PFMUL     ( MM0, MM4 )                      /* 0               | x0*m00           */
    PFADD     ( MM2, MM4 )                      /* m31             | x0*m00+m30       */

    MOVQ      ( MM4, REGIND(EDX) )              /* write r1, r0                       */
    ADD_L     ( CONST(16), EDX )                /* next r                             */

    DEC_L     ( ESI )                           /* decrement vertex counter           */
    JNZ       ( LLBL( G3TP2NRR_2 ) )            /* cnt > 0 ? -> process next vertex   */

LLBL( G3TP2NRR_3 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_points1_3d
 *
 * Transforms 1-component vertices by a 3D matrix and writes 3-component
 * results:
 *     r0 = x0*m00 + m30      r1 = x0*m01 + m31      r2 = x0*m02 + m32
 * The dest vector gets size 3, the VEC_SIZE_3 flag bits and the source
 * count.  Dest vertices are 16 bytes apart; the source is walked with
 * its own stride.
 *
 * NOTE(review): ARG_DEST / ARG_MATRIX / ARG_SOURCE come from xform_args.h
 * and presumably address the stack relative to ESP via FRAME_OFFSET, which
 * would be why they are only used between the first and second push -
 * confirm before moving either PUSH_L.
 *
 * Loop registers: EAX = source vertex, EDX = dest vertex, ECX = matrix,
 * ESI = vertices left, EDI = source stride.  ESI and EDI are saved and
 * restored; EAX, ECX, EDX and MM0-MM5 are overwritten.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points1_3d )
GLNAME( _mesa_3dnow_transform_points1_3d ):

    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )   /* dest count = source count */

    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* dest start, by named offset like the source below */
    MOV_L     ( ESI, ECX )                      /* ECX = matrix */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP3R_3 ) )              /* nothing to do for count == 0 */

    MOVQ      ( REGIND(ECX), MM0 )              /* m01             | m00              */
    MOVD      ( REGOFF(8, ECX), MM1 )           /* 0               | m02              */

    MOVQ      ( REGOFF(48, ECX), MM2 )          /* m31             | m30              */
    MOVD      ( REGOFF(56, ECX), MM3 )          /* 0               | m32              */

ALIGNTEXT16
LLBL( G3TP3R_2 ):

    MOVD      ( REGIND(EAX), MM4 )              /*                 | x0               */
    PUNPCKLDQ ( MM4, MM4 )                      /* x0              | x0               */

    MOVQ      ( MM4, MM5 )                      /* x0              | x0               */
    PFMUL     ( MM0, MM4 )                      /* x0*m01          | x0*m00           */

    PFMUL     ( MM1, MM5 )                      /* 0               | x0*m02           */
    PFADD     ( MM2, MM4 )                      /* x0*m01+m31      | x0*m00+m30       */

    PFADD     ( MM3, MM5 )                      /* 0               | x0*m02+m32       */
    MOVQ      ( MM4, REGIND(EDX) )              /* write r1, r0                       */

    MOVD      ( MM5, REGOFF(8, EDX) )           /* write r2                           */
    ADD_L     ( EDI, EAX )                      /* next vertex                        */

    ADD_L     ( CONST(16), EDX )                /* next r                             */
    DEC_L     ( ESI )                           /* decrement vertex counter           */

    JNZ       ( LLBL( G3TP3R_2 ) )              /* cnt > 0 ? -> process next vertex   */

LLBL( G3TP3R_3 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
|
|
@ -0,0 +1,464 @@
|
|||
/* $Id: 3dnow_xform2.S,v 1.1 2001/03/29 06:46:16 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
#define FRAME_OFFSET 4
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_points2_general
 *
 * General-matrix transform of 2-component vertices (x0, x1) into
 * 4-component results:
 *     rj = x0*m0j + x1*m1j + m3j      (j = 0..3)
 * Dest size is set to 4, VEC_SIZE_4 is OR-ed into the dest flags and the
 * source count is copied to the dest.  Output vertices are 16 bytes
 * apart; input vertices are stepped by the source stride.
 *
 * NOTE(review): the ARG_* macros are defined in xform_args.h and
 * presumably index the stack from ESP using FRAME_OFFSET; all three are
 * read after the ESI push and before the EDI push - keep it that way
 * unless the macros are checked.
 *
 * In the loop: EAX -> input vertex, EDX -> output vertex, ECX -> matrix,
 * ESI = remaining vertices, EDI = input stride.  ESI/EDI are preserved,
 * EAX/ECX/EDX and MM0-MM7 are scratch.  The loop keeps its original
 * instruction order.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points2_general )
GLNAME( _mesa_3dnow_transform_points2_general ):

    PUSH_L    ( ESI )

    /* Fetch the arguments and fill in the dest header. */
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L    ( EDI )

    /* Set up the loop registers. */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* input stride */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* first output vertex */
    MOV_L     ( ESI, ECX )                      /* matrix */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* vertex count */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* first input vertex */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3XF2_general_done ) )    /* empty input: skip the loop */

    /* Gather the matrix so each register pairs the x0 and x1 factors. */
    MOVD      ( REGIND(ECX), MM0 )              /* 0   | m00 */
    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )          /* m10 | m00 */

    MOVD      ( REGOFF(4, ECX), MM1 )           /* 0   | m01 */
    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )          /* m11 | m01 */

    MOVD      ( REGOFF(8, ECX), MM2 )           /* 0   | m02 */
    PUNPCKLDQ ( REGOFF(24, ECX), MM2 )          /* m12 | m02 */

    MOVD      ( REGOFF(12, ECX), MM3 )          /* 0   | m03 */
    PUNPCKLDQ ( REGOFF(28, ECX), MM3 )          /* m13 | m03 */

    MOVQ      ( REGOFF(48, ECX), MM4 )          /* m31 | m30 */
    MOVQ      ( REGOFF(56, ECX), MM5 )          /* m33 | m32 */

ALIGNTEXT16
LLBL( G3XF2_general_loop ):

    MOVQ      ( REGIND(EAX), MM6 )              /* x1 | x0 */
    MOVQ      ( MM6, MM7 )                      /* x1 | x0 */

    PFMUL     ( MM0, MM6 )                      /* x1*m10 | x0*m00 */
    PFMUL     ( MM1, MM7 )                      /* x1*m11 | x0*m01 */

    PFACC     ( MM7, MM6 )                      /* x0*m01+x1*m11 | x0*m00+x1*m10 */
    PFADD     ( MM4, MM6 )                      /* ...+m31       | ...+m30       */

    MOVQ      ( MM6, REGIND(EDX) )              /* store r1, r0 */
    MOVQ      ( REGIND(EAX), MM6 )              /* x1 | x0 (reloaded) */

    MOVQ      ( MM6, MM7 )                      /* x1 | x0 */
    PFMUL     ( MM2, MM6 )                      /* x1*m12 | x0*m02 */

    PFMUL     ( MM3, MM7 )                      /* x1*m13 | x0*m03 */
    ADD_L     ( EDI, EAX )                      /* advance input */

    PFACC     ( MM7, MM6 )                      /* x0*m03+x1*m13 | x0*m02+x1*m12 */
    PFADD     ( MM5, MM6 )                      /* ...+m33       | ...+m32       */

    MOVQ      ( MM6, REGOFF(8, EDX) )           /* store r3, r2 */
    ADD_L     ( CONST(16), EDX )                /* advance output */

    DEC_L     ( ESI )                           /* one vertex done */
    JNZ       ( LLBL( G3XF2_general_loop ) )    /* more left -> next vertex */

LLBL( G3XF2_general_done ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_points2_perspective
 *
 * Perspective-matrix transform of 2-component vertices (x0, x1) into
 * 4-component results:
 *     r0 = x0*m00      r1 = x1*m11      r2 = m32      r3 = 0
 * Dest size is set to 4, VEC_SIZE_4 is OR-ed into the dest flags and the
 * source count is copied to the dest.  Output vertices are 16 bytes
 * apart; input vertices are stepped by the source stride.
 *
 * NOTE(review): the ARG_* macros are defined in xform_args.h and
 * presumably index the stack from ESP using FRAME_OFFSET; all three are
 * read after the ESI push and before the EDI push - keep it that way
 * unless the macros are checked.
 *
 * In the loop: EAX -> input vertex, EDX -> output vertex, ECX -> matrix,
 * ESI = remaining vertices, EDI = input stride.  ESI/EDI are preserved,
 * EAX/ECX/EDX, MM0, MM3 and MM4 are scratch.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points2_perspective )
GLNAME( _mesa_3dnow_transform_points2_perspective ):

    PUSH_L    ( ESI )

    /* Fetch the arguments and fill in the dest header. */
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L    ( EDI )

    /* Set up the loop registers. */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* input stride */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* first output vertex */
    MOV_L     ( ESI, ECX )                      /* matrix */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* vertex count */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* first input vertex */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3XF2_persp_done ) )      /* empty input: skip the loop */

    MOVD      ( REGIND(ECX), MM0 )              /* 0   | m00 */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )          /* m11 | m00 */

    MOVD      ( REGOFF(56, ECX), MM3 )          /* 0   | m32 */

ALIGNTEXT16
LLBL( G3XF2_persp_loop ):

    MOVQ      ( REGIND(EAX), MM4 )              /* x1 | x0 */
    PFMUL     ( MM0, MM4 )                      /* x1*m11 | x0*m00 */

    MOVQ      ( MM4, REGIND(EDX) )              /* store r1, r0 */
    MOVQ      ( MM3, REGOFF(8, EDX) )           /* store r3 (= 0), r2 (= m32) */

    ADD_L     ( EDI, EAX )                      /* advance input */
    ADD_L     ( CONST(16), EDX )                /* advance output */

    DEC_L     ( ESI )                           /* one vertex done */
    JNZ       ( LLBL( G3XF2_persp_loop ) )      /* more left -> next vertex */

LLBL( G3XF2_persp_done ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_points2_3d
 *
 * 3D-matrix transform of 2-component vertices (x0, x1) into 3-component
 * results:
 *     rj = x0*m0j + x1*m1j + m3j      (j = 0..2)
 * Dest size is set to 3, VEC_SIZE_3 is OR-ed into the dest flags and the
 * source count is copied to the dest.  Output vertices are 16 bytes
 * apart; input vertices are stepped by the source stride.
 *
 * NOTE(review): the ARG_* macros are defined in xform_args.h and
 * presumably index the stack from ESP using FRAME_OFFSET; all three are
 * read after the ESI push and before the EDI push - keep it that way
 * unless the macros are checked.
 *
 * In the loop: EAX -> input vertex, EDX -> output vertex, ECX -> matrix,
 * ESI = remaining vertices, EDI = input stride.  ESI/EDI are preserved,
 * EAX/ECX/EDX, MM0-MM2 and MM4-MM7 are scratch.  The loop keeps its
 * original instruction order.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points2_3d )
GLNAME( _mesa_3dnow_transform_points2_3d ):

    PUSH_L    ( ESI )

    /* Fetch the arguments and fill in the dest header. */
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L    ( EDI )

    /* Set up the loop registers. */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* input stride */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* first output vertex */
    MOV_L     ( ESI, ECX )                      /* matrix */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* vertex count */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* first input vertex */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3XF2_3d_done ) )         /* empty input: skip the loop */

    /* Gather the matrix so each register pairs the x0 and x1 factors. */
    MOVD      ( REGIND(ECX), MM0 )              /* 0   | m00 */
    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )          /* m10 | m00 */

    MOVD      ( REGOFF(4, ECX), MM1 )           /* 0   | m01 */
    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )          /* m11 | m01 */

    MOVD      ( REGOFF(8, ECX), MM2 )           /* 0   | m02 */
    PUNPCKLDQ ( REGOFF(24, ECX), MM2 )          /* m12 | m02 */

    MOVQ      ( REGOFF(48, ECX), MM4 )          /* m31 | m30 */
    MOVD      ( REGOFF(56, ECX), MM5 )          /* 0   | m32 */

ALIGNTEXT16
LLBL( G3XF2_3d_loop ):

    MOVQ      ( REGIND(EAX), MM6 )              /* x1 | x0 */
    MOVQ      ( MM6, MM7 )                      /* x1 | x0 */

    PFMUL     ( MM0, MM6 )                      /* x1*m10 | x0*m00 */
    PFMUL     ( MM1, MM7 )                      /* x1*m11 | x0*m01 */

    PFACC     ( MM7, MM6 )                      /* x0*m01+x1*m11 | x0*m00+x1*m10 */
    PFADD     ( MM4, MM6 )                      /* ...+m31       | ...+m30       */

    MOVQ      ( MM6, REGIND(EDX) )              /* store r1, r0 */
    MOVQ      ( REGIND(EAX), MM6 )              /* x1 | x0 (reloaded) */

    MOVQ      ( MM6, MM7 )                      /* x1 | x0; only feeds the unused high half below */
    PFMUL     ( MM2, MM6 )                      /* x1*m12 | x0*m02 */

    PFACC     ( MM7, MM6 )                      /* (unused) | x0*m02+x1*m12 */
    PFADD     ( MM5, MM6 )                      /* (unused) | ...+m32       */

    MOVD      ( MM6, REGOFF(8, EDX) )           /* store r2 (low half only) */
    ADD_L     ( EDI, EAX )                      /* advance input */

    ADD_L     ( CONST(16), EDX )                /* advance output */
    DEC_L     ( ESI )                           /* one vertex done */

    JNZ       ( LLBL( G3XF2_3d_loop ) )         /* more left -> next vertex */

LLBL( G3XF2_3d_done ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_points2_3d_no_rot
 *
 * Rotation-free 3D-matrix transform of 2-component vertices (x0, x1)
 * into 3-component results:
 *     r0 = x0*m00 + m30      r1 = x1*m11 + m31      r2 = m32
 * Dest size is set to 3, VEC_SIZE_3 is OR-ed into the dest flags and the
 * source count is copied to the dest.  Output vertices are 16 bytes
 * apart; input vertices are stepped by the source stride.
 *
 * NOTE(review): the ARG_* macros are defined in xform_args.h and
 * presumably index the stack from ESP using FRAME_OFFSET; all three are
 * read after the ESI push and before the EDI push - keep it that way
 * unless the macros are checked.
 *
 * In the loop: EAX -> input vertex, EDX -> output vertex, ECX -> matrix,
 * ESI = remaining vertices, EDI = input stride.  ESI/EDI are preserved,
 * EAX/ECX/EDX, MM0 and MM2-MM4 are scratch.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points2_3d_no_rot )
GLNAME( _mesa_3dnow_transform_points2_3d_no_rot ):

    PUSH_L    ( ESI )

    /* Fetch the arguments and fill in the dest header. */
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L    ( EDI )

    /* Set up the loop registers. */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* input stride */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* first output vertex */
    MOV_L     ( ESI, ECX )                      /* matrix */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* vertex count */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* first input vertex */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3XF2_3dnr_done ) )       /* empty input: skip the loop */

    MOVD      ( REGIND(ECX), MM0 )              /* 0   | m00 */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )          /* m11 | m00 */

    MOVQ      ( REGOFF(48, ECX), MM2 )          /* m31 | m30 */
    MOVD      ( REGOFF(56, ECX), MM3 )          /* 0   | m32 */

ALIGNTEXT16
LLBL( G3XF2_3dnr_loop ):

    MOVQ      ( REGIND(EAX), MM4 )              /* x1 | x0 */
    PFMUL     ( MM0, MM4 )                      /* x1*m11 | x0*m00 */

    PFADD     ( MM2, MM4 )                      /* x1*m11+m31 | x0*m00+m30 */
    MOVQ      ( MM4, REGIND(EDX) )              /* store r1, r0 */

    MOVD      ( MM3, REGOFF(8, EDX) )           /* store r2 (= m32) */
    ADD_L     ( EDI, EAX )                      /* advance input */

    ADD_L     ( CONST(16), EDX )                /* advance output */
    DEC_L     ( ESI )                           /* one vertex done */

    JNZ       ( LLBL( G3XF2_3dnr_loop ) )       /* more left -> next vertex */

LLBL( G3XF2_3dnr_done ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_points2_2d
 *
 * 2D-matrix transform of 2-component vertices (x0, x1) into 2-component
 * results:
 *     r0 = x0*m00 + x1*m10 + m30      r1 = x0*m01 + x1*m11 + m31
 * Dest size is set to 2, VEC_SIZE_2 is OR-ed into the dest flags and the
 * source count is copied to the dest.  Output vertices are 16 bytes
 * apart; input vertices are stepped by the source stride.
 *
 * NOTE(review): the ARG_* macros are defined in xform_args.h and
 * presumably index the stack from ESP using FRAME_OFFSET; all three are
 * read after the ESI push and before the EDI push - keep it that way
 * unless the macros are checked.
 *
 * In the loop: EAX -> input vertex, EDX -> output vertex, ECX -> matrix,
 * ESI = remaining vertices, EDI = input stride.  ESI/EDI are preserved,
 * EAX/ECX/EDX, MM0-MM2, MM4 and MM5 are scratch.  The loop keeps its
 * original instruction order.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points2_2d )
GLNAME( _mesa_3dnow_transform_points2_2d ):

    PUSH_L    ( ESI )

    /* Fetch the arguments and fill in the dest header. */
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L    ( EDI )

    /* Set up the loop registers. */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* input stride */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* first output vertex */
    MOV_L     ( ESI, ECX )                      /* matrix */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* vertex count */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* first input vertex */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3XF2_2d_done ) )         /* empty input: skip the loop */

    MOVQ      ( REGIND(ECX), MM0 )              /* m01 | m00 */
    MOVQ      ( REGOFF(16, ECX), MM1 )          /* m11 | m10 */

    MOVQ      ( REGOFF(48, ECX), MM2 )          /* m31 | m30 */

ALIGNTEXT16
LLBL( G3XF2_2d_loop ):

    MOVD      ( REGIND(EAX), MM4 )              /* 0  | x0 */
    MOVD      ( REGOFF(4, EAX), MM5 )           /* 0  | x1 */

    PUNPCKLDQ ( MM4, MM4 )                      /* x0 | x0 */
    ADD_L     ( EDI, EAX )                      /* advance input */

    PFMUL     ( MM0, MM4 )                      /* x0*m01 | x0*m00 */
    PUNPCKLDQ ( MM5, MM5 )                      /* x1 | x1 */

    PFMUL     ( MM1, MM5 )                      /* x1*m11 | x1*m10 */
    PFADD     ( MM2, MM4 )                      /* x0*m01+m31 | x0*m00+m30 */

    PFADD     ( MM5, MM4 )                      /* x0*m01+x1*m11+m31 | x0*m00+x1*m10+m30 */
    MOVQ      ( MM4, REGIND(EDX) )              /* store r1, r0 */

    ADD_L     ( CONST(16), EDX )                /* advance output */
    DEC_L     ( ESI )                           /* one vertex done */

    JNZ       ( LLBL( G3XF2_2d_loop ) )         /* more left -> next vertex */

LLBL( G3XF2_2d_done ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_points2_2d_no_rot
 *
 * Rotation-free 2D-matrix transform of 2-component vertices (x0, x1)
 * into 2-component results:
 *     r0 = x0*m00 + m30      r1 = x1*m11 + m31
 * Dest size is set to 2, VEC_SIZE_2 is OR-ed into the dest flags and the
 * source count is copied to the dest.  Output vertices are 16 bytes
 * apart; input vertices are stepped by the source stride.
 *
 * NOTE(review): the ARG_* macros are defined in xform_args.h and
 * presumably index the stack from ESP using FRAME_OFFSET; all three are
 * read after the ESI push and before the EDI push - keep it that way
 * unless the macros are checked.
 *
 * In the loop: EAX -> input vertex, EDX -> output vertex, ECX -> matrix,
 * ESI = remaining vertices, EDI = input stride.  ESI/EDI are preserved,
 * EAX/ECX/EDX, MM0, MM2 and MM4 are scratch.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points2_2d_no_rot )
GLNAME( _mesa_3dnow_transform_points2_2d_no_rot ):

    PUSH_L    ( ESI )

    /* Fetch the arguments and fill in the dest header. */
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L    ( EDI )

    /* Set up the loop registers. */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )  /* input stride */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* first output vertex */
    MOV_L     ( ESI, ECX )                      /* matrix */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )   /* vertex count */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )   /* first input vertex */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3XF2_2dnr_done ) )       /* empty input: skip the loop */

    MOVD      ( REGIND(ECX), MM0 )              /* 0   | m00 */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )          /* m11 | m00 */

    MOVQ      ( REGOFF(48, ECX), MM2 )          /* m31 | m30 */

ALIGNTEXT16
LLBL( G3XF2_2dnr_loop ):

    MOVQ      ( REGIND(EAX), MM4 )              /* x1 | x0 */
    ADD_L     ( EDI, EAX )                      /* advance input */

    PFMUL     ( MM0, MM4 )                      /* x1*m11 | x0*m00 */
    PFADD     ( MM2, MM4 )                      /* x1*m11+m31 | x0*m00+m30 */

    MOVQ      ( MM4, REGIND(EDX) )              /* store r1, r0 */
    ADD_L     ( CONST(16), EDX )                /* advance output */

    DEC_L     ( ESI )                           /* one vertex done */
    JNZ       ( LLBL( G3XF2_2dnr_loop ) )       /* more left -> next vertex */

LLBL( G3XF2_2dnr_done ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_points2_identity
 *
 * Identity transform for 2-component vertices: copies (x0, x1) of every
 * source vertex to (r0, r1) of the matching dest vertex.  The dest vector
 * gets size 2, the VEC_SIZE_2 flag bits and the source count.  Dest
 * vertices are 16 bytes apart; the source is walked with its own stride.
 *
 * The matrix argument is loaded but never read by the copy loop.
 *
 * An empty source (count == 0) must branch to the exit label G3TPIR_4.
 * Branching to the loop head G3TPIR_3 instead would run the body with a
 * zero counter, which DEC_L wraps around, so the loop would keep copying
 * far past both buffers.
 *
 * NOTE(review): ARG_DEST / ARG_MATRIX / ARG_SOURCE come from xform_args.h
 * and presumably address the stack relative to ESP via FRAME_OFFSET, which
 * would be why they are only used between the first and second push -
 * confirm before moving either PUSH_L.
 *
 * Loop registers: EAX = source vertex, EDX = dest vertex, ESI = vertices
 * left, EDI = source stride.  ESI and EDI are saved and restored; EAX,
 * ECX, EDX and MM0 are overwritten.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points2_identity )
GLNAME( _mesa_3dnow_transform_points2_identity ):

    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )   /* dest count = source count */

    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )   /* EDX = first dest vertex */
    MOV_L     ( ESI, ECX )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPIR_4 ) )              /* count == 0: skip the loop entirely */

ALIGNTEXT16
LLBL( G3TPIR_3 ):

    MOVQ      ( REGIND(EAX), MM0 )              /* x1              | x0               */
    ADD_L     ( EDI, EAX )                      /* next vertex                        */

    MOVQ      ( MM0, REGIND(EDX) )              /* r1              | r0               */
    ADD_L     ( CONST(16), EDX )                /* next r                             */

    DEC_L     ( ESI )                           /* decrement vertex counter           */
    JNZ       ( LLBL( G3TPIR_3 ) )              /* cnt > 0 ? -> process next vertex   */

LLBL( G3TPIR_4 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
|
|
@ -0,0 +1,570 @@
|
|||
/* $Id: 3dnow_xform3.S,v 1.1 2001/03/29 06:46:16 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
#define FRAME_OFFSET 4
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points3_general )
|
||||
GLNAME( _mesa_3dnow_transform_points3_general ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TPGR_2 ) )
|
||||
|
||||
PREFETCHW ( REGIND(EDX) )
|
||||
|
||||
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
|
||||
MOVD ( REGOFF(8, EAX), MM2 ) /* | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TPGR_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
|
||||
MOVQ ( MM0, MM1 ) /* x1 | x0 */
|
||||
PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */
|
||||
|
||||
PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */
|
||||
MOVQ ( MM2, MM5 ) /* x2 | x2 */
|
||||
|
||||
PUNPCKHDQ ( MM1, MM1 ) /* x1 | x1 */
|
||||
PFMUL ( REGOFF(32, ECX), MM2 ) /* x2*m9 | x2*m8 */
|
||||
|
||||
MOVQ ( MM0, MM3 ) /* x0 | x0 */
|
||||
PFMUL ( REGOFF(40, ECX), MM5 ) /* x2*m11 | x2*m10 */
|
||||
|
||||
MOVQ ( MM1, MM4 ) /* x1 | x1 */
|
||||
PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */
|
||||
|
||||
PFADD ( REGOFF(48, ECX), MM2 ) /* x2*m9+m13 | x2*m8+m12 */
|
||||
PFMUL ( REGOFF(16, ECX), MM1 ) /* x1*m5 | x1*m4 */
|
||||
|
||||
PFADD ( REGOFF(56, ECX), MM5 ) /* x2*m11+m15 | x2*m10+m14 */
|
||||
PFADD ( MM0, MM1 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */
|
||||
|
||||
PFMUL ( REGOFF(8, ECX), MM3 ) /* x0*m3 | x0*m2 */
|
||||
PFADD ( MM1, MM2 ) /* r1 | r0 */
|
||||
|
||||
PFMUL ( REGOFF(24, ECX), MM4 ) /* x1*m7 | x1*m6 */
|
||||
ADD_L ( CONST(16), EDX ) /* next output vertex */
|
||||
|
||||
PFADD ( MM3, MM4 ) /* x0*m3+x1*m7 | x0*m2+x1*m6 */
|
||||
MOVQ ( MM2, REGOFF(-16, EDX) ) /* write r0, r1 */
|
||||
|
||||
PFADD ( MM4, MM5 ) /* r3 | r2 */
|
||||
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
|
||||
MOVD ( REGOFF(8, EAX), MM2 ) /* | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TPGR_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TPGR_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points3_perspective )
|
||||
GLNAME( _mesa_3dnow_transform_points3_perspective ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TPPR_2 ) )
|
||||
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
PREFETCHW ( REGIND(EDX) )
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
|
||||
|
||||
MOVQ ( REGOFF(32, ECX), MM1 ) /* m21 | m20 */
|
||||
MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */
|
||||
|
||||
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */
|
||||
|
||||
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
|
||||
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TPPR_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
|
||||
PXOR ( MM7, MM7 ) /* 0 | 0 */
|
||||
MOVQ ( MM5, MM6 ) /* | x2 */
|
||||
|
||||
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
|
||||
PFSUB ( MM5, MM7 ) /* | -x2 */
|
||||
|
||||
PFMUL ( MM2, MM6 ) /* | x2*m22 */
|
||||
PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */
|
||||
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
PFMUL ( MM1, MM5 ) /* x2*m21 | x2*m20 */
|
||||
|
||||
PFADD ( MM3, MM6 ) /* | x2*m22+m32 */
|
||||
PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */
|
||||
|
||||
MOVQ ( MM5, REGOFF(-16, EDX) ) /* write r0, r1 */
|
||||
MOVD ( MM6, REGOFF(-8, EDX) ) /* write r2 */
|
||||
|
||||
MOVD ( MM7, REGOFF(-4, EDX) ) /* write r3 */
|
||||
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TPPR_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TPPR_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
/*
 * _mesa_3dnow_transform_points3_3d
 *
 * Transforms 3-component vertices (x0, x1, x2) with the matrix elements
 * m0-m2, m4-m6, m8-m10 and m12-m14 and writes r0, r1, r2 for each vertex:
 *
 *    r0 = x0*m0 + x1*m4 + x2*m8  + m12
 *    r1 = x0*m1 + x1*m5 + x2*m9  + m13
 *    r2 = x0*m2 + x1*m6 + x2*m10 + m14
 *
 * The destination's size is set to 3, VEC_SIZE_3 is ORed into its flags and
 * its count is copied from the source before the loop starts.
 *
 * Loop registers: EAX = source vertex, EDX = output vertex (advanced by 16
 * bytes per vertex), ECX = matrix, ESI = vertices left, EDI = source stride
 * in bytes.  ESI and EDI are saved and restored; EAX, ECX, EDX and the MMX
 * registers used are clobbered.  FEMMS is executed before returning.
 *
 * Lane comments are written "high dword | low dword".
 *
 * NOTE(review): the next vertex is loaded before the counter is tested, so
 * the last iteration reads one stride beyond the final source vertex.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points3_3d )
|
||||
GLNAME( _mesa_3dnow_transform_points3_3d ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP3R_2 ) )
|
||||
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
PREFETCHW ( REGIND(EDX) ) /* output is only written: use the write hint */
|
||||
|
||||
MOVD ( REGOFF(8, ECX), MM7 ) /* | m2 */
|
||||
PUNPCKLDQ ( REGOFF(24, ECX), MM7 ) /* m6 | m2 */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
|
||||
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP3R_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
|
||||
MOVQ ( MM0, MM2 ) /* x1 | x0 */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
PUNPCKLDQ ( MM2, MM2 ) /* x0 | x0 */
|
||||
MOVQ ( MM0, MM3 ) /* x1 | x0 */
|
||||
|
||||
PFMUL ( REGIND(ECX), MM2 ) /* x0*m1 | x0*m0 */
|
||||
PUNPCKHDQ ( MM3, MM3 ) /* x1 | x1 */
|
||||
|
||||
MOVQ ( MM1, MM4 ) /* | x2 */
|
||||
PFMUL ( REGOFF(16, ECX), MM3 ) /* x1*m5 | x1*m4 */
|
||||
|
||||
PUNPCKLDQ ( MM4, MM4 ) /* x2 | x2 */
|
||||
PFADD ( MM2, MM3 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */
|
||||
|
||||
PFMUL ( REGOFF(32, ECX), MM4 ) /* x2*m9 | x2*m8 */
|
||||
PFADD ( REGOFF(48, ECX), MM3 ) /* x0*m1+x1*m5+m13 | x0*m0+x1*m4+m12 */
|
||||
|
||||
PFMUL ( MM7, MM0 ) /* x1*m6 | x0*m2 */
|
||||
PFADD ( MM4, MM3 ) /* r1 | r0 */
|
||||
|
||||
PFMUL ( REGOFF(40, ECX), MM1 ) /* | x2*m10 */
|
||||
PUNPCKLDQ ( REGOFF(56, ECX), MM1 ) /* m14 | x2*m10 */
|
||||
|
||||
PFACC ( MM0, MM1 ) /* x0*m2+x1*m6 | x2*m10+m14 */
|
||||
|
||||
MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */
|
||||
PFACC ( MM1, MM1 ) /* | r2 */
|
||||
|
||||
MOVD ( MM1, REGOFF(-8, EDX) ) /* write r2 */
|
||||
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
|
||||
|
||||
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TP3R_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP3R_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
/*
 * _mesa_3dnow_transform_points3_3d_no_rot
 *
 * Transforms 3-component vertices (x0, x1, x2) using only the matrix
 * elements m00, m11, m22 and m30-m32, and writes r0, r1, r2 per vertex:
 *
 *    r0 = x0*m00 + m30
 *    r1 = x1*m11 + m31
 *    r2 = x2*m22 + m32
 *
 * The destination's size is set to 3, VEC_SIZE_3 is ORed into its flags and
 * its count is copied from the source before the loop starts.
 *
 * Loop registers: EAX = source vertex, EDX = output vertex (advanced by 16
 * bytes per vertex), ECX = matrix, ESI = vertices left, EDI = source stride
 * in bytes.  ESI and EDI are saved and restored; EAX, ECX, EDX and the MMX
 * registers used are clobbered.  FEMMS is executed before returning.
 *
 * DEC_L sets the flags consumed by the closing JNZ; only MMX moves sit
 * between the two.
 *
 * Lane comments are written "high dword | low dword".
 *
 * NOTE(review): the next vertex is loaded before the counter is tested, so
 * the last iteration reads one stride beyond the final source vertex.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points3_3d_no_rot )
|
||||
GLNAME( _mesa_3dnow_transform_points3_3d_no_rot ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP3NRR_2 ) )
|
||||
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
PREFETCHW ( REGIND(EDX) )
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
|
||||
|
||||
MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */
|
||||
PUNPCKLDQ ( MM2, MM2 ) /* m22 | m22 */
|
||||
|
||||
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
|
||||
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */
|
||||
|
||||
PUNPCKLDQ ( MM3, MM3 ) /* m32 | m32 */
|
||||
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
|
||||
|
||||
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP3NRR_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
PREFETCH ( REGIND(EAX) ) /* source is only read: plain prefetch */
|
||||
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
|
||||
|
||||
PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */
|
||||
PFMUL ( MM2, MM5 ) /* | x2*m22 */
|
||||
|
||||
PFADD ( MM3, MM5 ) /* | x2*m22+m32 */
|
||||
MOVQ ( MM4, REGIND(EDX) ) /* write r0, r1 */
|
||||
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
|
||||
MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 */
|
||||
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
|
||||
|
||||
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
|
||||
JNZ ( LLBL( G3TP3NRR_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP3NRR_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
/*
 * _mesa_3dnow_transform_points3_2d
 *
 * Transforms 3-component vertices (x0, x1, x2) using only the matrix
 * elements m00, m01, m10, m11, m30 and m31; x2 is copied through unchanged:
 *
 *    r0 = x0*m00 + x1*m10 + m30
 *    r1 = x0*m01 + x1*m11 + m31
 *    r2 = x2
 *
 * The destination's size is set to 3, VEC_SIZE_3 is ORed into its flags and
 * its count is copied from the source before the loop starts.
 *
 * Loop registers: EAX = source vertex, EDX = output vertex (advanced by 16
 * bytes per vertex), ECX = matrix, ESI = vertices left, EDI = source stride
 * in bytes.  ESI and EDI are saved and restored; EAX, ECX, EDX and the MMX
 * registers used are clobbered.  FEMMS is executed before returning.
 *
 * Lane comments are written "high dword | low dword".
 *
 * NOTE(review): the next vertex is loaded before the counter is tested, so
 * the last iteration reads one stride beyond the final source vertex.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points3_2d )
|
||||
GLNAME( _mesa_3dnow_transform_points3_2d ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP2R_3) )
|
||||
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
PREFETCHW ( REGIND(EDX) )
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */
|
||||
|
||||
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */
|
||||
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */
|
||||
|
||||
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
|
||||
MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */
|
||||
|
||||
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP2R_2 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
|
||||
MOVQ ( MM3, MM4 ) /* x1 | x0 */
|
||||
PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */
|
||||
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */
|
||||
|
||||
PFACC ( MM4, MM3 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */
|
||||
MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (=x2) */
|
||||
|
||||
PFADD ( MM2, MM3 ) /* x0*m01+x1*m11+m31 | x0*m00+x1*m10+m30 */
|
||||
MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */
|
||||
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP2R_3 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points3_2d_no_rot )
|
||||
GLNAME( _mesa_3dnow_transform_points3_2d_no_rot ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP2NRR_2 ) )
|
||||
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
PREFETCHW ( REGIND(EDX) )
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
|
||||
|
||||
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
|
||||
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
|
||||
|
||||
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP2NRR_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
|
||||
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */
|
||||
|
||||
MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */
|
||||
MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (=x2) */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
|
||||
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TP2NRR_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP2NRR_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points3_identity )
|
||||
GLNAME( _mesa_3dnow_transform_points3_identity ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TPIR_2 ) )
|
||||
|
||||
PREFETCHW ( REGIND(EDX) )
|
||||
|
||||
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
|
||||
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TPIR_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) )
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
MOVQ ( MM0, REGOFF(-16, EDX) ) /* r1 | r0 */
|
||||
|
||||
MOVD ( MM1, REGOFF(-8, EDX) ) /* | r2 */
|
||||
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
|
||||
|
||||
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
|
||||
JNZ ( LLBL( G3TPIR_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TPIR_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
|
@ -0,0 +1,588 @@
|
|||
/* $Id: 3dnow_xform4.S,v 1.1 2001/03/29 06:46:16 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
#define FRAME_OFFSET 4
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points4_general )
|
||||
GLNAME( _mesa_3dnow_transform_points4_general ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TPGR_2 ) )
|
||||
|
||||
PREFETCHW ( REGIND(EDX) )
|
||||
|
||||
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
|
||||
MOVQ ( REGOFF(8, EAX), MM4 ) /* x3 | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TPGR_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
|
||||
MOVQ ( MM0, MM2 ) /* x1 | x0 */
|
||||
MOVQ ( MM4, MM6 ) /* x3 | x2 */
|
||||
|
||||
PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */
|
||||
PUNPCKHDQ ( MM2, MM2 ) /* x1 | x1 */
|
||||
|
||||
MOVQ ( MM0, MM1 ) /* x0 | x0 */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */
|
||||
MOVQ ( MM2, MM3 ) /* x1 | x1 */
|
||||
|
||||
PFMUL ( REGOFF(8, ECX), MM1 ) /* x0*m3 | x0*m2 */
|
||||
PUNPCKLDQ ( MM4, MM4 ) /* x2 | x2 */
|
||||
|
||||
PFMUL ( REGOFF(16, ECX), MM2 ) /* x1*m5 | x1*m4 */
|
||||
MOVQ ( MM4, MM5 ) /* x2 | x2 */
|
||||
|
||||
PFMUL ( REGOFF(24, ECX), MM3 ) /* x1*m7 | x1*m6 */
|
||||
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
|
||||
|
||||
PFMUL ( REGOFF(32, ECX), MM4 ) /* x2*m9 | x2*m8 */
|
||||
MOVQ ( MM6, MM7 ) /* x3 | x3 */
|
||||
|
||||
PFMUL ( REGOFF(40, ECX), MM5 ) /* x2*m11 | x2*m10 */
|
||||
PFADD ( MM0, MM2 )
|
||||
|
||||
PFMUL ( REGOFF(48, ECX), MM6 ) /* x3*m13 | x3*m12 */
|
||||
PFADD ( MM1, MM3 )
|
||||
|
||||
PFMUL ( REGOFF(56, ECX), MM7 ) /* x3*m15 | x3*m14 */
|
||||
PFADD ( MM4, MM6 )
|
||||
|
||||
PFADD ( MM5, MM7 )
|
||||
PFADD ( MM2, MM6 )
|
||||
|
||||
PFADD ( MM3, MM7 )
|
||||
MOVQ ( MM6, REGOFF(-16, EDX) )
|
||||
|
||||
MOVQ ( MM7, REGOFF(-8, EDX) )
|
||||
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
|
||||
|
||||
MOVQ ( REGOFF(8, EAX), MM4 ) /* x3 | x2 */
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TPGR_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TPGR_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
/*
 * _mesa_3dnow_transform_points4_perspective
 *
 * Transforms 4-component vertices (x0..x3) using only the matrix elements
 * m00, m11, m20, m21, m22 and m32, and writes r0..r3 per vertex:
 *
 *    r0 = x0*m00 + x2*m20
 *    r1 = x1*m11 + x2*m21
 *    r2 = x2*m22 + x3*m32
 *    r3 = -x2
 *
 * The destination's size is set to 4, VEC_SIZE_4 is ORed into its flags and
 * its count is copied from the source before the loop starts.
 *
 * Loop registers: EAX = source vertex, EDX = output vertex (advanced by 16
 * bytes per vertex), ECX = matrix, ESI = vertices left, EDI = source stride
 * in bytes.  ESI and EDI are saved and restored; EAX, ECX, EDX and the MMX
 * registers used are clobbered.  FEMMS is executed before returning.
 *
 * MM7 is zeroed once before the loop and never written inside it; PFSUBR
 * uses it to form 0 - x2.
 *
 * Lane comments are written "high dword | low dword".
 *
 * NOTE(review): the next vertex is loaded before the counter is tested, so
 * the last iteration reads one stride beyond the final source vertex.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points4_perspective )
|
||||
GLNAME( _mesa_3dnow_transform_points4_perspective ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TPPR_2 ) )
|
||||
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
PREFETCHW ( REGIND(EDX) )
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
|
||||
|
||||
MOVD ( REGOFF(40, ECX), MM1 ) /* | m22 */
|
||||
PUNPCKLDQ ( REGOFF(56, ECX), MM1 ) /* m32 | m22 */
|
||||
|
||||
MOVQ ( REGOFF(32, ECX), MM2 ) /* m21 | m20 */
|
||||
PXOR ( MM7, MM7 ) /* 0 | 0 */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
|
||||
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
|
||||
MOVD ( REGOFF(8, EAX), MM3 ) /* | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TPPR_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
PREFETCH ( REGOFF(32, EAX) ) /* 2 vertices past EAX only if the source stride is 16 bytes (tightly packed) */
|
||||
|
||||
MOVQ ( MM5, MM6 ) /* x3 | x2 */
|
||||
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
|
||||
|
||||
PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
PFMUL ( MM2, MM5 ) /* x2*m21 | x2*m20 */
|
||||
PFSUBR ( MM7, MM3 ) /* | -x2 */
|
||||
|
||||
PFMUL ( MM1, MM6 ) /* x3*m32 | x2*m22 */
|
||||
PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */
|
||||
|
||||
PFACC ( MM3, MM6 ) /* -x2 | x2*m22+x3*m32 */
|
||||
MOVQ ( MM5, REGOFF(-16, EDX) ) /* write r0, r1 */
|
||||
|
||||
MOVQ ( MM6, REGOFF(-8, EDX) ) /* write r2, r3 */
|
||||
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
|
||||
|
||||
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
|
||||
MOVD ( REGOFF(8, EAX), MM3 ) /* | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
|
||||
JNZ ( LLBL( G3TPPR_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TPPR_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d )
|
||||
GLNAME( _mesa_3dnow_transform_points4_3d ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP3R_2 ) )
|
||||
|
||||
MOVD ( REGOFF(8, ECX), MM6 ) /* | m2 */
|
||||
PUNPCKLDQ ( REGOFF(24, ECX), MM6 ) /* m6 | m2 */
|
||||
|
||||
MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */
|
||||
PUNPCKLDQ ( REGOFF(56, ECX), MM7 ) /* m14 | m10 */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM2 ) /* x1 | x0 */
|
||||
MOVQ ( REGOFF(8, EAX), MM3 ) /* x3 | x2 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP3R_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
PREFETCH ( REGOFF(32, EAX) ) /* hopefully array is tightly packed */
|
||||
|
||||
MOVQ ( MM2, MM0 ) /* x1 | x0 */
|
||||
MOVQ ( MM3, MM4 ) /* x3 | x2 */
|
||||
|
||||
MOVQ ( MM0, MM1 ) /* x1 | x0 */
|
||||
MOVQ ( MM4, MM5 ) /* x3 | x2 */
|
||||
|
||||
PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */
|
||||
PUNPCKHDQ ( MM1, MM1 ) /* x1 | x1 */
|
||||
|
||||
PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */
|
||||
PUNPCKLDQ ( MM3, MM3 ) /* x2 | x2 */
|
||||
|
||||
PFMUL ( REGOFF(16, ECX), MM1 ) /* x1*m5 | x1*m4 */
|
||||
PUNPCKHDQ ( MM4, MM4 ) /* x3 | x3 */
|
||||
|
||||
PFMUL ( MM6, MM2 ) /* x1*m6 | x0*m2 */
|
||||
PFADD ( MM0, MM1 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */
|
||||
|
||||
PFMUL ( REGOFF(32, ECX), MM3 ) /* x2*m9 | x2*m8 */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
PFMUL ( REGOFF(48, ECX), MM4 ) /* x3*m13 | x3*m12 */
|
||||
PFADD ( MM1, MM3 ) /* x0*m1+..+x2*m9 | x0*m0+...+x2*m8 */
|
||||
|
||||
PFMUL ( MM7, MM5 ) /* x3*m14 | x2*m10 */
|
||||
PFADD ( MM3, MM4 ) /* r1 | r0 */
|
||||
|
||||
PFACC ( MM2, MM5 ) /* x0*m2+x1*m6 | x2*m10+x3*m14 */
|
||||
MOVD ( REGOFF(12, EAX), MM0 ) /* | x3 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
PFACC ( MM0, MM5 ) /* r3 | r2 */
|
||||
|
||||
MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */
|
||||
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM2 ) /* x1 | x0 */
|
||||
MOVQ ( REGOFF(8, EAX), MM3 ) /* x3 | x2 */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TP3R_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP3R_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
/*
 * _mesa_3dnow_transform_points4_3d_no_rot
 *
 * Transforms 4-component vertices (x0..x3) using only the matrix elements
 * m00, m11, m22 and m30-m32; x3 is copied through unchanged:
 *
 *    r0 = x0*m00 + x3*m30
 *    r1 = x1*m11 + x3*m31
 *    r2 = x2*m22 + x3*m32
 *    r3 = x3
 *
 * The destination's size is set to 4, VEC_SIZE_4 is ORed into its flags and
 * its count is copied from the source before the loop starts.
 *
 * Loop registers: EAX = source vertex, EDX = output vertex (advanced by 16
 * bytes per vertex), ECX = matrix, ESI = vertices left, EDI = source stride
 * in bytes.  ESI and EDI are saved and restored; EAX, ECX, EDX and the MMX
 * registers used are clobbered.  FEMMS is executed before returning.
 *
 * Lane comments are written "high dword | low dword".
 *
 * NOTE(review): the next vertex is loaded before the counter is tested, so
 * the last iteration reads one stride beyond the final source vertex.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d_no_rot )
|
||||
GLNAME( _mesa_3dnow_transform_points4_3d_no_rot ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP3NRR_2 ) )
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
|
||||
|
||||
MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */
|
||||
PUNPCKLDQ ( REGOFF(56, ECX), MM2 ) /* m32 | m22 */
|
||||
|
||||
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
|
||||
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
|
||||
MOVD ( REGOFF(12, EAX), MM7 ) /* | x3 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP3NRR_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
PREFETCH ( REGOFF(32, EAX) ) /* 2 vertices past EAX only if the source stride is 16 bytes (tightly packed) */
|
||||
|
||||
MOVQ ( MM5, MM6 ) /* x3 | x2 */
|
||||
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
|
||||
|
||||
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
|
||||
PFMUL ( MM2, MM5 ) /* x3*m32 | x2*m22 */
|
||||
|
||||
PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */
|
||||
PFACC ( MM7, MM5 ) /* x3 | x2*m22+x3*m32 */
|
||||
|
||||
PFADD ( MM6, MM4 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */
|
||||
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
|
||||
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
|
||||
|
||||
MOVD ( REGOFF(12, EAX), MM7 ) /* | x3 */
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TP3NRR_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP3NRR_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d )
|
||||
GLNAME( _mesa_3dnow_transform_points4_2d ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP2R_2 ) )
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */
|
||||
|
||||
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */
|
||||
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */
|
||||
|
||||
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */
|
||||
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP2R_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
|
||||
MOVQ ( MM3, MM4 ) /* x1 | x0 */
|
||||
MOVQ ( MM5, MM6 ) /* x3 | x2 */
|
||||
|
||||
PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */
|
||||
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
|
||||
|
||||
PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
PFACC ( MM4, MM3 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */
|
||||
PFMUL ( MM2, MM6 ) /* x3*m31 | x3*m30 */
|
||||
|
||||
PFADD ( MM6, MM3 ) /* r1 | r0 */
|
||||
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
|
||||
|
||||
MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */
|
||||
MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */
|
||||
|
||||
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TP2R_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP2R_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d_no_rot )
|
||||
GLNAME( _mesa_3dnow_transform_points4_2d_no_rot ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP2NRR_3 ) )
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
|
||||
|
||||
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
|
||||
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP2NRR_2 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
|
||||
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
|
||||
MOVQ ( MM5, MM6 ) /* x3 | x2 */
|
||||
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
|
||||
|
||||
PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */
|
||||
PFADD ( MM4, MM6 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */
|
||||
|
||||
MOVQ ( MM6, REGOFF(-16, EDX) ) /* write r0, r1 */
|
||||
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
|
||||
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
|
||||
JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP2NRR_3 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points4_identity )
|
||||
GLNAME( _mesa_3dnow_transform_points4_identity ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TPIR_2 ) )
|
||||
|
||||
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
|
||||
MOVQ ( REGOFF(8, EAX), MM1 ) /* x3 | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TPIR_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
MOVQ ( MM0, REGOFF(-16, EDX) ) /* r1 | r0 */
|
||||
|
||||
MOVQ ( MM1, REGOFF(-8, EDX) ) /* r3 | r2 */
|
||||
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
|
||||
|
||||
MOVQ ( REGOFF(8, EAX), MM1 ) /* x3 | x2 */
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TPIR_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TPIR_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
|
@ -1,4 +1,4 @@
|
|||
/* $Id: common_x86.c,v 1.14 2001/03/28 20:44:43 gareth Exp $ */
|
||||
/* $Id: common_x86.c,v 1.15 2001/03/29 06:46:16 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
|
@ -35,7 +35,7 @@
|
|||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#if defined(USE_KATMAI_ASM) && defined(__linux__)
|
||||
#if defined(USE_SSE_ASM) && defined(__linux__)
|
||||
#include <signal.h>
|
||||
#endif
|
||||
|
||||
|
@ -67,7 +67,7 @@ static void message( const char *msg )
|
|||
}
|
||||
}
|
||||
|
||||
#if defined(USE_KATMAI_ASM)
|
||||
#if defined(USE_SSE_ASM)
|
||||
/*
|
||||
* We must verify that the Streaming SIMD Extensions are truly supported
|
||||
* on this processor before we go ahead and hook out the optimized code.
|
||||
|
@ -84,8 +84,8 @@ static void message( const char *msg )
|
|||
* not good.
|
||||
*/
|
||||
|
||||
extern void _mesa_test_os_katmai_support( void );
|
||||
extern void _mesa_test_os_katmai_exception_support( void );
|
||||
extern void _mesa_test_os_sse_support( void );
|
||||
extern void _mesa_test_os_sse_exception_support( void );
|
||||
|
||||
#if defined(__linux__) && defined(_POSIX_SOURCE)
|
||||
static void sigill_handler( int signal, struct sigcontext sc )
|
||||
|
@ -135,7 +135,7 @@ static void sigfpe_handler( int signal, struct sigcontext sc )
|
|||
*
|
||||
* GH: Isn't this just awful?
|
||||
*/
|
||||
static void check_os_katmai_support( void )
|
||||
static void check_os_sse_support( void )
|
||||
{
|
||||
#if defined(__linux__)
|
||||
#if defined(_POSIX_SOURCE)
|
||||
|
@ -159,7 +159,7 @@ static void check_os_katmai_support( void )
|
|||
if ( cpu_has_xmm ) {
|
||||
message( "Testing OS support for SSE... " );
|
||||
|
||||
_mesa_test_os_katmai_support();
|
||||
_mesa_test_os_sse_support();
|
||||
|
||||
if ( cpu_has_xmm ) {
|
||||
message( "yes.\n" );
|
||||
|
@ -184,7 +184,7 @@ static void check_os_katmai_support( void )
|
|||
if ( cpu_has_xmm ) {
|
||||
message( "Testing OS support for SSE unmasked exceptions... " );
|
||||
|
||||
_mesa_test_os_katmai_exception_support();
|
||||
_mesa_test_os_sse_exception_support();
|
||||
|
||||
if ( cpu_has_xmm ) {
|
||||
message( "yes.\n" );
|
||||
|
@ -220,7 +220,7 @@ static void check_os_katmai_support( void )
|
|||
#endif /* __linux__ */
|
||||
}
|
||||
|
||||
#endif /* USE_KATMAI_ASM */
|
||||
#endif /* USE_SSE_ASM */
|
||||
|
||||
|
||||
void _mesa_init_all_x86_transform_asm( void )
|
||||
|
@ -257,14 +257,14 @@ void _mesa_init_all_x86_transform_asm( void )
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_KATMAI_ASM
|
||||
if ( cpu_has_xmm && getenv( "MESA_FORCE_KATMAI" ) == 0 ) {
|
||||
check_os_katmai_support();
|
||||
#ifdef USE_SSE_ASM
|
||||
if ( cpu_has_xmm && getenv( "MESA_FORCE_SSE" ) == 0 ) {
|
||||
check_os_sse_support();
|
||||
}
|
||||
if ( cpu_has_xmm ) {
|
||||
if ( getenv( "MESA_NO_KATMAI" ) == 0 ) {
|
||||
message( "Katmai cpu detected.\n" );
|
||||
_mesa_init_katmai_transform_asm();
|
||||
if ( getenv( "MESA_NO_SSE" ) == 0 ) {
|
||||
message( "SSE cpu detected.\n" );
|
||||
_mesa_init_sse_transform_asm();
|
||||
} else {
|
||||
_mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
|
||||
}
|
||||
|
@ -289,9 +289,9 @@ void _mesa_init_all_x86_vertex_asm( void )
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_KATMAI_ASM
|
||||
if ( cpu_has_xmm && getenv( "MESA_NO_KATMAI" ) == 0 ) {
|
||||
_mesa_init_katmai_vertex_asm();
|
||||
#ifdef USE_SSE_ASM
|
||||
if ( cpu_has_xmm && getenv( "MESA_NO_SSE" ) == 0 ) {
|
||||
_mesa_init_sse_vertex_asm();
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $Id: common_x86_asm.S,v 1.6 2001/03/28 20:44:43 gareth Exp $ */
|
||||
/* $Id: common_x86_asm.S,v 1.7 2001/03/29 06:46:16 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
|
@ -58,8 +58,8 @@
|
|||
GLNAME( found_intel ): STRING( "Genuine Intel processor found\n\0" )
|
||||
GLNAME( found_amd ): STRING( "Authentic AMD processor found\n\0" )
|
||||
|
||||
#ifdef USE_KATMAI_ASM
|
||||
GLNAME( katmai_test_dummy ):
|
||||
#ifdef USE_SSE_ASM
|
||||
GLNAME( sse_test_dummy ):
|
||||
D_LONG 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
|
||||
#endif
|
||||
|
||||
|
@ -157,15 +157,15 @@ LLBL ( cpuid_done ):
|
|||
RET
|
||||
|
||||
|
||||
#ifdef USE_KATMAI_ASM
|
||||
#ifdef USE_SSE_ASM
|
||||
/* Execute an SSE instruction to see if the operating system correctly
|
||||
* supports SSE. A signal handler for SIGILL should have been set
|
||||
* before calling this function, otherwise this could kill the client
|
||||
* application.
|
||||
*/
|
||||
ALIGNTEXT4
|
||||
GLOBL GLNAME( _mesa_test_os_katmai_support )
|
||||
GLNAME( _mesa_test_os_katmai_support ):
|
||||
GLOBL GLNAME( _mesa_test_os_sse_support )
|
||||
GLNAME( _mesa_test_os_sse_support ):
|
||||
|
||||
XORPS ( XMM0, XMM0 )
|
||||
|
||||
|
@ -178,8 +178,8 @@ GLNAME( _mesa_test_os_katmai_support ):
|
|||
* otherwise this could kill the client application.
|
||||
*/
|
||||
ALIGNTEXT4
|
||||
GLOBL GLNAME( _mesa_test_os_katmai_exception_support )
|
||||
GLNAME( _mesa_test_os_katmai_exception_support ):
|
||||
GLOBL GLNAME( _mesa_test_os_sse_exception_support )
|
||||
GLNAME( _mesa_test_os_sse_exception_support ):
|
||||
|
||||
PUSH_L ( EBP )
|
||||
MOV_L ( ESP, EBP )
|
||||
|
@ -196,7 +196,7 @@ GLNAME( _mesa_test_os_katmai_exception_support ):
|
|||
LDMXCSR ( REGOFF( -8, EBP ) )
|
||||
|
||||
XORPS ( XMM0, XMM0 )
|
||||
MOVUPS ( GLNAME( katmai_test_dummy ), XMM1 )
|
||||
MOVUPS ( GLNAME( sse_test_dummy ), XMM1 )
|
||||
|
||||
DIVPS ( XMM0, XMM1 )
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $Id: common_x86_asm.h,v 1.7 2001/03/28 20:44:44 gareth Exp $ */
|
||||
/* $Id: common_x86_asm.h,v 1.8 2001/03/29 06:46:16 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
|
@ -52,8 +52,8 @@
|
|||
#ifdef USE_3DNOW_ASM
|
||||
#include "3dnow.h"
|
||||
#endif
|
||||
#ifdef USE_KATMAI_ASM
|
||||
#include "katmai.h"
|
||||
#ifdef USE_SSE_ASM
|
||||
#include "sse.h"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -0,0 +1,208 @@
|
|||
/* $Id: sse.c,v 1.1 2001/03/29 06:46:16 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* PentiumIII-SIMD (SSE) optimizations contributed by
|
||||
* Andre Werthmann <wertmann@cs.uni-potsdam.de>
|
||||
*/
|
||||
|
||||
#include "glheader.h"
|
||||
#include "context.h"
|
||||
#include "mtypes.h"
|
||||
#include "sse.h"
|
||||
|
||||
#include "math/m_vertices.h"
|
||||
#include "math/m_xform.h"
|
||||
|
||||
#include "tnl/t_context.h"
|
||||
|
||||
#ifdef DEBUG
|
||||
#include "math/m_debug.h"
|
||||
#endif
|
||||
|
||||
|
||||
/* Parameter list shared by every point-transform entry point. */
#define XFORM_ARGS	GLvector4f *to_vec,				\
			const GLfloat m[16],				\
			const GLvector4f *from_vec,			\
			const GLubyte *mask,				\
			const GLubyte flag


/* Declare the seven point-transform routines for prefix `pfx` and
 * vertex size `sz`.  Expands to complete declarations; invoke it at
 * file scope without a trailing semicolon.
 */
#define DECLARE_XFORM_GROUP( pfx, sz ) \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_general( XFORM_ARGS );		\
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_identity( XFORM_ARGS );	\
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d_no_rot( XFORM_ARGS );	\
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_perspective( XFORM_ARGS );	\
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d( XFORM_ARGS );		\
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d_no_rot( XFORM_ARGS );	\
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d( XFORM_ARGS );


/* Store the seven routines declared by DECLARE_XFORM_GROUP( pfx, sz )
 * into _mesa_transform_tab[0][sz][...].  Wrapped in do { } while (0) so
 * that `ASSIGN_XFORM_GROUP( pfx, sz );` is exactly one statement and
 * stays correct under an unbraced if/else.
 */
#define ASSIGN_XFORM_GROUP( pfx, sz )					\
do {									\
   _mesa_transform_tab[0][sz][MATRIX_GENERAL] =				\
      _mesa_##pfx##_transform_points##sz##_general;			\
   _mesa_transform_tab[0][sz][MATRIX_IDENTITY] =			\
      _mesa_##pfx##_transform_points##sz##_identity;			\
   _mesa_transform_tab[0][sz][MATRIX_3D_NO_ROT] =			\
      _mesa_##pfx##_transform_points##sz##_3d_no_rot;			\
   _mesa_transform_tab[0][sz][MATRIX_PERSPECTIVE] =			\
      _mesa_##pfx##_transform_points##sz##_perspective;			\
   _mesa_transform_tab[0][sz][MATRIX_2D] =				\
      _mesa_##pfx##_transform_points##sz##_2d;				\
   _mesa_transform_tab[0][sz][MATRIX_2D_NO_ROT] =			\
      _mesa_##pfx##_transform_points##sz##_2d_no_rot;			\
   _mesa_transform_tab[0][sz][MATRIX_3D] =				\
      _mesa_##pfx##_transform_points##sz##_3d;				\
} while (0)



/* Parameter list shared by every normal-transform entry point. */
#define NORM_ARGS	const GLmatrix *mat,				\
			GLfloat scale,					\
			const GLvector3f *in,				\
			const GLfloat *lengths,				\
			const GLubyte mask[],				\
			GLvector3f *dest


/* Declare the eight normal-transform routines for prefix `pfx`.
 * Expands to complete declarations; invoke it at file scope without a
 * trailing semicolon.
 */
#define DECLARE_NORM_GROUP( pfx ) \
extern void _ASMAPI _mesa_##pfx##_rescale_normals( NORM_ARGS );				\
extern void _ASMAPI _mesa_##pfx##_normalize_normals( NORM_ARGS );			\
extern void _ASMAPI _mesa_##pfx##_transform_normals( NORM_ARGS );			\
extern void _ASMAPI _mesa_##pfx##_transform_normals_no_rot( NORM_ARGS );		\
extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals( NORM_ARGS );		\
extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals_no_rot( NORM_ARGS );	\
extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals( NORM_ARGS );		\
extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals_no_rot( NORM_ARGS );


/* Store the eight routines declared by DECLARE_NORM_GROUP( pfx ) into
 * _mesa_normal_tab[...][0].  Single-statement form, as for
 * ASSIGN_XFORM_GROUP.
 */
#define ASSIGN_NORM_GROUP( pfx )					\
do {									\
   _mesa_normal_tab[NORM_RESCALE][0] =					\
      _mesa_##pfx##_rescale_normals;					\
   _mesa_normal_tab[NORM_NORMALIZE][0] =				\
      _mesa_##pfx##_normalize_normals;					\
   _mesa_normal_tab[NORM_TRANSFORM][0] =				\
      _mesa_##pfx##_transform_normals;					\
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT][0] =				\
      _mesa_##pfx##_transform_normals_no_rot;				\
   _mesa_normal_tab[NORM_TRANSFORM|NORM_RESCALE][0] =			\
      _mesa_##pfx##_transform_rescale_normals;				\
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT|NORM_RESCALE][0] =		\
      _mesa_##pfx##_transform_rescale_normals_no_rot;			\
   _mesa_normal_tab[NORM_TRANSFORM|NORM_NORMALIZE][0] =			\
      _mesa_##pfx##_transform_normalize_normals;			\
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT|NORM_NORMALIZE][0] =		\
      _mesa_##pfx##_transform_normalize_normals_no_rot;			\
} while (0)
|
||||
|
||||
|
||||
#ifdef USE_SSE_ASM
DECLARE_XFORM_GROUP( sse, 2 )
DECLARE_XFORM_GROUP( sse, 3 )

#if 1
/* Only part of the normal and 4-component point groups is declared
 * here: some functions are not written in SSE assembly, because the
 * FPU ones are faster.  The declarations carry _ASMAPI so that they
 * agree with the macro-generated declarations above.
 */
extern void _ASMAPI _mesa_sse_transform_normals_no_rot( NORM_ARGS );
extern void _ASMAPI _mesa_sse_transform_rescale_normals( NORM_ARGS );
extern void _ASMAPI _mesa_sse_transform_rescale_normals_no_rot( NORM_ARGS );

extern void _ASMAPI _mesa_sse_transform_points4_general( XFORM_ARGS );
extern void _ASMAPI _mesa_sse_transform_points4_3d( XFORM_ARGS );
extern void _ASMAPI _mesa_sse_transform_points4_identity( XFORM_ARGS );
#else
DECLARE_NORM_GROUP( sse )
#endif


extern void _ASMAPI
_mesa_v16_sse_general_xform( GLfloat *first_vert,
			     const GLfloat *m,
			     const GLfloat *src,
			     GLuint src_stride,
			     GLuint count );

extern void _ASMAPI
_mesa_sse_project_vertices( GLfloat *first,
			    GLfloat *last,
			    const GLfloat *m,
			    GLuint stride );

extern void _ASMAPI
_mesa_sse_project_clipped_vertices( GLfloat *first,
				    GLfloat *last,
				    const GLfloat *m,
				    GLuint stride,
				    const GLubyte *clipmask );
#endif
|
||||
|
||||
|
||||
/* Hook the SSE point- and normal-transform routines into the dispatch
 * tables.  Compiles to an empty function when USE_SSE_ASM is not
 * defined.
 */
void _mesa_init_sse_transform_asm( void )
{
#if defined( USE_SSE_ASM )
   /* Complete groups for 2- and 3-component points. */
   ASSIGN_XFORM_GROUP( sse, 2 );
   ASSIGN_XFORM_GROUP( sse, 3 );

#if 1
   /* TODO: Finish these off.  Until then only the entries below are
    * replaced; the remaining table slots are left untouched.
    */
   _mesa_transform_tab[0][4][MATRIX_IDENTITY] =
      _mesa_sse_transform_points4_identity;
   _mesa_transform_tab[0][4][MATRIX_3D] =
      _mesa_sse_transform_points4_3d;
   _mesa_transform_tab[0][4][MATRIX_GENERAL] =
      _mesa_sse_transform_points4_general;

   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT|NORM_RESCALE][0] =
      _mesa_sse_transform_rescale_normals_no_rot;
   _mesa_normal_tab[NORM_TRANSFORM|NORM_RESCALE][0] =
      _mesa_sse_transform_rescale_normals;
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT][0] =
      _mesa_sse_transform_normals_no_rot;
#else
   ASSIGN_NORM_GROUP( sse );
#endif

#if defined( DEBUG )
   /* Debug builds run the math test routines over the installed functions. */
   _math_test_all_transform_functions( "SSE" );
   _math_test_all_normal_transform_functions( "SSE" );
#endif
#endif /* USE_SSE_ASM */
}
|
||||
|
||||
/* Hook the SSE fast-path vertex routine into its function pointer.
 * Compiles to an empty function when USE_SSE_ASM is not defined.
 */
void _mesa_init_sse_vertex_asm( void )
{
#if defined( USE_SSE_ASM )
   _mesa_xform_points3_v16_general = _mesa_v16_sse_general_xform;

#if 0
   /* GH: These are broken.  I'm fixing them now.
    */
   _mesa_project_v16 = _mesa_sse_project_vertices;
   _mesa_project_clipped_v16 = _mesa_sse_project_clipped_vertices;
#endif

#if defined( DEBUG_NOT )
   /* Only compiled when DEBUG_NOT is defined. */
   _math_test_all_vertex_functions( "SSE" );
#endif
#endif /* USE_SSE_ASM */
}
|
|
@ -0,0 +1,40 @@
|
|||
/* $Id: sse.h,v 1.1 2001/03/29 06:46:16 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* PentiumIII-SIMD (SSE) optimizations contributed by
|
||||
* Andre Werthmann <wertmann@cs.uni-potsdam.de>
|
||||
*/
|
||||
|
||||
/* Include guard for the SSE initialisation interface. */
#ifndef __SSE_H__
|
||||
#define __SSE_H__
|
||||
|
||||
#include "math/m_xform.h"
|
||||
|
||||
/* Installs the SSE point/normal transform routines into the dispatch tables. */
void _mesa_init_sse_transform_asm( void );
|
||||
/* Installs the SSE vertex fast-path routine. */
void _mesa_init_sse_vertex_asm( void );
|
||||
|
||||
#endif
|
|
@ -0,0 +1,252 @@
|
|||
/* $Id: sse_normal.S,v 1.1 2001/03/29 06:46:16 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/** TODO:
|
||||
* - insert PREFETCH instructions to avoid cache misses
|
||||
* - some more optimizations are possible...
|
||||
* - for 40-50% more performance in the SSE functions, the
|
||||
* data (transformation matrix, src_vert, dst_vert) needs to be 16-byte aligned
|
||||
*/
|
||||
|
||||
#include "matypes.h"
|
||||
#include "norm_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
/* Float-indexed operands: element i of the array addressed by EDX
 * (matrix), ESI (source) or EDI (destination).  The argument is
 * parenthesised so that an expression such as M(a + 1) scales correctly.
 */
#define M(i)	REGOFF((i) * 4, EDX)
#define S(i)	REGOFF((i) * 4, ESI)
#define D(i)	REGOFF((i) * 4, EDI)
/* Read as the stride field while ESI still addresses the source vector
 * header.  NOTE(review): the literal 12 is a hard-coded field offset;
 * confirm it against the GLvector3f layout in matypes.h.
 */
#define STRIDE	REGOFF(12, ESI)
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_rescale_normals_no_rot
 *
 * For every source normal (ux, uy, uz) writes
 *     ( ux*m0*scale, uy*m5*scale, uz*m10*scale )
 * where m is the array reached through the MATRIX_INV field of the
 * matrix argument.  Output normals are packed 12 bytes apart; input
 * normals are STRIDE bytes apart.
 *
 * The destination count is written before the empty-input test, so an
 * empty source yields an empty destination instead of a stale count.
 *
 * Clobbers EAX, ECX, EDX, XMM0-XMM2 and the flags; ESI/EDI are
 * saved and restored.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_sse_transform_rescale_normals_no_rot)
GLNAME(_mesa_sse_transform_rescale_normals_no_rot):

#define FRAME_OFFSET 8
	PUSH_L	( ESI )
	PUSH_L	( EDI )

	MOV_L	( ARG_IN, ESI )				/* ptr to source GLvector3f */
	MOV_L	( ARG_DEST, EDI )			/* ptr to dest GLvector3f */

	MOV_L	( ARG_MAT, EDX )			/* ptr to matrix */
	MOV_L	( REGOFF(MATRIX_INV, EDX), EDX)		/* matrix->inv */

	MOV_L	( REGOFF(V3F_COUNT, ESI), ECX )		/* source count */
	MOV_L	( ECX, REGOFF(V3F_COUNT, EDI) )		/* set dest count (also when 0) */

	TEST_L	( ECX, ECX )
	JZ	( LLBL(K_G3TRNNRR_finish) )		/* count was zero; go to finish */

	MOV_L	( STRIDE, EAX )				/* stride */

	IMUL_L	( CONST(12), ECX )			/* count *= 12 (bytes written) */
	MOV_L	( REGOFF(V3F_START, ESI), ESI )		/* ptr to first source vertex */

	MOV_L	( REGOFF(V3F_START, EDI), EDI )		/* ptr to first dest vertex */
	ADD_L	( EDI, ECX )				/* ECX = one past the last dest normal */

ALIGNTEXT32
	MOVSS	( M(0), XMM1 )				/* m0 */
	MOVSS	( M(5), XMM2 )				/* m5 */
	UNPCKLPS( XMM2, XMM1 )				/* m5 | m0 */
	MOVSS	( ARG_SCALE, XMM0 )			/* scale */
	SHUFPS	( CONST(0x0), XMM0, XMM0 )		/* scale broadcast to all lanes */
	MULPS	( XMM0, XMM1 )				/* m5*scale | m0*scale */
	MULSS	( M(10), XMM0 )				/* m10*scale */

ALIGNTEXT32
LLBL(K_G3TRNNRR_top):
	MOVLPS	( S(0), XMM2 )				/* uy | ux */
	MULPS	( XMM1, XMM2 )				/* uy*m5*scale | ux*m0*scale */
	MOVLPS	( XMM2, D(0) )				/* ->D(1) | D(0) */

	MOVSS	( S(2), XMM2 )				/* uz */
	MULSS	( XMM0, XMM2 )				/* uz*m10*scale */
	MOVSS	( XMM2, D(2) )				/* ->D(2) */

LLBL(K_G3TRNNRR_skip):
	ADD_L	( CONST(12), EDI )			/* next dest normal */
	ADD_L	( EAX, ESI )				/* next source normal */
	CMP_L	( ECX, EDI )
	JNE	( LLBL(K_G3TRNNRR_top) )

LLBL(K_G3TRNNRR_finish):
	POP_L	( EDI )
	POP_L	( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_rescale_normals
 *
 * For every source normal (ux, uy, uz) writes
 *     x = scale * ( ux*m0 + uy*m1 + uz*m2  )
 *     y = scale * ( ux*m4 + uy*m5 + uz*m6  )
 *     z = scale * ( ux*m8 + uy*m9 + uz*m10 )
 * where m is the array reached through the MATRIX_INV field of the
 * matrix argument; scale is folded into the matrix terms before the
 * multiply.  Output normals are packed 12 bytes apart; input normals
 * are STRIDE bytes apart.
 *
 * The destination count is written before the empty-input test, so an
 * empty source yields an empty destination instead of a stale count.
 *
 * XMM0-XMM2, XMM6 and XMM7 hold the pre-scaled matrix terms across the
 * loop and XMM3-XMM5 are temporaries, so no register is left for
 * m10*scale; it is recomputed every iteration.
 *
 * Clobbers EAX, ECX, EDX, XMM0-XMM7 and the flags; ESI/EDI are
 * saved and restored.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_sse_transform_rescale_normals)
GLNAME(_mesa_sse_transform_rescale_normals):

#define FRAME_OFFSET 8
	PUSH_L	( ESI )
	PUSH_L	( EDI )

	MOV_L	( ARG_IN, ESI )				/* ptr to source GLvector3f */
	MOV_L	( ARG_DEST, EDI )			/* ptr to dest GLvector3f */

	MOV_L	( ARG_MAT, EDX )			/* ptr to matrix */
	MOV_L	( REGOFF(MATRIX_INV, EDX), EDX)		/* matrix->inv */

	MOV_L	( REGOFF(V3F_COUNT, ESI), ECX )		/* source count */
	MOV_L	( ECX, REGOFF(V3F_COUNT, EDI) )		/* set dest count (also when 0) */

	TEST_L	( ECX, ECX )
	JZ	( LLBL(K_G3TRNR_finish) )		/* count was zero; go to finish */

	MOV_L	( STRIDE, EAX )				/* stride */

	IMUL_L	( CONST(12), ECX )			/* count *= 12 (bytes written) */
	MOV_L	( REGOFF(V3F_START, ESI), ESI )		/* ptr to first source vertex */

	MOV_L	( REGOFF(V3F_START, EDI), EDI )		/* ptr to first dest vertex */
	ADD_L	( EDI, ECX )				/* ECX = one past the last dest normal */

ALIGNTEXT32
	MOVSS	( M(0), XMM0 )				/* m0 */
	MOVSS	( M(4), XMM1 )				/* m4 */
	UNPCKLPS( XMM1, XMM0 )				/* m4 | m0 */

	MOVSS	( ARG_SCALE, XMM4 )			/* scale */
	SHUFPS	( CONST(0x0), XMM4, XMM4 )		/* scale broadcast to all lanes */

	MULPS	( XMM4, XMM0 )				/* m4*scale | m0*scale */
	MOVSS	( M(1), XMM1 )				/* m1 */
	MOVSS	( M(5), XMM2 )				/* m5 */
	UNPCKLPS( XMM2, XMM1 )				/* m5 | m1 */
	MULPS	( XMM4, XMM1 )				/* m5*scale | m1*scale */
	MOVSS	( M(2), XMM2 )				/* m2 */
	MOVSS	( M(6), XMM3 )				/* m6 */
	UNPCKLPS( XMM3, XMM2 )				/* m6 | m2 */
	MULPS	( XMM4, XMM2 )				/* m6*scale | m2*scale */

	MOVSS	( M(8), XMM6 )				/* m8 */
	MULSS	( ARG_SCALE, XMM6 )			/* m8*scale */
	MOVSS	( M(9), XMM7 )				/* m9 */
	MULSS	( ARG_SCALE, XMM7 )			/* m9*scale */

ALIGNTEXT32
LLBL(K_G3TRNR_top):
	MOVSS	( S(0), XMM3 )				/* ux */
	SHUFPS	( CONST(0x0), XMM3, XMM3 )		/* ux | ux */
	MULPS	( XMM0, XMM3 )				/* ux*m4*scale | ux*m0*scale */
	MOVSS	( S(1), XMM4 )				/* uy */
	SHUFPS	( CONST(0x0), XMM4, XMM4 )		/* uy | uy */
	MULPS	( XMM1, XMM4 )				/* uy*m5*scale | uy*m1*scale */
	MOVSS	( S(2), XMM5 )				/* uz */
	SHUFPS	( CONST(0x0), XMM5, XMM5 )		/* uz | uz */
	MULPS	( XMM2, XMM5 )				/* uz*m6*scale | uz*m2*scale */

	ADDPS	( XMM4, XMM3 )
	ADDPS	( XMM5, XMM3 )
	MOVLPS	( XMM3, D(0) )				/* ->D(1) | D(0) */

	MOVSS	( M(10), XMM3 )				/* m10 */
	MULSS	( ARG_SCALE, XMM3 )			/* m10*scale */
	MULSS	( S(2), XMM3 )				/* m10*scale*uz */
	MOVSS	( S(1), XMM4 )				/* uy */
	MULSS	( XMM7, XMM4 )				/* uy*m9*scale */
	MOVSS	( S(0), XMM5 )				/* ux */
	MULSS	( XMM6, XMM5 )				/* ux*m8*scale */

	ADDSS	( XMM4, XMM3 )
	ADDSS	( XMM5, XMM3 )
	MOVSS	( XMM3, D(2) )				/* ->D(2) */

LLBL(K_G3TRNR_skip):
	ADD_L	( CONST(12), EDI )			/* next dest normal */
	ADD_L	( EAX, ESI )				/* next source normal */
	CMP_L	( ECX, EDI )
	JNE	( LLBL(K_G3TRNR_top) )

LLBL(K_G3TRNR_finish):
	POP_L	( EDI )
	POP_L	( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_normals_no_rot
 *
 * For every source normal (ux, uy, uz) writes ( ux*m0, uy*m5, uz*m10 )
 * where m is the array reached through the MATRIX_INV field of the
 * matrix argument.  Output normals are packed 12 bytes apart; input
 * normals are STRIDE bytes apart.
 *
 * The destination count is written before the empty-input test, so an
 * empty source yields an empty destination instead of a stale count.
 *
 * Clobbers EAX, ECX, EDX, XMM0-XMM2 and the flags; ESI/EDI are
 * saved and restored.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_sse_transform_normals_no_rot)
GLNAME(_mesa_sse_transform_normals_no_rot):

#define FRAME_OFFSET 8
	PUSH_L	( ESI )
	PUSH_L	( EDI )

	MOV_L	( ARG_IN, ESI )				/* ptr to source GLvector3f */
	MOV_L	( ARG_DEST, EDI )			/* ptr to dest GLvector3f */

	MOV_L	( ARG_MAT, EDX )			/* ptr to matrix */
	MOV_L	( REGOFF(MATRIX_INV, EDX), EDX)		/* matrix->inv */

	MOV_L	( REGOFF(V3F_COUNT, ESI), ECX )		/* source count */
	MOV_L	( ECX, REGOFF(V3F_COUNT, EDI) )		/* set dest count (also when 0) */

	TEST_L	( ECX, ECX )
	JZ	( LLBL(K_G3TNNRR_finish) )		/* count was zero; go to finish */

	MOV_L	( STRIDE, EAX )				/* stride */

	IMUL_L	( CONST(12), ECX )			/* count *= 12 (bytes written) */
	MOV_L	( REGOFF(V3F_START, ESI), ESI )		/* ptr to first source vertex */

	MOV_L	( REGOFF(V3F_START, EDI), EDI )		/* ptr to first dest vertex */
	ADD_L	( EDI, ECX )				/* ECX = one past the last dest normal */

ALIGNTEXT32
	MOVSS	( M(0), XMM0 )				/* m0 */
	MOVSS	( M(5), XMM1 )				/* m5 */
	UNPCKLPS( XMM1, XMM0 )				/* m5 | m0 */
	MOVSS	( M(10), XMM1 )				/* m10 */

ALIGNTEXT32
LLBL(K_G3TNNRR_top):
	MOVLPS	( S(0), XMM2 )				/* uy | ux */
	MULPS	( XMM0, XMM2 )				/* uy*m5 | ux*m0 */
	MOVLPS	( XMM2, D(0) )				/* ->D(1) | D(0) */

	MOVSS	( S(2), XMM2 )				/* uz */
	MULSS	( XMM1, XMM2 )				/* uz*m10 */
	MOVSS	( XMM2, D(2) )				/* ->D(2) */

LLBL(K_G3TNNRR_skip):
	ADD_L	( CONST(12), EDI )			/* next dest normal */
	ADD_L	( EAX, ESI )				/* next source normal */
	CMP_L	( ECX, EDI )
	JNE	( LLBL(K_G3TNNRR_top) )

LLBL(K_G3TNNRR_finish):
	POP_L	( EDI )
	POP_L	( ESI )
	RET
#undef FRAME_OFFSET
|
|
@ -0,0 +1,433 @@
|
|||
/* $Id: sse_xform1.S,v 1.1 2001/03/29 06:46:27 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/** TODO:
|
||||
* - insert PREFETCH instructions to avoid cache-misses !
|
||||
* - some more optimizations are possible...
|
||||
* - for 40-50% more performance in the SSE-functions, the
|
||||
* data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned !
|
||||
*/
|
||||
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
/* Float-indexed operands: element i of the array addressed by ESI
 * (source), EDI (destination) or EDX (matrix).  The argument is
 * parenthesised so that an expression such as M(a + 1) scales correctly.
 */
#define S(i)	REGOFF((i) * 4, ESI)
#define D(i)	REGOFF((i) * 4, EDI)
#define M(i)	REGOFF((i) * 4, EDX)
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points1_general
 *
 * Reads only the first float (ox) of each source vertex and writes the
 * four floats  ox*(m0..m3) + (m12..m15)  to the destination.  Marks the
 * destination as size 4 / VEC_SIZE_4 and copies the source count.
 *
 * Destination count, size and flags are written before the empty-input
 * test, so an empty source yields an empty destination instead of
 * stale metadata.
 *
 * MOVAPS faults on a memory operand that is not 16-byte aligned, so the
 * matrix must be 16-byte aligned.  The destination is written with
 * MOVUPS and has no alignment requirement.
 *
 * Clobbers EAX, ECX, EDX, XMM0-XMM2 and the flags; ESI/EDI are
 * saved and restored.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_general)
GLNAME( _mesa_sse_transform_points1_general ):

#define FRAME_OFFSET 8
	PUSH_L	( ESI )
	PUSH_L	( EDI )

	MOV_L	( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )	/* ptr to source GLvector4f */
	MOV_L	( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )		/* ptr to dest GLvector4f */

	MOV_L	( ARG_MATRIX, EDX )			/* ptr to matrix */
	MOV_L	( REGOFF(V4F_COUNT, ESI), ECX )		/* source count */

	MOV_L	( ECX, REGOFF(V4F_COUNT, EDI) )		/* set dest count (also when 0) */
	MOV_L	( CONST(4), REGOFF(V4F_SIZE, EDI) )	/* set dest size */
	OR_L	( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )	/* set dest flags */

	TEST_L	( ECX, ECX )				/* must follow OR_L: OR_L rewrites the flags */
	JZ	( LLBL(K_GTP1GR_finish) )		/* count was zero; go to finish */

	MOV_L	( REGOFF(V4F_STRIDE, ESI), EAX )	/* stride */

	SHL_L	( CONST(4), ECX )			/* count *= 16 (bytes written) */
	MOV_L	( REGOFF(V4F_START, ESI), ESI )		/* ptr to first source vertex */

	MOV_L	( REGOFF(V4F_START, EDI), EDI )		/* ptr to first dest vertex */
	ADD_L	( EDI, ECX )				/* ECX = one past the last dest vertex */

ALIGNTEXT32
	MOVAPS	( M(0), XMM0 )				/* m3  | m2  | m1  | m0  */
	MOVAPS	( M(12), XMM1 )				/* m15 | m14 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP1GR_top):
	MOVSS	( S(0), XMM2 )				/* ox */
	SHUFPS	( CONST(0x0), XMM2, XMM2 )		/* ox | ox | ox | ox */
	MULPS	( XMM0, XMM2 )				/* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
	ADDPS	( XMM1, XMM2 )				/* + m15 | + m14 | + m13 | + m12 */
	MOVUPS	( XMM2, D(0) )

LLBL(K_GTP1GR_skip):
	ADD_L	( CONST(16), EDI )			/* next dest vertex */
	ADD_L	( EAX, ESI )				/* next source vertex */
	CMP_L	( ECX, EDI )
	JNE	( LLBL(K_GTP1GR_top) )

LLBL(K_GTP1GR_finish):
	POP_L	( EDI )
	POP_L	( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points1_identity
 *
 * Copies the first float of each source vertex to the destination.
 * Marks the destination as size 1 / VEC_SIZE_1 and copies the source
 * count.  When the source and destination data pointers are equal the
 * copy loop is skipped.
 *
 * Destination count, size and flags are written before the empty-input
 * test, so an empty source yields an empty destination instead of
 * stale metadata.
 *
 * Clobbers EAX, ECX, EDX and the flags; ESI/EDI are saved and restored.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_identity)
GLNAME( _mesa_sse_transform_points1_identity ):

#define FRAME_OFFSET 8
	PUSH_L	( ESI )
	PUSH_L	( EDI )

	MOV_L	( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )	/* ptr to source GLvector4f */
	MOV_L	( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )		/* ptr to dest GLvector4f */

	MOV_L	( REGOFF(V4F_COUNT, ESI), ECX )		/* source count */

	MOV_L	( ECX, REGOFF(V4F_COUNT, EDI) )		/* set dest count (also when 0) */
	MOV_L	( CONST(1), REGOFF(V4F_SIZE, EDI) )	/* set dest size */
	OR_L	( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, EDI) )	/* set dest flags */

	TEST_L	( ECX, ECX )				/* must follow OR_L: OR_L rewrites the flags */
	JZ	( LLBL(K_GTP1IR_finish) )		/* count was zero; go to finish */

	MOV_L	( REGOFF(V4F_STRIDE, ESI), EAX )	/* stride */

	SHL_L	( CONST(4), ECX )			/* count *= 16 (bytes covered) */
	MOV_L	( REGOFF(V4F_START, ESI), ESI )		/* ptr to first source vertex */

	MOV_L	( REGOFF(V4F_START, EDI), EDI )		/* ptr to first dest vertex */
	ADD_L	( EDI, ECX )				/* ECX = one past the last dest vertex */

	CMP_L	( ESI, EDI )				/* same data pointer? */
	JE	( LLBL(K_GTP1IR_finish) )		/* yes -> nothing to copy */

ALIGNTEXT32
LLBL(K_GTP1IR_top):
	MOV_L	( S(0), EDX )				/* ox, moved as raw 32 bits */
	MOV_L	( EDX, D(0) )

LLBL(K_GTP1IR_skip):
	ADD_L	( CONST(16), EDI )			/* next dest vertex */
	ADD_L	( EAX, ESI )				/* next source vertex */
	CMP_L	( ECX, EDI )
	JNE	( LLBL(K_GTP1IR_top) )

LLBL(K_GTP1IR_finish):
	POP_L	( EDI )
	POP_L	( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points1_3d_no_rot
 *
 * Reads only the first float (ox) of each source vertex and writes
 *     ( ox*m0 + m12, m13, m14 )
 * to the first three floats of the destination vertex.  Marks the
 * destination as size 3 / VEC_SIZE_3 and copies the source count.
 *
 * Destination count, size and flags are written before the empty-input
 * test, so an empty source yields an empty destination instead of
 * stale metadata.
 *
 * Clobbers EAX, ECX, EDX, XMM0-XMM4 and the flags; ESI/EDI are
 * saved and restored.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_3d_no_rot)
GLNAME(_mesa_sse_transform_points1_3d_no_rot):

#define FRAME_OFFSET 8
	PUSH_L	( ESI )
	PUSH_L	( EDI )

	MOV_L	( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )	/* ptr to source GLvector4f */
	MOV_L	( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )		/* ptr to dest GLvector4f */

	MOV_L	( ARG_MATRIX, EDX )			/* ptr to matrix */
	MOV_L	( REGOFF(V4F_COUNT, ESI), ECX )		/* source count */

	MOV_L	( ECX, REGOFF(V4F_COUNT, EDI) )		/* set dest count (also when 0) */
	MOV_L	( CONST(3), REGOFF(V4F_SIZE, EDI) )	/* set dest size */
	OR_L	( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )	/* set dest flags */

	TEST_L	( ECX, ECX )				/* must follow OR_L: OR_L rewrites the flags */
	JZ	( LLBL(K_GTP13DNRR_finish) )		/* count was zero; go to finish */

	MOV_L	( REGOFF(V4F_STRIDE, ESI), EAX )	/* stride */

	SHL_L	( CONST(4), ECX )			/* count *= 16 (bytes covered) */
	MOV_L	( REGOFF(V4F_START, ESI), ESI )		/* ptr to first source vertex */

	MOV_L	( REGOFF(V4F_START, EDI), EDI )		/* ptr to first dest vertex */
	ADD_L	( EDI, ECX )				/* ECX = one past the last dest vertex */

ALIGNTEXT32
	MOVSS	( M(0), XMM0 )				/* m0 */
	MOVSS	( M(12), XMM1 )				/* m12 */
	MOVSS	( M(13), XMM2 )				/* m13 */
	MOVSS	( M(14), XMM3 )				/* m14 */

ALIGNTEXT32
LLBL(K_GTP13DNRR_top):
	MOVSS	( S(0), XMM4 )				/* ox */
	MULSS	( XMM0, XMM4 )				/* ox*m0 */
	ADDSS	( XMM1, XMM4 )				/* ox*m0+m12 */
	MOVSS	( XMM4, D(0) )

	MOVSS	( XMM2, D(1) )				/* m13->D(1) */
	MOVSS	( XMM3, D(2) )				/* m14->D(2) */

LLBL(K_GTP13DNRR_skip):
	ADD_L	( CONST(16), EDI )			/* next dest vertex */
	ADD_L	( EAX, ESI )				/* next source vertex */
	CMP_L	( ECX, EDI )
	JNE	( LLBL(K_GTP13DNRR_top) )

LLBL(K_GTP13DNRR_finish):
	POP_L	( EDI )
	POP_L	( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points1_perspective
 *
 * Reads only the x component of every source vertex and writes
 *     dest = ( m0*x, 0, m14, 0 )
 * The dest vector is tagged as size 4 (VEC_SIZE_4) and receives the
 * source count.  Dest vertices are 16 bytes apart; the source is walked
 * with its own V4F_STRIDE.
 *
 * Arguments are taken from the stack (OFFSET_SOURCE / OFFSET_DEST /
 * ARG_MATRIX).  ESI and EDI are saved and restored; EAX, ECX, EDX,
 * XMM0-XMM3 and the flags are overwritten.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_perspective)
GLNAME(_mesa_sse_transform_points1_perspective):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_MATRIX, EDX )				/* EDX = matrix */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP), EDI )	/* EDI = dest GLvector4f */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP), ESI )	/* ESI = source GLvector4f */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )			/* ECX = vertex count */
	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTP13PR_done) )				/* nothing to do: dest stays untouched */

	/* describe the result while EDI still points at the dest header */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )			/* EAX = source stride (bytes) */
	MOV_L( REGOFF(V4F_START, ESI), ESI )			/* ESI = first source vertex */
	MOV_L( REGOFF(V4F_START, EDI), EDI )			/* EDI = first dest vertex */

	SHL_L( CONST(4), ECX )					/* count * 16 ...            */
	ADD_L( EDI, ECX )					/* ... ECX = end of dest data */

ALIGNTEXT32
	MOVSS( M(14), XMM2 )					/* m14 */
	MOVSS( M(0), XMM1 )					/* m0  */
	XORPS( XMM0, XMM0 )					/* all-zero register for y' and w' */

ALIGNTEXT32
LLBL(K_GTP13PR_loop):
	MOVSS( S(0), XMM3 )					/* x */
	MULSS( XMM1, XMM3 )					/* x*m0 */

	MOVSS( XMM3, D(0) )					/* x' = x*m0 */
	MOVSS( XMM0, D(1) )					/* y' = 0    */
	MOVSS( XMM2, D(2) )					/* z' = m14  */
	MOVSS( XMM0, D(3) )					/* w' = 0    */

	ADD_L( EAX, ESI )					/* next source vertex */
	ADD_L( CONST(16), EDI )					/* next dest vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP13PR_loop) )

LLBL(K_GTP13PR_done):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points1_2d
 *
 * Reads only the x component of every source vertex and writes
 *     dest = ( m0*x + m12, m1*x + m13 )
 * The dest vector is tagged as size 2 (VEC_SIZE_2) and receives the
 * source count.  Dest vertices are 16 bytes apart; the source is walked
 * with its own V4F_STRIDE.
 *
 * Arguments are taken from the stack (OFFSET_SOURCE / OFFSET_DEST /
 * ARG_MATRIX).  ESI and EDI are saved and restored; EAX, ECX, EDX,
 * XMM0-XMM2 and the flags are overwritten.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_2d)
GLNAME(_mesa_sse_transform_points1_2d):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_MATRIX, EDX )				/* EDX = matrix */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP), EDI )	/* EDI = dest GLvector4f */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP), ESI )	/* ESI = source GLvector4f */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )			/* ECX = vertex count */
	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTP13P2DR_done) )				/* nothing to do: dest stays untouched */

	/* describe the result while EDI still points at the dest header */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )			/* EAX = source stride (bytes) */
	MOV_L( REGOFF(V4F_START, ESI), ESI )			/* ESI = first source vertex */
	MOV_L( REGOFF(V4F_START, EDI), EDI )			/* EDI = first dest vertex */

	SHL_L( CONST(4), ECX )					/* count * 16 ...            */
	ADD_L( EDI, ECX )					/* ... ECX = end of dest data */

ALIGNTEXT32
	MOVLPS( M(12), XMM1 )					/* low half: m13 | m12 */
	MOVLPS( M(0), XMM0 )					/* low half: m1  | m0  */

ALIGNTEXT32
LLBL(K_GTP13P2DR_loop):
	MOVSS( S(0), XMM2 )					/* x */
	SHUFPS( CONST(0), XMM2, XMM2 )				/* broadcast x to every lane */
	MULPS( XMM0, XMM2 )					/* low half: x*m1 | x*m0 */
	ADDPS( XMM1, XMM2 )					/* low half: +m13 | +m12 */
	MOVLPS( XMM2, D(0) )					/* only the low two lanes are stored */

	ADD_L( EAX, ESI )					/* next source vertex */
	ADD_L( CONST(16), EDI )					/* next dest vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP13P2DR_loop) )

LLBL(K_GTP13P2DR_done):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points1_2d_no_rot
 *
 * Reads only the x component of every source vertex and writes
 *     dest = ( m0*x + m12, m13 )
 * The dest vector is tagged as size 2 (VEC_SIZE_2) and receives the
 * source count.  Dest vertices are 16 bytes apart; the source is walked
 * with its own V4F_STRIDE.
 *
 * Arguments are taken from the stack (OFFSET_SOURCE / OFFSET_DEST /
 * ARG_MATRIX).  ESI and EDI are saved and restored; EAX, ECX, EDX,
 * XMM0-XMM3 and the flags are overwritten.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_2d_no_rot)
GLNAME(_mesa_sse_transform_points1_2d_no_rot):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_MATRIX, EDX )				/* EDX = matrix */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP), EDI )	/* EDI = dest GLvector4f */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP), ESI )	/* ESI = source GLvector4f */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )			/* ECX = vertex count */
	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTP13P2DNRR_done) )				/* nothing to do: dest stays untouched */

	/* describe the result while EDI still points at the dest header */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )			/* EAX = source stride (bytes) */
	MOV_L( REGOFF(V4F_START, ESI), ESI )			/* ESI = first source vertex */
	MOV_L( REGOFF(V4F_START, EDI), EDI )			/* EDI = first dest vertex */

	SHL_L( CONST(4), ECX )					/* count * 16 ...            */
	ADD_L( EDI, ECX )					/* ... ECX = end of dest data */

ALIGNTEXT32
	MOVSS( M(13), XMM2 )					/* m13 */
	MOVSS( M(12), XMM1 )					/* m12 */
	MOVSS( M(0), XMM0 )					/* m0  */

ALIGNTEXT32
LLBL(K_GTP13P2DNRR_loop):
	MOVSS( S(0), XMM3 )					/* x */
	MULSS( XMM0, XMM3 )					/* x*m0 */
	ADDSS( XMM1, XMM3 )					/* x*m0 + m12 */
	MOVSS( XMM3, D(0) )
	MOVSS( XMM2, D(1) )					/* y' = m13 */

	ADD_L( EAX, ESI )					/* next source vertex */
	ADD_L( CONST(16), EDI )					/* next dest vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP13P2DNRR_loop) )

LLBL(K_GTP13P2DNRR_done):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points1_3d
 *
 * Reads only the x component of every source vertex and writes
 *     dest = ( m0*x + m12, m1*x + m13, m2*x + m14 )
 * The dest vector is tagged as size 3 (VEC_SIZE_3) and receives the
 * source count.  Dest vertices are 16 bytes apart; the source is walked
 * with its own V4F_STRIDE.
 *
 * The matrix rows are fetched with MOVAPS, which requires the matrix
 * to sit on a 16-byte boundary.
 *
 * Arguments are taken from the stack (OFFSET_SOURCE / OFFSET_DEST /
 * ARG_MATRIX).  ESI and EDI are saved and restored; EAX, ECX, EDX,
 * XMM0-XMM2 and the flags are overwritten.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_3d)
GLNAME(_mesa_sse_transform_points1_3d):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_MATRIX, EDX )				/* EDX = matrix */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP), EDI )	/* EDI = dest GLvector4f */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP), ESI )	/* ESI = source GLvector4f */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )			/* ECX = vertex count */
	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTP13P3DR_done) )				/* nothing to do: dest stays untouched */

	/* describe the result while EDI still points at the dest header */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )			/* EAX = source stride (bytes) */
	MOV_L( REGOFF(V4F_START, ESI), ESI )			/* ESI = first source vertex */
	MOV_L( REGOFF(V4F_START, EDI), EDI )			/* EDI = first dest vertex */

	SHL_L( CONST(4), ECX )					/* count * 16 ...            */
	ADD_L( EDI, ECX )					/* ... ECX = end of dest data */

ALIGNTEXT32
	MOVAPS( M(12), XMM1 )					/* m15 | m14 | m13 | m12 */
	MOVAPS( M(0), XMM0 )					/* m3  | m2  | m1  | m0  */

ALIGNTEXT32
LLBL(K_GTP13P3DR_loop):
	MOVSS( S(0), XMM2 )					/* x */
	SHUFPS( CONST(0), XMM2, XMM2 )				/* x | x | x | x */
	MULPS( XMM0, XMM2 )					/* x*m3 | x*m2 | x*m1 | x*m0 */
	ADDPS( XMM1, XMM2 )					/* +m15 | +m14 | +m13 | +m12 */

	MOVLPS( XMM2, D(0) )					/* x', y' */
	UNPCKHPS( XMM2, XMM2 )					/* bring the z' lane down to lane 0 */
	MOVSS( XMM2, D(2) )					/* z' */

	ADD_L( EAX, ESI )					/* next source vertex */
	ADD_L( CONST(16), EDI )					/* next dest vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP13P3DR_loop) )

LLBL(K_GTP13P3DR_done):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
|
@ -0,0 +1,452 @@
|
|||
/* $Id: sse_xform2.S,v 1.1 2001/03/29 06:46:27 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/** TODO:
|
||||
* - insert PREFETCH instructions to avoid cache-misses !
|
||||
* - some more optimizations are possible...
|
||||
* - for 40-50% more performance in the SSE-functions, the
|
||||
* data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned !
|
||||
*/
|
||||
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT

/*
 * Operand shorthands, indexed in units of 4-byte elements:
 *   S(n) - n-th element at the address in ESI
 *   D(n) - n-th element at the address in EDI
 *   M(n) - n-th element at the address in EDX
 * The argument is pasted unparenthesised, so pass a plain literal.
 */
#define S(idx)	REGOFF(idx * 4, ESI)
#define D(idx)	REGOFF(idx * 4, EDI)
#define M(idx)	REGOFF(idx * 4, EDX)
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points2_general
 *
 * Reads the x and y components of every source vertex and writes all
 * four dest components:
 *     dest[i] = m[i]*x + m[4+i]*y + m[12+i]      (i = 0..3)
 * The dest vector is tagged as size 4 (VEC_SIZE_4) and receives the
 * source count.  Dest vertices are 16 bytes apart; the source is walked
 * with its own V4F_STRIDE.
 *
 * MOVAPS is used for both the matrix rows and the dest store, so the
 * matrix and every dest vertex must sit on a 16-byte boundary.
 *
 * Arguments are taken from the stack (OFFSET_SOURCE / OFFSET_DEST /
 * ARG_MATRIX).  ESI and EDI are saved and restored; EAX, ECX, EDX,
 * XMM0-XMM4 and the flags are overwritten.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_general)
GLNAME(_mesa_sse_transform_points2_general):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_MATRIX, EDX )				/* EDX = matrix */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP), EDI )	/* EDI = dest GLvector4f */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP), ESI )	/* ESI = source GLvector4f */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )			/* ECX = vertex count */
	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTP2GR_done) )				/* nothing to do: dest stays untouched */

	/* describe the result while EDI still points at the dest header */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )			/* EAX = source stride (bytes) */
	MOV_L( REGOFF(V4F_START, ESI), ESI )			/* ESI = first source vertex */
	MOV_L( REGOFF(V4F_START, EDI), EDI )			/* EDI = first dest vertex */

	SHL_L( CONST(4), ECX )					/* count * 16 ...            */
	ADD_L( EDI, ECX )					/* ... ECX = end of dest data */

ALIGNTEXT32
	MOVAPS( M(12), XMM2 )					/* m15 | m14 | m13 | m12 */
	MOVAPS( M(4), XMM1 )					/* m7  | m6  | m5  | m4  */
	MOVAPS( M(0), XMM0 )					/* m3  | m2  | m1  | m0  */

ALIGNTEXT32
LLBL(K_GTP2GR_loop):
	MOVSS( S(0), XMM3 )					/* x */
	MOVSS( S(1), XMM4 )					/* y */
	SHUFPS( CONST(0), XMM3, XMM3 )				/* x | x | x | x */
	SHUFPS( CONST(0), XMM4, XMM4 )				/* y | y | y | y */
	MULPS( XMM0, XMM3 )					/* x * (m3..m0) */
	MULPS( XMM1, XMM4 )					/* y * (m7..m4) */

	ADDPS( XMM4, XMM3 )					/* x-term + y-term */
	ADDPS( XMM2, XMM3 )					/* + (m15..m12) */
	MOVAPS( XMM3, D(0) )					/* store all four components */

	ADD_L( EAX, ESI )					/* next source vertex */
	ADD_L( CONST(16), EDI )					/* next dest vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP2GR_loop) )

LLBL(K_GTP2GR_done):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points2_identity
 *
 * Copies the x and y components of every source vertex to the dest
 * vertex unchanged (no matrix is read).  The dest vector is tagged as
 * size 2 (VEC_SIZE_2) and receives the source count.  If the source and
 * dest data start at the same address the copy loop is skipped; the
 * dest header has still been updated by then.
 *
 * Arguments are taken from the stack (OFFSET_SOURCE / OFFSET_DEST).
 * ESI and EDI are saved and restored; EAX, ECX, EDX and the flags are
 * overwritten.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_identity)
GLNAME(_mesa_sse_transform_points2_identity):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP), EDI )	/* EDI = dest GLvector4f */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP), ESI )	/* ESI = source GLvector4f */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )			/* ECX = vertex count */
	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTP2IR_done) )				/* nothing to do: dest stays untouched */

	/* describe the result while EDI still points at the dest header */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )			/* EAX = source stride (bytes) */
	MOV_L( REGOFF(V4F_START, ESI), ESI )			/* ESI = first source vertex */
	MOV_L( REGOFF(V4F_START, EDI), EDI )			/* EDI = first dest vertex */

	SHL_L( CONST(4), ECX )					/* count * 16 ...            */
	ADD_L( EDI, ECX )					/* ... ECX = end of dest data */

	CMP_L( ESI, EDI )					/* same data address? */
	JE( LLBL(K_GTP2IR_done) )				/* yes: the copy would be a no-op */

ALIGNTEXT32
LLBL(K_GTP2IR_loop):
	MOV_L( S(0), EDX )					/* x, moved as a raw 32-bit word */
	MOV_L( EDX, D(0) )
	MOV_L( S(1), EDX )					/* y, moved as a raw 32-bit word */
	MOV_L( EDX, D(1) )

	ADD_L( EAX, ESI )					/* next source vertex */
	ADD_L( CONST(16), EDI )					/* next dest vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP2IR_loop) )

LLBL(K_GTP2IR_done):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points2_3d_no_rot
 *
 * Reads the x and y components of every source vertex and writes
 *     dest = ( m0*x + m12, m5*y + m13, m14 )
 * The dest vector is tagged as size 3 (VEC_SIZE_3) and receives the
 * source count.  Dest vertices are 16 bytes apart; the source is walked
 * with its own V4F_STRIDE.
 *
 * Arguments are taken from the stack (OFFSET_SOURCE / OFFSET_DEST /
 * ARG_MATRIX).  ESI and EDI are saved and restored; EAX, ECX, EDX,
 * XMM0-XMM3 and the flags are overwritten.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_3d_no_rot)
GLNAME(_mesa_sse_transform_points2_3d_no_rot):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_MATRIX, EDX )				/* EDX = matrix */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP), EDI )	/* EDI = dest GLvector4f */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP), ESI )	/* ESI = source GLvector4f */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )			/* ECX = vertex count */
	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTP23DNRR_done) )				/* nothing to do: dest stays untouched */

	/* describe the result while EDI still points at the dest header */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )			/* EAX = source stride (bytes) */
	MOV_L( REGOFF(V4F_START, ESI), ESI )			/* ESI = first source vertex */
	MOV_L( REGOFF(V4F_START, EDI), EDI )			/* EDI = first dest vertex */

	SHL_L( CONST(4), ECX )					/* count * 16 ...            */
	ADD_L( EDI, ECX )					/* ... ECX = end of dest data */

ALIGNTEXT32
	MOVSS( M(14), XMM3 )					/* m14 */
	MOVSS( M(0), XMM1 )					/* m0  */
	MOVSS( M(5), XMM2 )					/* m5 (XMM2 is reloaded just below) */
	UNPCKLPS( XMM2, XMM1 )					/* low half: m5 | m0 */
	MOVLPS( M(12), XMM2 )					/* low half: m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP23DNRR_loop):
	MOVLPS( S(0), XMM0 )					/* low half: y | x */
	MULPS( XMM1, XMM0 )					/* y*m5 | x*m0 */
	ADDPS( XMM2, XMM0 )					/* +m13 | +m12 */
	MOVLPS( XMM0, D(0) )					/* x', y' */
	MOVSS( XMM3, D(2) )					/* z' = m14 */

	ADD_L( EAX, ESI )					/* next source vertex */
	ADD_L( CONST(16), EDI )					/* next dest vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP23DNRR_loop) )

LLBL(K_GTP23DNRR_done):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points2_perspective
 *
 * Reads the x and y components of every source vertex and writes
 *     dest = ( m0*x, m5*y, m14, 0 )
 * The dest vector is tagged as size 4 (VEC_SIZE_4) and receives the
 * source count.  Dest vertices are 16 bytes apart; the source is walked
 * with its own V4F_STRIDE.
 *
 * Arguments are taken from the stack (OFFSET_SOURCE / OFFSET_DEST /
 * ARG_MATRIX).  ESI and EDI are saved and restored; EAX, ECX, EDX,
 * XMM0-XMM4 and the flags are overwritten.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_perspective)
GLNAME(_mesa_sse_transform_points2_perspective):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_MATRIX, EDX )				/* EDX = matrix */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP), EDI )	/* EDI = dest GLvector4f */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP), ESI )	/* ESI = source GLvector4f */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )			/* ECX = vertex count */
	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTP23PR_done) )				/* nothing to do: dest stays untouched */

	/* describe the result while EDI still points at the dest header */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )			/* EAX = source stride (bytes) */
	MOV_L( REGOFF(V4F_START, ESI), ESI )			/* ESI = first source vertex */
	MOV_L( REGOFF(V4F_START, EDI), EDI )			/* EDI = first dest vertex */

	SHL_L( CONST(4), ECX )					/* count * 16 ...            */
	ADD_L( EDI, ECX )					/* ... ECX = end of dest data */

ALIGNTEXT32
	XORPS( XMM0, XMM0 )					/* all-zero register for w' */
	MOVSS( M(14), XMM3 )					/* m14 */
	MOVSS( M(0), XMM1 )					/* m0  */
	MOVSS( M(5), XMM2 )					/* m5  */
	UNPCKLPS( XMM2, XMM1 )					/* low half: m5 | m0 */

ALIGNTEXT32
LLBL(K_GTP23PR_loop):
	MOVLPS( S(0), XMM4 )					/* low half: y | x */
	MULPS( XMM1, XMM4 )					/* y*m5 | x*m0 */
	MOVLPS( XMM4, D(0) )					/* x', y' */
	MOVSS( XMM0, D(3) )					/* w' = 0   */
	MOVSS( XMM3, D(2) )					/* z' = m14 */

	ADD_L( EAX, ESI )					/* next source vertex */
	ADD_L( CONST(16), EDI )					/* next dest vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP23PR_loop) )

LLBL(K_GTP23PR_done):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points2_2d
 *
 * Reads the x and y components of every source vertex and writes
 *     dest = ( m0*x + m4*y + m12, m1*x + m5*y + m13 )
 * The dest vector is tagged as size 2 (VEC_SIZE_2) and receives the
 * source count.  Dest vertices are 16 bytes apart; the source is walked
 * with its own V4F_STRIDE.
 *
 * Arguments are taken from the stack (OFFSET_SOURCE / OFFSET_DEST /
 * ARG_MATRIX).  ESI and EDI are saved and restored; EAX, ECX, EDX,
 * XMM0-XMM4 and the flags are overwritten.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_2d)
GLNAME(_mesa_sse_transform_points2_2d):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_MATRIX, EDX )				/* EDX = matrix */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP), EDI )	/* EDI = dest GLvector4f */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP), ESI )	/* ESI = source GLvector4f */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )			/* ECX = vertex count */
	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTP23P2DR_done) )				/* nothing to do: dest stays untouched */

	/* describe the result while EDI still points at the dest header */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )			/* EAX = source stride (bytes) */
	MOV_L( REGOFF(V4F_START, ESI), ESI )			/* ESI = first source vertex */
	MOV_L( REGOFF(V4F_START, EDI), EDI )			/* EDI = first dest vertex */

	SHL_L( CONST(4), ECX )					/* count * 16 ...            */
	ADD_L( EDI, ECX )					/* ... ECX = end of dest data */

ALIGNTEXT32
	MOVLPS( M(12), XMM2 )					/* low half: m13 | m12 */
	MOVLPS( M(4), XMM1 )					/* low half: m5  | m4  */
	MOVLPS( M(0), XMM0 )					/* low half: m1  | m0  */

ALIGNTEXT32
LLBL(K_GTP23P2DR_loop):
	MOVSS( S(0), XMM3 )					/* x */
	MOVSS( S(1), XMM4 )					/* y */
	SHUFPS( CONST(0), XMM3, XMM3 )				/* broadcast x */
	SHUFPS( CONST(0), XMM4, XMM4 )				/* broadcast y */
	MULPS( XMM0, XMM3 )					/* low half: x*m1 | x*m0 */
	MULPS( XMM1, XMM4 )					/* low half: y*m5 | y*m4 */

	ADDPS( XMM4, XMM3 )					/* x-term + y-term */
	ADDPS( XMM2, XMM3 )					/* low half: +m13 | +m12 */
	MOVLPS( XMM3, D(0) )					/* x', y' */

	ADD_L( EAX, ESI )					/* next source vertex */
	ADD_L( CONST(16), EDI )					/* next dest vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP23P2DR_loop) )

LLBL(K_GTP23P2DR_done):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points2_2d_no_rot
 *
 * Reads the x and y components of every source vertex and writes
 *     dest = ( m0*x + m12, m5*y + m13 )
 * The dest vector is tagged as size 2 (VEC_SIZE_2) and receives the
 * source count.  Dest vertices are 16 bytes apart; the source is walked
 * with its own V4F_STRIDE.
 *
 * Arguments are taken from the stack (OFFSET_SOURCE / OFFSET_DEST /
 * ARG_MATRIX).  ESI and EDI are saved and restored; EAX, ECX, EDX,
 * XMM0-XMM2 and the flags are overwritten.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_2d_no_rot)
GLNAME(_mesa_sse_transform_points2_2d_no_rot):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_MATRIX, EDX )				/* EDX = matrix */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP), EDI )	/* EDI = dest GLvector4f */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP), ESI )	/* ESI = source GLvector4f */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )			/* ECX = vertex count */
	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTP23P2DNRR_done) )				/* nothing to do: dest stays untouched */

	/* describe the result while EDI still points at the dest header */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )			/* EAX = source stride (bytes) */
	MOV_L( REGOFF(V4F_START, ESI), ESI )			/* ESI = first source vertex */
	MOV_L( REGOFF(V4F_START, EDI), EDI )			/* EDI = first dest vertex */

	SHL_L( CONST(4), ECX )					/* count * 16 ...            */
	ADD_L( EDI, ECX )					/* ... ECX = end of dest data */

ALIGNTEXT32
	MOVSS( M(0), XMM1 )					/* m0 */
	MOVSS( M(5), XMM2 )					/* m5 (XMM2 is reloaded just below) */
	UNPCKLPS( XMM2, XMM1 )					/* low half: m5 | m0 */
	MOVLPS( M(12), XMM2 )					/* low half: m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP23P2DNRR_loop):
	MOVLPS( S(0), XMM0 )					/* low half: y | x */
	MULPS( XMM1, XMM0 )					/* y*m5 | x*m0 */
	ADDPS( XMM2, XMM0 )					/* +m13 | +m12 */
	MOVLPS( XMM0, D(0) )					/* x', y' */

	ADD_L( EAX, ESI )					/* next source vertex */
	ADD_L( CONST(16), EDI )					/* next dest vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP23P2DNRR_loop) )

LLBL(K_GTP23P2DNRR_done):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points2_3d
 *
 * Reads the x and y components of every source vertex and writes
 *     dest[i] = m[i]*x + m[4+i]*y + m[12+i]      (i = 0..2)
 * The dest vector is tagged as size 3 (VEC_SIZE_3) and receives the
 * source count.  Dest vertices are 16 bytes apart; the source is walked
 * with its own V4F_STRIDE.
 *
 * The matrix rows are fetched with MOVAPS, which requires the matrix
 * to sit on a 16-byte boundary.
 *
 * Arguments are taken from the stack (OFFSET_SOURCE / OFFSET_DEST /
 * ARG_MATRIX).  ESI and EDI are saved and restored; EAX, ECX, EDX,
 * XMM0-XMM4 and the flags are overwritten.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_3d)
GLNAME(_mesa_sse_transform_points2_3d):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_MATRIX, EDX )				/* EDX = matrix */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP), EDI )	/* EDI = dest GLvector4f */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP), ESI )	/* ESI = source GLvector4f */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )			/* ECX = vertex count */
	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTP23P3DR_done) )				/* nothing to do: dest stays untouched */

	/* describe the result while EDI still points at the dest header */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )			/* EAX = source stride (bytes) */
	MOV_L( REGOFF(V4F_START, ESI), ESI )			/* ESI = first source vertex */
	MOV_L( REGOFF(V4F_START, EDI), EDI )			/* EDI = first dest vertex */

	SHL_L( CONST(4), ECX )					/* count * 16 ...            */
	ADD_L( EDI, ECX )					/* ... ECX = end of dest data */

ALIGNTEXT32
	MOVAPS( M(12), XMM2 )					/* m15 | m14 | m13 | m12 */
	MOVAPS( M(4), XMM1 )					/* m7  | m6  | m5  | m4  */
	MOVAPS( M(0), XMM0 )					/* m3  | m2  | m1  | m0  */

ALIGNTEXT32
LLBL(K_GTP23P3DR_loop):
	MOVSS( S(0), XMM3 )					/* x */
	MOVSS( S(1), XMM4 )					/* y */
	SHUFPS( CONST(0), XMM3, XMM3 )				/* broadcast x */
	SHUFPS( CONST(0), XMM4, XMM4 )				/* broadcast y */
	MULPS( XMM0, XMM3 )					/* x * (m3..m0) */
	MULPS( XMM1, XMM4 )					/* y * (m7..m4) */

	ADDPS( XMM4, XMM3 )					/* x-term + y-term */
	ADDPS( XMM2, XMM3 )					/* + (m15..m12) */

	MOVLPS( XMM3, D(0) )					/* x', y' */
	UNPCKHPS( XMM3, XMM3 )					/* bring the z' lane down to lane 0 */
	MOVSS( XMM3, D(2) )					/* z' */

	ADD_L( EAX, ESI )					/* next source vertex */
	ADD_L( CONST(16), EDI )					/* next dest vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP23P3DR_loop) )

LLBL(K_GTP23P3DR_done):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
|
@ -0,0 +1,498 @@
|
|||
/* $Id: sse_xform3.S,v 1.1 2001/03/29 06:46:27 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/** TODO:
|
||||
* - insert PREFETCH instructions to avoid cache-misses !
|
||||
* - some more optimizations are possible...
|
||||
* - for 40-50% more performance in the SSE-functions, the
|
||||
* data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned !
|
||||
*/
|
||||
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT

/*
 * Operand shorthands, indexed in units of 4-byte elements:
 *   S(n) - n-th element at the address in ESI
 *   D(n) - n-th element at the address in EDI
 *   M(n) - n-th element at the address in EDX
 * The argument is pasted unparenthesised, so pass a plain literal.
 */
#define S(idx)	REGOFF(idx * 4, ESI)
#define D(idx)	REGOFF(idx * 4, EDI)
#define M(idx)	REGOFF(idx * 4, EDX)
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points3_general
 *
 * Reads the x, y and z components of every source vertex and writes
 * all four dest components:
 *     dest[i] = m[i]*x + m[4+i]*y + m[8+i]*z + m[12+i]      (i = 0..3)
 * The dest vector is tagged as size 4 (VEC_SIZE_4) and receives the
 * source count.  Dest vertices are 16 bytes apart; the source is walked
 * with its own V4F_STRIDE.
 *
 * MOVAPS is used for both the matrix rows and the dest store, so the
 * matrix and every dest vertex must sit on a 16-byte boundary.
 *
 * Arguments are taken from the stack (OFFSET_SOURCE / OFFSET_DEST /
 * ARG_MATRIX).  ESI and EDI are saved and restored; EAX, ECX, EDX,
 * XMM0-XMM6 and the flags are overwritten.
 *
 * Written like the sibling routines in this file: TEST/JZ for the
 * empty-input check, the S()/D()/M() element accessors instead of raw
 * byte offsets, FRAME_OFFSET instead of a bare 8, and lane comments
 * in high | low order.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_general)
GLNAME( _mesa_sse_transform_points3_general ):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP), ESI )	/* ptr to source GLvector4f */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP), EDI )	/* ptr to dest GLvector4f */

	MOV_L( ARG_MATRIX, EDX )			/* ptr to matrix */
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )		/* source count */

	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTPGR_finish) )			/* count was zero; go to finish */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )		/* stride */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )	/* set dest flags */

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )		/* set dest count */
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )	/* set dest size */

	SHL_L( CONST(4), ECX )				/* count *= 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )		/* ptr to first source vertex */

	MOV_L( REGOFF(V4F_START, EDI), EDI )		/* ptr to first dest vertex */
	ADD_L( EDI, ECX )				/* count += dest ptr */

ALIGNTEXT32
	MOVAPS( M(0), XMM0 )				/* m3  | m2  | m1  | m0  */
	MOVAPS( M(4), XMM1 )				/* m7  | m6  | m5  | m4  */
	MOVAPS( M(8), XMM2 )				/* m11 | m10 | m9  | m8  */
	MOVAPS( M(12), XMM3 )				/* m15 | m14 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTPGR_top):
	MOVSS( S(0), XMM4 )				/* ox */
	SHUFPS( CONST(0x0), XMM4, XMM4 )		/* ox | ox | ox | ox */
	MOVSS( S(1), XMM5 )				/* oy */
	SHUFPS( CONST(0x0), XMM5, XMM5 )		/* oy | oy | oy | oy */
	MOVSS( S(2), XMM6 )				/* oz */
	SHUFPS( CONST(0x0), XMM6, XMM6 )		/* oz | oz | oz | oz */

	MULPS( XMM0, XMM4 )				/* m3*ox  | m2*ox  | m1*ox | m0*ox */
	MULPS( XMM1, XMM5 )				/* m7*oy  | m6*oy  | m5*oy | m4*oy */
	MULPS( XMM2, XMM6 )				/* m11*oz | m10*oz | m9*oz | m8*oz */

	ADDPS( XMM5, XMM4 )				/* x-term + y-term */
	ADDPS( XMM6, XMM4 )				/* + z-term */
	ADDPS( XMM3, XMM4 )				/* + m15 | m14 | m13 | m12 */

	MOVAPS( XMM4, D(0) )				/* store all four components */

	ADD_L( CONST(16), EDI )				/* next dest vertex */
	ADD_L( EAX, ESI )				/* next source vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTPGR_top) )

LLBL(K_GTPGR_finish):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points3_identity
 *
 * Copies the x, y and z components of every source vertex to the dest
 * vertex unchanged (no matrix is read).  The dest vector is tagged as
 * size 3 (VEC_SIZE_3) and receives the source count.  If the source and
 * dest data start at the same address the copy loop is skipped; the
 * dest header has still been updated by then.
 *
 * Arguments are taken from the stack (OFFSET_SOURCE / OFFSET_DEST).
 * ESI and EDI are saved and restored; EAX, ECX, XMM0 and the flags are
 * overwritten.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_identity)
GLNAME(_mesa_sse_transform_points3_identity):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP), EDI )	/* EDI = dest GLvector4f */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP), ESI )	/* ESI = source GLvector4f */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )			/* ECX = vertex count */
	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTPIR_done) )				/* nothing to do: dest stays untouched */

	/* describe the result while EDI still points at the dest header */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )			/* EAX = source stride (bytes) */
	MOV_L( REGOFF(V4F_START, ESI), ESI )			/* ESI = first source vertex */
	MOV_L( REGOFF(V4F_START, EDI), EDI )			/* EDI = first dest vertex */

	SHL_L( CONST(4), ECX )					/* count * 16 ...            */
	ADD_L( EDI, ECX )					/* ... ECX = end of dest data */

	CMP_L( ESI, EDI )					/* same data address? */
	JE( LLBL(K_GTPIR_done) )				/* yes: the copy would be a no-op */

ALIGNTEXT32
LLBL(K_GTPIR_loop):
	MOVLPS( S(0), XMM0 )					/* x and y in one 64-bit move */
	MOVLPS( XMM0, D(0) )
	MOVSS( S(2), XMM0 )					/* z */
	MOVSS( XMM0, D(2) )

	ADD_L( EAX, ESI )					/* next source vertex */
	ADD_L( CONST(16), EDI )					/* next dest vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTPIR_loop) )

LLBL(K_GTPIR_done):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points3_3d_no_rot
 *
 * Reads the x, y and z components of every source vertex and writes
 *     dest = ( m0*x + m12, m5*y + m13, m10*z + m14 )
 * The dest vector is tagged as size 3 (VEC_SIZE_3) and receives the
 * source count.  Dest vertices are 16 bytes apart; the source is walked
 * with its own V4F_STRIDE.
 *
 * Arguments are taken from the stack (OFFSET_SOURCE / OFFSET_DEST /
 * ARG_MATRIX).  ESI and EDI are saved and restored; EAX, ECX, EDX,
 * XMM0-XMM4 and the flags are overwritten.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_3d_no_rot)
GLNAME(_mesa_sse_transform_points3_3d_no_rot):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_MATRIX, EDX )				/* EDX = matrix */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP), EDI )	/* EDI = dest GLvector4f */
	MOV_L( REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP), ESI )	/* ESI = source GLvector4f */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )			/* ECX = vertex count */
	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTP3DNRR_done) )				/* nothing to do: dest stays untouched */

	/* describe the result while EDI still points at the dest header */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )			/* EAX = source stride (bytes) */
	MOV_L( REGOFF(V4F_START, ESI), ESI )			/* ESI = first source vertex */
	MOV_L( REGOFF(V4F_START, EDI), EDI )			/* EDI = first dest vertex */

	SHL_L( CONST(4), ECX )					/* count * 16 ...            */
	ADD_L( EDI, ECX )					/* ... ECX = end of dest data */

ALIGNTEXT32
	MOVSS( M(14), XMM4 )					/* m14 */
	MOVSS( M(10), XMM3 )					/* m10 */
	MOVSS( M(0), XMM1 )					/* m0  */
	MOVSS( M(5), XMM2 )					/* m5 (XMM2 is reloaded just below) */
	UNPCKLPS( XMM2, XMM1 )					/* low half: m5 | m0 */
	MOVLPS( M(12), XMM2 )					/* low half: m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP3DNRR_loop):
	/* x and y together in the low two lanes */
	MOVLPS( S(0), XMM0 )					/* y | x */
	MULPS( XMM1, XMM0 )					/* y*m5 | x*m0 */
	ADDPS( XMM2, XMM0 )					/* +m13 | +m12 */
	MOVLPS( XMM0, D(0) )					/* x', y' */

	/* z as a scalar */
	MOVSS( S(2), XMM0 )					/* z */
	MULSS( XMM3, XMM0 )					/* z*m10 */
	ADDSS( XMM4, XMM0 )					/* z*m10 + m14 */
	MOVSS( XMM0, D(2) )					/* z' */

	ADD_L( EAX, ESI )					/* next source vertex */
	ADD_L( CONST(16), EDI )					/* next dest vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP3DNRR_loop) )

LLBL(K_GTP3DNRR_done):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points3_perspective
 *
 * Transform 3-component source points by a perspective matrix.  Only the
 * matrix elements m0, m5, m8, m9, m10 and m14 are read:
 *
 *	D(0) = ox*m0 + oz*m8
 *	D(1) = oy*m5 + oz*m9
 *	D(2) = oz*m10 + m14
 *	D(3) = -oz
 *
 * The destination vector is tagged as 4 components wide and receives the
 * source count.  When the source count is zero the destination is left
 * completely untouched.
 *
 * Register use: ESI = current source vertex, EDI = current dest vertex,
 * EDX = matrix, EAX = source stride in bytes, ECX = end-of-dest pointer
 * (dest start + count*16).  ESI and EDI are saved and restored.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_perspective)
GLNAME(_mesa_sse_transform_points3_perspective):

#define FRAME_OFFSET 8			/* bytes pushed below (ESI + EDI) */
	PUSH_L	( ESI )
	PUSH_L	( EDI )

	/* Use FRAME_OFFSET rather than a bare 8 so the argument offsets
	 * cannot drift away from the number of pushes above. */
	MOV_L	( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )	/* ptr to source GLvector4f */
	MOV_L	( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )		/* ptr to dest GLvector4f */

	MOV_L	( ARG_MATRIX, EDX )			/* ptr to matrix */
	MOV_L	( REGOFF(V4F_COUNT, ESI), ECX )		/* source count */

	TEST_L	( ECX, ECX )
	JZ	( LLBL(K_GTP3PR_finish) )		/* count was zero; go to finish */

	MOV_L	( REGOFF(V4F_STRIDE, ESI), EAX )	/* stride */
	OR_L	( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )	/* set dest flags */

	MOV_L	( ECX, REGOFF(V4F_COUNT, EDI) )		/* set dest count */
	MOV_L	( CONST(4), REGOFF(V4F_SIZE, EDI) )	/* set dest size */

	SHL_L	( CONST(4), ECX )			/* count *= 16 (dest vertex size) */
	MOV_L	( REGOFF(V4F_START, ESI), ESI )		/* ptr to first source vertex */

	MOV_L	( REGOFF(V4F_START, EDI), EDI )		/* ptr to first dest vertex */
	ADD_L	( EDI, ECX )				/* ECX = one past the last dest vertex */

ALIGNTEXT32
	/* Loop-invariant matrix terms. */
	MOVSS	( M(0), XMM1 )			/* -   | -   | -   | m0  */
	MOVSS	( M(5), XMM2 )			/* -   | -   | -   | m5  */
	UNPCKLPS ( XMM2, XMM1 )			/* -   | -   | m5  | m0  */
	MOVLPS	( M(8), XMM2 )			/* -   | -   | m9  | m8  */
	MOVSS	( M(10), XMM3 )			/* m10 */
	MOVSS	( M(14), XMM4 )			/* m14 */
	XORPS	( XMM6, XMM6 )			/* 0 */

ALIGNTEXT32
LLBL(K_GTP3PR_top):
	MOVLPS	( S(0), XMM0 )			/* oy | ox */
	MULPS	( XMM1, XMM0 )			/* oy*m5 | ox*m0 */
	MOVSS	( S(2), XMM5 )			/* oz */
	SHUFPS	( CONST(0x0), XMM5, XMM5 )	/* oz | oz */
	MULPS	( XMM2, XMM5 )			/* oz*m9 | oz*m8 */
	ADDPS	( XMM5, XMM0 )			/* +oy*m5 | +ox*m0 */
	MOVLPS	( XMM0, D(0) )			/* ->D(1) | ->D(0) */

	MOVSS	( S(2), XMM0 )			/* oz */
	MULSS	( XMM3, XMM0 )			/* oz*m10 */
	ADDSS	( XMM4, XMM0 )			/* +m14 */
	MOVSS	( XMM0, D(2) )			/* ->D(2) */

	MOVSS	( S(2), XMM0 )			/* oz */
	MOVSS	( XMM6, XMM5 )			/* low lane = 0; upper lanes keep old products */
	SUBSS	( XMM0, XMM5 )			/* 0 - oz, scalar: only the low lane is stored,
						 * so do not do arithmetic on the stale upper lanes */
	MOVSS	( XMM5, D(3) )			/* ->D(3) */

LLBL(K_GTP3PR_skip):
	ADD_L	( CONST(16), EDI )		/* next dest vertex (fixed 16-byte stride) */
	ADD_L	( EAX, ESI )			/* next source vertex */
	CMP_L	( ECX, EDI )
	JNE	( LLBL(K_GTP3PR_top) )

LLBL(K_GTP3PR_finish):
	POP_L	( EDI )
	POP_L	( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points3_2d
 *
 * Apply a 2D matrix (elements m0, m1, m4, m5, m12, m13) to 3-component
 * points; z is copied through unchanged:
 *
 *	D(0) = ox*m0 + oy*m4 + m12
 *	D(1) = ox*m1 + oy*m5 + m13
 *	D(2) = oz
 *
 * The destination is tagged as 3 components wide.  A zero source count
 * returns without touching the destination.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_2d)
GLNAME(_mesa_sse_transform_points3_2d):

#define FRAME_OFFSET 8
	PUSH_L	( ESI )
	PUSH_L	( EDI )

	MOV_L	( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )		/* EDI = dest GLvector4f */
	MOV_L	( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )	/* ESI = source GLvector4f */
	MOV_L	( ARG_MATRIX, EDX )					/* EDX = matrix */

	MOV_L	( REGOFF(V4F_COUNT, ESI), ECX )		/* ECX = number of points */
	TEST_L	( ECX, ECX )
	JZ	( LLBL(sse_p3_2d_exit) )		/* nothing to transform */

	MOV_L	( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
	OR_L	( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L	( CONST(3), REGOFF(V4F_SIZE, EDI) )	/* dest holds x, y, z */
	MOV_L	( ECX, REGOFF(V4F_COUNT, EDI) )		/* dest count = source count */

	SHL_L	( CONST(4), ECX )			/* bytes of dest data: count * 16 */
	MOV_L	( REGOFF(V4F_START, ESI), ESI )		/* ESI -> first source vertex */

	MOV_L	( REGOFF(V4F_START, EDI), EDI )		/* EDI -> first dest vertex */
	ADD_L	( EDI, ECX )				/* ECX -> end of dest data */

ALIGNTEXT32
	/* Matrix columns used by every iteration (low two lanes only). */
	MOVLPS	( M(0), XMM0 )			/* m1  | m0  */
	MOVLPS	( M(4), XMM1 )			/* m5  | m4  */
	MOVLPS	( M(12), XMM2 )			/* m13 | m12 */

ALIGNTEXT32
LLBL(sse_p3_2d_next):
	MOVSS	( S(0), XMM3 )			/* ox */
	SHUFPS	( CONST(0x0), XMM3, XMM3 )	/* broadcast ox */
	MULPS	( XMM0, XMM3 )			/* ox*m1 | ox*m0 */
	MOVSS	( S(1), XMM4 )			/* oy */
	SHUFPS	( CONST(0x0), XMM4, XMM4 )	/* broadcast oy */
	MULPS	( XMM1, XMM4 )			/* oy*m5 | oy*m4 */

	ADDPS	( XMM4, XMM3 )			/* sum the x and y contributions */
	ADDPS	( XMM2, XMM3 )			/* add translation m13 | m12 */
	MOVLPS	( XMM3, D(0) )			/* store D(1) | D(0) */

	MOVSS	( S(2), XMM3 )			/* z passes straight through */
	MOVSS	( XMM3, D(2) )

	ADD_L	( CONST(16), EDI )		/* dest advances one 16-byte vertex */
	ADD_L	( EAX, ESI )			/* source advances by its stride */
	CMP_L	( ECX, EDI )
	JNE	( LLBL(sse_p3_2d_next) )

LLBL(sse_p3_2d_exit):
	POP_L	( EDI )
	POP_L	( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points3_2d_no_rot
 *
 * Scale-and-translate in x/y only (matrix elements m0, m5, m12, m13);
 * z is copied through unchanged:
 *
 *	D(0) = ox*m0 + m12
 *	D(1) = oy*m5 + m13
 *	D(2) = oz
 *
 * The destination is tagged as 3 components wide.  A zero source count
 * returns without touching the destination.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_2d_no_rot)
GLNAME(_mesa_sse_transform_points3_2d_no_rot):

#define FRAME_OFFSET 8
	PUSH_L	( ESI )
	PUSH_L	( EDI )

	MOV_L	( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )		/* EDI = dest GLvector4f */
	MOV_L	( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )	/* ESI = source GLvector4f */
	MOV_L	( ARG_MATRIX, EDX )					/* EDX = matrix */

	MOV_L	( REGOFF(V4F_COUNT, ESI), ECX )		/* ECX = number of points */
	TEST_L	( ECX, ECX )
	JZ	( LLBL(sse_p3_2dnr_exit) )		/* nothing to transform */

	MOV_L	( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
	OR_L	( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L	( CONST(3), REGOFF(V4F_SIZE, EDI) )	/* dest holds x, y, z */
	MOV_L	( ECX, REGOFF(V4F_COUNT, EDI) )		/* dest count = source count */

	SHL_L	( CONST(4), ECX )			/* bytes of dest data: count * 16 */
	MOV_L	( REGOFF(V4F_START, ESI), ESI )		/* ESI -> first source vertex */

	MOV_L	( REGOFF(V4F_START, EDI), EDI )		/* EDI -> first dest vertex */
	ADD_L	( EDI, ECX )				/* ECX -> end of dest data */

ALIGNTEXT32
	/* Pack the diagonal scale and the translation once, outside the loop. */
	MOVSS	( M(0), XMM1 )			/* m0 */
	MOVSS	( M(5), XMM2 )			/* m5 */
	UNPCKLPS ( XMM2, XMM1 )			/* m5  | m0  (scale) */
	MOVLPS	( M(12), XMM2 )			/* m13 | m12 (translation) */

ALIGNTEXT32
LLBL(sse_p3_2dnr_next):
	MOVLPS	( S(0), XMM0 )			/* oy | ox */
	MULPS	( XMM1, XMM0 )			/* scale:     oy*m5 | ox*m0 */
	ADDPS	( XMM2, XMM0 )			/* translate: +m13  | +m12  */
	MOVLPS	( XMM0, D(0) )			/* store D(1) | D(0) */

	MOVSS	( S(2), XMM0 )			/* z passes straight through */
	MOVSS	( XMM0, D(2) )

	ADD_L	( CONST(16), EDI )		/* dest advances one 16-byte vertex */
	ADD_L	( EAX, ESI )			/* source advances by its stride */
	CMP_L	( ECX, EDI )
	JNE	( LLBL(sse_p3_2dnr_next) )

LLBL(sse_p3_2dnr_exit):
	POP_L	( EDI )
	POP_L	( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points3_3d
 *
 * Apply a 3D (affine) matrix to 3-component points:
 *
 *	D(0) = ox*m0 + oy*m4 + oz*m8  + m12
 *	D(1) = ox*m1 + oy*m5 + oz*m9  + m13
 *	D(2) = ox*m2 + oy*m6 + oz*m10 + m14
 *
 * The destination is tagged as 3 components wide.  A zero source count
 * returns without touching the destination.  The four matrix rows are
 * fetched with MOVAPS, so the matrix must be 16-byte aligned.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_3d)
GLNAME(_mesa_sse_transform_points3_3d):

#define FRAME_OFFSET 8
	PUSH_L	( ESI )
	PUSH_L	( EDI )

	MOV_L	( REGOFF(OFFSET_DEST+FRAME_OFFSET, ESP), EDI )		/* EDI = dest GLvector4f */
	MOV_L	( REGOFF(OFFSET_SOURCE+FRAME_OFFSET, ESP), ESI )	/* ESI = source GLvector4f */
	MOV_L	( ARG_MATRIX, EDX )					/* EDX = matrix */

	MOV_L	( REGOFF(V4F_COUNT, ESI), ECX )		/* ECX = number of points */
	TEST_L	( ECX, ECX )
	JZ	( LLBL(sse_p3_3d_exit) )		/* nothing to transform */

	MOV_L	( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
	OR_L	( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L	( CONST(3), REGOFF(V4F_SIZE, EDI) )	/* dest holds x, y, z */
	MOV_L	( ECX, REGOFF(V4F_COUNT, EDI) )		/* dest count = source count */

	SHL_L	( CONST(4), ECX )			/* bytes of dest data: count * 16 */
	MOV_L	( REGOFF(V4F_START, ESI), ESI )		/* ESI -> first source vertex */

	MOV_L	( REGOFF(V4F_START, EDI), EDI )		/* EDI -> first dest vertex */
	ADD_L	( EDI, ECX )				/* ECX -> end of dest data */

ALIGNTEXT32
	/* Whole matrix stays resident in XMM0-XMM3; lane 3 of each is unused. */
	MOVAPS	( M(0), XMM0 )			/* m2  | m1  | m0  */
	MOVAPS	( M(4), XMM1 )			/* m6  | m5  | m4  */
	MOVAPS	( M(8), XMM2 )			/* m10 | m9  | m8  */
	MOVAPS	( M(12), XMM3 )			/* m14 | m13 | m12 */

ALIGNTEXT32
LLBL(sse_p3_3d_next):
	MOVSS	( S(0), XMM4 )
	SHUFPS	( CONST(0x0), XMM4, XMM4 )	/* broadcast ox */
	MULPS	( XMM0, XMM4 )			/* ox*m2  | ox*m1 | ox*m0 */

	MOVSS	( S(1), XMM5 )
	SHUFPS	( CONST(0x0), XMM5, XMM5 )	/* broadcast oy */
	MULPS	( XMM1, XMM5 )			/* oy*m6  | oy*m5 | oy*m4 */

	MOVSS	( S(2), XMM6 )
	SHUFPS	( CONST(0x0), XMM6, XMM6 )	/* broadcast oz */
	MULPS	( XMM2, XMM6 )			/* oz*m10 | oz*m9 | oz*m8 */

	ADDPS	( XMM5, XMM4 )			/* accumulate the three products ... */
	ADDPS	( XMM6, XMM4 )
	ADDPS	( XMM3, XMM4 )			/* ... and the translation row */

	MOVLPS	( XMM4, D(0) )			/* store D(1) | D(0) */
	UNPCKHPS ( XMM4, XMM4 )			/* bring lane 2 down to lane 0 */
	MOVSS	( XMM4, D(2) )			/* store D(2) */

	ADD_L	( CONST(16), EDI )		/* dest advances one 16-byte vertex */
	ADD_L	( EAX, ESI )			/* source advances by its stride */
	CMP_L	( ECX, EDI )
	JNE	( LLBL(sse_p3_3d_next) )

LLBL(sse_p3_3d_exit):
	POP_L	( EDI )
	POP_L	( ESI )
	RET
#undef FRAME_OFFSET
|
|
@ -0,0 +1,226 @@
|
|||
/* $Id: sse_xform4.S,v 1.1 2001/03/29 06:46:27 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
#define FRAME_OFFSET 8 /* bytes pushed (ESI, EDI) by every function in this file; presumably consumed by the ARG_* macros from xform_args.h - TODO confirm */
|
||||
|
||||
#define SRC(i) REGOFF(i * 4, ESI) /* i-th 32-bit component at ESI (current source vertex) */
|
||||
#define DST(i) REGOFF(i * 4, EDI) /* i-th 32-bit component at EDI (current dest vertex) */
|
||||
#define MAT(i) REGOFF(i * 4, EDX) /* i-th 32-bit element at EDX (matrix) */
|
||||
|
||||
#define SELECT(r0, r1, r2, r3) CONST( r0 * 64 + r1 * 16 + r2 * 4 + r3 ) /* packs four 2-bit selectors into one immediate, r0 in the top bits; presumably a SHUFPS control byte - TODO confirm */
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points4_general
 *
 * Full 4x4 transform of 4-component points:
 *
 *	D(j) = ox*m[j] + oy*m[4+j] + oz*m[8+j] + ow*m[12+j]	(j = 0..3)
 *
 * The destination is tagged as 4 components wide.  A zero source count
 * returns without touching the destination.  The matrix loads and the
 * destination stores use MOVAPS and therefore need 16-byte alignment;
 * the source is read one float at a time and has no such requirement.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_sse_transform_points4_general )
GLNAME( _mesa_sse_transform_points4_general ):

	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_DEST, EDI )			/* EDI = dest GLvector4f */
	MOV_L( ARG_SOURCE, ESI )		/* ESI = source GLvector4f */
	MOV_L( ARG_MATRIX, EDX )		/* EDX = matrix */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = points left to do */
	TEST_L( ECX, ECX )
	JE( LLBL( sse_p4_general_exit ) )	/* empty input: leave dest alone */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )	/* dest holds x, y, z, w */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )		/* dest count = source count */

	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI -> first source vertex */
	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI -> first dest vertex */

	PREFETCHT0( REGIND(ESI) )		/* warm the first source vertex */

	/* Keep the whole matrix in XMM4-XMM7 for the duration of the loop. */
	MOVAPS( MAT(0), XMM4 )			/* m3  | m2  | m1  | m0  */
	MOVAPS( MAT(4), XMM5 )			/* m7  | m6  | m5  | m4  */
	MOVAPS( MAT(8), XMM6 )			/* m11 | m10 | m9  | m8  */
	MOVAPS( MAT(12), XMM7 )			/* m15 | m14 | m13 | m12 */

ALIGNTEXT16
LLBL( sse_p4_general_next ):

	MOVSS( SRC(0), XMM0 )
	SHUFPS( CONST(0x0), XMM0, XMM0 )	/* broadcast ox */
	MULPS( XMM4, XMM0 )			/* ox * row 0 */

	MOVSS( SRC(1), XMM1 )
	SHUFPS( CONST(0x0), XMM1, XMM1 )	/* broadcast oy */
	MULPS( XMM5, XMM1 )			/* oy * row 1 */

	MOVSS( SRC(2), XMM2 )
	SHUFPS( CONST(0x0), XMM2, XMM2 )	/* broadcast oz */
	MULPS( XMM6, XMM2 )			/* oz * row 2 */

	MOVSS( SRC(3), XMM3 )
	SHUFPS( CONST(0x0), XMM3, XMM3 )	/* broadcast ow */
	MULPS( XMM7, XMM3 )			/* ow * row 3 */

	ADDPS( XMM1, XMM0 )			/* fold the four partial products */
	ADDPS( XMM2, XMM0 )
	ADDPS( XMM3, XMM0 )
	MOVAPS( XMM0, DST(0) )			/* store D(3) | D(2) | D(1) | D(0) */

	ADD_L( CONST(16), EDI )			/* dest advances one 16-byte vertex */
	ADD_L( EAX, ESI )			/* source advances by its stride */

	DEC_L( ECX )
	JNZ( LLBL( sse_p4_general_next ) )

LLBL( sse_p4_general_exit ):

	POP_L( EDI )
	POP_L( ESI )
	RET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points4_3d
 *
 * Apply a 3D (affine) matrix to 4-component points.  x, y and z are the
 * usual four-term sums; w is copied through from the source:
 *
 *	D(j) = ox*m[j] + oy*m[4+j] + oz*m[8+j] + ow*m[12+j]	(j = 0..2)
 *	D(3) = ow
 *
 * Because all four components are written and the source w is not known
 * to be 1, the destination is tagged as 4 components wide.  The previous
 * tag of 3 told consumers to ignore the w that this routine deliberately
 * stores.  NOTE(review): confirm against the C reference transform for
 * the points4/3D case.
 *
 * A zero source count returns without touching the destination.  The
 * matrix loads and destination stores use MOVAPS (16-byte alignment).
 */
ALIGNTEXT4
GLOBL GLNAME( _mesa_sse_transform_points4_3d )
GLNAME( _mesa_sse_transform_points4_3d ):

	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )		/* ptr to source GLvector4f */
	MOV_L( ARG_DEST, EDI )			/* ptr to dest GLvector4f */

	MOV_L( ARG_MATRIX, EDX )		/* ptr to matrix */
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* source count */

	TEST_L( ECX, ECX )
	JZ( LLBL(K_GTP43P3DR_finish) )		/* count was zero; go to finish */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* stride */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )	/* set dest flags: x, y, z, w valid */

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* set dest count */
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size (w is written below) */

	SHL_L( CONST(4), ECX )			/* count *= 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ptr to first source vertex */

	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* ptr to first dest vertex */
	ADD_L( EDI, ECX )			/* ECX = one past the last dest vertex */

	MOVAPS( MAT(0), XMM0 )			/* m3  | m2  | m1  | m0  */
	MOVAPS( MAT(4), XMM1 )			/* m7  | m6  | m5  | m4  */
	MOVAPS( MAT(8), XMM2 )			/* m11 | m10 | m9  | m8  */
	MOVAPS( MAT(12), XMM3 )			/* m15 | m14 | m13 | m12 */

ALIGNTEXT32
LLBL( K_GTP43P3DR_top ):
	MOVSS( SRC(0), XMM4 )			/* ox */
	SHUFPS( CONST(0x0), XMM4, XMM4 )	/* ox | ox | ox | ox */
	MULPS( XMM0, XMM4 )			/* ox*m3 | ox*m2 | ox*m1 | ox*m0 */

	MOVSS( SRC(1), XMM5 )			/* oy */
	SHUFPS( CONST(0x0), XMM5, XMM5 )	/* oy | oy | oy | oy */
	MULPS( XMM1, XMM5 )			/* oy*m7 | oy*m6 | oy*m5 | oy*m4 */

	MOVSS( SRC(2), XMM6 )			/* oz */
	SHUFPS( CONST(0x0), XMM6, XMM6 )	/* oz | oz | oz | oz */
	MULPS( XMM2, XMM6 )			/* oz*m11 | oz*m10 | oz*m9 | oz*m8 */

	MOVSS( SRC(3), XMM7 )			/* ow */
	SHUFPS( CONST(0x0), XMM7, XMM7 )	/* ow | ow | ow | ow */
	MULPS( XMM3, XMM7 )			/* ow*m15 | ow*m14 | ow*m13 | ow*m12 */

	ADDPS( XMM5, XMM4 )			/* ox*m3+oy*m7 | ... */
	ADDPS( XMM6, XMM4 )			/* ox*m3+oy*m7+oz*m11 | ... */
	ADDPS( XMM7, XMM4 )			/* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
	MOVAPS( XMM4, DST(0) )			/* ->D(3) | ->D(2) | ->D(1) | ->D(0) */

	/* Overwrite the computed lane 3 with the untouched source w.  The
	 * source w must be re-read here, after the store above. */
	MOVSS( SRC(3), XMM4 )			/* ow */
	MOVSS( XMM4, DST(3) )			/* ->D(3) */

LLBL( K_GTP43P3DR_skip ):
	ADD_L( CONST(16), EDI )			/* next dest vertex */
	ADD_L( EAX, ESI )			/* next source vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP43P3DR_top) )

LLBL( K_GTP43P3DR_finish ):
	POP_L( EDI )
	POP_L( ESI )
	RET
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points4_identity
 *
 * Identity transform of 4-component points: each source vertex is copied
 * verbatim (16 bytes) into the destination, which is tagged as 4
 * components wide and receives the source count.  A zero source count
 * returns without touching the destination.  The matrix argument is not
 * read.
 *
 * The source pointer advances by the vector's own stride, which this
 * routine does not constrain, so the load uses MOVUPS: a MOVAPS load
 * faults as soon as a source vertex is not 16-byte aligned.  The
 * destination advances in 16-byte steps and keeps the aligned store,
 * which assumes the destination data starts 16-byte aligned - TODO
 * confirm against the allocator.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_sse_transform_points4_identity )
GLNAME( _mesa_sse_transform_points4_identity ):

	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )		/* ptr to source GLvector4f */
	MOV_L( ARG_DEST, EDI )			/* ptr to dest GLvector4f */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* source count */

	TEST_L( ECX, ECX )			/* verify non-zero count */
	JE( LLBL( sse_identity_done ) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* stride */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )	/* set dest flags */

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* set dest count */
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size */

	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ptr to first source vertex */
	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* ptr to first dest vertex */

ALIGNTEXT16
LLBL( sse_identity_loop ):

	PREFETCHNTA( REGOFF(32, ESI) )		/* hint: data 32 bytes ahead of the source */

	MOVUPS( REGIND(ESI), XMM0 )		/* unaligned-safe load of one vertex */
	ADD_L( EAX, ESI )			/* next source vertex */

	MOVAPS( XMM0, REGIND(EDI) )		/* aligned store into dest */
	ADD_L( CONST(16), EDI )			/* next dest vertex */

	DEC_L( ECX )
	JNZ( LLBL( sse_identity_loop ) )

LLBL( sse_identity_done ):

	POP_L( EDI )
	POP_L( ESI )
	RET
|
|
@ -1,4 +1,4 @@
|
|||
/* $Id: x86.c,v 1.19 2001/03/28 20:44:44 gareth Exp $ */
|
||||
/* $Id: x86.c,v 1.20 2001/03/29 06:46:27 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
|
@ -50,40 +50,37 @@
|
|||
const GLubyte flag
|
||||
|
||||
|
||||
#define DECLARE_XFORM_GROUP( pfx, sz, masked ) \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_general_##masked( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_identity_##masked( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d_no_rot_##masked( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_perspective_##masked( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d_##masked( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d_no_rot_##masked( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d_##masked( XFORM_ARGS );
|
||||
#define DECLARE_XFORM_GROUP( pfx, sz ) \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_general( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_identity( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d_no_rot( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_perspective( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d_no_rot( XFORM_ARGS ); \
|
||||
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d( XFORM_ARGS );
|
||||
|
||||
|
||||
#define ASSIGN_XFORM_GROUP( pfx, cma, sz, masked ) \
|
||||
_mesa_transform_tab[cma][sz][MATRIX_GENERAL] = \
|
||||
_mesa_##pfx##_transform_points##sz##_general_##masked; \
|
||||
_mesa_transform_tab[cma][sz][MATRIX_IDENTITY] = \
|
||||
_mesa_##pfx##_transform_points##sz##_identity_##masked; \
|
||||
_mesa_transform_tab[cma][sz][MATRIX_3D_NO_ROT] = \
|
||||
_mesa_##pfx##_transform_points##sz##_3d_no_rot_##masked; \
|
||||
_mesa_transform_tab[cma][sz][MATRIX_PERSPECTIVE] = \
|
||||
_mesa_##pfx##_transform_points##sz##_perspective_##masked; \
|
||||
_mesa_transform_tab[cma][sz][MATRIX_2D] = \
|
||||
_mesa_##pfx##_transform_points##sz##_2d_##masked; \
|
||||
_mesa_transform_tab[cma][sz][MATRIX_2D_NO_ROT] = \
|
||||
_mesa_##pfx##_transform_points##sz##_2d_no_rot_##masked; \
|
||||
_mesa_transform_tab[cma][sz][MATRIX_3D] = \
|
||||
_mesa_##pfx##_transform_points##sz##_3d_##masked;
|
||||
#define ASSIGN_XFORM_GROUP( pfx, sz ) \
|
||||
_mesa_transform_tab[0][sz][MATRIX_GENERAL] = \
|
||||
_mesa_##pfx##_transform_points##sz##_general; \
|
||||
_mesa_transform_tab[0][sz][MATRIX_IDENTITY] = \
|
||||
_mesa_##pfx##_transform_points##sz##_identity; \
|
||||
_mesa_transform_tab[0][sz][MATRIX_3D_NO_ROT] = \
|
||||
_mesa_##pfx##_transform_points##sz##_3d_no_rot; \
|
||||
_mesa_transform_tab[0][sz][MATRIX_PERSPECTIVE] = \
|
||||
_mesa_##pfx##_transform_points##sz##_perspective; \
|
||||
_mesa_transform_tab[0][sz][MATRIX_2D] = \
|
||||
_mesa_##pfx##_transform_points##sz##_2d; \
|
||||
_mesa_transform_tab[0][sz][MATRIX_2D_NO_ROT] = \
|
||||
_mesa_##pfx##_transform_points##sz##_2d_no_rot; \
|
||||
_mesa_transform_tab[0][sz][MATRIX_3D] = \
|
||||
_mesa_##pfx##_transform_points##sz##_3d;
|
||||
|
||||
|
||||
#ifdef USE_X86_ASM
|
||||
DECLARE_XFORM_GROUP( x86, 2, raw )
|
||||
DECLARE_XFORM_GROUP( x86, 3, raw )
|
||||
DECLARE_XFORM_GROUP( x86, 4, raw )
|
||||
DECLARE_XFORM_GROUP( x86, 2, masked )
|
||||
DECLARE_XFORM_GROUP( x86, 3, masked )
|
||||
DECLARE_XFORM_GROUP( x86, 4, masked )
|
||||
DECLARE_XFORM_GROUP( x86, 2 )
|
||||
DECLARE_XFORM_GROUP( x86, 3 )
|
||||
DECLARE_XFORM_GROUP( x86, 4 )
|
||||
|
||||
|
||||
extern GLvector4f * _ASMAPI
|
||||
|
@ -119,13 +116,9 @@ _mesa_v16_x86_general_xform( GLfloat *dest,
|
|||
void _mesa_init_x86_transform_asm( void )
|
||||
{
|
||||
#ifdef USE_X86_ASM
|
||||
ASSIGN_XFORM_GROUP( x86, 0, 2, raw );
|
||||
ASSIGN_XFORM_GROUP( x86, 0, 3, raw );
|
||||
ASSIGN_XFORM_GROUP( x86, 0, 4, raw );
|
||||
|
||||
/* ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 2, masked ); */
|
||||
/* ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 3, masked ); */
|
||||
/* ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 4, masked ); */
|
||||
ASSIGN_XFORM_GROUP( x86, 2 );
|
||||
ASSIGN_XFORM_GROUP( x86, 3 );
|
||||
ASSIGN_XFORM_GROUP( x86, 4 );
|
||||
|
||||
/* XXX this function has been found to cause FP overflow exceptions */
|
||||
_mesa_clip_tab[4] = _mesa_x86_cliptest_points4;
|
||||
|
|
|
@ -0,0 +1,536 @@
|
|||
/* $Id: x86_xform2.S,v 1.1 2001/03/29 06:46:27 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
#define FP_ONE 1065353216 /* 0x3F800000: bit pattern of the IEEE-754 single-precision value 1.0 */
|
||||
#define FP_ZERO 0 /* bit pattern of single-precision +0.0; stored with an integer MOV */
|
||||
|
||||
#define SRC(i) REGOFF(i * 4, ESI) /* i-th 32-bit component at ESI (current source vertex) */
|
||||
#define DST(i) REGOFF(i * 4, EDI) /* i-th 32-bit component at EDI (current dest vertex) */
|
||||
#define MAT(i) REGOFF(i * 4, EDX) /* i-th 32-bit element at EDX (matrix) */
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points2_general
 *
 * General 4x4 transform of 2-component points on the x87 FPU:
 *
 *	D(j) = ox*m[j] + oy*m[4+j] + m[12+j]	(j = 0..3)
 *
 * The destination is tagged as 4 components wide.  A zero source count
 * returns without touching the destination.
 *
 * Stack comments list st(0) first.  aN = ox*mN, bN = oy*m(4+N),
 * rN = running result for D(N).
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_general )
GLNAME( _mesa_x86_transform_points2_general ):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_DEST, EDI )			/* EDI = dest GLvector4f */
	MOV_L( ARG_SOURCE, ESI )		/* ESI = source GLvector4f */
	MOV_L( ARG_MATRIX, EDX )		/* EDX = matrix */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = number of points */
	TEST_L( ECX, ECX )
	JZ( LLBL( x86_p2_general_exit ) )	/* empty input: leave dest alone */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )	/* dest holds x, y, z, w */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )		/* dest count = source count */

	SHL_L( CONST(4), ECX )			/* bytes of dest data: count * 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI -> first source vertex */

	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI -> first dest vertex */
	ADD_L( EDI, ECX )			/* ECX -> end of dest data */

ALIGNTEXT16
LLBL( x86_p2_general_next ):

	/* Eight products fill the whole FPU stack. */
	FLD_S( SRC(0) )
	FMUL_S( MAT(0) )			/* a0 */
	FLD_S( SRC(0) )
	FMUL_S( MAT(1) )			/* a1 a0 */
	FLD_S( SRC(0) )
	FMUL_S( MAT(2) )			/* a2 a1 a0 */
	FLD_S( SRC(0) )
	FMUL_S( MAT(3) )			/* a3 a2 a1 a0 */

	FLD_S( SRC(1) )
	FMUL_S( MAT(4) )			/* b0 a3 a2 a1 a0 */
	FLD_S( SRC(1) )
	FMUL_S( MAT(5) )			/* b1 b0 a3 a2 a1 a0 */
	FLD_S( SRC(1) )
	FMUL_S( MAT(6) )			/* b2 b1 b0 a3 a2 a1 a0 */
	FLD_S( SRC(1) )
	FMUL_S( MAT(7) )			/* b3 b2 b1 b0 a3 a2 a1 a0 */

	/* Fold each bN into its aN, popping as we go. */
	FXCH( ST(3) )				/* b0 b2 b1 b3 a3 a2 a1 a0 */
	FADDP( ST(0), ST(7) )			/* b2 b1 b3 a3 a2 a1 r0 */
	FXCH( ST(1) )				/* b1 b2 b3 a3 a2 a1 r0 */
	FADDP( ST(0), ST(5) )			/* b2 b3 a3 a2 r1 r0 */
	FADDP( ST(0), ST(3) )			/* b3 a3 r2 r1 r0 */
	FADDP( ST(0), ST(1) )			/* r3 r2 r1 r0 */

	/* Add the translation row, rotating each result to st(0) in turn. */
	FXCH( ST(3) )				/* r0 r2 r1 r3 */
	FADD_S( MAT(12) )
	FXCH( ST(2) )				/* r1 r2 r0 r3 */
	FADD_S( MAT(13) )
	FXCH( ST(1) )				/* r2 r1 r0 r3 */
	FADD_S( MAT(14) )
	FXCH( ST(3) )				/* r3 r1 r0 r2 */
	FADD_S( MAT(15) )

	/* Write out in component order and empty the stack. */
	FXCH( ST(2) )				/* r0 r1 r3 r2 */
	FSTP_S( DST(0) )			/* r1 r3 r2 */
	FSTP_S( DST(1) )			/* r3 r2 */
	FXCH( ST(1) )				/* r2 r3 */
	FSTP_S( DST(2) )			/* r3 */
	FSTP_S( DST(3) )			/* (empty) */

	ADD_L( CONST(16), EDI )			/* dest advances one 16-byte vertex */
	ADD_L( EAX, ESI )			/* source advances by its stride */
	CMP_L( ECX, EDI )
	JNE( LLBL( x86_p2_general_next ) )

LLBL( x86_p2_general_exit ):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points2_perspective
 *
 * Perspective transform of 2-component points:
 *
 *	D(0) = ox*m0
 *	D(1) = oy*m5
 *	D(2) = m14	(copied as raw bits through EBX)
 *	D(3) = 0	(FP_ZERO bit pattern)
 *
 * The destination is tagged as 4 components wide.  A zero source count
 * returns without touching the destination.  EBX is used as a scratch
 * register and is saved/restored along with ESI and EDI.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_perspective )
GLNAME( _mesa_x86_transform_points2_perspective ):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_DEST, EDI )			/* EDI = dest GLvector4f */
	MOV_L( ARG_SOURCE, ESI )		/* ESI = source GLvector4f */
	MOV_L( ARG_MATRIX, EDX )		/* EDX = matrix */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = number of points */
	TEST_L( ECX, ECX )
	JZ( LLBL( x86_p2_persp_exit ) )		/* empty input: leave dest alone */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )	/* dest holds x, y, z, w */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )		/* dest count = source count */

	SHL_L( CONST(4), ECX )			/* bytes of dest data: count * 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI -> first source vertex */

	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI -> first dest vertex */
	ADD_L( EDI, ECX )			/* ECX -> end of dest data */

	MOV_L( MAT(14), EBX )			/* z is the constant m14 for every point */

ALIGNTEXT16
LLBL( x86_p2_persp_next ):

	FLD_S( SRC(0) )
	FMUL_S( MAT(0) )			/* st: x */

	FLD_S( SRC(1) )
	FMUL_S( MAT(5) )			/* st: y x */

	FXCH( ST(1) )				/* st: x y */
	FSTP_S( DST(0) )			/* st: y */
	FSTP_S( DST(1) )			/* st: (empty) */
	MOV_L( EBX, DST(2) )			/* z = m14, integer copy of the float bits */
	MOV_L( CONST(FP_ZERO), DST(3) )		/* w = 0 */

	ADD_L( CONST(16), EDI )			/* dest advances one 16-byte vertex */
	ADD_L( EAX, ESI )			/* source advances by its stride */
	CMP_L( ECX, EDI )
	JNE( LLBL( x86_p2_persp_next ) )

LLBL( x86_p2_persp_exit ):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points2_3d
 *
 * 3D (affine) transform of 2-component points on the x87 FPU:
 *
 *	D(j) = ox*m[j] + oy*m[4+j] + m[12+j]	(j = 0..2)
 *
 * The destination is tagged as 3 components wide.  A zero source count
 * returns without touching the destination.
 *
 * Stack comments list st(0) first.  aN = ox*mN, bN = oy*m(4+N),
 * rN = running result for D(N).
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_3d )
GLNAME( _mesa_x86_transform_points2_3d ):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_DEST, EDI )			/* EDI = dest GLvector4f */
	MOV_L( ARG_SOURCE, ESI )		/* ESI = source GLvector4f */
	MOV_L( ARG_MATRIX, EDX )		/* EDX = matrix */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = number of points */
	TEST_L( ECX, ECX )
	JZ( LLBL( x86_p2_3d_exit ) )		/* empty input: leave dest alone */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )	/* dest holds x, y, z */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )		/* dest count = source count */

	SHL_L( CONST(4), ECX )			/* bytes of dest data: count * 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI -> first source vertex */

	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI -> first dest vertex */
	ADD_L( EDI, ECX )			/* ECX -> end of dest data */

ALIGNTEXT16
LLBL( x86_p2_3d_next ):

	FLD_S( SRC(0) )
	FMUL_S( MAT(0) )			/* a0 */
	FLD_S( SRC(0) )
	FMUL_S( MAT(1) )			/* a1 a0 */
	FLD_S( SRC(0) )
	FMUL_S( MAT(2) )			/* a2 a1 a0 */

	FLD_S( SRC(1) )
	FMUL_S( MAT(4) )			/* b0 a2 a1 a0 */
	FLD_S( SRC(1) )
	FMUL_S( MAT(5) )			/* b1 b0 a2 a1 a0 */
	FLD_S( SRC(1) )
	FMUL_S( MAT(6) )			/* b2 b1 b0 a2 a1 a0 */

	/* Fold each bN into its aN, popping as we go. */
	FXCH( ST(2) )				/* b0 b1 b2 a2 a1 a0 */
	FADDP( ST(0), ST(5) )			/* b1 b2 a2 a1 r0 */
	FADDP( ST(0), ST(3) )			/* b2 a2 r1 r0 */
	FADDP( ST(0), ST(1) )			/* r2 r1 r0 */

	/* Add the translation row. */
	FXCH( ST(2) )				/* r0 r1 r2 */
	FADD_S( MAT(12) )
	FXCH( ST(1) )				/* r1 r0 r2 */
	FADD_S( MAT(13) )
	FXCH( ST(2) )				/* r2 r0 r1 */
	FADD_S( MAT(14) )

	/* Write out in component order and empty the stack. */
	FXCH( ST(1) )				/* r0 r2 r1 */
	FSTP_S( DST(0) )			/* r2 r1 */
	FXCH( ST(1) )				/* r1 r2 */
	FSTP_S( DST(1) )			/* r2 */
	FSTP_S( DST(2) )			/* (empty) */

	ADD_L( CONST(16), EDI )			/* dest advances one 16-byte vertex */
	ADD_L( EAX, ESI )			/* source advances by its stride */
	CMP_L( ECX, EDI )
	JNE( LLBL( x86_p2_3d_next ) )

LLBL( x86_p2_3d_exit ):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points2_3d_no_rot
 *
 * Scale-and-translate transform of 2-component points:
 *
 *	D(0) = ox*m0 + m12
 *	D(1) = oy*m5 + m13
 *	D(2) = m14	(copied as raw bits through EBX)
 *
 * The destination is tagged as 3 components wide.  A zero source count
 * returns without touching the destination.  EBX is used as a scratch
 * register and is saved/restored along with ESI and EDI.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_3d_no_rot )
GLNAME( _mesa_x86_transform_points2_3d_no_rot ):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_DEST, EDI )			/* EDI = dest GLvector4f */
	MOV_L( ARG_SOURCE, ESI )		/* ESI = source GLvector4f */
	MOV_L( ARG_MATRIX, EDX )		/* EDX = matrix */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = number of points */
	TEST_L( ECX, ECX )
	JZ( LLBL( x86_p2_3dnr_exit ) )		/* empty input: leave dest alone */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )	/* dest holds x, y, z */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )		/* dest count = source count */

	SHL_L( CONST(4), ECX )			/* bytes of dest data: count * 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI -> first source vertex */

	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI -> first dest vertex */
	ADD_L( EDI, ECX )			/* ECX -> end of dest data */

	MOV_L( MAT(14), EBX )			/* z is the constant m14 for every point */

ALIGNTEXT16
LLBL( x86_p2_3dnr_next ):

	FLD_S( SRC(0) )
	FMUL_S( MAT(0) )			/* st: x */

	FLD_S( SRC(1) )
	FMUL_S( MAT(5) )			/* st: y x */

	FXCH( ST(1) )				/* st: x y */
	FADD_S( MAT(12) )			/* x += m12 */
	FLD_S( MAT(13) )			/* st: m13 x y */
	FXCH( ST(2) )				/* st: y x m13 */
	FADDP( ST(0), ST(2) )			/* st: x (y+m13) */

	FSTP_S( DST(0) )			/* st: (y+m13) */
	FSTP_S( DST(1) )			/* st: (empty) */
	MOV_L( EBX, DST(2) )			/* z = m14, integer copy of the float bits */

	ADD_L( CONST(16), EDI )			/* dest advances one 16-byte vertex */
	ADD_L( EAX, ESI )			/* source advances by its stride */
	CMP_L( ECX, EDI )
	JNE( LLBL( x86_p2_3dnr_next ) )

LLBL( x86_p2_3dnr_exit ):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points2_2d
 *
 * 2D transform of 2-component points on the x87 FPU:
 *
 *	D(0) = ox*m0 + oy*m4 + m12
 *	D(1) = ox*m1 + oy*m5 + m13
 *
 * The destination is tagged as 2 components wide.  A zero source count
 * returns without touching the destination.
 *
 * Stack comments list st(0) first.  aN = ox*mN, bN = oy*m(4+N),
 * rN = running result for D(N).
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_2d )
GLNAME( _mesa_x86_transform_points2_2d ):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_DEST, EDI )			/* EDI = dest GLvector4f */
	MOV_L( ARG_SOURCE, ESI )		/* ESI = source GLvector4f */
	MOV_L( ARG_MATRIX, EDX )		/* EDX = matrix */

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = number of points */
	TEST_L( ECX, ECX )
	JZ( LLBL( x86_p2_2d_exit ) )		/* empty input: leave dest alone */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )	/* dest holds x, y */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )		/* dest count = source count */

	SHL_L( CONST(4), ECX )			/* bytes of dest data: count * 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI -> first source vertex */

	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI -> first dest vertex */
	ADD_L( EDI, ECX )			/* ECX -> end of dest data */

ALIGNTEXT16
LLBL( x86_p2_2d_next ):

	FLD_S( SRC(0) )
	FMUL_S( MAT(0) )			/* a0 */
	FLD_S( SRC(0) )
	FMUL_S( MAT(1) )			/* a1 a0 */

	FLD_S( SRC(1) )
	FMUL_S( MAT(4) )			/* b0 a1 a0 */
	FLD_S( SRC(1) )
	FMUL_S( MAT(5) )			/* b1 b0 a1 a0 */

	/* Fold each bN into its aN, popping as we go. */
	FXCH( ST(1) )				/* b0 b1 a1 a0 */
	FADDP( ST(0), ST(3) )			/* b1 a1 r0 */
	FADDP( ST(0), ST(1) )			/* r1 r0 */

	/* Add the translation. */
	FXCH( ST(1) )				/* r0 r1 */
	FADD_S( MAT(12) )
	FXCH( ST(1) )				/* r1 r0 */
	FADD_S( MAT(13) )

	/* Write out in component order and empty the stack. */
	FXCH( ST(1) )				/* r0 r1 */
	FSTP_S( DST(0) )			/* r1 */
	FSTP_S( DST(1) )			/* (empty) */

	ADD_L( CONST(16), EDI )			/* dest advances one 16-byte vertex */
	ADD_L( EAX, ESI )			/* source advances by its stride */
	CMP_L( ECX, EDI )
	JNE( LLBL( x86_p2_2d_next ) )

LLBL( x86_p2_2d_exit ):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points2_2d_no_rot
 *
 * For every source vertex (s0, s1) stores
 *	dst[0] = s0*m[0] + m[12]
 *	dst[1] = s1*m[5] + m[13]
 * where m[i] is the 32-bit float at byte offset 4*i from the matrix pointer.
 *
 * Loop registers:
 *	ESI  current source vertex, advanced by the source stride held in EAX
 *	EDI  current destination vertex, advanced by a fixed 16 bytes
 *	EDX  matrix base
 *	ECX  destination end address (destination start + count*16)
 *
 * Stack comments list the x87 stack top-first: dN is the running sum for
 * dst[N], tN a product still waiting to be added.
 *
 * An empty source (count == 0) returns before the destination descriptor
 * (count/size/flags) is written.
 *
 * The entry point is aligned with ALIGNTEXT16 like the other transform
 * routines (it previously used ALIGNTEXT4).
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_2d_no_rot )
GLNAME( _mesa_x86_transform_points2_2d_no_rot ):

/* Two registers are pushed below: 2 * 4 bytes. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )
	JZ( LLBL( x86_p2_2dnrr_done ) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16, turned into the end address below. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL( x86_p2_2dnrr_loop ):

	FLD_S( SRC(0) )
	FMUL_S( MAT(0) )		/* d0 */

	FLD_S( SRC(1) )
	FMUL_S( MAT(5) )		/* t1 d0 */

	FXCH( ST(1) )			/* d0 t1 */
	FADD_S( MAT(12) )
	FLD_S( MAT(13) )		/* d1 d0 t1 */
	FXCH( ST(2) )			/* t1 d0 d1 */
	FADDP( ST(0), ST(2) )		/* d0 d1 */

	FSTP_S( DST(0) )		/* d1 */
	FSTP_S( DST(1) )		/* (empty) */

/* No branch in this routine targets the _skip label. */
LLBL( x86_p2_2dnrr_skip ):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL( x86_p2_2dnrr_loop ) )

LLBL( x86_p2_2dnrr_done ):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points2_identity
 *
 * Copies the first two 32-bit words of every source vertex to the
 * destination unchanged.  Integer moves are used, so the values are
 * copied bit-for-bit.
 *
 * Loop registers:
 *	ESI  current source vertex, advanced by the source stride held in EAX
 *	EDI  current destination vertex, advanced by a fixed 16 bytes
 *	ECX  destination end address (destination start + count*16)
 *	EBX, EDX  copy scratch
 *
 * The matrix argument is not read: the original loaded it into EDX and
 * then overwrote EDX in the loop before any use, so that dead load has
 * been dropped.
 *
 * An empty source (count == 0) returns before the destination descriptor
 * (count/size/flags) is written.  When source and destination data start
 * at the same address the descriptor is updated but nothing is copied.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_identity )
GLNAME( _mesa_x86_transform_points2_identity ):

/* Three registers are pushed below: 3 * 4 bytes. */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )
	JZ( LLBL( x86_p2_ir_done ) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16, turned into the end address below. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

	/* Same data address on both sides: nothing to copy. */
	CMP_L( ESI, EDI )
	JE( LLBL( x86_p2_ir_done ) )

ALIGNTEXT16
LLBL( x86_p2_ir_loop ):

	/* Both loads complete before either store. */
	MOV_L( SRC(0), EBX )
	MOV_L( SRC(1), EDX )

	MOV_L( EBX, DST(0) )
	MOV_L( EDX, DST(1) )

/* No branch in this routine targets the _skip label. */
LLBL( x86_p2_ir_skip ):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL( x86_p2_ir_loop ) )

LLBL( x86_p2_ir_done ):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
|
@ -0,0 +1,606 @@
|
|||
/* $Id: x86_xform3.S,v 1.1 2001/03/29 06:46:27 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
/*
 * 32-bit patterns of the single-precision values 1.0 (0x3F800000) and
 * +0.0.  No routine visible in this file references either of them.
 */
#define FP_ONE		1065353216
#define FP_ZERO		0

/*
 * Operand for the i-th 32-bit element (byte offset i * 4) relative to the
 * base register each routine keeps for the source vertex (ESI), the
 * destination vertex (EDI) and the matrix (EDX).
 */
#define SRC(i)		REGOFF(i * 4, ESI)
#define DST(i)		REGOFF(i * 4, EDI)
#define MAT(i)		REGOFF(i * 4, EDX)
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points3_general
 *
 * For every source vertex (s0, s1, s2) stores, for j = 0..3,
 *	dst[j] = s0*m[j] + s1*m[4+j] + s2*m[8+j] + m[12+j]
 * where m[i] is the 32-bit float at byte offset 4*i from the matrix pointer.
 *
 * Loop registers:
 *	ESI  current source vertex, advanced by the source stride held in EAX
 *	EDI  current destination vertex, advanced by a fixed 16 bytes
 *	EDX  matrix base
 *	ECX  destination end address (destination start + count*16)
 *
 * Stack comments list the x87 stack top-first: dN is the running sum for
 * dst[N], tN a product still waiting to be added to dN.  The loop uses all
 * eight x87 stack slots at its deepest point.
 *
 * An empty source (count == 0) returns before the destination descriptor
 * (count/size/flags) is written.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_general )
GLNAME( _mesa_x86_transform_points3_general ):

/* Two registers are pushed below: 2 * 4 bytes. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_MATRIX, EDX )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL( x86_p3_gr_done ) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	/* Describe the result while EDI still points at the descriptor. */
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	/* Switch both pointers from descriptor to vertex data. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = one past the last destination vertex. */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL( x86_p3_gr_loop ):

	/* s0 * first matrix group (m[0..3]) starts the four sums. */
	FLD_S( SRC(0) )
	FMUL_S( MAT(0) )		/* d0 */
	FLD_S( SRC(0) )
	FMUL_S( MAT(1) )		/* d1 d0 */
	FLD_S( SRC(0) )
	FMUL_S( MAT(2) )		/* d2 d1 d0 */
	FLD_S( SRC(0) )
	FMUL_S( MAT(3) )		/* d3 d2 d1 d0 */

	/* s1 * m[4..7] */
	FLD_S( SRC(1) )
	FMUL_S( MAT(4) )		/* t0 d3 d2 d1 d0 */
	FLD_S( SRC(1) )
	FMUL_S( MAT(5) )		/* t1 t0 d3 d2 d1 d0 */
	FLD_S( SRC(1) )
	FMUL_S( MAT(6) )		/* t2 t1 t0 d3 d2 d1 d0 */
	FLD_S( SRC(1) )
	FMUL_S( MAT(7) )		/* t3 t2 t1 t0 d3 d2 d1 d0 */

	FXCH( ST(3) )			/* t0 t2 t1 t3 d3 d2 d1 d0 */
	FADDP( ST(0), ST(7) )		/* t2 t1 t3 d3 d2 d1 d0 */
	FXCH( ST(1) )			/* t1 t2 t3 d3 d2 d1 d0 */
	FADDP( ST(0), ST(5) )		/* t2 t3 d3 d2 d1 d0 */
	FADDP( ST(0), ST(3) )		/* t3 d3 d2 d1 d0 */
	FADDP( ST(0), ST(1) )		/* d3 d2 d1 d0 */

	/* s2 * m[8..11] */
	FLD_S( SRC(2) )
	FMUL_S( MAT(8) )		/* t0 d3 d2 d1 d0 */
	FLD_S( SRC(2) )
	FMUL_S( MAT(9) )		/* t1 t0 d3 d2 d1 d0 */
	FLD_S( SRC(2) )
	FMUL_S( MAT(10) )		/* t2 t1 t0 d3 d2 d1 d0 */
	FLD_S( SRC(2) )
	FMUL_S( MAT(11) )		/* t3 t2 t1 t0 d3 d2 d1 d0 */

	FXCH( ST(3) )			/* t0 t2 t1 t3 d3 d2 d1 d0 */
	FADDP( ST(0), ST(7) )		/* t2 t1 t3 d3 d2 d1 d0 */
	FXCH( ST(1) )			/* t1 t2 t3 d3 d2 d1 d0 */
	FADDP( ST(0), ST(5) )		/* t2 t3 d3 d2 d1 d0 */
	FADDP( ST(0), ST(3) )		/* t3 d3 d2 d1 d0 */
	FADDP( ST(0), ST(1) )		/* d3 d2 d1 d0 */

	/* Add m[12..15] to the four sums. */
	FXCH( ST(3) )			/* d0 d2 d1 d3 */
	FADD_S( MAT(12) )
	FXCH( ST(2) )			/* d1 d2 d0 d3 */
	FADD_S( MAT(13) )
	FXCH( ST(1) )			/* d2 d1 d0 d3 */
	FADD_S( MAT(14) )
	FXCH( ST(3) )			/* d3 d1 d0 d2 */
	FADD_S( MAT(15) )

	FXCH( ST(2) )			/* d0 d1 d3 d2 */
	FSTP_S( DST(0) )		/* d1 d3 d2 */
	FSTP_S( DST(1) )		/* d3 d2 */
	FXCH( ST(1) )			/* d2 d3 */
	FSTP_S( DST(2) )		/* d3 */
	FSTP_S( DST(3) )		/* (empty) */

/* No branch in this routine targets the _skip label. */
LLBL( x86_p3_gr_skip ):

	ADD_L( EAX, ESI )
	ADD_L( CONST(16), EDI )
	CMP_L( ECX, EDI )
	JNE( LLBL( x86_p3_gr_loop ) )

LLBL( x86_p3_gr_done ):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points3_perspective
 *
 * For every source vertex (s0, s1, s2) stores
 *	dst[0] = s0*m[0] + s2*m[8]
 *	dst[1] = s1*m[5] + s2*m[9]
 *	dst[2] = s2*m[10] + m[14]
 *	dst[3] = -s2
 * where m[i] is the 32-bit float at byte offset 4*i from the matrix pointer.
 *
 * Loop registers:
 *	ESI  current source vertex, advanced by the source stride held in EAX
 *	EDI  current destination vertex, advanced by a fixed 16 bytes
 *	EDX  matrix base
 *	ECX  destination end address (destination start + count*16)
 *	EBX  raw bits of s2 with the sign bit flipped
 *
 * Stack comments list the x87 stack top-first: dN is the running sum for
 * dst[N], tN a product still waiting to be added.
 *
 * An empty source (count == 0) returns before the destination descriptor
 * (count/size/flags) is written.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_perspective )
GLNAME( _mesa_x86_transform_points3_perspective ):

/* Three registers are pushed below: 3 * 4 bytes. */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_MATRIX, EDX )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL( x86_p3_pr_done ) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	/* Describe the result while EDI still points at the descriptor. */
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	/* Switch both pointers from descriptor to vertex data. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = one past the last destination vertex. */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL( x86_p3_pr_loop ):

	FLD_S( SRC(0) )
	FMUL_S( MAT(0) )		/* d0 */

	FLD_S( SRC(1) )
	FMUL_S( MAT(5) )		/* d1 d0 */

	FLD_S( SRC(2) )
	FMUL_S( MAT(8) )		/* t0 d1 d0 */
	FLD_S( SRC(2) )
	FMUL_S( MAT(9) )		/* t1 t0 d1 d0 */
	FLD_S( SRC(2) )
	FMUL_S( MAT(10) )		/* t2 t1 t0 d1 d0 */

	FXCH( ST(2) )			/* t0 t1 t2 d1 d0 */
	FADDP( ST(0), ST(4) )		/* t1 t2 d1 d0 */
	FADDP( ST(0), ST(2) )		/* t2 d1 d0 */
	FLD_S( MAT(14) )		/* d2 t2 d1 d0 */
	FXCH( ST(1) )			/* t2 d2 d1 d0 */
	FADDP( ST(0), ST(1) )		/* d2 d1 d0 */

	/*
	 * -2147483648 is 0x80000000 as a 32-bit value: the XOR flips only
	 * bit 31, the sign bit of the single-precision s2.  The load happens
	 * before any store to the destination.
	 */
	MOV_L( SRC(2), EBX )
	XOR_L( CONST(-2147483648), EBX )

	FXCH( ST(2) )			/* d0 d1 d2 */
	FSTP_S( DST(0) )		/* d1 d2 */
	FSTP_S( DST(1) )		/* d2 */
	FSTP_S( DST(2) )		/* (empty) */
	MOV_L( EBX, DST(3) )

/* No branch in this routine targets the _skip label. */
LLBL( x86_p3_pr_skip ):

	ADD_L( EAX, ESI )
	ADD_L( CONST(16), EDI )
	CMP_L( ECX, EDI )
	JNE( LLBL( x86_p3_pr_loop ) )

LLBL( x86_p3_pr_done ):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points3_3d
 *
 * For every source vertex (s0, s1, s2) stores, for j = 0..2,
 *	dst[j] = s0*m[j] + s1*m[4+j] + s2*m[8+j] + m[12+j]
 * where m[i] is the 32-bit float at byte offset 4*i from the matrix pointer.
 * dst[3] is not written.
 *
 * Loop registers:
 *	ESI  current source vertex, advanced by the source stride held in EAX
 *	EDI  current destination vertex, advanced by a fixed 16 bytes
 *	EDX  matrix base
 *	ECX  destination end address (destination start + count*16)
 *
 * Stack comments list the x87 stack top-first: dN is the running sum for
 * dst[N], tN a product still waiting to be added to dN.
 *
 * An empty source (count == 0) returns before the destination descriptor
 * (count/size/flags) is written.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_3d )
GLNAME( _mesa_x86_transform_points3_3d ):

/* Two registers are pushed below: 2 * 4 bytes. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_MATRIX, EDX )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL( x86_p3_3dr_done ) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	/* Describe the result while EDI still points at the descriptor. */
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	/* Switch both pointers from descriptor to vertex data. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = one past the last destination vertex. */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL( x86_p3_3dr_loop ):

	/* s0 * m[0..2] starts the three sums. */
	FLD_S( SRC(0) )
	FMUL_S( MAT(0) )		/* d0 */
	FLD_S( SRC(0) )
	FMUL_S( MAT(1) )		/* d1 d0 */
	FLD_S( SRC(0) )
	FMUL_S( MAT(2) )		/* d2 d1 d0 */

	/* s1 * m[4..6] */
	FLD_S( SRC(1) )
	FMUL_S( MAT(4) )		/* t0 d2 d1 d0 */
	FLD_S( SRC(1) )
	FMUL_S( MAT(5) )		/* t1 t0 d2 d1 d0 */
	FLD_S( SRC(1) )
	FMUL_S( MAT(6) )		/* t2 t1 t0 d2 d1 d0 */

	FXCH( ST(2) )			/* t0 t1 t2 d2 d1 d0 */
	FADDP( ST(0), ST(5) )		/* t1 t2 d2 d1 d0 */
	FADDP( ST(0), ST(3) )		/* t2 d2 d1 d0 */
	FADDP( ST(0), ST(1) )		/* d2 d1 d0 */

	/* s2 * m[8..10] */
	FLD_S( SRC(2) )
	FMUL_S( MAT(8) )		/* t0 d2 d1 d0 */
	FLD_S( SRC(2) )
	FMUL_S( MAT(9) )		/* t1 t0 d2 d1 d0 */
	FLD_S( SRC(2) )
	FMUL_S( MAT(10) )		/* t2 t1 t0 d2 d1 d0 */

	FXCH( ST(2) )			/* t0 t1 t2 d2 d1 d0 */
	FADDP( ST(0), ST(5) )		/* t1 t2 d2 d1 d0 */
	FADDP( ST(0), ST(3) )		/* t2 d2 d1 d0 */
	FADDP( ST(0), ST(1) )		/* d2 d1 d0 */

	/* Add m[12..14] to the three sums. */
	FXCH( ST(2) )			/* d0 d1 d2 */
	FADD_S( MAT(12) )
	FXCH( ST(1) )			/* d1 d0 d2 */
	FADD_S( MAT(13) )
	FXCH( ST(2) )			/* d2 d0 d1 */
	FADD_S( MAT(14) )

	FXCH( ST(1) )			/* d0 d2 d1 */
	FSTP_S( DST(0) )		/* d2 d1 */
	FXCH( ST(1) )			/* d1 d2 */
	FSTP_S( DST(1) )		/* d2 */
	FSTP_S( DST(2) )		/* (empty) */

/* No branch in this routine targets the _skip label. */
LLBL( x86_p3_3dr_skip ):

	ADD_L( EAX, ESI )
	ADD_L( CONST(16), EDI )
	CMP_L( ECX, EDI )
	JNE( LLBL( x86_p3_3dr_loop ) )

LLBL( x86_p3_3dr_done ):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points3_3d_no_rot
 *
 * For every source vertex (s0, s1, s2) stores
 *	dst[0] = s0*m[0]  + m[12]
 *	dst[1] = s1*m[5]  + m[13]
 *	dst[2] = s2*m[10] + m[14]
 * where m[i] is the 32-bit float at byte offset 4*i from the matrix pointer.
 * dst[3] is not written.
 *
 * Loop registers:
 *	ESI  current source vertex, advanced by the source stride held in EAX
 *	EDI  current destination vertex, advanced by a fixed 16 bytes
 *	EDX  matrix base
 *	ECX  destination end address (destination start + count*16)
 *
 * Stack comments list the x87 stack top-first: dN is the running sum for
 * dst[N], tN a product still waiting to be added to dN.
 *
 * An empty source (count == 0) returns before the destination descriptor
 * (count/size/flags) is written.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_3d_no_rot )
GLNAME( _mesa_x86_transform_points3_3d_no_rot ):

/* Two registers are pushed below: 2 * 4 bytes. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_MATRIX, EDX )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL( x86_p3_3dnrr_done ) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	/* Describe the result while EDI still points at the descriptor. */
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	/* Switch both pointers from descriptor to vertex data. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = one past the last destination vertex. */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL( x86_p3_3dnrr_loop ):

	FLD_S( SRC(0) )
	FMUL_S( MAT(0) )		/* d0 */

	FLD_S( SRC(1) )
	FMUL_S( MAT(5) )		/* t1 d0 */

	FLD_S( SRC(2) )
	FMUL_S( MAT(10) )		/* t2 t1 d0 */

	FXCH( ST(2) )			/* d0 t1 t2 */
	FADD_S( MAT(12) )
	FLD_S( MAT(13) )		/* d1 d0 t1 t2 */
	FXCH( ST(2) )			/* t1 d0 d1 t2 */
	FADDP( ST(0), ST(2) )		/* d0 d1 t2 */
	FLD_S( MAT(14) )		/* d2 d0 d1 t2 */
	FXCH( ST(3) )			/* t2 d0 d1 d2 */
	FADDP( ST(0), ST(3) )		/* d0 d1 d2 */

	FSTP_S( DST(0) )		/* d1 d2 */
	FSTP_S( DST(1) )		/* d2 */
	FSTP_S( DST(2) )		/* (empty) */

/* No branch in this routine targets the _skip label. */
LLBL( x86_p3_3dnrr_skip ):

	ADD_L( EAX, ESI )
	ADD_L( CONST(16), EDI )
	CMP_L( ECX, EDI )
	JNE( LLBL( x86_p3_3dnrr_loop ) )

LLBL( x86_p3_3dnrr_done ):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points3_2d
 *
 * For every source vertex (s0, s1, s2) stores
 *	dst[0] = s0*m[0] + s1*m[4] + m[12]
 *	dst[1] = s0*m[1] + s1*m[5] + m[13]
 *	dst[2] = s2			(copied bit-for-bit through EBX)
 * where m[i] is the 32-bit float at byte offset 4*i from the matrix pointer.
 *
 * Loop registers:
 *	ESI  current source vertex, advanced by the source stride held in EAX
 *	EDI  current destination vertex, advanced by a fixed 16 bytes
 *	EDX  matrix base
 *	ECX  destination end address (destination start + count*16)
 *	EBX  raw bits of s2
 *
 * Stack comments list the x87 stack top-first: dN is the running sum for
 * dst[N], tN a product still waiting to be added to dN.
 *
 * An empty source (count == 0) returns before the destination descriptor
 * (count/size/flags) is written.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_2d )
GLNAME( _mesa_x86_transform_points3_2d ):

/* Three registers are pushed below: 3 * 4 bytes. */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_MATRIX, EDX )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL( x86_p3_2dr_done ) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	/* Describe the result while EDI still points at the descriptor. */
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	/* Switch both pointers from descriptor to vertex data. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = one past the last destination vertex. */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL( x86_p3_2dr_loop ):

	FLD_S( SRC(0) )
	FMUL_S( MAT(0) )		/* d0 */
	FLD_S( SRC(0) )
	FMUL_S( MAT(1) )		/* d1 d0 */

	FLD_S( SRC(1) )
	FMUL_S( MAT(4) )		/* t0 d1 d0 */
	FLD_S( SRC(1) )
	FMUL_S( MAT(5) )		/* t1 t0 d1 d0 */

	FXCH( ST(1) )			/* t0 t1 d1 d0 */
	FADDP( ST(0), ST(3) )		/* t1 d1 d0 */
	FADDP( ST(0), ST(1) )		/* d1 d0 */

	FXCH( ST(1) )			/* d0 d1 */
	FADD_S( MAT(12) )
	FXCH( ST(1) )			/* d1 d0 */
	FADD_S( MAT(13) )

	/* s2 is fetched before any store to the destination. */
	MOV_L( SRC(2), EBX )

	FXCH( ST(1) )			/* d0 d1 */
	FSTP_S( DST(0) )		/* d1 */
	FSTP_S( DST(1) )		/* (empty) */
	MOV_L( EBX, DST(2) )

/* No branch in this routine targets the _skip label. */
LLBL( x86_p3_2dr_skip ):

	ADD_L( EAX, ESI )
	ADD_L( CONST(16), EDI )
	CMP_L( ECX, EDI )
	JNE( LLBL( x86_p3_2dr_loop ) )

LLBL( x86_p3_2dr_done ):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points3_2d_no_rot
 *
 * For every source vertex (s0, s1, s2) stores
 *	dst[0] = s0*m[0] + m[12]
 *	dst[1] = s1*m[5] + m[13]
 *	dst[2] = s2			(copied bit-for-bit through EBX)
 * where m[i] is the 32-bit float at byte offset 4*i from the matrix pointer.
 *
 * Loop registers:
 *	ESI  current source vertex, advanced by the source stride held in EAX
 *	EDI  current destination vertex, advanced by a fixed 16 bytes
 *	EDX  matrix base
 *	ECX  destination end address (destination start + count*16)
 *	EBX  raw bits of s2
 *
 * Stack comments list the x87 stack top-first: dN is the running sum for
 * dst[N], tN a product still waiting to be added to dN.
 *
 * An empty source (count == 0) returns before the destination descriptor
 * (count/size/flags) is written.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_2d_no_rot )
GLNAME( _mesa_x86_transform_points3_2d_no_rot ):

/* Three registers are pushed below: 3 * 4 bytes. */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_MATRIX, EDX )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL( x86_p3_2dnrr_done ) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	/* Describe the result while EDI still points at the descriptor. */
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	/* Switch both pointers from descriptor to vertex data. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = one past the last destination vertex. */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL( x86_p3_2dnrr_loop ):

	FLD_S( SRC(0) )
	FMUL_S( MAT(0) )		/* d0 */

	FLD_S( SRC(1) )
	FMUL_S( MAT(5) )		/* t1 d0 */

	FXCH( ST(1) )			/* d0 t1 */
	FADD_S( MAT(12) )
	FLD_S( MAT(13) )		/* d1 d0 t1 */

	FXCH( ST(2) )			/* t1 d0 d1 */
	FADDP( ST(0), ST(2) )		/* d0 d1 */

	/* s2 is fetched before any store to the destination. */
	MOV_L( SRC(2), EBX )

	FSTP_S( DST(0) )		/* d1 */
	FSTP_S( DST(1) )		/* (empty) */
	MOV_L( EBX, DST(2) )

/* No branch in this routine targets the _skip label. */
LLBL( x86_p3_2dnrr_skip ):

	ADD_L( EAX, ESI )
	ADD_L( CONST(16), EDI )
	CMP_L( ECX, EDI )
	JNE( LLBL( x86_p3_2dnrr_loop ) )

LLBL( x86_p3_2dnrr_done ):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points3_identity
 *
 * Copies the first three 32-bit words of every source vertex to the
 * destination unchanged.  Integer moves are used, so the values are
 * copied bit-for-bit.
 *
 * Loop registers:
 *	ESI  current source vertex, advanced by the source stride held in EAX
 *	EDI  current destination vertex, advanced by a fixed 16 bytes
 *	ECX  destination end address (destination start + count*16)
 *	EBX, EBP, EDX  copy scratch
 *
 * Changes from the previous revision of this routine:
 *   - the matrix argument is no longer loaded; EDX was overwritten in the
 *     loop before the loaded value was ever read;
 *   - the compiled-out x87 copy variant (the #else arm of "#if 1") is gone;
 *   - FRAME_OFFSET is now #undef'd at the end, as in the other routines.
 *
 * An empty source (count == 0) returns before the destination descriptor
 * (count/size/flags) is written.  When source and destination data start
 * at the same address the descriptor is updated but nothing is copied.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_identity )
GLNAME( _mesa_x86_transform_points3_identity ):

/* Four registers are pushed below: 4 * 4 bytes. */
#define FRAME_OFFSET 16
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )
	PUSH_L( EBP )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )
	JZ( LLBL( x86_p3_ir_done ) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count * 16, turned into the end address below. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

	/* Same data address on both sides: nothing to copy. */
	CMP_L( ESI, EDI )
	JE( LLBL( x86_p3_ir_done ) )

ALIGNTEXT16
LLBL( x86_p3_ir_loop ):

	/* All three loads complete before the first store. */
	MOV_L( SRC(0), EBX )
	MOV_L( SRC(1), EBP )
	MOV_L( SRC(2), EDX )

	MOV_L( EBX, DST(0) )
	MOV_L( EBP, DST(1) )
	MOV_L( EDX, DST(2) )

/* No branch in this routine targets the _skip label. */
LLBL( x86_p3_ir_skip ):

	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL( x86_p3_ir_loop ) )

LLBL( x86_p3_ir_done ):

	POP_L( EBP )
	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
|
@ -0,0 +1,639 @@
|
|||
/* $Id: x86_xform4.S,v 1.1 2001/03/29 06:46:27 gareth Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
/*
 * 32-bit patterns of the single-precision values 1.0 (0x3F800000) and
 * +0.0.
 */
#define FP_ONE		1065353216
#define FP_ZERO		0

/*
 * Operand for the i-th 32-bit element (byte offset i * 4) relative to the
 * base register each routine keeps for the source vertex (ESI), the
 * destination vertex (EDI) and the matrix (EDX).
 */
#define SRC(i)		REGOFF(i * 4, ESI)
#define DST(i)		REGOFF(i * 4, EDI)
#define MAT(i)		REGOFF(i * 4, EDX)
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points4_general
 *
 * For every source vertex (s0, s1, s2, s3) stores, for j = 0..3,
 *	dst[j] = s0*m[j] + s1*m[4+j] + s2*m[8+j] + s3*m[12+j]
 * where m[i] is the 32-bit float at byte offset 4*i from the matrix pointer.
 *
 * Loop registers:
 *	ESI  current source vertex, advanced by the source stride held in EAX
 *	EDI  current destination vertex, advanced by a fixed 16 bytes
 *	EDX  matrix base
 *	ECX  destination end address (destination start + count*16)
 *
 * Stack comments list the x87 stack top-first: dN is the running sum for
 * dst[N], tN a product still waiting to be added to dN.  The loop uses all
 * eight x87 stack slots at its deepest point.
 *
 * An empty source (count == 0) returns before the destination descriptor
 * (count/size/flags) is written.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_general )
GLNAME( _mesa_x86_transform_points4_general ):

/* Two registers are pushed below: 2 * 4 bytes. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_MATRIX, EDX )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL( x86_p4_gr_done ) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	/* Describe the result while EDI still points at the descriptor. */
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	/* Switch both pointers from descriptor to vertex data. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = one past the last destination vertex. */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL( x86_p4_gr_loop ):

	/* s0 * m[0..3] starts the four sums. */
	FLD_S( SRC(0) )
	FMUL_S( MAT(0) )		/* d0 */
	FLD_S( SRC(0) )
	FMUL_S( MAT(1) )		/* d1 d0 */
	FLD_S( SRC(0) )
	FMUL_S( MAT(2) )		/* d2 d1 d0 */
	FLD_S( SRC(0) )
	FMUL_S( MAT(3) )		/* d3 d2 d1 d0 */

	/* s1 * m[4..7] */
	FLD_S( SRC(1) )
	FMUL_S( MAT(4) )		/* t0 d3 d2 d1 d0 */
	FLD_S( SRC(1) )
	FMUL_S( MAT(5) )		/* t1 t0 d3 d2 d1 d0 */
	FLD_S( SRC(1) )
	FMUL_S( MAT(6) )		/* t2 t1 t0 d3 d2 d1 d0 */
	FLD_S( SRC(1) )
	FMUL_S( MAT(7) )		/* t3 t2 t1 t0 d3 d2 d1 d0 */

	FXCH( ST(3) )			/* t0 t2 t1 t3 d3 d2 d1 d0 */
	FADDP( ST(0), ST(7) )		/* t2 t1 t3 d3 d2 d1 d0 */
	FXCH( ST(1) )			/* t1 t2 t3 d3 d2 d1 d0 */
	FADDP( ST(0), ST(5) )		/* t2 t3 d3 d2 d1 d0 */
	FADDP( ST(0), ST(3) )		/* t3 d3 d2 d1 d0 */
	FADDP( ST(0), ST(1) )		/* d3 d2 d1 d0 */

	/* s2 * m[8..11] */
	FLD_S( SRC(2) )
	FMUL_S( MAT(8) )		/* t0 d3 d2 d1 d0 */
	FLD_S( SRC(2) )
	FMUL_S( MAT(9) )		/* t1 t0 d3 d2 d1 d0 */
	FLD_S( SRC(2) )
	FMUL_S( MAT(10) )		/* t2 t1 t0 d3 d2 d1 d0 */
	FLD_S( SRC(2) )
	FMUL_S( MAT(11) )		/* t3 t2 t1 t0 d3 d2 d1 d0 */

	FXCH( ST(3) )			/* t0 t2 t1 t3 d3 d2 d1 d0 */
	FADDP( ST(0), ST(7) )		/* t2 t1 t3 d3 d2 d1 d0 */
	FXCH( ST(1) )			/* t1 t2 t3 d3 d2 d1 d0 */
	FADDP( ST(0), ST(5) )		/* t2 t3 d3 d2 d1 d0 */
	FADDP( ST(0), ST(3) )		/* t3 d3 d2 d1 d0 */
	FADDP( ST(0), ST(1) )		/* d3 d2 d1 d0 */

	/* s3 * m[12..15] */
	FLD_S( SRC(3) )
	FMUL_S( MAT(12) )		/* t0 d3 d2 d1 d0 */
	FLD_S( SRC(3) )
	FMUL_S( MAT(13) )		/* t1 t0 d3 d2 d1 d0 */
	FLD_S( SRC(3) )
	FMUL_S( MAT(14) )		/* t2 t1 t0 d3 d2 d1 d0 */
	FLD_S( SRC(3) )
	FMUL_S( MAT(15) )		/* t3 t2 t1 t0 d3 d2 d1 d0 */

	FXCH( ST(3) )			/* t0 t2 t1 t3 d3 d2 d1 d0 */
	FADDP( ST(0), ST(7) )		/* t2 t1 t3 d3 d2 d1 d0 */
	FXCH( ST(1) )			/* t1 t2 t3 d3 d2 d1 d0 */
	FADDP( ST(0), ST(5) )		/* t2 t3 d3 d2 d1 d0 */
	FADDP( ST(0), ST(3) )		/* t3 d3 d2 d1 d0 */
	FADDP( ST(0), ST(1) )		/* d3 d2 d1 d0 */

	FXCH( ST(3) )			/* d0 d2 d1 d3 */
	FSTP_S( DST(0) )		/* d2 d1 d3 */
	FXCH( ST(1) )			/* d1 d2 d3 */
	FSTP_S( DST(1) )		/* d2 d3 */
	FSTP_S( DST(2) )		/* d3 */
	FSTP_S( DST(3) )		/* (empty) */

/* No branch in this routine targets the _skip label. */
LLBL( x86_p4_gr_skip ):

	ADD_L( EAX, ESI )
	ADD_L( CONST(16), EDI )
	CMP_L( ECX, EDI )
	JNE( LLBL( x86_p4_gr_loop ) )

LLBL( x86_p4_gr_done ):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points4_perspective
 *
 * For every source vertex (s0, s1, s2, s3) stores
 *	dst[0] = s0*m[0] + s2*m[8]
 *	dst[1] = s1*m[5] + s2*m[9]
 *	dst[2] = s2*m[10] + s3*m[14]
 *	dst[3] = -s2
 * where m[i] is the 32-bit float at byte offset 4*i from the matrix pointer.
 *
 * Loop registers:
 *	ESI  current source vertex, advanced by the source stride held in EAX
 *	EDI  current destination vertex, advanced by a fixed 16 bytes
 *	EDX  matrix base
 *	ECX  destination end address (destination start + count*16)
 *	EBX  raw bits of s2 with the sign bit flipped
 *
 * Stack comments list the x87 stack top-first: dN is the running sum for
 * dst[N], tN a product still waiting to be added.
 *
 * An empty source (count == 0) returns before the destination descriptor
 * (count/size/flags) is written.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_perspective )
GLNAME( _mesa_x86_transform_points4_perspective ):

/* Three registers are pushed below: 3 * 4 bytes. */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_MATRIX, EDX )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL( x86_p4_pr_done ) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	/* Describe the result while EDI still points at the descriptor. */
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	/* Switch both pointers from descriptor to vertex data. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = one past the last destination vertex. */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL( x86_p4_pr_loop ):

	FLD_S( SRC(0) )
	FMUL_S( MAT(0) )		/* d0 */

	FLD_S( SRC(1) )
	FMUL_S( MAT(5) )		/* d1 d0 */

	FLD_S( SRC(2) )
	FMUL_S( MAT(8) )		/* t0 d1 d0 */
	FLD_S( SRC(2) )
	FMUL_S( MAT(9) )		/* t1 t0 d1 d0 */
	FLD_S( SRC(2) )
	FMUL_S( MAT(10) )		/* d2 t1 t0 d1 d0 */

	FXCH( ST(2) )			/* t0 t1 d2 d1 d0 */
	FADDP( ST(0), ST(4) )		/* t1 d2 d1 d0 */
	FADDP( ST(0), ST(2) )		/* d2 d1 d0 */

	FLD_S( SRC(3) )
	FMUL_S( MAT(14) )		/* t2 d2 d1 d0 */

	FADDP( ST(0), ST(1) )		/* d2 d1 d0 */

	/*
	 * -2147483648 is 0x80000000 as a 32-bit value: the XOR flips only
	 * bit 31, the sign bit of the single-precision s2.  The load happens
	 * before any store to the destination.
	 */
	MOV_L( SRC(2), EBX )
	XOR_L( CONST(-2147483648), EBX )

	FXCH( ST(2) )			/* d0 d1 d2 */
	FSTP_S( DST(0) )		/* d1 d2 */
	FSTP_S( DST(1) )		/* d2 */
	FSTP_S( DST(2) )		/* (empty) */
	MOV_L( EBX, DST(3) )

/* No branch in this routine targets the _skip label. */
LLBL( x86_p4_pr_skip ):

	ADD_L( EAX, ESI )
	ADD_L( CONST(16), EDI )
	CMP_L( ECX, EDI )
	JNE( LLBL( x86_p4_pr_loop ) )

LLBL( x86_p4_pr_done ):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points4_3d
 *
 * For every source vertex (s0, s1, s2, s3) stores, for j = 0..2,
 *	dst[j] = s0*m[j] + s1*m[4+j] + s2*m[8+j] + s3*m[12+j]
 * and
 *	dst[3] = s3			(copied bit-for-bit through EBX)
 * where m[i] is the 32-bit float at byte offset 4*i from the matrix pointer.
 *
 * Loop registers:
 *	ESI  current source vertex, advanced by the source stride held in EAX
 *	EDI  current destination vertex, advanced by a fixed 16 bytes
 *	EDX  matrix base
 *	ECX  destination end address (destination start + count*16)
 *	EBX  raw bits of s3
 *
 * Stack comments list the x87 stack top-first: dN is the running sum for
 * dst[N], tN a product still waiting to be added to dN.
 *
 * An empty source (count == 0) returns before the destination descriptor
 * (count/size/flags) is written.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_3d )
GLNAME( _mesa_x86_transform_points4_3d ):

/* Three registers are pushed below: 3 * 4 bytes. */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_MATRIX, EDX )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL( x86_p4_3dr_done ) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	/* Describe the result while EDI still points at the descriptor. */
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	/* Switch both pointers from descriptor to vertex data. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = one past the last destination vertex. */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL( x86_p4_3dr_loop ):

	/* s0 * m[0..2] starts the three sums. */
	FLD_S( SRC(0) )
	FMUL_S( MAT(0) )		/* d0 */
	FLD_S( SRC(0) )
	FMUL_S( MAT(1) )		/* d1 d0 */
	FLD_S( SRC(0) )
	FMUL_S( MAT(2) )		/* d2 d1 d0 */

	/* s1 * m[4..6] */
	FLD_S( SRC(1) )
	FMUL_S( MAT(4) )		/* t0 d2 d1 d0 */
	FLD_S( SRC(1) )
	FMUL_S( MAT(5) )		/* t1 t0 d2 d1 d0 */
	FLD_S( SRC(1) )
	FMUL_S( MAT(6) )		/* t2 t1 t0 d2 d1 d0 */

	FXCH( ST(2) )			/* t0 t1 t2 d2 d1 d0 */
	FADDP( ST(0), ST(5) )		/* t1 t2 d2 d1 d0 */
	FADDP( ST(0), ST(3) )		/* t2 d2 d1 d0 */
	FADDP( ST(0), ST(1) )		/* d2 d1 d0 */

	/* s2 * m[8..10] */
	FLD_S( SRC(2) )
	FMUL_S( MAT(8) )		/* t0 d2 d1 d0 */
	FLD_S( SRC(2) )
	FMUL_S( MAT(9) )		/* t1 t0 d2 d1 d0 */
	FLD_S( SRC(2) )
	FMUL_S( MAT(10) )		/* t2 t1 t0 d2 d1 d0 */

	FXCH( ST(2) )			/* t0 t1 t2 d2 d1 d0 */
	FADDP( ST(0), ST(5) )		/* t1 t2 d2 d1 d0 */
	FADDP( ST(0), ST(3) )		/* t2 d2 d1 d0 */
	FADDP( ST(0), ST(1) )		/* d2 d1 d0 */

	/* s3 * m[12..14] */
	FLD_S( SRC(3) )
	FMUL_S( MAT(12) )		/* t0 d2 d1 d0 */
	FLD_S( SRC(3) )
	FMUL_S( MAT(13) )		/* t1 t0 d2 d1 d0 */
	FLD_S( SRC(3) )
	FMUL_S( MAT(14) )		/* t2 t1 t0 d2 d1 d0 */

	FXCH( ST(2) )			/* t0 t1 t2 d2 d1 d0 */
	FADDP( ST(0), ST(5) )		/* t1 t2 d2 d1 d0 */
	FADDP( ST(0), ST(3) )		/* t2 d2 d1 d0 */
	FADDP( ST(0), ST(1) )		/* d2 d1 d0 */

	/* s3 is fetched before any store to the destination. */
	MOV_L( SRC(3), EBX )

	FXCH( ST(2) )			/* d0 d1 d2 */
	FSTP_S( DST(0) )		/* d1 d2 */
	FSTP_S( DST(1) )		/* d2 */
	FSTP_S( DST(2) )		/* (empty) */
	MOV_L( EBX, DST(3) )

/* No branch in this routine targets the _skip label. */
LLBL( x86_p4_3dr_skip ):

	ADD_L( EAX, ESI )
	ADD_L( CONST(16), EDI )
	CMP_L( ECX, EDI )
	JNE( LLBL( x86_p4_3dr_loop ) )

LLBL( x86_p4_3dr_done ):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points4_3d_no_rot
 *
 * For every source vertex (s0, s1, s2, s3) stores
 *	dst[0] = s0*m[0]  + s3*m[12]
 *	dst[1] = s1*m[5]  + s3*m[13]
 *	dst[2] = s2*m[10] + s3*m[14]
 *	dst[3] = s3			(copied bit-for-bit through EBX)
 * where m[i] is the 32-bit float at byte offset 4*i from the matrix pointer.
 *
 * Loop registers:
 *	ESI  current source vertex, advanced by the source stride held in EAX
 *	EDI  current destination vertex, advanced by a fixed 16 bytes
 *	EDX  matrix base
 *	ECX  destination end address (destination start + count*16)
 *	EBX  raw bits of s3
 *
 * Stack comments list the x87 stack top-first: dN is the running sum for
 * dst[N], tN a product still waiting to be added to dN.
 *
 * An empty source (count == 0) returns before the destination descriptor
 * (count/size/flags) is written.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_3d_no_rot)
GLNAME(_mesa_x86_transform_points4_3d_no_rot):

/* Three registers are pushed below: 3 * 4 bytes. */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_MATRIX, EDX )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL( x86_p4_3dnrr_done ) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	/* Describe the result while EDI still points at the descriptor. */
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	/* Switch both pointers from descriptor to vertex data. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = one past the last destination vertex. */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL( x86_p4_3dnrr_loop ):

	FLD_S( SRC(0) )
	FMUL_S( MAT(0) )		/* d0 */

	FLD_S( SRC(1) )
	FMUL_S( MAT(5) )		/* d1 d0 */

	FLD_S( SRC(2) )
	FMUL_S( MAT(10) )		/* d2 d1 d0 */

	/* s3 * m[12..14] */
	FLD_S( SRC(3) )
	FMUL_S( MAT(12) )		/* t0 d2 d1 d0 */
	FLD_S( SRC(3) )
	FMUL_S( MAT(13) )		/* t1 t0 d2 d1 d0 */
	FLD_S( SRC(3) )
	FMUL_S( MAT(14) )		/* t2 t1 t0 d2 d1 d0 */

	FXCH( ST(2) )			/* t0 t1 t2 d2 d1 d0 */
	FADDP( ST(0), ST(5) )		/* t1 t2 d2 d1 d0 */
	FADDP( ST(0), ST(3) )		/* t2 d2 d1 d0 */
	FADDP( ST(0), ST(1) )		/* d2 d1 d0 */

	/* s3 is fetched before any store to the destination. */
	MOV_L( SRC(3), EBX )

	FXCH( ST(2) )			/* d0 d1 d2 */
	FSTP_S( DST(0) )		/* d1 d2 */
	FSTP_S( DST(1) )		/* d2 */
	FSTP_S( DST(2) )		/* (empty) */
	MOV_L( EBX, DST(3) )

/* No branch in this routine targets the _skip label. */
LLBL( x86_p4_3dnrr_skip ):

	ADD_L( EAX, ESI )
	ADD_L( CONST(16), EDI )
	CMP_L( ECX, EDI )
	JNE( LLBL( x86_p4_3dnrr_loop ) )

LLBL( x86_p4_3dnrr_done ):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points4_2d
 *
 * For every source vertex (s0, s1, s2, s3) stores
 *	dst[0] = s0*m[0] + s1*m[4] + s3*m[12]
 *	dst[1] = s0*m[1] + s1*m[5] + s3*m[13]
 *	dst[2] = s2			(copied bit-for-bit through EBX)
 *	dst[3] = s3			(copied bit-for-bit through EBP)
 * where m[i] is the 32-bit float at byte offset 4*i from the matrix pointer.
 *
 * Loop registers:
 *	ESI  current source vertex, advanced by the source stride held in EAX
 *	EDI  current destination vertex, advanced by a fixed 16 bytes
 *	EDX  matrix base
 *	ECX  destination end address (destination start + count*16)
 *	EBX, EBP  raw bits of s2 and s3
 *
 * Stack comments list the x87 stack top-first: dN is the running sum for
 * dst[N], tN a product still waiting to be added to dN.
 *
 * An empty source (count == 0) returns before the destination descriptor
 * (count/size/flags) is written.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_2d )
GLNAME( _mesa_x86_transform_points4_2d ):

/* Four registers are pushed below: 4 * 4 bytes. */
#define FRAME_OFFSET 16
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )
	PUSH_L( EBP )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_MATRIX, EDX )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL( x86_p4_2dr_done ) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	/* Describe the result while EDI still points at the descriptor. */
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	/* Switch both pointers from descriptor to vertex data. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = one past the last destination vertex. */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL( x86_p4_2dr_loop ):

	FLD_S( SRC(0) )
	FMUL_S( MAT(0) )		/* d0 */
	FLD_S( SRC(0) )
	FMUL_S( MAT(1) )		/* d1 d0 */

	/* s1 * m[4..5] */
	FLD_S( SRC(1) )
	FMUL_S( MAT(4) )		/* t0 d1 d0 */
	FLD_S( SRC(1) )
	FMUL_S( MAT(5) )		/* t1 t0 d1 d0 */

	FXCH( ST(1) )			/* t0 t1 d1 d0 */
	FADDP( ST(0), ST(3) )		/* t1 d1 d0 */
	FADDP( ST(0), ST(1) )		/* d1 d0 */

	/* s3 * m[12..13] */
	FLD_S( SRC(3) )
	FMUL_S( MAT(12) )		/* t0 d1 d0 */
	FLD_S( SRC(3) )
	FMUL_S( MAT(13) )		/* t1 t0 d1 d0 */

	FXCH( ST(1) )			/* t0 t1 d1 d0 */
	FADDP( ST(0), ST(3) )		/* t1 d1 d0 */
	FADDP( ST(0), ST(1) )		/* d1 d0 */

	/* s2 and s3 are fetched before any store to the destination. */
	MOV_L( SRC(2), EBX )
	MOV_L( SRC(3), EBP )

	FXCH( ST(1) )			/* d0 d1 */
	FSTP_S( DST(0) )		/* d1 */
	FSTP_S( DST(1) )		/* (empty) */
	MOV_L( EBX, DST(2) )
	MOV_L( EBP, DST(3) )

/* No branch in this routine targets the _skip label. */
LLBL( x86_p4_2dr_skip ):

	ADD_L( EAX, ESI )
	ADD_L( CONST(16), EDI )
	CMP_L( ECX, EDI )
	JNE( LLBL( x86_p4_2dr_loop ) )

LLBL( x86_p4_2dr_done ):

	POP_L( EBP )
	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points4_2d_no_rot
 *
 * x87 loop over a source vector.  For every element it stores
 *     DST(0) = SRC(0)*MAT(0) + SRC(3)*MAT(12)
 *     DST(1) = SRC(1)*MAT(5) + SRC(3)*MAT(13)
 * and copies SRC(2) and SRC(3) to DST(2) and DST(3) as raw 32-bit words.
 *
 * Register roles inside the loop:
 *     ESI  source element pointer, advanced by the source stride (EAX)
 *     EDI  destination element pointer, advanced by 16 bytes
 *     ECX  destination end pointer (destination start + count * 16)
 *     EDX  value loaded from ARG_MATRIX
 *     EBX, EBP  scratch for the two copied words
 * ESI, EDI, EBX and EBP are saved and restored; EAX, ECX and EDX are not.
 *
 * When the source count is zero the routine returns without touching the
 * destination's flags, count or size fields.
 *
 * Within one iteration every SRC read happens before the first DST write.
 *
 * NOTE(review): ARG_SOURCE/ARG_DEST/ARG_MATRIX and SRC()/DST()/MAT() are
 * defined outside this excerpt; presumably the ARG_* macros are stack
 * offsets built on FRAME_OFFSET and SRC/DST/MAT index 4-byte floats off
 * ESI/EDI/EDX -- confirm against their definitions.
 */
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_transform_points4_2d_no_rot )
|
||||
GLNAME( _mesa_x86_transform_points4_2d_no_rot ):
|
||||
|
||||
#define FRAME_OFFSET 16 /* four 4-byte registers are pushed below */
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
PUSH_L( EBX )
|
||||
PUSH_L( EBP )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI ) /* ESI = source vector descriptor */
|
||||
MOV_L( ARG_DEST, EDI ) /* EDI = destination vector descriptor */
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX ) /* EDX = matrix argument */
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* ECX = source element count */
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL( x86_p4_2dnrr_done ) ) /* nothing to do for an empty source */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = source stride */
|
||||
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set VEC_SIZE_4 in the destination flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* destination count = source count */
|
||||
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* destination size = 4 */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first source element */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first destination element */
|
||||
ADD_L( EDI, ECX ) /* ECX = destination end pointer */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( x86_p4_2dnrr_loop ):
|
||||
|
||||
FLD_S( SRC(0) ) /* F4 */
|
||||
FMUL_S( MAT(0) ) /* F4 = SRC(0) * MAT(0) */
|
||||
|
||||
FLD_S( SRC(1) ) /* F5 F4 */
|
||||
FMUL_S( MAT(5) ) /* F5 = SRC(1) * MAT(5) */
|
||||
|
||||
FLD_S( SRC(3) ) /* F0 F5 F4 */
|
||||
FMUL_S( MAT(12) ) /* F0 = SRC(3) * MAT(12) */
|
||||
FLD_S( SRC(3) ) /* F1 F0 F5 F4 */
|
||||
FMUL_S( MAT(13) ) /* F1 = SRC(3) * MAT(13) */
|
||||
|
||||
FXCH( ST(1) ) /* F0 F1 F5 F4 */
|
||||
FADDP( ST(0), ST(3) ) /* F1 F5 F4 (F4 += F0) */
|
||||
FADDP( ST(0), ST(1) ) /* F5 F4 (F5 += F1) */
|
||||
|
||||
MOV_L( SRC(2), EBX ) /* fetch SRC(2) as a raw word, before any DST write */
|
||||
MOV_L( SRC(3), EBP ) /* fetch SRC(3) as a raw word */
|
||||
|
||||
FXCH( ST(1) ) /* F4 F5 */
|
||||
FSTP_S( DST(0) ) /* F5 */
|
||||
FSTP_S( DST(1) ) /* */
|
||||
MOV_L( EBX, DST(2) ) /* DST(2) = SRC(2), unchanged */
|
||||
MOV_L( EBP, DST(3) ) /* DST(3) = SRC(3), unchanged */
|
||||
|
||||
LLBL( x86_p4_2dnrr_skip ): /* not referenced by any jump in this routine */
|
||||
|
||||
ADD_L( CONST(16), EDI ) /* next destination element (16 bytes each) */
|
||||
ADD_L( EAX, ESI ) /* next source element (source stride) */
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL( x86_p4_2dnrr_loop ) ) /* loop until EDI reaches the end pointer */
|
||||
|
||||
LLBL( x86_p4_2dnrr_done ):
|
||||
|
||||
POP_L( EBP ) /* restore in reverse push order */
|
||||
POP_L( EBX )
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points4_identity
 *
 * Identity case: copies each element's four 32-bit words, SRC(0)..SRC(3),
 * to DST(0)..DST(3) with plain integer moves.  No floating-point work is
 * done and the matrix argument is never read.
 *
 * Register roles inside the loop:
 *     ESI  source element pointer, advanced by the source stride (EAX)
 *     EDI  destination element pointer, advanced by 16 bytes
 *     ECX  destination end pointer (destination start + count * 16)
 *     EBX, EDX  scratch for the words being copied
 * ESI, EDI and EBX are saved and restored; EAX, ECX and EDX are not.
 *
 * Early exits:
 *   - source count is zero: returns before the destination's flags,
 *     count or size are written;
 *   - source and destination start pointers are equal: the destination
 *     fields are updated but the copy loop is skipped.
 *
 * NOTE(review): ARG_SOURCE/ARG_DEST and SRC()/DST() are defined outside
 * this excerpt; presumably the ARG_* macros are stack offsets built on
 * FRAME_OFFSET and SRC/DST index 4-byte words off ESI/EDI -- confirm
 * against their definitions.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_identity )
GLNAME( _mesa_x86_transform_points4_identity ):

#define FRAME_OFFSET 12		/* three 4-byte registers are pushed below */
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )		/* ESI = source vector descriptor */
	MOV_L( ARG_DEST, EDI )			/* EDI = destination vector descriptor */

	/* The matrix argument is intentionally not loaded: nothing below
	 * reads it, and EDX is used purely as a scratch register in the loop.
	 */
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source element count */

	TEST_L( ECX, ECX )
	JZ( LLBL( x86_p4_ir_done ) )		/* nothing to do for an empty source */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )	/* set VEC_SIZE_4 in the destination flags */

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* destination count = source count */
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )	/* destination size = 4 */

	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source element */

	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first destination element */
	ADD_L( EDI, ECX )			/* ECX = destination end pointer */

	CMP_L( ESI, EDI )
	JE( LLBL( x86_p4_ir_done ) )		/* same start pointer: data is already in place */

	ALIGNTEXT16
LLBL( x86_p4_ir_loop ):

	MOV_L( SRC(0), EBX )
	MOV_L( SRC(1), EDX )

	MOV_L( EBX, DST(0) )
	MOV_L( EDX, DST(1) )

	MOV_L( SRC(2), EBX )
	MOV_L( SRC(3), EDX )

	MOV_L( EBX, DST(2) )
	MOV_L( EDX, DST(3) )

LLBL( x86_p4_ir_skip ):				/* not referenced by any jump in this routine */

	ADD_L( CONST(16), EDI )			/* next destination element (16 bytes each) */
	ADD_L( EAX, ESI )			/* next source element (source stride) */
	CMP_L( ECX, EDI )
	JNE( LLBL( x86_p4_ir_loop ) )		/* loop until EDI reaches the end pointer */

LLBL( x86_p4_ir_done ):

	POP_L( EBX )				/* restore in reverse push order */
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
|
Loading…
Reference in New Issue