updated Glide driver documentation

fixed SAL/SAR in assyntax.h (NASM)
fixed a NULL pointer assignment bug in t_vtx_api.c
cosmetic changes to t_vtx_x86.c & t_vtx_x86_gcc.S
enabled STDCALL with codegen (MinGW)
This commit is contained in:
Daniel Borca 2004-04-13 07:08:34 +00:00
parent 4ba589bbf0
commit fb7766853d
7 changed files with 471 additions and 287 deletions

View File

@ -3,7 +3,7 @@
Mesa-6.0 release notes: Mesa-6.1 release notes:
----------------------- -----------------------
1) Glide2 support has been ceased; in order to keep Voodoo Rush 1) Glide2 support has been ceased; in order to keep Voodoo Rush
@ -26,7 +26,7 @@ DOS (DJGPP), Windows9x/2k (MinGW/MSVC), Linux
How to compile: How to compile:
--------------- ---------------
DJGPP/MinGW/MSVC: DJGPP/MinGW:
Place the Glide3 SDK in the top Mesa directory: Place the Glide3 SDK in the top Mesa directory:
$(MESA)/glide3/include/*.h $(MESA)/glide3/include/*.h
$(MESA)/glide3/lib/ $(MESA)/glide3/lib/
@ -35,11 +35,9 @@ DJGPP/MinGW/MSVC:
Required libraries: Required libraries:
OS specific OS specific
Type: Type:
make -f Makefile.DJ HAVE_MMX=1 HAVE_3DNOW=1 FX=1 make -f Makefile.DJ X86=1 FX=1
or or
make -f Makefile.mgw HAVE_MMX=1 HAVE_3DNOW=1 FX=1 make -f Makefile.mgw X86=1 FX=1
or
nmake -f Makefile.wfx
Look into the corresponding makefiles for further information. Look into the corresponding makefiles for further information.
Linux: Linux:

View File

@ -134,7 +134,7 @@ x86/matypes.h: x86/gen_matypes.exe
x86/gen_matypes.exe: x86/gen_matypes.c x86/gen_matypes.exe: x86/gen_matypes.c
$(CC) -o $@ $(CFLAGS) -s $< $(CC) -o $@ $(CFLAGS) -s $<
# [dBorca] Hack alert: # [dBorca]
# glapi_x86.S needs some adjustments # glapi_x86.S needs some adjustments
# in order to generate correct entrypoints # in order to generate correct entrypoints
x86/glapi_x86.o: x86/glapi_x86.S x86/glapi_x86.o: x86/glapi_x86.S
@ -142,6 +142,11 @@ x86/glapi_x86.o: x86/glapi_x86.S
#main/dispatch.o: main/dispatch.c #main/dispatch.o: main/dispatch.c
# $(CC) -o $@ $(CFLAGS) -UUSE_X86_ASM -c $< # $(CC) -o $@ $(CFLAGS) -UUSE_X86_ASM -c $<
# [dBorca]
# if we want codegen, we have to stdcall
tnl/t_vtx_x86_gcc.o: tnl/t_vtx_x86_gcc.S
$(CC) -o $@ $(CFLAGS) -DSTDCALL_API -c $<
clean: clean:
-$(call UNLINK,array_cache/*.o) -$(call UNLINK,array_cache/*.o)
-$(call UNLINK,glapi/*.o) -$(call UNLINK,glapi/*.o)

View File

@ -102,8 +102,10 @@ static void _tnl_wrap_buffers( GLcontext *ctx )
/* Deal with buffer wrapping where provoked by the vertex buffer /* Deal with buffer wrapping where provoked by the vertex buffer
* filling up, as opposed to upgrade_vertex(). * filling up, as opposed to upgrade_vertex().
*
* Make it GLAPIENTRY, so we can tail from the codegen'ed Vertex*fv
*/ */
void _tnl_wrap_filled_vertex( GLcontext *ctx ) void GLAPIENTRY _tnl_wrap_filled_vertex( GLcontext *ctx )
{ {
TNLcontext *tnl = TNL_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx);
GLfloat *data = tnl->vtx.copied.buffer; GLfloat *data = tnl->vtx.copied.buffer;
@ -403,7 +405,7 @@ static attrfv_func do_choose( GLuint attr, GLuint sz )
/* Try to use codegen: /* Try to use codegen:
*/ */
#ifdef USE_X86_ASM #ifdef USE_X86_ASM
if (tnl->AllowCodegen) if (tnl->AllowCodegen)
tnl->vtx.tabfv[attr][sz-1] = do_codegen( ctx, attr, sz ); tnl->vtx.tabfv[attr][sz-1] = do_codegen( ctx, attr, sz );
@ -473,11 +475,15 @@ static void reset_attrfv( TNLcontext *tnl )
for (i = 0 ; i < _TNL_ATTRIB_MAX ; i++) for (i = 0 ; i < _TNL_ATTRIB_MAX ; i++)
if (tnl->vtx.attrsz[i]) { if (tnl->vtx.attrsz[i]) {
GLuint j = tnl->vtx.attrsz[i] - 1; GLint j = tnl->vtx.attrsz[i] - 1;
tnl->vtx.attrsz[i] = 0; tnl->vtx.attrsz[i] = 0;
if (i < _TNL_MAX_ATTR_CODEGEN) if (i < _TNL_MAX_ATTR_CODEGEN) {
tnl->vtx.tabfv[i][j] = choose[i][j]; while (j >= 0) {
tnl->vtx.tabfv[i][j] = choose[i][j];
j--;
}
}
} }
tnl->vtx.vertex_size = 0; tnl->vtx.vertex_size = 0;

View File

@ -49,7 +49,7 @@ extern void _tnl_vtx_destroy( GLcontext *ctx );
extern void _tnl_FlushVertices( GLcontext *ctx, GLuint flags ); extern void _tnl_FlushVertices( GLcontext *ctx, GLuint flags );
extern void _tnl_flush_vtx( GLcontext *ctx ); extern void _tnl_flush_vtx( GLcontext *ctx );
extern void _tnl_wrap_filled_vertex( GLcontext *ctx ); extern void GLAPIENTRY _tnl_wrap_filled_vertex( GLcontext *ctx );
/* t_vtx_exec.c: /* t_vtx_exec.c:
*/ */

View File

@ -60,22 +60,25 @@ EXTERN( _tnl_x86_Vertex2fv );
EXTERN( _tnl_x86_Vertex3fv ); EXTERN( _tnl_x86_Vertex3fv );
EXTERN( _tnl_x86_Vertex4fv ); EXTERN( _tnl_x86_Vertex4fv );
EXTERN( _tnl_x86_dispatch_attrf ); EXTERN( _tnl_x86_dispatch_attrf1 );
EXTERN( _tnl_x86_dispatch_attrf2 );
EXTERN( _tnl_x86_dispatch_attrf3 );
EXTERN( _tnl_x86_dispatch_attrf4 );
EXTERN( _tnl_x86_dispatch_attrfv ); EXTERN( _tnl_x86_dispatch_attrfv );
EXTERN( _tnl_x86_dispatch_multitexcoordf ); EXTERN( _tnl_x86_dispatch_multitexcoordf1 );
EXTERN( _tnl_x86_dispatch_multitexcoordf2 );
EXTERN( _tnl_x86_dispatch_multitexcoordf3 );
EXTERN( _tnl_x86_dispatch_multitexcoordf4 );
EXTERN( _tnl_x86_dispatch_multitexcoordfv ); EXTERN( _tnl_x86_dispatch_multitexcoordfv );
EXTERN( _tnl_x86_dispatch_vertexattribf ); EXTERN( _tnl_x86_dispatch_vertexattribf1 );
EXTERN( _tnl_x86_dispatch_vertexattribf2 );
EXTERN( _tnl_x86_dispatch_vertexattribf3 );
EXTERN( _tnl_x86_dispatch_vertexattribf4 );
EXTERN( _tnl_x86_dispatch_vertexattribfv ); EXTERN( _tnl_x86_dispatch_vertexattribfv );
EXTERN( _tnl_x86_choose_fv ); EXTERN( _tnl_x86_choose_fv );
static void notify( void )
{
GET_CURRENT_CONTEXT( ctx );
_tnl_wrap_filled_vertex( ctx );
}
#define DONT_KNOW_OFFSETS 1 #define DONT_KNOW_OFFSETS 1
@ -93,7 +96,7 @@ static void notify( void )
#define FIXUP( CODE, KNOWN_OFFSET, CHECKVAL, NEWVAL ) \ #define FIXUP( CODE, KNOWN_OFFSET, CHECKVAL, NEWVAL ) \
do { \ do { \
GLuint subst = 0x10101010 + CHECKVAL; \ GLint subst = 0x10101010 + CHECKVAL; \
\ \
if (DONT_KNOW_OFFSETS) { \ if (DONT_KNOW_OFFSETS) { \
while (*(int *)(CODE+offset) != subst) offset++; \ while (*(int *)(CODE+offset) != subst) offset++; \
@ -112,7 +115,7 @@ do { \
#define FIXUPREL( CODE, KNOWN_OFFSET, CHECKVAL, NEWVAL )\ #define FIXUPREL( CODE, KNOWN_OFFSET, CHECKVAL, NEWVAL )\
do { \ do { \
GLuint subst = 0x10101010 + CHECKVAL; \ GLint subst = 0x10101010 + CHECKVAL; \
\ \
if (DONT_KNOW_OFFSETS) { \ if (DONT_KNOW_OFFSETS) { \
while (*(int *)(CODE+offset) != subst) offset++; \ while (*(int *)(CODE+offset) != subst) offset++; \
@ -262,53 +265,16 @@ void _tnl_InitX86Codegen( struct _tnl_dynfn_generators *gen )
} }
static attrfv_func #define MKDISP(FUNC, SIZE, ATTR, WARP) \
_do_choose( GLuint attr, GLuint sz )
{
return NULL;
}
/* I purposely avoided one single macro, since they might need to be
* handled in different ways. Ohwell, once things get much clearer,
* they could collapse...
*/
#define MAKE_DISPATCH_ATTR(FUNC, SIZE, TYPE, ATTR) \
do { \ do { \
char *code; \ char *code; \
char *start = (char *)&_tnl_x86_dispatch_attr##TYPE; \ char *start = (char *)&WARP; \
char *end = (char *)&_tnl_x86_dispatch_attr##TYPE##_end; \ char *end = (char *)&WARP##_end; \
int offset = 0; \ int offset = 0; \
code = ALIGN_MALLOC( end - start, 16 ); \ code = ALIGN_MALLOC( end - start, 16 ); \
memcpy (code, start, end - start); \ memcpy (code, start, end - start); \
FIXUP(code, 0, 0, (int)&(TNL_CONTEXT(ctx)->vtx.tabfv[ATTR][SIZE-1]));\ FIXUP(code, 0, 0, (int)&(TNL_CONTEXT(ctx)->vtx.tabfv[ATTR][SIZE-1]));\
vfmt->FUNC##SIZE##TYPE = code; \ *(void **)&vfmt->FUNC = code; \
} while (0)
#define MAKE_DISPATCH_MULTITEXCOORD(FUNC, SIZE, TYPE, ATTR) \
do { \
char *code; \
char *start = (char *)&_tnl_x86_dispatch_multitexcoord##TYPE; \
char *end = (char *)&_tnl_x86_dispatch_multitexcoord##TYPE##_end; \
int offset = 0; \
code = ALIGN_MALLOC( end - start, 16 ); \
memcpy (code, start, end - start); \
FIXUP(code, 0, 0, (int)&(TNL_CONTEXT(ctx)->vtx.tabfv[_TNL_ATTRIB_TEX0][SIZE-1]));\
vfmt->FUNC##SIZE##TYPE##ARB = code; \
} while (0)
#define MAKE_DISPATCH_VERTEXATTRIB(FUNC, SIZE, TYPE, ATTR) \
do { \
char *code; \
char *start = (char *)&_tnl_x86_dispatch_vertexattrib##TYPE; \
char *end = (char *)&_tnl_x86_dispatch_vertexattrib##TYPE##_end; \
int offset = 0; \
code = ALIGN_MALLOC( end - start, 16 ); \
memcpy (code, start, end - start); \
FIXUP(code, 0, 0, (int)&(TNL_CONTEXT(ctx)->vtx.tabfv[0][SIZE-1])); \
vfmt->FUNC##SIZE##TYPE##NV = code; \
} while (0) } while (0)
@ -319,48 +285,48 @@ void _tnl_x86_exec_vtxfmt_init( GLcontext *ctx )
{ {
GLvertexformat *vfmt = &(TNL_CONTEXT(ctx)->exec_vtxfmt); GLvertexformat *vfmt = &(TNL_CONTEXT(ctx)->exec_vtxfmt);
MAKE_DISPATCH_ATTR(Color,3,f, _TNL_ATTRIB_COLOR0); MKDISP(Color3f, 3, _TNL_ATTRIB_COLOR0, _tnl_x86_dispatch_attrf3);
MAKE_DISPATCH_ATTR(Color,3,fv, _TNL_ATTRIB_COLOR0); MKDISP(Color3fv, 3, _TNL_ATTRIB_COLOR0, _tnl_x86_dispatch_attrfv);
MAKE_DISPATCH_ATTR(Color,4,f, _TNL_ATTRIB_COLOR0); MKDISP(Color4f, 4, _TNL_ATTRIB_COLOR0, _tnl_x86_dispatch_attrf4);
MAKE_DISPATCH_ATTR(Color,4,fv, _TNL_ATTRIB_COLOR0); MKDISP(Color4fv, 4, _TNL_ATTRIB_COLOR0, _tnl_x86_dispatch_attrfv);
/* vfmt->FogCoordfEXT = _tnl_FogCoordfEXT; MKDISP(FogCoordfEXT, 1, _TNL_ATTRIB_FOG, _tnl_x86_dispatch_attrf1);
vfmt->FogCoordfvEXT = _tnl_FogCoordfvEXT;*/ MKDISP(FogCoordfvEXT, 1, _TNL_ATTRIB_FOG, _tnl_x86_dispatch_attrfv);
MAKE_DISPATCH_ATTR(Normal,3,f, _TNL_ATTRIB_NORMAL); MKDISP(Normal3f, 3, _TNL_ATTRIB_NORMAL, _tnl_x86_dispatch_attrf3);
MAKE_DISPATCH_ATTR(Normal,3,fv, _TNL_ATTRIB_NORMAL); MKDISP(Normal3fv, 3, _TNL_ATTRIB_NORMAL, _tnl_x86_dispatch_attrfv);
/* vfmt->SecondaryColor3fEXT = _tnl_SecondaryColor3fEXT; MKDISP(SecondaryColor3fEXT, 3, _TNL_ATTRIB_COLOR1, _tnl_x86_dispatch_attrf3);
vfmt->SecondaryColor3fvEXT = _tnl_SecondaryColor3fvEXT; */ MKDISP(SecondaryColor3fvEXT,3, _TNL_ATTRIB_COLOR1, _tnl_x86_dispatch_attrfv);
MAKE_DISPATCH_ATTR(TexCoord,1,f, _TNL_ATTRIB_TEX0); MKDISP(TexCoord1f, 1, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrf1);
MAKE_DISPATCH_ATTR(TexCoord,1,fv, _TNL_ATTRIB_TEX0); MKDISP(TexCoord1fv, 1, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrfv);
MAKE_DISPATCH_ATTR(TexCoord,2,f, _TNL_ATTRIB_TEX0); MKDISP(TexCoord2f, 2, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrf2);
MAKE_DISPATCH_ATTR(TexCoord,2,fv, _TNL_ATTRIB_TEX0); MKDISP(TexCoord2fv, 2, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrfv);
MAKE_DISPATCH_ATTR(TexCoord,3,f, _TNL_ATTRIB_TEX0); MKDISP(TexCoord3f, 3, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrf3);
MAKE_DISPATCH_ATTR(TexCoord,3,fv, _TNL_ATTRIB_TEX0); MKDISP(TexCoord3fv, 3, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrfv);
MAKE_DISPATCH_ATTR(TexCoord,4,f, _TNL_ATTRIB_TEX0); MKDISP(TexCoord4f, 4, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrf4);
MAKE_DISPATCH_ATTR(TexCoord,4,fv, _TNL_ATTRIB_TEX0); MKDISP(TexCoord4fv, 4, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrfv);
MAKE_DISPATCH_ATTR(Vertex,2,f, _TNL_ATTRIB_POS); MKDISP(Vertex2f, 2, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrf2);
MAKE_DISPATCH_ATTR(Vertex,2,fv, _TNL_ATTRIB_POS); MKDISP(Vertex2fv, 2, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrfv);
MAKE_DISPATCH_ATTR(Vertex,3,f, _TNL_ATTRIB_POS); MKDISP(Vertex3f, 3, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrf3);
MAKE_DISPATCH_ATTR(Vertex,3,fv, _TNL_ATTRIB_POS); MKDISP(Vertex3fv, 3, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrfv);
MAKE_DISPATCH_ATTR(Vertex,4,f, _TNL_ATTRIB_POS); MKDISP(Vertex4f, 4, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrf4);
MAKE_DISPATCH_ATTR(Vertex,4,fv, _TNL_ATTRIB_POS); MKDISP(Vertex4fv, 4, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrfv);
MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,1,f, 0); MKDISP(MultiTexCoord1fARB, 1, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordf1);
MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,1,fv, 0); MKDISP(MultiTexCoord1fvARB, 1, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordfv);
MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,2,f, 0); MKDISP(MultiTexCoord2fARB, 2, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordf2);
MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,2,fv, 0); MKDISP(MultiTexCoord2fvARB, 2, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordfv);
MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,3,f, 0); MKDISP(MultiTexCoord3fARB, 3, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordf3);
MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,3,fv, 0); MKDISP(MultiTexCoord3fvARB, 3, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordfv);
MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,4,f, 0); MKDISP(MultiTexCoord4fARB, 4, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordf4);
MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,4,fv, 0); MKDISP(MultiTexCoord4fvARB, 4, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordfv);
MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,1,f, 0); MKDISP(VertexAttrib1fNV, 1, 0, _tnl_x86_dispatch_vertexattribf1);
MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,1,fv, 0); MKDISP(VertexAttrib1fvNV, 1, 0, _tnl_x86_dispatch_vertexattribfv);
MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,2,f, 0); MKDISP(VertexAttrib2fNV, 2, 0, _tnl_x86_dispatch_vertexattribf2);
MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,2,fv, 0); MKDISP(VertexAttrib2fvNV, 2, 0, _tnl_x86_dispatch_vertexattribfv);
MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,3,f, 0); MKDISP(VertexAttrib3fNV, 3, 0, _tnl_x86_dispatch_vertexattribf3);
MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,3,fv, 0); MKDISP(VertexAttrib3fvNV, 3, 0, _tnl_x86_dispatch_vertexattribfv);
MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,4,f, 0); MKDISP(VertexAttrib4fNV, 4, 0, _tnl_x86_dispatch_vertexattribf4);
MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,4,fv, 0); MKDISP(VertexAttrib4fvNV, 4, 0, _tnl_x86_dispatch_vertexattribfv);
} }
@ -384,7 +350,7 @@ void _tnl_x86choosers( attrfv_func (*choose)[4],
FIXUP(code, 0, 0, attr); FIXUP(code, 0, 0, attr);
FIXUP(code, 0, 1, size + 1); FIXUP(code, 0, 1, size + 1);
FIXUPREL(code, 0, 2, do_choose); FIXUPREL(code, 0, 2, do_choose);
choose[attr][size] = code; choose[attr][size] = (attrfv_func)code;
} }
} }
} }

View File

@ -28,97 +28,114 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* /*
* Authors: * Authors:
* Keith Whitwell <keith@tungstengraphics.com> * Keith Whitwell <keith@tungstengraphics.com>
* Daniel Borca <dborca@yahoo.com>
*/ */
#if !defined (__DJGPP__) && !defined (__MINGW32__) #if defined (__DJGPP__) || defined (__MINGW32__)
#define GLOBL( x ) \
.globl x; \
x:
#else /* defined(__DJGPP__) || defined (__MINGW32__) */
#define GLOBL( x ) \ #define GLOBL( x ) \
.globl _##x; \ .globl _##x; \
_##x: _##x:
#else /* !defined (__DJGPP__) && !defined (__MINGW32__) */
#define GLOBL( x ) \
.globl x; \
x:
#endif /* !defined (__DJGPP__) && !defined (__MINGW32__) */
#endif /* defined(__DJGPP__) || defined (__MINGW32__) */
.data #if !defined (STDCALL_API)
.align 4 #define RETCLEAN( x ) ret
#else
#define RETCLEAN( x ) ret $x
#endif
// Someone who knew a lot about this sort of thing would use this
// macro to note current offsets, etc in a special region of the #define _JMP(x) \
// object file & just make everything work out neat. I do not know .byte 0xe9; \
// enough to do that... .long x
#define _CALL(x) \
.byte 0xe8; \
.long x
/* Someone who knew a lot about this sort of thing would use this
* macro to note current offsets, etc in a special region of the
* object file & just make everything work out neat. I don't know
* enough to do that...
*/
#define SUBST( x ) (0x10101010 + x) #define SUBST( x ) (0x10101010 + x)
.data
// [dBorca] TODO
// Unfold functions for each vertex size?
// Build super-specialized SSE versions?
// STDCALL woes (HAVE_NONSTANDARD_GLAPIENTRY):
// need separate routine for the non "fv" case,
// to clean up the stack!
/* [dBorca] TODO
* Unfold functions for each vertex size?
* Build super-specialized SSE versions?
*
* There is a trick in Vertex*fv: under certain conditions,
* we tail to _tnl_wrap_filled_vertex(ctx). This means that
* if Vertex*fv is STDCALL, then _tnl_wrap_filled_vertex must
* be STDCALL as well, because (GLcontext *) and (GLfloat *)
* have the same size.
*/
.align 4
GLOBL ( _tnl_x86_Vertex1fv ) GLOBL ( _tnl_x86_Vertex1fv )
movl 4(%esp), %ecx movl 4(%esp), %ecx
push %edi push %edi
push %esi push %esi
movl SUBST(0), %edi # 0x0 --> tnl->vtx.vbptr movl SUBST(0), %edi /* 0x0 --> tnl->vtx.vbptr */
movl (%ecx), %edx # load v[0] movl (%ecx), %edx /* load v[0] */
movl %edx, (%edi) # tnl->vtx.vbptr[0] = v[0] movl %edx, (%edi) /* tnl->vtx.vbptr[0] = v[0] */
addl $4, %edi # tnl->vtx.vbptr += 1 addl $4, %edi /* tnl->vtx.vbptr += 1 */
movl $SUBST(1), %ecx # 0x1 --> (tnl->vtx.vertex_size - 1) movl $SUBST(1), %ecx /* 0x1 --> (tnl->vtx.vertex_size - 1) */
movl $SUBST(2), %esi # 0x2 --> (tnl->vtx.vertex + 1) movl $SUBST(2), %esi /* 0x2 --> (tnl->vtx.vertex + 1) */
repz repz
movsl %ds:(%esi), %es:(%edi) movsl %ds:(%esi), %es:(%edi)
movl %edi, SUBST(0) # 0x0 --> tnl->vtx.vbptr movl %edi, SUBST(0) /* 0x0 --> tnl->vtx.vbptr */
movl SUBST(3), %edx # 0x3 --> counter movl SUBST(3), %edx /* 0x3 --> counter */
pop %esi pop %esi
pop %edi pop %edi
dec %edx # counter-- dec %edx /* counter-- */
movl %edx, SUBST(3) # 0x3 --> counter movl %edx, SUBST(3) /* 0x3 --> counter */
jne .0 # if (counter != 0) return je .0 /* if (counter == 0) goto .0 */
pushl $SUBST(4) # 0x4 --> ctx RETCLEAN(4) /* return */
.byte 0xe8 # call ... .balign 16
.long SUBST(5) # ... _tnl_wrap_filled_vertex(ctx)
pop %eax
.0: .0:
ret # return movl $SUBST(4), %eax /* load ctx */
movl %eax, 4(%esp) /* push ctx */
_JMP (SUBST(5)) /* jmp _tnl_wrap_filled_vertex */
GLOBL ( _tnl_x86_Vertex1fv_end ) GLOBL ( _tnl_x86_Vertex1fv_end )
.align 4 .align 4
GLOBL ( _tnl_x86_Vertex2fv ) GLOBL ( _tnl_x86_Vertex2fv )
movl 4(%esp), %ecx movl 4(%esp), %ecx
push %edi push %edi
push %esi push %esi
movl SUBST(0), %edi # load tnl->vtx.vbptr movl SUBST(0), %edi /* load tnl->vtx.vbptr */
movl (%ecx), %edx # load v[0] movl (%ecx), %edx /* load v[0] */
movl 4(%ecx), %eax # load v[1] movl 4(%ecx), %eax /* load v[1] */
movl %edx, (%edi) # tnl->vtx.vbptr[0] = v[0] movl %edx, (%edi) /* tnl->vtx.vbptr[0] = v[0] */
movl %eax, 4(%edi) # tnl->vtx.vbptr[1] = v[1] movl %eax, 4(%edi) /* tnl->vtx.vbptr[1] = v[1] */
addl $8, %edi # tnl->vtx.vbptr += 2 addl $8, %edi /* tnl->vtx.vbptr += 2 */
movl $SUBST(1), %ecx # vertex_size - 2 movl $SUBST(1), %ecx /* vertex_size - 2 */
movl $SUBST(2), %esi # tnl->vtx.vertex + 2 movl $SUBST(2), %esi /* tnl->vtx.vertex + 2 */
repz repz
movsl %ds:(%esi), %es:(%edi) movsl %ds:(%esi), %es:(%edi)
movl %edi, SUBST(0) # save tnl->vtx.vbptr movl %edi, SUBST(0) /* save tnl->vtx.vbptr */
movl SUBST(3), %edx # load counter movl SUBST(3), %edx /* load counter */
pop %esi pop %esi
pop %edi pop %edi
dec %edx # counter-- dec %edx /* counter-- */
movl %edx, SUBST(3) # save counter movl %edx, SUBST(3) /* save counter */
jne .1 # if (counter != 0) return je .1 /* if (counter == 0) goto .1 */
pushl $SUBST(4) # load ctx RETCLEAN(4) /* return */
.byte 0xe8 # call ... .balign 16
.long SUBST(5) # ... _tnl_wrap_filled_vertex(ctx)
pop %eax
.1: .1:
ret # return movl $SUBST(4), %eax /* load ctx */
movl %eax, 4(%esp) /* push ctx */
_JMP (SUBST(5)) /* jmp _tnl_wrap_filled_vertex */
GLOBL ( _tnl_x86_Vertex2fv_end ) GLOBL ( _tnl_x86_Vertex2fv_end )
.align 4 .align 4
@ -126,92 +143,88 @@ GLOBL ( _tnl_x86_Vertex3fv )
movl 4(%esp), %ecx movl 4(%esp), %ecx
push %edi push %edi
push %esi push %esi
movl SUBST(0), %edi # load tnl->vtx.vbptr movl SUBST(0), %edi /* load tnl->vtx.vbptr */
movl (%ecx), %edx # load v[0] movl (%ecx), %edx /* load v[0] */
movl 4(%ecx), %eax # load v[1] movl 4(%ecx), %eax /* load v[1] */
movl 8(%ecx), %esi # load v[2] movl 8(%ecx), %esi /* load v[2] */
movl %edx, (%edi) # tnl->vtx.vbptr[0] = v[0] movl %edx, (%edi) /* tnl->vtx.vbptr[0] = v[0] */
movl %eax, 4(%edi) # tnl->vtx.vbptr[1] = v[1] movl %eax, 4(%edi) /* tnl->vtx.vbptr[1] = v[1] */
movl %esi, 8(%edi) # tnl->vtx.vbptr[2] = v[2] movl %esi, 8(%edi) /* tnl->vtx.vbptr[2] = v[2] */
addl $12, %edi # tnl->vtx.vbptr += 3 addl $12, %edi /* tnl->vtx.vbptr += 3 */
movl $SUBST(1), %ecx # vertex_size - 3 movl $SUBST(1), %ecx /* vertex_size - 3 */
movl $SUBST(2), %esi # tnl->vtx.vertex + 3 movl $SUBST(2), %esi /* tnl->vtx.vertex + 3 */
repz repz
movsl %ds:(%esi), %es:(%edi) movsl %ds:(%esi), %es:(%edi)
movl %edi, SUBST(0) # save tnl->vtx.vbptr movl %edi, SUBST(0) /* save tnl->vtx.vbptr */
movl SUBST(3), %edx # load counter movl SUBST(3), %edx /* load counter */
pop %esi pop %esi
pop %edi pop %edi
dec %edx # counter-- dec %edx /* counter-- */
movl %edx, SUBST(3) # save counter movl %edx, SUBST(3) /* save counter */
jne .2 # if (counter != 0) return je .2 /* if (counter == 0) goto .2 */
pushl $SUBST(4) # load ctx RETCLEAN(4) /* return */
.byte 0xe8 # call ... .balign 16
.long SUBST(5) # ... _tnl_wrap_filled_vertex(ctx)
pop %eax
.2: .2:
ret # return movl $SUBST(4), %eax /* load ctx */
movl %eax, 4(%esp) /* push ctx */
_JMP (SUBST(5)) /* jmp _tnl_wrap_filled_vertex */
GLOBL ( _tnl_x86_Vertex3fv_end ) GLOBL ( _tnl_x86_Vertex3fv_end )
.align 4 .align 4
GLOBL ( _tnl_x86_Vertex4fv ) GLOBL ( _tnl_x86_Vertex4fv )
movl 4(%esp), %ecx movl 4(%esp), %ecx
push %edi push %edi
push %esi push %esi
movl SUBST(0), %edi # load tnl->vtx.vbptr movl SUBST(0), %edi /* load tnl->vtx.vbptr */
movl (%ecx), %edx # load v[0] movl (%ecx), %edx /* load v[0] */
movl 4(%ecx), %eax # load v[1] movl 4(%ecx), %eax /* load v[1] */
movl 8(%ecx), %esi # load v[2] movl 8(%ecx), %esi /* load v[2] */
movl 12(%ecx), %ecx # load v[3] movl 12(%ecx), %ecx /* load v[3] */
movl %edx, (%edi) # tnl->vtx.vbptr[0] = v[0] movl %edx, (%edi) /* tnl->vtx.vbptr[0] = v[0] */
movl %eax, 4(%edi) # tnl->vtx.vbptr[1] = v[1] movl %eax, 4(%edi) /* tnl->vtx.vbptr[1] = v[1] */
movl %esi, 8(%edi) # tnl->vtx.vbptr[2] = v[2] movl %esi, 8(%edi) /* tnl->vtx.vbptr[2] = v[2] */
movl %ecx, 12(%edi) # tnl->vtx.vbptr[3] = v[3] movl %ecx, 12(%edi) /* tnl->vtx.vbptr[3] = v[3] */
addl $16, %edi # tnl->vtx.vbptr += 4 addl $16, %edi /* tnl->vtx.vbptr += 4 */
movl $SUBST(1), %ecx # vertex_size - 4 movl $SUBST(1), %ecx /* vertex_size - 4 */
movl $SUBST(2), %esi # tnl->vtx.vertex + 3 movl $SUBST(2), %esi /* tnl->vtx.vertex + 4 */
repz repz
movsl %ds:(%esi), %es:(%edi) movsl %ds:(%esi), %es:(%edi)
movl %edi, SUBST(0) # save tnl->vtx.vbptr movl %edi, SUBST(0) /* save tnl->vtx.vbptr */
movl SUBST(3), %edx # load counter movl SUBST(3), %edx /* load counter */
pop %esi pop %esi
pop %edi pop %edi
dec %edx # counter-- dec %edx /* counter-- */
movl %edx, SUBST(3) # save counter movl %edx, SUBST(3) /* save counter */
jne .3 # if (counter != 0) return je .3 /* if (counter == 0) goto .3 */
pushl $SUBST(4) # load ctx RETCLEAN(4) /* return */
.byte 0xe8 # call ... .balign 16
.long SUBST(5) # ... _tnl_wrap_filled_vertex(ctx)
pop %eax
.3: .3:
ret # return movl $SUBST(4), %eax /* load ctx */
movl %eax, 4(%esp) /* push ctx */
_JMP (SUBST(5)) /* jmp _tnl_wrap_filled_vertex */
GLOBL ( _tnl_x86_Vertex4fv_end ) GLOBL ( _tnl_x86_Vertex4fv_end )
/** /**
* Generic handlers for vector format data. * Generic handlers for vector format data.
*/ */
GLOBL( _tnl_x86_Attribute1fv )
GLOBL( _tnl_x86_Attribute1fv)
movl 4(%esp), %ecx movl 4(%esp), %ecx
movl (%ecx), %eax /* load v[0] */ movl (%ecx), %eax /* load v[0] */
movl %eax, SUBST(0) /* store v[0] to current vertex */ movl %eax, SUBST(0) /* store v[0] to current vertex */
ret RETCLEAN(4)
GLOBL ( _tnl_x86_Attribute1fv_end ) GLOBL ( _tnl_x86_Attribute1fv_end )
GLOBL( _tnl_x86_Attribute2fv) GLOBL( _tnl_x86_Attribute2fv )
movl 4(%esp), %ecx movl 4(%esp), %ecx
movl (%ecx), %eax /* load v[0] */ movl (%ecx), %eax /* load v[0] */
movl 4(%ecx), %edx /* load v[1] */ movl 4(%ecx), %edx /* load v[1] */
movl %eax, SUBST(0) /* store v[0] to current vertex */ movl %eax, SUBST(0) /* store v[0] to current vertex */
movl %edx, SUBST(1) /* store v[1] to current vertex */ movl %edx, SUBST(1) /* store v[1] to current vertex */
ret RETCLEAN(4)
GLOBL ( _tnl_x86_Attribute2fv_end ) GLOBL ( _tnl_x86_Attribute2fv_end )
GLOBL( _tnl_x86_Attribute3fv )
GLOBL( _tnl_x86_Attribute3fv)
movl 4(%esp), %ecx movl 4(%esp), %ecx
movl (%ecx), %eax /* load v[0] */ movl (%ecx), %eax /* load v[0] */
movl 4(%ecx), %edx /* load v[1] */ movl 4(%ecx), %edx /* load v[1] */
@ -219,10 +232,10 @@ GLOBL( _tnl_x86_Attribute3fv)
movl %eax, SUBST(0) /* store v[0] to current vertex */ movl %eax, SUBST(0) /* store v[0] to current vertex */
movl %edx, SUBST(1) /* store v[1] to current vertex */ movl %edx, SUBST(1) /* store v[1] to current vertex */
movl %ecx, SUBST(2) /* store v[2] to current vertex */ movl %ecx, SUBST(2) /* store v[2] to current vertex */
ret RETCLEAN(4)
GLOBL ( _tnl_x86_Attribute3fv_end ) GLOBL ( _tnl_x86_Attribute3fv_end )
GLOBL( _tnl_x86_Attribute4fv) GLOBL( _tnl_x86_Attribute4fv )
movl 4(%esp), %ecx movl 4(%esp), %ecx
movl (%ecx), %eax /* load v[0] */ movl (%ecx), %eax /* load v[0] */
movl 4(%ecx), %edx /* load v[1] */ movl 4(%ecx), %edx /* load v[1] */
@ -232,84 +245,131 @@ GLOBL( _tnl_x86_Attribute4fv)
movl 12(%ecx), %edx /* load v[3] */ movl 12(%ecx), %edx /* load v[3] */
movl %eax, SUBST(2) /* store v[2] to current vertex */ movl %eax, SUBST(2) /* store v[2] to current vertex */
movl %edx, SUBST(3) /* store v[3] to current vertex */ movl %edx, SUBST(3) /* store v[3] to current vertex */
ret RETCLEAN(4)
GLOBL ( _tnl_x86_Attribute4fv_end ) GLOBL ( _tnl_x86_Attribute4fv_end )
// Choosers: /* Choosers:
*
// Must generate all of these ahead of first usage. Generate at * Must generate all of these ahead of first usage. Generate at
// compile-time? * compile-time?
*/
GLOBL( _tnl_x86_choose_fv )
GLOBL( _tnl_x86_choose_fv) subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
subl $12, %esp # gcc does 16 byte alignment of stack frames? movl $SUBST(0), (%esp) /* arg 0 - attrib */
movl $SUBST(0), (%esp) # arg 0 - attrib movl $SUBST(1), 4(%esp) /* arg 1 - N */
movl $SUBST(1), 4(%esp) # arg 1 - N _CALL (SUBST(2)) /* call do_choose */
.byte 0xe8 # call ... add $12, %esp /* tear down stack frame */
.long SUBST(2) # ... do_choose jmp *%eax /* jump to new func */
add $12, %esp # tear down stack frame
jmp *%eax # jump to new func
GLOBL ( _tnl_x86_choose_fv_end ) GLOBL ( _tnl_x86_choose_fv_end )
/* FIRST LEVEL FUNCTIONS -- these are plugged directly into GL dispatch.
*
* In the 1st level dispatch functions, switch to a different
* calling convention -- (const GLfloat *v) in %ecx.
*
* As with regular (x86) dispatch, don't create a new stack frame -
* just let the 'ret' in the dispatched function return straight
* back to the original caller.
*
* Vertex/Normal/Color, etc: the address of the function pointer
* is known at codegen time.
*/
// FIRST LEVEL FUNCTIONS -- these are plugged directly into GL dispatch. /* Unfortunately, have to play with the stack in the non-fv case:
*/
#if !defined (STDCALL_API)
GLOBL( _tnl_x86_dispatch_attrf1 )
GLOBL( _tnl_x86_dispatch_attrf2 )
GLOBL( _tnl_x86_dispatch_attrf3 )
GLOBL( _tnl_x86_dispatch_attrf4 )
subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
leal 16(%esp), %edx /* address of first float on stack */
movl %edx, (%esp) /* save as 'v' */
call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
addl $12, %esp /* tear down frame */
ret /* return */
GLOBL( _tnl_x86_dispatch_attrf4_end )
GLOBL( _tnl_x86_dispatch_attrf3_end )
GLOBL( _tnl_x86_dispatch_attrf2_end )
GLOBL( _tnl_x86_dispatch_attrf1_end )
#else /* defined(STDCALL_API) */
GLOBL( _tnl_x86_dispatch_attrf1 )
subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
leal 16(%esp), %edx /* address of first float on stack */
movl %edx, (%esp) /* save as 'v' */
call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
ret $4 /* return */
GLOBL( _tnl_x86_dispatch_attrf1_end )
// In the 1st level dispatch functions, switch to a different GLOBL( _tnl_x86_dispatch_attrf2 )
// calling convention -- (const GLfloat *v) in %ecx. subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
// leal 16(%esp), %edx /* address of first float on stack */
// As with regular (x86) dispatch, do not create a new stack frame - movl %edx, (%esp) /* save as 'v' */
// just let the 'ret' in the dispatched function return straight call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
// back to the original caller. addl $8, %esp /* tear down frame (4 shaved off by the callee) */
ret $8 /* return */
GLOBL( _tnl_x86_dispatch_attrf2_end )
GLOBL( _tnl_x86_dispatch_attrf3 )
subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
leal 16(%esp), %edx /* address of first float on stack */
movl %edx, (%esp) /* save as 'v' */
call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
ret $12 /* return */
GLOBL( _tnl_x86_dispatch_attrf3_end )
GLOBL( _tnl_x86_dispatch_attrf4 )
subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
leal 16(%esp), %edx /* address of first float on stack */
movl %edx, (%esp) /* save as 'v' */
call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
ret $16 /* return */
GLOBL( _tnl_x86_dispatch_attrf4_end )
#endif /* defined(STDCALL_API) */
// Vertex/Normal/Color, etc: the address of the function pointer /* The fv case is simpler:
// is known at codegen time. */
// Unfortunately, have to play with the stack in the non-fv case:
//
GLOBL( _tnl_x86_dispatch_attrf )
subl $12, %esp # gcc does 16 byte alignment of stack frames?
leal 16(%esp), %edx # address of first float on stack
movl %edx, (%esp) # save as 'v'
call *SUBST(0) # 0x0 --> tabfv[attr][n]
addl $12, %esp # tear down frame
ret # return
GLOBL( _tnl_x86_dispatch_attrf_end )
// The fv case is simpler:
//
GLOBL( _tnl_x86_dispatch_attrfv ) GLOBL( _tnl_x86_dispatch_attrfv )
jmp *SUBST(0) # 0x0 --> tabfv[attr][n] jmp *SUBST(0) /* 0x0 --> tabfv[attr][n] */
GLOBL( _tnl_x86_dispatch_attrfv_end ) GLOBL( _tnl_x86_dispatch_attrfv_end )
// MultiTexcoord: the address of the function pointer must be /* MultiTexcoord: the address of the function pointer must be
// calculated, but can use the index argument slot to hold 'v', and * calculated, but can use the index argument slot to hold 'v', and
// avoid setting up a new stack frame. * avoid setting up a new stack frame.
// *
// [dBorca] * [dBorca]
// right, this would be the preferred approach, but gcc does not * right, this would be the preferred approach, but gcc does not
// clean up the stack after each function call when optimizing (-fdefer-pop); * clean up the stack after each function call when optimizing (-fdefer-pop);
// can it make assumptions about what is already on the stack? I dunno, * can it make assumptions about what's already on the stack? I dunno,
// but in this case, we can't mess with the caller's stack frame, and * but in this case, we can't mess with the caller's stack frame, and
// we must use a model like '_x86_dispatch_attrfv' above. Caveat emptor! * we must use a model like `_x86_dispatch_attrfv' above. Caveat emptor!
*/
// Also, will only need a maximum of four of each of these per context: /* Also, will only need a maximum of four of each of these per context:
// */
GLOBL( _tnl_x86_dispatch_multitexcoordf ) #if !defined (STDCALL_API)
GLOBL( _tnl_x86_dispatch_multitexcoordf1 )
GLOBL( _tnl_x86_dispatch_multitexcoordf2 )
GLOBL( _tnl_x86_dispatch_multitexcoordf3 )
GLOBL( _tnl_x86_dispatch_multitexcoordf4 )
movl 4(%esp), %ecx movl 4(%esp), %ecx
leal 8(%esp), %edx leal 8(%esp), %edx
andl $7, %ecx andl $7, %ecx
movl %edx, 4(%esp) movl %edx, 4(%esp)
sall $4, %ecx sall $4, %ecx
jmp *SUBST(0)(%ecx) # 0x0 - tabfv[tex0][n] jmp *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
GLOBL( _tnl_x86_dispatch_multitexcoordf_end ) GLOBL( _tnl_x86_dispatch_multitexcoordf4_end )
GLOBL( _tnl_x86_dispatch_multitexcoordf3_end )
GLOBL( _tnl_x86_dispatch_multitexcoordf2_end )
GLOBL( _tnl_x86_dispatch_multitexcoordf1_end )
GLOBL( _tnl_x86_dispatch_multitexcoordfv ) GLOBL( _tnl_x86_dispatch_multitexcoordfv )
movl 4(%esp), %ecx movl 4(%esp), %ecx
@ -317,32 +377,181 @@ GLOBL( _tnl_x86_dispatch_multitexcoordfv )
andl $7, %ecx andl $7, %ecx
movl %edx, 4(%esp) movl %edx, 4(%esp)
sall $4, %ecx sall $4, %ecx
jmp *SUBST(0)(%ecx) # 0x0 - tabfv[tex0][n] jmp *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
GLOBL( _tnl_x86_dispatch_multitexcoordfv_end ) GLOBL( _tnl_x86_dispatch_multitexcoordfv_end )
// VertexAttrib: the address of the function pointer must be #else /* defined (STDCALL_API) */
// calculated.
/*
 * STDCALL dispatch stub _tnl_x86_dispatch_multitexcoordf1.
 * Builds a small frame, passes the address of the arguments that follow
 * the first one as the single stack argument of the table entry selected
 * by the first argument, and returns with `ret $8' so that this routine
 * pops its own 8 bytes of arguments.
 */
GLOBL( _tnl_x86_dispatch_vertexattribf ) GLOBL( _tnl_x86_dispatch_multitexcoordf1 )
subl $12, %esp /* reserve 12 bytes; gcc does 16 byte alignment of stack frames? */
movl 16(%esp), %ecx /* 1st incoming arg: 12 bytes of frame + 4 of return address */
leal 20(%esp), %edx /* address of the 2nd incoming arg */
andl $7, %ecx /* keep the low three bits (0..7) */
movl %edx, (%esp) /* store that address as the callee's only stack arg */
sall $4, %ecx /* scale by 16 to get a byte offset */
call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n]; NOTE(review): SUBST(0) looks like a placeholder filled in elsewhere - confirm */
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
ret $8 /* return, popping 8 bytes of our own arguments */
GLOBL( _tnl_x86_dispatch_multitexcoordf1_end )
/*
 * STDCALL dispatch stub _tnl_x86_dispatch_multitexcoordf2.
 * Builds a small frame, passes the address of the arguments that follow
 * the first one as the single stack argument of the table entry selected
 * by the first argument, and returns with `ret $12' so that this routine
 * pops its own 12 bytes of arguments.
 */
GLOBL( _tnl_x86_dispatch_multitexcoordf2 )
subl $12, %esp /* reserve 12 bytes; gcc does 16 byte alignment of stack frames? */
movl 16(%esp), %ecx /* 1st incoming arg: 12 bytes of frame + 4 of return address */
leal 20(%esp), %edx /* address of the 2nd incoming arg */
andl $7, %ecx /* keep the low three bits (0..7) */
movl %edx, (%esp) /* store that address as the callee's only stack arg */
sall $4, %ecx /* scale by 16 to get a byte offset */
call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n]; NOTE(review): SUBST(0) looks like a placeholder filled in elsewhere - confirm */
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
ret $12 /* return, popping 12 bytes of our own arguments */
GLOBL( _tnl_x86_dispatch_multitexcoordf2_end )
/*
 * STDCALL dispatch stub _tnl_x86_dispatch_multitexcoordf3.
 * Builds a small frame, passes the address of the arguments that follow
 * the first one as the single stack argument of the table entry selected
 * by the first argument, and returns with `ret $16' so that this routine
 * pops its own 16 bytes of arguments.
 */
GLOBL( _tnl_x86_dispatch_multitexcoordf3 )
subl $12, %esp /* reserve 12 bytes; gcc does 16 byte alignment of stack frames? */
movl 16(%esp), %ecx /* 1st incoming arg: 12 bytes of frame + 4 of return address */
leal 20(%esp), %edx /* address of the 2nd incoming arg */
andl $7, %ecx /* keep the low three bits (0..7) */
movl %edx, (%esp) /* store that address as the callee's only stack arg */
sall $4, %ecx /* scale by 16 to get a byte offset */
call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n]; NOTE(review): SUBST(0) looks like a placeholder filled in elsewhere - confirm */
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
ret $16 /* return, popping 16 bytes of our own arguments */
GLOBL( _tnl_x86_dispatch_multitexcoordf3_end )
/*
 * STDCALL dispatch stub _tnl_x86_dispatch_multitexcoordf4.
 * Builds a small frame, passes the address of the arguments that follow
 * the first one as the single stack argument of the table entry selected
 * by the first argument, and returns with `ret $20' so that this routine
 * pops its own 20 bytes of arguments.
 */
GLOBL( _tnl_x86_dispatch_multitexcoordf4 )
subl $12, %esp /* reserve 12 bytes; gcc does 16 byte alignment of stack frames? */
movl 16(%esp), %ecx /* 1st incoming arg: 12 bytes of frame + 4 of return address */
leal 20(%esp), %edx /* address of the 2nd incoming arg */
andl $7, %ecx /* keep the low three bits (0..7) */
movl %edx, (%esp) /* store that address as the callee's only stack arg */
sall $4, %ecx /* scale by 16 to get a byte offset */
call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n]; NOTE(review): SUBST(0) looks like a placeholder filled in elsewhere - confirm */
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
ret $20 /* return, popping 20 bytes of our own arguments */
GLOBL( _tnl_x86_dispatch_multitexcoordf4_end )
/*
 * STDCALL dispatch stub _tnl_x86_dispatch_multitexcoordfv.
 * Same frame handling as the f1..f4 stubs, except that the second
 * incoming argument is loaded (`movl') and forwarded as-is instead of
 * having its address taken.  Returns with `ret $8', popping this
 * routine's own 8 bytes of arguments.
 */
GLOBL( _tnl_x86_dispatch_multitexcoordfv )
subl $12, %esp /* reserve 12 bytes; gcc does 16 byte alignment of stack frames? */
movl 16(%esp), %ecx /* 1st incoming arg: 12 bytes of frame + 4 of return address */
movl 20(%esp), %edx /* value of the 2nd incoming arg */
andl $7, %ecx /* keep the low three bits (0..7) */
movl %edx, (%esp) /* forward it as the callee's only stack arg */
sall $4, %ecx /* scale by 16 to get a byte offset */
call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n]; NOTE(review): SUBST(0) looks like a placeholder filled in elsewhere - confirm */
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
ret $8 /* return, popping 8 bytes of our own arguments */
GLOBL( _tnl_x86_dispatch_multitexcoordfv_end )
#endif /* defined (STDCALL_API) */
/* VertexAttrib: the address of the function pointer must be
* calculated.
*/
#if !defined (STDCALL_API)
/*
 * Non-STDCALL dispatch body shared by the vertexattribf1..f4 labels
 * (all four labels mark the same code).  The first stack argument is
 * clamped to at most 16 with an unsigned compare, then scaled by 16 to
 * index the table at SUBST(0); its slot is overwritten in place with the
 * address of the second stack argument.  The routine leaves through an
 * indirect `jmp', so the table entry returns directly to our caller.
 */
GLOBL( _tnl_x86_dispatch_vertexattribf1 )
GLOBL( _tnl_x86_dispatch_vertexattribf2 )
GLOBL( _tnl_x86_dispatch_vertexattribf3 )
GLOBL( _tnl_x86_dispatch_vertexattribf4 )
movl 4(%esp), %eax movl 4(%esp), %eax
cmpl $16, %eax cmpl $16, %eax
jb .8 # "cmovge" is not supported on all CPUs jb .8 /* "cmovge" is not supported on all CPUs */
movl $16, %eax movl $16, %eax
.8: .8:
leal 8(%esp), %ecx # calculate 'v' leal 8(%esp), %ecx /* calculate 'v' */
movl %ecx, 4(%esp) # save in 1st arg slot movl %ecx, 4(%esp) /* save in 1st arg slot */
sall $4, %eax sall $4, %eax
jmp *SUBST(0)(%eax) # 0x0 - tabfv[0][n] jmp *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
GLOBL( _tnl_x86_dispatch_vertexattribf_end ) GLOBL( _tnl_x86_dispatch_vertexattribf4_end )
GLOBL( _tnl_x86_dispatch_vertexattribf3_end )
GLOBL( _tnl_x86_dispatch_vertexattribf2_end )
GLOBL( _tnl_x86_dispatch_vertexattribf1_end )
/*
 * Non-STDCALL dispatch body for vertexattribfv.  The first stack
 * argument is clamped to at most 16 with an unsigned compare, then
 * scaled by 16 to index the table at SUBST(0); its slot is overwritten
 * in place with the value of the second stack argument.  The routine
 * leaves through an indirect `jmp', so the table entry returns directly
 * to our caller.
 */
GLOBL( _tnl_x86_dispatch_vertexattribfv ) GLOBL( _tnl_x86_dispatch_vertexattribfv )
movl 4(%esp), %eax movl 4(%esp), %eax
cmpl $16, %eax cmpl $16, %eax
jb .9 # "cmovge" is not supported on all CPUs jb .9 /* "cmovge" is not supported on all CPUs */
movl $16, %eax movl $16, %eax
.9: .9:
movl 8(%esp), %ecx # load 'v' movl 8(%esp), %ecx /* load 'v' */
movl %ecx, 4(%esp) # save in 1st arg slot movl %ecx, 4(%esp) /* save in 1st arg slot */
sall $4, %eax sall $4, %eax
jmp *SUBST(0)(%eax) # 0x0 - tabfv[0][n] jmp *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
GLOBL( _tnl_x86_dispatch_vertexattribfv_end ) GLOBL( _tnl_x86_dispatch_vertexattribfv_end )
#else /* defined (STDCALL_API) */
/*
 * STDCALL dispatch stub _tnl_x86_dispatch_vertexattribf1.
 * Clamps the first incoming argument to at most 16, passes the address
 * of the arguments that follow it ('v') as the single stack argument of
 * the selected table entry, and returns with `ret $8' so that this
 * routine pops its own 8 bytes of arguments.
 */
GLOBL( _tnl_x86_dispatch_vertexattribf1 )
subl $12, %esp /* reserve 12 bytes; gcc does 16 byte alignment of stack frames? */
movl 16(%esp), %eax /* 1st incoming arg: 12 bytes of frame + 4 of return address */
cmpl $16, %eax
jb .81 /* unsigned below 16: keep as is; "cmovge" is not supported on all CPUs */
movl $16, %eax /* otherwise clamp to 16 */
.81:
leal 20(%esp), %ecx /* calculate 'v' (address of the 2nd incoming arg) */
movl %ecx, (%esp) /* save in 1st arg slot of the callee */
sall $4, %eax /* scale by 16 to get a byte offset */
call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n]; NOTE(review): SUBST(0) looks like a placeholder filled in elsewhere - confirm */
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
ret $8 /* return, popping 8 bytes of our own arguments */
GLOBL( _tnl_x86_dispatch_vertexattribf1_end )
/*
 * STDCALL dispatch stub _tnl_x86_dispatch_vertexattribf2.
 * Clamps the first incoming argument to at most 16, passes the address
 * of the arguments that follow it ('v') as the single stack argument of
 * the selected table entry, and returns with `ret $12' so that this
 * routine pops its own 12 bytes of arguments.
 */
GLOBL( _tnl_x86_dispatch_vertexattribf2 )
subl $12, %esp /* reserve 12 bytes; gcc does 16 byte alignment of stack frames? */
movl 16(%esp), %eax /* 1st incoming arg: 12 bytes of frame + 4 of return address */
cmpl $16, %eax
jb .82 /* unsigned below 16: keep as is; "cmovge" is not supported on all CPUs */
movl $16, %eax /* otherwise clamp to 16 */
.82:
leal 20(%esp), %ecx /* calculate 'v' (address of the 2nd incoming arg) */
movl %ecx, (%esp) /* save in 1st arg slot of the callee */
sall $4, %eax /* scale by 16 to get a byte offset */
call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n]; NOTE(review): SUBST(0) looks like a placeholder filled in elsewhere - confirm */
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
ret $12 /* return, popping 12 bytes of our own arguments */
GLOBL( _tnl_x86_dispatch_vertexattribf2_end )
/*
 * STDCALL dispatch stub _tnl_x86_dispatch_vertexattribf3.
 * Clamps the first incoming argument to at most 16, passes the address
 * of the arguments that follow it ('v') as the single stack argument of
 * the selected table entry, and returns with `ret $16' so that this
 * routine pops its own 16 bytes of arguments.
 */
GLOBL( _tnl_x86_dispatch_vertexattribf3 )
subl $12, %esp /* reserve 12 bytes; gcc does 16 byte alignment of stack frames? */
movl 16(%esp), %eax /* 1st incoming arg: 12 bytes of frame + 4 of return address */
cmpl $16, %eax
jb .83 /* unsigned below 16: keep as is; "cmovge" is not supported on all CPUs */
movl $16, %eax /* otherwise clamp to 16 */
.83:
leal 20(%esp), %ecx /* calculate 'v' (address of the 2nd incoming arg) */
movl %ecx, (%esp) /* save in 1st arg slot of the callee */
sall $4, %eax /* scale by 16 to get a byte offset */
call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n]; NOTE(review): SUBST(0) looks like a placeholder filled in elsewhere - confirm */
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
ret $16 /* return, popping 16 bytes of our own arguments */
GLOBL( _tnl_x86_dispatch_vertexattribf3_end )
/*
 * STDCALL dispatch stub _tnl_x86_dispatch_vertexattribf4.
 * Clamps the first incoming argument to at most 16, passes the address
 * of the arguments that follow it ('v') as the single stack argument of
 * the selected table entry, and returns with `ret $20' so that this
 * routine pops its own 20 bytes of arguments.
 */
GLOBL( _tnl_x86_dispatch_vertexattribf4 )
subl $12, %esp /* reserve 12 bytes; gcc does 16 byte alignment of stack frames? */
movl 16(%esp), %eax /* 1st incoming arg: 12 bytes of frame + 4 of return address */
cmpl $16, %eax
jb .84 /* unsigned below 16: keep as is; "cmovge" is not supported on all CPUs */
movl $16, %eax /* otherwise clamp to 16 */
.84:
leal 20(%esp), %ecx /* calculate 'v' (address of the 2nd incoming arg) */
movl %ecx, (%esp) /* save in 1st arg slot of the callee */
sall $4, %eax /* scale by 16 to get a byte offset */
call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n]; NOTE(review): SUBST(0) looks like a placeholder filled in elsewhere - confirm */
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
ret $20 /* return, popping 20 bytes of our own arguments */
GLOBL( _tnl_x86_dispatch_vertexattribf4_end )
/*
 * STDCALL dispatch stub _tnl_x86_dispatch_vertexattribfv.
 * Clamps the first incoming argument to at most 16 and forwards the
 * second incoming argument ('v') unchanged as the single stack argument
 * of the selected table entry.  Returns with `ret $8', popping this
 * routine's own 8 bytes of arguments.
 */
GLOBL( _tnl_x86_dispatch_vertexattribfv )
subl $12, %esp /* reserve 12 bytes; gcc does 16 byte alignment of stack frames? */
movl 16(%esp), %eax /* 1st incoming arg: 12 bytes of frame + 4 of return address */
cmpl $16, %eax
jb .9 /* unsigned below 16: keep as is; "cmovge" is not supported on all CPUs */
movl $16, %eax /* otherwise clamp to 16 */
.9:
movl 20(%esp), %ecx /* load 'v' (value of the 2nd incoming arg) */
movl %ecx, (%esp) /* save in 1st arg slot of the callee */
sall $4, %eax /* scale by 16 to get a byte offset */
call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n]; NOTE(review): SUBST(0) looks like a placeholder filled in elsewhere - confirm */
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
ret $8 /* return, popping 8 bytes of our own arguments */
GLOBL( _tnl_x86_dispatch_vertexattribfv_end )
#endif /* defined (STDCALL_API) */

View File

@ -1300,11 +1300,11 @@ SECTION _DATA public align=16 class=DATA use32 flat
#define REPZ REPE #define REPZ REPE
#define RET ret #define RET ret
#define SAHF sahf #define SAHF sahf
#define SAL_L(a, b) sal L_(b), L_(a) #define SAL_L(a, b) sal L_(b), B_(a)
#define SAL_W(a, b) sal W_(b), W_(a) #define SAL_W(a, b) sal W_(b), B_(a)
#define SAL_B(a, b) sal B_(b), B_(a) #define SAL_B(a, b) sal B_(b), B_(a)
#define SAR_L(a, b) sar L_(b), L_(a) #define SAR_L(a, b) sar L_(b), B_(a)
#define SAR_W(a, b) sar W_(b), W_(a) #define SAR_W(a, b) sar W_(b), B_(a)
#define SAR_B(a, b) sar B_(b), B_(a) #define SAR_B(a, b) sar B_(b), B_(a)
#define SBB_L(a, b) sbb L_(b), L_(a) #define SBB_L(a, b) sbb L_(b), L_(a)
#define SBB_W(a, b) sbb W_(b), W_(a) #define SBB_W(a, b) sbb W_(b), W_(a)