updated Glide driver documentation
fixed SAL/SAR in assyntax.h (NASM); fixed a bug wrt NULL pointer assignment in t_vtx_api.c; cosmetics to t_vtx_x86.c & t_vtx_x86_gcc.S; enabled STDCALL with codegen (MinGW)
This commit is contained in:
parent
4ba589bbf0
commit
fb7766853d
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Mesa-6.0 release notes:
|
Mesa-6.1 release notes:
|
||||||
-----------------------
|
-----------------------
|
||||||
|
|
||||||
1) Glide2 support has been ceased; in order to keep Voodoo Rush
|
1) Glide2 support has been ceased; in order to keep Voodoo Rush
|
||||||
|
@ -26,7 +26,7 @@ DOS (DJGPP), Windows9x/2k (MinGW/MSVC), Linux
|
||||||
How to compile:
|
How to compile:
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
DJGPP/MinGW/MSVC:
|
DJGPP/MinGW:
|
||||||
Place the Glide3 SDK in the top Mesa directory:
|
Place the Glide3 SDK in the top Mesa directory:
|
||||||
$(MESA)/glide3/include/*.h
|
$(MESA)/glide3/include/*.h
|
||||||
$(MESA)/glide3/lib/
|
$(MESA)/glide3/lib/
|
||||||
|
@ -35,11 +35,9 @@ DJGPP/MinGW/MSVC:
|
||||||
Required libraries:
|
Required libraries:
|
||||||
OS specific
|
OS specific
|
||||||
Type:
|
Type:
|
||||||
make -f Makefile.DJ HAVE_MMX=1 HAVE_3DNOW=1 FX=1
|
make -f Makefile.DJ X86=1 FX=1
|
||||||
or
|
or
|
||||||
make -f Makefile.mgw HAVE_MMX=1 HAVE_3DNOW=1 FX=1
|
make -f Makefile.mgw X86=1 FX=1
|
||||||
or
|
|
||||||
nmake -f Makefile.wfx
|
|
||||||
Look into the corresponding makefiles for further information.
|
Look into the corresponding makefiles for further information.
|
||||||
|
|
||||||
Linux:
|
Linux:
|
||||||
|
|
|
@ -134,7 +134,7 @@ x86/matypes.h: x86/gen_matypes.exe
|
||||||
x86/gen_matypes.exe: x86/gen_matypes.c
|
x86/gen_matypes.exe: x86/gen_matypes.c
|
||||||
$(CC) -o $@ $(CFLAGS) -s $<
|
$(CC) -o $@ $(CFLAGS) -s $<
|
||||||
|
|
||||||
# [dBorca] Hack alert:
|
# [dBorca]
|
||||||
# glapi_x86.S needs some adjustments
|
# glapi_x86.S needs some adjustments
|
||||||
# in order to generate correct entrypoints
|
# in order to generate correct entrypoints
|
||||||
x86/glapi_x86.o: x86/glapi_x86.S
|
x86/glapi_x86.o: x86/glapi_x86.S
|
||||||
|
@ -142,6 +142,11 @@ x86/glapi_x86.o: x86/glapi_x86.S
|
||||||
#main/dispatch.o: main/dispatch.c
|
#main/dispatch.o: main/dispatch.c
|
||||||
# $(CC) -o $@ $(CFLAGS) -UUSE_X86_ASM -c $<
|
# $(CC) -o $@ $(CFLAGS) -UUSE_X86_ASM -c $<
|
||||||
|
|
||||||
|
# [dBorca]
|
||||||
|
# if we want codegen, we have to stdcall
|
||||||
|
tnl/t_vtx_x86_gcc.o: tnl/t_vtx_x86_gcc.S
|
||||||
|
$(CC) -o $@ $(CFLAGS) -DSTDCALL_API -c $<
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
-$(call UNLINK,array_cache/*.o)
|
-$(call UNLINK,array_cache/*.o)
|
||||||
-$(call UNLINK,glapi/*.o)
|
-$(call UNLINK,glapi/*.o)
|
||||||
|
|
|
@ -102,8 +102,10 @@ static void _tnl_wrap_buffers( GLcontext *ctx )
|
||||||
|
|
||||||
/* Deal with buffer wrapping where provoked by the vertex buffer
|
/* Deal with buffer wrapping where provoked by the vertex buffer
|
||||||
* filling up, as opposed to upgrade_vertex().
|
* filling up, as opposed to upgrade_vertex().
|
||||||
|
*
|
||||||
|
* Make it GLAPIENTRY, so we can tail from the codegen'ed Vertex*fv
|
||||||
*/
|
*/
|
||||||
void _tnl_wrap_filled_vertex( GLcontext *ctx )
|
void GLAPIENTRY _tnl_wrap_filled_vertex( GLcontext *ctx )
|
||||||
{
|
{
|
||||||
TNLcontext *tnl = TNL_CONTEXT(ctx);
|
TNLcontext *tnl = TNL_CONTEXT(ctx);
|
||||||
GLfloat *data = tnl->vtx.copied.buffer;
|
GLfloat *data = tnl->vtx.copied.buffer;
|
||||||
|
@ -403,7 +405,7 @@ static attrfv_func do_choose( GLuint attr, GLuint sz )
|
||||||
|
|
||||||
|
|
||||||
/* Try to use codegen:
|
/* Try to use codegen:
|
||||||
*/
|
*/
|
||||||
#ifdef USE_X86_ASM
|
#ifdef USE_X86_ASM
|
||||||
if (tnl->AllowCodegen)
|
if (tnl->AllowCodegen)
|
||||||
tnl->vtx.tabfv[attr][sz-1] = do_codegen( ctx, attr, sz );
|
tnl->vtx.tabfv[attr][sz-1] = do_codegen( ctx, attr, sz );
|
||||||
|
@ -473,11 +475,15 @@ static void reset_attrfv( TNLcontext *tnl )
|
||||||
|
|
||||||
for (i = 0 ; i < _TNL_ATTRIB_MAX ; i++)
|
for (i = 0 ; i < _TNL_ATTRIB_MAX ; i++)
|
||||||
if (tnl->vtx.attrsz[i]) {
|
if (tnl->vtx.attrsz[i]) {
|
||||||
GLuint j = tnl->vtx.attrsz[i] - 1;
|
GLint j = tnl->vtx.attrsz[i] - 1;
|
||||||
tnl->vtx.attrsz[i] = 0;
|
tnl->vtx.attrsz[i] = 0;
|
||||||
|
|
||||||
if (i < _TNL_MAX_ATTR_CODEGEN)
|
if (i < _TNL_MAX_ATTR_CODEGEN) {
|
||||||
tnl->vtx.tabfv[i][j] = choose[i][j];
|
while (j >= 0) {
|
||||||
|
tnl->vtx.tabfv[i][j] = choose[i][j];
|
||||||
|
j--;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tnl->vtx.vertex_size = 0;
|
tnl->vtx.vertex_size = 0;
|
||||||
|
|
|
@ -49,7 +49,7 @@ extern void _tnl_vtx_destroy( GLcontext *ctx );
|
||||||
extern void _tnl_FlushVertices( GLcontext *ctx, GLuint flags );
|
extern void _tnl_FlushVertices( GLcontext *ctx, GLuint flags );
|
||||||
extern void _tnl_flush_vtx( GLcontext *ctx );
|
extern void _tnl_flush_vtx( GLcontext *ctx );
|
||||||
|
|
||||||
extern void _tnl_wrap_filled_vertex( GLcontext *ctx );
|
extern void GLAPIENTRY _tnl_wrap_filled_vertex( GLcontext *ctx );
|
||||||
|
|
||||||
/* t_vtx_exec.c:
|
/* t_vtx_exec.c:
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -60,22 +60,25 @@ EXTERN( _tnl_x86_Vertex2fv );
|
||||||
EXTERN( _tnl_x86_Vertex3fv );
|
EXTERN( _tnl_x86_Vertex3fv );
|
||||||
EXTERN( _tnl_x86_Vertex4fv );
|
EXTERN( _tnl_x86_Vertex4fv );
|
||||||
|
|
||||||
EXTERN( _tnl_x86_dispatch_attrf );
|
EXTERN( _tnl_x86_dispatch_attrf1 );
|
||||||
|
EXTERN( _tnl_x86_dispatch_attrf2 );
|
||||||
|
EXTERN( _tnl_x86_dispatch_attrf3 );
|
||||||
|
EXTERN( _tnl_x86_dispatch_attrf4 );
|
||||||
EXTERN( _tnl_x86_dispatch_attrfv );
|
EXTERN( _tnl_x86_dispatch_attrfv );
|
||||||
EXTERN( _tnl_x86_dispatch_multitexcoordf );
|
EXTERN( _tnl_x86_dispatch_multitexcoordf1 );
|
||||||
|
EXTERN( _tnl_x86_dispatch_multitexcoordf2 );
|
||||||
|
EXTERN( _tnl_x86_dispatch_multitexcoordf3 );
|
||||||
|
EXTERN( _tnl_x86_dispatch_multitexcoordf4 );
|
||||||
EXTERN( _tnl_x86_dispatch_multitexcoordfv );
|
EXTERN( _tnl_x86_dispatch_multitexcoordfv );
|
||||||
EXTERN( _tnl_x86_dispatch_vertexattribf );
|
EXTERN( _tnl_x86_dispatch_vertexattribf1 );
|
||||||
|
EXTERN( _tnl_x86_dispatch_vertexattribf2 );
|
||||||
|
EXTERN( _tnl_x86_dispatch_vertexattribf3 );
|
||||||
|
EXTERN( _tnl_x86_dispatch_vertexattribf4 );
|
||||||
EXTERN( _tnl_x86_dispatch_vertexattribfv );
|
EXTERN( _tnl_x86_dispatch_vertexattribfv );
|
||||||
|
|
||||||
EXTERN( _tnl_x86_choose_fv );
|
EXTERN( _tnl_x86_choose_fv );
|
||||||
|
|
||||||
|
|
||||||
static void notify( void )
|
|
||||||
{
|
|
||||||
GET_CURRENT_CONTEXT( ctx );
|
|
||||||
_tnl_wrap_filled_vertex( ctx );
|
|
||||||
}
|
|
||||||
|
|
||||||
#define DONT_KNOW_OFFSETS 1
|
#define DONT_KNOW_OFFSETS 1
|
||||||
|
|
||||||
|
|
||||||
|
@ -93,7 +96,7 @@ static void notify( void )
|
||||||
|
|
||||||
#define FIXUP( CODE, KNOWN_OFFSET, CHECKVAL, NEWVAL ) \
|
#define FIXUP( CODE, KNOWN_OFFSET, CHECKVAL, NEWVAL ) \
|
||||||
do { \
|
do { \
|
||||||
GLuint subst = 0x10101010 + CHECKVAL; \
|
GLint subst = 0x10101010 + CHECKVAL; \
|
||||||
\
|
\
|
||||||
if (DONT_KNOW_OFFSETS) { \
|
if (DONT_KNOW_OFFSETS) { \
|
||||||
while (*(int *)(CODE+offset) != subst) offset++; \
|
while (*(int *)(CODE+offset) != subst) offset++; \
|
||||||
|
@ -112,7 +115,7 @@ do { \
|
||||||
|
|
||||||
#define FIXUPREL( CODE, KNOWN_OFFSET, CHECKVAL, NEWVAL )\
|
#define FIXUPREL( CODE, KNOWN_OFFSET, CHECKVAL, NEWVAL )\
|
||||||
do { \
|
do { \
|
||||||
GLuint subst = 0x10101010 + CHECKVAL; \
|
GLint subst = 0x10101010 + CHECKVAL; \
|
||||||
\
|
\
|
||||||
if (DONT_KNOW_OFFSETS) { \
|
if (DONT_KNOW_OFFSETS) { \
|
||||||
while (*(int *)(CODE+offset) != subst) offset++; \
|
while (*(int *)(CODE+offset) != subst) offset++; \
|
||||||
|
@ -262,53 +265,16 @@ void _tnl_InitX86Codegen( struct _tnl_dynfn_generators *gen )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static attrfv_func
|
#define MKDISP(FUNC, SIZE, ATTR, WARP) \
|
||||||
_do_choose( GLuint attr, GLuint sz )
|
|
||||||
{
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* I purposely avoided one single macro, since they might need to be
|
|
||||||
* handled in different ways. Ohwell, once things get much clearer,
|
|
||||||
* they could collapse...
|
|
||||||
*/
|
|
||||||
#define MAKE_DISPATCH_ATTR(FUNC, SIZE, TYPE, ATTR) \
|
|
||||||
do { \
|
do { \
|
||||||
char *code; \
|
char *code; \
|
||||||
char *start = (char *)&_tnl_x86_dispatch_attr##TYPE; \
|
char *start = (char *)&WARP; \
|
||||||
char *end = (char *)&_tnl_x86_dispatch_attr##TYPE##_end; \
|
char *end = (char *)&WARP##_end; \
|
||||||
int offset = 0; \
|
int offset = 0; \
|
||||||
code = ALIGN_MALLOC( end - start, 16 ); \
|
code = ALIGN_MALLOC( end - start, 16 ); \
|
||||||
memcpy (code, start, end - start); \
|
memcpy (code, start, end - start); \
|
||||||
FIXUP(code, 0, 0, (int)&(TNL_CONTEXT(ctx)->vtx.tabfv[ATTR][SIZE-1]));\
|
FIXUP(code, 0, 0, (int)&(TNL_CONTEXT(ctx)->vtx.tabfv[ATTR][SIZE-1]));\
|
||||||
vfmt->FUNC##SIZE##TYPE = code; \
|
*(void **)&vfmt->FUNC = code; \
|
||||||
} while (0)
|
|
||||||
|
|
||||||
|
|
||||||
#define MAKE_DISPATCH_MULTITEXCOORD(FUNC, SIZE, TYPE, ATTR) \
|
|
||||||
do { \
|
|
||||||
char *code; \
|
|
||||||
char *start = (char *)&_tnl_x86_dispatch_multitexcoord##TYPE; \
|
|
||||||
char *end = (char *)&_tnl_x86_dispatch_multitexcoord##TYPE##_end; \
|
|
||||||
int offset = 0; \
|
|
||||||
code = ALIGN_MALLOC( end - start, 16 ); \
|
|
||||||
memcpy (code, start, end - start); \
|
|
||||||
FIXUP(code, 0, 0, (int)&(TNL_CONTEXT(ctx)->vtx.tabfv[_TNL_ATTRIB_TEX0][SIZE-1]));\
|
|
||||||
vfmt->FUNC##SIZE##TYPE##ARB = code; \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
|
|
||||||
#define MAKE_DISPATCH_VERTEXATTRIB(FUNC, SIZE, TYPE, ATTR) \
|
|
||||||
do { \
|
|
||||||
char *code; \
|
|
||||||
char *start = (char *)&_tnl_x86_dispatch_vertexattrib##TYPE; \
|
|
||||||
char *end = (char *)&_tnl_x86_dispatch_vertexattrib##TYPE##_end; \
|
|
||||||
int offset = 0; \
|
|
||||||
code = ALIGN_MALLOC( end - start, 16 ); \
|
|
||||||
memcpy (code, start, end - start); \
|
|
||||||
FIXUP(code, 0, 0, (int)&(TNL_CONTEXT(ctx)->vtx.tabfv[0][SIZE-1])); \
|
|
||||||
vfmt->FUNC##SIZE##TYPE##NV = code; \
|
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
|
||||||
|
@ -319,48 +285,48 @@ void _tnl_x86_exec_vtxfmt_init( GLcontext *ctx )
|
||||||
{
|
{
|
||||||
GLvertexformat *vfmt = &(TNL_CONTEXT(ctx)->exec_vtxfmt);
|
GLvertexformat *vfmt = &(TNL_CONTEXT(ctx)->exec_vtxfmt);
|
||||||
|
|
||||||
MAKE_DISPATCH_ATTR(Color,3,f, _TNL_ATTRIB_COLOR0);
|
MKDISP(Color3f, 3, _TNL_ATTRIB_COLOR0, _tnl_x86_dispatch_attrf3);
|
||||||
MAKE_DISPATCH_ATTR(Color,3,fv, _TNL_ATTRIB_COLOR0);
|
MKDISP(Color3fv, 3, _TNL_ATTRIB_COLOR0, _tnl_x86_dispatch_attrfv);
|
||||||
MAKE_DISPATCH_ATTR(Color,4,f, _TNL_ATTRIB_COLOR0);
|
MKDISP(Color4f, 4, _TNL_ATTRIB_COLOR0, _tnl_x86_dispatch_attrf4);
|
||||||
MAKE_DISPATCH_ATTR(Color,4,fv, _TNL_ATTRIB_COLOR0);
|
MKDISP(Color4fv, 4, _TNL_ATTRIB_COLOR0, _tnl_x86_dispatch_attrfv);
|
||||||
/* vfmt->FogCoordfEXT = _tnl_FogCoordfEXT;
|
MKDISP(FogCoordfEXT, 1, _TNL_ATTRIB_FOG, _tnl_x86_dispatch_attrf1);
|
||||||
vfmt->FogCoordfvEXT = _tnl_FogCoordfvEXT;*/
|
MKDISP(FogCoordfvEXT, 1, _TNL_ATTRIB_FOG, _tnl_x86_dispatch_attrfv);
|
||||||
MAKE_DISPATCH_ATTR(Normal,3,f, _TNL_ATTRIB_NORMAL);
|
MKDISP(Normal3f, 3, _TNL_ATTRIB_NORMAL, _tnl_x86_dispatch_attrf3);
|
||||||
MAKE_DISPATCH_ATTR(Normal,3,fv, _TNL_ATTRIB_NORMAL);
|
MKDISP(Normal3fv, 3, _TNL_ATTRIB_NORMAL, _tnl_x86_dispatch_attrfv);
|
||||||
/* vfmt->SecondaryColor3fEXT = _tnl_SecondaryColor3fEXT;
|
MKDISP(SecondaryColor3fEXT, 3, _TNL_ATTRIB_COLOR1, _tnl_x86_dispatch_attrf3);
|
||||||
vfmt->SecondaryColor3fvEXT = _tnl_SecondaryColor3fvEXT; */
|
MKDISP(SecondaryColor3fvEXT,3, _TNL_ATTRIB_COLOR1, _tnl_x86_dispatch_attrfv);
|
||||||
MAKE_DISPATCH_ATTR(TexCoord,1,f, _TNL_ATTRIB_TEX0);
|
MKDISP(TexCoord1f, 1, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrf1);
|
||||||
MAKE_DISPATCH_ATTR(TexCoord,1,fv, _TNL_ATTRIB_TEX0);
|
MKDISP(TexCoord1fv, 1, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrfv);
|
||||||
MAKE_DISPATCH_ATTR(TexCoord,2,f, _TNL_ATTRIB_TEX0);
|
MKDISP(TexCoord2f, 2, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrf2);
|
||||||
MAKE_DISPATCH_ATTR(TexCoord,2,fv, _TNL_ATTRIB_TEX0);
|
MKDISP(TexCoord2fv, 2, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrfv);
|
||||||
MAKE_DISPATCH_ATTR(TexCoord,3,f, _TNL_ATTRIB_TEX0);
|
MKDISP(TexCoord3f, 3, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrf3);
|
||||||
MAKE_DISPATCH_ATTR(TexCoord,3,fv, _TNL_ATTRIB_TEX0);
|
MKDISP(TexCoord3fv, 3, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrfv);
|
||||||
MAKE_DISPATCH_ATTR(TexCoord,4,f, _TNL_ATTRIB_TEX0);
|
MKDISP(TexCoord4f, 4, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrf4);
|
||||||
MAKE_DISPATCH_ATTR(TexCoord,4,fv, _TNL_ATTRIB_TEX0);
|
MKDISP(TexCoord4fv, 4, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_attrfv);
|
||||||
MAKE_DISPATCH_ATTR(Vertex,2,f, _TNL_ATTRIB_POS);
|
MKDISP(Vertex2f, 2, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrf2);
|
||||||
MAKE_DISPATCH_ATTR(Vertex,2,fv, _TNL_ATTRIB_POS);
|
MKDISP(Vertex2fv, 2, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrfv);
|
||||||
MAKE_DISPATCH_ATTR(Vertex,3,f, _TNL_ATTRIB_POS);
|
MKDISP(Vertex3f, 3, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrf3);
|
||||||
MAKE_DISPATCH_ATTR(Vertex,3,fv, _TNL_ATTRIB_POS);
|
MKDISP(Vertex3fv, 3, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrfv);
|
||||||
MAKE_DISPATCH_ATTR(Vertex,4,f, _TNL_ATTRIB_POS);
|
MKDISP(Vertex4f, 4, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrf4);
|
||||||
MAKE_DISPATCH_ATTR(Vertex,4,fv, _TNL_ATTRIB_POS);
|
MKDISP(Vertex4fv, 4, _TNL_ATTRIB_POS, _tnl_x86_dispatch_attrfv);
|
||||||
|
|
||||||
MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,1,f, 0);
|
MKDISP(MultiTexCoord1fARB, 1, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordf1);
|
||||||
MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,1,fv, 0);
|
MKDISP(MultiTexCoord1fvARB, 1, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordfv);
|
||||||
MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,2,f, 0);
|
MKDISP(MultiTexCoord2fARB, 2, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordf2);
|
||||||
MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,2,fv, 0);
|
MKDISP(MultiTexCoord2fvARB, 2, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordfv);
|
||||||
MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,3,f, 0);
|
MKDISP(MultiTexCoord3fARB, 3, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordf3);
|
||||||
MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,3,fv, 0);
|
MKDISP(MultiTexCoord3fvARB, 3, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordfv);
|
||||||
MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,4,f, 0);
|
MKDISP(MultiTexCoord4fARB, 4, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordf4);
|
||||||
MAKE_DISPATCH_MULTITEXCOORD(MultiTexCoord,4,fv, 0);
|
MKDISP(MultiTexCoord4fvARB, 4, _TNL_ATTRIB_TEX0, _tnl_x86_dispatch_multitexcoordfv);
|
||||||
|
|
||||||
MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,1,f, 0);
|
MKDISP(VertexAttrib1fNV, 1, 0, _tnl_x86_dispatch_vertexattribf1);
|
||||||
MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,1,fv, 0);
|
MKDISP(VertexAttrib1fvNV, 1, 0, _tnl_x86_dispatch_vertexattribfv);
|
||||||
MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,2,f, 0);
|
MKDISP(VertexAttrib2fNV, 2, 0, _tnl_x86_dispatch_vertexattribf2);
|
||||||
MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,2,fv, 0);
|
MKDISP(VertexAttrib2fvNV, 2, 0, _tnl_x86_dispatch_vertexattribfv);
|
||||||
MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,3,f, 0);
|
MKDISP(VertexAttrib3fNV, 3, 0, _tnl_x86_dispatch_vertexattribf3);
|
||||||
MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,3,fv, 0);
|
MKDISP(VertexAttrib3fvNV, 3, 0, _tnl_x86_dispatch_vertexattribfv);
|
||||||
MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,4,f, 0);
|
MKDISP(VertexAttrib4fNV, 4, 0, _tnl_x86_dispatch_vertexattribf4);
|
||||||
MAKE_DISPATCH_VERTEXATTRIB(VertexAttrib,4,fv, 0);
|
MKDISP(VertexAttrib4fvNV, 4, 0, _tnl_x86_dispatch_vertexattribfv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -384,7 +350,7 @@ void _tnl_x86choosers( attrfv_func (*choose)[4],
|
||||||
FIXUP(code, 0, 0, attr);
|
FIXUP(code, 0, 0, attr);
|
||||||
FIXUP(code, 0, 1, size + 1);
|
FIXUP(code, 0, 1, size + 1);
|
||||||
FIXUPREL(code, 0, 2, do_choose);
|
FIXUPREL(code, 0, 2, do_choose);
|
||||||
choose[attr][size] = code;
|
choose[attr][size] = (attrfv_func)code;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,97 +28,114 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
/*
|
/*
|
||||||
* Authors:
|
* Authors:
|
||||||
* Keith Whitwell <keith@tungstengraphics.com>
|
* Keith Whitwell <keith@tungstengraphics.com>
|
||||||
|
* Daniel Borca <dborca@yahoo.com>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if !defined (__DJGPP__) && !defined (__MINGW32__)
|
#if defined (__DJGPP__) || defined (__MINGW32__)
|
||||||
|
|
||||||
#define GLOBL( x ) \
|
|
||||||
.globl x; \
|
|
||||||
x:
|
|
||||||
|
|
||||||
#else /* defined(__DJGPP__) || defined (__MINGW32__) */
|
|
||||||
|
|
||||||
#define GLOBL( x ) \
|
#define GLOBL( x ) \
|
||||||
.globl _##x; \
|
.globl _##x; \
|
||||||
_##x:
|
_##x:
|
||||||
|
#else /* !defined (__DJGPP__) && !defined (__MINGW32__) */
|
||||||
|
#define GLOBL( x ) \
|
||||||
|
.globl x; \
|
||||||
|
x:
|
||||||
|
#endif /* !defined (__DJGPP__) && !defined (__MINGW32__) */
|
||||||
|
|
||||||
#endif /* defined(__DJGPP__) || defined (__MINGW32__) */
|
|
||||||
|
|
||||||
.data
|
#if !defined (STDCALL_API)
|
||||||
.align 4
|
#define RETCLEAN( x ) ret
|
||||||
|
#else
|
||||||
|
#define RETCLEAN( x ) ret $x
|
||||||
|
#endif
|
||||||
|
|
||||||
// Someone who knew a lot about this sort of thing would use this
|
|
||||||
// macro to note current offsets, etc in a special region of the
|
#define _JMP(x) \
|
||||||
// object file & just make everything work out neat. I do not know
|
.byte 0xe9; \
|
||||||
// enough to do that...
|
.long x
|
||||||
|
|
||||||
|
#define _CALL(x) \
|
||||||
|
.byte 0xe8; \
|
||||||
|
.long x
|
||||||
|
|
||||||
|
|
||||||
|
/* Someone who knew a lot about this sort of thing would use this
|
||||||
|
* macro to note current offsets, etc in a special region of the
|
||||||
|
* object file & just make everything work out neat. I don't know
|
||||||
|
* enough to do that...
|
||||||
|
*/
|
||||||
|
|
||||||
#define SUBST( x ) (0x10101010 + x)
|
#define SUBST( x ) (0x10101010 + x)
|
||||||
|
|
||||||
|
|
||||||
|
.data
|
||||||
|
|
||||||
// [dBorca] TODO
|
|
||||||
// Unfold functions for each vertex size?
|
|
||||||
// Build super-specialized SSE versions?
|
|
||||||
// STDCALL woes (HAVE_NONSTANDARD_GLAPIENTRY):
|
|
||||||
// need separate routine for the non "fv" case,
|
|
||||||
// to clean up the stack!
|
|
||||||
|
|
||||||
|
/* [dBorca] TODO
|
||||||
|
* Unfold functions for each vertex size?
|
||||||
|
* Build super-specialized SSE versions?
|
||||||
|
*
|
||||||
|
* There is a trick in Vertex*fv: under certain conditions,
|
||||||
|
* we tail to _tnl_wrap_filled_vertex(ctx). This means that
|
||||||
|
* if Vertex*fv is STDCALL, then _tnl_wrap_filled_vertex must
|
||||||
|
* be STDCALL as well, because (GLcontext *) and (GLfloat *)
|
||||||
|
* have the same size.
|
||||||
|
*/
|
||||||
|
.align 4
|
||||||
GLOBL ( _tnl_x86_Vertex1fv )
|
GLOBL ( _tnl_x86_Vertex1fv )
|
||||||
movl 4(%esp), %ecx
|
movl 4(%esp), %ecx
|
||||||
push %edi
|
push %edi
|
||||||
push %esi
|
push %esi
|
||||||
movl SUBST(0), %edi # 0x0 --> tnl->vtx.vbptr
|
movl SUBST(0), %edi /* 0x0 --> tnl->vtx.vbptr */
|
||||||
movl (%ecx), %edx # load v[0]
|
movl (%ecx), %edx /* load v[0] */
|
||||||
movl %edx, (%edi) # tnl->vtx.vbptr[0] = v[0]
|
movl %edx, (%edi) /* tnl->vtx.vbptr[0] = v[0] */
|
||||||
addl $4, %edi # tnl->vtx.vbptr += 1
|
addl $4, %edi /* tnl->vtx.vbptr += 1 */
|
||||||
movl $SUBST(1), %ecx # 0x1 --> (tnl->vtx.vertex_size - 1)
|
movl $SUBST(1), %ecx /* 0x1 --> (tnl->vtx.vertex_size - 1) */
|
||||||
movl $SUBST(2), %esi # 0x2 --> (tnl->vtx.vertex + 1)
|
movl $SUBST(2), %esi /* 0x2 --> (tnl->vtx.vertex + 1) */
|
||||||
repz
|
repz
|
||||||
movsl %ds:(%esi), %es:(%edi)
|
movsl %ds:(%esi), %es:(%edi)
|
||||||
movl %edi, SUBST(0) # 0x0 --> tnl->vtx.vbptr
|
movl %edi, SUBST(0) /* 0x0 --> tnl->vtx.vbptr */
|
||||||
movl SUBST(3), %edx # 0x3 --> counter
|
movl SUBST(3), %edx /* 0x3 --> counter */
|
||||||
pop %esi
|
pop %esi
|
||||||
pop %edi
|
pop %edi
|
||||||
dec %edx # counter--
|
dec %edx /* counter-- */
|
||||||
movl %edx, SUBST(3) # 0x3 --> counter
|
movl %edx, SUBST(3) /* 0x3 --> counter */
|
||||||
jne .0 # if (counter != 0) return
|
je .0 /* if (counter == 0) goto .0 */
|
||||||
pushl $SUBST(4) # 0x4 --> ctx
|
RETCLEAN(4) /* return */
|
||||||
.byte 0xe8 # call ...
|
.balign 16
|
||||||
.long SUBST(5) # ... _tnl_wrap_filled_vertex(ctx)
|
|
||||||
pop %eax
|
|
||||||
.0:
|
.0:
|
||||||
ret # return
|
movl $SUBST(4), %eax /* load ctx */
|
||||||
|
movl %eax, 4(%esp) /* push ctx */
|
||||||
|
_JMP (SUBST(5)) /* jmp _tnl_wrap_filled_vertex */
|
||||||
GLOBL ( _tnl_x86_Vertex1fv_end )
|
GLOBL ( _tnl_x86_Vertex1fv_end )
|
||||||
|
|
||||||
|
|
||||||
.align 4
|
.align 4
|
||||||
GLOBL ( _tnl_x86_Vertex2fv )
|
GLOBL ( _tnl_x86_Vertex2fv )
|
||||||
movl 4(%esp), %ecx
|
movl 4(%esp), %ecx
|
||||||
push %edi
|
push %edi
|
||||||
push %esi
|
push %esi
|
||||||
movl SUBST(0), %edi # load tnl->vtx.vbptr
|
movl SUBST(0), %edi /* load tnl->vtx.vbptr */
|
||||||
movl (%ecx), %edx # load v[0]
|
movl (%ecx), %edx /* load v[0] */
|
||||||
movl 4(%ecx), %eax # load v[1]
|
movl 4(%ecx), %eax /* load v[1] */
|
||||||
movl %edx, (%edi) # tnl->vtx.vbptr[0] = v[0]
|
movl %edx, (%edi) /* tnl->vtx.vbptr[0] = v[0] */
|
||||||
movl %eax, 4(%edi) # tnl->vtx.vbptr[1] = v[1]
|
movl %eax, 4(%edi) /* tnl->vtx.vbptr[1] = v[1] */
|
||||||
addl $8, %edi # tnl->vtx.vbptr += 2
|
addl $8, %edi /* tnl->vtx.vbptr += 2 */
|
||||||
movl $SUBST(1), %ecx # vertex_size - 2
|
movl $SUBST(1), %ecx /* vertex_size - 2 */
|
||||||
movl $SUBST(2), %esi # tnl->vtx.vertex + 2
|
movl $SUBST(2), %esi /* tnl->vtx.vertex + 2 */
|
||||||
repz
|
repz
|
||||||
movsl %ds:(%esi), %es:(%edi)
|
movsl %ds:(%esi), %es:(%edi)
|
||||||
movl %edi, SUBST(0) # save tnl->vtx.vbptr
|
movl %edi, SUBST(0) /* save tnl->vtx.vbptr */
|
||||||
movl SUBST(3), %edx # load counter
|
movl SUBST(3), %edx /* load counter */
|
||||||
pop %esi
|
pop %esi
|
||||||
pop %edi
|
pop %edi
|
||||||
dec %edx # counter--
|
dec %edx /* counter-- */
|
||||||
movl %edx, SUBST(3) # save counter
|
movl %edx, SUBST(3) /* save counter */
|
||||||
jne .1 # if (counter != 0) return
|
je .1 /* if (counter == 0) goto .1 */
|
||||||
pushl $SUBST(4) # load ctx
|
RETCLEAN(4) /* return */
|
||||||
.byte 0xe8 # call ...
|
.balign 16
|
||||||
.long SUBST(5) # ... _tnl_wrap_filled_vertex(ctx)
|
|
||||||
pop %eax
|
|
||||||
.1:
|
.1:
|
||||||
ret # return
|
movl $SUBST(4), %eax /* load ctx */
|
||||||
|
movl %eax, 4(%esp) /* push ctx */
|
||||||
|
_JMP (SUBST(5)) /* jmp _tnl_wrap_filled_vertex */
|
||||||
GLOBL ( _tnl_x86_Vertex2fv_end )
|
GLOBL ( _tnl_x86_Vertex2fv_end )
|
||||||
|
|
||||||
.align 4
|
.align 4
|
||||||
|
@ -126,92 +143,88 @@ GLOBL ( _tnl_x86_Vertex3fv )
|
||||||
movl 4(%esp), %ecx
|
movl 4(%esp), %ecx
|
||||||
push %edi
|
push %edi
|
||||||
push %esi
|
push %esi
|
||||||
movl SUBST(0), %edi # load tnl->vtx.vbptr
|
movl SUBST(0), %edi /* load tnl->vtx.vbptr */
|
||||||
movl (%ecx), %edx # load v[0]
|
movl (%ecx), %edx /* load v[0] */
|
||||||
movl 4(%ecx), %eax # load v[1]
|
movl 4(%ecx), %eax /* load v[1] */
|
||||||
movl 8(%ecx), %esi # load v[2]
|
movl 8(%ecx), %esi /* load v[2] */
|
||||||
movl %edx, (%edi) # tnl->vtx.vbptr[0] = v[0]
|
movl %edx, (%edi) /* tnl->vtx.vbptr[0] = v[0] */
|
||||||
movl %eax, 4(%edi) # tnl->vtx.vbptr[1] = v[1]
|
movl %eax, 4(%edi) /* tnl->vtx.vbptr[1] = v[1] */
|
||||||
movl %esi, 8(%edi) # tnl->vtx.vbptr[2] = v[2]
|
movl %esi, 8(%edi) /* tnl->vtx.vbptr[2] = v[2] */
|
||||||
addl $12, %edi # tnl->vtx.vbptr += 3
|
addl $12, %edi /* tnl->vtx.vbptr += 3 */
|
||||||
movl $SUBST(1), %ecx # vertex_size - 3
|
movl $SUBST(1), %ecx /* vertex_size - 3 */
|
||||||
movl $SUBST(2), %esi # tnl->vtx.vertex + 3
|
movl $SUBST(2), %esi /* tnl->vtx.vertex + 3 */
|
||||||
repz
|
repz
|
||||||
movsl %ds:(%esi), %es:(%edi)
|
movsl %ds:(%esi), %es:(%edi)
|
||||||
movl %edi, SUBST(0) # save tnl->vtx.vbptr
|
movl %edi, SUBST(0) /* save tnl->vtx.vbptr */
|
||||||
movl SUBST(3), %edx # load counter
|
movl SUBST(3), %edx /* load counter */
|
||||||
pop %esi
|
pop %esi
|
||||||
pop %edi
|
pop %edi
|
||||||
dec %edx # counter--
|
dec %edx /* counter-- */
|
||||||
movl %edx, SUBST(3) # save counter
|
movl %edx, SUBST(3) /* save counter */
|
||||||
jne .2 # if (counter != 0) return
|
je .2 /* if (counter == 0) goto .2 */
|
||||||
pushl $SUBST(4) # load ctx
|
RETCLEAN(4) /* return */
|
||||||
.byte 0xe8 # call ...
|
.balign 16
|
||||||
.long SUBST(5) # ... _tnl_wrap_filled_vertex(ctx)
|
|
||||||
pop %eax
|
|
||||||
.2:
|
.2:
|
||||||
ret # return
|
movl $SUBST(4), %eax /* load ctx */
|
||||||
|
movl %eax, 4(%esp) /* push ctx */
|
||||||
|
_JMP (SUBST(5)) /* jmp _tnl_wrap_filled_vertex */
|
||||||
GLOBL ( _tnl_x86_Vertex3fv_end )
|
GLOBL ( _tnl_x86_Vertex3fv_end )
|
||||||
|
|
||||||
|
|
||||||
.align 4
|
.align 4
|
||||||
GLOBL ( _tnl_x86_Vertex4fv )
|
GLOBL ( _tnl_x86_Vertex4fv )
|
||||||
movl 4(%esp), %ecx
|
movl 4(%esp), %ecx
|
||||||
push %edi
|
push %edi
|
||||||
push %esi
|
push %esi
|
||||||
movl SUBST(0), %edi # load tnl->vtx.vbptr
|
movl SUBST(0), %edi /* load tnl->vtx.vbptr */
|
||||||
movl (%ecx), %edx # load v[0]
|
movl (%ecx), %edx /* load v[0] */
|
||||||
movl 4(%ecx), %eax # load v[1]
|
movl 4(%ecx), %eax /* load v[1] */
|
||||||
movl 8(%ecx), %esi # load v[2]
|
movl 8(%ecx), %esi /* load v[2] */
|
||||||
movl 12(%ecx), %ecx # load v[3]
|
movl 12(%ecx), %ecx /* load v[3] */
|
||||||
movl %edx, (%edi) # tnl->vtx.vbptr[0] = v[0]
|
movl %edx, (%edi) /* tnl->vtx.vbptr[0] = v[0] */
|
||||||
movl %eax, 4(%edi) # tnl->vtx.vbptr[1] = v[1]
|
movl %eax, 4(%edi) /* tnl->vtx.vbptr[1] = v[1] */
|
||||||
movl %esi, 8(%edi) # tnl->vtx.vbptr[2] = v[2]
|
movl %esi, 8(%edi) /* tnl->vtx.vbptr[2] = v[2] */
|
||||||
movl %ecx, 12(%edi) # tnl->vtx.vbptr[3] = v[3]
|
movl %ecx, 12(%edi) /* tnl->vtx.vbptr[3] = v[3] */
|
||||||
addl $16, %edi # tnl->vtx.vbptr += 4
|
addl $16, %edi /* tnl->vtx.vbptr += 4 */
|
||||||
movl $SUBST(1), %ecx # vertex_size - 4
|
movl $SUBST(1), %ecx /* vertex_size - 4 */
|
||||||
movl $SUBST(2), %esi # tnl->vtx.vertex + 3
|
movl $SUBST(2), %esi /* tnl->vtx.vertex + 4 */
|
||||||
repz
|
repz
|
||||||
movsl %ds:(%esi), %es:(%edi)
|
movsl %ds:(%esi), %es:(%edi)
|
||||||
movl %edi, SUBST(0) # save tnl->vtx.vbptr
|
movl %edi, SUBST(0) /* save tnl->vtx.vbptr */
|
||||||
movl SUBST(3), %edx # load counter
|
movl SUBST(3), %edx /* load counter */
|
||||||
pop %esi
|
pop %esi
|
||||||
pop %edi
|
pop %edi
|
||||||
dec %edx # counter--
|
dec %edx /* counter-- */
|
||||||
movl %edx, SUBST(3) # save counter
|
movl %edx, SUBST(3) /* save counter */
|
||||||
jne .3 # if (counter != 0) return
|
je .3 /* if (counter == 0) goto .3 */
|
||||||
pushl $SUBST(4) # load ctx
|
RETCLEAN(4) /* return */
|
||||||
.byte 0xe8 # call ...
|
.balign 16
|
||||||
.long SUBST(5) # ... _tnl_wrap_filled_vertex(ctx)
|
|
||||||
pop %eax
|
|
||||||
.3:
|
.3:
|
||||||
ret # return
|
movl $SUBST(4), %eax /* load ctx */
|
||||||
|
movl %eax, 4(%esp) /* push ctx */
|
||||||
|
_JMP (SUBST(5)) /* jmp _tnl_wrap_filled_vertex */
|
||||||
GLOBL ( _tnl_x86_Vertex4fv_end )
|
GLOBL ( _tnl_x86_Vertex4fv_end )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generic handlers for vector format data.
|
* Generic handlers for vector format data.
|
||||||
*/
|
*/
|
||||||
|
GLOBL( _tnl_x86_Attribute1fv )
|
||||||
GLOBL( _tnl_x86_Attribute1fv)
|
|
||||||
movl 4(%esp), %ecx
|
movl 4(%esp), %ecx
|
||||||
movl (%ecx), %eax /* load v[0] */
|
movl (%ecx), %eax /* load v[0] */
|
||||||
movl %eax, SUBST(0) /* store v[0] to current vertex */
|
movl %eax, SUBST(0) /* store v[0] to current vertex */
|
||||||
ret
|
RETCLEAN(4)
|
||||||
GLOBL ( _tnl_x86_Attribute1fv_end )
|
GLOBL ( _tnl_x86_Attribute1fv_end )
|
||||||
|
|
||||||
GLOBL( _tnl_x86_Attribute2fv)
|
GLOBL( _tnl_x86_Attribute2fv )
|
||||||
movl 4(%esp), %ecx
|
movl 4(%esp), %ecx
|
||||||
movl (%ecx), %eax /* load v[0] */
|
movl (%ecx), %eax /* load v[0] */
|
||||||
movl 4(%ecx), %edx /* load v[1] */
|
movl 4(%ecx), %edx /* load v[1] */
|
||||||
movl %eax, SUBST(0) /* store v[0] to current vertex */
|
movl %eax, SUBST(0) /* store v[0] to current vertex */
|
||||||
movl %edx, SUBST(1) /* store v[1] to current vertex */
|
movl %edx, SUBST(1) /* store v[1] to current vertex */
|
||||||
ret
|
RETCLEAN(4)
|
||||||
GLOBL ( _tnl_x86_Attribute2fv_end )
|
GLOBL ( _tnl_x86_Attribute2fv_end )
|
||||||
|
|
||||||
|
GLOBL( _tnl_x86_Attribute3fv )
|
||||||
GLOBL( _tnl_x86_Attribute3fv)
|
|
||||||
movl 4(%esp), %ecx
|
movl 4(%esp), %ecx
|
||||||
movl (%ecx), %eax /* load v[0] */
|
movl (%ecx), %eax /* load v[0] */
|
||||||
movl 4(%ecx), %edx /* load v[1] */
|
movl 4(%ecx), %edx /* load v[1] */
|
||||||
|
@ -219,10 +232,10 @@ GLOBL( _tnl_x86_Attribute3fv)
|
||||||
movl %eax, SUBST(0) /* store v[0] to current vertex */
|
movl %eax, SUBST(0) /* store v[0] to current vertex */
|
||||||
movl %edx, SUBST(1) /* store v[1] to current vertex */
|
movl %edx, SUBST(1) /* store v[1] to current vertex */
|
||||||
movl %ecx, SUBST(2) /* store v[2] to current vertex */
|
movl %ecx, SUBST(2) /* store v[2] to current vertex */
|
||||||
ret
|
RETCLEAN(4)
|
||||||
GLOBL ( _tnl_x86_Attribute3fv_end )
|
GLOBL ( _tnl_x86_Attribute3fv_end )
|
||||||
|
|
||||||
GLOBL( _tnl_x86_Attribute4fv)
|
GLOBL( _tnl_x86_Attribute4fv )
|
||||||
movl 4(%esp), %ecx
|
movl 4(%esp), %ecx
|
||||||
movl (%ecx), %eax /* load v[0] */
|
movl (%ecx), %eax /* load v[0] */
|
||||||
movl 4(%ecx), %edx /* load v[1] */
|
movl 4(%ecx), %edx /* load v[1] */
|
||||||
|
@ -232,84 +245,131 @@ GLOBL( _tnl_x86_Attribute4fv)
|
||||||
movl 12(%ecx), %edx /* load v[3] */
|
movl 12(%ecx), %edx /* load v[3] */
|
||||||
movl %eax, SUBST(2) /* store v[2] to current vertex */
|
movl %eax, SUBST(2) /* store v[2] to current vertex */
|
||||||
movl %edx, SUBST(3) /* store v[3] to current vertex */
|
movl %edx, SUBST(3) /* store v[3] to current vertex */
|
||||||
ret
|
RETCLEAN(4)
|
||||||
GLOBL ( _tnl_x86_Attribute4fv_end )
|
GLOBL ( _tnl_x86_Attribute4fv_end )
|
||||||
|
|
||||||
|
|
||||||
// Choosers:
|
/* Choosers:
|
||||||
|
*
|
||||||
// Must generate all of these ahead of first usage. Generate at
|
* Must generate all of these ahead of first usage. Generate at
|
||||||
// compile-time?
|
* compile-time?
|
||||||
|
*/
|
||||||
|
GLOBL( _tnl_x86_choose_fv )
|
||||||
GLOBL( _tnl_x86_choose_fv)
|
subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
|
||||||
subl $12, %esp # gcc does 16 byte alignment of stack frames?
|
movl $SUBST(0), (%esp) /* arg 0 - attrib */
|
||||||
movl $SUBST(0), (%esp) # arg 0 - attrib
|
movl $SUBST(1), 4(%esp) /* arg 1 - N */
|
||||||
movl $SUBST(1), 4(%esp) # arg 1 - N
|
_CALL (SUBST(2)) /* call do_choose */
|
||||||
.byte 0xe8 # call ...
|
add $12, %esp /* tear down stack frame */
|
||||||
.long SUBST(2) # ... do_choose
|
jmp *%eax /* jump to new func */
|
||||||
add $12, %esp # tear down stack frame
|
|
||||||
jmp *%eax # jump to new func
|
|
||||||
GLOBL ( _tnl_x86_choose_fv_end )
|
GLOBL ( _tnl_x86_choose_fv_end )
|
||||||
|
|
||||||
|
|
||||||
|
/* FIRST LEVEL FUNCTIONS -- these are plugged directly into GL dispatch.
|
||||||
|
*
|
||||||
|
* In the 1st level dispatch functions, switch to a different
|
||||||
|
* calling convention -- (const GLfloat *v) in %ecx.
|
||||||
|
*
|
||||||
|
* As with regular (x86) dispatch, don't create a new stack frame -
|
||||||
|
* just let the 'ret' in the dispatched function return straight
|
||||||
|
* back to the original caller.
|
||||||
|
*
|
||||||
|
* Vertex/Normal/Color, etc: the address of the function pointer
|
||||||
|
* is known at codegen time.
|
||||||
|
*/
|
||||||
|
|
||||||
// FIRST LEVEL FUNCTIONS -- these are plugged directly into GL dispatch.
|
/* Unfortunately, have to play with the stack in the non-fv case:
|
||||||
|
*/
|
||||||
|
#if !defined (STDCALL_API)
|
||||||
|
GLOBL( _tnl_x86_dispatch_attrf1 )
|
||||||
|
GLOBL( _tnl_x86_dispatch_attrf2 )
|
||||||
|
GLOBL( _tnl_x86_dispatch_attrf3 )
|
||||||
|
GLOBL( _tnl_x86_dispatch_attrf4 )
|
||||||
|
subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
|
||||||
|
leal 16(%esp), %edx /* address of first float on stack */
|
||||||
|
movl %edx, (%esp) /* save as 'v' */
|
||||||
|
call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
|
||||||
|
addl $12, %esp /* tear down frame */
|
||||||
|
ret /* return */
|
||||||
|
GLOBL( _tnl_x86_dispatch_attrf4_end )
|
||||||
|
GLOBL( _tnl_x86_dispatch_attrf3_end )
|
||||||
|
GLOBL( _tnl_x86_dispatch_attrf2_end )
|
||||||
|
GLOBL( _tnl_x86_dispatch_attrf1_end )
|
||||||
|
|
||||||
|
#else /* defined(STDCALL_API) */
|
||||||
|
|
||||||
|
GLOBL( _tnl_x86_dispatch_attrf1 )
|
||||||
|
subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
|
||||||
|
leal 16(%esp), %edx /* address of first float on stack */
|
||||||
|
movl %edx, (%esp) /* save as 'v' */
|
||||||
|
call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
|
||||||
|
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
|
||||||
|
ret $4 /* return */
|
||||||
|
GLOBL( _tnl_x86_dispatch_attrf1_end )
|
||||||
|
|
||||||
// In the 1st level dispatch functions, switch to a different
|
GLOBL( _tnl_x86_dispatch_attrf2 )
|
||||||
// calling convention -- (const GLfloat *v) in %ecx.
|
subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
|
||||||
//
|
leal 16(%esp), %edx /* address of first float on stack */
|
||||||
// As with regular (x86) dispatch, do not create a new stack frame -
|
movl %edx, (%esp) /* save as 'v' */
|
||||||
// just let the 'ret' in the dispatched function return straight
|
call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
|
||||||
// back to the original caller.
|
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
|
||||||
|
ret $8 /* return */
|
||||||
|
GLOBL( _tnl_x86_dispatch_attrf2_end )
|
||||||
|
|
||||||
|
GLOBL( _tnl_x86_dispatch_attrf3 )
|
||||||
|
subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
|
||||||
|
leal 16(%esp), %edx /* address of first float on stack */
|
||||||
|
movl %edx, (%esp) /* save as 'v' */
|
||||||
|
call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
|
||||||
|
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
|
||||||
|
ret $12 /* return */
|
||||||
|
GLOBL( _tnl_x86_dispatch_attrf3_end )
|
||||||
|
|
||||||
|
GLOBL( _tnl_x86_dispatch_attrf4 )
|
||||||
|
subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
|
||||||
|
leal 16(%esp), %edx /* address of first float on stack */
|
||||||
|
movl %edx, (%esp) /* save as 'v' */
|
||||||
|
call *SUBST(0) /* 0x0 --> tabfv[attr][n] */
|
||||||
|
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
|
||||||
|
ret $16 /* return */
|
||||||
|
GLOBL( _tnl_x86_dispatch_attrf4_end )
|
||||||
|
#endif /* defined(STDCALL_API) */
|
||||||
|
|
||||||
// Vertex/Normal/Color, etc: the address of the function pointer
|
/* The fv case is simpler:
|
||||||
// is known at codegen time.
|
*/
|
||||||
|
|
||||||
|
|
||||||
// Unfortunately, have to play with the stack in the non-fv case:
|
|
||||||
//
|
|
||||||
GLOBL( _tnl_x86_dispatch_attrf )
|
|
||||||
subl $12, %esp # gcc does 16 byte alignment of stack frames?
|
|
||||||
leal 16(%esp), %edx # address of first float on stack
|
|
||||||
movl %edx, (%esp) # save as 'v'
|
|
||||||
call *SUBST(0) # 0x0 --> tabfv[attr][n]
|
|
||||||
addl $12, %esp # tear down frame
|
|
||||||
ret # return
|
|
||||||
GLOBL( _tnl_x86_dispatch_attrf_end )
|
|
||||||
|
|
||||||
// The fv case is simpler:
|
|
||||||
//
|
|
||||||
GLOBL( _tnl_x86_dispatch_attrfv )
|
GLOBL( _tnl_x86_dispatch_attrfv )
|
||||||
jmp *SUBST(0) # 0x0 --> tabfv[attr][n]
|
jmp *SUBST(0) /* 0x0 --> tabfv[attr][n] */
|
||||||
GLOBL( _tnl_x86_dispatch_attrfv_end )
|
GLOBL( _tnl_x86_dispatch_attrfv_end )
|
||||||
|
|
||||||
|
|
||||||
// MultiTexcoord: the address of the function pointer must be
|
/* MultiTexcoord: the address of the function pointer must be
|
||||||
// calculated, but can use the index argument slot to hold 'v', and
|
* calculated, but can use the index argument slot to hold 'v', and
|
||||||
// avoid setting up a new stack frame.
|
* avoid setting up a new stack frame.
|
||||||
//
|
*
|
||||||
// [dBorca]
|
* [dBorca]
|
||||||
// right, this would be the preferred approach, but gcc does not
|
* right, this would be the preferred approach, but gcc does not
|
||||||
// clean up the stack after each function call when optimizing (-fdefer-pop);
|
* clean up the stack after each function call when optimizing (-fdefer-pop);
|
||||||
// can it make assumptions about what is already on the stack? I dunno,
|
* can it make assumptions about what's already on the stack? I dunno,
|
||||||
// but in this case, we can't mess with the caller's stack frame, and
|
* but in this case, we can't mess with the caller's stack frame, and
|
||||||
// we must use a model like '_x86_dispatch_attrfv' above. Caveat emptor!
|
* we must use a model like `_x86_dispatch_attrfv' above. Caveat emptor!
|
||||||
|
*/
|
||||||
|
|
||||||
// Also, will only need a maximum of four of each of these per context:
|
/* Also, will only need a maximum of four of each of these per context:
|
||||||
//
|
*/
|
||||||
GLOBL( _tnl_x86_dispatch_multitexcoordf )
|
#if !defined (STDCALL_API)
|
||||||
|
GLOBL( _tnl_x86_dispatch_multitexcoordf1 )
|
||||||
|
GLOBL( _tnl_x86_dispatch_multitexcoordf2 )
|
||||||
|
GLOBL( _tnl_x86_dispatch_multitexcoordf3 )
|
||||||
|
GLOBL( _tnl_x86_dispatch_multitexcoordf4 )
|
||||||
movl 4(%esp), %ecx
|
movl 4(%esp), %ecx
|
||||||
leal 8(%esp), %edx
|
leal 8(%esp), %edx
|
||||||
andl $7, %ecx
|
andl $7, %ecx
|
||||||
movl %edx, 4(%esp)
|
movl %edx, 4(%esp)
|
||||||
sall $4, %ecx
|
sall $4, %ecx
|
||||||
jmp *SUBST(0)(%ecx) # 0x0 - tabfv[tex0][n]
|
jmp *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
|
||||||
GLOBL( _tnl_x86_dispatch_multitexcoordf_end )
|
GLOBL( _tnl_x86_dispatch_multitexcoordf4_end )
|
||||||
|
GLOBL( _tnl_x86_dispatch_multitexcoordf3_end )
|
||||||
|
GLOBL( _tnl_x86_dispatch_multitexcoordf2_end )
|
||||||
|
GLOBL( _tnl_x86_dispatch_multitexcoordf1_end )
|
||||||
|
|
||||||
GLOBL( _tnl_x86_dispatch_multitexcoordfv )
|
GLOBL( _tnl_x86_dispatch_multitexcoordfv )
|
||||||
movl 4(%esp), %ecx
|
movl 4(%esp), %ecx
|
||||||
|
@ -317,32 +377,181 @@ GLOBL( _tnl_x86_dispatch_multitexcoordfv )
|
||||||
andl $7, %ecx
|
andl $7, %ecx
|
||||||
movl %edx, 4(%esp)
|
movl %edx, 4(%esp)
|
||||||
sall $4, %ecx
|
sall $4, %ecx
|
||||||
jmp *SUBST(0)(%ecx) # 0x0 - tabfv[tex0][n]
|
jmp *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
|
||||||
GLOBL( _tnl_x86_dispatch_multitexcoordfv_end )
|
GLOBL( _tnl_x86_dispatch_multitexcoordfv_end )
|
||||||
|
|
||||||
// VertexAttrib: the address of the function pointer must be
|
#else /* defined (STDCALL_API) */
|
||||||
// calculated.
|
|
||||||
|
|
||||||
GLOBL( _tnl_x86_dispatch_vertexattribf )
|
/* STDCALL variant: builds a private frame, hands the callee a pointer to the
 * stack slot that follows the first argument, and pops its own 8 bytes of
 * arguments on return.
 */
GLOBL( _tnl_x86_dispatch_multitexcoordf1 )
|
||||||
|
subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
|
||||||
|
movl 16(%esp), %ecx /* 1st stack argument (return address is now at 12(%esp)) */
|
||||||
|
leal 20(%esp), %edx /* address of the stack slot following the 1st argument */
|
||||||
|
andl $7, %ecx /* keep the low 3 bits: table index 0..7 */
|
||||||
|
movl %edx, (%esp) /* pass that address as the callee's only argument */
|
||||||
|
sall $4, %ecx /* scale index by 16 to get the byte offset into the table */
|
||||||
|
call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
|
||||||
|
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
|
||||||
|
ret $8 /* return, popping 8 bytes of arguments */
|
||||||
|
GLOBL( _tnl_x86_dispatch_multitexcoordf1_end )
|
||||||
|
|
||||||
|
/* STDCALL variant: builds a private frame, hands the callee a pointer to the
 * stack slot that follows the first argument, and pops its own 12 bytes of
 * arguments on return.
 */
GLOBL( _tnl_x86_dispatch_multitexcoordf2 )
|
||||||
|
subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
|
||||||
|
movl 16(%esp), %ecx /* 1st stack argument (return address is now at 12(%esp)) */
|
||||||
|
leal 20(%esp), %edx /* address of the stack slot following the 1st argument */
|
||||||
|
andl $7, %ecx /* keep the low 3 bits: table index 0..7 */
|
||||||
|
movl %edx, (%esp) /* pass that address as the callee's only argument */
|
||||||
|
sall $4, %ecx /* scale index by 16 to get the byte offset into the table */
|
||||||
|
call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
|
||||||
|
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
|
||||||
|
ret $12 /* return, popping 12 bytes of arguments */
|
||||||
|
GLOBL( _tnl_x86_dispatch_multitexcoordf2_end )
|
||||||
|
|
||||||
|
/* STDCALL variant: builds a private frame, hands the callee a pointer to the
 * stack slot that follows the first argument, and pops its own 16 bytes of
 * arguments on return.
 */
GLOBL( _tnl_x86_dispatch_multitexcoordf3 )
|
||||||
|
subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
|
||||||
|
movl 16(%esp), %ecx /* 1st stack argument (return address is now at 12(%esp)) */
|
||||||
|
leal 20(%esp), %edx /* address of the stack slot following the 1st argument */
|
||||||
|
andl $7, %ecx /* keep the low 3 bits: table index 0..7 */
|
||||||
|
movl %edx, (%esp) /* pass that address as the callee's only argument */
|
||||||
|
sall $4, %ecx /* scale index by 16 to get the byte offset into the table */
|
||||||
|
call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
|
||||||
|
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
|
||||||
|
ret $16 /* return, popping 16 bytes of arguments */
|
||||||
|
GLOBL( _tnl_x86_dispatch_multitexcoordf3_end )
|
||||||
|
|
||||||
|
/* STDCALL variant: builds a private frame, hands the callee a pointer to the
 * stack slot that follows the first argument, and pops its own 20 bytes of
 * arguments on return.
 */
GLOBL( _tnl_x86_dispatch_multitexcoordf4 )
|
||||||
|
subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
|
||||||
|
movl 16(%esp), %ecx /* 1st stack argument (return address is now at 12(%esp)) */
|
||||||
|
leal 20(%esp), %edx /* address of the stack slot following the 1st argument */
|
||||||
|
andl $7, %ecx /* keep the low 3 bits: table index 0..7 */
|
||||||
|
movl %edx, (%esp) /* pass that address as the callee's only argument */
|
||||||
|
sall $4, %ecx /* scale index by 16 to get the byte offset into the table */
|
||||||
|
call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
|
||||||
|
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
|
||||||
|
ret $20 /* return, popping 20 bytes of arguments */
|
||||||
|
GLOBL( _tnl_x86_dispatch_multitexcoordf4_end )
|
||||||
|
|
||||||
|
/* STDCALL variant: builds a private frame, forwards the caller's second
 * argument to the selected table entry, and pops its own 8 bytes of
 * arguments on return.
 */
GLOBL( _tnl_x86_dispatch_multitexcoordfv )
|
||||||
|
subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
|
||||||
|
movl 16(%esp), %ecx /* 1st stack argument (return address is now at 12(%esp)) */
|
||||||
|
movl 20(%esp), %edx /* 2nd stack argument, loaded by value (not its address) */
|
||||||
|
andl $7, %ecx /* keep the low 3 bits: table index 0..7 */
|
||||||
|
movl %edx, (%esp) /* pass it on as the callee's only argument */
|
||||||
|
sall $4, %ecx /* scale index by 16 to get the byte offset into the table */
|
||||||
|
call *SUBST(0)(%ecx) /* 0x0 - tabfv[tex0][n] */
|
||||||
|
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
|
||||||
|
ret $8 /* return, popping 8 bytes of arguments */
|
||||||
|
GLOBL( _tnl_x86_dispatch_multitexcoordfv_end )
|
||||||
|
#endif /* defined (STDCALL_API) */
|
||||||
|
|
||||||
|
|
||||||
|
/* VertexAttrib: the address of the function pointer must be
|
||||||
|
* calculated.
|
||||||
|
*/
|
||||||
|
#if !defined (STDCALL_API)
|
||||||
|
GLOBL( _tnl_x86_dispatch_vertexattribf1 )
|
||||||
|
GLOBL( _tnl_x86_dispatch_vertexattribf2 )
|
||||||
|
GLOBL( _tnl_x86_dispatch_vertexattribf3 )
|
||||||
|
GLOBL( _tnl_x86_dispatch_vertexattribf4 )
|
||||||
movl 4(%esp), %eax
|
movl 4(%esp), %eax
|
||||||
cmpl $16, %eax
|
cmpl $16, %eax
|
||||||
jb .8 # "cmovge" is not supported on all CPUs
|
jb .8 /* "cmovge" is not supported on all CPUs */
|
||||||
movl $16, %eax
|
movl $16, %eax
|
||||||
.8:
|
.8:
|
||||||
leal 8(%esp), %ecx # calculate 'v'
|
leal 8(%esp), %ecx /* calculate 'v' */
|
||||||
movl %ecx, 4(%esp) # save in 1st arg slot
|
movl %ecx, 4(%esp) /* save in 1st arg slot */
|
||||||
sall $4, %eax
|
sall $4, %eax
|
||||||
jmp *SUBST(0)(%eax) # 0x0 - tabfv[0][n]
|
jmp *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
|
||||||
GLOBL( _tnl_x86_dispatch_vertexattribf_end )
|
GLOBL( _tnl_x86_dispatch_vertexattribf4_end )
|
||||||
|
GLOBL( _tnl_x86_dispatch_vertexattribf3_end )
|
||||||
|
GLOBL( _tnl_x86_dispatch_vertexattribf2_end )
|
||||||
|
GLOBL( _tnl_x86_dispatch_vertexattribf1_end )
|
||||||
|
|
||||||
GLOBL( _tnl_x86_dispatch_vertexattribfv )
|
GLOBL( _tnl_x86_dispatch_vertexattribfv )
|
||||||
movl 4(%esp), %eax
|
movl 4(%esp), %eax
|
||||||
cmpl $16, %eax
|
cmpl $16, %eax
|
||||||
jb .9 # "cmovge" is not supported on all CPUs
|
jb .9 /* "cmovge" is not supported on all CPUs */
|
||||||
movl $16, %eax
|
movl $16, %eax
|
||||||
.9:
|
.9:
|
||||||
movl 8(%esp), %ecx # load 'v'
|
movl 8(%esp), %ecx /* load 'v' */
|
||||||
movl %ecx, 4(%esp) # save in 1st arg slot
|
movl %ecx, 4(%esp) /* save in 1st arg slot */
|
||||||
sall $4, %eax
|
sall $4, %eax
|
||||||
jmp *SUBST(0)(%eax) # 0x0 - tabfv[0][n]
|
jmp *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
|
||||||
GLOBL( _tnl_x86_dispatch_vertexattribfv_end )
|
GLOBL( _tnl_x86_dispatch_vertexattribfv_end )
|
||||||
|
|
||||||
|
#else /* defined (STDCALL_API) */
|
||||||
|
|
||||||
|
/* STDCALL variant: clamps the first argument to at most 16, uses it to pick a
 * table entry, passes that entry a pointer to the stack slot following the
 * first argument, and pops its own 8 bytes of arguments on return.
 */
GLOBL( _tnl_x86_dispatch_vertexattribf1 )
|
||||||
|
subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
|
||||||
|
movl 16(%esp), %eax /* 1st stack argument, used as the table index */
|
||||||
|
cmpl $16, %eax /* unsigned compare against 16 */
|
||||||
|
jb .81 /* "cmovge" is not supported on all CPUs */
|
||||||
|
movl $16, %eax /* index was >= 16: clamp it to 16 */
|
||||||
|
.81:
|
||||||
|
leal 20(%esp), %ecx /* calculate 'v' (address of the slot after the index) */
|
||||||
|
movl %ecx, (%esp) /* save in 1st arg slot */
|
||||||
|
sall $4, %eax /* scale index by 16 to get the byte offset into the table */
|
||||||
|
call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
|
||||||
|
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
|
||||||
|
ret $8 /* return, popping 8 bytes of arguments */
|
||||||
|
GLOBL( _tnl_x86_dispatch_vertexattribf1_end )
|
||||||
|
|
||||||
|
/* STDCALL variant: clamps the first argument to at most 16, uses it to pick a
 * table entry, passes that entry a pointer to the stack slot following the
 * first argument, and pops its own 12 bytes of arguments on return.
 */
GLOBL( _tnl_x86_dispatch_vertexattribf2 )
|
||||||
|
subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
|
||||||
|
movl 16(%esp), %eax /* 1st stack argument, used as the table index */
|
||||||
|
cmpl $16, %eax /* unsigned compare against 16 */
|
||||||
|
jb .82 /* "cmovge" is not supported on all CPUs */
|
||||||
|
movl $16, %eax /* index was >= 16: clamp it to 16 */
|
||||||
|
.82:
|
||||||
|
leal 20(%esp), %ecx /* calculate 'v' (address of the slot after the index) */
|
||||||
|
movl %ecx, (%esp) /* save in 1st arg slot */
|
||||||
|
sall $4, %eax /* scale index by 16 to get the byte offset into the table */
|
||||||
|
call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
|
||||||
|
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
|
||||||
|
ret $12 /* return, popping 12 bytes of arguments */
|
||||||
|
GLOBL( _tnl_x86_dispatch_vertexattribf2_end )
|
||||||
|
|
||||||
|
/* STDCALL variant: clamps the first argument to at most 16, uses it to pick a
 * table entry, passes that entry a pointer to the stack slot following the
 * first argument, and pops its own 16 bytes of arguments on return.
 */
GLOBL( _tnl_x86_dispatch_vertexattribf3 )
|
||||||
|
subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
|
||||||
|
movl 16(%esp), %eax /* 1st stack argument, used as the table index */
|
||||||
|
cmpl $16, %eax /* unsigned compare against 16 */
|
||||||
|
jb .83 /* "cmovge" is not supported on all CPUs */
|
||||||
|
movl $16, %eax /* index was >= 16: clamp it to 16 */
|
||||||
|
.83:
|
||||||
|
leal 20(%esp), %ecx /* calculate 'v' (address of the slot after the index) */
|
||||||
|
movl %ecx, (%esp) /* save in 1st arg slot */
|
||||||
|
sall $4, %eax /* scale index by 16 to get the byte offset into the table */
|
||||||
|
call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
|
||||||
|
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
|
||||||
|
ret $16 /* return, popping 16 bytes of arguments */
|
||||||
|
GLOBL( _tnl_x86_dispatch_vertexattribf3_end )
|
||||||
|
|
||||||
|
/* STDCALL variant: clamps the first argument to at most 16, uses it to pick a
 * table entry, passes that entry a pointer to the stack slot following the
 * first argument, and pops its own 20 bytes of arguments on return.
 */
GLOBL( _tnl_x86_dispatch_vertexattribf4 )
|
||||||
|
subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
|
||||||
|
movl 16(%esp), %eax /* 1st stack argument, used as the table index */
|
||||||
|
cmpl $16, %eax /* unsigned compare against 16 */
|
||||||
|
jb .84 /* "cmovge" is not supported on all CPUs */
|
||||||
|
movl $16, %eax /* index was >= 16: clamp it to 16 */
|
||||||
|
.84:
|
||||||
|
leal 20(%esp), %ecx /* calculate 'v' (address of the slot after the index) */
|
||||||
|
movl %ecx, (%esp) /* save in 1st arg slot */
|
||||||
|
sall $4, %eax /* scale index by 16 to get the byte offset into the table */
|
||||||
|
call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
|
||||||
|
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
|
||||||
|
ret $20 /* return, popping 20 bytes of arguments */
|
||||||
|
GLOBL( _tnl_x86_dispatch_vertexattribf4_end )
|
||||||
|
|
||||||
|
GLOBL( _tnl_x86_dispatch_vertexattribfv )
|
||||||
|
subl $12, %esp /* gcc does 16 byte alignment of stack frames? */
|
||||||
|
movl 16(%esp), %eax
|
||||||
|
cmpl $16, %eax
|
||||||
|
jb .9 /* "cmovge" is not supported on all CPUs */
|
||||||
|
movl $16, %eax
|
||||||
|
.9:
|
||||||
|
movl 20(%esp), %ecx /* load 'v' */
|
||||||
|
movl %ecx, (%esp) /* save in 1st arg slot */
|
||||||
|
sall $4, %eax
|
||||||
|
call *SUBST(0)(%eax) /* 0x0 - tabfv[0][n] */
|
||||||
|
addl $8, %esp /* tear down frame (4 shaved off by the callee) */
|
||||||
|
ret $8 /* return */
|
||||||
|
GLOBL( _tnl_x86_dispatch_vertexattribfv_end )
|
||||||
|
#endif /* defined (STDCALL_API) */
|
||||||
|
|
|
@ -1300,11 +1300,11 @@ SECTION _DATA public align=16 class=DATA use32 flat
|
||||||
#define REPZ REPE
|
#define REPZ REPE
|
||||||
#define RET ret
|
#define RET ret
|
||||||
#define SAHF sahf
|
#define SAHF sahf
|
||||||
#define SAL_L(a, b) sal L_(b), L_(a)
|
#define SAL_L(a, b) sal L_(b), B_(a)
|
||||||
#define SAL_W(a, b) sal W_(b), W_(a)
|
#define SAL_W(a, b) sal W_(b), B_(a)
|
||||||
#define SAL_B(a, b) sal B_(b), B_(a)
|
#define SAL_B(a, b) sal B_(b), B_(a)
|
||||||
#define SAR_L(a, b) sar L_(b), L_(a)
|
#define SAR_L(a, b) sar L_(b), B_(a)
|
||||||
#define SAR_W(a, b) sar W_(b), W_(a)
|
#define SAR_W(a, b) sar W_(b), B_(a)
|
||||||
#define SAR_B(a, b) sar B_(b), B_(a)
|
#define SAR_B(a, b) sar B_(b), B_(a)
|
||||||
#define SBB_L(a, b) sbb L_(b), L_(a)
|
#define SBB_L(a, b) sbb L_(b), L_(a)
|
||||||
#define SBB_W(a, b) sbb W_(b), W_(a)
|
#define SBB_W(a, b) sbb W_(b), W_(a)
|
||||||
|
|
Loading…
Reference in New Issue