SPARC assembly optimizations from David Miller.
This commit is contained in:
parent
8bd0693101
commit
7943b349d6
|
@ -1,4 +1,4 @@
|
|||
/* $Id: m_debug_util.h,v 1.3 2001/03/30 14:44:43 gareth Exp $ */
|
||||
/* $Id: m_debug_util.h,v 1.4 2001/05/23 14:27:03 brianp Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
|
@ -38,7 +38,9 @@
|
|||
* NOTE: on x86 it works only on CPUs which know the 'rdtsc' instruction (586 or higher)
|
||||
* (hope, you don't try to debug Mesa on a 386 ;)
|
||||
*/
|
||||
#if defined(__GNUC__) && defined(__i386__) && defined(USE_X86_ASM)
|
||||
#if defined(__GNUC__) && \
|
||||
((defined(__i386__) && defined(USE_X86_ASM)) || \
|
||||
(defined(__sparc__) && defined(USE_SPARC_ASM)))
|
||||
#define RUN_DEBUG_BENCHMARK
|
||||
#endif
|
||||
|
||||
|
@ -67,6 +69,8 @@ extern char *mesa_profile;
|
|||
* It is assumed that all calculations are done in the cache.
|
||||
*/
|
||||
|
||||
#if defined(__i386__)
|
||||
|
||||
#if 1 /* PPro, PII, PIII version */
|
||||
|
||||
/* Profiling on the P6 architecture requires a little more work, due to
|
||||
|
@ -183,6 +187,30 @@ extern char *mesa_profile;
|
|||
|
||||
#endif
|
||||
|
||||
#elif defined(__sparc__)
|
||||
|
||||
/*
 * SPARC cycle counting.
 *
 * BEGIN_RACE(x) opens a loop of ten timing runs and END_RACE(x) closes it;
 * the code to be timed goes between the two.  Each run reads the %tick
 * register before and after the timed code, and x ends up holding the
 * smallest difference seen, minus counter_overhead.
 *
 * The "rd %tick" instructions are emitted as raw opcode words.  Their
 * destination registers (%l0 and %l1) are fixed by the encoding, so the
 * two temporaries are pinned to exactly those registers.
 */
#define INIT_COUNTER()                                                  \
   do { counter_overhead = 5; } while(0)

#define BEGIN_RACE(x)                                                   \
   x = LONG_MAX;                                                        \
   for (cycle_i = 0; cycle_i < 10; cycle_i++) {                         \
      register long cycle_tmp1 asm("l0");                               \
      register long cycle_tmp2 asm("l1");                               \
      /* rd %tick, %l0 : timestamp before the timed code */             \
      __asm__ __volatile__ (".word 0xa1410000" : "=r" (cycle_tmp1));

#define END_RACE(x)                                                     \
      /* rd %tick, %l1 : timestamp after the timed code */              \
      __asm__ __volatile__ (".word 0xa3410000" : "=r" (cycle_tmp2));    \
      /* keep the fastest run seen so far */                            \
      if ((cycle_tmp2 - cycle_tmp1) < x)                                \
         x = cycle_tmp2 - cycle_tmp1;                                   \
   }                                                                    \
   x -= counter_overhead;
|
||||
|
||||
#else
|
||||
#error Your processor is not supported for RUN_XFORM_BENCHMARK
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#define BEGIN_RACE(x)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $Id: m_xform.c,v 1.13 2001/05/21 16:33:41 gareth Exp $ */
|
||||
/* $Id: m_xform.c,v 1.14 2001/05/23 14:27:03 brianp Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
|
@ -56,6 +56,10 @@
|
|||
#include "X86/common_x86_asm.h"
|
||||
#endif
|
||||
|
||||
#ifdef USE_SPARC_ASM
|
||||
#include "SPARC/sparc.h"
|
||||
#endif
|
||||
|
||||
clip_func _mesa_clip_tab[5];
|
||||
clip_func _mesa_clip_np_tab[5];
|
||||
dotprod_func _mesa_dotprod_tab[5];
|
||||
|
@ -206,6 +210,9 @@ _math_init_transformation( void )
|
|||
#ifdef USE_X86_ASM
|
||||
_mesa_init_all_x86_transform_asm();
|
||||
#endif
|
||||
#ifdef USE_SPARC_ASM
|
||||
_mesa_init_all_sparc_transform_asm();
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -0,0 +1,234 @@
|
|||
/* $Id: clip.S,v 1.1 2001/05/23 14:27:03 brianp Exp $ */
|
||||
|
||||
/*
 * Byte offsets of the GLvector4f fields used by the clip-test routines,
 * and the load instruction (LDPTR) for its pointer-sized fields.
 *
 * The layout depends on the pointer width, not on the instruction set:
 * __sparc_v9__ can also be defined when generating 32-bit code for a V9
 * CPU, where pointers are still 4 bytes.  __arch64__ is the macro that
 * identifies a 64-bit build, so select on that.
 */
#if defined(__arch64__)
#define LDPTR		ldx
#define V4F_DATA	0x00
#define V4F_START	0x08
#define V4F_COUNT	0x10
#define V4F_STRIDE	0x14
#define V4F_SIZE	0x18
#define V4F_FLAGS	0x1c
#else
#define LDPTR		ld
#define V4F_DATA	0x00
#define V4F_START	0x04
#define V4F_COUNT	0x08
#define V4F_STRIDE	0x0c
#define V4F_SIZE	0x10
#define V4F_FLAGS	0x14
#endif
|
||||
|
||||
/*
 * Size bits for the GLvector4f flags word.  Only VEC_SIZE_4 is used in
 * this file, where it is OR-ed into the flags of the projected vector.
 */
#define VEC_SIZE_1	0x1
#define VEC_SIZE_2	0x3
#define VEC_SIZE_3	0x7
#define VEC_SIZE_4	0xf
|
||||
|
||||
.text
|
||||
.align 64
|
||||
|
||||
one_dot_zero:
|
||||
.word 0x3f800000 /* 1.0f */
|
||||
|
||||
/* This trick is shamelessly stolen from the x86
|
||||
* Mesa asm. Very clever, and we can do it too
|
||||
* since we have the necessary add with carry
|
||||
* instructions on Sparc.
|
||||
*/
|
||||
clip_table:
|
||||
.byte 0, 1, 0, 2, 4, 5, 4, 6
|
||||
.byte 0, 1, 0, 2, 8, 9, 8, 10
|
||||
.byte 32, 33, 32, 34, 36, 37, 36, 38
|
||||
.byte 32, 33, 32, 34, 40, 41, 40, 42
|
||||
.byte 0, 1, 0, 2, 4, 5, 4, 6
|
||||
.byte 0, 1, 0, 2, 8, 9, 8, 10
|
||||
.byte 16, 17, 16, 18, 20, 21, 20, 22
|
||||
.byte 16, 17, 16, 18, 24, 25, 24, 26
|
||||
.byte 63, 61, 63, 62, 55, 53, 55, 54
|
||||
.byte 63, 61, 63, 62, 59, 57, 59, 58
|
||||
.byte 47, 45, 47, 46, 39, 37, 39, 38
|
||||
.byte 47, 45, 47, 46, 43, 41, 43, 42
|
||||
.byte 63, 61, 63, 62, 55, 53, 55, 54
|
||||
.byte 63, 61, 63, 62, 59, 57, 59, 58
|
||||
.byte 31, 29, 31, 30, 23, 21, 23, 22
|
||||
.byte 31, 29, 31, 30, 27, 25, 27, 26
|
||||
|
||||
/* GLvector4f *clip_vec, GLvector4f *proj_vec,
|
||||
GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask */
|
||||
|
||||
.align 64
|
||||
__pc_tramp:
|
||||
retl
|
||||
nop
|
||||
|
||||
/*
 * GLvector4f *_mesa_sparc_cliptest_points4(GLvector4f *clip_vec,
 *                                          GLvector4f *proj_vec,
 *                                          GLubyte clipMask[],
 *                                          GLubyte *orMask,
 *                                          GLubyte *andMask)
 *
 * In:  %i0 = clip_vec, %i1 = proj_vec, %i2 = clipMask,
 *      %i3 = orMask,   %i4 = andMask
 * Out: %o0 = proj_vec
 *
 * For every vertex of clip_vec a clip mask byte is looked up in
 * clip_table and stored in clipMask[i].  A vertex with a non-zero mask is
 * counted, folded into the running or/and masks and projected to
 * (0, 0, 0, 1); any other vertex is projected to (x/w, y/w, z/w, 1/w).
 * On exit *orMask holds the accumulated OR mask, and *andMask holds the
 * accumulated AND mask if every vertex was clipped, or 0 otherwise.
 *
 * The vertex count lives in a local register.  %g7 must not be used for
 * it, because the SPARC ABI reserves that register for the system
 * (thread pointer).
 *
 * NOTE(review): the 64-byte frame matches a 32-bit register window save
 * area; confirm that it is large enough for 64-bit builds.
 */
	.globl	_mesa_sparc_cliptest_points4
_mesa_sparc_cliptest_points4:
	save	%sp, -64, %sp

	/* The call leaves its own address in %o7; the delay slot turns it
	 * into the address of one_dot_zero without needing a relocation.
	 */
	call	__pc_tramp
	 sub	%o7, (. - one_dot_zero - 4), %g1
	ld	[%g1 + 0x0], %f4		! f4 = 1.0f
	add	%g1, 0x4, %g1			! g1 = clip_table

	ld	[%i0 + V4F_STRIDE], %l1		! l1 = clip_vec stride
	ld	[%i0 + V4F_COUNT], %l3		! l3 = clip_vec count
	LDPTR	[%i0 + V4F_START], %i0		! i0 = first input vertex
	LDPTR	[%i1 + V4F_START], %i5		! i5 = first output vertex
	ldub	[%i3], %g2
	ldub	[%i4], %g3
	sll	%g3, 8, %g3
	or	%g2, %g3, %g2			! g2 = (andMask << 8) | orMask

	ld	[%i1 + V4F_FLAGS], %g3
	or	%g3, VEC_SIZE_4, %g3
	st	%g3, [%i1 + V4F_FLAGS]		! proj_vec flags |= VEC_SIZE_4
	mov	3, %g3
	st	%g3, [%i1 + V4F_SIZE]
	st	%l3, [%i1 + V4F_COUNT]		! proj_vec count = clip_vec count
	clr	%l2				! c = 0
	clr	%l0				! i = 0

	/* The loop below tests its counter only after the first pass, so an
	 * empty vector must skip it entirely.
	 */
	cmp	%l3, 0
	be	4f
	 nop

	/* l0: i
	 * l3: count
	 * l1: stride
	 * l2: c (number of vertices with a non-zero clip mask)
	 * g2: (tmpAndMask << 8) | tmpOrMask
	 * g1: clip_table
	 * i0: from[stride][i]
	 * i2: clipMask
	 * i5: vProj[4][i]
	 */

1:	ld	[%i0 + 0x0c], %f3		! w, for the reciprocal
	ld	[%i0 + 0x0c], %g5		! w, raw bits
	ld	[%i0 + 0x08], %g4		! z, raw bits
	fdivs	%f4, %f3, %f8			! f8 = 1.0f / w

	/* Build the clip_table index in g3, one carry bit per step. */
	addcc	%g5, %g5, %g5			! carry = sign(w), g5 = bits << 1
	addx	%g0, 0x0, %g3
	addcc	%g4, %g4, %g4			! carry = sign(z)
	addx	%g3, %g3, %g3
	subcc	%g5, %g4, %g0			! carry = |w| < |z|
	ld	[%i0 + 0x04], %g4		! y, raw bits
	addx	%g3, %g3, %g3
	addcc	%g4, %g4, %g4			! carry = sign(y)
	addx	%g3, %g3, %g3
	subcc	%g5, %g4, %g0			! carry = |w| < |y|
	ld	[%i0 + 0x00], %g4		! x, raw bits
	addx	%g3, %g3, %g3
	addcc	%g4, %g4, %g4			! carry = sign(x)
	addx	%g3, %g3, %g3
	subcc	%g5, %g4, %g0			! carry = |w| < |x|
	addx	%g3, %g3, %g3

	ldub	[%g1 + %g3], %g3		! g3 = clip mask of this vertex
	cmp	%g3, 0
	be	2f
	 stb	%g3, [%i2]			! clipMask[i] = mask (both paths)

	/* Clipped vertex: update the masks and store (0, 0, 0, 1). */
	sll	%g3, 8, %g4
	add	%l2, 1, %l2			! c++
	st	%g0, [%i5 + 0x00]
	or	%g4, 0xff, %g4			! g4 = (mask << 8) | 0xff
	or	%g2, %g3, %g2			! tmpOrMask |= mask
	st	%g0, [%i5 + 0x04]
	and	%g2, %g4, %g2			! tmpAndMask &= mask
	st	%g0, [%i5 + 0x08]
	b	3f
	 st	%f4, [%i5 + 0x0c]

	/* Unclipped vertex: store (x/w, y/w, z/w, 1/w). */
2:	ld	[%i0 + 0x00], %f0
	ld	[%i0 + 0x04], %f1
	ld	[%i0 + 0x08], %f2
	fmuls	%f0, %f8, %f0
	st	%f0, [%i5 + 0x00]
	fmuls	%f1, %f8, %f1
	st	%f1, [%i5 + 0x04]
	fmuls	%f2, %f8, %f2
	st	%f2, [%i5 + 0x08]
	st	%f8, [%i5 + 0x0c]

	/* Advance to the next vertex. */
3:	add	%i5, 0x10, %i5
	add	%l0, 1, %l0
	add	%i2, 1, %i2
	cmp	%l0, %l3
	bne	1b
	 add	%i0, %l1, %i0			! from += stride

	/* Write the masks back.  andMask is forced to 0 unless every
	 * vertex was clipped (the annulled delay slot runs only if c < count).
	 */
4:	stb	%g2, [%i3]
	srl	%g2, 8, %g3
	cmp	%l2, %l3
	bl,a	5f
	 clr	%g3
5:	stb	%g3, [%i4]
	ret
	 restore %i1, 0x0, %o0			! return proj_vec
|
||||
|
||||
/*
 * GLvector4f *_mesa_sparc_cliptest_points4_np(GLvector4f *clip_vec,
 *                                             GLvector4f *proj_vec,
 *                                             GLubyte clipMask[],
 *                                             GLubyte *orMask,
 *                                             GLubyte *andMask)
 *
 * In:  %i0 = clip_vec, %i1 = proj_vec, %i2 = clipMask,
 *      %i3 = orMask,   %i4 = andMask
 * Out: %o0 = proj_vec
 *
 * Variant of the clip test that writes no projected vertices.  For every
 * vertex of clip_vec a clip mask byte is looked up in clip_table and
 * stored in clipMask[i]; vertices with a non-zero mask are counted and
 * folded into the running or/and masks.  On exit *orMask holds the
 * accumulated OR mask, and *andMask holds the accumulated AND mask if
 * every vertex was clipped, or 0 otherwise.  The flags, size and count
 * fields of proj_vec are still updated.
 *
 * The vertex count lives in a local register.  %g7 must not be used for
 * it, because the SPARC ABI reserves that register for the system
 * (thread pointer).
 *
 * NOTE(review): the 64-byte frame matches a 32-bit register window save
 * area; confirm that it is large enough for 64-bit builds.
 */
	.globl	_mesa_sparc_cliptest_points4_np
_mesa_sparc_cliptest_points4_np:
	save	%sp, -64, %sp

	/* The call leaves its own address in %o7; the delay slot turns it
	 * into the address of one_dot_zero, and clip_table sits 4 bytes on.
	 */
	call	__pc_tramp
	 sub	%o7, (. - one_dot_zero - 4), %g1
	add	%g1, 0x4, %g1			! g1 = clip_table

	ld	[%i0 + V4F_STRIDE], %l1		! l1 = clip_vec stride
	ld	[%i0 + V4F_COUNT], %l3		! l3 = clip_vec count
	LDPTR	[%i0 + V4F_START], %i0		! i0 = first input vertex
	LDPTR	[%i1 + V4F_START], %i5		! loaded but not used below
	ldub	[%i3], %g2
	ldub	[%i4], %g3
	sll	%g3, 8, %g3
	or	%g2, %g3, %g2			! g2 = (andMask << 8) | orMask

	ld	[%i1 + V4F_FLAGS], %g3
	or	%g3, VEC_SIZE_4, %g3
	st	%g3, [%i1 + V4F_FLAGS]		! proj_vec flags |= VEC_SIZE_4
	mov	3, %g3
	st	%g3, [%i1 + V4F_SIZE]
	st	%l3, [%i1 + V4F_COUNT]		! proj_vec count = clip_vec count
	clr	%l2				! c = 0
	clr	%l0				! i = 0

	/* The loop below tests its counter only after the first pass, so an
	 * empty vector must skip it entirely.
	 */
	cmp	%l3, 0
	be	3f
	 nop

	/* l0: i
	 * l3: count
	 * l1: stride
	 * l2: c (number of vertices with a non-zero clip mask)
	 * g2: (tmpAndMask << 8) | tmpOrMask
	 * g1: clip_table
	 * i0: from[stride][i]
	 * i2: clipMask
	 */

1:	ld	[%i0 + 0x0c], %g5		! w, raw bits
	ld	[%i0 + 0x08], %g4		! z, raw bits

	/* Build the clip_table index in g3, one carry bit per step. */
	addcc	%g5, %g5, %g5			! carry = sign(w), g5 = bits << 1
	addx	%g0, 0x0, %g3
	addcc	%g4, %g4, %g4			! carry = sign(z)
	addx	%g3, %g3, %g3
	subcc	%g5, %g4, %g0			! carry = |w| < |z|
	ld	[%i0 + 0x04], %g4		! y, raw bits
	addx	%g3, %g3, %g3
	addcc	%g4, %g4, %g4			! carry = sign(y)
	addx	%g3, %g3, %g3
	subcc	%g5, %g4, %g0			! carry = |w| < |y|
	ld	[%i0 + 0x00], %g4		! x, raw bits
	addx	%g3, %g3, %g3
	addcc	%g4, %g4, %g4			! carry = sign(x)
	addx	%g3, %g3, %g3
	subcc	%g5, %g4, %g0			! carry = |w| < |x|
	addx	%g3, %g3, %g3

	ldub	[%g1 + %g3], %g3		! g3 = clip mask of this vertex
	cmp	%g3, 0
	be	2f
	 stb	%g3, [%i2]			! clipMask[i] = mask (both paths)

	/* Clipped vertex: update the count and the masks. */
	sll	%g3, 8, %g4
	add	%l2, 1, %l2			! c++
	or	%g4, 0xff, %g4			! g4 = (mask << 8) | 0xff
	or	%g2, %g3, %g2			! tmpOrMask |= mask
	and	%g2, %g4, %g2			! tmpAndMask &= mask

	/* Advance to the next vertex. */
2:	add	%l0, 1, %l0
	add	%i2, 1, %i2
	cmp	%l0, %l3
	bne	1b
	 add	%i0, %l1, %i0			! from += stride

	/* Write the masks back.  andMask is forced to 0 unless every
	 * vertex was clipped (the annulled delay slot runs only if c < count).
	 */
3:	stb	%g2, [%i3]
	srl	%g2, 8, %g3
	cmp	%l2, %l3
	bl,a	4f
	 clr	%g3
4:	stb	%g3, [%i4]
	ret
	 restore %i1, 0x0, %o0			! return proj_vec
|
|
@ -0,0 +1,109 @@
|
|||
/* $Id: sparc.c,v 1.1 2001/05/23 14:27:03 brianp Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.1
|
||||
*
|
||||
* Copyright (C) 1999 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Sparc assembly code by David S. Miller
|
||||
*/
|
||||
|
||||
|
||||
#include "glheader.h"
|
||||
#include "context.h"
|
||||
#include "math/m_vertices.h"
|
||||
#include "math/m_xform.h"
|
||||
#include "tnl/t_context.h"
|
||||
#include "sparc.h"
|
||||
|
||||
#ifdef DEBUG
|
||||
#include "math/m_debug.h"
|
||||
#endif
|
||||
|
||||
/* Parameter list shared by all of the transform routine prototypes. */
#define XFORM_ARGS                                                      \
   GLvector4f *to_vec,                                                  \
   const GLfloat m[16],                                                 \
   const GLvector4f *from_vec
|
||||
|
||||
/*
 * Declare the seven transform routines _mesa_<pfx>_transform_points<sz>_*
 * for one vertex size, one routine per matrix type.
 */
#define DECLARE_XFORM_GROUP(pfx, sz)                                        \
   extern void _mesa_##pfx##_transform_points##sz##_general(XFORM_ARGS);    \
   extern void _mesa_##pfx##_transform_points##sz##_identity(XFORM_ARGS);   \
   extern void _mesa_##pfx##_transform_points##sz##_3d_no_rot(XFORM_ARGS);  \
   extern void _mesa_##pfx##_transform_points##sz##_perspective(XFORM_ARGS);\
   extern void _mesa_##pfx##_transform_points##sz##_2d(XFORM_ARGS);         \
   extern void _mesa_##pfx##_transform_points##sz##_2d_no_rot(XFORM_ARGS);  \
   extern void _mesa_##pfx##_transform_points##sz##_3d(XFORM_ARGS);
|
||||
|
||||
/*
 * Point row sz of _mesa_transform_tab at the routines declared by
 * DECLARE_XFORM_GROUP(pfx, sz).  The expansion already ends with a
 * semicolon, so the macro is invoked without one.
 */
#define ASSIGN_XFORM_GROUP(pfx, sz)                                     \
   _mesa_transform_tab[sz][MATRIX_GENERAL]                              \
      = _mesa_##pfx##_transform_points##sz##_general;                   \
   _mesa_transform_tab[sz][MATRIX_IDENTITY]                             \
      = _mesa_##pfx##_transform_points##sz##_identity;                  \
   _mesa_transform_tab[sz][MATRIX_3D_NO_ROT]                            \
      = _mesa_##pfx##_transform_points##sz##_3d_no_rot;                 \
   _mesa_transform_tab[sz][MATRIX_PERSPECTIVE]                          \
      = _mesa_##pfx##_transform_points##sz##_perspective;               \
   _mesa_transform_tab[sz][MATRIX_2D]                                   \
      = _mesa_##pfx##_transform_points##sz##_2d;                        \
   _mesa_transform_tab[sz][MATRIX_2D_NO_ROT]                            \
      = _mesa_##pfx##_transform_points##sz##_2d_no_rot;                 \
   _mesa_transform_tab[sz][MATRIX_3D]                                   \
      = _mesa_##pfx##_transform_points##sz##_3d;
|
||||
|
||||
|
||||
#if defined(USE_SPARC_ASM)

/* Transform routines for vertices with 1 to 4 components. */
DECLARE_XFORM_GROUP(sparc, 1)
DECLARE_XFORM_GROUP(sparc, 2)
DECLARE_XFORM_GROUP(sparc, 3)
DECLARE_XFORM_GROUP(sparc, 4)

/* Clip test for 4-component vertices. */
extern GLvector4f *
_mesa_sparc_cliptest_points4(GLvector4f *clip_vec, GLvector4f *proj_vec,
                             GLubyte clipMask[],
                             GLubyte *orMask, GLubyte *andMask);

/* "np" variant of the clip test for 4-component vertices. */
extern GLvector4f *
_mesa_sparc_cliptest_points4_np(GLvector4f *clip_vec, GLvector4f *proj_vec,
                                GLubyte clipMask[],
                                GLubyte *orMask, GLubyte *andMask);

#endif /* USE_SPARC_ASM */
|
||||
|
||||
/*
 * Install the SPARC assembly routines into the transform and clip-test
 * dispatch tables.  Does nothing unless USE_SPARC_ASM is defined; in
 * DEBUG builds the installed routines are then run through the math
 * self-tests.
 */
void _mesa_init_all_sparc_transform_asm(void)
{
#if defined(USE_SPARC_ASM)
   /* Transform routines, one table row per vertex size. */
   ASSIGN_XFORM_GROUP(sparc, 1)
   ASSIGN_XFORM_GROUP(sparc, 2)
   ASSIGN_XFORM_GROUP(sparc, 3)
   ASSIGN_XFORM_GROUP(sparc, 4)

   /* Clip tests exist for 4-component vertices only. */
   _mesa_clip_tab[4] = _mesa_sparc_cliptest_points4;
   _mesa_clip_np_tab[4] = _mesa_sparc_cliptest_points4_np;

#if defined(DEBUG)
   _math_test_all_transform_functions("sparc");
   _math_test_all_cliptest_functions("sparc");
#endif /* DEBUG */

#endif /* USE_SPARC_ASM */
}
|
|
@ -0,0 +1,37 @@
|
|||
/* $Id: sparc.h,v 1.1 2001/05/23 14:27:03 brianp Exp $ */
|
||||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.1
|
||||
*
|
||||
* Copyright (C) 1999 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Sparc assembly code by David S. Miller
|
||||
*/
|
||||
|
||||
|
||||
#if !defined(SPARC_H)
#define SPARC_H

/* Install the SPARC assembly routines into the math dispatch tables. */
extern void _mesa_init_all_sparc_transform_asm(void);

#endif /* SPARC_H */
|
|
@ -0,0 +1,277 @@
|
|||
/* $Id: sparc_matrix.h,v 1.1 2001/05/23 14:27:03 brianp Exp $ */
|
||||
|
||||
/*
 * Matrix element n is kept in floating point register %f(16 + n).
 * Elements with an even index therefore sit in even-numbered registers,
 * which is what the double-word loads in the LDMATRIX_* macros rely on.
 */
#define M0	%f16
#define M1	%f17
#define M2	%f18
#define M3	%f19
#define M4	%f20
#define M5	%f21
#define M6	%f22
#define M7	%f23
#define M8	%f24
#define M9	%f25
#define M10	%f26
#define M11	%f27
#define M12	%f28
#define M13	%f29
#define M14	%f30
#define M15	%f31
|
||||
|
||||
/*
 * LDMATRIX_<i>_<j>_...(BASE) loads the listed elements of the float
 * matrix at BASE into their M<n> registers.
 *
 * With USE_LD_DOUBLE defined, neighbouring elements (2k, 2k+1) are
 * fetched with a single ldd into the register pair starting at M<2k>.
 * ldd needs an 8-byte aligned address, which is why the switch exists:
 *
 * Seems to work, disable if unaligned traps begin to appear... -DaveM
 *
 * Each macro is defined exactly once per branch.
 */
#define USE_LD_DOUBLE

#ifndef USE_LD_DOUBLE

#define LDMATRIX_0_1_2_3_12_13_14_15(BASE)	\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 1 * 0x4)], M1;	\
	ld	[BASE + ( 2 * 0x4)], M2;	\
	ld	[BASE + ( 3 * 0x4)], M3;	\
	ld	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (13 * 0x4)], M13;	\
	ld	[BASE + (14 * 0x4)], M14;	\
	ld	[BASE + (15 * 0x4)], M15

#define LDMATRIX_0_1_12_13(BASE)		\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 1 * 0x4)], M1;	\
	ld	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (13 * 0x4)], M13

#define LDMATRIX_0_12_13(BASE)			\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (13 * 0x4)], M13

#define LDMATRIX_0_1_2_12_13_14(BASE)		\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 1 * 0x4)], M1;	\
	ld	[BASE + ( 2 * 0x4)], M2;	\
	ld	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (13 * 0x4)], M13;	\
	ld	[BASE + (14 * 0x4)], M14

#define LDMATRIX_0_12_13_14(BASE)		\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (13 * 0x4)], M13;	\
	ld	[BASE + (14 * 0x4)], M14

#define LDMATRIX_0_14(BASE)			\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + (14 * 0x4)], M14

#define LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(BASE) \
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 1 * 0x4)], M1;	\
	ld	[BASE + ( 2 * 0x4)], M2;	\
	ld	[BASE + ( 3 * 0x4)], M3;	\
	ld	[BASE + ( 4 * 0x4)], M4;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ld	[BASE + ( 6 * 0x4)], M6;	\
	ld	[BASE + ( 7 * 0x4)], M7;	\
	ld	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (13 * 0x4)], M13;	\
	ld	[BASE + (14 * 0x4)], M14;	\
	ld	[BASE + (15 * 0x4)], M15

#define LDMATRIX_0_1_4_5_12_13(BASE)		\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 1 * 0x4)], M1;	\
	ld	[BASE + ( 4 * 0x4)], M4;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ld	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (13 * 0x4)], M13

#define LDMATRIX_0_5_12_13(BASE)		\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ld	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (13 * 0x4)], M13

#define LDMATRIX_0_1_2_3_4_5_6_12_13_14(BASE)	\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 1 * 0x4)], M1;	\
	ld	[BASE + ( 2 * 0x4)], M2;	\
	ld	[BASE + ( 3 * 0x4)], M3;	\
	ld	[BASE + ( 4 * 0x4)], M4;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ld	[BASE + ( 6 * 0x4)], M6;	\
	ld	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (13 * 0x4)], M13;	\
	ld	[BASE + (14 * 0x4)], M14

#define LDMATRIX_0_5_12_13_14(BASE)		\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ld	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (13 * 0x4)], M13;	\
	ld	[BASE + (14 * 0x4)], M14

#define LDMATRIX_0_5_14(BASE)			\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ld	[BASE + (14 * 0x4)], M14

#define LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(BASE) \
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 1 * 0x4)], M1;	\
	ld	[BASE + ( 2 * 0x4)], M2;	\
	ld	[BASE + ( 3 * 0x4)], M3;	\
	ld	[BASE + ( 4 * 0x4)], M4;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ld	[BASE + ( 6 * 0x4)], M6;	\
	ld	[BASE + ( 7 * 0x4)], M7;	\
	ld	[BASE + ( 8 * 0x4)], M8;	\
	ld	[BASE + ( 9 * 0x4)], M9;	\
	ld	[BASE + (10 * 0x4)], M10;	\
	ld	[BASE + (11 * 0x4)], M11;	\
	ld	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (13 * 0x4)], M13;	\
	ld	[BASE + (14 * 0x4)], M14;	\
	ld	[BASE + (15 * 0x4)], M15

#define LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(BASE) \
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 1 * 0x4)], M1;	\
	ld	[BASE + ( 2 * 0x4)], M2;	\
	ld	[BASE + ( 4 * 0x4)], M4;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ld	[BASE + ( 6 * 0x4)], M6;	\
	ld	[BASE + ( 8 * 0x4)], M8;	\
	ld	[BASE + ( 9 * 0x4)], M9;	\
	ld	[BASE + (10 * 0x4)], M10;	\
	ld	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (13 * 0x4)], M13;	\
	ld	[BASE + (14 * 0x4)], M14

#define LDMATRIX_0_5_10_12_13_14(BASE)		\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ld	[BASE + (10 * 0x4)], M10;	\
	ld	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (13 * 0x4)], M13;	\
	ld	[BASE + (14 * 0x4)], M14

#define LDMATRIX_0_5_8_9_10_14(BASE)		\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ld	[BASE + ( 8 * 0x4)], M8;	\
	ld	[BASE + ( 9 * 0x4)], M9;	\
	ld	[BASE + (10 * 0x4)], M10;	\
	ld	[BASE + (14 * 0x4)], M14

#else /* !(USE_LD_DOUBLE) */

#define LDMATRIX_0_1_2_3_12_13_14_15(BASE)	\
	ldd	[BASE + ( 0 * 0x4)], M0;	\
	ldd	[BASE + ( 2 * 0x4)], M2;	\
	ldd	[BASE + (12 * 0x4)], M12;	\
	ldd	[BASE + (14 * 0x4)], M14

#define LDMATRIX_0_1_12_13(BASE)		\
	ldd	[BASE + ( 0 * 0x4)], M0;	\
	ldd	[BASE + (12 * 0x4)], M12

#define LDMATRIX_0_12_13(BASE)			\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ldd	[BASE + (12 * 0x4)], M12

#define LDMATRIX_0_1_2_12_13_14(BASE)		\
	ldd	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 2 * 0x4)], M2;	\
	ldd	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (14 * 0x4)], M14

#define LDMATRIX_0_12_13_14(BASE)		\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ldd	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (14 * 0x4)], M14

#define LDMATRIX_0_14(BASE)			\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + (14 * 0x4)], M14

#define LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(BASE) \
	ldd	[BASE + ( 0 * 0x4)], M0;	\
	ldd	[BASE + ( 2 * 0x4)], M2;	\
	ldd	[BASE + ( 4 * 0x4)], M4;	\
	ldd	[BASE + ( 6 * 0x4)], M6;	\
	ldd	[BASE + (12 * 0x4)], M12;	\
	ldd	[BASE + (14 * 0x4)], M14

#define LDMATRIX_0_5_12_13(BASE)		\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ldd	[BASE + (12 * 0x4)], M12

#define LDMATRIX_0_1_2_3_4_5_6_12_13_14(BASE)	\
	ldd	[BASE + ( 0 * 0x4)], M0;	\
	ldd	[BASE + ( 2 * 0x4)], M2;	\
	ldd	[BASE + ( 4 * 0x4)], M4;	\
	ld	[BASE + ( 6 * 0x4)], M6;	\
	ldd	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (14 * 0x4)], M14

#define LDMATRIX_0_5_12_13_14(BASE)		\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ldd	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (14 * 0x4)], M14

#define LDMATRIX_0_5_14(BASE)			\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ld	[BASE + (14 * 0x4)], M14

#define LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(BASE) \
	ldd	[BASE + ( 0 * 0x4)], M0;	\
	ldd	[BASE + ( 2 * 0x4)], M2;	\
	ldd	[BASE + ( 4 * 0x4)], M4;	\
	ldd	[BASE + ( 6 * 0x4)], M6;	\
	ldd	[BASE + ( 8 * 0x4)], M8;	\
	ldd	[BASE + (10 * 0x4)], M10;	\
	ldd	[BASE + (12 * 0x4)], M12;	\
	ldd	[BASE + (14 * 0x4)], M14

#define LDMATRIX_0_1_4_5_12_13(BASE)		\
	ldd	[BASE + ( 0 * 0x4)], M0;	\
	ldd	[BASE + ( 4 * 0x4)], M4;	\
	ldd	[BASE + (12 * 0x4)], M12

#define LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(BASE) \
	ldd	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 2 * 0x4)], M2;	\
	ldd	[BASE + ( 4 * 0x4)], M4;	\
	ld	[BASE + ( 6 * 0x4)], M6;	\
	ldd	[BASE + ( 8 * 0x4)], M8;	\
	ld	[BASE + (10 * 0x4)], M10;	\
	ldd	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (14 * 0x4)], M14

#define LDMATRIX_0_5_10_12_13_14(BASE)		\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ld	[BASE + (10 * 0x4)], M10;	\
	ldd	[BASE + (12 * 0x4)], M12;	\
	ld	[BASE + (14 * 0x4)], M14

#define LDMATRIX_0_5_8_9_10_14(BASE)		\
	ld	[BASE + ( 0 * 0x4)], M0;	\
	ld	[BASE + ( 5 * 0x4)], M5;	\
	ldd	[BASE + ( 8 * 0x4)], M8;	\
	ld	[BASE + (10 * 0x4)], M10;	\
	ld	[BASE + (14 * 0x4)], M14

#endif /* USE_LD_DOUBLE */
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue