gallium: Generalize the alignment macros to other compilers and any alignment.

This commit is contained in:
José Fonseca 2010-01-10 12:58:11 +00:00
parent ce1c493ff8
commit 86bfe974b8
17 changed files with 81 additions and 62 deletions

View File

@ -98,9 +98,9 @@ vs_ppc_run_linear( struct draw_vertex_shader *base,
/* loop over verts */ /* loop over verts */
for (i = 0; i < count; i += MAX_VERTICES) { for (i = 0; i < count; i += MAX_VERTICES) {
const uint max_vertices = MIN2(MAX_VERTICES, count - i); const uint max_vertices = MIN2(MAX_VERTICES, count - i);
float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4] ALIGN16_ATTRIB; PIPE_ALIGN_VAR(16, float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4]);
float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4] ALIGN16_ATTRIB; PIPE_ALIGN_VAR(16, float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4]);
float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4] ALIGN16_ATTRIB; PIPE_ALIGN_VAR(16, float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4]);
uint attr; uint attr;
/* convert (up to) four input verts to SoA format */ /* convert (up to) four input verts to SoA format */

View File

@ -51,7 +51,7 @@
* Since it's pretty much impossible to form PPC vector immediates, load * Since it's pretty much impossible to form PPC vector immediates, load
* them from memory here: * them from memory here:
*/ */
const float ppc_builtin_constants[] ALIGN16_ATTRIB = { PIPE_ALIGN_VAR(16, const float ppc_builtin_constants[]) = {
1.0f, -128.0f, 128.0, 0.0 1.0f, -128.0f, 128.0, 0.0
}; };

View File

@ -358,6 +358,7 @@ struct cell_spu_function_info
/** This is the object passed to spe_create_thread() */ /** This is the object passed to spe_create_thread() */
PIPE_ALIGN_TYPE(16,
struct cell_init_info struct cell_init_info
{ {
unsigned id; unsigned id;
@ -370,7 +371,7 @@ struct cell_init_info
uint *buffer_status; /**< points at cell_context->buffer_status */ uint *buffer_status; /**< points at cell_context->buffer_status */
struct cell_spu_function_info *spu_functions; struct cell_spu_function_info *spu_functions;
} ALIGN16_ATTRIB; });
#endif /* CELL_COMMON_H */ #endif /* CELL_COMMON_H */

View File

@ -89,7 +89,7 @@ struct cell_buffer_node;
*/ */
struct cell_buffer_list struct cell_buffer_list
{ {
struct cell_fence fence ALIGN16_ATTRIB; PIPE_ALIGN_VAR(16, struct cell_fence fence);
struct cell_buffer_node *head; struct cell_buffer_node *head;
}; };
@ -150,18 +150,18 @@ struct cell_context
/** Mapped constant buffers */ /** Mapped constant buffers */
void *mapped_constants[PIPE_SHADER_TYPES]; void *mapped_constants[PIPE_SHADER_TYPES];
struct cell_spu_function_info spu_functions ALIGN16_ATTRIB; PIPE_ALIGN_VAR(16, struct cell_spu_function_info spu_functions);
uint num_cells, num_spus; uint num_cells, num_spus;
/** Buffers for command batches, vertex/index data */ /** Buffers for command batches, vertex/index data */
uint buffer_size[CELL_NUM_BUFFERS]; uint buffer_size[CELL_NUM_BUFFERS];
ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE] ALIGN16_ATTRIB; PIPE_ALIGN_VAR(16, ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE]);
int cur_batch; /**< which buffer is being filled w/ commands */ int cur_batch; /**< which buffer is being filled w/ commands */
/** [4] to ensure 16-byte alignment for each status word */ /** [4] to ensure 16-byte alignment for each status word */
uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; PIPE_ALIGN_VAR(16, uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4]);
/** Associated with each command/batch buffer is a list of pipe_buffers /** Associated with each command/batch buffer is a list of pipe_buffers

View File

@ -53,8 +53,7 @@ struct spu_vs_context draw;
/** /**
* Buffers containing dynamically generated SPU code: * Buffers containing dynamically generated SPU code:
*/ */
static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] PIPE_ALIGN_VAR(16, static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]);
ALIGN16_ATTRIB;
@ -543,7 +542,7 @@ cmd_batch(uint opcode)
{ {
const uint buf = (opcode >> 8) & 0xff; const uint buf = (opcode >> 8) & 0xff;
uint size = (opcode >> 16); uint size = (opcode >> 16);
qword buffer[CELL_BUFFER_SIZE / 16] ALIGN16_ATTRIB; PIPE_ALIGN_VAR(16, qword buffer[CELL_BUFFER_SIZE / 16]);
const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]); const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]);
uint pos; uint pos;

View File

@ -1839,10 +1839,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach )
/* execute declarations (interpolants) */ /* execute declarations (interpolants) */
if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
for (i = 0; i < mach->NumDeclarations; i++) { for (i = 0; i < mach->NumDeclarations; i++) {
PIPE_ALIGN_VAR(16,
union { union {
struct tgsi_full_declaration decl; struct tgsi_full_declaration decl;
qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16];
} d ALIGN16_ATTRIB; } d);
unsigned ea = (unsigned) (mach->Declarations + pc); unsigned ea = (unsigned) (mach->Declarations + pc);
spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl));
@ -1853,10 +1854,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach )
/* execute instructions, until pc is set to -1 */ /* execute instructions, until pc is set to -1 */
while (pc != -1) { while (pc != -1) {
PIPE_ALIGN_VAR(16,
union { union {
struct tgsi_full_instruction inst; struct tgsi_full_instruction inst;
qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16];
} i ALIGN16_ATTRIB; } i);
unsigned ea = (unsigned) (mach->Instructions + pc); unsigned ea = (unsigned) (mach->Instructions + pc);
spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst)); spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst));

View File

@ -98,9 +98,9 @@ struct spu_exec_machine
* 4 internal temporaries * 4 internal temporaries
* 1 address * 1 address
*/ */
PIPE_ALIGN_VAR(16,
struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS
+ TGSI_EXEC_NUM_TEMP_EXTRAS + 1] + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]);
ALIGN16_ATTRIB;
struct spu_exec_vector *Addrs; struct spu_exec_vector *Addrs;

View File

@ -144,7 +144,7 @@ export_func(struct cell_spu_function_info *spu_functions,
void void
return_function_info(void) return_function_info(void)
{ {
struct cell_spu_function_info funcs ALIGN16_ATTRIB; PIPE_ALIGN_VAR(16, struct cell_spu_function_info funcs);
int tag = TAG_MISC; int tag = TAG_MISC;
ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */

View File

@ -93,6 +93,7 @@ typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs,
vector float *constants); vector float *constants);
PIPE_ALIGN_TYPE(16,
struct spu_framebuffer struct spu_framebuffer
{ {
void *color_start; /**< addr of color surface in main memory */ void *color_start; /**< addr of color surface in main memory */
@ -107,10 +108,11 @@ struct spu_framebuffer
uint zsize; /**< 0, 2 or 4 bytes per Z */ uint zsize; /**< 0, 2 or 4 bytes per Z */
float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */ float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */
} ALIGN16_ATTRIB; });
/** per-texture level info */ /** per-texture level info */
PIPE_ALIGN_TYPE(16,
struct spu_texture_level struct spu_texture_level
{ {
void *start; void *start;
@ -123,20 +125,22 @@ struct spu_texture_level
vector signed int mask_s, mask_t, mask_r; vector signed int mask_s, mask_t, mask_r;
/** texcoord clamp limits */ /** texcoord clamp limits */
vector signed int max_s, max_t, max_r; vector signed int max_s, max_t, max_r;
} ALIGN16_ATTRIB; });
PIPE_ALIGN_TYPE(16,
struct spu_texture struct spu_texture
{ {
struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS];
uint max_level; uint max_level;
uint target; /**< PIPE_TEXTURE_x */ uint target; /**< PIPE_TEXTURE_x */
} ALIGN16_ATTRIB; });
/** /**
* All SPU global/context state will be in a singleton object of this type: * All SPU global/context state will be in a singleton object of this type:
*/ */
PIPE_ALIGN_TYPE(16,
struct spu_global struct spu_global
{ {
/** One-time init/constant info */ /** One-time init/constant info */
@ -155,8 +159,8 @@ struct spu_global
struct vertex_info vertex_info; struct vertex_info vertex_info;
/** Current color and Z tiles */ /** Current color and Z tiles */
tile_t ctile ALIGN16_ATTRIB; PIPE_ALIGN_VAR(16, tile_t ctile);
tile_t ztile ALIGN16_ATTRIB; PIPE_ALIGN_VAR(16, tile_t ztile);
/** Read depth/stencil tiles? */ /** Read depth/stencil tiles? */
boolean read_depth_stencil; boolean read_depth_stencil;
@ -165,8 +169,8 @@ struct spu_global
ubyte cur_ctile_status, cur_ztile_status; ubyte cur_ctile_status, cur_ztile_status;
/** Status of all tiles in framebuffer */ /** Status of all tiles in framebuffer */
ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; PIPE_ALIGN_VAR(16, ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]);
ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; PIPE_ALIGN_VAR(16, ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]);
/** Current fragment ops machine code, at 8-byte boundary */ /** Current fragment ops machine code, at 8-byte boundary */
uint *fragment_ops_code; uint *fragment_ops_code;
@ -175,7 +179,7 @@ struct spu_global
spu_fragment_ops_func fragment_ops[2]; spu_fragment_ops_func fragment_ops[2];
/** Current fragment program machine code, at 8-byte boundary */ /** Current fragment program machine code, at 8-byte boundary */
uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB; PIPE_ALIGN_VAR(8, uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]);
/** Current fragment program function */ /** Current fragment program function */
spu_fragment_program_func fragment_program; spu_fragment_program_func fragment_program;
@ -187,7 +191,7 @@ struct spu_global
/** Fragment program constants */ /** Fragment program constants */
vector float constants[4 * CELL_MAX_CONSTANTS]; vector float constants[4 * CELL_MAX_CONSTANTS];
} ALIGN16_ATTRIB; });
extern struct spu_global spu; extern struct spu_global spu;

View File

@ -169,7 +169,7 @@ void
cmd_render(const struct cell_command_render *render, uint *pos_incr) cmd_render(const struct cell_command_render *render, uint *pos_incr)
{ {
/* we'll DMA into these buffers */ /* we'll DMA into these buffers */
ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB; PIPE_ALIGN_VAR(16, ubyte vertex_data[CELL_BUFFER_SIZE]);
const uint vertex_size = render->vertex_size; /* in bytes */ const uint vertex_size = render->vertex_size; /* in bytes */
/*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size;
uint index_bytes; uint index_bytes;

View File

@ -43,7 +43,7 @@ typedef void (*spu_fetch_func)(qword *out, const qword *in,
const qword *shuffle_data); const qword *shuffle_data);
static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = { PIPE_ALIGN_VAR(16, static const qword fetch_shuffle_data[5]) = {
/* Shuffle used by CVT_64_FLOAT /* Shuffle used by CVT_64_FLOAT
*/ */
{ {
@ -110,7 +110,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw,
unsigned idx; unsigned idx;
const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; const unsigned bytes_per_entry = draw->vertex_fetch.size[attr];
const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; const unsigned quads_per_entry = (bytes_per_entry + 15) / 16;
qword in[2 * 4] ALIGN16_ATTRIB; PIPE_ALIGN_VAR(16, qword in[2 * 4]);
/* Fetch four attributes for four vertices. /* Fetch four attributes for four vertices.

View File

@ -107,8 +107,8 @@ run_vertex_program(struct spu_vs_context *draw,
struct spu_exec_machine *machine = &draw->machine; struct spu_exec_machine *machine = &draw->machine;
unsigned int j; unsigned int j;
ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_MAX_ATTRIBS); PIPE_ALIGN_VAR(16, struct spu_exec_vector inputs[PIPE_MAX_ATTRIBS]);
ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_MAX_ATTRIBS); PIPE_ALIGN_VAR(16, struct spu_exec_vector outputs[PIPE_MAX_ATTRIBS]);
const float *scale = draw->viewport.scale; const float *scale = draw->viewport.scale;
const float *trans = draw->viewport.translate; const float *trans = draw->viewport.translate;
@ -119,8 +119,8 @@ run_vertex_program(struct spu_vs_context *draw,
ASSERT_ALIGN16(draw->constants); ASSERT_ALIGN16(draw->constants);
machine->Consts = (float (*)[4]) draw->constants; machine->Consts = (float (*)[4]) draw->constants;
machine->Inputs = ALIGN16_ASSIGN(inputs); machine->Inputs = inputs;
machine->Outputs = ALIGN16_ASSIGN(outputs); machine->Outputs = outputs;
spu_vertex_fetch( draw, machine, elts, count ); spu_vertex_fetch( draw, machine, elts, count );
@ -132,8 +132,9 @@ run_vertex_program(struct spu_vs_context *draw,
for (j = 0; j < count; j++) { for (j = 0; j < count; j++) {
unsigned slot; unsigned slot;
float x, y, z, w; float x, y, z, w;
PIPE_ALIGN_VAR(16,
unsigned char buffer[sizeof(struct vertex_header) unsigned char buffer[sizeof(struct vertex_header)
+ MAX_VERTEX_SIZE] ALIGN16_ATTRIB; + MAX_VERTEX_SIZE]);
struct vertex_header *const tmpOut = struct vertex_header *const tmpOut =
(struct vertex_header *) buffer; (struct vertex_header *) buffer;
const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header)
@ -186,8 +187,8 @@ run_vertex_program(struct spu_vs_context *draw,
} }
unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32] PIPE_ALIGN_VAR(16,
ALIGN16_ATTRIB; unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]);
void void

View File

@ -31,6 +31,7 @@
#ifndef LP_QUAD_H #ifndef LP_QUAD_H
#define LP_QUAD_H #define LP_QUAD_H
#include "pipe/p_compiler.h"
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_exec.h"
@ -83,7 +84,7 @@ struct quad_header_inout
struct quad_header_output struct quad_header_output
{ {
/** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */
float ALIGN16_ATTRIB color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; PIPE_ALIGN_VAR(16, float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]);
}; };
@ -92,9 +93,9 @@ struct quad_header_output
*/ */
struct quad_interp_coef struct quad_interp_coef
{ {
float ALIGN16_ATTRIB a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; PIPE_ALIGN_VAR(16, float a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]);
float ALIGN16_ATTRIB dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; PIPE_ALIGN_VAR(16, float dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]);
float ALIGN16_ATTRIB dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; PIPE_ALIGN_VAR(16, float dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]);
}; };

View File

@ -130,7 +130,7 @@ shade_quads(struct llvmpipe_context *llvmpipe,
uint8_t *tile; uint8_t *tile;
uint8_t *color; uint8_t *color;
void *depth; void *depth;
uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; PIPE_ALIGN_VAR(16, uint32_t mask[4][NUM_CHANNELS]);
unsigned chan_index; unsigned chan_index;
unsigned q; unsigned q;

View File

@ -531,11 +531,11 @@ test_one(unsigned verbose,
success = TRUE; success = TRUE;
for(i = 0; i < n && success; ++i) { for(i = 0; i < n && success; ++i) {
if(mode == AoS) { if(mode == AoS) {
ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; PIPE_ALIGN_VAR(16, uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]);
ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; PIPE_ALIGN_VAR(16, uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]);
ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; PIPE_ALIGN_VAR(16, uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]);
ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; PIPE_ALIGN_VAR(16, uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]);
ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; PIPE_ALIGN_VAR(16, uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]);
int64_t start_counter = 0; int64_t start_counter = 0;
int64_t end_counter = 0; int64_t end_counter = 0;
@ -596,11 +596,11 @@ test_one(unsigned verbose,
if(mode == SoA) { if(mode == SoA) {
const unsigned stride = type.length*type.width/8; const unsigned stride = type.length*type.width/8;
ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; PIPE_ALIGN_VAR(16, uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]);
ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; PIPE_ALIGN_VAR(16, uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]);
ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; PIPE_ALIGN_VAR(16, uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]);
ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; PIPE_ALIGN_VAR(16, uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]);
ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; PIPE_ALIGN_VAR(16, uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]);
int64_t start_counter = 0; int64_t start_counter = 0;
int64_t end_counter = 0; int64_t end_counter = 0;
boolean mismatch; boolean mismatch;

View File

@ -230,8 +230,8 @@ test_one(unsigned verbose,
for(i = 0; i < n && success; ++i) { for(i = 0; i < n && success; ++i) {
unsigned src_stride = src_type.length*src_type.width/8; unsigned src_stride = src_type.length*src_type.width/8;
unsigned dst_stride = dst_type.length*dst_type.width/8; unsigned dst_stride = dst_type.length*dst_type.width/8;
ALIGN16_ATTRIB uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; PIPE_ALIGN_VAR(16, uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]);
ALIGN16_ATTRIB uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; PIPE_ALIGN_VAR(16, uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]);
double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
int64_t start_counter = 0; int64_t start_counter = 0;

View File

@ -139,22 +139,33 @@ typedef unsigned char boolean;
/* Macros for data alignment. */
#if defined(__GNUC__) #if defined(__GNUC__)
#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___aligned[SIZE] __attribute__(( aligned( 16 ) ))
#define ALIGN16_ASSIGN(NAME) NAME##___aligned /* See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Type-Attributes.html */
#define ALIGN16_ATTRIB __attribute__(( aligned( 16 ) )) #define PIPE_ALIGN_TYPE(_alignment, _type) _type __attribute__((aligned(_alignment)))
#define ALIGN8_ATTRIB __attribute__(( aligned( 8 ) ))
/* See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Variable-Attributes.html */
#define PIPE_ALIGN_VAR(_alignment, _decl) _decl __attribute__((aligned(_alignment)))
#if (__GNUC__ > 4 || (__GNUC__ == 4 &&__GNUC_MINOR__>1)) && !defined(PIPE_ARCH_X86_64) #if (__GNUC__ > 4 || (__GNUC__ == 4 &&__GNUC_MINOR__>1)) && !defined(PIPE_ARCH_X86_64)
#define ALIGN_STACK __attribute__((force_align_arg_pointer)) #define ALIGN_STACK __attribute__((force_align_arg_pointer))
#else #else
#define ALIGN_STACK #define ALIGN_STACK
#endif #endif
#else
#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___unaligned[SIZE + 1] #elif defined(_MSC_VER)
#define ALIGN16_ASSIGN(NAME) align16(NAME##___unaligned)
#define ALIGN16_ATTRIB /* See http://msdn.microsoft.com/en-us/library/83ythb65.aspx */
#define ALIGN8_ATTRIB #define PIPE_ALIGN_TYPE(_alignment, _type) __declspec(align(_alignment)) _type
#define PIPE_ALIGN_VAR(_alignment, _decl) __declspec(align(_alignment)) _decl
#define ALIGN_STACK #define ALIGN_STACK
#else
#error "Unsupported compiler"
#endif #endif