From 86bfe974b880dc2cbf40b91ba0fde34e8a9c756e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Sun, 10 Jan 2010 12:58:11 +0000 Subject: [PATCH] gallium: Generalize the alignment macros to other compilers and any alignment. --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 6 ++-- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 2 +- src/gallium/drivers/cell/common.h | 3 +- src/gallium/drivers/cell/ppu/cell_context.h | 8 ++--- src/gallium/drivers/cell/spu/spu_command.c | 5 ++-- src/gallium/drivers/cell/spu/spu_exec.c | 6 ++-- src/gallium/drivers/cell/spu/spu_exec.h | 4 +-- src/gallium/drivers/cell/spu/spu_funcs.c | 2 +- src/gallium/drivers/cell/spu/spu_main.h | 22 ++++++++------ src/gallium/drivers/cell/spu/spu_render.c | 2 +- .../drivers/cell/spu/spu_vertex_fetch.c | 4 +-- .../drivers/cell/spu/spu_vertex_shader.c | 15 +++++----- src/gallium/drivers/llvmpipe/lp_quad.h | 9 +++--- src/gallium/drivers/llvmpipe/lp_setup.c | 2 +- src/gallium/drivers/llvmpipe/lp_test_blend.c | 20 ++++++------- src/gallium/drivers/llvmpipe/lp_test_conv.c | 4 +-- src/gallium/include/pipe/p_compiler.h | 29 +++++++++++++------ 17 files changed, 81 insertions(+), 62 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index ad184bd696d..6179b5b65af 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -98,9 +98,9 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, /* loop over verts */ for (i = 0; i < count; i += MAX_VERTICES) { const uint max_vertices = MIN2(MAX_VERTICES, count - i); - float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4] ALIGN16_ATTRIB; - float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4] ALIGN16_ATTRIB; - float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4]); + PIPE_ALIGN_VAR(16, float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4]); + PIPE_ALIGN_VAR(16, float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4]); uint attr; /* convert (up to) four input verts to SoA format */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 138d2d095bb..cec5b11fd3e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -51,7 +51,7 @@ * Since it's pretty much impossible to form PPC vector immediates, load * them from memory here: */ -const float ppc_builtin_constants[] ALIGN16_ATTRIB = { +PIPE_ALIGN_VAR(16, const float ppc_builtin_constants[]) = { 1.0f, -128.0f, 128.0, 0.0 }; diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index d5f5c7bbba8..aa29dcb3947 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -358,6 +358,7 @@ struct cell_spu_function_info /** This is the object passed to spe_create_thread() */ +PIPE_ALIGN_TYPE(16, struct cell_init_info { unsigned id; @@ -370,7 +371,7 @@ struct cell_init_info uint *buffer_status; /**< points at cell_context->buffer_status */ struct cell_spu_function_info *spu_functions; -} ALIGN16_ATTRIB; +}); #endif /* CELL_COMMON_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 5c3188e7f9d..fa6e4f65cd4 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -89,7 +89,7 @@ struct cell_buffer_node; */ struct cell_buffer_list { - struct cell_fence fence ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, struct cell_fence fence); struct cell_buffer_node *head; }; @@ -150,18 +150,18 @@ struct cell_context /** Mapped constant buffers */ void *mapped_constants[PIPE_SHADER_TYPES]; - struct cell_spu_function_info spu_functions ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, struct cell_spu_function_info spu_functions); uint num_cells, num_spus; /** Buffers for command batches, vertex/index data */ uint buffer_size[CELL_NUM_BUFFERS]; - ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE]); int cur_batch; /**< which buffer is being filled w/ commands */ /** [4] to ensure 16-byte alignment for each status word */ - uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4]); /** Associated with each command/batch buffer is a list of pipe_buffers diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 12b855a3db2..2a62db4b79d 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -53,8 +53,7 @@ struct spu_vs_context draw; /** * Buffers containing dynamically generated SPU code: */ -static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] - ALIGN16_ATTRIB; +PIPE_ALIGN_VAR(16, static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]); @@ -543,7 +542,7 @@ cmd_batch(uint opcode) { const uint buf = (opcode >> 8) & 0xff; uint size = (opcode >> 16); - qword buffer[CELL_BUFFER_SIZE / 16] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, qword buffer[CELL_BUFFER_SIZE / 16]); const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]); uint pos; diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index d86d8e09a51..6db8ed419bd 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -1839,10 +1839,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute declarations (interpolants) */ if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { for (i = 0; i < mach->NumDeclarations; i++) { + PIPE_ALIGN_VAR(16, union { struct tgsi_full_declaration decl; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; - } d ALIGN16_ATTRIB; + } d); unsigned ea = (unsigned) (mach->Declarations + pc); spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); @@ -1853,10 +1854,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) /* execute instructions, until pc is set to -1 */ while (pc != -1) { + PIPE_ALIGN_VAR(16, union { struct tgsi_full_instruction inst; qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; - } i ALIGN16_ATTRIB; + } i); unsigned ea = (unsigned) (mach->Instructions + pc); spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst)); diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h index 86056799405..c8c6183e2eb 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.h +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -98,9 +98,9 @@ struct spu_exec_machine * 4 internal temporaries * 1 address */ + PIPE_ALIGN_VAR(16, struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS - + TGSI_EXEC_NUM_TEMP_EXTRAS + 1] - ALIGN16_ATTRIB; + + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]); struct spu_exec_vector *Addrs; diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c index ff3d609d258..a4e560b0a5e 100644 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -144,7 +144,7 @@ export_func(struct cell_spu_function_info *spu_functions, void return_function_info(void) { - struct cell_spu_function_info funcs ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, struct cell_spu_function_info funcs); int tag = TAG_MISC; ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 33767e7c51d..8500f1bb87f 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -93,6 +93,7 @@ typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs, vector float *constants); +PIPE_ALIGN_TYPE(16, struct spu_framebuffer { void *color_start; /**< addr of color surface in main memory */ @@ -107,10 +108,11 @@ struct spu_framebuffer uint zsize; /**< 0, 2 or 4 bytes per Z */ float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */ -} ALIGN16_ATTRIB; +}); /** per-texture level info */ +PIPE_ALIGN_TYPE(16, struct spu_texture_level { void *start; @@ -123,20 +125,22 @@ struct spu_texture_level vector signed int mask_s, mask_t, mask_r; /** texcoord clamp limits */ vector signed int max_s, max_t, max_r; -} ALIGN16_ATTRIB; +}); +PIPE_ALIGN_TYPE(16, struct spu_texture { struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; uint max_level; uint target; /**< PIPE_TEXTURE_x */ -} ALIGN16_ATTRIB; +}); /** * All SPU global/context state will be in a singleton object of this type: */ +PIPE_ALIGN_TYPE(16, struct spu_global { /** One-time init/constant info */ @@ -155,8 +159,8 @@ struct spu_global struct vertex_info vertex_info; /** Current color and Z tiles */ - tile_t ctile ALIGN16_ATTRIB; - tile_t ztile ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, tile_t ctile); + PIPE_ALIGN_VAR(16, tile_t ztile); /** Read depth/stencil tiles? */ boolean read_depth_stencil; @@ -165,8 +169,8 @@ struct spu_global ubyte cur_ctile_status, cur_ztile_status; /** Status of all tiles in framebuffer */ - ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]); + PIPE_ALIGN_VAR(16, ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]); /** Current fragment ops machine code, at 8-byte boundary */ uint *fragment_ops_code; @@ -175,7 +179,7 @@ struct spu_global spu_fragment_ops_func fragment_ops[2]; /** Current fragment program machine code, at 8-byte boundary */ - uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB; + PIPE_ALIGN_VAR(8, uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]); /** Current fragment ops function */ spu_fragment_program_func fragment_program; @@ -187,7 +191,7 @@ struct spu_global /** Fragment program constants */ vector float constants[4 * CELL_MAX_CONSTANTS]; -} ALIGN16_ATTRIB; +}); extern struct spu_global spu; diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 5ffb7073abf..b13fe3184f8 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -169,7 +169,7 @@ void cmd_render(const struct cell_command_render *render, uint *pos_incr) { /* we'll DMA into these buffers */ - ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, ubyte vertex_data[CELL_BUFFER_SIZE]); const uint vertex_size = render->vertex_size; /* in bytes */ /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; uint index_bytes; diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index 03375d84a57..43600dfe0b8 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -43,7 +43,7 @@ typedef void (*spu_fetch_func)(qword *out, const qword *in, const qword *shuffle_data); -static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = { +PIPE_ALIGN_VAR(16, static const qword fetch_shuffle_data[5]) = { /* Shuffle used by CVT_64_FLOAT */ { @@ -110,7 +110,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, unsigned idx; const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; - qword in[2 * 4] ALIGN16_ATTRIB; + PIPE_ALIGN_VAR(16, qword in[2 * 4]); /* Fetch four attributes for four vertices. diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index fbe5b34d397..49938a8001f 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -107,8 +107,8 @@ run_vertex_program(struct spu_vs_context *draw, struct spu_exec_machine *machine = &draw->machine; unsigned int j; - ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_MAX_ATTRIBS); - ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_MAX_ATTRIBS); + PIPE_ALIGN_VAR(16, struct spu_exec_vector inputs[PIPE_MAX_ATTRIBS]); + PIPE_ALIGN_VAR(16, struct spu_exec_vector outputs[PIPE_MAX_ATTRIBS]); const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; @@ -119,8 +119,8 @@ run_vertex_program(struct spu_vs_context *draw, ASSERT_ALIGN16(draw->constants); machine->Consts = (float (*)[4]) draw->constants; - machine->Inputs = ALIGN16_ASSIGN(inputs); - machine->Outputs = ALIGN16_ASSIGN(outputs); + machine->Inputs = inputs; + machine->Outputs = outputs; spu_vertex_fetch( draw, machine, elts, count ); @@ -132,8 +132,9 @@ run_vertex_program(struct spu_vs_context *draw, for (j = 0; j < count; j++) { unsigned slot; float x, y, z, w; + PIPE_ALIGN_VAR(16, unsigned char buffer[sizeof(struct vertex_header) - + MAX_VERTEX_SIZE] ALIGN16_ATTRIB; + + MAX_VERTEX_SIZE]); struct vertex_header *const tmpOut = (struct vertex_header *) buffer; const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) @@ -186,8 +187,8 @@ run_vertex_program(struct spu_vs_context *draw, } -unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32] - ALIGN16_ATTRIB; +PIPE_ALIGN_VAR(16, +unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]); void diff --git a/src/gallium/drivers/llvmpipe/lp_quad.h b/src/gallium/drivers/llvmpipe/lp_quad.h index 7eb05de77a1..eb285e355e3 100644 --- a/src/gallium/drivers/llvmpipe/lp_quad.h +++ b/src/gallium/drivers/llvmpipe/lp_quad.h @@ -31,6 +31,7 @@ #ifndef LP_QUAD_H #define LP_QUAD_H +#include "pipe/p_compiler.h" #include "pipe/p_state.h" #include "tgsi/tgsi_exec.h" @@ -83,7 +84,7 @@ struct quad_header_inout struct quad_header_output { /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ - float ALIGN16_ATTRIB color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; + PIPE_ALIGN_VAR(16, float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]); }; @@ -92,9 +93,9 @@ struct quad_header_output */ struct quad_interp_coef { - float ALIGN16_ATTRIB a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - float ALIGN16_ATTRIB dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - float ALIGN16_ATTRIB dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16, float a0[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]); + PIPE_ALIGN_VAR(16, float dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]); + PIPE_ALIGN_VAR(16, float dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]); }; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index b18f17c0cd3..29033a067c6 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -130,7 +130,7 @@ shade_quads(struct llvmpipe_context *llvmpipe, uint8_t *tile; uint8_t *color; void *depth; - uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS]; + PIPE_ALIGN_VAR(16, uint32_t mask[4][NUM_CHANNELS]); unsigned chan_index; unsigned q; diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 29fff91981a..de8f872e58f 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -531,11 +531,11 @@ test_one(unsigned verbose, success = TRUE; for(i = 0; i < n && success; ++i) { if(mode == AoS) { - ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16, uint8_t src[LP_NATIVE_VECTOR_WIDTH/8]); + PIPE_ALIGN_VAR(16, uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8]); + PIPE_ALIGN_VAR(16, uint8_t con[LP_NATIVE_VECTOR_WIDTH/8]); + PIPE_ALIGN_VAR(16, uint8_t res[LP_NATIVE_VECTOR_WIDTH/8]); + PIPE_ALIGN_VAR(16, uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8]); int64_t start_counter = 0; int64_t end_counter = 0; @@ -596,11 +596,11 @@ test_one(unsigned verbose, if(mode == SoA) { const unsigned stride = type.length*type.width/8; - ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]; - ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]; + PIPE_ALIGN_VAR(16, uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8]); + PIPE_ALIGN_VAR(16, uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8]); + PIPE_ALIGN_VAR(16, uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8]); + PIPE_ALIGN_VAR(16, uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8]); + PIPE_ALIGN_VAR(16, uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8]); int64_t start_counter = 0; int64_t end_counter = 0; boolean mismatch; diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index faddfb96779..3a6353b916e 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -230,8 +230,8 @@ test_one(unsigned verbose, for(i = 0; i < n && success; ++i) { unsigned src_stride = src_type.length*src_type.width/8; unsigned dst_stride = dst_type.length*dst_type.width/8; - ALIGN16_ATTRIB uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; - ALIGN16_ATTRIB uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + PIPE_ALIGN_VAR(16, uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]); + PIPE_ALIGN_VAR(16, uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]); double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; int64_t start_counter = 0; diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index 26a940593f0..80610a07b66 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -139,22 +139,33 @@ typedef unsigned char boolean; +/* Macros for data alignment. */ #if defined(__GNUC__) -#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___aligned[SIZE] __attribute__(( aligned( 16 ) )) -#define ALIGN16_ASSIGN(NAME) NAME##___aligned -#define ALIGN16_ATTRIB __attribute__(( aligned( 16 ) )) -#define ALIGN8_ATTRIB __attribute__(( aligned( 8 ) )) + +/* See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Type-Attributes.html */ +#define PIPE_ALIGN_TYPE(_alignment, _type) _type __attribute__((aligned(_alignment))) + +/* See http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Variable-Attributes.html */ +#define PIPE_ALIGN_VAR(_alignment, _decl) _decl __attribute__((aligned(_alignment))) + #if (__GNUC__ > 4 || (__GNUC__ == 4 &&__GNUC_MINOR__>1)) && !defined(PIPE_ARCH_X86_64) #define ALIGN_STACK __attribute__((force_align_arg_pointer)) #else #define ALIGN_STACK #endif -#else -#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___unaligned[SIZE + 1] -#define ALIGN16_ASSIGN(NAME) align16(NAME##___unaligned) -#define ALIGN16_ATTRIB -#define ALIGN8_ATTRIB + +#elif defined(_MSC_VER) + +/* See http://msdn.microsoft.com/en-us/library/83ythb65.aspx */ +#define PIPE_ALIGN_TYPE(_alignment, _type) __declspec(align(_alignment)) _type +#define PIPE_ALIGN_VAR(_alignment, _decl) __declspec(align(_alignment)) _decl + #define ALIGN_STACK + +#else + +#error "Unsupported compiler" + #endif