diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 3a1b0579648..1c265603663 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -742,6 +742,7 @@ generate_fetch(struct gallivm_state *gallivm, val = lp_build_fetch_rgba_aos(gallivm, format_desc, lp_float32_vec4_type(), + FALSE, map_ptr, zero, zero, zero); LLVMBuildStore(builder, val, temp_ptr); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h index 1177fb224dd..969f1f6cc94 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -62,6 +62,7 @@ LLVMValueRef lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *format_desc, struct lp_type type, + boolean aligned, LLVMValueRef base_ptr, LLVMValueRef offset, LLVMValueRef i, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index af755d460c1..3c25c329edd 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -356,6 +356,7 @@ lp_build_pack_rgba_aos(struct gallivm_state *gallivm, * Fetch a pixel into a 4 float AoS. * * \param format_desc describes format of the image we're fetching from + * \param aligned whether the data is guaranteed to be aligned * \param ptr address of the pixel block (or the texel if uncompressed) * \param i, j the sub-block pixel coordinates. For non-compressed formats * these will always be (0, 0). @@ -365,6 +366,7 @@ LLVMValueRef lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *format_desc, struct lp_type type, + boolean aligned, LLVMValueRef base_ptr, LLVMValueRef offset, LLVMValueRef i, @@ -400,7 +402,7 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, packed = lp_build_gather(gallivm, type.length/4, format_desc->block.bits, type.width*4, - base_ptr, offset, TRUE); + aligned, base_ptr, offset, TRUE); assert(format_desc->block.bits <= vec_len); @@ -437,7 +439,7 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, LLVMValueRef packed; packed = lp_build_gather_elem(gallivm, num_pixels, - format_desc->block.bits, 32, + format_desc->block.bits, 32, aligned, base_ptr, offset, k, FALSE); tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index ff2887ee8fa..afaabc08790 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -386,6 +386,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, type.length, format_desc->block.bits, type.width, + TRUE, base_ptr, offset, FALSE); /* @@ -411,8 +412,8 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, packed = lp_build_gather(gallivm, type.length, format_desc->block.bits, - type.width, base_ptr, offset, - FALSE); + type.width, TRUE, + base_ptr, offset, FALSE); if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) { lp_build_r11g11b10_to_float(gallivm, packed, rgba_out); } @@ -438,15 +439,15 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, unsigned mask = (1 << 8) - 1; LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4); offset = LLVMBuildAdd(builder, offset, s_offset, ""); - packed = lp_build_gather(gallivm, type.length, - 32, type.width, base_ptr, offset, FALSE); + packed = lp_build_gather(gallivm, type.length, 32, type.width, + TRUE, base_ptr, offset, FALSE); packed = LLVMBuildAnd(builder, packed, lp_build_const_int_vec(gallivm, type, mask), ""); } else { assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT); - packed = lp_build_gather(gallivm, type.length, - 32, type.width, base_ptr, offset, TRUE); + packed = lp_build_gather(gallivm, type.length, 32, type.width, + TRUE, base_ptr, offset, TRUE); packed = LLVMBuildBitCast(builder, packed, lp_build_vec_type(gallivm, type), ""); } @@ -472,7 +473,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, tmp_type.norm = TRUE; tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, - base_ptr, offset, i, j); + TRUE, base_ptr, offset, i, j); lp_build_rgba8_to_fi32_soa(gallivm, type, @@ -522,7 +523,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, /* Get a single float[4]={R,G,B,A} pixel */ tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, - base_ptr, offset_elem, + TRUE, base_ptr, offset_elem, i_elem, j_elem); /* diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c index 873f354c040..4f5a45c6a3d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c @@ -497,7 +497,7 @@ lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm, assert(format_desc->block.width == 2); assert(format_desc->block.height == 1); - packed = lp_build_gather(gallivm, n, 32, 32, base_ptr, offset, FALSE); + packed = lp_build_gather(gallivm, n, 32, 32, TRUE, base_ptr, offset, FALSE); (void)j; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.c b/src/gallium/auxiliary/gallivm/lp_bld_gather.c index 9155d811c06..d02602041ce 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_gather.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.c @@ -76,6 +76,7 @@ lp_build_gather_elem(struct gallivm_state *gallivm, unsigned length, unsigned src_width, unsigned dst_width, + boolean aligned, LLVMValueRef base_ptr, LLVMValueRef offsets, unsigned i, @@ -93,6 +94,27 @@ lp_build_gather_elem(struct gallivm_state *gallivm, ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, ""); res = LLVMBuildLoad(gallivm->builder, ptr, ""); + /* XXX + * On some archs we probably really want to avoid having to deal + * with alignments lower than 4 bytes (if fetch size is a power of + * two >= 32). On x86 it doesn't matter, however. + * We should be able to guarantee full alignment for any kind of texture + * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch + * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends + * but I don't think that's quite what we wanted). + * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT + * looks like a good fit, but it seems this cap bit (and OpenGL) aren't + * enforcing what we want (which is what d3d10 does, the offset needs to + * be aligned to element size, but GL has bytes regardless of element + * size which would only leave us with minimum alignment restriction of 16 + * which doesn't make much sense if the type isn't 4x32bit). Due to + * translation of offsets to first_elem in sampler_views it actually seems + * gallium could not do anything else except 16 no matter what... + */ + if (!aligned) { + lp_set_load_alignment(res, 1); + } + assert(src_width <= dst_width); if (src_width > dst_width) { res = LLVMBuildTrunc(gallivm->builder, res, dst_elem_type, ""); @@ -126,6 +148,7 @@ lp_build_gather_elem(struct gallivm_state *gallivm, * @param length length of the offsets * @param src_width src element width in bits * @param dst_width result element width in bits (src will be expanded to fit) + * @param aligned whether the data is guaranteed to be aligned (to src_width) * @param base_ptr base pointer, should be a i8 pointer type. * @param offsets vector with offsets * @param vector_justify select vector rather than integer justification @@ -135,6 +158,7 @@ lp_build_gather(struct gallivm_state *gallivm, unsigned length, unsigned src_width, unsigned dst_width, + boolean aligned, LLVMValueRef base_ptr, LLVMValueRef offsets, boolean vector_justify) @@ -144,7 +168,7 @@ lp_build_gather(struct gallivm_state *gallivm, if (length == 1) { /* Scalar */ return lp_build_gather_elem(gallivm, length, - src_width, dst_width, + src_width, dst_width, aligned, base_ptr, offsets, 0, vector_justify); } else { /* Vector */ @@ -158,7 +182,7 @@ lp_build_gather(struct gallivm_state *gallivm, LLVMValueRef index = lp_build_const_int32(gallivm, i); LLVMValueRef elem; elem = lp_build_gather_elem(gallivm, length, - src_width, dst_width, + src_width, dst_width, aligned, base_ptr, offsets, i, vector_justify); res = LLVMBuildInsertElement(gallivm->builder, res, elem, index, ""); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.h b/src/gallium/auxiliary/gallivm/lp_bld_gather.h index ee694732d36..3ede4763a70 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_gather.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.h @@ -45,6 +45,7 @@ lp_build_gather_elem(struct gallivm_state *gallivm, unsigned length, unsigned src_width, unsigned dst_width, + boolean aligned, LLVMValueRef base_ptr, LLVMValueRef offsets, unsigned i, @@ -55,6 +56,7 @@ lp_build_gather(struct gallivm_state *gallivm, unsigned length, unsigned src_width, unsigned dst_width, + boolean aligned, LLVMValueRef base_ptr, LLVMValueRef offsets, boolean vector_justify); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c index 394521d382a..d7fde810a76 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c @@ -581,6 +581,7 @@ lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld, bld->texel_type.length, bld->format_desc->block.bits, bld->texel_type.width, + TRUE, data_ptr, offset, TRUE); rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, ""); @@ -589,6 +590,7 @@ lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld, rgba8 = lp_build_fetch_rgba_aos(bld->gallivm, bld->format_desc, u8n.type, + TRUE, data_ptr, offset, x_subcoord, y_subcoord); @@ -919,6 +921,7 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld, bld->texel_type.length, bld->format_desc->block.bits, bld->texel_type.width, + TRUE, data_ptr, offset[k][j][i], TRUE); rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, ""); @@ -927,6 +930,7 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld, rgba8 = lp_build_fetch_rgba_aos(bld->gallivm, bld->format_desc, u8n.type, + TRUE, data_ptr, offset[k][j][i], x_subcoord[i], y_subcoord[j]); diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 48bf06e3c45..d9abd1ae37c 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -112,7 +112,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose, block = LLVMAppendBasicBlockInContext(context, func, "entry"); LLVMPositionBuilderAtEnd(builder, block); - rgba = lp_build_fetch_rgba_aos(gallivm, desc, type, + rgba = lp_build_fetch_rgba_aos(gallivm, desc, type, TRUE, packed_ptr, offset, i, j); LLVMBuildStore(builder, rgba, rgba_ptr); @@ -252,6 +252,7 @@ test_format_unorm8(unsigned verbose, FILE *fp, } /* To ensure it's 16-byte aligned */ + /* Could skip this and use unaligned lp_build_fetch_rgba_aos */ memcpy(packed, test->packed, sizeof packed); for (i = 0; i < desc->block.height; ++i) {