diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index 34da8698b85..cd17040d3ef 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -733,64 +733,69 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, /* * Try calling lp_build_fetch_rgba_aos for all pixels. + * Should only really hit subsampled, compressed + * (for s3tc srgb too, for rgtc the unorm ones only) by now. + * (This is invalid for plain 8unorm formats because we're lazy with + * the swizzle since some results would arrive swizzled, some not.) */ - if (util_format_fits_8unorm(format_desc) && + if ((format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) && + (util_format_fits_8unorm(format_desc) || + format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) && type.floating && type.width == 32 && (type.length == 1 || (type.length % 4 == 0))) { struct lp_type tmp_type; - LLVMValueRef tmp; + struct lp_build_context bld; + LLVMValueRef packed, rgba[4]; + const struct util_format_description *flinear_desc; + const struct util_format_description *frgba8_desc; + unsigned chan; + lp_build_context_init(&bld, gallivm, type); + + /* + * Make sure the conversion in aos really only does convert to rgba8 + * and not anything more (so use linear format, adjust type). + */ + flinear_desc = util_format_description(util_format_linear(format)); memset(&tmp_type, 0, sizeof tmp_type); tmp_type.width = 8; tmp_type.length = type.length * 4; tmp_type.norm = TRUE; - tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, - aligned, base_ptr, offset, i, j, cache); + packed = lp_build_fetch_rgba_aos(gallivm, flinear_desc, tmp_type, + aligned, base_ptr, offset, i, j, cache); + packed = LLVMBuildBitCast(builder, packed, bld.int_vec_type, ""); - lp_build_rgba8_to_fi32_soa(gallivm, - type, - tmp, - rgba_out); - - return; - } - - if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC && - /* non-srgb case is already handled above */ - format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB && - type.floating && type.width == 32 && - (type.length == 1 || (type.length % 4 == 0)) && - cache) { - const struct util_format_description *format_decompressed; - const struct util_format_description *flinear_desc; - LLVMValueRef packed; - flinear_desc = util_format_description(util_format_linear(format_desc->format)); - /* This probably only works with aligned data */ - packed = lp_build_fetch_cached_texels(gallivm, - flinear_desc, - type.length, - base_ptr, - offset, - i, j, - cache); - packed = LLVMBuildBitCast(builder, packed, - lp_build_int_vec_type(gallivm, type), ""); /* - * The values are now packed so they match ordinary srgb RGBA8 format, + * The values are now packed so they match ordinary (srgb) RGBA8 format, * hence need to use matching format for unpack. */ - format_decompressed = util_format_description(PIPE_FORMAT_R8G8B8A8_SRGB); - + frgba8_desc = util_format_description(PIPE_FORMAT_R8G8B8A8_UNORM); + if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC); + frgba8_desc = util_format_description(PIPE_FORMAT_R8G8B8A8_SRGB); + } lp_build_unpack_rgba_soa(gallivm, - format_decompressed, + frgba8_desc, type, - packed, rgba_out); + packed, rgba); + /* + * We converted 4 channels. Make sure llvm can drop unneeded ones + * (luckily the rgba order is fixed, only LA needs special case). + */ + for (chan = 0; chan < 4; chan++) { + enum pipe_swizzle swizzle = format_desc->swizzle[chan]; + if (chan == 3 && util_format_is_luminance_alpha(format)) { + swizzle = PIPE_SWIZZLE_W; + } + rgba_out[chan] = lp_build_swizzle_soa_channel(&bld, rgba, swizzle); + } return; } + /* * Fallback to calling lp_build_fetch_rgba_aos for each pixel. * @@ -798,6 +803,13 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, * miss some opportunities to do vectorization, but this is * convenient for formats or scenarios for which there was no * opportunity or incentive to optimize. + * + * We do NOT want to end up here, this typically is quite terrible, + * in particular if the formats have less than 4 channels. + * + * Right now, this should only be hit for: + * - RGTC snorm formats + * (those miss fast fetch functions hence they are terrible anyway) */ {