207 lines
7.1 KiB
C
207 lines
7.1 KiB
C
/**************************************************************************
|
|
*
|
|
* Copyright 2010 VMware, Inc.
|
|
* All Rights Reserved.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the
|
|
* "Software"), to deal in the Software without restriction, including
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
* distribute, sub license, and/or sell copies of the Software, and to
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
* the following conditions:
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
|
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
|
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
|
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
|
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*
|
|
* The above copyright notice and this permission notice (including the
|
|
* next paragraph) shall be included in all copies or substantial portions
|
|
* of the Software.
|
|
*
|
|
**************************************************************************/
|
|
|
|
|
|
#include "lp_bld_type.h"
|
|
#include "lp_bld_arit.h"
|
|
#include "lp_bld_const.h"
|
|
#include "lp_bld_swizzle.h"
|
|
#include "lp_bld_quad.h"
|
|
#include "lp_bld_pack.h"
|
|
|
|
|
|
static const unsigned char
|
|
swizzle_left[4] = {
|
|
LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_LEFT,
|
|
LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_QUAD_BOTTOM_LEFT
|
|
};
|
|
|
|
static const unsigned char
|
|
swizzle_right[4] = {
|
|
LP_BLD_QUAD_TOP_RIGHT, LP_BLD_QUAD_TOP_RIGHT,
|
|
LP_BLD_QUAD_BOTTOM_RIGHT, LP_BLD_QUAD_BOTTOM_RIGHT
|
|
};
|
|
|
|
static const unsigned char
|
|
swizzle_top[4] = {
|
|
LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_RIGHT,
|
|
LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_RIGHT
|
|
};
|
|
|
|
static const unsigned char
|
|
swizzle_bottom[4] = {
|
|
LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_QUAD_BOTTOM_RIGHT,
|
|
LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_QUAD_BOTTOM_RIGHT
|
|
};
|
|
|
|
|
|
LLVMValueRef
|
|
lp_build_ddx(struct lp_build_context *bld,
|
|
LLVMValueRef a)
|
|
{
|
|
LLVMValueRef a_left = lp_build_swizzle_aos(bld, a, swizzle_left);
|
|
LLVMValueRef a_right = lp_build_swizzle_aos(bld, a, swizzle_right);
|
|
return lp_build_sub(bld, a_right, a_left);
|
|
}
|
|
|
|
|
|
LLVMValueRef
|
|
lp_build_ddy(struct lp_build_context *bld,
|
|
LLVMValueRef a)
|
|
{
|
|
LLVMValueRef a_top = lp_build_swizzle_aos(bld, a, swizzle_top);
|
|
LLVMValueRef a_bottom = lp_build_swizzle_aos(bld, a, swizzle_bottom);
|
|
return lp_build_sub(bld, a_bottom, a_top);
|
|
}
|
|
|
|
/*
|
|
* Helper for building packed ddx/ddy vector for one coord (scalar per quad
|
|
* values). The vector will look like this (8-wide):
|
|
* dr1dx _____ -dr1dy _____ dr2dx _____ -dr2dy _____
|
|
* This only requires one shuffle instead of two for more straightforward packing.
|
|
*/
|
|
LLVMValueRef
|
|
lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
|
|
LLVMValueRef a)
|
|
{
|
|
struct gallivm_state *gallivm = bld->gallivm;
|
|
LLVMBuilderRef builder = gallivm->builder;
|
|
LLVMValueRef vec1, vec2;
|
|
|
|
/* use aos swizzle helper */
|
|
|
|
static const unsigned char swizzle1[] = { /* no-op swizzle */
|
|
LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE,
|
|
LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_SWIZZLE_DONTCARE
|
|
};
|
|
static const unsigned char swizzle2[] = {
|
|
LP_BLD_QUAD_TOP_RIGHT, LP_BLD_SWIZZLE_DONTCARE,
|
|
LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE
|
|
};
|
|
|
|
vec1 = lp_build_swizzle_aos(bld, a, swizzle1);
|
|
vec2 = lp_build_swizzle_aos(bld, a, swizzle2);
|
|
|
|
if (bld->type.floating)
|
|
return LLVMBuildFSub(builder, vec2, vec1, "ddxddy");
|
|
else
|
|
return LLVMBuildSub(builder, vec2, vec1, "ddxddy");
|
|
}
|
|
|
|
|
|
/*
|
|
* Helper for building packed ddx/ddy vector for one coord (scalar per quad
|
|
* values). The vector will look like this (8-wide):
|
|
* ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy
|
|
* This only needs 2 (v)shufps.
|
|
*/
|
|
LLVMValueRef
|
|
lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld,
|
|
LLVMValueRef a, LLVMValueRef b)
|
|
{
|
|
struct gallivm_state *gallivm = bld->gallivm;
|
|
LLVMBuilderRef builder = gallivm->builder;
|
|
LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH/4];
|
|
LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH/4];
|
|
LLVMValueRef vec1, vec2;
|
|
unsigned length, num_quads, i;
|
|
|
|
/* XXX: do hsub version */
|
|
length = bld->type.length;
|
|
num_quads = length / 4;
|
|
for (i = 0; i < num_quads; i++) {
|
|
unsigned s1 = 4 * i;
|
|
unsigned s2 = 4 * i + length;
|
|
shuffles1[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1);
|
|
shuffles1[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1);
|
|
shuffles1[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2);
|
|
shuffles1[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2);
|
|
shuffles2[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s1);
|
|
shuffles2[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s1);
|
|
shuffles2[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s2);
|
|
shuffles2[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s2);
|
|
}
|
|
vec1 = LLVMBuildShuffleVector(builder, a, b,
|
|
LLVMConstVector(shuffles1, length), "");
|
|
vec2 = LLVMBuildShuffleVector(builder, a, b,
|
|
LLVMConstVector(shuffles2, length), "");
|
|
if (bld->type.floating)
|
|
return LLVMBuildFSub(builder, vec2, vec1, "ddxddyddxddy");
|
|
else
|
|
return LLVMBuildSub(builder, vec2, vec1, "ddxddyddxddy");
|
|
}
|
|
|
|
|
|
/**
|
|
* Twiddle from quad format to row format
|
|
*
|
|
* src0 src1
|
|
* ######### ######### #################
|
|
* # 0 | 1 # # 4 | 5 # # 0 | 1 | 4 | 5 # src0
|
|
* #---+---# #---+---# -> #################
|
|
* # 2 | 3 # # 6 | 7 # # 2 | 3 | 6 | 7 # src1
|
|
* ######### ######### #################
|
|
*
|
|
*/
|
|
void
|
|
lp_bld_quad_twiddle(struct gallivm_state *gallivm,
|
|
struct lp_type lp_dst_type,
|
|
const LLVMValueRef* src,
|
|
unsigned src_count,
|
|
LLVMValueRef* dst)
|
|
{
|
|
LLVMBuilderRef builder = gallivm->builder;
|
|
LLVMTypeRef dst_type_ref;
|
|
LLVMTypeRef type2_ref;
|
|
struct lp_type type2;
|
|
unsigned i;
|
|
|
|
assert((src_count % 2) == 0);
|
|
|
|
/* Create a type with only 2 elements */
|
|
type2 = lp_dst_type;
|
|
type2.width = (lp_dst_type.width * lp_dst_type.length) / 2;
|
|
type2.length = 2;
|
|
type2.floating = 0;
|
|
|
|
type2_ref = lp_build_vec_type(gallivm, type2);
|
|
dst_type_ref = lp_build_vec_type(gallivm, lp_dst_type);
|
|
|
|
for (i = 0; i < src_count; i += 2) {
|
|
LLVMValueRef src0, src1;
|
|
|
|
src0 = LLVMBuildBitCast(builder, src[i + 0], type2_ref, "");
|
|
src1 = LLVMBuildBitCast(builder, src[i + 1], type2_ref, "");
|
|
|
|
dst[i + 0] = lp_build_interleave2(gallivm, type2, src0, src1, 0);
|
|
dst[i + 1] = lp_build_interleave2(gallivm, type2, src0, src1, 1);
|
|
|
|
dst[i + 0] = LLVMBuildBitCast(builder, dst[i + 0], dst_type_ref, "");
|
|
dst[i + 1] = LLVMBuildBitCast(builder, dst[i + 1], dst_type_ref, "");
|
|
}
|
|
}
|