radeonsi: implement legacy GL_DOUBLE vertex formats
so that we can disable u_vbuf for GL core profiles. Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
parent
2c8ee2e825
commit
4c36553a46
|
@ -319,6 +319,21 @@ static LLVMValueRef get_instance_index_for_fetch(
|
|||
LLVMGetParam(radeon_bld->main_fn, param_start_instance), "");
|
||||
}
|
||||
|
||||
/* Bitcast <4 x float> to <2 x double>, extract the component, and convert
|
||||
* to float. */
|
||||
static LLVMValueRef extract_double_to_float(struct si_shader_context *ctx,
|
||||
LLVMValueRef vec4,
|
||||
unsigned double_index)
|
||||
{
|
||||
LLVMBuilderRef builder = ctx->gallivm.builder;
|
||||
LLVMTypeRef f64 = LLVMDoubleTypeInContext(ctx->gallivm.context);
|
||||
LLVMValueRef dvec2 = LLVMBuildBitCast(builder, vec4,
|
||||
LLVMVectorType(f64, 2), "");
|
||||
LLVMValueRef index = LLVMConstInt(ctx->i32, double_index, 0);
|
||||
LLVMValueRef value = LLVMBuildExtractElement(builder, dvec2, index, "");
|
||||
return LLVMBuildFPTrunc(builder, value, ctx->f32, "");
|
||||
}
|
||||
|
||||
static void declare_input_vs(
|
||||
struct si_shader_context *ctx,
|
||||
unsigned input_index,
|
||||
|
@ -330,14 +345,15 @@ static void declare_input_vs(
|
|||
|
||||
unsigned chan;
|
||||
unsigned fix_fetch;
|
||||
unsigned num_fetches;
|
||||
unsigned fetch_stride;
|
||||
|
||||
LLVMValueRef t_list_ptr;
|
||||
LLVMValueRef t_offset;
|
||||
LLVMValueRef t_list;
|
||||
LLVMValueRef attribute_offset;
|
||||
LLVMValueRef buffer_index;
|
||||
LLVMValueRef vertex_index;
|
||||
LLVMValueRef args[3];
|
||||
LLVMValueRef input;
|
||||
LLVMValueRef input[3];
|
||||
|
||||
/* Load the T list */
|
||||
t_list_ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_VERTEX_BUFFERS);
|
||||
|
@ -346,29 +362,42 @@ static void declare_input_vs(
|
|||
|
||||
t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
|
||||
|
||||
/* Build the attribute offset */
|
||||
attribute_offset = lp_build_const_int32(gallivm, 0);
|
||||
|
||||
buffer_index = LLVMGetParam(ctx->main_fn,
|
||||
vertex_index = LLVMGetParam(ctx->main_fn,
|
||||
ctx->param_vertex_index0 +
|
||||
input_index);
|
||||
|
||||
fix_fetch = (ctx->shader->key.mono.vs.fix_fetch >> (4 * input_index)) & 0xf;
|
||||
|
||||
/* Do multiple loads for double formats. */
|
||||
if (fix_fetch == SI_FIX_FETCH_RGB_64_FLOAT) {
|
||||
num_fetches = 3; /* 3 2-dword loads */
|
||||
fetch_stride = 8;
|
||||
} else if (fix_fetch == SI_FIX_FETCH_RGBA_64_FLOAT) {
|
||||
num_fetches = 2; /* 2 4-dword loads */
|
||||
fetch_stride = 16;
|
||||
} else {
|
||||
num_fetches = 1;
|
||||
fetch_stride = 0;
|
||||
}
|
||||
|
||||
args[0] = t_list;
|
||||
args[1] = attribute_offset;
|
||||
args[2] = buffer_index;
|
||||
input = lp_build_intrinsic(gallivm->builder,
|
||||
args[2] = vertex_index;
|
||||
|
||||
for (unsigned i = 0; i < num_fetches; i++) {
|
||||
args[1] = LLVMConstInt(ctx->i32, fetch_stride * i, 0);
|
||||
|
||||
input[i] = lp_build_intrinsic(gallivm->builder,
|
||||
"llvm.SI.vs.load.input", ctx->v4f32, args, 3,
|
||||
LP_FUNC_ATTR_READNONE);
|
||||
}
|
||||
|
||||
/* Break up the vec4 into individual components */
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
|
||||
out[chan] = LLVMBuildExtractElement(gallivm->builder,
|
||||
input, llvm_chan, "");
|
||||
input[0], llvm_chan, "");
|
||||
}
|
||||
|
||||
fix_fetch = (ctx->shader->key.mono.vs.fix_fetch >> (4 * input_index)) & 0xf;
|
||||
|
||||
switch (fix_fetch) {
|
||||
case SI_FIX_FETCH_A2_SNORM:
|
||||
case SI_FIX_FETCH_A2_SSCALED:
|
||||
|
@ -464,6 +493,25 @@ static void declare_input_vs(
|
|||
out[chan], ctx->f32, "");
|
||||
}
|
||||
break;
|
||||
case SI_FIX_FETCH_RG_64_FLOAT:
|
||||
for (chan = 0; chan < 2; chan++)
|
||||
out[chan] = extract_double_to_float(ctx, input[0], chan);
|
||||
|
||||
out[2] = LLVMConstReal(ctx->f32, 0);
|
||||
out[3] = LLVMConstReal(ctx->f32, 1);
|
||||
break;
|
||||
case SI_FIX_FETCH_RGB_64_FLOAT:
|
||||
for (chan = 0; chan < 3; chan++)
|
||||
out[chan] = extract_double_to_float(ctx, input[chan], 0);
|
||||
|
||||
out[3] = LLVMConstReal(ctx->f32, 1);
|
||||
break;
|
||||
case SI_FIX_FETCH_RGBA_64_FLOAT:
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
out[chan] = extract_double_to_float(ctx, input[chan / 2],
|
||||
chan % 2);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -247,6 +247,10 @@ enum {
|
|||
SI_FIX_FETCH_RGBA_32_SSCALED,
|
||||
SI_FIX_FETCH_RGBA_32_FIXED,
|
||||
SI_FIX_FETCH_RGBX_32_FIXED,
|
||||
SI_FIX_FETCH_RG_64_FLOAT,
|
||||
SI_FIX_FETCH_RGB_64_FLOAT,
|
||||
SI_FIX_FETCH_RGBA_64_FLOAT,
|
||||
SI_FIX_FETCH_RESERVED_15, /* maximum */
|
||||
};
|
||||
|
||||
struct si_shader;
|
||||
|
|
|
@ -1762,6 +1762,19 @@ static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen,
|
|||
return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
|
||||
}
|
||||
break;
|
||||
case 64:
|
||||
/* Legacy double formats. */
|
||||
switch (desc->nr_channels) {
|
||||
case 1: /* 1 load */
|
||||
return V_008F0C_BUF_DATA_FORMAT_32_32;
|
||||
case 2: /* 1 load */
|
||||
return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
|
||||
case 3: /* 3 loads */
|
||||
return V_008F0C_BUF_DATA_FORMAT_32_32;
|
||||
case 4: /* 2 loads */
|
||||
return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return V_008F0C_BUF_DATA_FORMAT_INVALID;
|
||||
|
@ -3359,6 +3372,7 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
|
|||
unsigned data_format, num_format;
|
||||
int first_non_void;
|
||||
unsigned vbo_index = elements[i].vertex_buffer_index;
|
||||
unsigned char swizzle[4];
|
||||
|
||||
if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
|
||||
FREE(v);
|
||||
|
@ -3375,13 +3389,8 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
|
|||
data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
|
||||
num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
|
||||
channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL;
|
||||
memcpy(swizzle, desc->swizzle, sizeof(swizzle));
|
||||
|
||||
v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
|
||||
S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
|
||||
S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
|
||||
S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
|
||||
S_008F0C_NUM_FORMAT(num_format) |
|
||||
S_008F0C_DATA_FORMAT(data_format);
|
||||
v->format_size[i] = desc->block.bits / 8;
|
||||
|
||||
/* The hardware always treats the 2-bit alpha channel as
|
||||
|
@ -3421,7 +3430,42 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
|
|||
v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_USCALED << (4 * i);
|
||||
}
|
||||
}
|
||||
} else if (channel && channel->size == 64 &&
|
||||
channel->type == UTIL_FORMAT_TYPE_FLOAT) {
|
||||
switch (desc->nr_channels) {
|
||||
case 1:
|
||||
case 2:
|
||||
v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RG_64_FLOAT << (4 * i);
|
||||
swizzle[0] = PIPE_SWIZZLE_X;
|
||||
swizzle[1] = PIPE_SWIZZLE_Y;
|
||||
swizzle[2] = desc->nr_channels == 2 ? PIPE_SWIZZLE_Z : PIPE_SWIZZLE_0;
|
||||
swizzle[3] = desc->nr_channels == 2 ? PIPE_SWIZZLE_W : PIPE_SWIZZLE_0;
|
||||
break;
|
||||
case 3:
|
||||
v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGB_64_FLOAT << (4 * i);
|
||||
swizzle[0] = PIPE_SWIZZLE_X; /* 3 loads */
|
||||
swizzle[1] = PIPE_SWIZZLE_Y;
|
||||
swizzle[2] = PIPE_SWIZZLE_0;
|
||||
swizzle[3] = PIPE_SWIZZLE_0;
|
||||
break;
|
||||
case 4:
|
||||
v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_64_FLOAT << (4 * i);
|
||||
swizzle[0] = PIPE_SWIZZLE_X; /* 2 loads */
|
||||
swizzle[1] = PIPE_SWIZZLE_Y;
|
||||
swizzle[2] = PIPE_SWIZZLE_Z;
|
||||
swizzle[3] = PIPE_SWIZZLE_W;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
|
||||
S_008F0C_DST_SEL_Y(si_map_swizzle(swizzle[1])) |
|
||||
S_008F0C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
|
||||
S_008F0C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
|
||||
S_008F0C_NUM_FORMAT(num_format) |
|
||||
S_008F0C_DATA_FORMAT(data_format);
|
||||
|
||||
/* We work around the fact that 8_8_8 and 16_16_16 data formats
|
||||
* do not exist by using the corresponding 4-component formats.
|
||||
|
|
Loading…
Reference in New Issue