mesa/src/microsoft/compiler/dxil_nir_lower_int_cubemaps.c

582 lines
21 KiB
C

/*
* Copyright © Microsoft Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "dxil_nir_lower_int_cubemaps.h"
#include "nir_builder.h"
#include "nir_builtin_builder.h"
static bool
type_needs_lowering(const struct glsl_type *type)
{
type = glsl_without_array(type);
if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
return false;
if (glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_CUBE)
return false;
if (glsl_type_is_image(type))
return true;
return glsl_base_type_is_integer(glsl_get_sampler_result_type(type));
}
static bool
lower_int_cubmap_to_array_filter(const nir_instr *instr,
const void *options)
{
bool lower_samplers = *(bool *)options;
if (instr->type == nir_instr_type_intrinsic) {
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_image_atomic_add:
case nir_intrinsic_image_atomic_and:
case nir_intrinsic_image_atomic_comp_swap:
case nir_intrinsic_image_atomic_dec_wrap:
case nir_intrinsic_image_atomic_exchange:
case nir_intrinsic_image_atomic_fadd:
case nir_intrinsic_image_atomic_fmax:
case nir_intrinsic_image_atomic_fmin:
case nir_intrinsic_image_atomic_imax:
case nir_intrinsic_image_atomic_imin:
case nir_intrinsic_image_atomic_inc_wrap:
case nir_intrinsic_image_atomic_or:
case nir_intrinsic_image_atomic_umax:
case nir_intrinsic_image_atomic_umin:
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_image_load:
case nir_intrinsic_image_size:
case nir_intrinsic_image_store:
case nir_intrinsic_image_deref_atomic_add:
case nir_intrinsic_image_deref_atomic_and:
case nir_intrinsic_image_deref_atomic_comp_swap:
case nir_intrinsic_image_deref_atomic_dec_wrap:
case nir_intrinsic_image_deref_atomic_exchange:
case nir_intrinsic_image_deref_atomic_fadd:
case nir_intrinsic_image_deref_atomic_fmax:
case nir_intrinsic_image_deref_atomic_fmin:
case nir_intrinsic_image_deref_atomic_imax:
case nir_intrinsic_image_deref_atomic_imin:
case nir_intrinsic_image_deref_atomic_inc_wrap:
case nir_intrinsic_image_deref_atomic_or:
case nir_intrinsic_image_deref_atomic_umax:
case nir_intrinsic_image_deref_atomic_umin:
case nir_intrinsic_image_deref_atomic_xor:
case nir_intrinsic_image_deref_load:
case nir_intrinsic_image_deref_size:
case nir_intrinsic_image_deref_store:
return nir_intrinsic_image_dim(intr) == GLSL_SAMPLER_DIM_CUBE;
default:
return false;
}
} else if (instr->type == nir_instr_type_deref) {
nir_deref_instr *deref = nir_instr_as_deref(instr);
return type_needs_lowering(deref->type);
} else if (instr->type == nir_instr_type_tex && lower_samplers) {
nir_tex_instr *tex = nir_instr_as_tex(instr);
if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE)
return false;
switch (tex->op) {
case nir_texop_tex:
case nir_texop_txb:
case nir_texop_txd:
case nir_texop_txl:
case nir_texop_txs:
case nir_texop_lod:
case nir_texop_tg4:
break;
default:
return false;
}
int sampler_deref = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
assert(sampler_deref >= 0);
nir_deref_instr *deref = nir_instr_as_deref(tex->src[sampler_deref].src.ssa->parent_instr);
nir_variable *cube = nir_deref_instr_get_variable(deref);
return glsl_base_type_is_integer(glsl_get_sampler_result_type(cube->type));
}
return false;
}
typedef struct {
bool image;
nir_ssa_def *rx;
nir_ssa_def *ry;
nir_ssa_def *rz;
nir_ssa_def *arx;
nir_ssa_def *ary;
nir_ssa_def *arz;
nir_ssa_def *array;
} coord_t;
/* This is taken from from sp_tex_sample:convert_cube */
static nir_ssa_def *
evaluate_face_x(nir_builder *b, coord_t *coord)
{
nir_ssa_def *sign = nir_fsign(b, coord->rx);
nir_ssa_def *positive = nir_fge(b, coord->rx, nir_imm_float(b, 0.0));
nir_ssa_def *ima = nir_fdiv(b, nir_imm_float(b, -0.5), coord->arx);
nir_ssa_def *x = nir_fadd(b, nir_fmul(b, nir_fmul(b, sign, ima), coord->rz), nir_imm_float(b, 0.5));
nir_ssa_def *y = nir_fadd(b, nir_fmul(b, ima, coord->ry), nir_imm_float(b, 0.5));
nir_ssa_def *face = nir_bcsel(b, positive, nir_imm_float(b, 0.0), nir_imm_float(b, 1.0));
if (coord->array)
face = nir_fadd(b, face, coord->array);
return coord->image ?
nir_vec4(b, x,y, face, nir_ssa_undef(b, 1, 32)) :
nir_vec3(b, x,y, face);
}
static nir_ssa_def *
evaluate_face_y(nir_builder *b, coord_t *coord)
{
nir_ssa_def *sign = nir_fsign(b, coord->ry);
nir_ssa_def *positive = nir_fge(b, coord->ry, nir_imm_float(b, 0.0));
nir_ssa_def *ima = nir_fdiv(b, nir_imm_float(b, 0.5), coord->ary);
nir_ssa_def *x = nir_fadd(b, nir_fmul(b, ima, coord->rx), nir_imm_float(b, 0.5));
nir_ssa_def *y = nir_fadd(b, nir_fmul(b, nir_fmul(b, sign, ima), coord->rz), nir_imm_float(b, 0.5));
nir_ssa_def *face = nir_bcsel(b, positive, nir_imm_float(b, 2.0), nir_imm_float(b, 3.0));
if (coord->array)
face = nir_fadd(b, face, coord->array);
return coord->image ?
nir_vec4(b, x,y, face, nir_ssa_undef(b, 1, 32)) :
nir_vec3(b, x,y, face);
}
static nir_ssa_def *
evaluate_face_z(nir_builder *b, coord_t *coord)
{
nir_ssa_def *sign = nir_fsign(b, coord->rz);
nir_ssa_def *positive = nir_fge(b, coord->rz, nir_imm_float(b, 0.0));
nir_ssa_def *ima = nir_fdiv(b, nir_imm_float(b, -0.5), coord->arz);
nir_ssa_def *x = nir_fadd(b, nir_fmul(b, nir_fmul(b, sign, ima), nir_fneg(b, coord->rx)), nir_imm_float(b, 0.5));
nir_ssa_def *y = nir_fadd(b, nir_fmul(b, ima, coord->ry), nir_imm_float(b, 0.5));
nir_ssa_def *face = nir_bcsel(b, positive, nir_imm_float(b, 4.0), nir_imm_float(b, 5.0));
if (coord->array)
face = nir_fadd(b, face, coord->array);
return coord->image ?
nir_vec4(b, x,y, face, nir_ssa_undef(b, 1, 32)) :
nir_vec3(b, x,y, face);
}
static nir_ssa_def *
create_array_tex_from_cube_tex(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *coord, nir_texop op)
{
nir_tex_instr *array_tex;
array_tex = nir_tex_instr_create(b->shader, tex->num_srcs);
array_tex->op = op;
array_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
array_tex->is_array = true;
array_tex->is_shadow = tex->is_shadow;
array_tex->is_new_style_shadow = tex->is_new_style_shadow;
array_tex->texture_index = tex->texture_index;
array_tex->sampler_index = tex->sampler_index;
array_tex->dest_type = tex->dest_type;
array_tex->coord_components = 3;
nir_src coord_src = nir_src_for_ssa(coord);
for (unsigned i = 0; i < tex->num_srcs; i++) {
nir_src *psrc = (tex->src[i].src_type == nir_tex_src_coord) ?
&coord_src : &tex->src[i].src;
nir_src_copy(&array_tex->src[i].src, psrc);
array_tex->src[i].src_type = tex->src[i].src_type;
}
nir_ssa_dest_init(&array_tex->instr, &array_tex->dest,
nir_tex_instr_dest_size(array_tex), 32, NULL);
nir_builder_instr_insert(b, &array_tex->instr);
return &array_tex->dest.ssa;
}
static nir_ssa_def *
handle_cube_edge(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *face, nir_ssa_def *array_slice_cube_base, nir_ssa_def *tex_size)
{
enum cube_remap
{
cube_remap_zero = 0,
cube_remap_x,
cube_remap_y,
cube_remap_tex_size,
cube_remap_tex_size_minus_x,
cube_remap_tex_size_minus_y,
cube_remap_size,
};
struct cube_remap_table
{
enum cube_remap remap_x;
enum cube_remap remap_y;
uint32_t remap_face;
};
static const struct cube_remap_table cube_remap_neg_x[6] =
{
{cube_remap_tex_size, cube_remap_y, 4},
{cube_remap_tex_size, cube_remap_y, 5},
{cube_remap_y, cube_remap_zero, 1},
{cube_remap_tex_size_minus_y, cube_remap_tex_size, 1},
{cube_remap_tex_size, cube_remap_y, 1},
{cube_remap_tex_size, cube_remap_y, 0},
};
static const struct cube_remap_table cube_remap_pos_x[6] =
{
{cube_remap_zero, cube_remap_y, 5},
{cube_remap_zero, cube_remap_y, 4},
{cube_remap_tex_size_minus_y, cube_remap_zero, 0},
{cube_remap_y, cube_remap_tex_size, 0},
{cube_remap_zero, cube_remap_y, 0},
{cube_remap_zero, cube_remap_y, 1},
};
static const struct cube_remap_table cube_remap_neg_y[6] =
{
{cube_remap_tex_size, cube_remap_tex_size_minus_x, 2},
{cube_remap_zero, cube_remap_x, 2},
{cube_remap_tex_size_minus_x, cube_remap_zero, 5},
{cube_remap_x, cube_remap_tex_size, 4},
{cube_remap_x, cube_remap_tex_size, 2},
{cube_remap_tex_size_minus_x, cube_remap_zero, 2},
};
static const struct cube_remap_table cube_remap_pos_y[6] =
{
{cube_remap_tex_size, cube_remap_x, 3},
{cube_remap_zero, cube_remap_tex_size_minus_x, 3},
{cube_remap_x, cube_remap_zero, 4},
{cube_remap_tex_size_minus_x, cube_remap_tex_size, 5},
{cube_remap_x, cube_remap_zero, 3},
{cube_remap_tex_size_minus_x, cube_remap_tex_size, 3},
};
static const struct cube_remap_table* remap_tables[4] = {
cube_remap_neg_x,
cube_remap_pos_x,
cube_remap_neg_y,
cube_remap_pos_y
};
nir_ssa_def *zero = nir_imm_int(b, 0);
/* Doesn't matter since the texture is square */
tex_size = nir_channel(b, tex_size, 0);
nir_ssa_def *x_on = nir_iand(b, nir_ige(b, x, zero), nir_ige(b, tex_size, x));
nir_ssa_def *y_on = nir_iand(b, nir_ige(b, y, zero), nir_ige(b, tex_size, y));
nir_ssa_def *one_on = nir_ixor(b, x_on, y_on);
/* If the sample did not fall off the face in either dimension, then set output = input */
nir_ssa_def *x_result = x;
nir_ssa_def *y_result = y;
nir_ssa_def *face_result = face;
/* otherwise, if the sample fell off the face in either the X or the Y direction, remap to the new face */
nir_ssa_def *remap_predicates[4] =
{
nir_iand(b, one_on, nir_ilt(b, x, zero)),
nir_iand(b, one_on, nir_ilt(b, tex_size, x)),
nir_iand(b, one_on, nir_ilt(b, y, zero)),
nir_iand(b, one_on, nir_ilt(b, tex_size, y)),
};
nir_ssa_def *remap_array[cube_remap_size];
remap_array[cube_remap_zero] = zero;
remap_array[cube_remap_x] = x;
remap_array[cube_remap_y] = y;
remap_array[cube_remap_tex_size] = tex_size;
remap_array[cube_remap_tex_size_minus_x] = nir_isub(b, tex_size, x);
remap_array[cube_remap_tex_size_minus_y] = nir_isub(b, tex_size, y);
/* For each possible way the sample could have fallen off */
for (unsigned i = 0; i < 4; i++) {
const struct cube_remap_table* remap_table = remap_tables[i];
/* For each possible original face */
for (unsigned j = 0; j < 6; j++) {
nir_ssa_def *predicate = nir_iand(b, remap_predicates[i], nir_ieq(b, face, nir_imm_int(b, j)));
x_result = nir_bcsel(b, predicate, remap_array[remap_table[j].remap_x], x_result);
y_result = nir_bcsel(b, predicate, remap_array[remap_table[j].remap_y], y_result);
face_result = nir_bcsel(b, predicate, remap_array[remap_table[j].remap_face], face_result);
}
}
return nir_vec3(b, x_result, y_result, nir_iadd(b, face_result, array_slice_cube_base));
}
static nir_ssa_def *
handle_cube_gather(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *coord)
{
nir_ssa_def *tex_size = nir_get_texture_size(b, tex);
/* nir_get_texture_size puts the cursor before the tex op */
b->cursor = nir_after_instr(coord->parent_instr);
nir_ssa_def *const_05 = nir_imm_float(b, 0.5f);
nir_ssa_def *texel_coords = nir_fmul(b, nir_channels(b, coord, 3),
nir_i2f32(b, nir_channels(b, tex_size, 3)));
nir_ssa_def *x_orig = nir_channel(b, texel_coords, 0);
nir_ssa_def *y_orig = nir_channel(b, texel_coords, 1);
nir_ssa_def *x_pos = nir_f2i32(b, nir_fadd(b, x_orig, const_05));
nir_ssa_def *x_neg = nir_f2i32(b, nir_fsub(b, x_orig, const_05));
nir_ssa_def *y_pos = nir_f2i32(b, nir_fadd(b, y_orig, const_05));
nir_ssa_def *y_neg = nir_f2i32(b, nir_fsub(b, y_orig, const_05));
nir_ssa_def *coords[4][2] = {
{ x_neg, y_pos },
{ x_pos, y_pos },
{ x_pos, y_neg },
{ x_neg, y_neg },
};
nir_ssa_def *array_slice_2d = nir_f2i32(b, nir_channel(b, coord, 2));
nir_ssa_def *face = nir_imod(b, array_slice_2d, nir_imm_int(b, 6));
nir_ssa_def *array_slice_cube_base = nir_isub(b, array_slice_2d, face);
nir_ssa_def *channels[4];
for (unsigned i = 0; i < 4; ++i) {
nir_ssa_def *final_coord = handle_cube_edge(b, coords[i][0], coords[i][1], face, array_slice_cube_base, tex_size);
nir_ssa_def *sampled_val = create_array_tex_from_cube_tex(b, tex, final_coord, nir_texop_txf);
channels[i] = nir_channel(b, sampled_val, tex->component);
}
return nir_vec(b, channels, 4);
}
static nir_ssa_def *
lower_cube_coords(nir_builder *b, nir_ssa_def *coord, bool is_array, bool is_image)
{
coord_t coords;
coords.image = is_image;
coords.rx = nir_channel(b, coord, 0);
coords.ry = nir_channel(b, coord, 1);
coords.rz = nir_channel(b, coord, 2);
coords.arx = nir_fabs(b, coords.rx);
coords.ary = nir_fabs(b, coords.ry);
coords.arz = nir_fabs(b, coords.rz);
coords.array = NULL;
if (is_array)
coords.array = nir_fmul(b, nir_channel(b, coord, 3), nir_imm_float(b, 6.0f));
nir_ssa_def *use_face_x = nir_iand(b,
nir_fge(b, coords.arx, coords.ary),
nir_fge(b, coords.arx, coords.arz));
nir_if *use_face_x_if = nir_push_if(b, use_face_x);
nir_ssa_def *face_x_coord = evaluate_face_x(b, &coords);
nir_if *use_face_x_else = nir_push_else(b, use_face_x_if);
nir_ssa_def *use_face_y = nir_iand(b,
nir_fge(b, coords.ary, coords.arx),
nir_fge(b, coords.ary, coords.arz));
nir_if *use_face_y_if = nir_push_if(b, use_face_y);
nir_ssa_def *face_y_coord = evaluate_face_y(b, &coords);
nir_if *use_face_y_else = nir_push_else(b, use_face_y_if);
nir_ssa_def *face_z_coord = evaluate_face_z(b, &coords);
nir_pop_if(b, use_face_y_else);
nir_ssa_def *face_y_or_z_coord = nir_if_phi(b, face_y_coord, face_z_coord);
nir_pop_if(b, use_face_x_else);
// This contains in xy the normalized sample coordinates, and in z the face index
nir_ssa_def *coord_and_face = nir_if_phi(b, face_x_coord, face_y_or_z_coord);
return coord_and_face;
}
static nir_ssa_def *
lower_cube_sample(nir_builder *b, nir_tex_instr *tex)
{
int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
assert(coord_index >= 0);
/* Evaluate the face and the xy coordinates for a 2D tex op */
nir_ssa_def *coord = tex->src[coord_index].src.ssa;
nir_ssa_def *coord_and_face = lower_cube_coords(b, coord, tex->is_array, false);
if (tex->op == nir_texop_tg4)
return handle_cube_gather(b, tex, coord_and_face);
else
return create_array_tex_from_cube_tex(b, tex, coord_and_face, tex->op);
}
static nir_ssa_def *
lower_cube_image_load_store_atomic(nir_builder *b, nir_intrinsic_instr *intr)
{
b->cursor = nir_before_instr(&intr->instr);
nir_intrinsic_set_image_array(intr, true);
nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_2D);
return NIR_LOWER_INSTR_PROGRESS;
}
static nir_ssa_def *
lower_cube_txs(nir_builder *b, nir_tex_instr *tex)
{
b->cursor = nir_after_instr(&tex->instr);
if (!tex->is_array)
return nir_channels(b, &tex->dest.ssa, 3);
nir_ssa_def *array_dim = nir_channel(b, &tex->dest.ssa, 2);
nir_ssa_def *cube_array_dim = nir_idiv(b, array_dim, nir_imm_int(b, 6));
return nir_vec3(b, nir_channel(b, &tex->dest.ssa, 0),
nir_channel(b, &tex->dest.ssa, 1),
cube_array_dim);
}
static nir_ssa_def *
lower_cube_image_size(nir_builder *b, nir_intrinsic_instr *intr)
{
b->cursor = nir_after_instr(&intr->instr);
if (!nir_intrinsic_image_array(intr))
return nir_channels(b, &intr->dest.ssa, 3);
nir_ssa_def *array_dim = nir_channel(b, &intr->dest.ssa, 2);
nir_ssa_def *cube_array_dim = nir_idiv(b, array_dim, nir_imm_int(b, 6));
return nir_vec3(b, nir_channel(b, &intr->dest.ssa, 0),
nir_channel(b, &intr->dest.ssa, 1),
cube_array_dim);
}
static const struct glsl_type *
make_2darray_sampler_from_cubemap(const struct glsl_type *type)
{
return glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE ?
glsl_sampler_type(
GLSL_SAMPLER_DIM_2D,
false, true,
glsl_get_sampler_result_type(type)) : type;
}
static const struct glsl_type *
make_2darray_image_from_cubemap(const struct glsl_type *type)
{
return glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE ?
glsl_image_type(
GLSL_SAMPLER_DIM_2D,
true,
glsl_get_sampler_result_type(type)) : type;
}
static const struct glsl_type *
make_2darray_from_cubemap_with_array(const struct glsl_type *type, bool is_image)
{
if (glsl_type_is_array(type)) {
const struct glsl_type *new_type = glsl_without_array(type);
return new_type != type ? glsl_array_type(make_2darray_from_cubemap_with_array(glsl_without_array(type), is_image),
glsl_get_length(type), 0) : type;
} else if (is_image)
return make_2darray_image_from_cubemap(type);
else
return make_2darray_sampler_from_cubemap(type);
}
static nir_ssa_def *
lower_int_cubemap_to_array_tex(nir_builder *b, nir_tex_instr *tex)
{
switch (tex->op) {
case nir_texop_tex:
case nir_texop_txb:
case nir_texop_txd:
case nir_texop_txl:
case nir_texop_lod:
case nir_texop_tg4:
return lower_cube_sample(b, tex);
case nir_texop_txs:
return lower_cube_txs(b, tex);
default:
unreachable("Unsupported cupe map texture operation");
}
}
static nir_ssa_def *
lower_cube_image_intrinsic(nir_builder *b, nir_intrinsic_instr *intr)
{
if (intr->intrinsic == nir_intrinsic_image_size ||
intr->intrinsic == nir_intrinsic_image_deref_size)
return lower_cube_image_size(b, intr);
else
return lower_cube_image_load_store_atomic(b, intr);
}
static nir_ssa_def *
lower_cube_image_deref(nir_builder *b, nir_deref_instr *deref)
{
deref->type = make_2darray_from_cubemap_with_array(
deref->type,
glsl_type_is_image(glsl_without_array(deref->type)));
return NIR_LOWER_INSTR_PROGRESS;
}
static nir_ssa_def *
lower_int_cubmap_to_array_impl(nir_builder *b, nir_instr *instr,
void *options)
{
bool lower_samplers = *(bool *)options;
if (instr->type == nir_instr_type_tex && lower_samplers)
return lower_int_cubemap_to_array_tex(b, nir_instr_as_tex(instr));
else if (instr->type == nir_instr_type_intrinsic)
return lower_cube_image_intrinsic(b, nir_instr_as_intrinsic(instr));
else if (instr->type == nir_instr_type_deref)
return lower_cube_image_deref(b, nir_instr_as_deref(instr));
return NULL;
}
bool
dxil_nir_lower_int_cubemaps(nir_shader *s, bool lower_samplers)
{
bool result =
nir_shader_lower_instructions(s,
lower_int_cubmap_to_array_filter,
lower_int_cubmap_to_array_impl,
&lower_samplers);
if (result) {
nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform | nir_var_image) {
if (!type_needs_lowering(var->type))
continue;
bool is_image = glsl_type_is_image(glsl_without_array(var->type));
var->type = make_2darray_from_cubemap_with_array(var->type, is_image);
}
}
return result;
}