diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h index 9223e838095..7d449262585 100644 --- a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@ -28,6 +28,10 @@ struct exec_list; typedef struct nir_builder { struct exec_list *cf_node_list; + + nir_block *before_block; + nir_block *after_block; + nir_instr *before_instr; nir_instr *after_instr; @@ -48,6 +52,30 @@ nir_builder_insert_after_cf_list(nir_builder *build, struct exec_list *cf_node_list) { build->cf_node_list = cf_node_list; + build->before_block = NULL; + build->after_block = NULL; + build->before_instr = NULL; + build->after_instr = NULL; +} + +static inline void +nir_builder_insert_before_block(nir_builder *build, + nir_block *block) +{ + build->cf_node_list = NULL; + build->before_block = block; + build->after_block = NULL; + build->before_instr = NULL; + build->after_instr = NULL; +} + +static inline void +nir_builder_insert_after_block(nir_builder *build, + nir_block *block) +{ + build->cf_node_list = NULL; + build->before_block = NULL; + build->after_block = block; build->before_instr = NULL; build->after_instr = NULL; } @@ -56,6 +84,8 @@ static inline void nir_builder_insert_before_instr(nir_builder *build, nir_instr *before_instr) { build->cf_node_list = NULL; + build->before_block = NULL; + build->after_block = NULL; build->before_instr = before_instr; build->after_instr = NULL; } @@ -64,6 +94,8 @@ static inline void nir_builder_insert_after_instr(nir_builder *build, nir_instr *after_instr) { build->cf_node_list = NULL; + build->before_block = NULL; + build->after_block = NULL; build->before_instr = NULL; build->after_instr = after_instr; } @@ -73,6 +105,10 @@ nir_builder_instr_insert(nir_builder *build, nir_instr *instr) { if (build->cf_node_list) { nir_instr_insert_after_cf_list(build->cf_node_list, instr); + } else if (build->before_block) { + nir_instr_insert_before_block(build->before_block, instr); + } else if (build->after_block) { + nir_instr_insert_after_block(build->after_block, instr); } else if (build->before_instr) { nir_instr_insert_before(build->before_instr, instr); } else { @@ -240,6 +276,23 @@ nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4], nir_imov_alu(build, alu_src, num_components); } +/* Selects the right fdot given the number of components in each source. */ +static inline nir_ssa_def * +nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1) +{ + assert(src0->num_components == src1->num_components); + switch (src0->num_components) { + case 1: return nir_fmul(build, src0, src1); + case 2: return nir_fdot2(build, src0, src1); + case 3: return nir_fdot3(build, src0, src1); + case 4: return nir_fdot4(build, src0, src1); + default: + unreachable("bad component size"); + } + + return NULL; +} + /** * Turns a nir_src into a nir_ssa_def * so it can be passed to * nir_build_alu()-based builder calls. diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index 35421506545..3c00bdb3c18 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -240,7 +240,7 @@ glsl_vector_type(enum glsl_base_type base_type, unsigned components) const glsl_type * glsl_matrix_type(enum glsl_base_type base_type, unsigned rows, unsigned columns) { - assert(rows > 1 && rows <= 4 && columns > 1 && columns <= 4); + assert(rows >= 1 && rows <= 4 && columns >= 1 && columns <= 4); return glsl_type::get_instance(base_type, rows, columns); } @@ -270,3 +270,10 @@ glsl_function_type(const glsl_type *return_type, { return glsl_type::get_function_instance(return_type, params, num_params); } + +const glsl_type * +glsl_transposed_type(const struct glsl_type *type) +{ + return glsl_type::get_instance(type->base_type, type->matrix_columns, + type->vector_elements); +} diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h index ceb131c9f47..60e1d9d96fc 100644 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@ -103,6 +103,8 @@ const struct glsl_type * glsl_function_type(const struct glsl_type *return_type, const struct glsl_function_param *params, unsigned num_params); +const struct glsl_type *glsl_transposed_type(const struct glsl_type *type); + #ifdef __cplusplus } #endif diff --git a/src/glsl/nir/spirv_glsl450_to_nir.c b/src/glsl/nir/spirv_glsl450_to_nir.c index 3b9d0940aad..100fde9ce7f 100644 --- a/src/glsl/nir/spirv_glsl450_to_nir.c +++ b/src/glsl/nir/spirv_glsl450_to_nir.c @@ -140,12 +140,14 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + val->ssa = rzalloc(b, struct vtn_ssa_value); + val->ssa->type = val->type; /* Collect the various SSA sources */ unsigned num_inputs = count - 5; nir_ssa_def *src[3]; for (unsigned i = 0; i < num_inputs; i++) - src[i] = vtn_ssa_value(b, w[i + 5]); + src[i] = vtn_ssa_value(b, w[i + 5])->def; nir_op op; switch (entrypoint) { @@ -158,16 +160,16 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, case Ceil: op = nir_op_fceil; break; case Fract: op = nir_op_ffract; break; case Radians: - val->ssa = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 0.01745329251)); + val->ssa->def = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 0.01745329251)); return; case Degrees: - val->ssa = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 57.2957795131)); + val->ssa->def = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 57.2957795131)); return; case Sin: op = nir_op_fsin; break; case Cos: op = nir_op_fcos; break; case Tan: - val->ssa = nir_fdiv(&b->nb, nir_fsin(&b->nb, src[0]), - nir_fcos(&b->nb, src[0])); + val->ssa->def = nir_fdiv(&b->nb, nir_fsin(&b->nb, src[0]), + nir_fcos(&b->nb, src[0])); return; case Pow: op = nir_op_fpow; break; case Exp2: op = nir_op_fexp2; break; @@ -180,7 +182,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, case Max: op = nir_op_fmax; break; case Mix: op = nir_op_flrp; break; case Step: - val->ssa = nir_sge(&b->nb, src[1], src[0]); + val->ssa->def = nir_sge(&b->nb, src[1], src[0]); return; case FloatBitsToInt: @@ -188,7 +190,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, case IntBitsToFloat: case UintBitsToFloat: /* Probably going to be removed from the final version of the spec. */ - val->ssa = src[0]; + val->ssa->def = src[0]; return; case Fma: op = nir_op_ffma; break; @@ -207,13 +209,13 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, case UnpackHalf2x16: op = nir_op_unpack_half_2x16; break; case Length: - val->ssa = build_length(&b->nb, src[0]); + val->ssa->def = build_length(&b->nb, src[0]); return; case Distance: - val->ssa = build_length(&b->nb, nir_fsub(&b->nb, src[0], src[1])); + val->ssa->def = build_length(&b->nb, nir_fsub(&b->nb, src[0], src[1])); return; case Normalize: - val->ssa = nir_fdiv(&b->nb, src[0], build_length(&b->nb, src[0])); + val->ssa->def = nir_fdiv(&b->nb, src[0], build_length(&b->nb, src[0])); return; case UaddCarry: op = nir_op_uadd_carry; break; @@ -256,7 +258,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); nir_ssa_dest_init(&instr->instr, &instr->dest.dest, glsl_get_vector_elements(val->type), val->name); - val->ssa = &instr->dest.dest.ssa; + val->ssa->def = &instr->dest.dest.ssa; for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) instr->src[i].src = nir_src_for_ssa(src[i]); diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index d8523fd5a52..a47c683ffdf 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -28,23 +28,93 @@ #include "spirv_to_nir_private.h" #include "nir_vla.h" -nir_ssa_def * +static struct vtn_ssa_value * +vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, + const struct glsl_type *type) +{ + struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant); + + if (entry) + return entry->data; + + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + if (glsl_type_is_vector_or_scalar(type)) { + unsigned num_components = glsl_get_vector_elements(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, num_components); + + for (unsigned i = 0; i < num_components; i++) + load->value.u[i] = constant->value.u[i]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + val->def = &load->def; + } else { + assert(glsl_type_is_matrix(type)); + unsigned rows = glsl_get_vector_elements(val->type); + unsigned columns = glsl_get_matrix_columns(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, columns); + + for (unsigned i = 0; i < columns; i++) { + struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value); + col_val->type = glsl_get_column_type(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, rows); + + for (unsigned j = 0; j < rows; j++) + load->value.u[j] = constant->value.u[rows * i + j]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + col_val->def = &load->def; + + val->elems[i] = col_val; + } + } + break; + + case GLSL_TYPE_ARRAY: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + const struct glsl_type *elem_type = glsl_get_array_element(val->type); + for (unsigned i = 0; i < elems; i++) + val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], + elem_type); + break; + } + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *elem_type = + glsl_get_struct_field(val->type, i); + val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], + elem_type); + } + break; + } + + default: + unreachable("bad constant type"); + } + + return val; +} + +struct vtn_ssa_value * vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) { struct vtn_value *val = vtn_untyped_value(b, value_id); switch (val->value_type) { - case vtn_value_type_constant: { - assert(glsl_type_is_vector_or_scalar(val->type)); - unsigned num_components = glsl_get_vector_elements(val->type); - nir_load_const_instr *load = - nir_load_const_instr_create(b->shader, num_components); - - for (unsigned i = 0; i < num_components; i++) - load->value.u[0] = val->constant->value.u[0]; - - nir_builder_instr_insert(&b->nb, &load->instr); - return &load->def; - } + case vtn_value_type_constant: + return vtn_const_ssa_value(b, val->constant, val->type); case vtn_value_type_ssa: return val->ssa; @@ -512,11 +582,210 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, case SpvDecorationFPFastMathMode: case SpvDecorationLinkageAttributes: case SpvDecorationSpecId: + break; default: unreachable("Unhandled variable decoration"); } } +static struct vtn_ssa_value * +_vtn_variable_load(struct vtn_builder *b, + nir_deref_var *src_deref, nir_deref *src_deref_tail) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = src_deref_tail->type; + + /* The deref tail may contain a deref to select a component of a vector (in + * other words, it might not be an actual tail) so we have to save it away + * here since we overwrite it later. + */ + nir_deref *old_child = src_deref_tail->child; + + if (glsl_type_is_vector_or_scalar(val->type)) { + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); + load->variables[0] = + nir_deref_as_var(nir_copy_deref(load, &src_deref->deref)); + load->num_components = glsl_get_vector_elements(val->type); + nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); + + nir_builder_instr_insert(&b->nb, &load->instr); + + if (src_deref->var->data.mode == nir_var_uniform && + glsl_get_base_type(val->type) == GLSL_TYPE_BOOL) { + /* Uniform boolean loads need to be fixed up since they're defined + * to be zero/nonzero rather than NIR_FALSE/NIR_TRUE. + */ + val->def = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); + } else { + val->def = &load->dest.ssa; + } + } else if (glsl_get_base_type(val->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(val->type)) { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + + nir_deref_array *deref = nir_deref_array_create(b); + deref->deref_array_type = nir_deref_array_type_direct; + deref->deref.type = glsl_get_array_element(val->type); + src_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->base_offset = i; + val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); + } + } else { + assert(glsl_get_base_type(val->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + + nir_deref_struct *deref = nir_deref_struct_create(b, 0); + src_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->index = i; + deref->deref.type = glsl_get_struct_field(val->type, i); + val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); + } + } + + src_deref_tail->child = old_child; + + return val; +} + +static void +_vtn_variable_store(struct vtn_builder *b, nir_deref_var *dest_deref, + nir_deref *dest_deref_tail, struct vtn_ssa_value *src) +{ + nir_deref *old_child = dest_deref_tail->child; + + if (glsl_type_is_vector_or_scalar(src->type)) { + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); + store->variables[0] = + nir_deref_as_var(nir_copy_deref(store, &dest_deref->deref)); + store->src[0] = nir_src_for_ssa(src->def); + + nir_builder_instr_insert(&b->nb, &store->instr); + } else if (glsl_get_base_type(src->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(src->type)) { + unsigned elems = glsl_get_length(src->type); + + nir_deref_array *deref = nir_deref_array_create(b); + deref->deref_array_type = nir_deref_array_type_direct; + deref->deref.type = glsl_get_array_element(src->type); + dest_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->base_offset = i; + _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); + } + } else { + assert(glsl_get_base_type(src->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(src->type); + + nir_deref_struct *deref = nir_deref_struct_create(b, 0); + dest_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->index = i; + deref->deref.type = glsl_get_struct_field(src->type, i); + _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); + } + } + + dest_deref_tail->child = old_child; +} + +/* + * Gets the NIR-level deref tail, which may have as a child an array deref + * selecting which component due to OpAccessChain supporting per-component + * indexing in SPIR-V. + */ + +static nir_deref * +get_deref_tail(nir_deref_var *deref) +{ + nir_deref *cur = &deref->deref; + while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child) + cur = cur->child; + + return cur; +} + +static nir_ssa_def *vtn_vector_extract(struct vtn_builder *b, + nir_ssa_def *src, unsigned index); + +static nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, + nir_ssa_def *src, + nir_ssa_def *index); + +static struct vtn_ssa_value * +vtn_variable_load(struct vtn_builder *b, nir_deref_var *src) +{ + nir_deref *src_tail = get_deref_tail(src); + struct vtn_ssa_value *val = _vtn_variable_load(b, src, src_tail); + + if (src_tail->child) { + nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); + assert(vec_deref->deref.child == NULL); + val->type = vec_deref->deref.type; + if (vec_deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset); + else + val->def = vtn_vector_extract_dynamic(b, val->def, + vec_deref->indirect.ssa); + } + + return val; +} + +static nir_ssa_def * vtn_vector_insert(struct vtn_builder *b, + nir_ssa_def *src, nir_ssa_def *insert, + unsigned index); + +static nir_ssa_def * vtn_vector_insert_dynamic(struct vtn_builder *b, + nir_ssa_def *src, + nir_ssa_def *insert, + nir_ssa_def *index); +static void +vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest) +{ + nir_deref *dest_tail = get_deref_tail(dest); + if (dest_tail->child) { + struct vtn_ssa_value *val = _vtn_variable_load(b, dest, dest_tail); + nir_deref_array *deref = nir_deref_as_array(dest_tail->child); + assert(deref->deref.child == NULL); + if (deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_insert(b, val->def, src->def, + deref->base_offset); + else + val->def = vtn_vector_insert_dynamic(b, val->def, src->def, + deref->indirect.ssa); + _vtn_variable_store(b, dest, dest_tail, val); + } else { + _vtn_variable_store(b, dest, dest_tail, src); + } +} + +static void +vtn_variable_copy(struct vtn_builder *b, nir_deref_var *src, + nir_deref_var *dest) +{ + nir_deref *src_tail = get_deref_tail(src); + + if (src_tail->child) { + assert(get_deref_tail(dest)->child); + struct vtn_ssa_value *val = vtn_variable_load(b, src); + vtn_variable_store(b, val, dest); + } else { + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); + copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); + copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref)); + + nir_builder_instr_insert(&b->nb, ©->instr); + } +} + static void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -645,7 +914,8 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, assert(idx_val->value_type == vtn_value_type_ssa); deref_arr->deref_array_type = nir_deref_array_type_indirect; deref_arr->base_offset = 0; - deref_arr->indirect = nir_src_for_ssa(vtn_ssa_value(b, w[1])); + deref_arr->indirect = + nir_src_for_ssa(vtn_ssa_value(b, w[1])->def); } tail->child = &deref_arr->deref; break; @@ -671,12 +941,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; nir_deref_var *src = vtn_value(b, w[2], vtn_value_type_deref)->deref; - nir_intrinsic_instr *copy = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); - copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); - copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref)); - - nir_builder_instr_insert(&b->nb, ©->instr); + vtn_variable_copy(b, src, dest); break; } @@ -689,61 +954,15 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, return; } - assert(glsl_type_is_vector_or_scalar(src_type)); struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); - load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src->deref)); - load->num_components = glsl_get_vector_elements(src_type); - nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, - val->name); - - nir_builder_instr_insert(&b->nb, &load->instr); - val->type = src_type; - - if (src->var->data.mode == nir_var_uniform && - glsl_get_base_type(src_type) == GLSL_TYPE_BOOL) { - /* Uniform boolean loads need to be fixed up since they're defined - * to be zero/nonzero rather than NIR_FALSE/NIR_TRUE. - */ - val->ssa = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); - } else { - val->ssa = &load->dest.ssa; - } + val->ssa = vtn_variable_load(b, src); break; } case SpvOpStore: { nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; - const struct glsl_type *dest_type = nir_deref_tail(&dest->deref)->type; - struct vtn_value *src_val = vtn_untyped_value(b, w[2]); - if (src_val->value_type == vtn_value_type_ssa) { - assert(glsl_type_is_vector_or_scalar(dest_type)); - nir_intrinsic_instr *store = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); - store->src[0] = nir_src_for_ssa(src_val->ssa); - store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest->deref)); - store->num_components = glsl_get_vector_elements(dest_type); - - nir_builder_instr_insert(&b->nb, &store->instr); - } else { - assert(src_val->value_type == vtn_value_type_constant); - - nir_variable *const_tmp = rzalloc(b->shader, nir_variable); - const_tmp->type = dest_type; - const_tmp->name = "const_temp"; - const_tmp->data.mode = nir_var_local; - const_tmp->data.read_only = true; - exec_list_push_tail(&b->impl->locals, &const_tmp->node); - - nir_intrinsic_instr *copy = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); - copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); - copy->variables[1] = nir_deref_var_create(copy, const_tmp); - - nir_builder_instr_insert(&b->nb, ©->instr); - } + struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]); + vtn_variable_store(b, src, dest); break; } @@ -767,7 +986,7 @@ static nir_tex_src vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type) { nir_tex_src src; - src.src = nir_src_for_ssa(vtn_value(b, index, vtn_value_type_ssa)->ssa); + src.src = nir_src_for_ssa(vtn_value(b, index, vtn_value_type_ssa)->ssa->def); src.src_type = type; return src; } @@ -804,13 +1023,16 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, case SpvOpTextureGather: case SpvOpTextureGatherOffset: case SpvOpTextureGatherOffsets: - case SpvOpTextureQueryLod: + case SpvOpTextureQueryLod: { /* All these types have the coordinate as their first real argument */ - coord_components = glsl_get_vector_elements(b->values[w[4]].type); - p->src = nir_src_for_ssa(vtn_ssa_value(b, w[4])); + struct vtn_ssa_value *coord = vtn_ssa_value(b, w[4]); + coord_components = glsl_get_vector_elements(coord->type); + p->src = nir_src_for_ssa(coord->def); p->src_type = nir_tex_src_coord; p++; break; + } + default: break; } @@ -877,16 +1099,270 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref)); nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL); - val->ssa = &instr->dest.ssa; + val->ssa->def = &instr->dest.ssa; + val->ssa->type = val->type; nir_builder_instr_insert(&b->nb, &instr->instr); } +static struct vtn_ssa_value * +vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + if (!glsl_type_is_vector_or_scalar(type)) { + unsigned elems = glsl_get_length(type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *child_type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + child_type = glsl_get_column_type(type); + break; + case GLSL_TYPE_ARRAY: + child_type = glsl_get_array_element(type); + break; + case GLSL_TYPE_STRUCT: + child_type = glsl_get_struct_field(type, i); + break; + default: + unreachable("unkown base type"); + } + + val->elems[i] = vtn_create_ssa_value(b, child_type); + } + } + + return val; +} + +static nir_alu_instr * +create_vec(void *mem_ctx, unsigned num_components) +{ + nir_op op; + switch (num_components) { + case 1: op = nir_op_fmov; break; + case 2: op = nir_op_vec2; break; + case 3: op = nir_op_vec3; break; + case 4: op = nir_op_vec4; break; + default: unreachable("bad vector size"); + } + + nir_alu_instr *vec = nir_alu_instr_create(mem_ctx, op); + nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL); + + return vec; +} + +static struct vtn_ssa_value * +vtn_transpose(struct vtn_builder *b, struct vtn_ssa_value *src) +{ + if (src->transposed) + return src->transposed; + + struct vtn_ssa_value *dest = + vtn_create_ssa_value(b, glsl_transposed_type(src->type)); + + for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) { + nir_alu_instr *vec = create_vec(b, glsl_get_matrix_columns(src->type)); + if (glsl_type_is_vector_or_scalar(src->type)) { + vec->src[0].src = nir_src_for_ssa(src->def); + vec->src[0].swizzle[0] = i; + } else { + for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) { + vec->src[j].src = nir_src_for_ssa(src->elems[j]->def); + vec->src[j].swizzle[0] = i; + } + } + nir_builder_instr_insert(&b->nb, &vec->instr); + dest->elems[i]->def = &vec->dest.dest.ssa; + } + + dest->transposed = src; + + return dest; +} + +/* + * Normally, column vectors in SPIR-V correspond to a single NIR SSA + * definition. But for matrix multiplies, we want to do one routine for + * multiplying a matrix by a matrix and then pretend that vectors are matrices + * with one column. So we "wrap" these things, and unwrap the result before we + * send it off. + */ + +static struct vtn_ssa_value * +vtn_wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val) +{ + if (val == NULL) + return NULL; + + if (glsl_type_is_matrix(val->type)) + return val; + + struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value); + dest->type = val->type; + dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1); + dest->elems[0] = val; + + return dest; +} + +static struct vtn_ssa_value * +vtn_unwrap_matrix(struct vtn_ssa_value *val) +{ + if (glsl_type_is_matrix(val->type)) + return val; + + return val->elems[0]; +} + +static struct vtn_ssa_value * +vtn_matrix_multiply(struct vtn_builder *b, + struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1) +{ + + struct vtn_ssa_value *src0 = vtn_wrap_matrix(b, _src0); + struct vtn_ssa_value *src1 = vtn_wrap_matrix(b, _src1); + struct vtn_ssa_value *src0_transpose = vtn_wrap_matrix(b, _src0->transposed); + struct vtn_ssa_value *src1_transpose = vtn_wrap_matrix(b, _src1->transposed); + + unsigned src0_rows = glsl_get_vector_elements(src0->type); + unsigned src0_columns = glsl_get_matrix_columns(src0->type); + unsigned src1_columns = glsl_get_matrix_columns(src1->type); + + struct vtn_ssa_value *dest = + vtn_create_ssa_value(b, glsl_matrix_type(glsl_get_base_type(src0->type), + src0_rows, src1_columns)); + + dest = vtn_wrap_matrix(b, dest); + + bool transpose_result = false; + if (src0_transpose && src1_transpose) { + /* transpose(A) * transpose(B) = transpose(B * A) */ + src1 = src0_transpose; + src0 = src1_transpose; + src0_transpose = NULL; + src1_transpose = NULL; + transpose_result = true; + } + + if (src0_transpose && !src1_transpose && + glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) { + /* We already have the rows of src0 and the columns of src1 available, + * so we can just take the dot product of each row with each column to + * get the result. + */ + + for (unsigned i = 0; i < src1_columns; i++) { + nir_alu_instr *vec = create_vec(b, src0_rows); + for (unsigned j = 0; j < src0_rows; j++) { + vec->src[j].src = + nir_src_for_ssa(nir_fdot(&b->nb, src0_transpose->elems[j]->def, + src1->elems[i]->def)); + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + dest->elems[i]->def = &vec->dest.dest.ssa; + } + } else { + /* We don't handle the case where src1 is transposed but not src0, since + * the general case only uses individual components of src1 so the + * optimizer should chew through the transpose we emitted for src1. + */ + + for (unsigned i = 0; i < src1_columns; i++) { + /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ + dest->elems[i]->def = + nir_fmul(&b->nb, src0->elems[0]->def, + vtn_vector_extract(b, src1->elems[i]->def, 0)); + for (unsigned j = 1; j < src0_columns; j++) { + dest->elems[i]->def = + nir_fadd(&b->nb, dest->elems[i]->def, + nir_fmul(&b->nb, src0->elems[j]->def, + vtn_vector_extract(b, + src1->elems[i]->def, j))); + } + } + } + + dest = vtn_unwrap_matrix(dest); + + if (transpose_result) + dest = vtn_transpose(b, dest); + + return dest; +} + +static struct vtn_ssa_value * +vtn_mat_times_scalar(struct vtn_builder *b, + struct vtn_ssa_value *mat, + nir_ssa_def *scalar) +{ + struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type); + for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) { + if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT) + dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar); + else + dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar); + } + + return dest; +} + static void vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { - unreachable("Matrix math not handled"); + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + + switch (opcode) { + case SpvOpTranspose: { + struct vtn_ssa_value *src = vtn_ssa_value(b, w[3]); + val->ssa = vtn_transpose(b, src); + break; + } + + case SpvOpOuterProduct: { + struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]); + + val->ssa = vtn_matrix_multiply(b, src0, vtn_transpose(b, src1)); + break; + } + + case SpvOpMatrixTimesScalar: { + struct vtn_ssa_value *mat = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *scalar = vtn_ssa_value(b, w[4]); + + if (mat->transposed) { + val->ssa = vtn_transpose(b, vtn_mat_times_scalar(b, mat->transposed, + scalar->def)); + } else { + val->ssa = vtn_mat_times_scalar(b, mat, scalar->def); + } + break; + } + + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: { + struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]); + + val->ssa = vtn_matrix_multiply(b, src0, src1); + break; + } + + default: unreachable("unknown matrix opcode"); + } } static void @@ -895,12 +1371,13 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, { struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + val->ssa = vtn_create_ssa_value(b, val->type); /* Collect the various SSA sources */ unsigned num_inputs = count - 3; nir_ssa_def *src[4]; for (unsigned i = 0; i < num_inputs; i++) - src[i] = vtn_ssa_value(b, w[i + 3]); + src[i] = vtn_ssa_value(b, w[i + 3])->def; /* Indicates that the first two arguments should be swapped. This is * used for implementing greater-than and less-than-or-equal. @@ -1012,24 +1489,24 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, case SpvOpDPdxCoarse: op = nir_op_fddx_coarse; break; case SpvOpDPdyCoarse: op = nir_op_fddy_coarse; break; case SpvOpFwidth: - val->ssa = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddx(&b->nb, src[1]))); + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx(&b->nb, src[1]))); return; case SpvOpFwidthFine: - val->ssa = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1]))); + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1]))); return; case SpvOpFwidthCoarse: - val->ssa = nir_fadd(&b->nb, - nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), - nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1]))); + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1]))); return; case SpvOpVectorTimesScalar: /* The builder will take care of splatting for us. */ - val->ssa = nir_fmul(&b->nb, src[0], src[1]); + val->ssa->def = nir_fmul(&b->nb, src[0], src[1]); return; case SpvOpSRem: @@ -1057,7 +1534,7 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); nir_ssa_dest_init(&instr->instr, &instr->dest.dest, glsl_get_vector_elements(val->type), val->name); - val->ssa = &instr->dest.dest.ssa; + val->ssa->def = &instr->dest.dest.ssa; for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) instr->src[i].src = nir_src_for_ssa(src[i]); @@ -1065,6 +1542,350 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, nir_builder_instr_insert(&b->nb, &instr->instr); } +static nir_ssa_def * +vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index) +{ + unsigned swiz[4] = { index }; + return nir_swizzle(&b->nb, src, swiz, 1, true); +} + + +static nir_ssa_def * +vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, + unsigned index) +{ + nir_alu_instr *vec = create_vec(b->shader, src->num_components); + + for (unsigned i = 0; i < src->num_components; i++) { + if (i == index) { + vec->src[i].src = nir_src_for_ssa(insert); + } else { + vec->src[i].src = nir_src_for_ssa(src); + vec->src[i].swizzle[0] = i; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +static nir_ssa_def * +vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *index) +{ + nir_ssa_def *dest = vtn_vector_extract(b, src, 0); + for (unsigned i = 1; i < src->num_components; i++) + dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + vtn_vector_extract(b, src, i), dest); + + return dest; +} + +static nir_ssa_def * +vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *insert, nir_ssa_def *index) +{ + nir_ssa_def *dest = vtn_vector_insert(b, src, insert, 0); + for (unsigned i = 1; i < src->num_components; i++) + dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + vtn_vector_insert(b, src, insert, i), dest); + + return dest; +} + +static nir_ssa_def * +vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components, + nir_ssa_def *src0, nir_ssa_def *src1, + const uint32_t *indices) +{ + nir_alu_instr *vec = create_vec(b->shader, num_components); + + nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1); + nir_builder_instr_insert(&b->nb, &undef->instr); + + for (unsigned i = 0; i < num_components; i++) { + uint32_t index = indices[i]; + if (index == 0xffffffff) { + vec->src[i].src = nir_src_for_ssa(&undef->def); + } else if (index < src0->num_components) { + vec->src[i].src = nir_src_for_ssa(src0); + vec->src[i].swizzle[0] = index; + } else { + vec->src[i].src = nir_src_for_ssa(src1); + vec->src[i].swizzle[0] = index - src0->num_components; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +/* + * Concatentates a number of vectors/scalars together to produce a vector + */ +static nir_ssa_def * +vtn_vector_construct(struct vtn_builder *b, unsigned num_components, + unsigned num_srcs, nir_ssa_def **srcs) +{ + nir_alu_instr *vec = create_vec(b->shader, num_components); + + unsigned dest_idx = 0; + for (unsigned i = 0; i < num_srcs; i++) { + nir_ssa_def *src = srcs[i]; + for (unsigned j = 0; j < src->num_components; j++) { + vec->src[dest_idx].src = nir_src_for_ssa(src); + vec->src[dest_idx].swizzle[0] = j; + dest_idx++; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +static struct vtn_ssa_value * +vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src) +{ + struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value); + dest->type = src->type; + + if (glsl_type_is_vector_or_scalar(src->type)) { + dest->def = src->def; + } else { + unsigned elems = glsl_get_length(src->type); + + dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) + dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]); + } + + return dest; +} + +static struct vtn_ssa_value * +vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_ssa_value *insert, const uint32_t *indices, + unsigned num_indices) +{ + struct vtn_ssa_value *dest = vtn_composite_copy(b, src); + + struct vtn_ssa_value *cur = dest; + unsigned i; + for (i = 0; i < num_indices - 1; i++) { + cur = cur->elems[indices[i]]; + } + + if (glsl_type_is_vector_or_scalar(cur->type)) { + /* According to the SPIR-V spec, OpCompositeInsert may work down to + * the component granularity. In that case, the last index will be + * the index to insert the scalar into the vector. + */ + + cur->def = vtn_vector_insert(b, cur->def, insert->def, indices[i]); + } else { + cur->elems[indices[i]] = insert; + } + + return dest; +} + +static struct vtn_ssa_value * +vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src, + const uint32_t *indices, unsigned num_indices) +{ + struct vtn_ssa_value *cur = src; + for (unsigned i = 0; i < num_indices; i++) { + if (glsl_type_is_vector_or_scalar(cur->type)) { + assert(i == num_indices - 1); + /* According to the SPIR-V spec, OpCompositeExtract may work down to + * the component granularity. The last index will be the index of the + * vector to extract. + */ + + struct vtn_ssa_value *ret = rzalloc(b, struct vtn_ssa_value); + ret->type = glsl_scalar_type(glsl_get_base_type(cur->type)); + ret->def = vtn_vector_extract(b, cur->def, indices[i]); + return ret; + } + } + + return cur; +} + +static void +vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + + switch (opcode) { + case SpvOpVectorExtractDynamic: + val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def); + break; + + case SpvOpVectorInsertDynamic: + val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + vtn_ssa_value(b, w[5])->def); + break; + + case SpvOpVectorShuffle: + val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(val->type), + vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + w + 5); + break; + + case SpvOpCompositeConstruct: { + val->ssa = rzalloc(b, struct vtn_ssa_value); + unsigned elems = count - 3; + if (glsl_type_is_vector_or_scalar(val->type)) { + nir_ssa_def *srcs[4]; + for (unsigned i = 0; i < elems; i++) + srcs[i] = vtn_ssa_value(b, w[3 + i])->def; + val->ssa->def = + vtn_vector_construct(b, glsl_get_vector_elements(val->type), + elems, srcs); + } else { + val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) + val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]); + } + break; + } + case SpvOpCompositeExtract: + val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]), + w + 4, count - 4); + break; + + case SpvOpCompositeInsert: + val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]), + vtn_ssa_value(b, w[3]), + w + 5, count - 5); + break; + + case SpvOpCopyObject: + val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3])); + break; + + default: + unreachable("unknown composite operation"); + } + + val->ssa->type = val->type; +} + +static void +vtn_phi_node_init(struct vtn_builder *b, struct vtn_ssa_value *val) +{ + if (glsl_type_is_vector_or_scalar(val->type)) { + nir_phi_instr *phi = nir_phi_instr_create(b->shader); + nir_ssa_dest_init(&phi->instr, &phi->dest, + glsl_get_vector_elements(val->type), NULL); + exec_list_make_empty(&phi->srcs); + nir_builder_instr_insert(&b->nb, &phi->instr); + val->def = &phi->dest.ssa; + } else { + unsigned elems = glsl_get_length(val->type); + for (unsigned i = 0; i < elems; i++) + vtn_phi_node_init(b, val->elems[i]); + } +} + +static struct vtn_ssa_value * +vtn_phi_node_create(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = vtn_create_ssa_value(b, type); + vtn_phi_node_init(b, val); + return val; +} + +static void +vtn_handle_phi_first_pass(struct vtn_builder *b, const uint32_t *w) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->type = vtn_value(b, w[1], vtn_value_type_type)->type; + val->ssa = vtn_phi_node_create(b, val->type); +} + +static void +vtn_phi_node_add_src(struct vtn_ssa_value *phi, const nir_block *pred, + struct vtn_ssa_value *val) +{ + assert(phi->type == val->type); + if (glsl_type_is_vector_or_scalar(phi->type)) { + nir_phi_instr *phi_instr = nir_instr_as_phi(phi->def->parent_instr); + nir_phi_src *src = ralloc(phi_instr, nir_phi_src); + src->pred = (nir_block *) pred; + src->src = nir_src_for_ssa(val->def); + exec_list_push_tail(&phi_instr->srcs, &src->node); + } else { + unsigned elems = glsl_get_length(phi->type); + for (unsigned i = 0; i < elems; i++) + vtn_phi_node_add_src(phi->elems[i], pred, val->elems[i]); + } +} + +static struct vtn_ssa_value * +vtn_get_phi_node_src(struct vtn_builder *b, nir_block *block, + const struct glsl_type *type, const uint32_t *w, + unsigned count) +{ + struct hash_entry *entry = _mesa_hash_table_search(b->block_table, block); + if (entry) { + struct vtn_block *spv_block = entry->data; + for (unsigned off = 4; off < count; off += 2) { + if (spv_block == vtn_value(b, w[off], vtn_value_type_block)->block) { + return vtn_ssa_value(b, w[off - 1]); + } + } + } + + nir_builder_insert_before_block(&b->nb, block); + struct vtn_ssa_value *phi = vtn_phi_node_create(b, type); + + struct set_entry *entry2; + set_foreach(block->predecessors, entry2) { + nir_block *pred = (nir_block *) entry2->key; + struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, type, w, + count); + vtn_phi_node_add_src(phi, pred, val); + } + + return phi; +} + +static bool +vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + if (opcode == SpvOpLabel) { + b->block = vtn_value(b, w[1], vtn_value_type_block)->block; + return true; + } + + if (opcode != SpvOpPhi) + return true; + + struct vtn_ssa_value *phi = vtn_value(b, w[2], vtn_value_type_ssa)->ssa; + + struct set_entry *entry; + set_foreach(b->block->block->predecessors, entry) { + nir_block *pred = (nir_block *) entry->key; + + struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, phi->type, w, + count); + vtn_phi_node_add_src(phi, pred, val); + } + + return true; +} + static bool vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) @@ -1214,6 +2035,7 @@ vtn_handle_first_cfg_pass_instruction(struct vtn_builder *b, SpvOp opcode, } case SpvOpFunctionEnd: + b->func->end = w; b->func = NULL; break; @@ -1432,6 +2254,20 @@ vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, vtn_handle_matrix_alu(b, opcode, w, count); break; + case SpvOpVectorExtractDynamic: + case SpvOpVectorInsertDynamic: + case SpvOpVectorShuffle: + case SpvOpCompositeConstruct: + case SpvOpCompositeExtract: + case SpvOpCompositeInsert: + case SpvOpCopyObject: + vtn_handle_composite(b, opcode, w, count); + break; + + case SpvOpPhi: + vtn_handle_phi_first_pass(b, w); + break; + default: unreachable("Unhandled opcode"); } @@ -1446,20 +2282,11 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, { struct vtn_block *block = start; while (block != end_block) { - if (block->block != NULL) { - /* We've already visited this block once before so this is a - * back-edge. Back-edges are only allowed to point to a loop - * merge. - */ - assert(block == cont_block); - return; - } - if (block->merge_op == SpvOpLoopMerge) { /* This is the jump into a loop. */ - cont_block = block; - break_block = vtn_value(b, block->merge_block_id, - vtn_value_type_block)->block; + struct vtn_block *new_cont_block = block; + struct vtn_block *new_break_block = + vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; nir_loop *loop = nir_loop_create(b->shader); nir_cf_node_insert_end(b->nb.cf_node_list, &loop->cf_node); @@ -1470,10 +2297,10 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, block->merge_op = SpvOpNop; nir_builder_insert_after_cf_list(&b->nb, &loop->body); - vtn_walk_blocks(b, block, break_block, cont_block, NULL); + vtn_walk_blocks(b, block, new_break_block, new_cont_block, NULL); nir_builder_insert_after_cf_list(&b->nb, old_list); - block = break_block; + block = new_break_block; continue; } @@ -1484,6 +2311,12 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, vtn_foreach_instruction(b, block->label, block->branch, vtn_handle_body_instruction); + nir_cf_node *cur_cf_node = + exec_node_data(nir_cf_node, exec_list_get_tail(b->nb.cf_node_list), + node); + nir_block *cur_block = nir_cf_node_as_block(cur_cf_node); + _mesa_hash_table_insert(b->block_table, cur_block, block); + switch (branch_op) { case SpvOpBranch: { struct vtn_block *branch_block = @@ -1502,8 +2335,16 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, return; } else if (branch_block == end_block) { + /* We're branching to the merge block of an if, since for loops + * and functions end_block == NULL, so we're done here. + */ return; } else { + /* We're branching to another block, and according to the rules, + * we can only branch to another block with one predecessor (so + * we're the only one jumping to it) so we can just process it + * next. + */ block = branch_block; continue; } @@ -1517,7 +2358,7 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, vtn_value(b, w[3], vtn_value_type_block)->block; nir_if *if_stmt = nir_if_create(b->shader); - if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])); + if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def); nir_cf_node_insert_end(b->nb.cf_node_list, &if_stmt->cf_node); if (then_block == break_block) { @@ -1545,7 +2386,10 @@ vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, &jump->instr); block = then_block; } else { - /* Conventional if statement */ + /* According to the rules we're branching to two blocks that don't + * have any other predecessors, so we can handle this as a + * conventional if. + */ assert(block->merge_op == SpvOpSelectionMerge); struct vtn_block *merge_block = vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; @@ -1652,9 +2496,15 @@ spirv_to_nir(const uint32_t *words, size_t word_count, foreach_list_typed(struct vtn_function, func, node, &b->functions) { b->impl = nir_function_impl_create(func->overload); + b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); + b->block_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); nir_builder_init(&b->nb, b->impl); nir_builder_insert_after_cf_list(&b->nb, &b->impl->body); vtn_walk_blocks(b, func->start_block, NULL, NULL, NULL); + vtn_foreach_instruction(b, func->start_block->label, func->end, + vtn_handle_phi_second_pass); } ralloc_free(b); diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h index d2b364bdfeb..b157e023a68 100644 --- a/src/glsl/nir/spirv_to_nir_private.h +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -60,11 +60,27 @@ struct vtn_function { nir_function_overload *overload; struct vtn_block *start_block; + + const uint32_t *end; }; typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t, const uint32_t *, unsigned); +struct vtn_ssa_value { + union { + nir_ssa_def *def; + struct vtn_ssa_value **elems; + }; + + /* For matrices, a transposed version of the value, or NULL if it hasn't + * been computed + */ + struct vtn_ssa_value *transposed; + + const struct glsl_type *type; +}; + struct vtn_value { enum vtn_value_type value_type; const char *name; @@ -77,7 +93,7 @@ struct vtn_value { nir_deref_var *deref; struct vtn_function *func; struct vtn_block *block; - nir_ssa_def *ssa; + struct vtn_ssa_value *ssa; vtn_instruction_handler ext_handler; }; }; @@ -96,6 +112,20 @@ struct vtn_builder { nir_function_impl *impl; struct vtn_block *block; + /* + * In SPIR-V, constants are global, whereas in NIR, the load_const + * instruction we use is per-function. So while we parse each function, we + * keep a hash table of constants we've resolved to nir_ssa_value's so + * far, and we lazily resolve them when we see them used in a function. + */ + struct hash_table *const_table; + + /* + * Map from nir_block to the vtn_block which ends with it -- used for + * handling phi nodes. + */ + struct hash_table *block_table; + unsigned value_id_bound; struct vtn_value *values; @@ -134,7 +164,7 @@ vtn_value(struct vtn_builder *b, uint32_t value_id, return val; } -nir_ssa_def *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); +struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, struct vtn_value *,