2019-02-20 17:53:47 +00:00
|
|
|
/*
|
|
|
|
* Copyright © 2019 Google LLC
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
* Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
|
|
* DEALINGS IN THE SOFTWARE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "tu_private.h"

#include <string.h>

#include "spirv/nir_spirv.h"
#include "util/mesa-sha1.h"
#include "nir/nir_xfb_info.h"
#include "nir/nir_vulkan.h"
#include "vk_util.h"

#include "ir3/ir3_nir.h"
|
|
|
|
|
2020-07-06 17:16:39 +01:00
|
|
|
nir_shader *
|
|
|
|
tu_spirv_to_nir(struct tu_device *dev,
|
|
|
|
const VkPipelineShaderStageCreateInfo *stage_info,
|
|
|
|
gl_shader_stage stage)
|
2019-02-20 17:53:47 +00:00
|
|
|
{
|
|
|
|
/* TODO these are made-up */
|
|
|
|
const struct spirv_to_nir_options spirv_options = {
|
2019-09-28 00:04:30 +01:00
|
|
|
.frag_coord_is_sysval = true,
|
2020-06-29 18:18:20 +01:00
|
|
|
|
|
|
|
.ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
|
|
|
|
.ssbo_addr_format = nir_address_format_vec2_index_32bit_offset,
|
|
|
|
|
|
|
|
/* Accessed via stg/ldg */
|
|
|
|
.phys_ssbo_addr_format = nir_address_format_64bit_global,
|
|
|
|
|
|
|
|
/* Accessed via the const register file */
|
|
|
|
.push_const_addr_format = nir_address_format_logical,
|
|
|
|
|
|
|
|
/* Accessed via ldl/stl */
|
|
|
|
.shared_addr_format = nir_address_format_32bit_offset,
|
|
|
|
|
|
|
|
/* Accessed via stg/ldg (not used with Vulkan?) */
|
|
|
|
.global_addr_format = nir_address_format_64bit_global,
|
|
|
|
|
2020-07-02 10:33:42 +01:00
|
|
|
/* ViewID is a sysval in geometry stages and an input in the FS */
|
|
|
|
.view_index_is_input = stage == MESA_SHADER_FRAGMENT,
|
2020-02-20 05:48:28 +00:00
|
|
|
.caps = {
|
2020-04-24 16:05:48 +01:00
|
|
|
.transform_feedback = true,
|
|
|
|
.tessellation = true,
|
2020-06-24 21:00:30 +01:00
|
|
|
.draw_parameters = true,
|
2020-11-03 16:36:34 +00:00
|
|
|
.image_read_without_format = true,
|
|
|
|
.image_write_without_format = true,
|
2020-06-29 18:33:50 +01:00
|
|
|
.variable_pointers = true,
|
2020-07-16 14:49:36 +01:00
|
|
|
.stencil_export = true,
|
2020-07-02 10:34:54 +01:00
|
|
|
.multiview = true,
|
2020-07-14 15:38:09 +01:00
|
|
|
.shader_viewport_index_layer = true,
|
2020-09-23 12:08:37 +01:00
|
|
|
.geometry_streams = true,
|
2021-03-11 10:35:31 +00:00
|
|
|
.device_group = true,
|
2020-02-20 05:48:28 +00:00
|
|
|
},
|
2019-02-20 17:53:47 +00:00
|
|
|
};
|
|
|
|
const nir_shader_compiler_options *nir_options =
|
2020-07-06 17:16:39 +01:00
|
|
|
ir3_get_compiler_options(dev->compiler);
|
2019-02-20 17:53:47 +00:00
|
|
|
|
|
|
|
/* convert VkSpecializationInfo */
|
2020-07-06 17:16:39 +01:00
|
|
|
const VkSpecializationInfo *spec_info = stage_info->pSpecializationInfo;
|
2019-02-20 17:53:47 +00:00
|
|
|
struct nir_spirv_specialization *spec = NULL;
|
|
|
|
uint32_t num_spec = 0;
|
|
|
|
if (spec_info && spec_info->mapEntryCount) {
|
2020-04-22 20:05:13 +01:00
|
|
|
spec = calloc(spec_info->mapEntryCount, sizeof(*spec));
|
2019-02-20 17:53:47 +00:00
|
|
|
if (!spec)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
for (uint32_t i = 0; i < spec_info->mapEntryCount; i++) {
|
|
|
|
const VkSpecializationMapEntry *entry = &spec_info->pMapEntries[i];
|
|
|
|
const void *data = spec_info->pData + entry->offset;
|
|
|
|
assert(data + entry->size <= spec_info->pData + spec_info->dataSize);
|
|
|
|
spec[i].id = entry->constantID;
|
2020-04-22 19:43:51 +01:00
|
|
|
switch (entry->size) {
|
|
|
|
case 8:
|
2020-04-22 20:05:13 +01:00
|
|
|
spec[i].value.u64 = *(const uint64_t *)data;
|
2020-04-22 19:43:51 +01:00
|
|
|
break;
|
|
|
|
case 4:
|
2020-04-22 20:05:13 +01:00
|
|
|
spec[i].value.u32 = *(const uint32_t *)data;
|
2020-04-22 19:43:51 +01:00
|
|
|
break;
|
|
|
|
case 2:
|
2020-04-22 20:05:13 +01:00
|
|
|
spec[i].value.u16 = *(const uint16_t *)data;
|
2020-04-22 19:43:51 +01:00
|
|
|
break;
|
|
|
|
case 1:
|
2020-04-22 20:05:13 +01:00
|
|
|
spec[i].value.u8 = *(const uint8_t *)data;
|
2020-04-22 19:43:51 +01:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
assert(!"Invalid spec constant size");
|
|
|
|
break;
|
|
|
|
}
|
2019-02-20 17:53:47 +00:00
|
|
|
spec[i].defined_on_module = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
num_spec = spec_info->mapEntryCount;
|
|
|
|
}
|
|
|
|
|
2020-07-06 17:16:39 +01:00
|
|
|
struct tu_shader_module *module =
|
|
|
|
tu_shader_module_from_handle(stage_info->module);
|
|
|
|
assert(module->code_size % 4 == 0);
|
2019-05-19 08:22:17 +01:00
|
|
|
nir_shader *nir =
|
2020-07-06 17:16:39 +01:00
|
|
|
spirv_to_nir(module->code, module->code_size / 4,
|
|
|
|
spec, num_spec, stage, stage_info->pName,
|
2019-02-20 17:53:47 +00:00
|
|
|
&spirv_options, nir_options);
|
|
|
|
|
|
|
|
free(spec);
|
|
|
|
|
2019-05-19 08:22:17 +01:00
|
|
|
assert(nir->info.stage == stage);
|
|
|
|
nir_validate_shader(nir, "after spirv_to_nir");
|
2019-02-20 17:53:47 +00:00
|
|
|
|
2020-07-06 17:16:39 +01:00
|
|
|
if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_NIR)) {
|
|
|
|
fprintf(stderr, "translated nir:\n");
|
|
|
|
nir_print_shader(nir, stderr);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* multi step inlining procedure */
|
|
|
|
NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
|
|
|
|
NIR_PASS_V(nir, nir_lower_returns);
|
|
|
|
NIR_PASS_V(nir, nir_inline_functions);
|
|
|
|
NIR_PASS_V(nir, nir_copy_prop);
|
|
|
|
NIR_PASS_V(nir, nir_opt_deref);
|
|
|
|
foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
|
|
|
|
if (!func->is_entrypoint)
|
|
|
|
exec_node_remove(&func->node);
|
|
|
|
}
|
|
|
|
assert(exec_list_length(&nir->functions) == 1);
|
|
|
|
NIR_PASS_V(nir, nir_lower_variable_initializers, ~nir_var_function_temp);
|
|
|
|
|
|
|
|
/* Split member structs. We do this before lower_io_to_temporaries so that
|
|
|
|
* it doesn't lower system values to temporaries by accident.
|
|
|
|
*/
|
|
|
|
NIR_PASS_V(nir, nir_split_var_copies);
|
|
|
|
NIR_PASS_V(nir, nir_split_per_member_structs);
|
|
|
|
|
|
|
|
NIR_PASS_V(nir, nir_remove_dead_variables,
|
|
|
|
nir_var_shader_in | nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
|
|
|
|
NULL);
|
|
|
|
|
|
|
|
NIR_PASS_V(nir, nir_propagate_invariant);
|
|
|
|
|
|
|
|
NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true);
|
|
|
|
|
|
|
|
NIR_PASS_V(nir, nir_lower_global_vars_to_local);
|
|
|
|
NIR_PASS_V(nir, nir_split_var_copies);
|
|
|
|
NIR_PASS_V(nir, nir_lower_var_copies);
|
|
|
|
|
|
|
|
NIR_PASS_V(nir, nir_opt_copy_prop_vars);
|
|
|
|
NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all);
|
|
|
|
|
|
|
|
/* ir3 doesn't support indirect input/output */
|
|
|
|
/* TODO: We shouldn't perform this lowering pass on gl_TessLevelInner
|
|
|
|
* and gl_TessLevelOuter. Since the tess levels are actually stored in
|
|
|
|
* a global BO, they can be directly accessed via stg and ldg.
|
|
|
|
* nir_lower_indirect_derefs will instead generate a big if-ladder which
|
|
|
|
* isn't *incorrect* but is much less efficient. */
|
|
|
|
NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out, UINT32_MAX);
|
|
|
|
|
|
|
|
NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
|
|
|
|
|
|
|
|
NIR_PASS_V(nir, nir_lower_system_values);
|
|
|
|
NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
|
|
|
|
|
2020-09-24 15:04:18 +01:00
|
|
|
NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
|
|
|
|
|
2020-07-06 17:16:39 +01:00
|
|
|
NIR_PASS_V(nir, nir_lower_frexp);
|
|
|
|
|
2020-07-06 17:26:14 +01:00
|
|
|
ir3_optimize_loop(nir);
|
|
|
|
|
2019-05-19 08:22:17 +01:00
|
|
|
return nir;
|
2019-02-20 17:53:47 +00:00
|
|
|
}
|
|
|
|
|
2019-12-19 00:30:37 +00:00
|
|
|
static void
|
|
|
|
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
|
|
|
|
struct tu_shader *shader)
|
2019-09-26 05:29:26 +01:00
|
|
|
{
|
2020-03-18 12:12:31 +00:00
|
|
|
uint32_t base = nir_intrinsic_base(instr);
|
|
|
|
assert(base % 4 == 0);
|
|
|
|
assert(base >= shader->push_consts.lo * 16);
|
|
|
|
base -= shader->push_consts.lo * 16;
|
2021-01-02 07:31:11 +00:00
|
|
|
|
|
|
|
nir_ssa_def *load =
|
|
|
|
nir_load_uniform(b, instr->num_components, instr->dest.ssa.bit_size,
|
|
|
|
nir_ushr(b, instr->src[0].ssa, nir_imm_int(b, 2)),
|
|
|
|
.base = base / 4);
|
|
|
|
|
2021-03-03 06:13:38 +00:00
|
|
|
nir_ssa_def_rewrite_uses(&instr->dest.ssa, load);
|
2019-10-14 16:09:27 +01:00
|
|
|
|
2019-12-19 00:30:37 +00:00
|
|
|
nir_instr_remove(&instr->instr);
|
|
|
|
}
|
2019-09-26 05:29:26 +01:00
|
|
|
|
2019-12-19 00:30:37 +00:00
|
|
|
static void
|
|
|
|
lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr,
|
|
|
|
struct tu_shader *shader,
|
|
|
|
const struct tu_pipeline_layout *layout)
|
|
|
|
{
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
nir_ssa_def *vulkan_idx = instr->src[0].ssa;
|
2019-09-26 05:29:26 +01:00
|
|
|
|
2019-10-07 02:42:27 +01:00
|
|
|
unsigned set = nir_intrinsic_desc_set(instr);
|
|
|
|
unsigned binding = nir_intrinsic_binding(instr);
|
2019-12-14 06:05:11 +00:00
|
|
|
struct tu_descriptor_set_layout *set_layout = layout->set[set].layout;
|
|
|
|
struct tu_descriptor_set_binding_layout *binding_layout =
|
|
|
|
&set_layout->binding[binding];
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
uint32_t base;
|
2019-10-07 02:42:27 +01:00
|
|
|
|
2020-06-09 13:40:58 +01:00
|
|
|
shader->active_desc_sets |= 1u << set;
|
|
|
|
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
switch (binding_layout->type) {
|
2019-10-07 02:42:27 +01:00
|
|
|
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
|
|
|
|
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
base = layout->set[set].dynamic_offset_start +
|
2020-06-15 04:10:01 +01:00
|
|
|
binding_layout->dynamic_offset_offset;
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
set = MAX_SETS;
|
2019-10-07 02:42:27 +01:00
|
|
|
break;
|
|
|
|
default:
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
base = binding_layout->offset / (4 * A6XX_TEX_CONST_DWORDS);
|
2019-10-07 02:42:27 +01:00
|
|
|
break;
|
|
|
|
}
|
2019-09-26 05:29:26 +01:00
|
|
|
|
2020-06-29 18:18:20 +01:00
|
|
|
nir_ssa_def *def = nir_vec3(b, nir_imm_int(b, set),
|
|
|
|
nir_iadd(b, nir_imm_int(b, base), vulkan_idx),
|
|
|
|
nir_imm_int(b, 0));
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
|
2021-03-03 06:13:38 +00:00
|
|
|
nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
|
2019-09-26 05:29:26 +01:00
|
|
|
nir_instr_remove(&instr->instr);
|
2019-12-19 00:30:37 +00:00
|
|
|
}
|
2019-09-26 05:29:26 +01:00
|
|
|
|
2020-06-29 18:18:20 +01:00
|
|
|
static void
|
|
|
|
lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin)
|
|
|
|
{
|
|
|
|
/* Loading the descriptor happens as part of the load/store instruction so
|
|
|
|
* this is a no-op.
|
|
|
|
*/
|
2021-03-03 06:01:15 +00:00
|
|
|
nir_ssa_def_rewrite_uses_src(&intrin->dest.ssa, intrin->src[0]);
|
2020-06-29 18:18:20 +01:00
|
|
|
nir_instr_remove(&intrin->instr);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Lower a UBO/SSBO load/store whose buffer source is a (set, index, ...)
 * vector into per-descriptor-set copies guarded by an if-ladder, replacing
 * the buffer source with a bindless_resource_ir3 handle in each branch.
 */
static void
lower_ssbo_ubo_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin)
{
   const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];

   /* The bindless base is part of the instruction, which means that part of
    * the "pointer" has to be constant. We solve this in the same way the blob
    * does, by generating a bunch of if-statements. In the usual case where
    * the descriptor set is constant this will get optimized out.
    */

   unsigned buffer_src;
   if (intrin->intrinsic == nir_intrinsic_store_ssbo) {
      /* This has the value first */
      buffer_src = 1;
   } else {
      buffer_src = 0;
   }

   /* Component 0 of the buffer source is the descriptor set, component 1 the
    * index within the set (see lower_vulkan_resource_index above).
    */
   nir_ssa_def *base_idx = nir_channel(b, intrin->src[buffer_src].ssa, 0);
   nir_ssa_def *descriptor_idx = nir_channel(b, intrin->src[buffer_src].ssa, 1);

   /* One result per possible set; the +1 covers the extra set used for
    * dynamic descriptors (set == MAX_SETS).
    */
   nir_ssa_def *results[MAX_SETS + 1] = { NULL };

   for (unsigned i = 0; i < MAX_SETS + 1; i++) {
      /* if (base_idx == i) { ... */
      nir_if *nif = nir_push_if(b, nir_ieq_imm(b, base_idx, i));

      /* In this branch the set is known to be i, so it can be baked into the
       * bindless handle as a constant.
       */
      nir_ssa_def *bindless =
         nir_bindless_resource_ir3(b, 32, descriptor_idx, .desc_set = i);

      /* Clone the original intrinsic, swapping in the bindless handle for
       * the buffer source and copying every other source and const index.
       */
      nir_intrinsic_instr *copy =
         nir_intrinsic_instr_create(b->shader, intrin->intrinsic);

      copy->num_components = intrin->num_components;

      for (unsigned src = 0; src < info->num_srcs; src++) {
         if (src == buffer_src)
            copy->src[src] = nir_src_for_ssa(bindless);
         else
            copy->src[src] = nir_src_for_ssa(intrin->src[src].ssa);
      }

      for (unsigned idx = 0; idx < info->num_indices; idx++) {
         copy->const_index[idx] = intrin->const_index[idx];
      }

      if (info->has_dest) {
         nir_ssa_dest_init(&copy->instr, &copy->dest,
                           intrin->dest.ssa.num_components,
                           intrin->dest.ssa.bit_size,
                           intrin->dest.ssa.name);
         results[i] = &copy->dest.ssa;
      }

      nir_builder_instr_insert(b, &copy->instr);

      /* } else { ... */
      nir_push_else(b, nif);
   }

   /* All branches were pushed but none popped yet, so the builder cursor sits
    * in the innermost else.  Pop outward, phi-ing each branch's result with
    * the accumulated value from the deeper branches; the innermost else
    * (base_idx matching no set) contributes an undef.
    */
   /* NOTE(review): for destination-less intrinsics (stores) this reads the
    * unused dest.ssa fields to size the undef; the value is never consumed
    * since info->has_dest is false — confirm this is intentional.
    */
   nir_ssa_def *result =
      nir_ssa_undef(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size);
   for (int i = MAX_SETS; i >= 0; i--) {
      nir_pop_if(b, NULL);
      if (info->has_dest)
         result = nir_if_phi(b, results[i], result);
   }

   if (info->has_dest)
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, result);
   nir_instr_remove(&intrin->instr);
}
|
|
|
|
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
static nir_ssa_def *
|
|
|
|
build_bindless(nir_builder *b, nir_deref_instr *deref, bool is_sampler,
|
|
|
|
struct tu_shader *shader,
|
|
|
|
const struct tu_pipeline_layout *layout)
|
2019-12-09 21:31:35 +00:00
|
|
|
{
|
|
|
|
nir_variable *var = nir_deref_instr_get_variable(deref);
|
|
|
|
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
unsigned set = var->data.descriptor_set;
|
|
|
|
unsigned binding = var->data.binding;
|
|
|
|
const struct tu_descriptor_set_binding_layout *bind_layout =
|
|
|
|
&layout->set[set].layout->binding[binding];
|
|
|
|
|
2020-06-15 04:10:01 +01:00
|
|
|
/* input attachments use non bindless workaround */
|
|
|
|
if (bind_layout->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
|
|
|
|
const struct glsl_type *glsl_type = glsl_without_array(var->type);
|
|
|
|
uint32_t idx = var->data.index * 2;
|
|
|
|
|
2021-03-08 05:23:31 +00:00
|
|
|
BITSET_SET_RANGE(b->shader->info.textures_used, idx * 2, ((idx * 2) + (bind_layout->array_size * 2)) - 1);
|
2020-06-15 04:10:01 +01:00
|
|
|
|
|
|
|
/* D24S8 workaround: stencil of D24S8 will be sampled as uint */
|
|
|
|
if (glsl_get_sampler_result_type(glsl_type) == GLSL_TYPE_UINT)
|
|
|
|
idx += 1;
|
|
|
|
|
|
|
|
if (deref->deref_type == nir_deref_type_var)
|
|
|
|
return nir_imm_int(b, idx);
|
|
|
|
|
|
|
|
nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
|
|
|
|
return nir_iadd(b, nir_imm_int(b, idx),
|
|
|
|
nir_imul_imm(b, arr_index, 2));
|
|
|
|
}
|
|
|
|
|
2020-06-09 13:40:58 +01:00
|
|
|
shader->active_desc_sets |= 1u << set;
|
|
|
|
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
nir_ssa_def *desc_offset;
|
|
|
|
unsigned descriptor_stride;
|
2020-06-15 04:10:01 +01:00
|
|
|
unsigned offset = 0;
|
|
|
|
/* Samplers come second in combined image/sampler descriptors, see
|
|
|
|
* write_combined_image_sampler_descriptor().
|
|
|
|
*/
|
|
|
|
if (is_sampler && bind_layout->type ==
|
|
|
|
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
|
|
|
|
offset = 1;
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
}
|
2020-06-15 04:10:01 +01:00
|
|
|
desc_offset =
|
|
|
|
nir_imm_int(b, (bind_layout->offset / (4 * A6XX_TEX_CONST_DWORDS)) +
|
|
|
|
offset);
|
|
|
|
descriptor_stride = bind_layout->size / (4 * A6XX_TEX_CONST_DWORDS);
|
2019-12-09 21:31:35 +00:00
|
|
|
|
2020-02-05 22:54:42 +00:00
|
|
|
if (deref->deref_type != nir_deref_type_var) {
|
|
|
|
assert(deref->deref_type == nir_deref_type_array);
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
|
|
|
|
nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
|
|
|
|
desc_offset = nir_iadd(b, desc_offset,
|
|
|
|
nir_imul_imm(b, arr_index, descriptor_stride));
|
2020-02-05 22:54:42 +00:00
|
|
|
}
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
|
2021-01-02 07:31:11 +00:00
|
|
|
return nir_bindless_resource_ir3(b, 32, desc_offset, .desc_set = set);
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
lower_image_deref(nir_builder *b,
|
|
|
|
nir_intrinsic_instr *instr, struct tu_shader *shader,
|
|
|
|
const struct tu_pipeline_layout *layout)
|
|
|
|
{
|
|
|
|
nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
|
|
|
|
nir_ssa_def *bindless = build_bindless(b, deref, false, shader, layout);
|
|
|
|
nir_rewrite_image_intrinsic(instr, bindless, true);
|
2019-12-09 21:31:35 +00:00
|
|
|
}
|
|
|
|
|
2019-12-19 00:30:37 +00:00
|
|
|
static bool
|
|
|
|
lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
|
|
|
|
struct tu_shader *shader,
|
|
|
|
const struct tu_pipeline_layout *layout)
|
|
|
|
{
|
|
|
|
switch (instr->intrinsic) {
|
|
|
|
case nir_intrinsic_load_push_constant:
|
|
|
|
lower_load_push_constant(b, instr, shader);
|
|
|
|
return true;
|
|
|
|
|
2020-06-29 18:18:20 +01:00
|
|
|
case nir_intrinsic_load_vulkan_descriptor:
|
|
|
|
lower_load_vulkan_descriptor(instr);
|
|
|
|
return true;
|
|
|
|
|
2019-12-19 00:30:37 +00:00
|
|
|
case nir_intrinsic_vulkan_resource_index:
|
|
|
|
lower_vulkan_resource_index(b, instr, shader, layout);
|
|
|
|
return true;
|
|
|
|
|
2020-06-29 18:18:20 +01:00
|
|
|
case nir_intrinsic_load_ubo:
|
|
|
|
case nir_intrinsic_load_ssbo:
|
|
|
|
case nir_intrinsic_store_ssbo:
|
|
|
|
case nir_intrinsic_ssbo_atomic_add:
|
|
|
|
case nir_intrinsic_ssbo_atomic_imin:
|
|
|
|
case nir_intrinsic_ssbo_atomic_umin:
|
|
|
|
case nir_intrinsic_ssbo_atomic_imax:
|
|
|
|
case nir_intrinsic_ssbo_atomic_umax:
|
|
|
|
case nir_intrinsic_ssbo_atomic_and:
|
|
|
|
case nir_intrinsic_ssbo_atomic_or:
|
|
|
|
case nir_intrinsic_ssbo_atomic_xor:
|
|
|
|
case nir_intrinsic_ssbo_atomic_exchange:
|
|
|
|
case nir_intrinsic_ssbo_atomic_comp_swap:
|
|
|
|
case nir_intrinsic_ssbo_atomic_fadd:
|
|
|
|
case nir_intrinsic_ssbo_atomic_fmin:
|
|
|
|
case nir_intrinsic_ssbo_atomic_fmax:
|
|
|
|
case nir_intrinsic_ssbo_atomic_fcomp_swap:
|
2020-09-22 09:24:45 +01:00
|
|
|
case nir_intrinsic_get_ssbo_size:
|
2020-06-29 18:18:20 +01:00
|
|
|
lower_ssbo_ubo_intrinsic(b, instr);
|
|
|
|
return true;
|
|
|
|
|
2019-12-09 21:31:35 +00:00
|
|
|
case nir_intrinsic_image_deref_load:
|
|
|
|
case nir_intrinsic_image_deref_store:
|
|
|
|
case nir_intrinsic_image_deref_atomic_add:
|
|
|
|
case nir_intrinsic_image_deref_atomic_imin:
|
|
|
|
case nir_intrinsic_image_deref_atomic_umin:
|
|
|
|
case nir_intrinsic_image_deref_atomic_imax:
|
|
|
|
case nir_intrinsic_image_deref_atomic_umax:
|
|
|
|
case nir_intrinsic_image_deref_atomic_and:
|
|
|
|
case nir_intrinsic_image_deref_atomic_or:
|
|
|
|
case nir_intrinsic_image_deref_atomic_xor:
|
|
|
|
case nir_intrinsic_image_deref_atomic_exchange:
|
|
|
|
case nir_intrinsic_image_deref_atomic_comp_swap:
|
|
|
|
case nir_intrinsic_image_deref_size:
|
|
|
|
case nir_intrinsic_image_deref_samples:
|
2020-02-05 22:54:42 +00:00
|
|
|
lower_image_deref(b, instr, shader, layout);
|
2019-12-09 21:31:35 +00:00
|
|
|
return true;
|
|
|
|
|
2019-12-19 00:30:37 +00:00
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
2019-09-26 05:29:26 +01:00
|
|
|
}
|
|
|
|
|
2020-04-10 14:19:36 +01:00
|
|
|
static void
|
|
|
|
lower_tex_ycbcr(const struct tu_pipeline_layout *layout,
|
|
|
|
nir_builder *builder,
|
|
|
|
nir_tex_instr *tex)
|
|
|
|
{
|
|
|
|
int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
|
|
|
|
assert(deref_src_idx >= 0);
|
|
|
|
nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
|
|
|
|
|
|
|
|
nir_variable *var = nir_deref_instr_get_variable(deref);
|
|
|
|
const struct tu_descriptor_set_layout *set_layout =
|
|
|
|
layout->set[var->data.descriptor_set].layout;
|
|
|
|
const struct tu_descriptor_set_binding_layout *binding =
|
|
|
|
&set_layout->binding[var->data.binding];
|
|
|
|
const struct tu_sampler_ycbcr_conversion *ycbcr_samplers =
|
|
|
|
tu_immutable_ycbcr_samplers(set_layout, binding);
|
|
|
|
|
|
|
|
if (!ycbcr_samplers)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* For the following instructions, we don't apply any change */
|
|
|
|
if (tex->op == nir_texop_txs ||
|
|
|
|
tex->op == nir_texop_query_levels ||
|
|
|
|
tex->op == nir_texop_lod)
|
|
|
|
return;
|
|
|
|
|
|
|
|
assert(tex->texture_index == 0);
|
|
|
|
unsigned array_index = 0;
|
|
|
|
if (deref->deref_type != nir_deref_type_var) {
|
|
|
|
assert(deref->deref_type == nir_deref_type_array);
|
|
|
|
if (!nir_src_is_const(deref->arr.index))
|
|
|
|
return;
|
|
|
|
array_index = nir_src_as_uint(deref->arr.index);
|
|
|
|
array_index = MIN2(array_index, binding->array_size - 1);
|
|
|
|
}
|
|
|
|
const struct tu_sampler_ycbcr_conversion *ycbcr_sampler = ycbcr_samplers + array_index;
|
|
|
|
|
|
|
|
if (ycbcr_sampler->ycbcr_model == VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
|
|
|
|
return;
|
|
|
|
|
|
|
|
builder->cursor = nir_after_instr(&tex->instr);
|
|
|
|
|
|
|
|
uint8_t bits = vk_format_get_component_bits(ycbcr_sampler->format,
|
|
|
|
UTIL_FORMAT_COLORSPACE_RGB,
|
|
|
|
PIPE_SWIZZLE_X);
|
|
|
|
uint32_t bpcs[3] = {bits, bits, bits}; /* TODO: use right bpc for each channel ? */
|
|
|
|
nir_ssa_def *result = nir_convert_ycbcr_to_rgb(builder,
|
|
|
|
ycbcr_sampler->ycbcr_model,
|
|
|
|
ycbcr_sampler->ycbcr_range,
|
|
|
|
&tex->dest.ssa,
|
|
|
|
bpcs);
|
2021-03-03 16:35:36 +00:00
|
|
|
nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, result,
|
2020-04-10 14:19:36 +01:00
|
|
|
result->parent_instr);
|
|
|
|
|
|
|
|
builder->cursor = nir_before_instr(&tex->instr);
|
|
|
|
}
|
|
|
|
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
static bool
|
|
|
|
lower_tex(nir_builder *b, nir_tex_instr *tex,
|
|
|
|
struct tu_shader *shader, const struct tu_pipeline_layout *layout)
|
|
|
|
{
|
2020-04-10 14:19:36 +01:00
|
|
|
lower_tex_ycbcr(layout, b, tex);
|
|
|
|
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
|
|
|
|
if (sampler_src_idx >= 0) {
|
|
|
|
nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
|
|
|
|
nir_ssa_def *bindless = build_bindless(b, deref, true, shader, layout);
|
|
|
|
nir_instr_rewrite_src(&tex->instr, &tex->src[sampler_src_idx].src,
|
|
|
|
nir_src_for_ssa(bindless));
|
|
|
|
tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_handle;
|
|
|
|
}
|
|
|
|
|
|
|
|
int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
|
|
|
|
if (tex_src_idx >= 0) {
|
|
|
|
nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src);
|
|
|
|
nir_ssa_def *bindless = build_bindless(b, deref, false, shader, layout);
|
|
|
|
nir_instr_rewrite_src(&tex->instr, &tex->src[tex_src_idx].src,
|
|
|
|
nir_src_for_ssa(bindless));
|
|
|
|
tex->src[tex_src_idx].src_type = nir_tex_src_texture_handle;
|
2020-06-15 04:10:01 +01:00
|
|
|
|
|
|
|
/* for the input attachment case: */
|
|
|
|
if (bindless->parent_instr->type != nir_instr_type_intrinsic)
|
|
|
|
tex->src[tex_src_idx].src_type = nir_tex_src_texture_offset;
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool
|
|
|
|
lower_impl(nir_function_impl *impl, struct tu_shader *shader,
|
|
|
|
const struct tu_pipeline_layout *layout)
|
|
|
|
{
|
|
|
|
nir_builder b;
|
|
|
|
nir_builder_init(&b, impl);
|
|
|
|
bool progress = false;
|
|
|
|
|
|
|
|
nir_foreach_block(block, impl) {
|
|
|
|
nir_foreach_instr_safe(instr, block) {
|
|
|
|
b.cursor = nir_before_instr(instr);
|
|
|
|
switch (instr->type) {
|
|
|
|
case nir_instr_type_tex:
|
|
|
|
progress |= lower_tex(&b, nir_instr_as_tex(instr), shader, layout);
|
|
|
|
break;
|
|
|
|
case nir_instr_type_intrinsic:
|
|
|
|
progress |= lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader, layout);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-29 18:18:20 +01:00
|
|
|
if (progress)
|
|
|
|
nir_metadata_preserve(impl, nir_metadata_none);
|
|
|
|
else
|
|
|
|
nir_metadata_preserve(impl, nir_metadata_all);
|
|
|
|
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
return progress;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2020-03-18 12:12:31 +00:00
|
|
|
/* Figure out the range of push constants that we're actually going to push to
|
|
|
|
* the shader, and tell the backend to reserve this range when pushing UBO
|
|
|
|
* constants.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void
|
|
|
|
gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader)
|
|
|
|
{
|
|
|
|
uint32_t min = UINT32_MAX, max = 0;
|
|
|
|
nir_foreach_function(function, shader) {
|
|
|
|
if (!function->impl)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
nir_foreach_block(block, function->impl) {
|
|
|
|
nir_foreach_instr_safe(instr, block) {
|
|
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
|
|
|
if (intrin->intrinsic != nir_intrinsic_load_push_constant)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
uint32_t base = nir_intrinsic_base(intrin);
|
|
|
|
uint32_t range = nir_intrinsic_range(intrin);
|
|
|
|
min = MIN2(min, base);
|
|
|
|
max = MAX2(max, base + range);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (min >= max) {
|
|
|
|
tu_shader->push_consts.lo = 0;
|
|
|
|
tu_shader->push_consts.count = 0;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* CP_LOAD_STATE OFFSET and NUM_UNIT are in units of vec4 (4 dwords),
|
|
|
|
* however there's an alignment requirement of 4 on OFFSET. Expand the
|
|
|
|
* range and change units accordingly.
|
|
|
|
*/
|
|
|
|
tu_shader->push_consts.lo = (min / 16) / 4 * 4;
|
|
|
|
tu_shader->push_consts.count =
|
|
|
|
align(max, 16) / 16 - tu_shader->push_consts.lo;
|
|
|
|
}
|
|
|
|
|
2019-09-26 05:29:26 +01:00
|
|
|
static bool
|
2019-12-14 06:05:11 +00:00
|
|
|
tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader,
|
|
|
|
const struct tu_pipeline_layout *layout)
|
2019-09-26 05:29:26 +01:00
|
|
|
{
|
|
|
|
bool progress = false;
|
|
|
|
|
2020-03-18 12:12:31 +00:00
|
|
|
gather_push_constants(shader, tu_shader);
|
|
|
|
|
2019-09-26 05:29:26 +01:00
|
|
|
nir_foreach_function(function, shader) {
|
|
|
|
if (function->impl)
|
2019-12-14 06:05:11 +00:00
|
|
|
progress |= lower_impl(function->impl, tu_shader, layout);
|
2019-09-26 05:29:26 +01:00
|
|
|
}
|
|
|
|
|
2020-06-17 12:02:49 +01:00
|
|
|
/* Remove now-unused variables so that when we gather the shader info later
|
|
|
|
* they won't be counted.
|
|
|
|
*/
|
2020-06-17 14:39:18 +01:00
|
|
|
|
|
|
|
if (progress)
|
|
|
|
nir_opt_dce(shader);
|
|
|
|
|
|
|
|
progress |=
|
|
|
|
nir_remove_dead_variables(shader,
|
|
|
|
nir_var_uniform | nir_var_mem_ubo | nir_var_mem_ssbo,
|
|
|
|
NULL);
|
2020-06-17 12:02:49 +01:00
|
|
|
|
2019-09-26 05:29:26 +01:00
|
|
|
return progress;
|
|
|
|
}
|
|
|
|
|
2020-12-14 13:05:57 +00:00
|
|
|
static bool
|
|
|
|
lower_image_size_filter(const nir_instr *instr, UNUSED const void *data)
|
|
|
|
{
|
|
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
|
|
|
if(intrin->intrinsic != nir_intrinsic_bindless_image_size)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return (intrin->num_components == 3 && nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* imageSize() expects the last component of the return value to be the
|
|
|
|
* number of layers in the texture array. In the case of cube map array,
|
|
|
|
* it will return a ivec3, with the third component being the number of
|
|
|
|
* layer-faces. Therefore, we need to divide it by 6 (# faces of the
|
|
|
|
* cube map).
|
|
|
|
*/
|
|
|
|
static nir_ssa_def *
|
|
|
|
lower_image_size_lower(nir_builder *b, nir_instr *instr, UNUSED void *data)
|
|
|
|
{
|
|
|
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
|
|
|
b->cursor = nir_after_instr(&intrin->instr);
|
|
|
|
nir_ssa_def *channels[NIR_MAX_VEC_COMPONENTS];
|
|
|
|
for (unsigned i = 0; i < intrin->num_components; i++) {
|
|
|
|
channels[i] = nir_vector_extract(b, &intrin->dest.ssa, nir_imm_int(b, i));
|
|
|
|
}
|
|
|
|
|
|
|
|
channels[2] = nir_idiv(b, channels[2], nir_imm_int(b, 6u));
|
|
|
|
nir_ssa_def *result = nir_vec(b, channels, intrin->num_components);
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool
|
|
|
|
tu_lower_image_size(nir_shader *shader)
|
|
|
|
{
|
|
|
|
return nir_shader_lower_instructions(shader,
|
|
|
|
lower_image_size_filter,
|
|
|
|
lower_image_size_lower,
|
|
|
|
NULL);
|
|
|
|
}
|
|
|
|
|
2020-06-29 18:18:20 +01:00
|
|
|
/* glsl_type size/align callback for shared memory layout: size is the
 * natural vector size (bools stored as 4 bytes), alignment is fixed at 4.
 */
static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   unsigned bytes_per_comp =
      glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;

   *size = bytes_per_comp * glsl_get_vector_elements(type);
   *align = 4;
}
|
|
|
|
|
2020-02-20 05:48:28 +00:00
|
|
|
static void
|
2020-06-16 10:44:23 +01:00
|
|
|
tu_gather_xfb_info(nir_shader *nir, struct ir3_stream_output_info *info)
|
2020-02-20 05:48:28 +00:00
|
|
|
{
|
|
|
|
nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL);
|
|
|
|
|
|
|
|
if (!xfb)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* creating a map from VARYING_SLOT_* enums to consecutive index */
|
|
|
|
uint8_t num_outputs = 0;
|
|
|
|
uint64_t outputs_written = 0;
|
|
|
|
for (int i = 0; i < xfb->output_count; i++)
|
|
|
|
outputs_written |= BITFIELD64_BIT(xfb->outputs[i].location);
|
|
|
|
|
|
|
|
uint8_t output_map[VARYING_SLOT_TESS_MAX];
|
|
|
|
memset(output_map, 0, sizeof(output_map));
|
|
|
|
|
|
|
|
for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
|
|
|
|
if (outputs_written & BITFIELD64_BIT(attr))
|
|
|
|
output_map[attr] = num_outputs++;
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(xfb->output_count < IR3_MAX_SO_OUTPUTS);
|
|
|
|
info->num_outputs = xfb->output_count;
|
|
|
|
|
2020-09-23 12:08:37 +01:00
|
|
|
for (int i = 0; i < IR3_MAX_SO_BUFFERS; i++) {
|
2020-02-20 05:48:28 +00:00
|
|
|
info->stride[i] = xfb->buffers[i].stride / 4;
|
2020-09-23 12:08:37 +01:00
|
|
|
info->buffer_to_stream[i] = xfb->buffer_to_stream[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
info->streams_written = xfb->streams_written;
|
2020-02-20 05:48:28 +00:00
|
|
|
|
|
|
|
for (int i = 0; i < xfb->output_count; i++) {
|
|
|
|
info->output[i].register_index = output_map[xfb->outputs[i].location];
|
|
|
|
info->output[i].start_component = xfb->outputs[i].component_offset;
|
|
|
|
info->output[i].num_components =
|
|
|
|
util_bitcount(xfb->outputs[i].component_mask);
|
|
|
|
info->output[i].output_buffer = xfb->outputs[i].buffer;
|
2020-03-17 03:50:59 +00:00
|
|
|
info->output[i].dst_offset = xfb->outputs[i].offset / 4;
|
2020-02-20 05:48:28 +00:00
|
|
|
info->output[i].stream = xfb->buffer_to_stream[xfb->outputs[i].buffer];
|
|
|
|
}
|
|
|
|
|
|
|
|
ralloc_free(xfb);
|
|
|
|
}
|
|
|
|
|
2019-02-20 17:53:47 +00:00
|
|
|
struct tu_shader *
|
|
|
|
tu_shader_create(struct tu_device *dev,
|
2020-07-06 17:16:39 +01:00
|
|
|
nir_shader *nir,
|
2020-07-02 10:33:42 +01:00
|
|
|
unsigned multiview_mask,
|
2019-12-14 06:05:11 +00:00
|
|
|
struct tu_pipeline_layout *layout,
|
2019-02-20 17:53:47 +00:00
|
|
|
const VkAllocationCallbacks *alloc)
|
|
|
|
{
|
|
|
|
struct tu_shader *shader;
|
|
|
|
|
|
|
|
shader = vk_zalloc2(
|
2020-07-13 04:08:15 +01:00
|
|
|
&dev->vk.alloc, alloc,
|
2020-06-16 10:44:23 +01:00
|
|
|
sizeof(*shader),
|
2019-02-20 17:53:47 +00:00
|
|
|
8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
|
|
|
|
if (!shader)
|
|
|
|
return NULL;
|
|
|
|
|
2020-02-20 05:48:28 +00:00
|
|
|
/* Gather information for transform feedback.
|
|
|
|
* This should be called after nir_split_per_member_structs.
|
2020-03-17 03:57:03 +00:00
|
|
|
* Also needs to be called after nir_remove_dead_variables with varyings,
|
|
|
|
* so that we could align stream outputs correctly.
|
2020-02-20 05:48:28 +00:00
|
|
|
*/
|
2020-06-16 10:44:23 +01:00
|
|
|
struct ir3_stream_output_info so_info = {};
|
2020-02-20 05:48:28 +00:00
|
|
|
if (nir->info.stage == MESA_SHADER_VERTEX ||
|
|
|
|
nir->info.stage == MESA_SHADER_TESS_EVAL ||
|
|
|
|
nir->info.stage == MESA_SHADER_GEOMETRY)
|
2020-06-16 10:44:23 +01:00
|
|
|
tu_gather_xfb_info(nir, &so_info);
|
2020-02-20 05:48:28 +00:00
|
|
|
|
2020-07-06 17:16:39 +01:00
|
|
|
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
2020-07-01 15:55:46 +01:00
|
|
|
NIR_PASS_V(nir, nir_lower_input_attachments,
|
|
|
|
&(nir_input_attachment_options) {
|
|
|
|
.use_fragcoord_sysval = true,
|
2020-07-01 16:21:47 +01:00
|
|
|
.use_layer_id_sysval = false,
|
2020-07-02 10:33:42 +01:00
|
|
|
/* When using multiview rendering, we must use
|
|
|
|
* gl_ViewIndex as the layer id to pass to the texture
|
|
|
|
* sampling function. gl_Layer doesn't work when
|
|
|
|
* multiview is enabled.
|
|
|
|
*/
|
|
|
|
.use_view_id_for_layer = multiview_mask != 0,
|
2020-07-01 15:55:46 +01:00
|
|
|
});
|
|
|
|
}
|
2019-12-12 22:05:22 +00:00
|
|
|
|
2020-07-06 17:16:39 +01:00
|
|
|
if (nir->info.stage == MESA_SHADER_VERTEX && multiview_mask) {
|
2020-08-21 12:46:09 +01:00
|
|
|
tu_nir_lower_multiview(nir, multiview_mask,
|
|
|
|
&shader->multi_pos_output, dev);
|
2020-07-02 10:33:42 +01:00
|
|
|
}
|
|
|
|
|
2020-05-27 23:09:33 +01:00
|
|
|
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
|
|
|
|
nir_address_format_32bit_offset);
|
|
|
|
|
2020-06-29 18:18:20 +01:00
|
|
|
NIR_PASS_V(nir, nir_lower_explicit_io,
|
|
|
|
nir_var_mem_ubo | nir_var_mem_ssbo,
|
|
|
|
nir_address_format_vec2_index_32bit_offset);
|
|
|
|
|
|
|
|
if (nir->info.stage == MESA_SHADER_COMPUTE) {
|
|
|
|
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
|
|
|
|
nir_var_mem_shared, shared_type_info);
|
|
|
|
NIR_PASS_V(nir, nir_lower_explicit_io,
|
|
|
|
nir_var_mem_shared,
|
|
|
|
nir_address_format_32bit_offset);
|
|
|
|
}
|
|
|
|
|
2020-07-06 17:16:39 +01:00
|
|
|
nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage);
|
|
|
|
nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, nir->info.stage);
|
|
|
|
|
2019-12-14 06:05:11 +00:00
|
|
|
NIR_PASS_V(nir, tu_lower_io, shader, layout);
|
2019-09-26 05:29:26 +01:00
|
|
|
|
2020-12-14 13:05:57 +00:00
|
|
|
NIR_PASS_V(nir, tu_lower_image_size);
|
|
|
|
|
2019-05-19 07:55:01 +01:00
|
|
|
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
2019-02-20 17:53:47 +00:00
|
|
|
|
2020-06-15 22:37:24 +01:00
|
|
|
ir3_finalize_nir(dev->compiler, nir);
|
|
|
|
|
2020-06-16 10:44:23 +01:00
|
|
|
shader->ir3_shader =
|
|
|
|
ir3_shader_from_nir(dev->compiler, nir,
|
|
|
|
align(shader->push_consts.count, 4),
|
|
|
|
&so_info);
|
2019-02-20 17:53:47 +00:00
|
|
|
|
|
|
|
return shader;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
tu_shader_destroy(struct tu_device *dev,
|
|
|
|
struct tu_shader *shader,
|
|
|
|
const VkAllocationCallbacks *alloc)
|
|
|
|
{
|
2020-06-16 10:44:23 +01:00
|
|
|
ir3_shader_destroy(shader->ir3_shader);
|
2019-02-20 17:53:47 +00:00
|
|
|
|
2020-07-13 04:08:15 +01:00
|
|
|
vk_free2(&dev->vk.alloc, alloc, shader);
|
2019-02-20 17:53:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
VkResult
|
|
|
|
tu_CreateShaderModule(VkDevice _device,
|
|
|
|
const VkShaderModuleCreateInfo *pCreateInfo,
|
|
|
|
const VkAllocationCallbacks *pAllocator,
|
|
|
|
VkShaderModule *pShaderModule)
|
|
|
|
{
|
|
|
|
TU_FROM_HANDLE(tu_device, device, _device);
|
|
|
|
struct tu_shader_module *module;
|
|
|
|
|
|
|
|
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
|
|
|
|
assert(pCreateInfo->flags == 0);
|
|
|
|
assert(pCreateInfo->codeSize % 4 == 0);
|
|
|
|
|
2020-07-13 04:08:15 +01:00
|
|
|
module = vk_object_alloc(&device->vk, pAllocator,
|
|
|
|
sizeof(*module) + pCreateInfo->codeSize,
|
|
|
|
VK_OBJECT_TYPE_SHADER_MODULE);
|
2019-02-20 17:53:47 +00:00
|
|
|
if (module == NULL)
|
|
|
|
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
|
|
|
|
|
|
module->code_size = pCreateInfo->codeSize;
|
|
|
|
memcpy(module->code, pCreateInfo->pCode, pCreateInfo->codeSize);
|
|
|
|
|
|
|
|
*pShaderModule = tu_shader_module_to_handle(module);
|
|
|
|
|
|
|
|
return VK_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
tu_DestroyShaderModule(VkDevice _device,
|
|
|
|
VkShaderModule _module,
|
|
|
|
const VkAllocationCallbacks *pAllocator)
|
|
|
|
{
|
|
|
|
TU_FROM_HANDLE(tu_device, device, _device);
|
|
|
|
TU_FROM_HANDLE(tu_shader_module, module, _module);
|
|
|
|
|
|
|
|
if (!module)
|
|
|
|
return;
|
|
|
|
|
2020-07-13 04:08:15 +01:00
|
|
|
vk_object_free(&device->vk, pAllocator, module);
|
2019-02-20 17:53:47 +00:00
|
|
|
}
|