/*
 * Copyright © 2019 Google LLC
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
|
#include "tu_private.h"

#include <string.h>

#include "spirv/nir_spirv.h"
#include "util/mesa-sha1.h"
#include "nir/nir_xfb_info.h"
#include "nir/nir_vulkan.h"
#include "vk_util.h"

#include "ir3/ir3_nir.h"
|
|
|
|
|
2019-05-19 07:55:01 +01:00
|
|
|
static nir_shader *
|
2019-02-20 17:53:47 +00:00
|
|
|
tu_spirv_to_nir(struct ir3_compiler *compiler,
|
|
|
|
const uint32_t *words,
|
|
|
|
size_t word_count,
|
|
|
|
gl_shader_stage stage,
|
|
|
|
const char *entry_point_name,
|
|
|
|
const VkSpecializationInfo *spec_info)
|
|
|
|
{
|
|
|
|
/* TODO these are made-up */
|
|
|
|
const struct spirv_to_nir_options spirv_options = {
|
2019-09-28 00:04:30 +01:00
|
|
|
.frag_coord_is_sysval = true,
|
2019-02-20 17:53:47 +00:00
|
|
|
.lower_ubo_ssbo_access_to_offsets = true,
|
2020-02-20 05:48:28 +00:00
|
|
|
.caps = {
|
|
|
|
.transform_feedback = compiler->gpu_id >= 600,
|
|
|
|
},
|
2019-02-20 17:53:47 +00:00
|
|
|
};
|
|
|
|
const nir_shader_compiler_options *nir_options =
|
|
|
|
ir3_get_compiler_options(compiler);
|
|
|
|
|
|
|
|
/* convert VkSpecializationInfo */
|
|
|
|
struct nir_spirv_specialization *spec = NULL;
|
|
|
|
uint32_t num_spec = 0;
|
|
|
|
if (spec_info && spec_info->mapEntryCount) {
|
2020-04-22 20:05:13 +01:00
|
|
|
spec = calloc(spec_info->mapEntryCount, sizeof(*spec));
|
2019-02-20 17:53:47 +00:00
|
|
|
if (!spec)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
for (uint32_t i = 0; i < spec_info->mapEntryCount; i++) {
|
|
|
|
const VkSpecializationMapEntry *entry = &spec_info->pMapEntries[i];
|
|
|
|
const void *data = spec_info->pData + entry->offset;
|
|
|
|
assert(data + entry->size <= spec_info->pData + spec_info->dataSize);
|
|
|
|
spec[i].id = entry->constantID;
|
2020-04-22 19:43:51 +01:00
|
|
|
switch (entry->size) {
|
|
|
|
case 8:
|
2020-04-22 20:05:13 +01:00
|
|
|
spec[i].value.u64 = *(const uint64_t *)data;
|
2020-04-22 19:43:51 +01:00
|
|
|
break;
|
|
|
|
case 4:
|
2020-04-22 20:05:13 +01:00
|
|
|
spec[i].value.u32 = *(const uint32_t *)data;
|
2020-04-22 19:43:51 +01:00
|
|
|
break;
|
|
|
|
case 2:
|
2020-04-22 20:05:13 +01:00
|
|
|
spec[i].value.u16 = *(const uint16_t *)data;
|
2020-04-22 19:43:51 +01:00
|
|
|
break;
|
|
|
|
case 1:
|
2020-04-22 20:05:13 +01:00
|
|
|
spec[i].value.u8 = *(const uint8_t *)data;
|
2020-04-22 19:43:51 +01:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
assert(!"Invalid spec constant size");
|
|
|
|
break;
|
|
|
|
}
|
2019-02-20 17:53:47 +00:00
|
|
|
spec[i].defined_on_module = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
num_spec = spec_info->mapEntryCount;
|
|
|
|
}
|
|
|
|
|
2019-05-19 08:22:17 +01:00
|
|
|
nir_shader *nir =
|
2019-02-20 17:53:47 +00:00
|
|
|
spirv_to_nir(words, word_count, spec, num_spec, stage, entry_point_name,
|
|
|
|
&spirv_options, nir_options);
|
|
|
|
|
|
|
|
free(spec);
|
|
|
|
|
2019-05-19 08:22:17 +01:00
|
|
|
assert(nir->info.stage == stage);
|
|
|
|
nir_validate_shader(nir, "after spirv_to_nir");
|
2019-02-20 17:53:47 +00:00
|
|
|
|
2019-05-19 08:22:17 +01:00
|
|
|
return nir;
|
2019-02-20 17:53:47 +00:00
|
|
|
}
|
|
|
|
|
2019-12-19 00:30:37 +00:00
|
|
|
static void
|
|
|
|
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
|
|
|
|
struct tu_shader *shader)
|
2019-09-26 05:29:26 +01:00
|
|
|
{
|
2019-12-19 00:30:37 +00:00
|
|
|
nir_intrinsic_instr *load =
|
2020-03-18 12:12:31 +00:00
|
|
|
nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
|
2019-12-19 00:30:37 +00:00
|
|
|
load->num_components = instr->num_components;
|
2020-03-18 12:12:31 +00:00
|
|
|
uint32_t base = nir_intrinsic_base(instr);
|
|
|
|
assert(base % 4 == 0);
|
|
|
|
assert(base >= shader->push_consts.lo * 16);
|
|
|
|
base -= shader->push_consts.lo * 16;
|
|
|
|
nir_intrinsic_set_base(load, base / 4);
|
|
|
|
load->src[0] =
|
|
|
|
nir_src_for_ssa(nir_ushr(b, instr->src[0].ssa, nir_imm_int(b, 2)));
|
2019-12-19 00:30:37 +00:00
|
|
|
nir_ssa_dest_init(&load->instr, &load->dest,
|
|
|
|
load->num_components, instr->dest.ssa.bit_size,
|
|
|
|
instr->dest.ssa.name);
|
|
|
|
nir_builder_instr_insert(b, &load->instr);
|
|
|
|
nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(&load->dest.ssa));
|
2019-10-14 16:09:27 +01:00
|
|
|
|
2019-12-19 00:30:37 +00:00
|
|
|
nir_instr_remove(&instr->instr);
|
|
|
|
}
|
2019-09-26 05:29:26 +01:00
|
|
|
|
2019-12-19 00:30:37 +00:00
|
|
|
static void
|
|
|
|
lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr,
|
|
|
|
struct tu_shader *shader,
|
|
|
|
const struct tu_pipeline_layout *layout)
|
|
|
|
{
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
nir_ssa_def *vulkan_idx = instr->src[0].ssa;
|
2019-09-26 05:29:26 +01:00
|
|
|
|
2019-10-07 02:42:27 +01:00
|
|
|
unsigned set = nir_intrinsic_desc_set(instr);
|
|
|
|
unsigned binding = nir_intrinsic_binding(instr);
|
2019-12-14 06:05:11 +00:00
|
|
|
struct tu_descriptor_set_layout *set_layout = layout->set[set].layout;
|
|
|
|
struct tu_descriptor_set_binding_layout *binding_layout =
|
|
|
|
&set_layout->binding[binding];
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
uint32_t base;
|
2019-10-07 02:42:27 +01:00
|
|
|
|
2020-06-09 13:40:58 +01:00
|
|
|
shader->active_desc_sets |= 1u << set;
|
|
|
|
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
switch (binding_layout->type) {
|
2019-10-07 02:42:27 +01:00
|
|
|
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
|
|
|
|
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
base = layout->set[set].dynamic_offset_start +
|
2020-06-15 04:10:01 +01:00
|
|
|
binding_layout->dynamic_offset_offset;
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
set = MAX_SETS;
|
2019-10-07 02:42:27 +01:00
|
|
|
break;
|
|
|
|
default:
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
base = binding_layout->offset / (4 * A6XX_TEX_CONST_DWORDS);
|
2019-10-07 02:42:27 +01:00
|
|
|
break;
|
|
|
|
}
|
2019-09-26 05:29:26 +01:00
|
|
|
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
nir_intrinsic_instr *bindless =
|
|
|
|
nir_intrinsic_instr_create(b->shader,
|
|
|
|
nir_intrinsic_bindless_resource_ir3);
|
2020-06-17 14:47:29 +01:00
|
|
|
bindless->num_components = 0;
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
nir_ssa_dest_init(&bindless->instr, &bindless->dest,
|
|
|
|
1, 32, NULL);
|
|
|
|
nir_intrinsic_set_desc_set(bindless, set);
|
|
|
|
bindless->src[0] = nir_src_for_ssa(nir_iadd(b, nir_imm_int(b, base), vulkan_idx));
|
|
|
|
nir_builder_instr_insert(b, &bindless->instr);
|
|
|
|
|
2019-09-26 05:29:26 +01:00
|
|
|
nir_ssa_def_rewrite_uses(&instr->dest.ssa,
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
nir_src_for_ssa(&bindless->dest.ssa));
|
2019-09-26 05:29:26 +01:00
|
|
|
nir_instr_remove(&instr->instr);
|
2019-12-19 00:30:37 +00:00
|
|
|
}
|
2019-09-26 05:29:26 +01:00
|
|
|
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
/* Build the SSA value used as a bindless texture/sampler/image handle for
 * the variable behind `deref`: a bindless_resource_ir3 intrinsic holding
 * the descriptor set and the descriptor's offset within the set (in units
 * of A6XX_TEX_CONST_DWORDS dwords).  Input attachments take a non-bindless
 * path and return a plain texture index instead.
 */
static nir_ssa_def *
build_bindless(nir_builder *b, nir_deref_instr *deref, bool is_sampler,
               struct tu_shader *shader,
               const struct tu_pipeline_layout *layout)
{
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   const struct tu_descriptor_set_binding_layout *bind_layout =
      &layout->set[set].layout->binding[binding];

   /* input attachments use non bindless workaround */
   if (bind_layout->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
      const struct glsl_type *glsl_type = glsl_without_array(var->type);
      /* Two texture slots per attachment (see D24S8 note below). */
      uint32_t idx = var->data.index * 2;

      /* Mark all slots of this attachment array as used.
       * NOTE(review): idx already includes the *2 factor, so shifting by
       * (idx * 2) shifts by index*4 while the slots occupied are
       * idx..idx + array_size*2 - 1 — this looks like a double
       * multiplication; confirm against how textures_used is consumed.
       */
      b->shader->info.textures_used |=
         ((1ull << (bind_layout->array_size * 2)) - 1) << (idx * 2);

      /* D24S8 workaround: stencil of D24S8 will be sampled as uint */
      if (glsl_get_sampler_result_type(glsl_type) == GLSL_TYPE_UINT)
         idx += 1;

      /* Non-array attachment: the index is a compile-time constant. */
      if (deref->deref_type == nir_deref_type_var)
         return nir_imm_int(b, idx);

      /* Array attachment: add the dynamic element index, scaled by the
       * two slots each element occupies.
       */
      nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
      return nir_iadd(b, nir_imm_int(b, idx),
                      nir_imul_imm(b, arr_index, 2));
   }

   /* Record that this set is referenced so it gets bound. */
   shader->active_desc_sets |= 1u << set;

   nir_ssa_def *desc_offset;
   unsigned descriptor_stride;
   unsigned offset = 0;
   /* Samplers come second in combined image/sampler descriptors, see
    * write_combined_image_sampler_descriptor().
    */
   if (is_sampler && bind_layout->type ==
       VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
      offset = 1;
   }
   /* Byte offsets in the layout are converted to descriptor-sized units
    * (A6XX_TEX_CONST_DWORDS dwords each).
    */
   desc_offset =
      nir_imm_int(b, (bind_layout->offset / (4 * A6XX_TEX_CONST_DWORDS)) +
                  offset);
   descriptor_stride = bind_layout->size / (4 * A6XX_TEX_CONST_DWORDS);

   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      /* Arrayed binding: add the dynamic element index times the stride. */
      nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
      desc_offset = nir_iadd(b, desc_offset,
                             nir_imul_imm(b, arr_index, descriptor_stride));
   }

   nir_intrinsic_instr *bindless =
      nir_intrinsic_instr_create(b->shader,
                                 nir_intrinsic_bindless_resource_ir3);
   bindless->num_components = 0;
   nir_ssa_dest_init(&bindless->instr, &bindless->dest,
                     1, 32, NULL);
   nir_intrinsic_set_desc_set(bindless, set);
   bindless->src[0] = nir_src_for_ssa(desc_offset);
   nir_builder_instr_insert(b, &bindless->instr);

   return &bindless->dest.ssa;
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
lower_image_deref(nir_builder *b,
|
|
|
|
nir_intrinsic_instr *instr, struct tu_shader *shader,
|
|
|
|
const struct tu_pipeline_layout *layout)
|
|
|
|
{
|
|
|
|
nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
|
|
|
|
nir_ssa_def *bindless = build_bindless(b, deref, false, shader, layout);
|
|
|
|
nir_rewrite_image_intrinsic(instr, bindless, true);
|
2019-12-09 21:31:35 +00:00
|
|
|
}
|
|
|
|
|
2019-12-19 00:30:37 +00:00
|
|
|
/* Lower one Vulkan-level intrinsic to a form the ir3 backend can consume.
 *
 * Returns true when the instruction was rewritten (progress was made), false
 * when it was left untouched so the caller can try other lowerings.
 */
static bool
lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
                struct tu_shader *shader,
                const struct tu_pipeline_layout *layout)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_layer_id:
      /* TODO: remove this when layered rendering is implemented */
      /* Replace every use of the layer id with constant 0, then drop the
       * intrinsic itself.
       */
      nir_ssa_def_rewrite_uses(&instr->dest.ssa,
                               nir_src_for_ssa(nir_imm_int(b, 0)));
      nir_instr_remove(&instr->instr);
      return true;

   case nir_intrinsic_load_push_constant:
      lower_load_push_constant(b, instr, shader);
      return true;

   case nir_intrinsic_vulkan_resource_index:
      lower_vulkan_resource_index(b, instr, shader, layout);
      return true;

   /* Every image access that still addresses its image through a deref is
    * rewritten to take a bindless handle instead (see lower_image_deref).
    */
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_image_deref_samples:
      lower_image_deref(b, instr, shader, layout);
      return true;

   default:
      return false;
   }
}
|
|
|
|
|
2020-04-10 14:19:36 +01:00
|
|
|
/* If the texture bound to this tex instruction has an immutable YCbCr
 * conversion sampler attached, append a YCbCr -> RGB conversion after the
 * sample result and redirect all later uses to the converted value.
 * Instructions without a conversion (or queries like txs) are left alone.
 */
static void
lower_tex_ycbcr(const struct tu_pipeline_layout *layout,
                nir_builder *builder,
                nir_tex_instr *tex)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   assert(deref_src_idx >= 0);
   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);

   /* Walk from the deref back to the variable to find its set/binding, then
    * look up any immutable YCbCr samplers declared in the layout.
    */
   nir_variable *var = nir_deref_instr_get_variable(deref);
   const struct tu_descriptor_set_layout *set_layout =
      layout->set[var->data.descriptor_set].layout;
   const struct tu_descriptor_set_binding_layout *binding =
      &set_layout->binding[var->data.binding];
   const struct tu_sampler_ycbcr_conversion *ycbcr_samplers =
      tu_immutable_ycbcr_samplers(set_layout, binding);

   if (!ycbcr_samplers)
      return;

   /* For the following instructions, we don't apply any change */
   if (tex->op == nir_texop_txs ||
       tex->op == nir_texop_query_levels ||
       tex->op == nir_texop_lod)
      return;

   assert(tex->texture_index == 0);
   /* Arrayed bindings carry one conversion per element; only a constant
    * array index can be resolved here, so bail on indirect indexing.
    */
   unsigned array_index = 0;
   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);
      if (!nir_src_is_const(deref->arr.index))
         return;
      array_index = nir_src_as_uint(deref->arr.index);
      array_index = MIN2(array_index, binding->array_size - 1);
   }
   const struct tu_sampler_ycbcr_conversion *ycbcr_sampler = ycbcr_samplers + array_index;

   /* RGB_IDENTITY means no color-model conversion is requested. */
   if (ycbcr_sampler->ycbcr_model == VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
      return;

   /* Insert the conversion math right after the tex result. */
   builder->cursor = nir_after_instr(&tex->instr);

   /* Assumes all three channels have the bit width of channel X — see TODO. */
   uint8_t bits = vk_format_get_component_bits(ycbcr_sampler->format,
                                               UTIL_FORMAT_COLORSPACE_RGB,
                                               PIPE_SWIZZLE_X);
   uint32_t bpcs[3] = {bits, bits, bits}; /* TODO: use right bpc for each channel ? */
   nir_ssa_def *result = nir_convert_ycbcr_to_rgb(builder,
                                                  ycbcr_sampler->ycbcr_model,
                                                  ycbcr_sampler->ycbcr_range,
                                                  &tex->dest.ssa,
                                                  bpcs);
   /* Rewrite uses *after* the conversion so the conversion itself still
    * consumes the raw tex result.
    */
   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(result),
                                  result->parent_instr);

   /* Restore the cursor for the caller, which continues lowering this tex. */
   builder->cursor = nir_before_instr(&tex->instr);
}
|
|
|
|
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
|
|
|
/* Lower a tex instruction for the bindless descriptor model: apply any YCbCr
 * conversion, then replace sampler and texture derefs with bindless handles.
 * Always returns true (progress), since every tex instruction is touched.
 */
static bool
lower_tex(nir_builder *b, nir_tex_instr *tex,
          struct tu_shader *shader, const struct tu_pipeline_layout *layout)
{
   /* Must run before the derefs below are replaced, since it needs the
    * texture deref to find the YCbCr conversion in the layout.
    */
   lower_tex_ycbcr(layout, b, tex);

   int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
   if (sampler_src_idx >= 0) {
      nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
      /* is_sampler = true: build a handle to the sampler descriptor. */
      nir_ssa_def *bindless = build_bindless(b, deref, true, shader, layout);
      nir_instr_rewrite_src(&tex->instr, &tex->src[sampler_src_idx].src,
                            nir_src_for_ssa(bindless));
      tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_handle;
   }

   int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   if (tex_src_idx >= 0) {
      nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src);
      /* is_sampler = false: build a handle to the texture descriptor. */
      nir_ssa_def *bindless = build_bindless(b, deref, false, shader, layout);
      nir_instr_rewrite_src(&tex->instr, &tex->src[tex_src_idx].src,
                            nir_src_for_ssa(bindless));
      tex->src[tex_src_idx].src_type = nir_tex_src_texture_handle;

      /* for the input attachment case: */
      /* When build_bindless returned something other than the bindless
       * intrinsic (not an intrinsic at all), treat the value as a plain
       * texture offset instead of a handle.
       */
      if (bindless->parent_instr->type != nir_instr_type_intrinsic)
         tex->src[tex_src_idx].src_type = nir_tex_src_texture_offset;
   }

   return true;
}
|
|
|
|
|
|
|
|
static bool
|
|
|
|
lower_impl(nir_function_impl *impl, struct tu_shader *shader,
|
|
|
|
const struct tu_pipeline_layout *layout)
|
|
|
|
{
|
|
|
|
nir_builder b;
|
|
|
|
nir_builder_init(&b, impl);
|
|
|
|
bool progress = false;
|
|
|
|
|
|
|
|
nir_foreach_block(block, impl) {
|
|
|
|
nir_foreach_instr_safe(instr, block) {
|
|
|
|
b.cursor = nir_before_instr(instr);
|
|
|
|
switch (instr->type) {
|
|
|
|
case nir_instr_type_tex:
|
|
|
|
progress |= lower_tex(&b, nir_instr_as_tex(instr), shader, layout);
|
|
|
|
break;
|
|
|
|
case nir_instr_type_intrinsic:
|
|
|
|
progress |= lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader, layout);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return progress;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2020-03-18 12:12:31 +00:00
|
|
|
/* Figure out the range of push constants that we're actually going to push to
|
|
|
|
* the shader, and tell the backend to reserve this range when pushing UBO
|
|
|
|
* constants.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void
|
|
|
|
gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader)
|
|
|
|
{
|
|
|
|
uint32_t min = UINT32_MAX, max = 0;
|
|
|
|
nir_foreach_function(function, shader) {
|
|
|
|
if (!function->impl)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
nir_foreach_block(block, function->impl) {
|
|
|
|
nir_foreach_instr_safe(instr, block) {
|
|
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
|
|
|
if (intrin->intrinsic != nir_intrinsic_load_push_constant)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
uint32_t base = nir_intrinsic_base(intrin);
|
|
|
|
uint32_t range = nir_intrinsic_range(intrin);
|
|
|
|
min = MIN2(min, base);
|
|
|
|
max = MAX2(max, base + range);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (min >= max) {
|
|
|
|
tu_shader->push_consts.lo = 0;
|
|
|
|
tu_shader->push_consts.count = 0;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* CP_LOAD_STATE OFFSET and NUM_UNIT are in units of vec4 (4 dwords),
|
|
|
|
* however there's an alignment requirement of 4 on OFFSET. Expand the
|
|
|
|
* range and change units accordingly.
|
|
|
|
*/
|
|
|
|
tu_shader->push_consts.lo = (min / 16) / 4 * 4;
|
|
|
|
tu_shader->push_consts.count =
|
|
|
|
align(max, 16) / 16 - tu_shader->push_consts.lo;
|
|
|
|
}
|
|
|
|
|
2019-09-26 05:29:26 +01:00
|
|
|
static bool
|
2019-12-14 06:05:11 +00:00
|
|
|
tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader,
|
|
|
|
const struct tu_pipeline_layout *layout)
|
2019-09-26 05:29:26 +01:00
|
|
|
{
|
|
|
|
bool progress = false;
|
|
|
|
|
2020-03-18 12:12:31 +00:00
|
|
|
gather_push_constants(shader, tu_shader);
|
|
|
|
|
2019-09-26 05:29:26 +01:00
|
|
|
nir_foreach_function(function, shader) {
|
|
|
|
if (function->impl)
|
2019-12-14 06:05:11 +00:00
|
|
|
progress |= lower_impl(function->impl, tu_shader, layout);
|
2019-09-26 05:29:26 +01:00
|
|
|
}
|
|
|
|
|
2020-06-17 12:02:49 +01:00
|
|
|
/* Remove now-unused variables so that when we gather the shader info later
|
|
|
|
* they won't be counted.
|
|
|
|
*/
|
2020-06-17 14:39:18 +01:00
|
|
|
|
|
|
|
if (progress)
|
|
|
|
nir_opt_dce(shader);
|
|
|
|
|
|
|
|
progress |=
|
|
|
|
nir_remove_dead_variables(shader,
|
|
|
|
nir_var_uniform | nir_var_mem_ubo | nir_var_mem_ssbo,
|
|
|
|
NULL);
|
2020-06-17 12:02:49 +01:00
|
|
|
|
2019-09-26 05:29:26 +01:00
|
|
|
return progress;
|
|
|
|
}
|
|
|
|
|
2020-02-20 05:48:28 +00:00
|
|
|
/* Translate NIR transform-feedback info into ir3's stream-output description.
 * Leaves *info untouched when the shader has no xfb outputs.
 */
static void
tu_gather_xfb_info(nir_shader *nir, struct ir3_stream_output_info *info)
{
   nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL);

   if (!xfb)
      return;

   /* creating a map from VARYING_SLOT_* enums to consecutive index */
   uint8_t num_outputs = 0;
   uint64_t outputs_written = 0;
   for (int i = 0; i < xfb->output_count; i++)
      outputs_written |= BITFIELD64_BIT(xfb->outputs[i].location);

   uint8_t output_map[VARYING_SLOT_TESS_MAX];
   memset(output_map, 0, sizeof(output_map));

   /* Assign consecutive indices in slot order to every written slot. */
   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (outputs_written & BITFIELD64_BIT(attr))
         output_map[attr] = num_outputs++;
   }

   assert(xfb->output_count < IR3_MAX_SO_OUTPUTS);
   info->num_outputs = xfb->output_count;

   /* NIR strides are in bytes; ir3 wants dwords. */
   for (int i = 0; i < IR3_MAX_SO_BUFFERS; i++)
      info->stride[i] = xfb->buffers[i].stride / 4;

   for (int i = 0; i < xfb->output_count; i++) {
      info->output[i].register_index = output_map[xfb->outputs[i].location];
      info->output[i].start_component = xfb->outputs[i].component_offset;
      info->output[i].num_components =
         util_bitcount(xfb->outputs[i].component_mask);
      info->output[i].output_buffer = xfb->outputs[i].buffer;
      /* Byte offset -> dword offset, matching the stride conversion above. */
      info->output[i].dst_offset = xfb->outputs[i].offset / 4;
      info->output[i].stream = xfb->buffer_to_stream[xfb->outputs[i].buffer];
   }

   ralloc_free(xfb);
}
|
|
|
|
|
2019-02-20 17:53:47 +00:00
|
|
|
/* Create a tu_shader: translate SPIR-V (or build a no-op fragment shader when
 * stage_info is NULL), run the NIR lowering pipeline, and hand the result to
 * the ir3 backend. Returns NULL on allocation or translation failure. The
 * caller owns the returned shader and frees it with tu_shader_destroy().
 *
 * NOTE(review): the pass ordering below is deliberate (see inline comments);
 * do not reorder without checking the stated dependencies.
 */
struct tu_shader *
tu_shader_create(struct tu_device *dev,
                 gl_shader_stage stage,
                 const VkPipelineShaderStageCreateInfo *stage_info,
                 struct tu_pipeline_layout *layout,
                 const VkAllocationCallbacks *alloc)
{
   struct tu_shader *shader;

   shader = vk_zalloc2(
      &dev->alloc, alloc,
      sizeof(*shader),
      8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!shader)
      return NULL;

   nir_shader *nir;
   if (stage_info) {
      /* translate SPIR-V to NIR */
      const struct tu_shader_module *module =
         tu_shader_module_from_handle(stage_info->module);
      assert(module->code_size % 4 == 0);
      nir = tu_spirv_to_nir(
         dev->compiler, (const uint32_t *) module->code, module->code_size / 4,
         stage, stage_info->pName, stage_info->pSpecializationInfo);
   } else {
      /* No stage info: synthesize an empty "noop_fs" fragment shader. */
      assert(stage == MESA_SHADER_FRAGMENT);
      nir_builder fs_b;
      const nir_shader_compiler_options *nir_options =
         ir3_get_compiler_options(dev->compiler);
      nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, nir_options);
      fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "noop_fs");
      nir = fs_b.shader;
   }

   if (!nir) {
      /* SPIR-V translation failed; release the half-built shader. */
      vk_free2(&dev->alloc, alloc, shader);
      return NULL;
   }

   if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_NIR)) {
      fprintf(stderr, "translated nir:\n");
      nir_print_shader(nir, stderr);
   }

   /* multi step inlining procedure */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_opt_deref);
   /* After inlining, only the entrypoint is needed; drop the rest. */
   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
      if (!func->is_entrypoint)
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~nir_var_function_temp);

   /* Split member structs. We do this before lower_io_to_temporaries so that
    * it doesn't lower system values to temporaries by accident.
    */
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_per_member_structs);

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_shader_in | nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
              NULL);

   /* Gather information for transform feedback.
    * This should be called after nir_split_per_member_structs.
    * Also needs to be called after nir_remove_dead_variables with varyings,
    * so that we could align stream outputs correctly.
    */
   struct ir3_stream_output_info so_info = {};
   if (nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_TESS_EVAL ||
       nir->info.stage == MESA_SHADER_GEOMETRY)
      tu_gather_xfb_info(nir, &so_info);

   NIR_PASS_V(nir, nir_propagate_invariant);

   NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true);

   NIR_PASS_V(nir, nir_lower_global_vars_to_local);
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);

   NIR_PASS_V(nir, nir_opt_copy_prop_vars);
   NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all);

   /* ir3 doesn't support indirect input/output */
   NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out);

   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);

   nir_assign_io_var_locations(&nir->inputs, &nir->num_inputs, stage);
   nir_assign_io_var_locations(&nir->outputs, &nir->num_outputs, stage);

   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_frexp);

   if (stage == MESA_SHADER_FRAGMENT)
      NIR_PASS_V(nir, nir_lower_input_attachments, true);

   /* Driver-specific lowering; also fills in shader->push_consts. */
   NIR_PASS_V(nir, tu_lower_io, shader, layout);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   /* push_consts.count is rounded up to the backend's vec4 granularity. */
   shader->ir3_shader =
      ir3_shader_from_nir(dev->compiler, nir,
                          align(shader->push_consts.count, 4),
                          &so_info);

   return shader;
}
|
|
|
|
|
|
|
|
/* Free a shader created by tu_shader_create, including its ir3 backend
 * shader. `shader` must be non-NULL.
 */
void
tu_shader_destroy(struct tu_device *dev,
                  struct tu_shader *shader,
                  const VkAllocationCallbacks *alloc)
{
   /* The ir3 shader is owned by this tu_shader; destroy it first. */
   ir3_shader_destroy(shader->ir3_shader);

   vk_free2(&dev->alloc, alloc, shader);
}
|
|
|
|
|
|
|
|
VkResult
|
|
|
|
tu_CreateShaderModule(VkDevice _device,
|
|
|
|
const VkShaderModuleCreateInfo *pCreateInfo,
|
|
|
|
const VkAllocationCallbacks *pAllocator,
|
|
|
|
VkShaderModule *pShaderModule)
|
|
|
|
{
|
|
|
|
TU_FROM_HANDLE(tu_device, device, _device);
|
|
|
|
struct tu_shader_module *module;
|
|
|
|
|
|
|
|
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
|
|
|
|
assert(pCreateInfo->flags == 0);
|
|
|
|
assert(pCreateInfo->codeSize % 4 == 0);
|
|
|
|
|
|
|
|
module = vk_alloc2(&device->alloc, pAllocator,
|
|
|
|
sizeof(*module) + pCreateInfo->codeSize, 8,
|
|
|
|
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
|
|
|
if (module == NULL)
|
|
|
|
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
|
|
|
|
|
|
module->code_size = pCreateInfo->codeSize;
|
|
|
|
memcpy(module->code, pCreateInfo->pCode, pCreateInfo->codeSize);
|
|
|
|
|
|
|
|
_mesa_sha1_compute(module->code, module->code_size, module->sha1);
|
|
|
|
|
|
|
|
*pShaderModule = tu_shader_module_to_handle(module);
|
|
|
|
|
|
|
|
return VK_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* vkDestroyShaderModule: free a module created by tu_CreateShaderModule. */
void
tu_DestroyShaderModule(VkDevice _device,
                       VkShaderModule _module,
                       const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_shader_module, module, _module);

   /* Destroying VK_NULL_HANDLE must be a no-op per the Vulkan spec. */
   if (!module)
      return;

   vk_free2(&device->alloc, pAllocator, module);
}
|