tu: Implement pipeline caching with shared Vulkan cache

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16147>
Author: Connor Abbott, 2022-02-17 20:48:36 +01:00 (committed by Marge Bot)
parent 43981f0f58
commit 05329d7f9a
6 changed files with 527 additions and 596 deletions
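
The change moves turnip from its own VkPipelineCache implementation to the shared vk_pipeline_cache runtime: the shader stages and keys are hashed into a SHA-1, that key is looked up in the cache, and compilation only happens on a miss, after which the result is inserted so later pipelines (and the disk cache) can reuse it. A minimal sketch of that flow, assuming the tu_pipeline_cache_lookup()/tu_pipeline_cache_insert() helpers added in tu_pipeline.c below; compile_all_stages() is a hypothetical stand-in for the real compile path, not a function in this diff:

static VkResult
lookup_or_compile(struct vk_pipeline_cache *cache,
                  const unsigned char sha1[20],
                  struct tu_compiled_shaders **out)
{
   /* Lookup returns a new reference on a hit. */
   struct tu_compiled_shaders *shaders =
      tu_pipeline_cache_lookup(cache, sha1, 20);

   if (!shaders) {
      shaders = compile_all_stages(sha1);   /* hypothetical compile path */
      if (!shaders)
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      /* Insert may hand back an object another thread already cached under
       * the same key; our reference is consumed either way. */
      shaders = tu_pipeline_cache_insert(cache, shaders);
   }

   *out = shaders;   /* caller releases with vk_pipeline_cache_object_unref() */
   return VK_SUCCESS;
}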

src/freedreno/vulkan/meson.build

@ -44,7 +44,6 @@ libtu_files = files(
'tu_nir_lower_multiview.c',
'tu_pass.c',
'tu_pipeline.c',
'tu_pipeline_cache.c',
'tu_private.h',
'tu_query.c',
'tu_shader.c',

src/freedreno/vulkan/tu_device.c

@ -230,6 +230,11 @@ get_device_extensions(const struct tu_physical_device *device,
};
}
static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = {
&tu_shaders_ops,
NULL,
};
VkResult
tu_physical_device_init(struct tu_physical_device *device,
struct tu_instance *instance)
@ -275,13 +280,6 @@ tu_physical_device_init(struct tu_physical_device *device,
goto fail_free_name;
}
/* The gpu id is already embedded in the uuid so we just pass "tu"
* when creating the cache.
*/
char buf[VK_UUID_SIZE * 2 + 1];
disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
device->disk_cache = disk_cache_create(device->name, buf, 0);
fd_get_driver_uuid(device->driver_uuid);
fd_get_device_uuid(device->device_uuid, &device->dev_id);
@ -298,21 +296,28 @@ tu_physical_device_init(struct tu_physical_device *device,
&supported_extensions,
&dispatch_table);
if (result != VK_SUCCESS)
goto fail_free_cache;
goto fail_free_name;
#if TU_HAS_SURFACE
result = tu_wsi_init(device);
if (result != VK_SUCCESS) {
vk_startup_errorf(instance, result, "WSI init failure");
vk_physical_device_finish(&device->vk);
goto fail_free_cache;
goto fail_free_name;
}
#endif
/* The gpu id is already embedded in the uuid so we just pass "tu"
* when creating the cache.
*/
char buf[VK_UUID_SIZE * 2 + 1];
disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
device->vk.disk_cache = disk_cache_create(device->name, buf, 0);
device->vk.pipeline_cache_import_ops = cache_import_ops;
return VK_SUCCESS;
fail_free_cache:
disk_cache_destroy(device->disk_cache);
fail_free_name:
vk_free(&instance->vk.alloc, (void *)device->name);
return result;
@ -325,7 +330,6 @@ tu_physical_device_finish(struct tu_physical_device *device)
tu_wsi_finish(device);
#endif
disk_cache_destroy(device->disk_cache);
close(device->local_fd);
if (device->master_fd != -1)
close(device->master_fd);
@ -1790,6 +1794,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
&(struct ir3_compiler_options) {
.robust_ubo_access = robust_buffer_access2,
.push_ubo_with_preamble = true,
.disable_cache = true,
});
if (!device->compiler) {
result = vk_startup_errorf(physical_device->instance,
@ -1851,16 +1856,11 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
/* initialize to ones so ffs can be used to find unused slots */
BITSET_ONES(device->custom_border_color);
VkPipelineCacheCreateInfo ci;
ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
ci.pNext = NULL;
ci.flags = 0;
ci.pInitialData = NULL;
ci.initialDataSize = 0;
VkPipelineCache pc;
result =
tu_CreatePipelineCache(tu_device_to_handle(device), &ci, NULL, &pc);
if (result != VK_SUCCESS) {
struct vk_pipeline_cache_create_info pcc_info = { };
device->mem_cache = vk_pipeline_cache_create(&device->vk, &pcc_info,
false);
if (!device->mem_cache) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
vk_startup_errorf(device->instance, result, "create pipeline cache failed");
goto fail_pipeline_cache;
}
@ -1929,8 +1929,6 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
}
pthread_condattr_destroy(&condattr);
device->mem_cache = tu_pipeline_cache_from_handle(pc);
result = tu_autotune_init(&device->autotune, device);
if (result != VK_SUCCESS) {
goto fail_timeline_cond;
@ -1959,7 +1957,7 @@ fail_prepare_perfcntrs_pass_cs:
fail_perfcntrs_pass_entries_alloc:
free(device->perfcntrs_pass_cs);
fail_perfcntrs_pass_alloc:
tu_DestroyPipelineCache(tu_device_to_handle(device), pc, NULL);
vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
fail_pipeline_cache:
tu_destroy_clear_blit_shaders(device);
fail_global_bo_map:
@ -2009,8 +2007,7 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
ir3_compiler_destroy(device->compiler);
VkPipelineCache pc = tu_pipeline_cache_to_handle(device->mem_cache);
tu_DestroyPipelineCache(tu_device_to_handle(device), pc, NULL);
vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
if (device->perfcntrs_pass_cs) {
free(device->perfcntrs_pass_cs_entries);

src/freedreno/vulkan/tu_pipeline.c

@ -250,13 +250,12 @@ struct tu_pipeline_builder
{
struct tu_device *device;
void *mem_ctx;
struct tu_pipeline_cache *cache;
struct vk_pipeline_cache *cache;
struct tu_pipeline_layout *layout;
const VkAllocationCallbacks *alloc;
const VkGraphicsPipelineCreateInfo *create_info;
struct tu_shader *shaders[MESA_SHADER_FRAGMENT + 1];
struct ir3_shader_variant *variants[MESA_SHADER_FRAGMENT + 1];
struct tu_compiled_shaders *shaders;
struct ir3_shader_variant *binning_variant;
uint64_t shader_iova[MESA_SHADER_FRAGMENT + 1];
uint64_t binning_vs_iova;
@ -660,7 +659,7 @@ tu6_emit_xs(struct tu_cs *cs,
}
static void
tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader,
tu6_emit_cs_config(struct tu_cs *cs,
const struct ir3_shader_variant *v,
const struct tu_pvtmem_config *pvtmem,
uint64_t binary_iova)
@ -1686,8 +1685,8 @@ tu6_emit_program_config(struct tu_cs *cs,
.gs_state = true,
.fs_state = true,
.gfx_ibo = true));
for (; stage < ARRAY_SIZE(builder->shaders); stage++) {
tu6_emit_xs_config(cs, stage, builder->variants[stage]);
for (; stage < ARRAY_SIZE(builder->shader_iova); stage++) {
tu6_emit_xs_config(cs, stage, builder->shaders->variants[stage]);
}
}
@ -1697,16 +1696,16 @@ tu6_emit_program(struct tu_cs *cs,
bool binning_pass,
struct tu_pipeline *pipeline)
{
const struct ir3_shader_variant *vs = builder->variants[MESA_SHADER_VERTEX];
const struct ir3_shader_variant *vs = builder->shaders->variants[MESA_SHADER_VERTEX];
const struct ir3_shader_variant *bs = builder->binning_variant;
const struct ir3_shader_variant *hs = builder->variants[MESA_SHADER_TESS_CTRL];
const struct ir3_shader_variant *ds = builder->variants[MESA_SHADER_TESS_EVAL];
const struct ir3_shader_variant *gs = builder->variants[MESA_SHADER_GEOMETRY];
const struct ir3_shader_variant *fs = builder->variants[MESA_SHADER_FRAGMENT];
const struct ir3_shader_variant *hs = builder->shaders->variants[MESA_SHADER_TESS_CTRL];
const struct ir3_shader_variant *ds = builder->shaders->variants[MESA_SHADER_TESS_EVAL];
const struct ir3_shader_variant *gs = builder->shaders->variants[MESA_SHADER_GEOMETRY];
const struct ir3_shader_variant *fs = builder->shaders->variants[MESA_SHADER_FRAGMENT];
gl_shader_stage stage = MESA_SHADER_VERTEX;
uint32_t cps_per_patch = builder->create_info->pTessellationState ?
builder->create_info->pTessellationState->patchControlPoints : 0;
bool multi_pos_output = builder->shaders[MESA_SHADER_VERTEX]->multi_pos_output;
bool multi_pos_output = builder->shaders->multi_pos_output;
/* Don't use the binning pass variant when GS is present because we don't
* support compiling correct binning pass variants with GS.
@ -1717,8 +1716,8 @@ tu6_emit_program(struct tu_cs *cs,
stage++;
}
for (; stage < ARRAY_SIZE(builder->shaders); stage++) {
const struct ir3_shader_variant *xs = builder->variants[stage];
for (; stage < ARRAY_SIZE(builder->shaders->variants); stage++) {
const struct ir3_shader_variant *xs = builder->shaders->variants[stage];
if (stage == MESA_SHADER_FRAGMENT && binning_pass)
fs = xs = NULL;
@ -2255,24 +2254,23 @@ tu_pipeline_allocate_cs(struct tu_device *dev,
struct tu_pipeline *pipeline,
struct tu_pipeline_layout *layout,
struct tu_pipeline_builder *builder,
struct tu_pipeline_cache *cache,
struct ir3_shader_variant *compute)
{
uint32_t size = 1024 + tu6_load_state_size(pipeline, layout, compute);
/* graphics case: */
if (builder) {
for (uint32_t i = 0; i < ARRAY_SIZE(builder->variants); i++) {
if (builder->variants[i]) {
size += builder->variants[i]->info.size / 4;
for (uint32_t i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) {
if (builder->shaders->variants[i]) {
size += builder->shaders->variants[i]->info.size / 4;
}
}
size += builder->binning_variant->info.size / 4;
builder->additional_cs_reserve_size = 0;
for (unsigned i = 0; i < ARRAY_SIZE(builder->variants); i++) {
struct ir3_shader_variant *variant = builder->variants[i];
for (unsigned i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) {
struct ir3_shader_variant *variant = builder->shaders->variants[i];
if (variant) {
builder->additional_cs_reserve_size +=
tu_xs_get_additional_cs_size_dwords(variant);
@ -2445,10 +2443,248 @@ tu_link_shaders(struct tu_pipeline_builder *builder,
}
}
static void
tu_shader_key_init(struct tu_shader_key *key,
const VkPipelineShaderStageCreateInfo *stage_info,
struct tu_device *dev)
{
enum ir3_wavesize_option api_wavesize, real_wavesize;
if (stage_info) {
if (stage_info->flags &
VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) {
api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE;
} else {
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *size_info =
vk_find_struct_const(stage_info->pNext,
PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
if (size_info) {
if (size_info->requiredSubgroupSize == dev->compiler->threadsize_base) {
api_wavesize = IR3_SINGLE_ONLY;
} else {
assert(size_info->requiredSubgroupSize == dev->compiler->threadsize_base * 2);
api_wavesize = IR3_DOUBLE_ONLY;
}
} else {
/* Match the exposed subgroupSize. */
api_wavesize = IR3_DOUBLE_ONLY;
}
if (stage_info->flags &
VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT)
real_wavesize = api_wavesize;
else if (api_wavesize == IR3_SINGLE_ONLY)
real_wavesize = IR3_SINGLE_ONLY;
else
real_wavesize = IR3_SINGLE_OR_DOUBLE;
}
} else {
api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE;
}
key->api_wavesize = api_wavesize;
key->real_wavesize = real_wavesize;
}
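
A worked example of the mapping above, illustrative only and assuming an a6xx-style compiler where threadsize_base is 64 (so the two wave sizes are 64 and 128): a stage that requires a full 128-wide subgroup ends up with both the API-visible and the real wave size pinned to double.

/* Hypothetical create-info fragment, not part of the diff. */
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT size_info = {
   .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
   .requiredSubgroupSize = 128,   /* == threadsize_base * 2 */
};
const VkPipelineShaderStageCreateInfo stage_info = {
   .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
   .pNext = &size_info,
   .flags = VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT,
   /* ... stage, module, pName ... */
};

struct tu_shader_key key = { };
tu_shader_key_init(&key, &stage_info, dev);
/* key.api_wavesize == IR3_DOUBLE_ONLY, and because REQUIRE_FULL_SUBGROUPS is
 * set, key.real_wavesize == IR3_DOUBLE_ONLY as well. */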
static void
tu_hash_stage(struct mesa_sha1 *ctx,
const VkPipelineShaderStageCreateInfo *stage,
const struct tu_shader_key *key)
{
VK_FROM_HANDLE(vk_shader_module, module, stage->module);
const VkSpecializationInfo *spec_info = stage->pSpecializationInfo;
_mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
_mesa_sha1_update(ctx, stage->pName, strlen(stage->pName));
if (spec_info && spec_info->mapEntryCount) {
_mesa_sha1_update(ctx, spec_info->pMapEntries,
spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
_mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize);
}
_mesa_sha1_update(ctx, key, sizeof(*key));
}
/* Hash flags which can affect ir3 shader compilation which aren't known until
* logical device creation.
*/
static void
tu_hash_compiler(struct mesa_sha1 *ctx, const struct ir3_compiler *compiler)
{
_mesa_sha1_update(ctx, &compiler->robust_ubo_access,
sizeof(compiler->robust_ubo_access));
_mesa_sha1_update(ctx, &ir3_shader_debug, sizeof(ir3_shader_debug));
}
static void
tu_hash_shaders(unsigned char *hash,
const VkPipelineShaderStageCreateInfo **stages,
const struct tu_pipeline_layout *layout,
const struct tu_shader_key *keys,
const struct ir3_shader_key *ir3_key,
const struct ir3_compiler *compiler)
{
struct mesa_sha1 ctx;
_mesa_sha1_init(&ctx);
if (layout)
_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
_mesa_sha1_update(&ctx, ir3_key, sizeof(ir3_key));
for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
if (stages[i]) {
tu_hash_stage(&ctx, stages[i], &keys[i]);
}
}
tu_hash_compiler(&ctx, compiler);
_mesa_sha1_final(&ctx, hash);
}
static void
tu_hash_compute(unsigned char *hash,
const VkPipelineShaderStageCreateInfo *stage,
const struct tu_pipeline_layout *layout,
const struct tu_shader_key *key,
const struct ir3_compiler *compiler)
{
struct mesa_sha1 ctx;
_mesa_sha1_init(&ctx);
if (layout)
_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
tu_hash_stage(&ctx, stage, key);
tu_hash_compiler(&ctx, compiler);
_mesa_sha1_final(&ctx, hash);
}
static bool
tu_shaders_serialize(struct vk_pipeline_cache_object *object,
struct blob *blob);
static struct vk_pipeline_cache_object *
tu_shaders_deserialize(struct vk_device *device,
const void *key_data, size_t key_size,
struct blob_reader *blob);
static void
tu_shaders_destroy(struct vk_pipeline_cache_object *object)
{
struct tu_compiled_shaders *shaders =
container_of(object, struct tu_compiled_shaders, base);
for (unsigned i = 0; i < ARRAY_SIZE(shaders->variants); i++)
ralloc_free(shaders->variants[i]);
vk_pipeline_cache_object_finish(&shaders->base);
vk_free(&object->device->alloc, shaders);
}
const struct vk_pipeline_cache_object_ops tu_shaders_ops = {
.serialize = tu_shaders_serialize,
.deserialize = tu_shaders_deserialize,
.destroy = tu_shaders_destroy,
};
static struct tu_compiled_shaders *
tu_shaders_init(struct tu_device *dev, const void *key_data, size_t key_size)
{
VK_MULTIALLOC(ma);
VK_MULTIALLOC_DECL(&ma, struct tu_compiled_shaders, shaders, 1);
VK_MULTIALLOC_DECL_SIZE(&ma, void, obj_key_data, key_size);
if (!vk_multialloc_zalloc(&ma, &dev->vk.alloc,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
return NULL;
memcpy(obj_key_data, key_data, key_size);
vk_pipeline_cache_object_init(&dev->vk, &shaders->base,
&tu_shaders_ops, obj_key_data, key_size);
return shaders;
}
static bool
tu_shaders_serialize(struct vk_pipeline_cache_object *object,
struct blob *blob)
{
struct tu_compiled_shaders *shaders =
container_of(object, struct tu_compiled_shaders, base);
blob_write_bytes(blob, shaders->push_consts, sizeof(shaders->push_consts));
blob_write_uint8(blob, shaders->active_desc_sets);
blob_write_uint8(blob, shaders->multi_pos_output);
for (unsigned i = 0; i < ARRAY_SIZE(shaders->variants); i++) {
if (shaders->variants[i]) {
blob_write_uint8(blob, 1);
ir3_store_variant(blob, shaders->variants[i]);
} else {
blob_write_uint8(blob, 0);
}
}
return true;
}
static struct vk_pipeline_cache_object *
tu_shaders_deserialize(struct vk_device *_device,
const void *key_data, size_t key_size,
struct blob_reader *blob)
{
struct tu_device *dev = container_of(_device, struct tu_device, vk);
struct tu_compiled_shaders *shaders =
tu_shaders_init(dev, key_data, key_size);
if (!shaders)
return NULL;
blob_copy_bytes(blob, shaders->push_consts, sizeof(shaders->push_consts));
shaders->active_desc_sets = blob_read_uint8(blob);
shaders->multi_pos_output = blob_read_uint8(blob);
for (unsigned i = 0; i < ARRAY_SIZE(shaders->variants); i++) {
bool has_shader = blob_read_uint8(blob);
if (has_shader) {
shaders->variants[i] = ir3_retrieve_variant(blob, dev->compiler, NULL);
}
}
return &shaders->base;
}
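
The two callbacks above are expected to round-trip through mesa's util/blob API; the common cache code drives them both for VkPipelineCache data and for disk-cache entries. A minimal sketch of that invariant, illustrative only, assuming an already-populated tu_compiled_shaders object `shaders` and a struct tu_device `dev`:

struct blob blob;
blob_init(&blob);
tu_shaders_serialize(&shaders->base, &blob);

struct blob_reader reader;
blob_reader_init(&reader, blob.data, blob.size);
struct vk_pipeline_cache_object *copy =
   tu_shaders_deserialize(&dev->vk, shaders->base.key_data,
                          shaders->base.key_size, &reader);
assert(copy && !reader.overrun);

vk_pipeline_cache_object_unref(copy);   /* frees the deserialized copy */
blob_finish(&blob);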
static struct tu_compiled_shaders *
tu_pipeline_cache_lookup(struct vk_pipeline_cache *cache,
const void *key_data, size_t key_size)
{
struct vk_pipeline_cache_object *object =
vk_pipeline_cache_lookup_object(cache, key_data, key_size,
&tu_shaders_ops, NULL);
if (object)
return container_of(object, struct tu_compiled_shaders, base);
else
return NULL;
}
static struct tu_compiled_shaders *
tu_pipeline_cache_insert(struct vk_pipeline_cache *cache,
struct tu_compiled_shaders *shaders)
{
struct vk_pipeline_cache_object *object =
vk_pipeline_cache_add_object(cache, &shaders->base);
return container_of(object, struct tu_compiled_shaders, base);
}
static VkResult
tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
struct tu_pipeline *pipeline)
{
VkResult result = VK_SUCCESS;
const struct ir3_compiler *compiler = builder->device->compiler;
const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = {
NULL
@ -2459,10 +2695,40 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
stage_infos[stage] = &builder->create_info->pStages[i];
}
struct ir3_shader_key key = {};
tu_pipeline_shader_key_init(&key, pipeline, builder->create_info);
struct tu_shader_key keys[ARRAY_SIZE(stage_infos)] = { };
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
stage < ARRAY_SIZE(keys); stage++) {
tu_shader_key_init(&keys[stage], stage_infos[stage], builder->device);
}
nir_shader *nir[ARRAY_SIZE(builder->shaders)] = { NULL };
struct ir3_shader_key ir3_key = {};
tu_pipeline_shader_key_init(&ir3_key, pipeline, builder->create_info);
keys[MESA_SHADER_VERTEX].multiview_mask = builder->multiview_mask;
keys[MESA_SHADER_FRAGMENT].multiview_mask = builder->multiview_mask;
unsigned char pipeline_sha1[20];
tu_hash_shaders(pipeline_sha1, stage_infos, builder->layout, keys, &ir3_key, compiler);
const bool executable_info = builder->create_info->flags &
VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
char *nir_initial_disasm[ARRAY_SIZE(stage_infos)] = { NULL };
struct tu_compiled_shaders *compiled_shaders;
if (!executable_info) {
compiled_shaders =
tu_pipeline_cache_lookup(builder->cache, &pipeline_sha1,
sizeof(pipeline_sha1));
if (compiled_shaders)
goto done;
}
nir_shader *nir[ARRAY_SIZE(stage_infos)] = { NULL };
struct tu_shader *shaders[ARRAY_SIZE(nir)] = { NULL };
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
stage < ARRAY_SIZE(nir); stage++) {
@ -2471,8 +2737,10 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
continue;
nir[stage] = tu_spirv_to_nir(builder->device, builder->mem_ctx, stage_info, stage);
if (!nir[stage])
return VK_ERROR_OUT_OF_HOST_MEMORY;
if (!nir[stage]) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
}
if (!nir[MESA_SHADER_FRAGMENT]) {
@ -2484,11 +2752,6 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
nir[MESA_SHADER_FRAGMENT] = fs_b.shader;
}
const bool executable_info = builder->create_info->flags &
VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
char *nir_initial_disasm[ARRAY_SIZE(builder->shaders)] = { NULL };
if (executable_info) {
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
stage < ARRAY_SIZE(nir); stage++) {
@ -2509,26 +2772,27 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
continue;
struct tu_shader *shader =
tu_shader_create(builder->device, nir[stage], stage_infos[stage],
builder->multiview_mask, builder->layout,
builder->alloc);
if (!shader)
return VK_ERROR_OUT_OF_HOST_MEMORY;
tu_shader_create(builder->device, nir[stage], &keys[stage],
builder->layout, builder->alloc);
if (!shader) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
/* In SPIR-V generated from GLSL, the primitive mode is specified in the
* tessellation evaluation shader, but in SPIR-V generated from HLSL,
* the mode is specified in the tessellation control shader. */
if ((stage == MESA_SHADER_TESS_EVAL || stage == MESA_SHADER_TESS_CTRL) &&
key.tessellation == IR3_TESS_NONE) {
key.tessellation = tu6_get_tessmode(shader);
ir3_key.tessellation == IR3_TESS_NONE) {
ir3_key.tessellation = tu6_get_tessmode(shader);
}
if (stage > MESA_SHADER_TESS_CTRL) {
if (stage == MESA_SHADER_FRAGMENT) {
key.tcs_store_primid = key.tcs_store_primid ||
ir3_key.tcs_store_primid = ir3_key.tcs_store_primid ||
(nir[stage]->info.inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID));
} else {
key.tcs_store_primid = key.tcs_store_primid ||
ir3_key.tcs_store_primid = ir3_key.tcs_store_primid ||
BITSET_TEST(nir[stage]->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
}
}
@ -2537,85 +2801,121 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
* which is set in tu_lower_io. */
desc_sets |= shader->active_desc_sets;
builder->shaders[stage] = shader;
shaders[stage] = shader;
}
pipeline->active_desc_sets = desc_sets;
struct tu_shader *last_shader = builder->shaders[MESA_SHADER_GEOMETRY];
struct tu_shader *last_shader = shaders[MESA_SHADER_GEOMETRY];
if (!last_shader)
last_shader = builder->shaders[MESA_SHADER_TESS_EVAL];
last_shader = shaders[MESA_SHADER_TESS_EVAL];
if (!last_shader)
last_shader = builder->shaders[MESA_SHADER_VERTEX];
last_shader = shaders[MESA_SHADER_VERTEX];
uint64_t outputs_written = last_shader->ir3_shader->nir->info.outputs_written;
key.layer_zero = !(outputs_written & VARYING_BIT_LAYER);
key.view_zero = !(outputs_written & VARYING_BIT_VIEWPORT);
ir3_key.layer_zero = !(outputs_written & VARYING_BIT_LAYER);
ir3_key.view_zero = !(outputs_written & VARYING_BIT_VIEWPORT);
pipeline->tess.patch_type = key.tessellation;
compiled_shaders =
tu_shaders_init(builder->device, &pipeline_sha1, sizeof(pipeline_sha1));
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
stage < ARRAY_SIZE(builder->shaders); stage++) {
if (!builder->shaders[stage])
continue;
bool created;
builder->variants[stage] =
ir3_shader_get_variant(builder->shaders[stage]->ir3_shader,
&key, false, executable_info, &created);
if (!builder->variants[stage])
return VK_ERROR_OUT_OF_HOST_MEMORY;
if (!compiled_shaders) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
uint32_t safe_constlens = ir3_trim_constlen(builder->variants, compiler);
key.safe_constlen = true;
compiled_shaders->active_desc_sets = desc_sets;
compiled_shaders->multi_pos_output =
shaders[MESA_SHADER_VERTEX]->multi_pos_output;
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
stage < ARRAY_SIZE(builder->shaders); stage++) {
if (!builder->shaders[stage])
stage < ARRAY_SIZE(shaders); stage++) {
if (!shaders[stage])
continue;
compiled_shaders->variants[stage] =
ir3_shader_create_variant(shaders[stage]->ir3_shader, &ir3_key,
executable_info);
if (!compiled_shaders->variants[stage])
return VK_ERROR_OUT_OF_HOST_MEMORY;
compiled_shaders->push_consts[stage] = shaders[stage]->push_consts;
}
uint32_t safe_constlens = ir3_trim_constlen(compiled_shaders->variants, compiler);
ir3_key.safe_constlen = true;
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
stage < ARRAY_SIZE(shaders); stage++) {
if (!shaders[stage])
continue;
if (safe_constlens & (1 << stage)) {
bool created;
builder->variants[stage] =
ir3_shader_get_variant(builder->shaders[stage]->ir3_shader,
&key, false, executable_info, &created);
if (!builder->variants[stage])
return VK_ERROR_OUT_OF_HOST_MEMORY;
ralloc_free(compiled_shaders->variants[stage]);
compiled_shaders->variants[stage] =
ir3_shader_create_variant(shaders[stage]->ir3_shader, &ir3_key,
executable_info);
if (!compiled_shaders->variants[stage]) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
}
}
const struct tu_shader *vs = builder->shaders[MESA_SHADER_VERTEX];
struct ir3_shader_variant *variant;
if (vs->ir3_shader->stream_output.num_outputs ||
!ir3_has_binning_vs(&key)) {
variant = builder->variants[MESA_SHADER_VERTEX];
} else {
bool created;
key.safe_constlen = !!(safe_constlens & (1 << MESA_SHADER_VERTEX));
variant = ir3_shader_get_variant(vs->ir3_shader, &key,
true, executable_info, &created);
if (!variant)
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
builder->binning_variant = variant;
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
stage < ARRAY_SIZE(nir); stage++) {
if (builder->variants[stage]) {
tu_append_executable(pipeline, builder->variants[stage],
if (shaders[stage]) {
tu_shader_destroy(builder->device, shaders[stage], builder->alloc);
}
}
compiled_shaders =
tu_pipeline_cache_insert(builder->cache, compiled_shaders);
done:
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
stage < ARRAY_SIZE(nir); stage++) {
if (compiled_shaders->variants[stage]) {
tu_append_executable(pipeline, compiled_shaders->variants[stage],
nir_initial_disasm[stage]);
}
}
if (builder->binning_variant != builder->variants[MESA_SHADER_VERTEX]) {
tu_append_executable(pipeline, builder->binning_variant, NULL);
struct ir3_shader_variant *vs =
compiled_shaders->variants[MESA_SHADER_VERTEX];
struct ir3_shader_variant *variant;
if (!vs->stream_output.num_outputs && ir3_has_binning_vs(&vs->key)) {
tu_append_executable(pipeline, vs->binning, NULL);
variant = vs->binning;
} else {
variant = vs;
}
builder->binning_variant = variant;
builder->shaders = compiled_shaders;
pipeline->active_desc_sets = compiled_shaders->active_desc_sets;
if (compiled_shaders->variants[MESA_SHADER_TESS_CTRL]) {
pipeline->tess.patch_type =
compiled_shaders->variants[MESA_SHADER_TESS_CTRL]->key.tessellation;
}
return VK_SUCCESS;
fail:
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
stage < ARRAY_SIZE(nir); stage++) {
if (shaders[stage]) {
tu_shader_destroy(builder->device, shaders[stage], builder->alloc);
}
}
if (compiled_shaders)
vk_pipeline_cache_object_unref(&compiled_shaders->base);
return result;
}
static void
@ -2722,12 +3022,12 @@ tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder,
static void
tu_pipeline_set_linkage(struct tu_program_descriptor_linkage *link,
struct tu_shader *shader,
struct tu_push_constant_range *push_consts,
struct ir3_shader_variant *v)
{
link->const_state = *ir3_const_state(v);
link->constlen = v->constlen;
link->push_consts = shader->push_consts;
link->push_consts = *push_consts;
}
static void
@ -2765,13 +3065,13 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
}
pipeline->active_stages = stages;
for (unsigned i = 0; i < ARRAY_SIZE(builder->shaders); i++) {
if (!builder->shaders[i])
for (unsigned i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) {
if (!builder->shaders->variants[i])
continue;
tu_pipeline_set_linkage(&pipeline->program.link[i],
builder->shaders[i],
builder->variants[i]);
&builder->shaders->push_consts[i],
builder->shaders->variants[i]);
}
}
@ -2781,7 +3081,7 @@ tu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder,
{
const VkPipelineVertexInputStateCreateInfo *vi_info =
builder->create_info->pVertexInputState;
const struct ir3_shader_variant *vs = builder->variants[MESA_SHADER_VERTEX];
const struct ir3_shader_variant *vs = builder->shaders->variants[MESA_SHADER_VERTEX];
const struct ir3_shader_variant *bs = builder->binning_variant;
/* Bindings may contain holes */
@ -2847,7 +3147,7 @@ tu_pipeline_builder_parse_tessellation(struct tu_pipeline_builder *builder,
vk_find_struct_const(tess_info->pNext, PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO);
pipeline->tess.upper_left_domain_origin = !domain_info ||
domain_info->domainOrigin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT;
const struct ir3_shader_variant *hs = builder->variants[MESA_SHADER_TESS_CTRL];
const struct ir3_shader_variant *hs = builder->shaders->variants[MESA_SHADER_TESS_CTRL];
pipeline->tess.param_stride = hs->output_size * 4;
}
@ -3092,8 +3392,8 @@ tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
.bfref = ds_info->back.reference & 0xff));
}
if (builder->shaders[MESA_SHADER_FRAGMENT]) {
const struct ir3_shader_variant *fs = &builder->shaders[MESA_SHADER_FRAGMENT]->ir3_shader->variants[0];
if (builder->shaders->variants[MESA_SHADER_FRAGMENT]) {
const struct ir3_shader_variant *fs = builder->shaders->variants[MESA_SHADER_FRAGMENT];
if (fs->has_kill || fs->no_earlyz || fs->writes_pos) {
pipeline->lrz.force_disable_mask |= TU_LRZ_FORCE_DISABLE_WRITE;
}
@ -3300,18 +3600,19 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
result = tu_pipeline_builder_compile_shaders(builder, *pipeline);
if (result != VK_SUCCESS) {
vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
return result;
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
result = tu_pipeline_allocate_cs(builder->device, *pipeline,
builder->layout, builder, builder->cache, NULL);
builder->layout, builder, NULL);
if (result != VK_SUCCESS) {
vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
return result;
}
for (uint32_t i = 0; i < ARRAY_SIZE(builder->variants); i++)
builder->shader_iova[i] = tu_upload_variant(*pipeline, builder->variants[i]);
for (uint32_t i = 0; i < ARRAY_SIZE(builder->shader_iova); i++)
builder->shader_iova[i] =
tu_upload_variant(*pipeline, builder->shaders->variants[i]);
builder->binning_vs_iova =
tu_upload_variant(*pipeline, builder->binning_variant);
@ -3323,10 +3624,10 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
uint32_t pvtmem_size = 0;
bool per_wave = true;
for (uint32_t i = 0; i < ARRAY_SIZE(builder->variants); i++) {
if (builder->variants[i]) {
pvtmem_size = MAX2(pvtmem_size, builder->variants[i]->pvtmem_size);
if (!builder->variants[i]->pvtmem_per_wave)
for (uint32_t i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) {
if (builder->shaders->variants[i]) {
pvtmem_size = MAX2(pvtmem_size, builder->shaders->variants[i]->pvtmem_size);
if (!builder->shaders->variants[i]->pvtmem_per_wave)
per_wave = false;
}
}
@ -3362,11 +3663,8 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
static void
tu_pipeline_builder_finish(struct tu_pipeline_builder *builder)
{
for (uint32_t i = 0; i < ARRAY_SIZE(builder->shaders); i++) {
if (!builder->shaders[i])
continue;
tu_shader_destroy(builder->device, builder->shaders[i], builder->alloc);
}
if (builder->shaders)
vk_pipeline_cache_object_unref(&builder->shaders->base);
ralloc_free(builder->mem_ctx);
}
@ -3374,7 +3672,7 @@ static void
tu_pipeline_builder_init_graphics(
struct tu_pipeline_builder *builder,
struct tu_device *dev,
struct tu_pipeline_cache *cache,
struct vk_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *create_info,
const VkAllocationCallbacks *alloc)
{
@ -3461,7 +3759,9 @@ tu_graphics_pipeline_create(VkDevice device,
VkPipeline *pPipeline)
{
TU_FROM_HANDLE(tu_device, dev, device);
TU_FROM_HANDLE(tu_pipeline_cache, cache, pipelineCache);
TU_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache);
cache = cache ? cache : dev->mem_cache;
struct tu_pipeline_builder builder;
tu_pipeline_builder_init_graphics(&builder, dev, cache,
@ -3509,11 +3809,13 @@ tu_compute_pipeline_create(VkDevice device,
VkPipeline *pPipeline)
{
TU_FROM_HANDLE(tu_device, dev, device);
TU_FROM_HANDLE(tu_pipeline_cache, cache, pipelineCache);
TU_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache);
TU_FROM_HANDLE(tu_pipeline_layout, layout, pCreateInfo->layout);
const VkPipelineShaderStageCreateInfo *stage_info = &pCreateInfo->stage;
VkResult result;
cache = cache ? cache : dev->mem_cache;
struct tu_pipeline *pipeline;
*pPipeline = VK_NULL_HANDLE;
@ -3526,38 +3828,73 @@ tu_compute_pipeline_create(VkDevice device,
pipeline->executables_mem_ctx = ralloc_context(NULL);
util_dynarray_init(&pipeline->executables, pipeline->executables_mem_ctx);
struct ir3_shader_key key = {};
struct tu_shader_key key;
tu_shader_key_init(&key, stage_info, dev);
void *pipeline_mem_ctx = ralloc_context(NULL);
nir_shader *nir = tu_spirv_to_nir(dev, pipeline_mem_ctx, stage_info, MESA_SHADER_COMPUTE);
unsigned char pipeline_sha1[20];
tu_hash_compute(pipeline_sha1, stage_info, layout, &key, dev->compiler);
struct tu_compiled_shaders *compiled = NULL;
const bool executable_info = pCreateInfo->flags &
VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
char *nir_initial_disasm = executable_info ?
nir_shader_as_str(nir, pipeline->executables_mem_ctx) : NULL;
if (!executable_info)
compiled = tu_pipeline_cache_lookup(cache, pipeline_sha1, sizeof(pipeline_sha1));
struct tu_shader *shader =
tu_shader_create(dev, nir, stage_info, 0, layout, pAllocator);
if (!shader) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
char *nir_initial_disasm = NULL;
if (!compiled) {
struct ir3_shader_key ir3_key = {};
nir_shader *nir = tu_spirv_to_nir(dev, pipeline_mem_ctx, stage_info,
MESA_SHADER_COMPUTE);
nir_initial_disasm = executable_info ?
nir_shader_as_str(nir, pipeline->executables_mem_ctx) : NULL;
struct tu_shader *shader =
tu_shader_create(dev, nir, &key, layout, pAllocator);
if (!shader) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
compiled = tu_shaders_init(dev, &pipeline_sha1, sizeof(pipeline_sha1));
if (!compiled) {
tu_shader_destroy(dev, shader, pAllocator);
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
compiled->active_desc_sets = shader->active_desc_sets;
compiled->push_consts[MESA_SHADER_COMPUTE] = shader->push_consts;
struct ir3_shader_variant *v =
ir3_shader_create_variant(shader->ir3_shader, &ir3_key, executable_info);
tu_shader_destroy(dev, shader, pAllocator);
if (!v) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
compiled->variants[MESA_SHADER_COMPUTE] = v;
compiled = tu_pipeline_cache_insert(cache, compiled);
}
pipeline->active_desc_sets = shader->active_desc_sets;
pipeline->active_desc_sets = compiled->active_desc_sets;
bool created;
struct ir3_shader_variant *v =
ir3_shader_get_variant(shader->ir3_shader, &key, false, executable_info, &created);
if (!v) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
struct ir3_shader_variant *v = compiled->variants[MESA_SHADER_COMPUTE];
tu_pipeline_set_linkage(&pipeline->program.link[MESA_SHADER_COMPUTE],
shader, v);
&compiled->push_consts[MESA_SHADER_COMPUTE], v);
result = tu_pipeline_allocate_cs(dev, pipeline, layout, NULL, cache, v);
result = tu_pipeline_allocate_cs(dev, pipeline, layout, NULL, v);
if (result != VK_SUCCESS)
goto fail;
@ -3574,14 +3911,14 @@ tu_compute_pipeline_create(VkDevice device,
struct tu_cs prog_cs;
uint32_t additional_reserve_size = tu_xs_get_additional_cs_size_dwords(v);
tu_cs_begin_sub_stream(&pipeline->cs, 64 + additional_reserve_size, &prog_cs);
tu6_emit_cs_config(&prog_cs, shader, v, &pvtmem, shader_iova);
tu6_emit_cs_config(&prog_cs, v, &pvtmem, shader_iova);
pipeline->program.state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs);
tu6_emit_load_state(pipeline, layout, true);
tu_append_executable(pipeline, v, nir_initial_disasm);
tu_shader_destroy(dev, shader, pAllocator);
vk_pipeline_cache_object_unref(&compiled->base);
ralloc_free(pipeline_mem_ctx);
*pPipeline = tu_pipeline_to_handle(pipeline);
@ -3589,8 +3926,8 @@ tu_compute_pipeline_create(VkDevice device,
return VK_SUCCESS;
fail:
if (shader)
tu_shader_destroy(dev, shader, pAllocator);
if (compiled)
vk_pipeline_cache_object_unref(&compiled->base);
ralloc_free(pipeline_mem_ctx);

src/freedreno/vulkan/tu_pipeline_cache.c (deleted)

@ -1,379 +0,0 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "tu_private.h"
#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"
#include "vulkan/util/vk_util.h"
struct cache_entry_variant_info
{
};
struct cache_entry
{
union {
unsigned char sha1[20];
uint32_t sha1_dw[5];
};
uint32_t code_sizes[MESA_SHADER_STAGES];
struct tu_shader_variant *variants[MESA_SHADER_STAGES];
char code[0];
};
static void
tu_pipeline_cache_init(struct tu_pipeline_cache *cache,
struct tu_device *device)
{
cache->device = device;
pthread_mutex_init(&cache->mutex, NULL);
cache->modified = false;
cache->kernel_count = 0;
cache->total_size = 0;
cache->table_size = 1024;
const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
cache->hash_table = malloc(byte_size);
/* We don't consider allocation failure fatal, we just start with a 0-sized
* cache. Disable caching when we want to keep shader debug info, since
* we don't get the debug info on cached shaders. */
if (cache->hash_table == NULL)
cache->table_size = 0;
else
memset(cache->hash_table, 0, byte_size);
}
static void
tu_pipeline_cache_finish(struct tu_pipeline_cache *cache)
{
for (unsigned i = 0; i < cache->table_size; ++i)
if (cache->hash_table[i]) {
vk_free(&cache->alloc, cache->hash_table[i]);
}
pthread_mutex_destroy(&cache->mutex);
free(cache->hash_table);
}
static uint32_t
entry_size(struct cache_entry *entry)
{
size_t ret = sizeof(*entry);
for (int i = 0; i < MESA_SHADER_STAGES; ++i)
if (entry->code_sizes[i])
ret +=
sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
return ret;
}
static struct cache_entry *
tu_pipeline_cache_search_unlocked(struct tu_pipeline_cache *cache,
const unsigned char *sha1)
{
const uint32_t mask = cache->table_size - 1;
const uint32_t start = (*(uint32_t *) sha1);
if (cache->table_size == 0)
return NULL;
for (uint32_t i = 0; i < cache->table_size; i++) {
const uint32_t index = (start + i) & mask;
struct cache_entry *entry = cache->hash_table[index];
if (!entry)
return NULL;
if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
return entry;
}
}
unreachable("hash table should never be full");
}
static struct cache_entry *
tu_pipeline_cache_search(struct tu_pipeline_cache *cache,
const unsigned char *sha1)
{
struct cache_entry *entry;
pthread_mutex_lock(&cache->mutex);
entry = tu_pipeline_cache_search_unlocked(cache, sha1);
pthread_mutex_unlock(&cache->mutex);
return entry;
}
static void
tu_pipeline_cache_set_entry(struct tu_pipeline_cache *cache,
struct cache_entry *entry)
{
const uint32_t mask = cache->table_size - 1;
const uint32_t start = entry->sha1_dw[0];
/* We'll always be able to insert when we get here. */
assert(cache->kernel_count < cache->table_size / 2);
for (uint32_t i = 0; i < cache->table_size; i++) {
const uint32_t index = (start + i) & mask;
if (!cache->hash_table[index]) {
cache->hash_table[index] = entry;
break;
}
}
cache->total_size += entry_size(entry);
cache->kernel_count++;
}
static VkResult
tu_pipeline_cache_grow(struct tu_pipeline_cache *cache)
{
const uint32_t table_size = cache->table_size * 2;
const uint32_t old_table_size = cache->table_size;
const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
struct cache_entry **table;
struct cache_entry **old_table = cache->hash_table;
table = malloc(byte_size);
if (table == NULL)
return vk_error(cache, VK_ERROR_OUT_OF_HOST_MEMORY);
cache->hash_table = table;
cache->table_size = table_size;
cache->kernel_count = 0;
cache->total_size = 0;
memset(cache->hash_table, 0, byte_size);
for (uint32_t i = 0; i < old_table_size; i++) {
struct cache_entry *entry = old_table[i];
if (!entry)
continue;
tu_pipeline_cache_set_entry(cache, entry);
}
free(old_table);
return VK_SUCCESS;
}
static void
tu_pipeline_cache_add_entry(struct tu_pipeline_cache *cache,
struct cache_entry *entry)
{
if (cache->kernel_count == cache->table_size / 2)
tu_pipeline_cache_grow(cache);
/* Failing to grow that hash table isn't fatal, but may mean we don't
* have enough space to add this new kernel. Only add it if there's room.
*/
if (cache->kernel_count < cache->table_size / 2)
tu_pipeline_cache_set_entry(cache, entry);
}
static void
tu_pipeline_cache_load(struct tu_pipeline_cache *cache,
const void *data,
size_t size)
{
struct tu_device *device = cache->device;
struct vk_pipeline_cache_header header;
if (size < sizeof(header))
return;
memcpy(&header, data, sizeof(header));
if (header.header_size < sizeof(header))
return;
if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
return;
if (header.vendor_id != 0x5143)
return;
if (header.device_id != device->physical_device->dev_id.chip_id)
return;
if (memcmp(header.uuid, device->physical_device->cache_uuid,
VK_UUID_SIZE) != 0)
return;
char *end = (void *) data + size;
char *p = (void *) data + header.header_size;
while (end - p >= sizeof(struct cache_entry)) {
struct cache_entry *entry = (struct cache_entry *) p;
struct cache_entry *dest_entry;
size_t size = entry_size(entry);
if (end - p < size)
break;
dest_entry =
vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
if (dest_entry) {
memcpy(dest_entry, entry, size);
for (int i = 0; i < MESA_SHADER_STAGES; ++i)
dest_entry->variants[i] = NULL;
tu_pipeline_cache_add_entry(cache, dest_entry);
}
p += size;
}
}
VKAPI_ATTR VkResult VKAPI_CALL
tu_CreatePipelineCache(VkDevice _device,
const VkPipelineCacheCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkPipelineCache *pPipelineCache)
{
TU_FROM_HANDLE(tu_device, device, _device);
struct tu_pipeline_cache *cache;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
assert(pCreateInfo->flags == 0);
cache = vk_object_alloc(&device->vk, pAllocator, sizeof(*cache),
VK_OBJECT_TYPE_PIPELINE_CACHE);
if (cache == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (pAllocator)
cache->alloc = *pAllocator;
else
cache->alloc = device->vk.alloc;
tu_pipeline_cache_init(cache, device);
if (pCreateInfo->initialDataSize > 0) {
tu_pipeline_cache_load(cache, pCreateInfo->pInitialData,
pCreateInfo->initialDataSize);
}
*pPipelineCache = tu_pipeline_cache_to_handle(cache);
return VK_SUCCESS;
}
VKAPI_ATTR void VKAPI_CALL
tu_DestroyPipelineCache(VkDevice _device,
VkPipelineCache _cache,
const VkAllocationCallbacks *pAllocator)
{
TU_FROM_HANDLE(tu_device, device, _device);
TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);
if (!cache)
return;
tu_pipeline_cache_finish(cache);
vk_object_free(&device->vk, pAllocator, cache);
}
VKAPI_ATTR VkResult VKAPI_CALL
tu_GetPipelineCacheData(VkDevice _device,
VkPipelineCache _cache,
size_t *pDataSize,
void *pData)
{
TU_FROM_HANDLE(tu_device, device, _device);
TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);
struct vk_pipeline_cache_header *header;
VkResult result = VK_SUCCESS;
pthread_mutex_lock(&cache->mutex);
const size_t size = sizeof(*header) + cache->total_size;
if (pData == NULL) {
pthread_mutex_unlock(&cache->mutex);
*pDataSize = size;
return VK_SUCCESS;
}
if (*pDataSize < sizeof(*header)) {
pthread_mutex_unlock(&cache->mutex);
*pDataSize = 0;
return VK_INCOMPLETE;
}
void *p = pData, *end = pData + *pDataSize;
header = p;
header->header_size = sizeof(*header);
header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
header->vendor_id = 0x5143;
header->device_id = device->physical_device->dev_id.chip_id;
memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
p += header->header_size;
struct cache_entry *entry;
for (uint32_t i = 0; i < cache->table_size; i++) {
if (!cache->hash_table[i])
continue;
entry = cache->hash_table[i];
const uint32_t size = entry_size(entry);
if (end < p + size) {
result = VK_INCOMPLETE;
break;
}
memcpy(p, entry, size);
for (int j = 0; j < MESA_SHADER_STAGES; ++j)
((struct cache_entry *) p)->variants[j] = NULL;
p += size;
}
*pDataSize = p - pData;
pthread_mutex_unlock(&cache->mutex);
return result;
}
static void
tu_pipeline_cache_merge(struct tu_pipeline_cache *dst,
struct tu_pipeline_cache *src)
{
for (uint32_t i = 0; i < src->table_size; i++) {
struct cache_entry *entry = src->hash_table[i];
if (!entry || tu_pipeline_cache_search(dst, entry->sha1))
continue;
tu_pipeline_cache_add_entry(dst, entry);
src->hash_table[i] = NULL;
}
}
VKAPI_ATTR VkResult VKAPI_CALL
tu_MergePipelineCaches(VkDevice _device,
VkPipelineCache destCache,
uint32_t srcCacheCount,
const VkPipelineCache *pSrcCaches)
{
TU_FROM_HANDLE(tu_pipeline_cache, dst, destCache);
for (uint32_t i = 0; i < srcCacheCount; i++) {
TU_FROM_HANDLE(tu_pipeline_cache, src, pSrcCaches[i]);
tu_pipeline_cache_merge(dst, src);
}
return VK_SUCCESS;
}

src/freedreno/vulkan/tu_private.h

@ -65,6 +65,7 @@
#include "vk_log.h"
#include "vk_physical_device.h"
#include "vk_shader_module.h"
#include "vk_pipeline_cache.h"
#include "wsi_common.h"
#include "ir3/ir3_compiler.h"
@ -239,11 +240,6 @@ struct tu_physical_device
/* Address space and global fault count for this local_fd with DRM backend */
uint64_t fault_count;
/* This is the drivers on-disk cache used as a fallback as opposed to
* the pipeline cache defined by apps.
*/
struct disk_cache *disk_cache;
struct tu_memory_heap heap;
struct vk_sync_type syncobj_type;
@ -521,7 +517,7 @@ struct tu_device
struct ir3_compiler *compiler;
/* Backup in-memory cache to be used if the app doesn't provide one */
struct tu_pipeline_cache *mem_cache;
struct vk_pipeline_cache *mem_cache;
#define MIN_SCRATCH_BO_SIZE_LOG2 12 /* A page */
@ -1367,6 +1363,24 @@ struct tu_shader
bool multi_pos_output;
};
struct tu_shader_key {
unsigned multiview_mask;
enum ir3_wavesize_option api_wavesize, real_wavesize;
};
struct tu_compiled_shaders
{
struct vk_pipeline_cache_object base;
struct tu_push_constant_range push_consts[MESA_SHADER_STAGES];
uint8_t active_desc_sets;
bool multi_pos_output;
struct ir3_shader_variant *variants[MESA_SHADER_STAGES];
};
extern const struct vk_pipeline_cache_object_ops tu_shaders_ops;
bool
tu_nir_lower_multiview(nir_shader *nir, uint32_t mask, bool *multi_pos_output,
struct tu_device *dev);
@ -1380,8 +1394,7 @@ tu_spirv_to_nir(struct tu_device *dev,
struct tu_shader *
tu_shader_create(struct tu_device *dev,
nir_shader *nir,
const VkPipelineShaderStageCreateInfo *stage_info,
unsigned multiview_mask,
const struct tu_shader_key *key,
struct tu_pipeline_layout *layout,
const VkAllocationCallbacks *alloc);

src/freedreno/vulkan/tu_shader.c

@ -700,8 +700,7 @@ tu_gather_xfb_info(nir_shader *nir, struct ir3_stream_output_info *info)
struct tu_shader *
tu_shader_create(struct tu_device *dev,
nir_shader *nir,
const VkPipelineShaderStageCreateInfo *stage_info,
unsigned multiview_mask,
const struct tu_shader_key *key,
struct tu_pipeline_layout *layout,
const VkAllocationCallbacks *alloc)
{
@ -729,7 +728,7 @@ tu_shader_create(struct tu_device *dev,
* sampling function. gl_Layer doesn't work when
* multiview is enabled.
*/
.use_view_id_for_layer = multiview_mask != 0,
.use_view_id_for_layer = key->multiview_mask != 0,
});
}
@ -740,8 +739,8 @@ tu_shader_create(struct tu_device *dev,
*/
ir3_nir_lower_io_to_temporaries(nir);
if (nir->info.stage == MESA_SHADER_VERTEX && multiview_mask) {
tu_nir_lower_multiview(nir, multiview_mask,
if (nir->info.stage == MESA_SHADER_VERTEX && key->multiview_mask) {
tu_nir_lower_multiview(nir, key->multiview_mask,
&shader->multi_pos_output, dev);
}
@ -801,46 +800,11 @@ tu_shader_create(struct tu_device *dev,
ir3_finalize_nir(dev->compiler, nir);
enum ir3_wavesize_option api_wavesize, real_wavesize;
if (stage_info) {
if (stage_info->flags &
VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) {
api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE;
} else {
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *size_info =
vk_find_struct_const(stage_info->pNext,
PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
if (size_info) {
if (size_info->requiredSubgroupSize == dev->compiler->threadsize_base) {
api_wavesize = IR3_SINGLE_ONLY;
} else {
assert(size_info->requiredSubgroupSize == dev->compiler->threadsize_base * 2);
api_wavesize = IR3_DOUBLE_ONLY;
}
} else {
/* Match the exposed subgroupSize. */
api_wavesize = IR3_DOUBLE_ONLY;
}
if (stage_info->flags &
VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT)
real_wavesize = api_wavesize;
else if (api_wavesize == IR3_SINGLE_ONLY)
real_wavesize = IR3_SINGLE_ONLY;
else
real_wavesize = IR3_SINGLE_OR_DOUBLE;
}
} else {
api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE;
}
shader->ir3_shader =
ir3_shader_from_nir(dev->compiler, nir, &(struct ir3_shader_options) {
.reserved_user_consts = align(shader->push_consts.count, 4),
.api_wavesize = api_wavesize,
.real_wavesize = real_wavesize,
.api_wavesize = key->api_wavesize,
.real_wavesize = key->real_wavesize,
}, &so_info);
return shader;