1033 lines
32 KiB
C
1033 lines
32 KiB
C
/*
|
|
* Copyright © 2019 Raspberry Pi Ltd
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "v3dv_private.h"
|
|
#include "vulkan/util/vk_util.h"
|
|
#include "util/blob.h"
|
|
#include "nir/nir_serialize.h"
|
|
|
|
static const bool debug_cache = false;
|
|
static const bool dump_stats = false;
|
|
static const bool dump_stats_on_destroy = false;
|
|
|
|
/* Shared for nir/variants */
|
|
#define V3DV_MAX_PIPELINE_CACHE_ENTRIES 4096
|
|
|
|
static uint32_t
|
|
sha1_hash_func(const void *sha1)
|
|
{
|
|
return _mesa_hash_data(sha1, 20);
|
|
}
|
|
|
|
static bool
|
|
sha1_compare_func(const void *sha1_a, const void *sha1_b)
|
|
{
|
|
return memcmp(sha1_a, sha1_b, 20) == 0;
|
|
}
|
|
|
|
struct serialized_nir {
|
|
unsigned char sha1_key[20];
|
|
size_t size;
|
|
char data[0];
|
|
};
|
|
|
|
static void
|
|
cache_dump_stats(struct v3dv_pipeline_cache *cache)
|
|
{
|
|
fprintf(stderr, " NIR cache entries: %d\n", cache->nir_stats.count);
|
|
fprintf(stderr, " NIR cache miss count: %d\n", cache->nir_stats.miss);
|
|
fprintf(stderr, " NIR cache hit count: %d\n", cache->nir_stats.hit);
|
|
|
|
fprintf(stderr, " cache entries: %d\n", cache->stats.count);
|
|
fprintf(stderr, " cache miss count: %d\n", cache->stats.miss);
|
|
fprintf(stderr, " cache hit count: %d\n", cache->stats.hit);
|
|
|
|
fprintf(stderr, " on-disk cache hit count: %d\n", cache->stats.on_disk_hit);
|
|
}
|
|
|
|
static void
|
|
pipeline_cache_lock(struct v3dv_pipeline_cache *cache)
|
|
{
|
|
if (!cache->externally_synchronized)
|
|
mtx_lock(&cache->mutex);
|
|
}
|
|
|
|
static void
|
|
pipeline_cache_unlock(struct v3dv_pipeline_cache *cache)
|
|
{
|
|
if (!cache->externally_synchronized)
|
|
mtx_unlock(&cache->mutex);
|
|
}
|
|
|
|
void
|
|
v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
|
|
struct v3dv_pipeline_cache *cache,
|
|
nir_shader *nir,
|
|
unsigned char sha1_key[20])
|
|
{
|
|
if (!cache || !cache->nir_cache)
|
|
return;
|
|
|
|
if (cache->nir_stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
|
|
return;
|
|
|
|
pipeline_cache_lock(cache);
|
|
struct hash_entry *entry =
|
|
_mesa_hash_table_search(cache->nir_cache, sha1_key);
|
|
pipeline_cache_unlock(cache);
|
|
if (entry)
|
|
return;
|
|
|
|
struct blob blob;
|
|
blob_init(&blob);
|
|
|
|
nir_serialize(&blob, nir, false);
|
|
if (blob.out_of_memory) {
|
|
blob_finish(&blob);
|
|
return;
|
|
}
|
|
|
|
pipeline_cache_lock(cache);
|
|
/* Because ralloc isn't thread-safe, we have to do all this inside the
|
|
* lock. We could unlock for the big memcpy but it's probably not worth
|
|
* the hassle.
|
|
*/
|
|
entry = _mesa_hash_table_search(cache->nir_cache, sha1_key);
|
|
if (entry) {
|
|
blob_finish(&blob);
|
|
pipeline_cache_unlock(cache);
|
|
return;
|
|
}
|
|
|
|
struct serialized_nir *snir =
|
|
ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size);
|
|
memcpy(snir->sha1_key, sha1_key, 20);
|
|
snir->size = blob.size;
|
|
memcpy(snir->data, blob.data, blob.size);
|
|
|
|
blob_finish(&blob);
|
|
|
|
cache->nir_stats.count++;
|
|
if (debug_cache) {
|
|
char sha1buf[41];
|
|
_mesa_sha1_format(sha1buf, snir->sha1_key);
|
|
fprintf(stderr, "pipeline cache %p, new nir entry %s\n", cache, sha1buf);
|
|
if (dump_stats)
|
|
cache_dump_stats(cache);
|
|
}
|
|
|
|
_mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
|
|
|
|
pipeline_cache_unlock(cache);
|
|
}
|
|
|
|
nir_shader*
|
|
v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
|
|
struct v3dv_pipeline_cache *cache,
|
|
const nir_shader_compiler_options *nir_options,
|
|
unsigned char sha1_key[20])
|
|
{
|
|
if (!cache || !cache->nir_cache)
|
|
return NULL;
|
|
|
|
if (debug_cache) {
|
|
char sha1buf[41];
|
|
_mesa_sha1_format(sha1buf, sha1_key);
|
|
|
|
fprintf(stderr, "pipeline cache %p, search for nir %s\n", cache, sha1buf);
|
|
}
|
|
|
|
const struct serialized_nir *snir = NULL;
|
|
|
|
pipeline_cache_lock(cache);
|
|
struct hash_entry *entry =
|
|
_mesa_hash_table_search(cache->nir_cache, sha1_key);
|
|
if (entry)
|
|
snir = entry->data;
|
|
pipeline_cache_unlock(cache);
|
|
|
|
if (snir) {
|
|
struct blob_reader blob;
|
|
blob_reader_init(&blob, snir->data, snir->size);
|
|
|
|
/* We use context NULL as we want the p_stage to keep the reference to
|
|
* nir, as we keep open the possibility of provide a shader variant
|
|
* after cache creation
|
|
*/
|
|
nir_shader *nir = nir_deserialize(NULL, nir_options, &blob);
|
|
if (blob.overrun) {
|
|
ralloc_free(nir);
|
|
} else {
|
|
cache->nir_stats.hit++;
|
|
if (debug_cache) {
|
|
fprintf(stderr, "[v3dv nir cache] hit: %p\n", nir);
|
|
if (dump_stats)
|
|
cache_dump_stats(cache);
|
|
}
|
|
return nir;
|
|
}
|
|
}
|
|
|
|
cache->nir_stats.miss++;
|
|
if (debug_cache) {
|
|
fprintf(stderr, "[v3dv nir cache] miss\n");
|
|
if (dump_stats)
|
|
cache_dump_stats(cache);
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
void
|
|
v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
|
|
struct v3dv_device *device,
|
|
VkPipelineCacheCreateFlags flags,
|
|
bool cache_enabled)
|
|
{
|
|
cache->device = device;
|
|
mtx_init(&cache->mutex, mtx_plain);
|
|
|
|
if (cache_enabled) {
|
|
cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func,
|
|
sha1_compare_func);
|
|
cache->nir_stats.miss = 0;
|
|
cache->nir_stats.hit = 0;
|
|
cache->nir_stats.count = 0;
|
|
|
|
cache->cache = _mesa_hash_table_create(NULL, sha1_hash_func,
|
|
sha1_compare_func);
|
|
cache->stats.miss = 0;
|
|
cache->stats.hit = 0;
|
|
cache->stats.count = 0;
|
|
|
|
cache->externally_synchronized = flags &
|
|
VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT;
|
|
} else {
|
|
cache->nir_cache = NULL;
|
|
cache->cache = NULL;
|
|
}
|
|
|
|
}
|
|
|
|
static struct v3dv_pipeline_shared_data *
|
|
v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
|
|
struct blob_reader *blob);
|
|
|
|
static void
|
|
pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
|
|
struct v3dv_pipeline_shared_data *shared_data,
|
|
bool from_disk_cache);
|
|
|
|
static bool
|
|
v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
|
|
struct blob *blob);
|
|
|
|
/**
|
|
* It searchs for pipeline cached data, and returns a v3dv_pipeline_shared_data with
|
|
* it, or NULL if doesn't have it cached. On the former, it will increases the
|
|
* ref_count, so caller is responsible to unref it.
|
|
*/
|
|
struct v3dv_pipeline_shared_data *
|
|
v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
|
|
unsigned char sha1_key[20],
|
|
bool *cache_hit)
|
|
{
|
|
if (!cache || !cache->cache)
|
|
return NULL;
|
|
|
|
if (debug_cache) {
|
|
char sha1buf[41];
|
|
_mesa_sha1_format(sha1buf, sha1_key);
|
|
|
|
fprintf(stderr, "pipeline cache %p, search pipeline with key %s\n", cache, sha1buf);
|
|
}
|
|
|
|
pipeline_cache_lock(cache);
|
|
|
|
struct hash_entry *entry =
|
|
_mesa_hash_table_search(cache->cache, sha1_key);
|
|
|
|
if (entry) {
|
|
struct v3dv_pipeline_shared_data *cache_entry =
|
|
(struct v3dv_pipeline_shared_data *) entry->data;
|
|
assert(cache_entry);
|
|
|
|
cache->stats.hit++;
|
|
*cache_hit = true;
|
|
if (debug_cache) {
|
|
fprintf(stderr, "[v3dv cache] hit: %p\n", cache_entry);
|
|
if (dump_stats)
|
|
cache_dump_stats(cache);
|
|
}
|
|
|
|
|
|
v3dv_pipeline_shared_data_ref(cache_entry);
|
|
|
|
pipeline_cache_unlock(cache);
|
|
|
|
return cache_entry;
|
|
}
|
|
|
|
cache->stats.miss++;
|
|
if (debug_cache) {
|
|
fprintf(stderr, "[v3dv cache] miss\n");
|
|
if (dump_stats)
|
|
cache_dump_stats(cache);
|
|
}
|
|
|
|
pipeline_cache_unlock(cache);
|
|
|
|
#ifdef ENABLE_SHADER_CACHE
|
|
struct v3dv_device *device = cache->device;
|
|
struct disk_cache *disk_cache = device->pdevice->disk_cache;
|
|
/* Note that the on-disk-cache can be independently disabled, while keeping
|
|
* the pipeline cache working, by using the environment variable
|
|
* MESA_SHADER_CACHE_DISABLE. In that case the calls to disk_cache_put/get
|
|
* will not do anything.
|
|
*/
|
|
if (disk_cache && device->instance->pipeline_cache_enabled) {
|
|
cache_key cache_key;
|
|
disk_cache_compute_key(disk_cache, sha1_key, 20, cache_key);
|
|
|
|
size_t buffer_size;
|
|
uint8_t *buffer = disk_cache_get(disk_cache, cache_key, &buffer_size);
|
|
if (unlikely(V3D_DEBUG & V3D_DEBUG_CACHE)) {
|
|
char sha1buf[41];
|
|
_mesa_sha1_format(sha1buf, cache_key);
|
|
fprintf(stderr, "[v3dv on-disk cache] %s %s\n",
|
|
buffer ? "hit" : "miss",
|
|
sha1buf);
|
|
}
|
|
|
|
if (buffer) {
|
|
struct blob_reader blob;
|
|
struct v3dv_pipeline_shared_data *shared_data;
|
|
|
|
blob_reader_init(&blob, buffer, buffer_size);
|
|
shared_data = v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
|
|
free(buffer);
|
|
|
|
if (shared_data) {
|
|
/* Technically we could increase on_disk_hit as soon as we have a
|
|
* buffer, but we are more interested on hits that got a valid
|
|
* shared_data
|
|
*/
|
|
cache->stats.on_disk_hit++;
|
|
if (cache)
|
|
pipeline_cache_upload_shared_data(cache, shared_data, true);
|
|
return shared_data;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
return NULL;
|
|
}
|
|
|
|
void
|
|
v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
|
|
struct v3dv_pipeline_shared_data *shared_data)
|
|
{
|
|
assert(shared_data->ref_cnt == 0);
|
|
|
|
for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
|
|
if (shared_data->variants[stage] != NULL)
|
|
v3dv_shader_variant_destroy(device, shared_data->variants[stage]);
|
|
|
|
/* We don't free binning descriptor maps as we are sharing them
|
|
* with the render shaders.
|
|
*/
|
|
if (shared_data->maps[stage] != NULL &&
|
|
!broadcom_shader_stage_is_binning(stage)) {
|
|
vk_free(&device->vk.alloc, shared_data->maps[stage]);
|
|
}
|
|
}
|
|
|
|
if (shared_data->assembly_bo)
|
|
v3dv_bo_free(device, shared_data->assembly_bo);
|
|
|
|
vk_free(&device->vk.alloc, shared_data);
|
|
}
|
|
|
|
static struct v3dv_pipeline_shared_data *
|
|
v3dv_pipeline_shared_data_new(struct v3dv_pipeline_cache *cache,
|
|
const unsigned char sha1_key[20],
|
|
struct v3dv_descriptor_maps **maps,
|
|
struct v3dv_shader_variant **variants,
|
|
const uint64_t *total_assembly,
|
|
const uint32_t total_assembly_size)
|
|
{
|
|
size_t size = sizeof(struct v3dv_pipeline_shared_data);
|
|
/* We create new_entry using the device alloc. Right now shared_data is ref
|
|
* and unref by both the pipeline and the pipeline cache, so we can't
|
|
* ensure that the cache or pipeline alloc will be available on the last
|
|
* unref.
|
|
*/
|
|
struct v3dv_pipeline_shared_data *new_entry =
|
|
vk_zalloc2(&cache->device->vk.alloc, NULL, size, 8,
|
|
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
|
|
|
if (new_entry == NULL)
|
|
return NULL;
|
|
|
|
new_entry->ref_cnt = 1;
|
|
memcpy(new_entry->sha1_key, sha1_key, 20);
|
|
|
|
for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
|
|
new_entry->maps[stage] = maps[stage];
|
|
new_entry->variants[stage] = variants[stage];
|
|
}
|
|
|
|
struct v3dv_bo *bo = v3dv_bo_alloc(cache->device, total_assembly_size,
|
|
"pipeline shader assembly", true);
|
|
if (!bo) {
|
|
fprintf(stderr, "failed to allocate memory for shaders assembly\n");
|
|
v3dv_pipeline_shared_data_unref(cache->device, new_entry);
|
|
return NULL;
|
|
}
|
|
|
|
bool ok = v3dv_bo_map(cache->device, bo, total_assembly_size);
|
|
if (!ok) {
|
|
fprintf(stderr, "failed to map source shader buffer\n");
|
|
v3dv_pipeline_shared_data_unref(cache->device, new_entry);
|
|
return NULL;
|
|
}
|
|
|
|
memcpy(bo->map, total_assembly, total_assembly_size);
|
|
|
|
new_entry->assembly_bo = bo;
|
|
|
|
return new_entry;
|
|
}
|
|
|
|
static void
|
|
pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
|
|
struct v3dv_pipeline_shared_data *shared_data,
|
|
bool from_disk_cache)
|
|
{
|
|
assert(shared_data);
|
|
|
|
if (!cache || !cache->cache)
|
|
return;
|
|
|
|
if (cache->stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
|
|
return;
|
|
|
|
pipeline_cache_lock(cache);
|
|
struct hash_entry *entry = NULL;
|
|
|
|
/* If this is being called from the disk cache, we already know that the
|
|
* entry is not on the hash table
|
|
*/
|
|
if (!from_disk_cache)
|
|
entry = _mesa_hash_table_search(cache->cache, shared_data->sha1_key);
|
|
|
|
if (entry) {
|
|
pipeline_cache_unlock(cache);
|
|
return;
|
|
}
|
|
|
|
v3dv_pipeline_shared_data_ref(shared_data);
|
|
_mesa_hash_table_insert(cache->cache, shared_data->sha1_key, shared_data);
|
|
cache->stats.count++;
|
|
if (debug_cache) {
|
|
char sha1buf[41];
|
|
_mesa_sha1_format(sha1buf, shared_data->sha1_key);
|
|
|
|
fprintf(stderr, "pipeline cache %p, new cache entry with sha1 key %s:%p\n\n",
|
|
cache, sha1buf, shared_data);
|
|
if (dump_stats)
|
|
cache_dump_stats(cache);
|
|
}
|
|
|
|
pipeline_cache_unlock(cache);
|
|
|
|
#ifdef ENABLE_SHADER_CACHE
|
|
/* If we are being called from a on-disk-cache hit, we can skip writing to
|
|
* the disk cache
|
|
*/
|
|
if (from_disk_cache)
|
|
return;
|
|
|
|
struct v3dv_device *device = cache->device;
|
|
struct disk_cache *disk_cache = device->pdevice->disk_cache;
|
|
if (disk_cache) {
|
|
struct blob binary;
|
|
blob_init(&binary);
|
|
if (v3dv_pipeline_shared_data_write_to_blob(shared_data, &binary)) {
|
|
cache_key cache_key;
|
|
disk_cache_compute_key(disk_cache, shared_data->sha1_key, 20, cache_key);
|
|
|
|
if (unlikely(V3D_DEBUG & V3D_DEBUG_CACHE)) {
|
|
char sha1buf[41];
|
|
_mesa_sha1_format(sha1buf, shared_data->sha1_key);
|
|
fprintf(stderr, "[v3dv on-disk cache] storing %s\n", sha1buf);
|
|
}
|
|
disk_cache_put(disk_cache, cache_key, binary.data, binary.size, NULL);
|
|
}
|
|
|
|
blob_finish(&binary);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/* Uploads all the "cacheable" or shared data from the pipeline */
|
|
void
|
|
v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
|
|
struct v3dv_pipeline_cache *cache)
|
|
{
|
|
pipeline_cache_upload_shared_data(cache, pipeline->shared_data, false);
|
|
}
|
|
|
|
static struct serialized_nir*
|
|
serialized_nir_create_from_blob(struct v3dv_pipeline_cache *cache,
|
|
struct blob_reader *blob)
|
|
{
|
|
const unsigned char *sha1_key = blob_read_bytes(blob, 20);
|
|
uint32_t snir_size = blob_read_uint32(blob);
|
|
const char* snir_data = blob_read_bytes(blob, snir_size);
|
|
if (blob->overrun)
|
|
return NULL;
|
|
|
|
struct serialized_nir *snir =
|
|
ralloc_size(cache->nir_cache, sizeof(*snir) + snir_size);
|
|
memcpy(snir->sha1_key, sha1_key, 20);
|
|
snir->size = snir_size;
|
|
memcpy(snir->data, snir_data, snir_size);
|
|
|
|
return snir;
|
|
}
|
|
|
|
static struct v3dv_shader_variant*
|
|
shader_variant_create_from_blob(struct v3dv_device *device,
|
|
struct blob_reader *blob)
|
|
{
|
|
VkResult result;
|
|
|
|
enum broadcom_shader_stage stage = blob_read_uint32(blob);
|
|
|
|
uint32_t prog_data_size = blob_read_uint32(blob);
|
|
/* FIXME: as we include the stage perhaps we can avoid prog_data_size? */
|
|
assert(prog_data_size == v3d_prog_data_size(broadcom_shader_stage_to_gl(stage)));
|
|
|
|
const void *prog_data = blob_read_bytes(blob, prog_data_size);
|
|
if (blob->overrun)
|
|
return NULL;
|
|
|
|
uint32_t ulist_count = blob_read_uint32(blob);
|
|
uint32_t contents_size = sizeof(enum quniform_contents) * ulist_count;
|
|
const void *contents_data = blob_read_bytes(blob, contents_size);
|
|
if (blob->overrun)
|
|
return NULL;
|
|
|
|
uint ulist_data_size = sizeof(uint32_t) * ulist_count;
|
|
const void *ulist_data_data = blob_read_bytes(blob, ulist_data_size);
|
|
if (blob->overrun)
|
|
return NULL;
|
|
|
|
uint32_t assembly_offset = blob_read_uint32(blob);
|
|
uint32_t qpu_insts_size = blob_read_uint32(blob);
|
|
|
|
/* shader_variant_create expects a newly created prog_data for their own,
|
|
* as it is what the v3d compiler returns. So we are also allocating one
|
|
* (including the uniform list) and filled it up with the data that we read
|
|
* from the blob
|
|
*/
|
|
struct v3d_prog_data *new_prog_data = rzalloc_size(NULL, prog_data_size);
|
|
memcpy(new_prog_data, prog_data, prog_data_size);
|
|
struct v3d_uniform_list *ulist = &new_prog_data->uniforms;
|
|
ulist->count = ulist_count;
|
|
ulist->contents = ralloc_array(new_prog_data, enum quniform_contents, ulist->count);
|
|
memcpy(ulist->contents, contents_data, contents_size);
|
|
ulist->data = ralloc_array(new_prog_data, uint32_t, ulist->count);
|
|
memcpy(ulist->data, ulist_data_data, ulist_data_size);
|
|
|
|
return v3dv_shader_variant_create(device, stage,
|
|
new_prog_data, prog_data_size,
|
|
assembly_offset,
|
|
NULL, qpu_insts_size,
|
|
&result);
|
|
}
|
|
|
|
static struct v3dv_pipeline_shared_data *
|
|
v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
|
|
struct blob_reader *blob)
|
|
{
|
|
const unsigned char *sha1_key = blob_read_bytes(blob, 20);
|
|
|
|
struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES] = { 0 };
|
|
|
|
uint8_t descriptor_maps_count = blob_read_uint8(blob);
|
|
for (uint8_t count = 0; count < descriptor_maps_count; count++) {
|
|
uint8_t stage = blob_read_uint8(blob);
|
|
|
|
const struct v3dv_descriptor_maps *current_maps =
|
|
blob_read_bytes(blob, sizeof(struct v3dv_descriptor_maps));
|
|
|
|
if (blob->overrun)
|
|
return NULL;
|
|
|
|
maps[stage] = vk_zalloc2(&cache->device->vk.alloc, NULL,
|
|
sizeof(struct v3dv_descriptor_maps), 8,
|
|
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
|
|
|
if (maps[stage] == NULL)
|
|
return NULL;
|
|
|
|
memcpy(maps[stage], current_maps, sizeof(struct v3dv_descriptor_maps));
|
|
if (broadcom_shader_stage_is_render_with_binning(stage)) {
|
|
enum broadcom_shader_stage bin_stage =
|
|
broadcom_binning_shader_stage_for_render_stage(stage);
|
|
maps[bin_stage] = maps[stage];
|
|
}
|
|
}
|
|
|
|
uint8_t variant_count = blob_read_uint8(blob);
|
|
|
|
struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES] = { 0 };
|
|
|
|
for (uint8_t count = 0; count < variant_count; count++) {
|
|
uint8_t stage = blob_read_uint8(blob);
|
|
struct v3dv_shader_variant *variant =
|
|
shader_variant_create_from_blob(cache->device, blob);
|
|
variants[stage] = variant;
|
|
}
|
|
|
|
uint32_t total_assembly_size = blob_read_uint32(blob);
|
|
const uint64_t *total_assembly =
|
|
blob_read_bytes(blob, total_assembly_size);
|
|
|
|
if (blob->overrun)
|
|
return NULL;
|
|
|
|
return v3dv_pipeline_shared_data_new(cache, sha1_key, maps, variants,
|
|
total_assembly, total_assembly_size);
|
|
}
|
|
|
|
static void
|
|
pipeline_cache_load(struct v3dv_pipeline_cache *cache,
|
|
size_t size,
|
|
const void *data)
|
|
{
|
|
struct v3dv_device *device = cache->device;
|
|
struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
|
|
struct vk_pipeline_cache_header header;
|
|
|
|
if (cache->cache == NULL || cache->nir_cache == NULL)
|
|
return;
|
|
|
|
struct blob_reader blob;
|
|
blob_reader_init(&blob, data, size);
|
|
|
|
blob_copy_bytes(&blob, &header, sizeof(header));
|
|
if (size < sizeof(header))
|
|
return;
|
|
memcpy(&header, data, sizeof(header));
|
|
if (header.header_size < sizeof(header))
|
|
return;
|
|
if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
|
|
return;
|
|
if (header.vendor_id != v3dv_physical_device_vendor_id(pdevice))
|
|
return;
|
|
if (header.device_id != v3dv_physical_device_device_id(pdevice))
|
|
return;
|
|
if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
|
|
return;
|
|
|
|
uint32_t nir_count = blob_read_uint32(&blob);
|
|
if (blob.overrun)
|
|
return;
|
|
|
|
for (uint32_t i = 0; i < nir_count; i++) {
|
|
struct serialized_nir *snir =
|
|
serialized_nir_create_from_blob(cache, &blob);
|
|
|
|
if (!snir)
|
|
break;
|
|
|
|
_mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
|
|
cache->nir_stats.count++;
|
|
}
|
|
|
|
uint32_t count = blob_read_uint32(&blob);
|
|
if (blob.overrun)
|
|
return;
|
|
|
|
for (uint32_t i = 0; i < count; i++) {
|
|
struct v3dv_pipeline_shared_data *cache_entry =
|
|
v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
|
|
if (!cache_entry)
|
|
break;
|
|
|
|
_mesa_hash_table_insert(cache->cache, cache_entry->sha1_key, cache_entry);
|
|
cache->stats.count++;
|
|
}
|
|
|
|
if (debug_cache) {
|
|
fprintf(stderr, "pipeline cache %p, loaded %i nir shaders and "
|
|
"%i entries\n", cache, nir_count, count);
|
|
if (dump_stats)
|
|
cache_dump_stats(cache);
|
|
}
|
|
}
|
|
|
|
VKAPI_ATTR VkResult VKAPI_CALL
|
|
v3dv_CreatePipelineCache(VkDevice _device,
|
|
const VkPipelineCacheCreateInfo *pCreateInfo,
|
|
const VkAllocationCallbacks *pAllocator,
|
|
VkPipelineCache *pPipelineCache)
|
|
{
|
|
V3DV_FROM_HANDLE(v3dv_device, device, _device);
|
|
struct v3dv_pipeline_cache *cache;
|
|
|
|
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
|
|
|
|
cache = vk_object_zalloc(&device->vk, pAllocator,
|
|
sizeof(*cache),
|
|
VK_OBJECT_TYPE_PIPELINE_CACHE);
|
|
|
|
if (cache == NULL)
|
|
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
|
|
v3dv_pipeline_cache_init(cache, device, pCreateInfo->flags,
|
|
device->instance->pipeline_cache_enabled);
|
|
|
|
if (pCreateInfo->initialDataSize > 0) {
|
|
pipeline_cache_load(cache,
|
|
pCreateInfo->initialDataSize,
|
|
pCreateInfo->pInitialData);
|
|
}
|
|
|
|
*pPipelineCache = v3dv_pipeline_cache_to_handle(cache);
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
void
|
|
v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache)
|
|
{
|
|
mtx_destroy(&cache->mutex);
|
|
|
|
if (dump_stats_on_destroy)
|
|
cache_dump_stats(cache);
|
|
|
|
if (cache->nir_cache) {
|
|
hash_table_foreach(cache->nir_cache, entry)
|
|
ralloc_free(entry->data);
|
|
|
|
_mesa_hash_table_destroy(cache->nir_cache, NULL);
|
|
}
|
|
|
|
if (cache->cache) {
|
|
hash_table_foreach(cache->cache, entry) {
|
|
struct v3dv_pipeline_shared_data *cache_entry = entry->data;
|
|
if (cache_entry)
|
|
v3dv_pipeline_shared_data_unref(cache->device, cache_entry);
|
|
}
|
|
|
|
_mesa_hash_table_destroy(cache->cache, NULL);
|
|
}
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
v3dv_DestroyPipelineCache(VkDevice _device,
|
|
VkPipelineCache _cache,
|
|
const VkAllocationCallbacks *pAllocator)
|
|
{
|
|
V3DV_FROM_HANDLE(v3dv_device, device, _device);
|
|
V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
|
|
|
|
if (!cache)
|
|
return;
|
|
|
|
v3dv_pipeline_cache_finish(cache);
|
|
|
|
vk_object_free(&device->vk, pAllocator, cache);
|
|
}
|
|
|
|
VKAPI_ATTR VkResult VKAPI_CALL
|
|
v3dv_MergePipelineCaches(VkDevice device,
|
|
VkPipelineCache dstCache,
|
|
uint32_t srcCacheCount,
|
|
const VkPipelineCache *pSrcCaches)
|
|
{
|
|
V3DV_FROM_HANDLE(v3dv_pipeline_cache, dst, dstCache);
|
|
|
|
if (!dst->cache || !dst->nir_cache)
|
|
return VK_SUCCESS;
|
|
|
|
for (uint32_t i = 0; i < srcCacheCount; i++) {
|
|
V3DV_FROM_HANDLE(v3dv_pipeline_cache, src, pSrcCaches[i]);
|
|
if (!src->cache || !src->nir_cache)
|
|
continue;
|
|
|
|
hash_table_foreach(src->nir_cache, entry) {
|
|
struct serialized_nir *src_snir = entry->data;
|
|
assert(src_snir);
|
|
|
|
if (_mesa_hash_table_search(dst->nir_cache, src_snir->sha1_key))
|
|
continue;
|
|
|
|
/* FIXME: we are using serialized nir shaders because they are
|
|
* convenient to create and store on the cache, but requires to do a
|
|
* copy here (and some other places) of the serialized NIR. Perhaps
|
|
* it would make sense to move to handle the NIR shaders with shared
|
|
* structures with ref counts, as the variants.
|
|
*/
|
|
struct serialized_nir *snir_dst =
|
|
ralloc_size(dst->nir_cache, sizeof(*snir_dst) + src_snir->size);
|
|
memcpy(snir_dst->sha1_key, src_snir->sha1_key, 20);
|
|
snir_dst->size = src_snir->size;
|
|
memcpy(snir_dst->data, src_snir->data, src_snir->size);
|
|
|
|
_mesa_hash_table_insert(dst->nir_cache, snir_dst->sha1_key, snir_dst);
|
|
dst->nir_stats.count++;
|
|
if (debug_cache) {
|
|
char sha1buf[41];
|
|
_mesa_sha1_format(sha1buf, snir_dst->sha1_key);
|
|
|
|
fprintf(stderr, "pipeline cache %p, added nir entry %s "
|
|
"from pipeline cache %p\n",
|
|
dst, sha1buf, src);
|
|
if (dump_stats)
|
|
cache_dump_stats(dst);
|
|
}
|
|
}
|
|
|
|
hash_table_foreach(src->cache, entry) {
|
|
struct v3dv_pipeline_shared_data *cache_entry = entry->data;
|
|
assert(cache_entry);
|
|
|
|
if (_mesa_hash_table_search(dst->cache, cache_entry->sha1_key))
|
|
continue;
|
|
|
|
v3dv_pipeline_shared_data_ref(cache_entry);
|
|
_mesa_hash_table_insert(dst->cache, cache_entry->sha1_key, cache_entry);
|
|
|
|
dst->stats.count++;
|
|
if (debug_cache) {
|
|
char sha1buf[41];
|
|
_mesa_sha1_format(sha1buf, cache_entry->sha1_key);
|
|
|
|
fprintf(stderr, "pipeline cache %p, added entry %s "
|
|
"from pipeline cache %p\n",
|
|
dst, sha1buf, src);
|
|
if (dump_stats)
|
|
cache_dump_stats(dst);
|
|
}
|
|
}
|
|
}
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static bool
|
|
shader_variant_write_to_blob(const struct v3dv_shader_variant *variant,
|
|
struct blob *blob)
|
|
{
|
|
blob_write_uint32(blob, variant->stage);
|
|
|
|
blob_write_uint32(blob, variant->prog_data_size);
|
|
blob_write_bytes(blob, variant->prog_data.base, variant->prog_data_size);
|
|
|
|
struct v3d_uniform_list *ulist = &variant->prog_data.base->uniforms;
|
|
blob_write_uint32(blob, ulist->count);
|
|
blob_write_bytes(blob, ulist->contents, sizeof(enum quniform_contents) * ulist->count);
|
|
blob_write_bytes(blob, ulist->data, sizeof(uint32_t) * ulist->count);
|
|
|
|
blob_write_uint32(blob, variant->assembly_offset);
|
|
blob_write_uint32(blob, variant->qpu_insts_size);
|
|
|
|
return !blob->out_of_memory;
|
|
}
|
|
|
|
static bool
|
|
v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
|
|
struct blob *blob)
|
|
{
|
|
blob_write_bytes(blob, cache_entry->sha1_key, 20);
|
|
|
|
uint8_t descriptor_maps_count = 0;
|
|
for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
|
|
if (broadcom_shader_stage_is_binning(stage))
|
|
continue;
|
|
if (cache_entry->maps[stage] == NULL)
|
|
continue;
|
|
descriptor_maps_count++;
|
|
}
|
|
|
|
/* Compute pipelines only have one descriptor map,
|
|
* graphics pipelines may have 2 (VS+FS) or 3 (VS+GS+FS), since the binning
|
|
* stages take the descriptor map from the render stage.
|
|
*/
|
|
assert((descriptor_maps_count >= 2 && descriptor_maps_count <= 3) ||
|
|
(descriptor_maps_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
|
|
blob_write_uint8(blob, descriptor_maps_count);
|
|
|
|
for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
|
|
if (cache_entry->maps[stage] == NULL)
|
|
continue;
|
|
if (broadcom_shader_stage_is_binning(stage))
|
|
continue;
|
|
|
|
blob_write_uint8(blob, stage);
|
|
blob_write_bytes(blob, cache_entry->maps[stage],
|
|
sizeof(struct v3dv_descriptor_maps));
|
|
}
|
|
|
|
uint8_t variant_count = 0;
|
|
for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
|
|
if (cache_entry->variants[stage] == NULL)
|
|
continue;
|
|
variant_count++;
|
|
}
|
|
|
|
/* Graphics pipelines with VS+FS have 3 variants, VS+GS+FS will have 5 and
|
|
* compute pipelines only have 1.
|
|
*/
|
|
assert((variant_count == 5 || variant_count == 3) ||
|
|
(variant_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
|
|
blob_write_uint8(blob, variant_count);
|
|
|
|
uint32_t total_assembly_size = 0;
|
|
for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
|
|
if (cache_entry->variants[stage] == NULL)
|
|
continue;
|
|
|
|
blob_write_uint8(blob, stage);
|
|
if (!shader_variant_write_to_blob(cache_entry->variants[stage], blob))
|
|
return false;
|
|
|
|
total_assembly_size += cache_entry->variants[stage]->qpu_insts_size;
|
|
}
|
|
blob_write_uint32(blob, total_assembly_size);
|
|
|
|
assert(cache_entry->assembly_bo->map);
|
|
assert(cache_entry->assembly_bo->size >= total_assembly_size);
|
|
blob_write_bytes(blob, cache_entry->assembly_bo->map, total_assembly_size);
|
|
|
|
return !blob->out_of_memory;
|
|
}
|
|
|
|
|
|
VKAPI_ATTR VkResult VKAPI_CALL
|
|
v3dv_GetPipelineCacheData(VkDevice _device,
|
|
VkPipelineCache _cache,
|
|
size_t *pDataSize,
|
|
void *pData)
|
|
{
|
|
V3DV_FROM_HANDLE(v3dv_device, device, _device);
|
|
V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
|
|
|
|
struct blob blob;
|
|
if (pData) {
|
|
blob_init_fixed(&blob, pData, *pDataSize);
|
|
} else {
|
|
blob_init_fixed(&blob, NULL, SIZE_MAX);
|
|
}
|
|
|
|
struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
|
|
VkResult result = VK_INCOMPLETE;
|
|
|
|
pipeline_cache_lock(cache);
|
|
|
|
struct vk_pipeline_cache_header header = {
|
|
.header_size = sizeof(struct vk_pipeline_cache_header),
|
|
.header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,
|
|
.vendor_id = v3dv_physical_device_vendor_id(pdevice),
|
|
.device_id = v3dv_physical_device_device_id(pdevice),
|
|
};
|
|
memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
|
|
blob_write_bytes(&blob, &header, sizeof(header));
|
|
|
|
uint32_t nir_count = 0;
|
|
intptr_t nir_count_offset = blob_reserve_uint32(&blob);
|
|
if (nir_count_offset < 0) {
|
|
*pDataSize = 0;
|
|
goto done;
|
|
}
|
|
|
|
if (cache->nir_cache) {
|
|
hash_table_foreach(cache->nir_cache, entry) {
|
|
const struct serialized_nir *snir = entry->data;
|
|
|
|
size_t save_size = blob.size;
|
|
|
|
blob_write_bytes(&blob, snir->sha1_key, 20);
|
|
blob_write_uint32(&blob, snir->size);
|
|
blob_write_bytes(&blob, snir->data, snir->size);
|
|
|
|
if (blob.out_of_memory) {
|
|
blob.size = save_size;
|
|
goto done;
|
|
}
|
|
|
|
nir_count++;
|
|
}
|
|
}
|
|
blob_overwrite_uint32(&blob, nir_count_offset, nir_count);
|
|
|
|
uint32_t count = 0;
|
|
intptr_t count_offset = blob_reserve_uint32(&blob);
|
|
if (count_offset < 0) {
|
|
*pDataSize = 0;
|
|
goto done;
|
|
}
|
|
|
|
if (cache->cache) {
|
|
hash_table_foreach(cache->cache, entry) {
|
|
struct v3dv_pipeline_shared_data *cache_entry = entry->data;
|
|
|
|
size_t save_size = blob.size;
|
|
if (!v3dv_pipeline_shared_data_write_to_blob(cache_entry, &blob)) {
|
|
/* If it fails reset to the previous size and bail */
|
|
blob.size = save_size;
|
|
goto done;
|
|
}
|
|
|
|
count++;
|
|
}
|
|
}
|
|
|
|
blob_overwrite_uint32(&blob, count_offset, count);
|
|
|
|
*pDataSize = blob.size;
|
|
|
|
result = VK_SUCCESS;
|
|
|
|
if (debug_cache) {
|
|
assert(count <= cache->stats.count);
|
|
fprintf(stderr, "GetPipelineCacheData: serializing cache %p, "
|
|
"%i nir shader entries "
|
|
"%i entries, %u DataSize\n",
|
|
cache, nir_count, count, (uint32_t) *pDataSize);
|
|
}
|
|
|
|
done:
|
|
blob_finish(&blob);
|
|
|
|
pipeline_cache_unlock(cache);
|
|
|
|
return result;
|
|
}
|