mesa/src/util/disk_cache.c

464 lines
13 KiB
C
Raw Normal View History

2016-09-27 23:55:02 +01:00
/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifdef ENABLE_SHADER_CACHE
2016-09-27 23:55:02 +01:00
#include <ctype.h>
#include <ftw.h>
2016-09-27 23:55:02 +01:00
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/file.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
#include <dirent.h>
#include <inttypes.h>
#include "util/crc32.h"
#include "util/debug.h"
#include "util/rand_xor.h"
2016-09-27 23:55:02 +01:00
#include "util/u_atomic.h"
#include "util/mesa-sha1.h"
#include "util/ralloc.h"
#include "util/compiler.h"
2016-09-27 23:55:02 +01:00
#include "disk_cache.h"
#include "disk_cache_os.h"
2016-09-27 23:55:02 +01:00
/* The cache version should be bumped whenever a change is made to the
* structure of cache entries or the index. This will give any 3rd party
* applications reading the cache entries a chance to adjust to the changes.
*
* - The cache version is checked internally when reading a cache entry. If we
* ever have a mismatch we are in big trouble as this means we had a cache
* collision. In case of such an event please check the skies for giant
* asteroids and that the entire Mesa team hasn't been eaten by wolves.
*
* - There is no strict requirement that cache versions be backwards
* compatible but effort should be taken to limit disruption where possible.
*/
#define CACHE_VERSION 1
#define DRV_KEY_CPY(_dst, _src, _src_size) \
do { \
memcpy(_dst, _src, _src_size); \
_dst += _src_size; \
} while (0);
struct disk_cache *
disk_cache_create(const char *gpu_name, const char *driver_id,
uint64_t driver_flags)
2016-09-27 23:55:02 +01:00
{
void *local;
struct disk_cache *cache = NULL;
char *max_size_str;
uint64_t max_size;
2016-09-27 23:55:02 +01:00
uint8_t cache_version = CACHE_VERSION;
size_t cv_size = sizeof(cache_version);
if (!disk_cache_enabled())
return NULL;
2016-09-27 23:55:02 +01:00
/* A ralloc context for transient data during this invocation. */
local = ralloc_context(NULL);
if (local == NULL)
goto fail;
cache = rzalloc(NULL, struct disk_cache);
if (cache == NULL)
goto fail;
/* Assume failure. */
cache->path_init_failed = true;
#ifdef ANDROID
/* Android needs the "disk cache" to be enabled for
* EGL_ANDROID_blob_cache's callbacks to be called, but it doesn't actually
* want any storing to disk to happen inside of the driver.
*/
goto path_fail;
#endif
char *path = disk_cache_generate_cache_dir(local);
if (!path)
goto path_fail;
2016-09-27 23:55:02 +01:00
if (!disk_cache_mmap_cache_index(local, cache, path))
goto path_fail;
2016-09-27 23:55:02 +01:00
max_size = 0;
max_size_str = getenv("MESA_GLSL_CACHE_MAX_SIZE");
#ifdef MESA_GLSL_CACHE_MAX_SIZE
if( !max_size_str ) {
max_size_str = MESA_GLSL_CACHE_MAX_SIZE;
}
#endif
2016-09-27 23:55:02 +01:00
if (max_size_str) {
char *end;
max_size = strtoul(max_size_str, &end, 10);
if (end == max_size_str) {
max_size = 0;
} else {
switch (*end) {
case 'K':
case 'k':
max_size *= 1024;
break;
case 'M':
case 'm':
max_size *= 1024*1024;
break;
case '\0':
case 'G':
case 'g':
default:
max_size *= 1024*1024*1024;
break;
}
}
}
/* Default to 1GB for maximum cache size. */
if (max_size == 0) {
max_size = 1024*1024*1024;
}
2016-09-27 23:55:02 +01:00
cache->max_size = max_size;
/* 4 threads were chosen below because just about all modern CPUs currently
* available that run Mesa have *at least* 4 cores. For these CPUs allowing
* more threads can result in the queue being processed faster, thus
* avoiding excessive memory use due to a backlog of cache entrys building
* up in the queue. Since we set the UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY
* flag this should have little negative impact on low core systems.
*
* The queue will resize automatically when it's full, so adding new jobs
* doesn't stall.
*/
util_queue_init(&cache->cache_queue, "disk$", 32, 4,
UTIL_QUEUE_INIT_RESIZE_IF_FULL |
UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY |
UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY);
cache->path_init_failed = false;
path_fail:
cache->driver_keys_blob_size = cv_size;
/* Create driver id keys */
size_t id_size = strlen(driver_id) + 1;
size_t gpu_name_size = strlen(gpu_name) + 1;
cache->driver_keys_blob_size += id_size;
cache->driver_keys_blob_size += gpu_name_size;
/* We sometimes store entire structs that contains a pointers in the cache,
* use pointer size as a key to avoid hard to debug issues.
*/
uint8_t ptr_size = sizeof(void *);
size_t ptr_size_size = sizeof(ptr_size);
cache->driver_keys_blob_size += ptr_size_size;
size_t driver_flags_size = sizeof(driver_flags);
cache->driver_keys_blob_size += driver_flags_size;
cache->driver_keys_blob =
ralloc_size(cache, cache->driver_keys_blob_size);
if (!cache->driver_keys_blob)
goto fail;
uint8_t *drv_key_blob = cache->driver_keys_blob;
DRV_KEY_CPY(drv_key_blob, &cache_version, cv_size)
DRV_KEY_CPY(drv_key_blob, driver_id, id_size)
DRV_KEY_CPY(drv_key_blob, gpu_name, gpu_name_size)
DRV_KEY_CPY(drv_key_blob, &ptr_size, ptr_size_size)
DRV_KEY_CPY(drv_key_blob, &driver_flags, driver_flags_size)
/* Seed our rand function */
s_rand_xorshift128plus(cache->seed_xorshift128plus, true);
ralloc_free(local);
2016-09-27 23:55:02 +01:00
return cache;
fail:
if (cache)
ralloc_free(cache);
ralloc_free(local);
return NULL;
2016-09-27 23:55:02 +01:00
}
void
disk_cache_destroy(struct disk_cache *cache)
2016-09-27 23:55:02 +01:00
{
if (cache && !cache->path_init_failed) {
util_queue_finish(&cache->cache_queue);
util_queue_destroy(&cache->cache_queue);
disk_cache_destroy_mmap(cache);
}
2016-09-27 23:55:02 +01:00
ralloc_free(cache);
}
/**
 * Block until every job queued on the cache has been processed.
 *
 * Does nothing when the on-disk backend was not set up: in that case
 * disk_cache_create() never initialises cache_queue, so there is nothing to
 * wait for and the queue must not be touched.
 */
void
disk_cache_wait_for_idle(struct disk_cache *cache)
{
   if (cache->path_init_failed)
      return;

   util_queue_finish(&cache->cache_queue);
}
/**
 * Evict the entry stored under \p key, if a file name can be derived for it.
 *
 * NOTE(review): the file name is handed to disk_cache_evict_item() and not
 * freed here; presumably that helper takes ownership - confirm.
 */
void
disk_cache_remove(struct disk_cache *cache, const cache_key key)
{
   char *entry_path = disk_cache_get_cache_filename(cache, key);

   if (entry_path != NULL)
      disk_cache_evict_item(cache, entry_path);
}
/**
 * Allocate a put job holding a private copy of \p key, \p data and, when
 * provided, the cache item metadata.
 *
 * The payload is stored in the same allocation, directly after the job
 * struct. The metadata key array (GLSL items only) is a separate allocation
 * released by destroy_put_job().
 *
 * \return the new job, or NULL if any allocation failed.
 */
static struct disk_cache_put_job *
create_put_job(struct disk_cache *cache, const cache_key key,
               const void *data, size_t size,
               struct cache_item_metadata *cache_item_metadata)
{
   struct disk_cache_put_job *dc_job = malloc(sizeof(*dc_job) + size);
   if (dc_job == NULL)
      return NULL;

   dc_job->cache = cache;
   memcpy(dc_job->key, key, sizeof(cache_key));
   dc_job->data = dc_job + 1;
   memcpy(dc_job->data, data, size);
   dc_job->size = size;

   /* Start from a well-defined empty state. destroy_put_job() frees
    * cache_item_metadata.keys unconditionally, so it must never be left
    * uninitialised, including for non-GLSL metadata types.
    */
   dc_job->cache_item_metadata.type = CACHE_ITEM_TYPE_UNKNOWN;
   dc_job->cache_item_metadata.num_keys = 0;
   dc_job->cache_item_metadata.keys = NULL;

   if (cache_item_metadata == NULL)
      return dc_job;

   /* Copy the cache item metadata */
   dc_job->cache_item_metadata.type = cache_item_metadata->type;
   if (cache_item_metadata->type == CACHE_ITEM_TYPE_GLSL) {
      const size_t keys_bytes =
         cache_item_metadata->num_keys * sizeof(cache_key);

      dc_job->cache_item_metadata.num_keys = cache_item_metadata->num_keys;
      dc_job->cache_item_metadata.keys = malloc(keys_bytes);
      if (!dc_job->cache_item_metadata.keys) {
         free(dc_job);
         return NULL;
      }

      memcpy(dc_job->cache_item_metadata.keys,
             cache_item_metadata->keys, keys_bytes);
   }

   return dc_job;
}
/**
 * Free a put job together with its metadata key array.
 *
 * Registered alongside cache_put() when disk_cache_put() queues a job.
 * A NULL job is ignored; \p thread_index is unused.
 */
static void
destroy_put_job(void *job, int thread_index)
{
   struct disk_cache_put_job *dc_job = job;

   if (dc_job == NULL)
      return;

   free(dc_job->cache_item_metadata.keys);
   free(dc_job);
}
static void
cache_put(void *job, int thread_index)
2016-09-27 23:55:02 +01:00
{
assert(job);
unsigned i = 0;
char *filename = NULL;
struct disk_cache_put_job *dc_job = (struct disk_cache_put_job *) job;
2016-09-27 23:55:02 +01:00
filename = disk_cache_get_cache_filename(dc_job->cache, dc_job->key);
2016-09-27 23:55:02 +01:00
if (filename == NULL)
goto done;
/* If the cache is too large, evict something else first. */
while (*dc_job->cache->size + dc_job->size > dc_job->cache->max_size &&
i < 8) {
disk_cache_evict_lru_item(dc_job->cache);
i++;
}
/* Create CRC of the data. We will read this when restoring the cache and
* use it to check for corruption.
*/
util/disk_cache: compress individual cache entries This reduces the cache size for Deus Ex from ~160M to ~30M for radeonsi (these numbers differ from Grigori's results below probably due to different graphics quality settings). I'm also seeing the following improvements in minimum fps in the Shadow of Mordor benchmark on an i5-6400 CPU@2.70GHz, with a HDD: no-cache: ~10fps with-cache-no-compression: ~15fps with-cache-and-compression: ~20fps Note: The with cache results are from the second run after closing and opening the game to avoid the in-memory cache. Since we mainly care about decompression I went with Z_BEST_COMPRESSION as suggested on irc by Steinar H. Gunderson who has benchmarked decompression speeds. Grigori Goronzy provided the following stats for Deus Ex: Mankind Divided start-up times on a Athlon X4 860k with a SSD: No Cache 215 sec Cold Cache zlib BEST_COMPRESSION 285 sec Warm Cache zlib BEST_COMPRESSION 33 sec Cold Cache zlib BEST_SPEED 264 sec Warm Cache zlib BEST_SPEED 33 sec Cold Cache no compression 266 sec Warm Cache no compression 34 sec The total cache size for that game is 48 MiB with BEST_COMPRESSION, 56 MiB with BEST_SPEED and 170 MiB with no compression. These numbers suggest that it may be ok to go with Z_BEST_SPEED but we should gather some actual decompression times before doing so. Other options might be to do the compression in a separate thread, this might allow us to use a higher compression algorithim such as LZMA. Reviewed-by: Grigori Goronzy <greg@chown.ath.cx> Acked-by: Marek Olšák <marek.olsak@amd.com>
2017-03-01 05:04:23 +00:00
struct cache_entry_file_data cf_data;
cf_data.crc32 = util_hash_crc32(dc_job->data, dc_job->size);
cf_data.uncompressed_size = dc_job->size;
util/disk_cache: compress individual cache entries This reduces the cache size for Deus Ex from ~160M to ~30M for radeonsi (these numbers differ from Grigori's results below probably due to different graphics quality settings). I'm also seeing the following improvements in minimum fps in the Shadow of Mordor benchmark on an i5-6400 CPU@2.70GHz, with a HDD: no-cache: ~10fps with-cache-no-compression: ~15fps with-cache-and-compression: ~20fps Note: The with cache results are from the second run after closing and opening the game to avoid the in-memory cache. Since we mainly care about decompression I went with Z_BEST_COMPRESSION as suggested on irc by Steinar H. Gunderson who has benchmarked decompression speeds. Grigori Goronzy provided the following stats for Deus Ex: Mankind Divided start-up times on a Athlon X4 860k with a SSD: No Cache 215 sec Cold Cache zlib BEST_COMPRESSION 285 sec Warm Cache zlib BEST_COMPRESSION 33 sec Cold Cache zlib BEST_SPEED 264 sec Warm Cache zlib BEST_SPEED 33 sec Cold Cache no compression 266 sec Warm Cache no compression 34 sec The total cache size for that game is 48 MiB with BEST_COMPRESSION, 56 MiB with BEST_SPEED and 170 MiB with no compression. These numbers suggest that it may be ok to go with Z_BEST_SPEED but we should gather some actual decompression times before doing so. Other options might be to do the compression in a separate thread, this might allow us to use a higher compression algorithim such as LZMA. Reviewed-by: Grigori Goronzy <greg@chown.ath.cx> Acked-by: Marek Olšák <marek.olsak@amd.com>
2017-03-01 05:04:23 +00:00
disk_cache_write_item_to_disk(dc_job, &cf_data, filename);
done:
free(filename);
2016-09-27 23:55:02 +01:00
}
/**
 * Store \p size bytes of \p data under \p key.
 *
 * When a put callback is installed the data goes to it and nothing else
 * happens. Otherwise, provided the on-disk backend was set up, a job holding
 * a copy of the data is queued for cache_put(). If the job cannot be created
 * the request is silently dropped.
 */
void
disk_cache_put(struct disk_cache *cache, const cache_key key,
               const void *data, size_t size,
               struct cache_item_metadata *cache_item_metadata)
{
   if (cache->blob_put_cb) {
      cache->blob_put_cb(key, CACHE_KEY_SIZE, data, size);
   } else if (!cache->path_init_failed) {
      struct disk_cache_put_job *put_job =
         create_put_job(cache, key, data, size, cache_item_metadata);

      if (put_job == NULL)
         return;

      util_queue_fence_init(&put_job->fence);
      util_queue_add_job(&cache->cache_queue, put_job, &put_job->fence,
                         cache_put, destroy_put_job, put_job->size);
   }
}
2016-09-27 23:55:02 +01:00
void *
disk_cache_get(struct disk_cache *cache, const cache_key key, size_t *size)
2016-09-27 23:55:02 +01:00
{
if (size)
*size = 0;
if (cache->blob_get_cb) {
/* This is what Android EGL defines as the maxValueSize in egl_cache_t
* class implementation.
*/
const signed long max_blob_size = 64 * 1024;
void *blob = malloc(max_blob_size);
if (!blob)
return NULL;
signed long bytes =
cache->blob_get_cb(key, CACHE_KEY_SIZE, blob, max_blob_size);
if (!bytes) {
free(blob);
return NULL;
}
if (size)
*size = bytes;
return blob;
}
char *filename = disk_cache_get_cache_filename(cache, key);
2016-09-27 23:55:02 +01:00
if (filename == NULL)
return NULL;
2016-09-27 23:55:02 +01:00
return disk_cache_load_item(cache, filename, size);
2016-09-27 23:55:02 +01:00
}
void
disk_cache_put_key(struct disk_cache *cache, const cache_key key)
2016-09-27 23:55:02 +01:00
{
const uint32_t *key_chunk = (const uint32_t *) key;
int i = CPU_TO_LE32(*key_chunk) & CACHE_INDEX_KEY_MASK;
2016-09-27 23:55:02 +01:00
unsigned char *entry;
if (cache->blob_put_cb) {
cache->blob_put_cb(key, CACHE_KEY_SIZE, key_chunk, sizeof(uint32_t));
return;
}
if (cache->path_init_failed)
return;
entry = &cache->stored_keys[i * CACHE_KEY_SIZE];
2016-09-27 23:55:02 +01:00
memcpy(entry, key, CACHE_KEY_SIZE);
}
/* This function lets us test whether a given key was previously
* stored in the cache with disk_cache_put_key(). The implement is
2016-09-27 23:55:02 +01:00
* efficient by not using syscalls or hitting the disk. It's not
* race-free, but the races are benign. If we race with someone else
* calling disk_cache_put_key, then that's just an extra cache miss and an
2016-09-27 23:55:02 +01:00
* extra recompile.
*/
bool
disk_cache_has_key(struct disk_cache *cache, const cache_key key)
2016-09-27 23:55:02 +01:00
{
const uint32_t *key_chunk = (const uint32_t *) key;
int i = CPU_TO_LE32(*key_chunk) & CACHE_INDEX_KEY_MASK;
2016-09-27 23:55:02 +01:00
unsigned char *entry;
if (cache->blob_get_cb) {
uint32_t blob;
return cache->blob_get_cb(key, CACHE_KEY_SIZE, &blob, sizeof(uint32_t));
}
if (cache->path_init_failed)
return false;
entry = &cache->stored_keys[i * CACHE_KEY_SIZE];
2016-09-27 23:55:02 +01:00
return memcmp(entry, key, CACHE_KEY_SIZE) == 0;
}
/**
 * Compute the cache key for \p size bytes of \p data.
 *
 * The key is the SHA-1 of the cache's driver key blob followed by the data,
 * and is written to \p key.
 */
void
disk_cache_compute_key(struct disk_cache *cache, const void *data, size_t size,
                       cache_key key)
{
   struct mesa_sha1 sha1;

   _mesa_sha1_init(&sha1);

   /* Driver keys go in first, then the caller's payload. */
   _mesa_sha1_update(&sha1, cache->driver_keys_blob,
                     cache->driver_keys_blob_size);
   _mesa_sha1_update(&sha1, data, size);

   _mesa_sha1_final(&sha1, key);
}
/**
 * Install the blob callbacks.
 *
 * Once set (non-NULL), disk_cache_put()/disk_cache_put_key() use \p put and
 * disk_cache_get()/disk_cache_has_key() use \p get in place of the on-disk
 * paths.
 */
void
disk_cache_set_callbacks(struct disk_cache *cache, disk_cache_put_cb put,
                         disk_cache_get_cb get)
{
   cache->blob_get_cb = get;
   cache->blob_put_cb = put;
}
#endif /* ENABLE_SHADER_CACHE */