freedreno/ir3: disk-cache support

Adds a shader disk-cache for ir3 shader variants.  Note that builds with
`-Dshader-cache=false` get no-op stubs, in which `disk_cache_create()`
returns NULL.

Binning pass variants are serialized together with their draw-pass
counterparts, due to shared const-state.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5372>
This commit is contained in:
Rob Clark 2020-06-05 10:05:45 -07:00
parent 6aadb00e60
commit f97acb4bb4
8 changed files with 284 additions and 3 deletions

View File

@ -35,6 +35,7 @@ ir3_SOURCES := \
ir3/ir3_cf.c \
ir3/ir3_dce.c \
ir3/ir3_delay.c \
ir3/ir3_disk_cache.c \
ir3/ir3_group.c \
ir3/ir3_image.c \
ir3/ir3_image.h \

View File

@ -40,6 +40,7 @@ static const struct debug_named_value shader_debug_options[] = {
{"forces2en", IR3_DBG_FORCES2EN, "Force s2en mode for tex sampler instructions"},
{"nouboopt", IR3_DBG_NOUBOOPT, "Disable lowering UBO to uniform"},
{"nofp16", IR3_DBG_NOFP16, "Don't lower mediump to fp16"},
{"nocache", IR3_DBG_NOCACHE, "Disable shader cache"},
#ifdef DEBUG
/* DEBUG-only options: */
{"schedmsgs", IR3_DBG_SCHEDMSGS, "Enable scheduler debug messages"},
@ -122,5 +123,7 @@ ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id)
compiler->const_upload_unit = 8;
}
ir3_disk_cache_init(compiler);
return compiler;
}

View File

@ -27,6 +27,8 @@
#ifndef IR3_COMPILER_H_
#define IR3_COMPILER_H_
#include "util/disk_cache.h"
#include "ir3.h"
struct ir3_ra_reg_set;
@ -39,6 +41,8 @@ struct ir3_compiler {
struct ir3_ra_reg_set *mergedregs_set;
uint32_t shader_count;
struct disk_cache *disk_cache;
/*
* Configuration options for things that are handled differently on
* different generations:
@ -98,6 +102,14 @@ struct ir3_compiler {
void ir3_compiler_destroy(struct ir3_compiler *compiler);
struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id);
void ir3_disk_cache_init(struct ir3_compiler *compiler);
void ir3_disk_cache_init_shader_key(struct ir3_compiler *compiler,
struct ir3_shader *shader);
bool ir3_disk_cache_retrieve(struct ir3_compiler *compiler,
struct ir3_shader_variant *v);
void ir3_disk_cache_store(struct ir3_compiler *compiler,
struct ir3_shader_variant *v);
int ir3_compile_shader_nir(struct ir3_compiler *compiler,
struct ir3_shader_variant *so);
@ -120,6 +132,7 @@ enum ir3_shader_debug {
IR3_DBG_FORCES2EN = BITFIELD_BIT(8),
IR3_DBG_NOUBOOPT = BITFIELD_BIT(9),
IR3_DBG_NOFP16 = BITFIELD_BIT(10),
IR3_DBG_NOCACHE = BITFIELD_BIT(11),
/* DEBUG-only options: */
IR3_DBG_SCHEDMSGS = BITFIELD_BIT(20),

View File

@ -0,0 +1,226 @@
/*
* Copyright © 2020 Google, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "nir_serialize.h"
#include "ir3_compiler.h"
#include "ir3_nir.h"
#define debug 0
/*
* Shader disk-cache implementation.
*
* Note that at least in the EGL_ANDROID_blob_cache, we should never
* rely on inter-dependencies between different cache entries:
*
* No guarantees are made as to whether a given key/value pair is present in
* the cache after the set call. If a different value has been associated
* with the given key in the past then it is undefined which value, if any, is
* associated with the key after the set call. Note that while there are no
* guarantees, the cache implementation should attempt to cache the most
* recently set value for a given key.
*
* For this reason, and since binning pass variants share const_state with
* their draw-pass counterpart, both variants are serialized together in a
* single cache entry.
*/
/**
 * Create the shader disk cache for a compiler instance.
 *
 * The cache is identified by a renderer string built from the GPU id
 * ("FDnnn") and by the build-id note looked up from this function's own
 * address; the ir3 debug flags are passed as the driver flags.
 *
 * compiler->disk_cache is not written when caching is disabled with
 * IR3_DBG_NOCACHE, or when no usable 20-byte build-id is available.
 */
void
ir3_disk_cache_init(struct ir3_compiler *compiler)
{
	if (ir3_shader_debug & IR3_DBG_NOCACHE)
		return;

	/* array length = print length + nul char + 1 extra to verify it's unused */
	char renderer[7];
	ASSERTED int len =
		snprintf(renderer, sizeof(renderer), "FD%03d", compiler->gpu_id);
	assert(len == sizeof(renderer) - 2);

	const struct build_id_note *note =
		build_id_find_nhdr_for_addr(ir3_disk_cache_init);
	assert(note && build_id_length(note) == 20); /* sha1 */

	/* The asserts vanish in release builds, so repeat the check at
	 * runtime: without a 20-byte build-id we would either dereference
	 * NULL or format bytes past the end of the note.  Running without
	 * a disk cache is the safe fallback.
	 */
	if (!note || build_id_length(note) != 20)
		return;

	const uint8_t *id_sha1 = build_id_data(note);
	assert(id_sha1);
	if (!id_sha1)
		return;

	/* Hex string form of the build-id, passed as the cache "timestamp": */
	char timestamp[41];
	_mesa_sha1_format(timestamp, id_sha1);

	const uint64_t driver_flags = ir3_shader_debug;
	compiler->disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
}
void
ir3_disk_cache_init_shader_key(struct ir3_compiler *compiler,
struct ir3_shader *shader)
{
if (!compiler->disk_cache)
return;
struct mesa_sha1 ctx;
_mesa_sha1_init(&ctx);
/* Serialize the NIR to a binary blob that we can hash for the disk
* cache. Drop unnecessary information (like variable names)
* so the serialized NIR is smaller, and also to let us detect more
* isomorphic shaders when hashing, increasing cache hits.
*/
struct blob blob;
blob_init(&blob);
nir_serialize(&blob, shader->nir, true);
_mesa_sha1_update(&ctx, blob.data, blob.size);
blob_finish(&blob);
/* Note that on some gens stream-out is lowered in ir3 to stg. For later
* gens we maybe don't need to include stream-out in the cache key.
*/
_mesa_sha1_update(&ctx, &shader->stream_output, sizeof(shader->stream_output));
_mesa_sha1_final(&ctx, shader->cache_key);
}
/*
 * Derive the disk-cache key for a single variant.  The hashed input is,
 * in order: the owning shader's cache key, the variant key, and the
 * binning-pass flag.  The result is written to cache_key.
 */
static void
compute_variant_key(struct ir3_compiler *compiler,
		struct ir3_shader_variant *v, cache_key cache_key)
{
	struct blob key_blob;

	blob_init(&key_blob);

	blob_write_bytes(&key_blob, &v->shader->cache_key,
			sizeof(v->shader->cache_key));
	blob_write_bytes(&key_blob, &v->key, sizeof(v->key));
	blob_write_uint8(&key_blob, v->binning_pass);

	disk_cache_compute_key(compiler->disk_cache, key_blob.data,
			key_blob.size, cache_key);

	blob_finish(&key_blob);
}
static void
retrieve_variant(struct blob_reader *blob, struct ir3_shader_variant *v)
{
blob_copy_bytes(blob, VARIANT_CACHE_PTR(v), VARIANT_CACHE_SIZE);
/*
* pointers need special handling:
*/
v->bin = malloc(4 * v->info.sizedwords);
blob_copy_bytes(blob, v->bin, 4 * v->info.sizedwords);
if (!v->binning_pass) {
blob_copy_bytes(blob, v->const_state, sizeof(*v->const_state));
unsigned immeds_sz = v->const_state->immediates_size *
sizeof(v->const_state->immediates[0]);
v->const_state->immediates = ralloc_size(v->const_state, immeds_sz);
blob_copy_bytes(blob, v->const_state->immediates, immeds_sz);
}
}
static void
store_variant(struct blob *blob, struct ir3_shader_variant *v)
{
blob_write_bytes(blob, VARIANT_CACHE_PTR(v), VARIANT_CACHE_SIZE);
/*
* pointers need special handling:
*/
blob_write_bytes(blob, v->bin, 4 * v->info.sizedwords);
if (!v->binning_pass) {
blob_write_bytes(blob, v->const_state, sizeof(*v->const_state));
unsigned immeds_sz = v->const_state->immediates_size *
sizeof(v->const_state->immediates[0]);
blob_write_bytes(blob, v->const_state->immediates, immeds_sz);
}
}
bool
ir3_disk_cache_retrieve(struct ir3_compiler *compiler,
struct ir3_shader_variant *v)
{
if (!compiler->disk_cache)
return false;
cache_key cache_key;
compute_variant_key(compiler, v, cache_key);
if (debug) {
char sha1[41];
_mesa_sha1_format(sha1, cache_key);
fprintf(stderr, "[mesa disk cache] retrieving variant %s: ", sha1);
}
size_t size;
void *buffer = disk_cache_get(compiler->disk_cache, cache_key, &size);
if (debug)
fprintf(stderr, "%s\n", buffer ? "found" : "missing");
if (!buffer)
return false;
struct blob_reader blob;
blob_reader_init(&blob, buffer, size);
retrieve_variant(&blob, v);
if (v->binning)
retrieve_variant(&blob, v->binning);
free(buffer);
return true;
}
void
ir3_disk_cache_store(struct ir3_compiler *compiler,
struct ir3_shader_variant *v)
{
if (!compiler->disk_cache)
return;
cache_key cache_key;
compute_variant_key(compiler, v, cache_key);
if (debug) {
char sha1[41];
_mesa_sha1_format(sha1, cache_key);
fprintf(stderr, "[mesa disk cache] storing variant %s\n", sha1);
}
struct blob blob;
blob_init(&blob);
store_variant(&blob, v);
if (v->binning)
store_variant(&blob, v->binning);
disk_cache_put(compiler->disk_cache, cache_key, blob.data, blob.size, NULL);
blob_finish(&blob);
}

View File

@ -245,12 +245,17 @@ create_variant(struct ir3_shader *shader, const struct ir3_shader_key *key)
goto fail;
}
if (ir3_disk_cache_retrieve(shader->compiler, v))
return v;
if (!compile_variant(v))
goto fail;
if (needs_binning_variant(v) && !compile_variant(v->binning))
goto fail;
ir3_disk_cache_store(shader->compiler, v);
return v;
fail:
@ -445,11 +450,12 @@ ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir,
if (stream_output)
memcpy(&shader->stream_output, stream_output, sizeof(shader->stream_output));
shader->num_reserved_user_consts = reserved_user_consts;
shader->nir = nir;
ir3_disk_cache_init_shader_key(compiler, shader);
ir3_nir_post_finalize(compiler, nir);
shader->nir = nir;
if (ir3_shader_debug & IR3_DBG_DISASM) {
printf("dump nir%d: type=%d", shader->id, shader->type);
nir_print_shader(shader->nir, stdout);

View File

@ -33,6 +33,7 @@
#include "compiler/shader_enums.h"
#include "compiler/nir/nir.h"
#include "util/bitscan.h"
#include "util/disk_cache.h"
#include "ir3_compiler.h"
@ -481,11 +482,25 @@ struct ir3_shader_variant {
gl_shader_stage type;
struct ir3_shader *shader;
/*
* Below here is serialized when written to disk cache:
*/
/* The actual binary shader instructions, size given by info.sizedwords: */
uint32_t *bin;
struct ir3_const_state *const_state;
/*
* The following macros are used by the shader disk cache save/
* restore paths to serialize/deserialize the variant. Any
* pointers that require special handling in store_variant()
* and retrieve_variant() should go above here.
*/
#define VARIANT_CACHE_START offsetof(struct ir3_shader_variant, info)
#define VARIANT_CACHE_PTR(v) (((char *)v) + VARIANT_CACHE_START)
#define VARIANT_CACHE_SIZE (sizeof(struct ir3_shader_variant) - VARIANT_CACHE_START)
struct ir3_info info;
/* Levels of nesting of flow control:
@ -681,6 +696,8 @@ struct ir3_shader {
struct ir3_shader_variant *variants;
mtx_t variants_lock;
cache_key cache_key; /* shader disk-cache key */
/* Bitmask of bits of the shader key used by this shader. Used to avoid
* recompiles for GL NOS that doesn't actually apply to the shader.
*/

View File

@ -78,6 +78,7 @@ libfreedreno_ir3_files = files(
'ir3_cp_postsched.c',
'ir3_dce.c',
'ir3_delay.c',
'ir3_disk_cache.c',
'ir3_group.c',
'ir3_image.c',
'ir3_image.h',
@ -110,7 +111,7 @@ libfreedreno_ir3 = static_library(
include_directories : [inc_freedreno, inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux],
c_args : [no_override_init_args],
gnu_symbol_visibility : 'hidden',
dependencies : idep_nir_headers,
dependencies : [idep_nir_headers, dep_dl],
build_by_default : false,
)

View File

@ -716,6 +716,19 @@ fd_get_compiler_options(struct pipe_screen *pscreen,
return ir2_get_compiler_options();
}
/**
 * pipe_screen::get_disk_shader_cache hook.
 *
 * Returns the ir3 compiler's disk cache for screens that use ir3, and
 * NULL for all other screens.
 */
static struct disk_cache *
fd_get_disk_shader_cache(struct pipe_screen *pscreen)
{
	struct fd_screen *screen = fd_screen(pscreen);

	if (!is_ir3(screen))
		return NULL;

	struct ir3_compiler *compiler = screen->compiler;

	return compiler->disk_cache;
}
bool
fd_screen_bo_get_handle(struct pipe_screen *pscreen,
struct fd_bo *bo,
@ -993,6 +1006,7 @@ fd_screen_create(struct fd_device *dev, struct renderonly *ro)
pscreen->get_shader_param = fd_screen_get_shader_param;
pscreen->get_compute_param = fd_get_compute_param;
pscreen->get_compiler_options = fd_get_compiler_options;
pscreen->get_disk_shader_cache = fd_get_disk_shader_cache;
fd_resource_screen_init(pscreen);
fd_query_screen_init(pscreen);