radeonsi: use the new run-time linker for shaders

v2:
- fix a memory leak

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
Nicolai Hähnle 2018-05-22 16:14:16 +02:00 committed by Marek Olšák
parent 16bee0e5f6
commit bf8a1ca902
9 changed files with 268 additions and 233 deletions

View File

@ -28,6 +28,7 @@
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include "ac_rtld.h"
#include "amd_kernel_code_t.h"
#include "si_build_pm4.h"
#include "si_compute.h"
@ -61,8 +62,26 @@ static const amd_kernel_code_t *si_compute_get_code_object(
if (!program->use_code_object_v2) {
return NULL;
}
return (const amd_kernel_code_t*)
(program->shader.binary.code + symbol_offset);
struct ac_rtld_binary rtld;
if (!ac_rtld_open(&rtld, 1, &program->shader.binary.elf_buffer,
&program->shader.binary.elf_size))
return NULL;
const amd_kernel_code_t *result = NULL;
const char *text;
size_t size;
if (!ac_rtld_get_section_by_name(&rtld, ".text", &text, &size))
goto out;
if (symbol_offset + sizeof(amd_kernel_code_t) > size)
goto out;
result = (const amd_kernel_code_t*)(text + symbol_offset);
out:
ac_rtld_close(&rtld);
return result;
}
static void code_object_to_config(const amd_kernel_code_t *code_object,
@ -145,7 +164,7 @@ static void si_create_compute_state_async(void *job, int thread_index)
si_shader_dump(sscreen, shader, debug, PIPE_SHADER_COMPUTE,
stderr, true);
if (!si_shader_binary_upload(sscreen, shader))
if (!si_shader_binary_upload(sscreen, shader, 0))
program->shader.compilation_failed = true;
} else {
mtx_unlock(&sscreen->shader_cache_mutex);
@ -237,25 +256,23 @@ static void *si_create_compute_state(
header = cso->prog;
code = cso->prog + sizeof(struct pipe_llvm_program_header);
ac_elf_read(code, header->num_bytes, &program->shader.binary);
if (program->use_code_object_v2) {
const amd_kernel_code_t *code_object =
si_compute_get_code_object(program, 0);
code_object_to_config(code_object, &program->shader.config);
if (program->shader.binary.reloc_count != 0) {
fprintf(stderr, "Error: %d unsupported relocations\n",
program->shader.binary.reloc_count);
FREE(program);
return NULL;
}
} else {
ac_shader_binary_read_config(&program->shader.binary,
&program->shader.config, 0, false);
program->shader.binary.elf_size = header->num_bytes;
program->shader.binary.elf_buffer = malloc(header->num_bytes);
if (!program->shader.binary.elf_buffer) {
FREE(program);
return NULL;
}
memcpy((void *)program->shader.binary.elf_buffer, code, header->num_bytes);
const amd_kernel_code_t *code_object =
si_compute_get_code_object(program, 0);
code_object_to_config(code_object, &program->shader.config);
si_shader_dump(sctx->screen, &program->shader, &sctx->debug,
PIPE_SHADER_COMPUTE, stderr, true);
if (!si_shader_binary_upload(sctx->screen, &program->shader)) {
if (!si_shader_binary_upload(sctx->screen, &program->shader, 0)) {
fprintf(stderr, "LLVM failed to upload shader\n");
free((void *)program->shader.binary.elf_buffer);
FREE(program);
return NULL;
}
@ -390,9 +407,7 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx,
if (sctx->compute_scratch_buffer != shader->scratch_bo && scratch_needed) {
uint64_t scratch_va = sctx->compute_scratch_buffer->gpu_address;
si_shader_apply_scratch_relocs(shader, scratch_va);
if (!si_shader_binary_upload(sctx->screen, shader))
if (!si_shader_binary_upload(sctx->screen, shader, scratch_va))
return false;
si_resource_reference(&shader->scratch_bo,
@ -423,11 +438,7 @@ static bool si_switch_compute_shader(struct si_context *sctx,
unsigned lds_blocks;
config = &inline_config;
if (code_object) {
code_object_to_config(code_object, config);
} else {
ac_shader_binary_read_config(&shader->binary, config, offset, false);
}
code_object_to_config(code_object, config);
lds_blocks = config->lds_size;
/* XXX: We are over allocating LDS. For GFX6, the shader reports

View File

@ -32,6 +32,7 @@
#include "util/u_memory.h"
#include "util/u_string.h"
#include "ac_debug.h"
#include "ac_rtld.h"
static void si_dump_bo_list(struct si_context *sctx,
const struct radeon_saved_cs *saved, FILE *f);
@ -201,15 +202,16 @@ static void si_dump_compute_shader(struct si_context *ctx,
/**
* Shader compiles can be overridden with arbitrary ELF objects by setting
* the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
*
* TODO: key this off some hash
*/
bool si_replace_shader(unsigned num, struct ac_shader_binary *binary)
bool si_replace_shader(unsigned num, struct si_shader_binary *binary)
{
const char *p = debug_get_option_replace_shaders();
const char *semicolon;
char *copy = NULL;
FILE *f;
long filesize, nread;
char *buf = NULL;
bool replaced = false;
if (!p)
@ -265,23 +267,25 @@ bool si_replace_shader(unsigned num, struct ac_shader_binary *binary)
if (fseek(f, 0, SEEK_SET) != 0)
goto file_error;
buf = MALLOC(filesize);
if (!buf) {
binary->elf_buffer = MALLOC(filesize);
if (!binary->elf_buffer) {
fprintf(stderr, "out of memory\n");
goto out_close;
}
nread = fread(buf, 1, filesize, f);
if (nread != filesize)
nread = fread((void*)binary->elf_buffer, 1, filesize, f);
if (nread != filesize) {
FREE((void*)binary->elf_buffer);
binary->elf_buffer = NULL;
goto file_error;
}
ac_elf_read(buf, filesize, binary);
binary->elf_size = nread;
replaced = true;
out_close:
fclose(f);
out_free:
FREE(buf);
free(copy);
return replaced;
@ -922,33 +926,52 @@ struct si_shader_inst {
};
/**
* Split a disassembly string into instructions and add them to the array
* pointed to by \p instructions.
* Open the given \p binary as \p rtld_binary and split the contained
* disassembly string into instructions and add them to the array
* pointed to by \p instructions, which must be sufficiently large.
*
* Labels are considered to be part of the following instruction.
*
* The caller must keep \p rtld_binary alive as long as \p instructions are
* used and then close it afterwards.
*/
static void si_add_split_disasm(const char *disasm,
static void si_add_split_disasm(struct ac_rtld_binary *rtld_binary,
struct si_shader_binary *binary,
uint64_t *addr,
unsigned *num,
struct si_shader_inst *instructions)
{
const char *semicolon;
if (!ac_rtld_open(rtld_binary, 1, &binary->elf_buffer, &binary->elf_size))
return;
const char *disasm;
size_t nbytes;
if (!ac_rtld_get_section_by_name(rtld_binary, ".AMDGPU.disasm",
&disasm, &nbytes))
return;
const char *end = disasm + nbytes;
while (disasm < end) {
const char *semicolon = memchr(disasm, ';', end - disasm);
if (!semicolon)
break;
while ((semicolon = strchr(disasm, ';'))) {
struct si_shader_inst *inst = &instructions[(*num)++];
const char *end = util_strchrnul(semicolon, '\n');
const char *inst_end = memchr(semicolon + 1, '\n', end - semicolon - 1);
if (!inst_end)
inst_end = end;
inst->text = disasm;
inst->textlen = end - disasm;
inst->textlen = inst_end - disasm;
inst->addr = *addr;
/* More than 16 chars after ";" means the instruction is 8 bytes long. */
inst->size = end - semicolon > 16 ? 8 : 4;
inst->size = inst_end - semicolon > 16 ? 8 : 4;
*addr += inst->size;
if (!(*end))
if (inst_end == end)
break;
disasm = end + 1;
disasm = inst_end + 1;
}
}
@ -961,7 +984,7 @@ static void si_print_annotated_shader(struct si_shader *shader,
unsigned num_waves,
FILE *f)
{
if (!shader || !shader->binary.disasm_string)
if (!shader)
return;
uint64_t start_addr = shader->bo->gpu_address;
@ -985,25 +1008,26 @@ static void si_print_annotated_shader(struct si_shader *shader,
*/
unsigned num_inst = 0;
uint64_t inst_addr = start_addr;
struct ac_rtld_binary rtld_binaries[5] = {};
struct si_shader_inst *instructions =
calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));
if (shader->prolog) {
si_add_split_disasm(shader->prolog->binary.disasm_string,
si_add_split_disasm(&rtld_binaries[0], &shader->prolog->binary,
&inst_addr, &num_inst, instructions);
}
if (shader->previous_stage) {
si_add_split_disasm(shader->previous_stage->binary.disasm_string,
si_add_split_disasm(&rtld_binaries[1], &shader->previous_stage->binary,
&inst_addr, &num_inst, instructions);
}
if (shader->prolog2) {
si_add_split_disasm(shader->prolog2->binary.disasm_string,
si_add_split_disasm(&rtld_binaries[2], &shader->prolog2->binary,
&inst_addr, &num_inst, instructions);
}
si_add_split_disasm(shader->binary.disasm_string,
si_add_split_disasm(&rtld_binaries[3], &shader->binary,
&inst_addr, &num_inst, instructions);
if (shader->epilog) {
si_add_split_disasm(shader->epilog->binary.disasm_string,
si_add_split_disasm(&rtld_binaries[4], &shader->epilog->binary,
&inst_addr, &num_inst, instructions);
}
@ -1041,6 +1065,8 @@ static void si_print_annotated_shader(struct si_shader *shader,
fprintf(f, "\n\n");
free(instructions);
for (unsigned i = 0; i < ARRAY_SIZE(rtld_binaries); ++i)
ac_rtld_close(&rtld_binaries[i]);
}
static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f)

View File

@ -721,7 +721,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
struct si_shader_part *part = parts[i];
parts[i] = part->next;
ac_shader_binary_clean(&part->binary);
si_shader_binary_clean(&part->binary);
FREE(part);
}
}

View File

@ -1296,7 +1296,7 @@ void si_log_compute_state(struct si_context *sctx, struct u_log_context *log);
void si_init_debug_functions(struct si_context *sctx);
void si_check_vm_faults(struct si_context *sctx,
struct radeon_saved_cs *saved, enum ring_type ring);
bool si_replace_shader(unsigned num, struct ac_shader_binary *binary);
bool si_replace_shader(unsigned num, struct si_shader_binary *binary);
/* si_dma.c */
void si_init_dma_functions(struct si_context *sctx);

View File

@ -29,8 +29,10 @@
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_dump.h"
#include "ac_binary.h"
#include "ac_exp_param.h"
#include "ac_shader_util.h"
#include "ac_rtld.h"
#include "ac_llvm_util.h"
#include "si_shader_internal.h"
#include "si_pipe.h"
@ -5045,168 +5047,157 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
ac_build_kill_if_false(&ctx->ac, bit);
}
void si_shader_apply_scratch_relocs(struct si_shader *shader,
uint64_t scratch_va)
{
unsigned i;
uint32_t scratch_rsrc_dword0 = scratch_va;
uint32_t scratch_rsrc_dword1 =
S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
/* Enable scratch coalescing. */
scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1);
for (i = 0 ; i < shader->binary.reloc_count; i++) {
const struct ac_shader_reloc *reloc =
&shader->binary.relocs[i];
if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) {
util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset,
&scratch_rsrc_dword0, 4);
} else if (!strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset,
&scratch_rsrc_dword1, 4);
}
}
}
/* For the UMR disassembler. */
#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
#define DEBUGGER_NUM_MARKERS 5
static unsigned si_get_shader_binary_size(const struct si_shader *shader)
static bool si_shader_binary_open(const struct si_shader *shader,
struct ac_rtld_binary *rtld)
{
unsigned size = shader->binary.code_size;
const char *part_elfs[5];
size_t part_sizes[5];
unsigned num_parts = 0;
if (shader->prolog)
size += shader->prolog->binary.code_size;
if (shader->previous_stage)
size += shader->previous_stage->binary.code_size;
if (shader->prolog2)
size += shader->prolog2->binary.code_size;
if (shader->epilog)
size += shader->epilog->binary.code_size;
return size + DEBUGGER_NUM_MARKERS * 4;
#define add_part(shader_or_part) \
if (shader_or_part) { \
part_elfs[num_parts] = (shader_or_part)->binary.elf_buffer; \
part_sizes[num_parts] = (shader_or_part)->binary.elf_size; \
num_parts++; \
}
add_part(shader->prolog);
add_part(shader->previous_stage);
add_part(shader->prolog2);
add_part(shader);
add_part(shader->epilog);
#undef add_part
return ac_rtld_open(rtld, num_parts, part_elfs, part_sizes);
}
bool si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
static unsigned si_get_shader_binary_size(const struct si_shader *shader)
{
const struct ac_shader_binary *prolog =
shader->prolog ? &shader->prolog->binary : NULL;
const struct ac_shader_binary *previous_stage =
shader->previous_stage ? &shader->previous_stage->binary : NULL;
const struct ac_shader_binary *prolog2 =
shader->prolog2 ? &shader->prolog2->binary : NULL;
const struct ac_shader_binary *epilog =
shader->epilog ? &shader->epilog->binary : NULL;
const struct ac_shader_binary *mainb = &shader->binary;
unsigned bo_size = si_get_shader_binary_size(shader) +
(!epilog ? mainb->rodata_size : 0);
unsigned char *ptr;
struct ac_rtld_binary rtld;
si_shader_binary_open(shader, &rtld);
return rtld.rx_size;
}
assert(!prolog || !prolog->rodata_size);
assert(!previous_stage || !previous_stage->rodata_size);
assert(!prolog2 || !prolog2->rodata_size);
assert((!prolog && !previous_stage && !prolog2 && !epilog) ||
!mainb->rodata_size);
assert(!epilog || !epilog->rodata_size);
static bool si_get_external_symbol(void *data, const char *name, uint64_t *value)
{
uint64_t *scratch_va = data;
if (!strcmp(scratch_rsrc_dword0_symbol, name)) {
*value = (uint32_t)*scratch_va;
return true;
}
if (!strcmp(scratch_rsrc_dword1_symbol, name)) {
/* Enable scratch coalescing. */
*value = S_008F04_BASE_ADDRESS_HI(*scratch_va >> 32) |
S_008F04_SWIZZLE_ENABLE(1);
if (HAVE_LLVM < 0x0800) {
/* Old LLVM created an R_ABS32_HI relocation for
* this symbol. */
*value <<= 32;
}
return true;
}
return false;
}
bool si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader,
uint64_t scratch_va)
{
struct ac_rtld_binary binary;
if (!si_shader_binary_open(shader, &binary))
return false;
si_resource_reference(&shader->bo, NULL);
shader->bo = si_aligned_buffer_create(&sscreen->b,
sscreen->cpdma_prefetch_writes_memory ?
0 : SI_RESOURCE_FLAG_READ_ONLY,
PIPE_USAGE_IMMUTABLE,
align(bo_size, SI_CPDMA_ALIGNMENT),
align(binary.rx_size, SI_CPDMA_ALIGNMENT),
256);
if (!shader->bo)
return false;
/* Upload. */
ptr = sscreen->ws->buffer_map(shader->bo->buf, NULL,
struct ac_rtld_upload_info u = {};
u.binary = &binary;
u.get_external_symbol = si_get_external_symbol;
u.cb_data = &scratch_va;
u.rx_va = shader->bo->gpu_address;
u.rx_ptr = sscreen->ws->buffer_map(shader->bo->buf, NULL,
PIPE_TRANSFER_READ_WRITE |
PIPE_TRANSFER_UNSYNCHRONIZED |
RADEON_TRANSFER_TEMPORARY);
if (!u.rx_ptr)
return false;
/* Don't use util_memcpy_cpu_to_le32. LLVM binaries are
* endian-independent. */
if (prolog) {
memcpy(ptr, prolog->code, prolog->code_size);
ptr += prolog->code_size;
}
if (previous_stage) {
memcpy(ptr, previous_stage->code, previous_stage->code_size);
ptr += previous_stage->code_size;
}
if (prolog2) {
memcpy(ptr, prolog2->code, prolog2->code_size);
ptr += prolog2->code_size;
}
memcpy(ptr, mainb->code, mainb->code_size);
ptr += mainb->code_size;
if (epilog) {
memcpy(ptr, epilog->code, epilog->code_size);
ptr += epilog->code_size;
} else if (mainb->rodata_size > 0) {
memcpy(ptr, mainb->rodata, mainb->rodata_size);
ptr += mainb->rodata_size;
}
/* Add end-of-code markers for the UMR disassembler. */
uint32_t *ptr32 = (uint32_t*)ptr;
for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; i++)
ptr32[i] = DEBUGGER_END_OF_CODE_MARKER;
bool ok = ac_rtld_upload(&u);
sscreen->ws->buffer_unmap(shader->bo->buf);
return true;
ac_rtld_close(&binary);
return ok;
}
static void si_shader_dump_disassembly(const struct ac_shader_binary *binary,
static void si_shader_dump_disassembly(const struct si_shader_binary *binary,
struct pipe_debug_callback *debug,
const char *name, FILE *file)
{
char *line, *p;
unsigned i, count;
struct ac_rtld_binary rtld_binary;
if (binary->disasm_string) {
fprintf(file, "Shader %s disassembly:\n", name);
fprintf(file, "%s", binary->disasm_string);
if (!ac_rtld_open(&rtld_binary, 1, &binary->elf_buffer, &binary->elf_size))
return;
if (debug && debug->debug_message) {
/* Very long debug messages are cut off, so send the
* disassembly one line at a time. This causes more
* overhead, but on the plus side it simplifies
* parsing of resulting logs.
*/
pipe_debug_message(debug, SHADER_INFO,
"Shader Disassembly Begin");
const char *disasm;
size_t nbytes;
line = binary->disasm_string;
while (*line) {
p = util_strchrnul(line, '\n');
count = p - line;
if (!ac_rtld_get_section_by_name(&rtld_binary, ".AMDGPU.disasm", &disasm, &nbytes))
goto out;
if (count) {
pipe_debug_message(debug, SHADER_INFO,
"%.*s", count, line);
}
fprintf(file, "Shader %s disassembly:\n", name);
if (nbytes > INT_MAX) {
fprintf(file, "too long\n");
goto out;
}
if (!*p)
break;
line = p + 1;
fprintf(file, "%*s", (int)nbytes, disasm);
if (debug && debug->debug_message) {
/* Very long debug messages are cut off, so send the
* disassembly one line at a time. This causes more
* overhead, but on the plus side it simplifies
* parsing of resulting logs.
*/
pipe_debug_message(debug, SHADER_INFO,
"Shader Disassembly Begin");
uint64_t line = 0;
while (line < nbytes) {
int count = nbytes - line;
const char *nl = memchr(disasm + line, '\n', nbytes - line);
if (nl)
count = nl - disasm;
if (count) {
pipe_debug_message(debug, SHADER_INFO,
"%.*s", count, disasm + line);
}
pipe_debug_message(debug, SHADER_INFO,
"Shader Disassembly End");
}
} else {
fprintf(file, "Shader %s binary:\n", name);
for (i = 0; i < binary->code_size; i += 4) {
fprintf(file, "@0x%x: %02x%02x%02x%02x\n", i,
binary->code[i + 3], binary->code[i + 2],
binary->code[i + 1], binary->code[i]);
line += count + 1;
}
pipe_debug_message(debug, SHADER_INFO,
"Shader Disassembly End");
}
out:
ac_rtld_close(&rtld_binary);
}
static void si_calculate_max_simd_waves(struct si_shader *shader)
@ -5398,8 +5389,21 @@ void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
check_debug_option);
}
bool si_shader_binary_read_config(struct si_shader_binary *binary,
struct ac_shader_config *conf)
{
struct ac_rtld_binary rtld;
if (!ac_rtld_open(&rtld, 1, &binary->elf_buffer, &binary->elf_size))
return false;
bool ok = ac_rtld_read_config(&rtld, conf);
ac_rtld_close(&rtld);
return ok;
}
static int si_compile_llvm(struct si_screen *sscreen,
struct ac_shader_binary *binary,
struct si_shader_binary *binary,
struct ac_shader_config *conf,
struct ac_llvm_compiler *compiler,
LLVMModuleRef mod,
@ -5408,7 +5412,6 @@ static int si_compile_llvm(struct si_screen *sscreen,
const char *name,
bool less_optimized)
{
int r = 0;
unsigned count = p_atomic_inc_return(&sscreen->num_compilations);
if (si_can_dump_shader(sscreen, processor)) {
@ -5428,13 +5431,14 @@ static int si_compile_llvm(struct si_screen *sscreen,
}
if (!si_replace_shader(count, binary)) {
r = si_llvm_compile(mod, binary, compiler, debug,
less_optimized);
unsigned r = si_llvm_compile(mod, binary, compiler, debug,
less_optimized);
if (r)
return r;
}
ac_shader_binary_read_config(binary, conf, 0, false);
if (!si_shader_binary_read_config(binary, conf))
return -1;
/* Enable 64-bit and 16-bit denormals, because there is no performance
* cost.
@ -5450,24 +5454,7 @@ static int si_compile_llvm(struct si_screen *sscreen,
*/
conf->float_mode |= V_00B028_FP_64_DENORMS;
FREE(binary->config);
FREE(binary->global_symbol_offsets);
binary->config = NULL;
binary->global_symbol_offsets = NULL;
/* Some shaders can't have rodata because their binaries can be
* concatenated.
*/
if (binary->rodata_size &&
(processor == PIPE_SHADER_VERTEX ||
processor == PIPE_SHADER_TESS_CTRL ||
processor == PIPE_SHADER_TESS_EVAL ||
processor == PIPE_SHADER_FRAGMENT)) {
fprintf(stderr, "radeonsi: The shader can't have rodata.");
return -EINVAL;
}
return r;
return 0;
}
static void si_llvm_build_ret(struct si_shader_context *ctx, LLVMValueRef ret)
@ -5609,7 +5596,11 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
fprintf(stderr, "GS Copy Shader:\n");
si_shader_dump(sscreen, ctx.shader, debug,
PIPE_SHADER_GEOMETRY, stderr, true);
ok = si_shader_binary_upload(sscreen, ctx.shader);
if (!ctx.shader->config.scratch_bytes_per_wave)
ok = si_shader_binary_upload(sscreen, ctx.shader, 0);
else
ok = true;
}
si_llvm_dispose(&ctx);
@ -8011,7 +8002,7 @@ bool si_shader_create(struct si_screen *sscreen, struct ac_llvm_compiler *compil
stderr, true);
/* Upload. */
if (!si_shader_binary_upload(sscreen, shader)) {
if (!si_shader_binary_upload(sscreen, shader, 0)) {
fprintf(stderr, "LLVM failed to upload shader\n");
return false;
}
@ -8027,7 +8018,7 @@ void si_shader_destroy(struct si_shader *shader)
si_resource_reference(&shader->bo, NULL);
if (!shader->is_binary_shared)
ac_shader_binary_clean(&shader->binary);
si_shader_binary_clean(&shader->binary);
free(shader->shader_log);
}

View File

@ -588,6 +588,13 @@ struct si_shader_info {
unsigned max_simd_waves;
};
struct si_shader_binary {
const char *elf_buffer;
size_t elf_size;
char *llvm_ir_string;
};
struct si_shader {
struct si_compiler_ctx_state compiler_ctx_state;
@ -612,7 +619,7 @@ struct si_shader {
bool is_gs_copy_shader;
/* The following data is all that's needed for binary shaders. */
struct ac_shader_binary binary;
struct si_shader_binary binary;
struct ac_shader_config config;
struct si_shader_info info;
@ -669,7 +676,7 @@ struct si_shader {
struct si_shader_part {
struct si_shader_part *next;
union si_shader_part_key key;
struct ac_shader_binary binary;
struct si_shader_binary binary;
struct ac_shader_config config;
};
@ -690,7 +697,8 @@ void si_shader_destroy(struct si_shader *shader);
unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned index);
unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index,
unsigned is_varying);
bool si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
bool si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader,
uint64_t scratch_va);
void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
struct pipe_debug_callback *debug, unsigned processor,
FILE *f, bool check_debug_option);
@ -698,9 +706,10 @@ void si_shader_dump_stats_for_shader_db(const struct si_shader *shader,
struct pipe_debug_callback *debug);
void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
unsigned *lds_size);
void si_shader_apply_scratch_relocs(struct si_shader *shader,
uint64_t scratch_va);
const char *si_get_shader_name(const struct si_shader *shader, unsigned processor);
bool si_shader_binary_read_config(struct si_shader_binary *binary,
struct ac_shader_config *conf);
void si_shader_binary_clean(struct si_shader_binary *binary);
/* si_shader_nir.c */
void si_nir_scan_shader(const struct nir_shader *nir,

View File

@ -36,7 +36,6 @@
#include <llvm-c/TargetMachine.h>
struct pipe_debug_callback;
struct ac_shader_binary;
#define RADEON_LLVM_MAX_INPUT_SLOTS 32
#define RADEON_LLVM_MAX_INPUTS 32 * 4
@ -243,7 +242,7 @@ void si_create_function(struct si_shader_context *ctx,
LLVMTypeRef *returns, unsigned num_returns,
struct si_function_info *fninfo,
unsigned max_workgroup_size);
unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
unsigned si_llvm_compile(LLVMModuleRef M, struct si_shader_binary *binary,
struct ac_llvm_compiler *compiler,
struct pipe_debug_callback *debug,
bool less_optimized);

View File

@ -80,7 +80,7 @@ static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
*
* @returns 0 for success, 1 for failure
*/
unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
unsigned si_llvm_compile(LLVMModuleRef M, struct si_shader_binary *binary,
struct ac_llvm_compiler *compiler,
struct pipe_debug_callback *debug,
bool less_optimized)
@ -100,7 +100,8 @@ unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
/* Compile IR. */
if (!ac_compile_module_to_binary(passes, M, binary))
if (!ac_compile_module_to_elf(passes, M, (char **)&binary->elf_buffer,
&binary->elf_size))
diag.retval = 1;
if (diag.retval != 0)
@ -108,6 +109,15 @@ unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
return diag.retval;
}
void si_shader_binary_clean(struct si_shader_binary *binary)
{
free((void *)binary->elf_buffer);
binary->elf_buffer = NULL;
free(binary->llvm_ir_string);
binary->llvm_ir_string = NULL;
}
LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
enum tgsi_opcode_type type)
{

View File

@ -127,21 +127,21 @@ static uint32_t *read_chunk(uint32_t *ptr, void **data, unsigned *size)
static void *si_get_shader_binary(struct si_shader *shader)
{
/* There is always a size of data followed by the data itself. */
unsigned relocs_size = shader->binary.reloc_count *
sizeof(shader->binary.relocs[0]);
unsigned disasm_size = shader->binary.disasm_string ?
strlen(shader->binary.disasm_string) + 1 : 0;
unsigned llvm_ir_size = shader->binary.llvm_ir_string ?
strlen(shader->binary.llvm_ir_string) + 1 : 0;
/* Refuse to allocate overly large buffers and guard against integer
* overflow. */
if (shader->binary.elf_size > UINT_MAX / 4 ||
llvm_ir_size > UINT_MAX / 4)
return NULL;
unsigned size =
4 + /* total size */
4 + /* CRC32 of the data below */
align(sizeof(shader->config), 4) +
align(sizeof(shader->info), 4) +
4 + align(shader->binary.code_size, 4) +
4 + align(shader->binary.rodata_size, 4) +
4 + align(relocs_size, 4) +
4 + align(disasm_size, 4) +
4 + align(shader->binary.elf_size, 4) +
4 + align(llvm_ir_size, 4);
void *buffer = CALLOC(1, size);
uint32_t *ptr = (uint32_t*)buffer;
@ -154,10 +154,7 @@ static void *si_get_shader_binary(struct si_shader *shader)
ptr = write_data(ptr, &shader->config, sizeof(shader->config));
ptr = write_data(ptr, &shader->info, sizeof(shader->info));
ptr = write_chunk(ptr, shader->binary.code, shader->binary.code_size);
ptr = write_chunk(ptr, shader->binary.rodata, shader->binary.rodata_size);
ptr = write_chunk(ptr, shader->binary.relocs, relocs_size);
ptr = write_chunk(ptr, shader->binary.disasm_string, disasm_size);
ptr = write_chunk(ptr, shader->binary.elf_buffer, shader->binary.elf_size);
ptr = write_chunk(ptr, shader->binary.llvm_ir_string, llvm_ir_size);
assert((char *)ptr - (char *)buffer == size);
@ -175,6 +172,7 @@ static bool si_load_shader_binary(struct si_shader *shader, void *binary)
uint32_t size = *ptr++;
uint32_t crc32 = *ptr++;
unsigned chunk_size;
unsigned elf_size;
if (util_hash_crc32(ptr, size - 8) != crc32) {
fprintf(stderr, "radeonsi: binary shader has invalid CRC32\n");
@ -183,13 +181,9 @@ static bool si_load_shader_binary(struct si_shader *shader, void *binary)
ptr = read_data(ptr, &shader->config, sizeof(shader->config));
ptr = read_data(ptr, &shader->info, sizeof(shader->info));
ptr = read_chunk(ptr, (void**)&shader->binary.code,
&shader->binary.code_size);
ptr = read_chunk(ptr, (void**)&shader->binary.rodata,
&shader->binary.rodata_size);
ptr = read_chunk(ptr, (void**)&shader->binary.relocs, &chunk_size);
shader->binary.reloc_count = chunk_size / sizeof(shader->binary.relocs[0]);
ptr = read_chunk(ptr, (void**)&shader->binary.disasm_string, &chunk_size);
ptr = read_chunk(ptr, (void**)&shader->binary.elf_buffer,
&elf_size);
shader->binary.elf_size = elf_size;
ptr = read_chunk(ptr, (void**)&shader->binary.llvm_ir_string, &chunk_size);
return true;
@ -3132,13 +3126,8 @@ static int si_update_scratch_buffer(struct si_context *sctx,
assert(sctx->scratch_buffer);
if (shader->previous_stage)
si_shader_apply_scratch_relocs(shader->previous_stage, scratch_va);
si_shader_apply_scratch_relocs(shader, scratch_va);
/* Replace the shader bo with a new bo that has the relocs applied. */
if (!si_shader_binary_upload(sctx->screen, shader)) {
if (!si_shader_binary_upload(sctx->screen, shader, scratch_va)) {
si_shader_unlock(shader);
return -1;
}