pvr: Extend hard coding infra to allow per-stage, per-device hard coding.

This commit extends the graphics hard coding infrastructure to allow
stages to be hard coded independently, i.e. the fragment and vertex
stages can be hard coded separately instead of having to hard code
everything.

It also extends the infrastructure to allow per-device hard coding.

Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Reviewed-by: Rajnesh Kanwal <rajnesh.kanwal@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17106>
Karmjit Mahil authored on 2022-06-15 15:35:14 +01:00; committed by Marge Bot
parent c6933cc9de
commit fad7e858f2
3 changed files with 243 additions and 112 deletions
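
To illustrate how the new per-stage, per-device entry points fit together, here is a minimal sketch of the per-stage selection loop, modelled on the pvr_graphics_pipeline_compile() changes in the diff below. pvr_build_stage_binaries() and pvr_compile_stage() are hypothetical names used only for this example; the latter stands in for the regular NIR/Rogue compilation path.

static VkResult
pvr_build_stage_binaries(struct pvr_device *const device,
                         struct rogue_build_ctx *const ctx,
                         uint32_t pipeline_n)
{
   const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;

   for (gl_shader_stage stage = MESA_SHADER_FRAGMENT; stage > MESA_SHADER_NONE;
        stage--) {
      /* pvr_hard_code_shader_required() now also checks the device BVNC, so
       * a device covered by the compatibility table falls through to the
       * compiler. Only the stages flagged for this device/demo combination
       * are hard coded.
       */
      if (pvr_hard_code_shader_required(dev_info) &&
          (pvr_hard_code_graphics_get_flags(dev_info) & BITFIELD_BIT(stage))) {
         pvr_hard_code_graphics_shader(dev_info,
                                       pipeline_n,
                                       stage,
                                       &ctx->binary[stage]);
         continue;
      }

      /* Hypothetical helper standing in for the regular compiler path. */
      VkResult result = pvr_compile_stage(device, ctx, stage);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}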


@@ -27,7 +27,9 @@
#include <string.h>
#include <vulkan/vulkan_core.h>
#include "compiler/shader_enums.h"
#include "hwdef/rogue_hw_utils.h"
#include "pvr_device_info.h"
#include "pvr_hardcode.h"
#include "pvr_private.h"
#include "rogue/rogue.h"
@@ -42,19 +44,30 @@
* This should eventually be deleted as the compiler becomes more capable.
*/
#define PVR_AXE_1_16M_BVNC PVR_BVNC_PACK(33, 15, 11, 3)
#define PVR_GX6250_BVNC PVR_BVNC_PACK(4, 40, 2, 51)
enum pvr_hard_code_shader_type {
PVR_HARD_CODE_SHADER_TYPE_COMPUTE,
PVR_HARD_CODE_SHADER_TYPE_GRAPHICS,
};
/* Applications for which the compiler is capable of generating valid shaders.
/* Table indicating which demo and for which device the compiler is capable of
* generating valid shaders.
*/
static const char *const compilable_progs[] = {
"triangle",
static struct {
const char *const name;
uint64_t bvncs[3];
} compatiblity_table[] = {
{
.name = "triangle",
.bvncs = { PVR_GX6250_BVNC, },
},
};
static const struct pvr_hard_coding_data {
const char *const name;
uint64_t bvnc;
enum pvr_hard_code_shader_type type;
union {
@@ -69,6 +82,9 @@ static const struct pvr_hard_coding_data {
} compute;
struct {
/* Mask of MESA_SHADER_* (gl_shader_stage). */
uint32_t flags;
struct rogue_shader_binary *const *const vert_shaders;
struct rogue_shader_binary *const *const frag_shaders;
@@ -85,6 +101,7 @@ static const struct pvr_hard_coding_data {
} hard_coding_table[] = {
{
.name = "simple-compute",
.bvnc = PVR_GX6250_BVNC,
.type = PVR_HARD_CODE_SHADER_TYPE_COMPUTE,
.compute = {
@@ -118,23 +135,42 @@ static const struct pvr_hard_coding_data {
},
};
bool pvr_hard_code_shader_required(void)
static inline uint64_t
pvr_device_get_bvnc(const struct pvr_device_info *const dev_info)
{
const struct pvr_device_ident *const ident = &dev_info->ident;
return PVR_BVNC_PACK(ident->b, ident->v, ident->n, ident->c);
}
bool pvr_hard_code_shader_required(const struct pvr_device_info *const dev_info)
{
const char *const program = util_get_process_name();
const uint64_t bvnc = pvr_device_get_bvnc(dev_info);
for (uint32_t i = 0; i < ARRAY_SIZE(compilable_progs); i++) {
if (strcmp(program, compilable_progs[i]) == 0)
return false;
for (uint32_t i = 0; i < ARRAY_SIZE(compatiblity_table); i++) {
for (uint32_t j = 0; j < ARRAY_SIZE(compatiblity_table[0].bvncs); j++) {
if (bvnc != compatiblity_table[i].bvncs[j])
continue;
if (strcmp(program, compatiblity_table[i].name) == 0)
return false;
}
}
return true;
}
static const struct pvr_hard_coding_data *pvr_get_hard_coding_data()
static const struct pvr_hard_coding_data *
pvr_get_hard_coding_data(const struct pvr_device_info *const dev_info)
{
const char *const program = util_get_process_name();
const uint64_t bvnc = pvr_device_get_bvnc(dev_info);
for (uint32_t i = 0; i < ARRAY_SIZE(hard_coding_table); i++) {
if (bvnc != hard_coding_table[i].bvnc)
continue;
if (strcmp(program, hard_coding_table[i].name) == 0)
return &hard_coding_table[i];
}
@@ -151,7 +187,8 @@ VkResult pvr_hard_code_compute_pipeline(
{
const uint32_t cache_line_size =
rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
const struct pvr_hard_coding_data *const data = pvr_get_hard_coding_data();
const struct pvr_hard_coding_data *const data =
pvr_get_hard_coding_data(&device->pdevice->dev_info);
assert(data->type == PVR_HARD_CODE_SHADER_TYPE_COMPUTE);
@@ -167,79 +204,128 @@ VkResult pvr_hard_code_compute_pipeline(
&shader_state_out->bo);
}
void pvr_hard_code_graphics_shaders(
uint32_t pipeline_n,
struct rogue_shader_binary **const vert_shader_out,
struct rogue_shader_binary **const frag_shader_out)
uint32_t
pvr_hard_code_graphics_get_flags(const struct pvr_device_info *const dev_info)
{
const struct pvr_hard_coding_data *const data = pvr_get_hard_coding_data();
const struct pvr_hard_coding_data *const data =
pvr_get_hard_coding_data(dev_info);
assert(data->type == PVR_HARD_CODE_SHADER_TYPE_GRAPHICS);
return data->graphics.flags;
}
void pvr_hard_code_graphics_shader(const struct pvr_device_info *const dev_info,
uint32_t pipeline_n,
gl_shader_stage stage,
struct rogue_shader_binary **const shader_out)
{
const struct pvr_hard_coding_data *const data =
pvr_get_hard_coding_data(dev_info);
assert(data->type == PVR_HARD_CODE_SHADER_TYPE_GRAPHICS);
assert(pipeline_n < data->graphics.shader_count);
assert(data->graphics.flags & BITFIELD_BIT(stage));
mesa_logd("Hard coding graphics pipeline for %s", data->name);
mesa_logd("Hard coding %s stage shader for \"%s\" demo.",
_mesa_shader_stage_to_string(stage),
data->name);
*vert_shader_out = data->graphics.vert_shaders[pipeline_n];
*frag_shader_out = data->graphics.frag_shaders[pipeline_n];
switch (stage) {
case MESA_SHADER_VERTEX:
*shader_out = data->graphics.vert_shaders[pipeline_n];
break;
case MESA_SHADER_FRAGMENT:
*shader_out = data->graphics.frag_shaders[pipeline_n];
break;
default:
unreachable("Unsupported stage.");
}
}
void pvr_hard_code_graphics_vertex_state(
const struct pvr_device_info *const dev_info,
uint32_t pipeline_n,
struct pvr_vertex_shader_state *const vert_state_out)
{
const struct pvr_hard_coding_data *const data = pvr_get_hard_coding_data();
const struct pvr_hard_coding_data *const data =
pvr_get_hard_coding_data(dev_info);
assert(data->type == PVR_HARD_CODE_SHADER_TYPE_GRAPHICS);
assert(pipeline_n < data->graphics.shader_count);
assert(data->graphics.flags & BITFIELD_BIT(MESA_SHADER_VERTEX));
*vert_state_out = *data->graphics.vert_shader_states[0];
}
void pvr_hard_code_graphics_fragment_state(
const struct pvr_device_info *const dev_info,
uint32_t pipeline_n,
struct pvr_fragment_shader_state *const frag_state_out)
{
const struct pvr_hard_coding_data *const data = pvr_get_hard_coding_data();
const struct pvr_hard_coding_data *const data =
pvr_get_hard_coding_data(dev_info);
assert(data->type == PVR_HARD_CODE_SHADER_TYPE_GRAPHICS);
assert(pipeline_n < data->graphics.shader_count);
assert(data->graphics.flags & BITFIELD_BIT(MESA_SHADER_FRAGMENT));
*frag_state_out = *data->graphics.frag_shader_states[0];
}
void pvr_hard_code_graphics_inject_build_info(
void pvr_hard_code_graphics_get_build_info(
const struct pvr_device_info *const dev_info,
uint32_t pipeline_n,
struct rogue_build_ctx *ctx,
struct pvr_explicit_constant_usage *const vert_common_data_out,
struct pvr_explicit_constant_usage *const frag_common_data_out)
gl_shader_stage stage,
struct rogue_common_build_data *const common_build_data,
struct rogue_build_data *const build_data,
struct pvr_explicit_constant_usage *const explicit_const_usage)
{
const struct pvr_hard_coding_data *const data = pvr_get_hard_coding_data();
const struct pvr_hard_coding_data *const data =
pvr_get_hard_coding_data(dev_info);
assert(data->type == PVR_HARD_CODE_SHADER_TYPE_GRAPHICS);
assert(pipeline_n < data->graphics.shader_count);
assert(data->graphics.flags & BITFIELD_BIT(stage));
ctx->stage_data = data->graphics.build_infos[pipeline_n]->stage_data;
ctx->common_data[MESA_SHADER_VERTEX] =
data->graphics.build_infos[pipeline_n]->vert_common_data;
ctx->common_data[MESA_SHADER_FRAGMENT] =
data->graphics.build_infos[pipeline_n]->frag_common_data;
switch (stage) {
case MESA_SHADER_VERTEX:
assert(
data->graphics.build_infos[pipeline_n]->vert_common_data.temps ==
data->graphics.vert_shader_states[pipeline_n]->stage_state.temps_count);
assert(
ctx->common_data[MESA_SHADER_VERTEX].temps ==
data->graphics.vert_shader_states[pipeline_n]->stage_state.temps_count);
assert(
ctx->common_data[MESA_SHADER_FRAGMENT].temps ==
data->graphics.frag_shader_states[pipeline_n]->stage_state.temps_count);
assert(data->graphics.build_infos[pipeline_n]->vert_common_data.coeffs ==
data->graphics.vert_shader_states[pipeline_n]
->stage_state.coefficient_size);
assert(ctx->common_data[MESA_SHADER_VERTEX].coeffs ==
data->graphics.vert_shader_states[pipeline_n]
->stage_state.coefficient_size);
assert(ctx->common_data[MESA_SHADER_FRAGMENT].coeffs ==
data->graphics.frag_shader_states[pipeline_n]
->stage_state.coefficient_size);
build_data->vs = data->graphics.build_infos[pipeline_n]->stage_data.vs;
*common_build_data =
data->graphics.build_infos[pipeline_n]->vert_common_data;
*explicit_const_usage =
data->graphics.build_infos[pipeline_n]->vert_explicit_conts_usage;
*vert_common_data_out =
data->graphics.build_infos[pipeline_n]->vert_explicit_conts_usage;
*frag_common_data_out =
data->graphics.build_infos[pipeline_n]->frag_explicit_conts_usage;
break;
case MESA_SHADER_FRAGMENT:
assert(
data->graphics.build_infos[pipeline_n]->frag_common_data.temps ==
data->graphics.frag_shader_states[pipeline_n]->stage_state.temps_count);
assert(data->graphics.build_infos[pipeline_n]->frag_common_data.coeffs ==
data->graphics.frag_shader_states[pipeline_n]
->stage_state.coefficient_size);
build_data->fs = data->graphics.build_infos[pipeline_n]->stage_data.fs;
*common_build_data =
data->graphics.build_infos[pipeline_n]->frag_common_data;
*explicit_const_usage =
data->graphics.build_infos[pipeline_n]->frag_explicit_conts_usage;
break;
default:
unreachable("Unsupported stage.");
}
}


@@ -28,6 +28,7 @@
#include <stdint.h>
#include <vulkan/vulkan_core.h>
#include "compiler/shader_enums.h"
#include "rogue/rogue_build_data.h"
/**
@@ -40,6 +41,7 @@
struct pvr_compute_pipeline_shader_state;
struct pvr_device;
struct pvr_fragment_shader_state;
struct pvr_hard_coding_data;
struct pvr_vertex_shader_state;
struct pvr_explicit_constant_usage {
@@ -73,36 +75,47 @@ struct pvr_hard_code_graphics_build_info {
/* Returns true if the shader for the currently running program requires hard
* coded shaders.
*/
bool pvr_hard_code_shader_required(void);
bool pvr_hard_code_shader_required(const struct pvr_device_info *const dev_info);
VkResult pvr_hard_code_compute_pipeline(
struct pvr_device *const device,
struct pvr_compute_pipeline_shader_state *const shader_state_out,
struct pvr_hard_code_compute_build_info *const build_info_out);
/* Returns a mask of MESA_SHADER_* (gl_shader_stage) indicating which stage
* needs to be hard coded.
*/
uint32_t
pvr_hard_code_graphics_get_flags(const struct pvr_device_info *const dev_info);
/* pipeline_n:
* The pipeline number. Each pipeline created requires unique hard
* coding so a pipeline number is necessary to identify which data to use.
* This pipeline number to request data for the first pipeline to be created
* is 0 and should be incremented for each subsequent pipeline.
*/
void pvr_hard_code_graphics_shaders(
void pvr_hard_code_graphics_shader(
const struct pvr_device_info *const dev_info,
uint32_t pipeline_n,
struct rogue_shader_binary **const vert_shader_out,
struct rogue_shader_binary **const frag_shader_out);
gl_shader_stage stage,
struct rogue_shader_binary **const shader_out);
void pvr_hard_code_graphics_vertex_state(
const struct pvr_device_info *const dev_info,
uint32_t pipeline_n,
struct pvr_vertex_shader_state *vert_state);
struct pvr_vertex_shader_state *const vert_state_out);
void pvr_hard_code_graphics_fragment_state(
uint32_t pipelien_n,
struct pvr_fragment_shader_state *frag_state);
void pvr_hard_code_graphics_inject_build_info(
const struct pvr_device_info *const dev_info,
uint32_t pipeline_n,
struct rogue_build_ctx *ctx,
struct pvr_explicit_constant_usage *const vert_common_data_out,
struct pvr_explicit_constant_usage *const frag_common_data_out);
struct pvr_fragment_shader_state *const frag_state_out);
void pvr_hard_code_graphics_get_build_info(
const struct pvr_device_info *const dev_info,
uint32_t pipeline_n,
gl_shader_stage stage,
struct rogue_common_build_data *const common_build_data,
struct rogue_build_data *const build_data,
struct pvr_explicit_constant_usage *const explicit_const_usage);
#endif /* PVR_HARDCODE_SHADERS_H */


@@ -1020,7 +1020,7 @@ static VkResult pvr_compute_pipeline_compile(
uint32_t usc_temps;
VkResult result;
if (pvr_hard_code_shader_required()) {
if (pvr_hard_code_shader_required(&device->pdevice->dev_info)) {
struct pvr_hard_code_compute_build_info build_info;
result = pvr_hard_code_compute_pipeline(device,
@@ -1399,7 +1399,6 @@ pvr_graphics_pipeline_compile(struct pvr_device *const device,
struct pvr_explicit_constant_usage frag_explicit_const_usage = {
.start_offset = 0,
};
const bool requires_hard_coding = pvr_hard_code_shader_required();
static uint32_t hard_code_pipeline_n = 0;
const VkPipelineVertexInputStateCreateInfo *const vertex_input_state =
@@ -1415,61 +1414,98 @@ pvr_graphics_pipeline_compile(struct pvr_device *const device,
if (!ctx)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (requires_hard_coding) {
pvr_hard_code_graphics_shaders(hard_code_pipeline_n,
&ctx->binary[MESA_SHADER_VERTEX],
&ctx->binary[MESA_SHADER_FRAGMENT]);
} else {
/* NIR middle-end translation. */
for (gl_shader_stage stage = MESA_SHADER_FRAGMENT;
stage > MESA_SHADER_NONE;
stage--) {
const VkPipelineShaderStageCreateInfo *create_info;
size_t stage_index = gfx_pipeline->stage_indices[stage];
/* NIR middle-end translation. */
for (gl_shader_stage stage = MESA_SHADER_FRAGMENT; stage > MESA_SHADER_NONE;
stage--) {
const VkPipelineShaderStageCreateInfo *create_info;
size_t stage_index = gfx_pipeline->stage_indices[stage];
/* Skip unused/inactive stages. */
if (stage_index == ~0)
if (pvr_hard_code_shader_required(&device->pdevice->dev_info)) {
if (pvr_hard_code_graphics_get_flags(&device->pdevice->dev_info) &
BITFIELD_BIT(stage)) {
continue;
create_info = &pCreateInfo->pStages[stage_index];
/* SPIR-V to NIR. */
ctx->nir[stage] = pvr_spirv_to_nir(ctx, stage, create_info);
if (!ctx->nir[stage]) {
ralloc_free(ctx);
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
}
/* Pre-back-end analysis and optimization, driver data extraction. */
/* TODO: Analyze and cull unused I/O between stages. */
/* TODO: Allocate UBOs between stages;
* pipeline->layout->set_{count,layout}.
*/
/* Skip unused/inactive stages. */
if (stage_index == ~0)
continue;
/* Back-end translation. */
for (gl_shader_stage stage = MESA_SHADER_FRAGMENT;
stage > MESA_SHADER_NONE;
stage--) {
if (!ctx->nir[stage])
continue;
create_info = &pCreateInfo->pStages[stage_index];
ctx->rogue[stage] = pvr_nir_to_rogue(ctx, ctx->nir[stage]);
if (!ctx->rogue[stage]) {
ralloc_free(ctx);
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
ctx->binary[stage] = pvr_rogue_to_binary(ctx, ctx->rogue[stage]);
if (!ctx->binary[stage]) {
ralloc_free(ctx);
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
/* SPIR-V to NIR. */
ctx->nir[stage] = pvr_spirv_to_nir(ctx, stage, create_info);
if (!ctx->nir[stage]) {
ralloc_free(ctx);
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
}
if (requires_hard_coding) {
pvr_hard_code_graphics_vertex_state(hard_code_pipeline_n,
/* Pre-back-end analysis and optimization, driver data extraction. */
/* TODO: Analyze and cull unused I/O between stages. */
/* TODO: Allocate UBOs between stages;
* pipeline->layout->set_{count,layout}.
*/
/* Back-end translation. */
for (gl_shader_stage stage = MESA_SHADER_FRAGMENT; stage > MESA_SHADER_NONE;
stage--) {
if (pvr_hard_code_shader_required(&device->pdevice->dev_info) &&
pvr_hard_code_graphics_get_flags(&device->pdevice->dev_info) &
BITFIELD_BIT(stage)) {
const struct pvr_device_info *const dev_info =
&device->pdevice->dev_info;
struct pvr_explicit_constant_usage *explicit_const_usage;
switch (stage) {
case MESA_SHADER_VERTEX:
explicit_const_usage = &vert_explicit_const_usage;
break;
case MESA_SHADER_FRAGMENT:
explicit_const_usage = &frag_explicit_const_usage;
break;
default:
unreachable("Unsupported stage.");
}
pvr_hard_code_graphics_shader(dev_info,
hard_code_pipeline_n,
stage,
&ctx->binary[stage]);
pvr_hard_code_graphics_get_build_info(dev_info,
hard_code_pipeline_n,
stage,
&ctx->common_data[stage],
&ctx->stage_data,
explicit_const_usage);
continue;
}
if (!ctx->nir[stage])
continue;
ctx->rogue[stage] = pvr_nir_to_rogue(ctx, ctx->nir[stage]);
if (!ctx->rogue[stage]) {
ralloc_free(ctx);
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
ctx->binary[stage] = pvr_rogue_to_binary(ctx, ctx->rogue[stage]);
if (!ctx->binary[stage]) {
ralloc_free(ctx);
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
}
if (pvr_hard_code_shader_required(&device->pdevice->dev_info) &&
pvr_hard_code_graphics_get_flags(&device->pdevice->dev_info) &
BITFIELD_BIT(MESA_SHADER_VERTEX)) {
pvr_hard_code_graphics_vertex_state(&device->pdevice->dev_info,
hard_code_pipeline_n,
&gfx_pipeline->vertex_shader_state);
} else {
pvr_vertex_state_init(gfx_pipeline,
@@ -1485,8 +1521,11 @@ pvr_graphics_pipeline_compile(struct pvr_device *const device,
if (result != VK_SUCCESS)
goto err_free_build_context;
if (requires_hard_coding) {
if (pvr_hard_code_shader_required(&device->pdevice->dev_info) &&
pvr_hard_code_graphics_get_flags(&device->pdevice->dev_info) &
BITFIELD_BIT(MESA_SHADER_FRAGMENT)) {
pvr_hard_code_graphics_fragment_state(
&device->pdevice->dev_info,
hard_code_pipeline_n,
&gfx_pipeline->fragment_shader_state);
} else {
@@ -1507,13 +1546,6 @@ pvr_graphics_pipeline_compile(struct pvr_device *const device,
* case the optimization doesn't happen.
*/
if (requires_hard_coding) {
pvr_hard_code_graphics_inject_build_info(hard_code_pipeline_n,
ctx,
&vert_explicit_const_usage,
&frag_explicit_const_usage);
}
/* TODO: The programs we use are hard coded for now, but these should be
* selected dynamically.
*/