vc4: Rewrite the kernel ABI to support texture uniform relocation.

This required building a shader parser that would walk the program to find
where the texturing-related uniforms are in the uniforms stream.

Note that as of this commit, a new kernel is required for rendering on
actual VC4 hardware (currently that commit is named "drm/vc4: Introduce
shader validation and better command stream validation.", but is likely to
be squashed as part of an eventual merge of the kernel driver).
This commit is contained in:
Eric Anholt 2014-07-21 11:27:35 -07:00
parent 6a5ece12aa
commit a8f2bf0f51
10 changed files with 608 additions and 97 deletions

View File

@ -17,5 +17,6 @@ C_SOURCES := \
vc4_screen.c \
vc4_simulator.c \
vc4_simulator_validate.c \
vc4_simulator_validate_shaders.c \
vc4_state.c \
$()

View File

@ -107,6 +107,8 @@ vc4_flush(struct pipe_context *pctx)
submit.shader_records = vc4->shader_rec.base;
submit.shader_record_len = vc4->shader_rec.next - vc4->shader_rec.base;
submit.shader_record_count = vc4->shader_rec_count;
submit.uniforms = vc4->uniforms.base;
submit.uniforms_len = vc4->uniforms.next - vc4->uniforms.base;
if (!(vc4_debug & VC4_DEBUG_NORAST)) {
int ret;
@ -123,6 +125,7 @@ vc4_flush(struct pipe_context *pctx)
vc4_reset_cl(&vc4->bcl);
vc4_reset_cl(&vc4->rcl);
vc4_reset_cl(&vc4->shader_rec);
vc4_reset_cl(&vc4->uniforms);
vc4_reset_cl(&vc4->bo_handles);
#ifdef USE_VC4_SIMULATOR
vc4_reset_cl(&vc4->bo_pointers);

View File

@ -70,6 +70,7 @@ struct vc4_shader_uniform_info {
enum quniform_contents *contents;
uint32_t *data;
uint32_t count;
uint32_t num_texture_samples;
};
struct vc4_compiled_shader {
@ -120,6 +121,7 @@ struct vc4_context {
struct vc4_cl bcl;
struct vc4_cl rcl;
struct vc4_cl shader_rec;
struct vc4_cl uniforms;
struct vc4_cl bo_handles;
#ifdef USE_VC4_SIMULATOR
struct vc4_cl bo_pointers;
@ -195,12 +197,11 @@ int vc4_simulator_flush(struct vc4_context *vc4,
struct drm_vc4_submit_cl *args,
struct vc4_surface *color_surf);
void vc4_get_uniform_bo(struct vc4_context *vc4,
void vc4_write_uniforms(struct vc4_context *vc4,
struct vc4_compiled_shader *shader,
struct vc4_constbuf_stateobj *cb,
struct vc4_texture_stateobj *texstate,
int shader_index, struct vc4_bo **out_bo,
uint32_t *out_offset);
int shader_index);
void vc4_flush(struct pipe_context *pctx);
void vc4_emit_state(struct pipe_context *pctx);

View File

@ -162,40 +162,38 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
// Shader Record
struct vc4_bo *fs_ubo, *vs_ubo, *cs_ubo;
uint32_t fs_ubo_offset, vs_ubo_offset, cs_ubo_offset;
vc4_get_uniform_bo(vc4, vc4->prog.fs,
vc4_write_uniforms(vc4, vc4->prog.fs,
&vc4->constbuf[PIPE_SHADER_FRAGMENT],
&vc4->fragtex,
0, &fs_ubo, &fs_ubo_offset);
vc4_get_uniform_bo(vc4, vc4->prog.vs,
0);
vc4_write_uniforms(vc4, vc4->prog.vs,
&vc4->constbuf[PIPE_SHADER_VERTEX],
&vc4->verttex,
0, &vs_ubo, &vs_ubo_offset);
vc4_get_uniform_bo(vc4, vc4->prog.vs,
0);
vc4_write_uniforms(vc4, vc4->prog.vs,
&vc4->constbuf[PIPE_SHADER_VERTEX],
&vc4->verttex,
1, &cs_ubo, &cs_ubo_offset);
1);
cl_start_shader_reloc(&vc4->shader_rec, 6 + vtx->num_elements);
cl_start_shader_reloc(&vc4->shader_rec, 3 + vtx->num_elements);
cl_u16(&vc4->shader_rec, VC4_SHADER_FLAG_ENABLE_CLIPPING);
cl_u8(&vc4->shader_rec, 0); /* fs num uniforms (unused) */
cl_u8(&vc4->shader_rec, vc4->prog.fs->num_inputs);
cl_reloc(vc4, &vc4->shader_rec, vc4->prog.fs->bo, 0);
cl_reloc(vc4, &vc4->shader_rec, fs_ubo, fs_ubo_offset);
cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
cl_u16(&vc4->shader_rec, 0); /* vs num uniforms */
cl_u8(&vc4->shader_rec, (1 << vtx->num_elements) - 1); /* vs attribute array bitfield */
cl_u8(&vc4->shader_rec, 16 * vtx->num_elements); /* vs total attribute size */
cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo, 0);
cl_reloc(vc4, &vc4->shader_rec, vs_ubo, vs_ubo_offset);
cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
cl_u16(&vc4->shader_rec, 0); /* cs num uniforms */
cl_u8(&vc4->shader_rec, (1 << vtx->num_elements) - 1); /* cs attribute array bitfield */
cl_u8(&vc4->shader_rec, 16 * vtx->num_elements); /* vs total attribute size */
cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo,
vc4->prog.vs->coord_shader_offset);
cl_reloc(vc4, &vc4->shader_rec, cs_ubo, cs_ubo_offset);
cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
for (int i = 0; i < vtx->num_elements; i++) {
struct pipe_vertex_element *elem = &vtx->pipe[i];

View File

@ -74,6 +74,21 @@ struct drm_vc4_submit_cl {
*/
void __user *shader_records;
/* Pointer to uniform data and texture handles for the textures
* referenced by the shader.
*
* For each shader state record, there is a set of uniform data in the
* order referenced by the record (FS, VS, then CS). Each set of
* uniform data has a uint32_t index into bo_handles per texture
* sample operation, in the order the QPU_W_TMUn_S writes appear in
* the program. Following the texture BO handle indices is the actual
* uniform data.
*
* The individual uniform state blocks don't have sizes passed in,
* because the kernel has to determine the sizes anyway during shader
* code validation.
*/
void __user *uniforms;
void __user *bo_handles;
/* Size in bytes of the binner command list. */
@ -84,11 +99,13 @@ struct drm_vc4_submit_cl {
uint32_t shader_record_len;
/* Number of shader records.
*
* This could just be computed from the contents of shader_records,
* but it keeps the kernel from having to resize various allocations
* it makes.
* This could just be computed from the contents of shader_records and
* the address bits of references to them from the bin CL, but it
* keeps the kernel from having to resize some allocations it makes.
*/
uint32_t shader_record_count;
/** Size in bytes of the uniform state. */
uint32_t uniforms_len;
/* Number of BO handles passed in (size is that times 4). */
uint32_t bo_handle_count;

View File

@ -57,6 +57,7 @@ struct tgsi_to_qir {
enum quniform_contents *uniform_contents;
uint32_t num_uniforms;
uint32_t num_outputs;
uint32_t num_texture_samples;
};
struct vc4_key {
@ -332,6 +333,7 @@ tgsi_to_qir_tex(struct tgsi_to_qir *trans,
qir_TEX_S(c, s, sampler_p1);
}
trans->num_texture_samples++;
qir_emit(c, qir_inst(QOP_TEX_RESULT, c->undef, c->undef, c->undef));
for (int i = 0; i < 4; i++) {
@ -938,6 +940,7 @@ copy_uniform_state_to_shader(struct vc4_compiled_shader *shader,
uinfo->contents = malloc(count * sizeof(*uinfo->contents));
memcpy(uinfo->contents, trans->uniform_contents,
count * sizeof(*uinfo->contents));
uinfo->num_texture_samples = trans->num_texture_samples;
}
static void
@ -1141,26 +1144,23 @@ static uint32_t translate_wrap(uint32_t p_wrap)
}
}
static uint32_t
get_texture_p0(struct vc4_texture_stateobj *texstate,
uint32_t tex_and_sampler)
static void
write_texture_p0(struct vc4_context *vc4,
struct vc4_texture_stateobj *texstate,
uint32_t tex_and_sampler)
{
uint32_t texi = (tex_and_sampler >> 0) & 0xff;
struct pipe_sampler_view *texture = texstate->textures[texi];
struct vc4_resource *rsc = vc4_resource(texture->texture);
return (texture->u.tex.last_level |
#if USE_VC4_SIMULATOR
simpenrose_hw_addr(rsc->bo->map) /* XXX */
#else
0 /* XXX */
#endif
/* XXX: data type */);
cl_reloc(vc4, &vc4->uniforms, rsc->bo,
texture->u.tex.last_level);
}
static uint32_t
get_texture_p1(struct vc4_texture_stateobj *texstate,
uint32_t tex_and_sampler)
static void
write_texture_p1(struct vc4_context *vc4,
struct vc4_texture_stateobj *texstate,
uint32_t tex_and_sampler)
{
uint32_t texi = (tex_and_sampler >> 0) & 0xff;
uint32_t sampi = (tex_and_sampler >> 8) & 0xff;
@ -1176,14 +1176,15 @@ get_texture_p1(struct vc4_texture_stateobj *texstate,
[PIPE_TEX_FILTER_LINEAR] = 0,
};
return ((1 << 31) /* XXX: data type */|
(texture->texture->height0 << 20) |
(texture->texture->width0 << 8) |
(imgfilter_map[sampler->mag_img_filter] << 7) |
((imgfilter_map[sampler->min_img_filter] +
mipfilter_map[sampler->min_mip_filter]) << 4) |
(translate_wrap(sampler->wrap_t) << 2) |
(translate_wrap(sampler->wrap_s) << 0));
cl_u32(&vc4->uniforms,
(1 << 31) /* XXX: data type */|
(texture->texture->height0 << 20) |
(texture->texture->width0 << 8) |
(imgfilter_map[sampler->mag_img_filter] << 7) |
((imgfilter_map[sampler->min_img_filter] +
mipfilter_map[sampler->min_mip_filter]) << 4) |
(translate_wrap(sampler->wrap_t) << 2) |
(translate_wrap(sampler->wrap_s) << 0));
}
static uint32_t
@ -1203,56 +1204,57 @@ get_texrect_scale(struct vc4_texture_stateobj *texstate,
}
void
vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
struct vc4_constbuf_stateobj *cb,
struct vc4_texture_stateobj *texstate,
int shader_index, struct vc4_bo **out_bo,
uint32_t *out_offset)
int shader_index)
{
struct vc4_shader_uniform_info *uinfo = &shader->uniforms[shader_index];
struct vc4_bo *ubo = vc4_bo_alloc(vc4->screen,
MAX2(1, uinfo->count * 4), "ubo");
uint32_t *map = vc4_bo_map(ubo);
const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
cl_start_shader_reloc(&vc4->uniforms, uinfo->num_texture_samples);
for (int i = 0; i < uinfo->count; i++) {
switch (uinfo->contents[i]) {
case QUNIFORM_CONSTANT:
map[i] = uinfo->data[i];
cl_u32(&vc4->uniforms, uinfo->data[i]);
break;
case QUNIFORM_UNIFORM:
map[i] = ((uint32_t *)cb->cb[0].user_buffer)[uinfo->data[i]];
cl_u32(&vc4->uniforms,
gallium_uniforms[uinfo->data[i]]);
break;
case QUNIFORM_VIEWPORT_X_SCALE:
map[i] = fui(vc4->framebuffer.width * 16.0f / 2.0f);
cl_u32(&vc4->uniforms, fui(vc4->framebuffer.width *
16.0f / 2.0f));
break;
case QUNIFORM_VIEWPORT_Y_SCALE:
map[i] = fui(vc4->framebuffer.height * -16.0f / 2.0f);
cl_u32(&vc4->uniforms, fui(vc4->framebuffer.height *
-16.0f / 2.0f));
break;
case QUNIFORM_TEXTURE_CONFIG_P0:
map[i] = get_texture_p0(texstate, uinfo->data[i]);
write_texture_p0(vc4, texstate, uinfo->data[i]);
break;
case QUNIFORM_TEXTURE_CONFIG_P1:
map[i] = get_texture_p1(texstate, uinfo->data[i]);
write_texture_p1(vc4, texstate, uinfo->data[i]);
break;
case QUNIFORM_TEXRECT_SCALE_X:
case QUNIFORM_TEXRECT_SCALE_Y:
map[i] = get_texrect_scale(texstate,
uinfo->contents[i],
uinfo->data[i]);
cl_u32(&vc4->uniforms,
get_texrect_scale(texstate,
uinfo->contents[i],
uinfo->data[i]));
break;
}
#if 0
uint32_t written_val = *(uint32_t *)(vc4->uniforms.next - 4);
fprintf(stderr, "%p/%d: %d: 0x%08x (%f)\n",
shader, shader_index, i, map[i], uif(map[i]));
shader, shader_index, i, written_val, uif(written_val));
#endif
}
*out_bo = ubo;
*out_offset = 0;
}
static void

View File

@ -63,9 +63,9 @@ drm_gem_cma_create(struct drm_device *dev, size_t size)
}
static int
vc4_simulator_pin_bos(struct drm_device *dev, struct drm_vc4_submit_cl *args,
struct exec_info *exec)
vc4_simulator_pin_bos(struct drm_device *dev, struct exec_info *exec)
{
struct drm_vc4_submit_cl *args = exec->args;
struct vc4_context *vc4 = dev->vc4;
struct vc4_bo **bos = vc4->bo_pointers.base;
@ -84,8 +84,7 @@ vc4_simulator_pin_bos(struct drm_device *dev, struct drm_vc4_submit_cl *args,
}
static int
vc4_simulator_unpin_bos(struct drm_vc4_submit_cl *args,
struct exec_info *exec)
vc4_simulator_unpin_bos(struct exec_info *exec)
{
for (int i = 0; i < exec->bo_count; i++) {
struct drm_gem_cma_object *obj = exec->bo[i];
@ -102,9 +101,9 @@ vc4_simulator_unpin_bos(struct drm_vc4_submit_cl *args,
}
static int
vc4_cl_validate(struct drm_device *dev, struct drm_vc4_submit_cl *args,
struct exec_info *exec)
vc4_cl_validate(struct drm_device *dev, struct exec_info *exec)
{
struct drm_vc4_submit_cl *args = exec->args;
void *temp = NULL;
void *bin, *render, *shader_rec;
int ret = 0;
@ -112,12 +111,14 @@ vc4_cl_validate(struct drm_device *dev, struct drm_vc4_submit_cl *args,
uint32_t render_offset = bin_offset + args->bin_cl_len;
uint32_t shader_rec_offset = roundup(render_offset +
args->render_cl_len, 16);
uint32_t exec_size = shader_rec_offset + args->shader_record_len;
uint32_t uniforms_offset = shader_rec_offset + args->shader_record_len;
uint32_t exec_size = uniforms_offset + args->uniforms_len;
uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
args->shader_record_count);
if (shader_rec_offset < render_offset ||
exec_size < shader_rec_offset ||
uniforms_offset < shader_rec_offset ||
exec_size < uniforms_offset ||
args->shader_record_count >= (UINT_MAX /
sizeof(struct vc4_shader_state)) ||
temp_size < exec_size) {
@ -142,6 +143,7 @@ vc4_cl_validate(struct drm_device *dev, struct drm_vc4_submit_cl *args,
bin = temp + bin_offset;
render = temp + render_offset;
shader_rec = temp + shader_rec_offset;
exec->uniforms_u = temp + uniforms_offset;
exec->shader_state = temp + exec_size;
exec->shader_state_size = args->shader_record_count;
@ -164,6 +166,13 @@ vc4_cl_validate(struct drm_device *dev, struct drm_vc4_submit_cl *args,
goto fail;
}
ret = copy_from_user(exec->uniforms_u, args->uniforms,
args->uniforms_len);
if (ret) {
DRM_ERROR("Failed to copy in uniforms cl\n");
goto fail;
}
exec->exec_bo = drm_gem_cma_create(dev, exec_size);
#if 0
if (IS_ERR(exec->exec_bo)) {
@ -180,6 +189,10 @@ vc4_cl_validate(struct drm_device *dev, struct drm_vc4_submit_cl *args,
exec->ct1ea = exec->ct1ca + args->render_cl_len;
exec->shader_paddr = exec->exec_bo->paddr + shader_rec_offset;
exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
exec->uniforms_size = args->uniforms_len;
ret = vc4_validate_cl(dev,
exec->exec_bo->vaddr + bin_offset,
bin,
@ -243,18 +256,20 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args,
}
}
ret = vc4_simulator_pin_bos(dev, args, &exec);
exec.args = args;
ret = vc4_simulator_pin_bos(dev, &exec);
if (ret)
return ret;
ret = vc4_cl_validate(dev, args, &exec);
ret = vc4_cl_validate(dev, &exec);
if (ret)
return ret;
simpenrose_do_binning(exec.ct0ca, exec.ct0ea);
simpenrose_do_rendering(exec.ct1ca, exec.ct1ea);
ret = vc4_simulator_unpin_bos(args, &exec);
ret = vc4_simulator_unpin_bos(&exec);
if (ret)
return ret;

View File

@ -347,6 +347,30 @@ vc4_validate_cl(struct drm_device *dev,
return 0;
}
/* Relocates one texture sample's P0 config uniform: rewrites the
 * user-supplied offset into an actual bus address inside the texture BO
 * named by texture_handle_index.
 *
 * NOTE(review): unvalidated_p0 is not bounds-checked against the texture
 * BO's size here, so a shader could still sample outside the BO --
 * presumably this is deferred until the P1 width/height/stride checks
 * described in vc4_texture_sample_info's doc comment. TODO confirm.
 */
static bool
reloc_tex(struct exec_info *exec,
void *uniform_data_u,
struct vc4_texture_sample_info *sample,
uint32_t texture_handle_index)
{
struct drm_gem_cma_object *tex;
/* P0 as supplied by userspace: an offset within the texture BO. */
uint32_t unvalidated_p0 = *(uint32_t *)(uniform_data_u +
sample->p_offset[0]);
/* Where P0 lives in the validated (kernel-owned) uniform stream. */
uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
if (texture_handle_index >= exec->bo_count) {
DRM_ERROR("texture handle index %d >= %d\n",
texture_handle_index, exec->bo_count);
return false;
}
tex = exec->bo[texture_handle_index];
/* Patch the uniform with the texture's real address. */
*validated_p0 = tex->paddr + unvalidated_p0;
return true;
}
static int
validate_shader_rec(struct drm_device *dev,
struct exec_info *exec,
@ -358,45 +382,54 @@ validate_shader_rec(struct drm_device *dev,
uint32_t *src_handles = unvalidated;
void *src_pkt;
void *dst_pkt = validated;
static const int gl_bo_offsets[] = {
4, 8, /* fs code, ubo */
16, 20, /* vs code, ubo */
28, 32, /* cs code, ubo */
enum shader_rec_reloc_type {
RELOC_CODE,
RELOC_VBO,
};
static const int nv_bo_offsets[] = {
4, 8, /* fs code, ubo */
12, /* vbo */
struct shader_rec_reloc {
enum shader_rec_reloc_type type;
uint32_t offset;
};
struct drm_gem_cma_object *bo[ARRAY_SIZE(gl_bo_offsets) + 8];
const int *bo_offsets;
uint32_t nr_attributes = 0, nr_bo, packet_size;
static const struct shader_rec_reloc gl_relocs[] = {
{ RELOC_CODE, 4 }, /* fs */
{ RELOC_CODE, 16 }, /* vs */
{ RELOC_CODE, 28 }, /* cs */
};
static const struct shader_rec_reloc nv_relocs[] = {
{ RELOC_CODE, 4 }, /* fs */
{ RELOC_VBO, 12 }
};
const struct shader_rec_reloc *relocs;
struct drm_gem_cma_object *bo[ARRAY_SIZE(gl_relocs) + 8];
uint32_t nr_attributes = 0, nr_relocs, packet_size;
int i;
struct vc4_validated_shader_info *validated_shader = NULL;
if (state->packet == VC4_PACKET_NV_SHADER_STATE) {
bo_offsets = nv_bo_offsets;
nr_bo = ARRAY_SIZE(nv_bo_offsets);
relocs = nv_relocs;
nr_relocs = ARRAY_SIZE(nv_relocs);
packet_size = 16;
} else {
bo_offsets = gl_bo_offsets;
nr_bo = ARRAY_SIZE(gl_bo_offsets);
relocs = gl_relocs;
nr_relocs = ARRAY_SIZE(gl_relocs);
nr_attributes = state->addr & 0x7;
if (nr_attributes == 0)
nr_attributes = 8;
packet_size = 36 + nr_attributes * 8;
}
if ((nr_bo + nr_attributes) * 4 + packet_size > len) {
if ((nr_relocs + nr_attributes) * 4 + packet_size > len) {
DRM_ERROR("overflowed shader packet read "
"(handles %d, packet %d, len %d)\n",
(nr_bo + nr_attributes) * 4, packet_size, len);
(nr_relocs + nr_attributes) * 4, packet_size, len);
return -EINVAL;
}
src_pkt = unvalidated + 4 * (nr_bo + nr_attributes);
src_pkt = unvalidated + 4 * (nr_relocs + nr_attributes);
memcpy(dst_pkt, src_pkt, packet_size);
for (i = 0; i < nr_bo + nr_attributes; i++) {
for (i = 0; i < nr_relocs + nr_attributes; i++) {
if (src_handles[i] >= exec->bo_count) {
DRM_ERROR("shader rec bo index %d > %d\n",
src_handles[i], exec->bo_count);
@ -405,21 +438,73 @@ validate_shader_rec(struct drm_device *dev,
bo[i] = exec->bo[src_handles[i]];
}
for (i = 0; i < nr_bo; i++) {
/* XXX: validation */
uint32_t o = bo_offsets[i];
*(uint32_t *)(dst_pkt + o) =
bo[i]->paddr + *(uint32_t *)(src_pkt + o);
for (i = 0; i < nr_relocs; i++) {
uint32_t o = relocs[i].offset;
uint32_t src_offset = *(uint32_t *)(src_pkt + o);
*(uint32_t *)(dst_pkt + o) = bo[i]->paddr + src_offset;
uint32_t *texture_handles_u;
void *uniform_data_u;
uint32_t tex;
switch (relocs[i].type) {
case RELOC_CODE:
kfree(validated_shader);
validated_shader = vc4_validate_shader(bo[i],
src_offset);
if (!validated_shader)
goto fail;
if (validated_shader->uniforms_src_size >
exec->uniforms_size) {
DRM_ERROR("Uniforms src buffer overflow\n");
goto fail;
}
texture_handles_u = exec->uniforms_u;
uniform_data_u = (texture_handles_u +
validated_shader->num_texture_samples);
memcpy(exec->uniforms_v, uniform_data_u,
validated_shader->uniforms_size);
for (tex = 0;
tex < validated_shader->num_texture_samples;
tex++) {
if (!reloc_tex(exec,
uniform_data_u,
&validated_shader->texture_samples[tex],
texture_handles_u[tex])) {
goto fail;
}
}
*(uint32_t *)(dst_pkt + o + 4) = exec->uniforms_p;
exec->uniforms_u += validated_shader->uniforms_src_size;
exec->uniforms_v += validated_shader->uniforms_size;
exec->uniforms_p += validated_shader->uniforms_size;
break;
case RELOC_VBO:
break;
}
}
for (i = 0; i < nr_attributes; i++) {
/* XXX: validation */
uint32_t o = 36 + i * 8;
*(uint32_t *)(dst_pkt + o) =
bo[nr_bo + i]->paddr + *(uint32_t *)(src_pkt + o);
bo[nr_relocs + i]->paddr + *(uint32_t *)(src_pkt + o);
}
kfree(validated_shader);
return 0;
fail:
kfree(validated_shader);
return -EINVAL;
}
int

View File

@ -26,15 +26,20 @@
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <errno.h>
#include "vc4_context.h"
#include "vc4_qpu_defines.h"
#define DRM_INFO(...) fprintf(stderr, __VA_ARGS__)
#define DRM_ERROR(...) fprintf(stderr, __VA_ARGS__)
#define kmalloc(size, arg) malloc(size)
#define kcalloc(size, count, arg) calloc(size, count)
#define kfree(ptr) free(ptr)
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define krealloc(ptr, size, args) realloc(ptr, size)
#define roundup(x, y) align(x, y)
static inline int
@ -64,6 +69,9 @@ struct drm_gem_cma_object {
};
struct exec_info {
/* Kernel-space copy of the ioctl arguments */
struct drm_vc4_submit_cl *args;
/* This is the array of BOs that were looked up at the start of exec.
* Command validation will use indices into this array.
*/
@ -79,9 +87,8 @@ struct exec_info {
uint32_t bo_index[2];
uint32_t max_width, max_height;
/**
* This is the BO where we store the validated command lists
* and shader records.
/* This is the BO where we store the validated command lists, shader
* records, and uniforms.
*/
struct drm_gem_cma_object *exec_bo;
@ -108,6 +115,50 @@ struct exec_info {
uint32_t ct0ca, ct0ea;
uint32_t ct1ca, ct1ea;
uint32_t shader_paddr;
/* Pointers to the uniform data. These pointers are incremented, and
* size decremented, as each batch of uniforms is uploaded.
*/
void *uniforms_u;
void *uniforms_v;
uint32_t uniforms_p;
uint32_t uniforms_size;
};
/**
* struct vc4_texture_sample_info - saves the offsets into the UBO for texture
* setup parameters.
*
* This will be used at draw time to relocate the reference to the texture
* contents in p0, and validate that the offset combined with
* width/height/stride/etc. from p1 and p2/p3 doesn't sample outside the BO.
* Note that the hardware treats unprovided config parameters as 0, so not all
of them need to be set up for every texture sample, and we'll store ~0 as
* the offset to mark the unused ones.
*
* See the VC4 3D architecture guide page 41 ("Texture and Memory Lookup Unit
* Setup") for definitions of the texture parameters.
*/
struct vc4_texture_sample_info {
/* Offset into the uniform stream of each of the P0-P3 texture config
 * parameters for this sample, or ~0 for parameters the shader didn't
 * provide (see the struct doc comment above).
 */
uint32_t p_offset[4];
};
/**
* struct vc4_validated_shader_info - information about validated shaders that
* needs to be used from command list validation.
*
* For a given shader, each time a shader state record references it, we need
* to verify that the shader doesn't read more uniforms than the shader state
* record's uniform BO pointer can provide, and we need to apply relocations
* and validate the shader state record's uniforms that define the texture
* samples.
*/
struct vc4_validated_shader_info
{
/* Size in bytes of the validated (kernel-written) uniform stream. */
uint32_t uniforms_size;
/* Size in bytes of the userspace-supplied uniform stream, which
 * additionally carries one uint32_t texture BO handle index per
 * texture sample ahead of the uniform data.
 */
uint32_t uniforms_src_size;
/* Number of texture sample operations found in the shader. */
uint32_t num_texture_samples;
/* Array, num_texture_samples long, of per-sample uniform offsets. */
struct vc4_texture_sample_info *texture_samples;
};
int vc4_validate_cl(struct drm_device *dev,
@ -123,4 +174,8 @@ int vc4_validate_shader_recs(struct drm_device *dev,
uint32_t len,
struct exec_info *exec);
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj,
uint32_t start_offset);
#endif /* VC4_SIMULATOR_VALIDATE_H */

View File

@ -0,0 +1,334 @@
/*
* Copyright © 2014 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/**
* DOC: Shader validator for VC4.
*
* The VC4 has no IOMMU between it and system memory. So, a user with access
* to execute shaders could escalate privilege by overwriting system memory
* (using the VPM write address register in the general-purpose DMA mode) or
* reading system memory it shouldn't (reading it as a texture, or uniform
* data, or vertex data).
*
* This walks over a shader starting from some offset within a BO, ensuring
* that its accesses are appropriately bounded, and recording how many texture
* accesses are made and where so that we can do relocations for them in the
* uniform stream.
*
* The kernel API has shaders stored in user-mapped BOs. The BOs will be
* forcibly unmapped from the process before validation, and any cache of
* validated state will be flushed if the mapping is faulted back in.
*
* Storing the shaders in BOs means that the validation process will be slow
* due to uncached reads, but since shaders are long-lived and shader BOs are
* never actually modified, this shouldn't be a problem.
*/
#include "vc4_simulator_validate.h"
#include "vc4_qpu.h"
#include "vc4_qpu_defines.h"
/* Scratch state while walking a shader: the texture parameter writes
 * seen so far on each of the two TMUs, before a sample is dispatched by
 * an S-register write.
 */
struct vc4_shader_validation_state {
/* Uniform-stream offsets of the parameter writes queued on each TMU. */
struct vc4_texture_sample_info tmu_setup[2];
/* How many of the (up to 4) parameters have been written so far. */
int tmu_write_count[2];
};
/* Returns whether waddr targets any of the texture-unit setup registers
 * (the TMU0/TMU1 write addresses form one contiguous range).
 */
static bool
is_tmu_write(uint32_t waddr)
{
	if (waddr < QPU_W_TMU0_S)
		return false;
	return waddr <= QPU_W_TMU1_B;
}
/* Decides whether a user shader may write the given register address.
 * Returns false (rejecting the shader) for writes that could let the
 * QPU access memory the kernel hasn't validated.
 *
 * NOTE(review): the is_b flag (regfile B vs. A destination) is accepted
 * but currently unused -- presumably reserved for waddrs whose meaning
 * depends on the target regfile. TODO confirm.
 */
static bool
check_register_write(uint32_t waddr, bool is_b)
{
switch (waddr) {
case QPU_W_UNIFORMS_ADDRESS:
/* XXX: We'll probably need to support this for reladdr, but
* it's definitely a security-related one.
*/
DRM_ERROR("uniforms address load unsupported\n");
return false;
case QPU_W_TLB_COLOR_MS:
case QPU_W_TLB_COLOR_ALL:
case QPU_W_TLB_Z:
/* XXX: We need to track which buffers get written by the
* shader, to make sure that we have those buffers set up by
* the config packets. But we need to pass them for now to
* get things up and running.
*/
return true;
case QPU_W_TMU0_S:
case QPU_W_TMU0_T:
case QPU_W_TMU0_R:
case QPU_W_TMU0_B:
case QPU_W_TMU1_S:
case QPU_W_TMU1_T:
case QPU_W_TMU1_R:
case QPU_W_TMU1_B:
/* XXX: We need to track where the uniforms get loaded for
* texturing so that we can do relocations, and to validate
* those uniform contents.
*/
return true;
case QPU_W_HOST_INT:
case QPU_W_TMU_NOSWAP:
case QPU_W_TLB_STENCIL_SETUP:
case QPU_W_TLB_ALPHA_MASK:
case QPU_W_MUTEX_RELEASE:
/* XXX: I haven't thought about these, so don't support them
* for now.
*/
DRM_ERROR("Unsupported waddr %d\n", waddr);
return false;
case QPU_W_VPM_ADDR:
DRM_ERROR("General VPM DMA unsupported\n");
return false;
case QPU_W_VPM:
case QPU_W_VPMVCD_SETUP:
/* We allow VPM setup in general, even including VPM DMA
* configuration setup, because the (unsafe) DMA can only be
* triggered by QPU_W_VPM_ADDR writes.
*/
return true;
}
/* Any waddr not special-cased above is allowed. */
return true;
}
/* Appends one texture sample record to validated_shader, capturing the
 * uniform-stream offsets of the TMU parameter writes collected so far.
 * Parameter slots that weren't written are marked with ~0.  Returns
 * false on allocation failure (the existing array is left intact for
 * the caller to free).
 */
static bool
record_validated_texture_sample(struct vc4_validated_shader_info *validated_shader,
				struct vc4_shader_validation_state *validation_state,
				int tmu)
{
	uint32_t n = validated_shader->num_texture_samples;
	int written = validation_state->tmu_write_count[tmu];
	struct vc4_texture_sample_info *samples;
	int i;

	/* Grow the array by one entry. */
	samples = krealloc(validated_shader->texture_samples,
			   (n + 1) * sizeof(*samples),
			   GFP_KERNEL);
	if (!samples)
		return false;

	/* Copy the recorded offsets; mark the unwritten tail with ~0. */
	for (i = 0; i < 4; i++) {
		if (i < written)
			samples[n].p_offset[i] =
				validation_state->tmu_setup[tmu].p_offset[i];
		else
			samples[n].p_offset[i] = ~0;
	}

	validated_shader->num_texture_samples = n + 1;
	validated_shader->texture_samples = samples;

	return true;
}
/* Tracks a potential TMU parameter write and the 4-byte uniform-stream
 * slot its data will consume, finalizing a texture sample record when
 * the S (dispatch) register is written.
 */
static bool
check_tmu_writes(uint64_t inst,
struct vc4_validated_shader_info *validated_shader,
struct vc4_shader_validation_state *validation_state,
uint32_t waddr)
{
/* All of TMU1's register addresses sort after TMU0's. */
int tmu = waddr > QPU_W_TMU0_B;
if (!is_tmu_write(waddr))
return true;
/* At most 4 parameters (S, T, R, B) may precede a dispatch. */
if (validation_state->tmu_write_count[tmu] >= 4) {
DRM_ERROR("TMU%d got too many parameters before dispatch\n",
tmu);
return false;
}
/* Record where in the uniform stream this parameter's data will
 * live, so reloc_tex() can patch it at submit time.
 */
validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
validated_shader->uniforms_size;
validation_state->tmu_write_count[tmu]++;
validated_shader->uniforms_size += 4;
/* A write to the S register is what triggers the sample (see the
 * kernel ABI comment about QPU_W_TMUn_S), so commit the recorded
 * sample and reset for the next one.
 */
if (waddr == QPU_W_TMU0_S || waddr == QPU_W_TMU1_S) {
if (!record_validated_texture_sample(validated_shader,
validation_state, tmu)) {
return false;
}
validation_state->tmu_write_count[tmu] = 0;
}
return true;
}
/* Validates both destination writes of one ALU instruction: tracks any
 * TMU parameter setup and rejects writes to unsafe registers.
 */
static bool
check_instruction_writes(uint64_t inst,
			 struct vc4_validated_shader_info *validated_shader,
			 struct vc4_shader_validation_state *validation_state)
{
	uint32_t add_dst = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t mul_dst = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	bool write_swap = (inst & QPU_WS) != 0;

	/* An instruction may not feed the TMUs from both ALU pipes at
	 * once.
	 */
	if (is_tmu_write(add_dst) && is_tmu_write(mul_dst)) {
		DRM_ERROR("ADD and MUL both set up textures\n");
		return false;
	}

	/* Track TMU parameter loads, ADD pipe first, then MUL. */
	if (!check_tmu_writes(inst, validated_shader, validation_state,
			      add_dst) ||
	    !check_tmu_writes(inst, validated_shader, validation_state,
			      mul_dst))
		return false;

	if (!check_register_write(add_dst, write_swap))
		return false;
	return check_register_write(mul_dst, !write_swap);
}
/* Validates the read side of one instruction and accounts for its
 * uniform-stream usage: each uniform read (raddr A or B == QPU_R_UNIF)
 * consumes 4 bytes of the uniform stream.
 *
 * A uniform read in the same instruction as TMU parameter setup is
 * rejected -- presumably so the uniform-stream offsets recorded for the
 * TMU parameters stay unambiguous.
 *
 * Returns true if the instruction's reads are acceptable.
 */
static bool
check_instruction_reads(uint64_t inst,
			struct vc4_validated_shader_info *validated_shader)
{
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);

	if (raddr_a == QPU_R_UNIF ||
	    raddr_b == QPU_R_UNIF) {
		if (is_tmu_write(waddr_add) || is_tmu_write(waddr_mul)) {
			/* Fix: message previously lacked its trailing
			 * newline, unlike every other DRM_ERROR here.
			 */
			DRM_ERROR("uniform read in the same instruction as "
				  "texture setup\n");
			return false;
		}

		/* This can't overflow the uint32_t, because we're reading 8
		 * bytes of instruction to increment by 4 here, so we'd
		 * already be OOM.
		 */
		validated_shader->uniforms_size += 4;
	}

	return true;
}
/* Walks the QPU program starting at start_offset inside shader_obj,
 * validating every instruction's reads and writes and sizing the
 * uniform stream it will consume.  Returns a kcalloc'd
 * vc4_validated_shader_info (caller frees with kfree), or NULL on
 * validation failure.
 */
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj,
uint32_t start_offset)
{
bool found_shader_end = false;
int shader_end_ip = 0;
uint32_t ip, max_ip;
uint64_t *shader;
struct vc4_validated_shader_info *validated_shader;
struct vc4_shader_validation_state validation_state;
memset(&validation_state, 0, sizeof(validation_state));
/* The program must hold at least one 8-byte instruction. */
if (start_offset + sizeof(uint64_t) > shader_obj->base.size) {
/* NOTE(review): "%d" may mismatch the type of base.size --
 * confirm against struct drm_gem_cma_object and use the
 * matching specifier (e.g. %zu for size_t).
 */
DRM_ERROR("shader starting at %d outside of BO sized %d\n",
start_offset,
shader_obj->base.size);
return NULL;
}
shader = shader_obj->vaddr + start_offset;
/* Upper bound on instructions we could possibly read from the BO. */
max_ip = (shader_obj->base.size - start_offset) / sizeof(uint64_t);
validated_shader = kcalloc(sizeof(*validated_shader), 1, GFP_KERNEL);
if (!validated_shader)
return NULL;
/* Dispatch each instruction on its signal field; anything not
 * explicitly allowed rejects the shader.
 */
for (ip = 0; ip < max_ip; ip++) {
uint64_t inst = shader[ip];
uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
switch (sig) {
case QPU_SIG_NONE:
case QPU_SIG_WAIT_FOR_SCOREBOARD:
case QPU_SIG_SCOREBOARD_UNLOCK:
case QPU_SIG_LOAD_TMU0:
case QPU_SIG_LOAD_TMU1:
if (!check_instruction_writes(inst, validated_shader,
&validation_state)) {
DRM_ERROR("Bad write at ip %d\n", ip);
goto fail;
}
if (!check_instruction_reads(inst, validated_shader))
goto fail;
break;
case QPU_SIG_LOAD_IMM:
/* Only writes are checked for LOAD_IMM -- presumably
 * its read fields carry immediate data rather than
 * register addresses. TODO confirm against the QPU
 * instruction encoding.
 */
if (!check_instruction_writes(inst, validated_shader,
&validation_state)) {
DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
goto fail;
}
break;
case QPU_SIG_PROG_END:
found_shader_end = true;
shader_end_ip = ip;
break;
default:
DRM_ERROR("Unsupported QPU signal %d at "
"instruction %d\n", sig, ip);
goto fail;
}
/* There are two delay slots after program end is signaled
* that are still executed, then we're finished.
*/
if (found_shader_end && ip == shader_end_ip + 2)
break;
}
/* Falling off the end of the loop means the program (including its
 * two delay slots) didn't terminate inside the BO.
 */
if (ip == max_ip) {
DRM_ERROR("shader starting at %d failed to terminate before "
"shader BO end at %d\n",
start_offset,
shader_obj->base.size);
goto fail;
}
/* Again, no chance of integer overflow here because the worst case
* scenario is 8 bytes of uniforms plus handles per 8-byte
* instruction.
*/
/* The source-side stream additionally carries one uint32_t texture
 * BO handle index per texture sample ahead of the uniform data (see
 * the submit_cl ABI comment).
 */
validated_shader->uniforms_src_size =
(validated_shader->uniforms_size +
4 * validated_shader->num_texture_samples);
return validated_shader;
fail:
kfree(validated_shader);
return NULL;
}