vc4: Rework cl handling to be friendlier to the compiler.

Drops 680 bytes of code by avoiding a bunch of extra updates to the
next pointer in the struct.
Eric Anholt 2015-07-09 22:51:06 -07:00
parent a0d3915663
commit 7432017f65
6 changed files with 201 additions and 150 deletions
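
What changed, in one sketch: every cl_u8()/cl_u16()/cl_u32() helper used to
dereference the CL struct and store cl->next back on each call (stores the
compiler evidently could not coalesce, presumably because the struct is
visible to other code). Callers now open a local cursor with cl_start(),
emit through it, and store it back once with cl_end(). The packet below is
just for illustration:

    /* Before: each call reloads vc4->bcl.next and writes it back. */
    cl_u8(&vc4->bcl, VC4_PACKET_FLUSH);

    /* After: one load, register-friendly advances, one store. */
    struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
    cl_u8(&bcl, VC4_PACKET_FLUSH);
    cl_end(&vc4->bcl, bcl);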

@@ -66,8 +66,15 @@ vc4_gem_hindex(struct vc4_context *vc4, struct vc4_bo *bo)
                         return hindex;
         }
 
-        cl_u32(&vc4->bo_handles, bo->handle);
-        cl_ptr(&vc4->bo_pointers, vc4_bo_reference(bo));
+        struct vc4_cl_out *out;
+
+        out = cl_start(&vc4->bo_handles);
+        cl_u32(&out, bo->handle);
+        cl_end(&vc4->bo_handles, out);
+
+        out = cl_start(&vc4->bo_pointers);
+        cl_ptr(&out, vc4_bo_reference(bo));
+        cl_end(&vc4->bo_pointers, out);
 
         return hindex;
 }

@@ -33,10 +33,16 @@
 struct vc4_bo;
 
+/**
+ * Undefined structure, used for typechecking that you're passing the pointers
+ * to these functions correctly.
+ */
+struct vc4_cl_out;
+
 struct vc4_cl {
         void *base;
-        void *next;
-        void *reloc_next;
+        struct vc4_cl_out *next;
+        struct vc4_cl_out *reloc_next;
         uint32_t size;
         uint32_t reloc_count;
 };
 
@@ -55,122 +61,135 @@ static inline uint32_t cl_offset(struct vc4_cl *cl)
 }
 
 static inline void
-put_unaligned_32(void *ptr, uint32_t val)
+cl_advance(struct vc4_cl_out **cl, uint32_t n)
 {
-        struct unaligned_32 *p = ptr;
+        (*cl) = (struct vc4_cl_out *)((char *)(*cl) + n);
+}
+
+static inline struct vc4_cl_out *
+cl_start(struct vc4_cl *cl)
+{
+        return cl->next;
+}
+
+static inline void
+cl_end(struct vc4_cl *cl, struct vc4_cl_out *next)
+{
+        cl->next = next;
+        assert(cl_offset(cl) <= cl->size);
+}
+
+
+static inline void
+put_unaligned_32(struct vc4_cl_out *ptr, uint32_t val)
+{
+        struct unaligned_32 *p = (void *)ptr;
         p->x = val;
 }
 
 static inline void
-put_unaligned_16(void *ptr, uint16_t val)
+put_unaligned_16(struct vc4_cl_out *ptr, uint16_t val)
 {
-        struct unaligned_16 *p = ptr;
+        struct unaligned_16 *p = (void *)ptr;
         p->x = val;
 }
 
 static inline void
-cl_u8(struct vc4_cl *cl, uint8_t n)
+cl_u8(struct vc4_cl_out **cl, uint8_t n)
 {
-        assert(cl_offset(cl) + 1 <= cl->size);
-
-        *(uint8_t *)cl->next = n;
-        cl->next++;
+        *(uint8_t *)(*cl) = n;
+        cl_advance(cl, 1);
 }
 
 static inline void
-cl_u16(struct vc4_cl *cl, uint16_t n)
+cl_u16(struct vc4_cl_out **cl, uint16_t n)
 {
-        assert(cl_offset(cl) + 2 <= cl->size);
-
-        put_unaligned_16(cl->next, n);
-        cl->next += 2;
+        put_unaligned_16(*cl, n);
+        cl_advance(cl, 2);
 }
 
 static inline void
-cl_u32(struct vc4_cl *cl, uint32_t n)
+cl_u32(struct vc4_cl_out **cl, uint32_t n)
 {
-        assert(cl_offset(cl) + 4 <= cl->size);
-
-        put_unaligned_32(cl->next, n);
-        cl->next += 4;
+        put_unaligned_32(*cl, n);
+        cl_advance(cl, 4);
 }
 
 static inline void
-cl_aligned_u32(struct vc4_cl *cl, uint32_t n)
+cl_aligned_u32(struct vc4_cl_out **cl, uint32_t n)
 {
-        assert(cl_offset(cl) + 4 <= cl->size);
-
-        *(uint32_t *)cl->next = n;
-        cl->next += 4;
+        *(uint32_t *)(*cl) = n;
+        cl_advance(cl, 4);
 }
 
 static inline void
-cl_ptr(struct vc4_cl *cl, void *ptr)
+cl_ptr(struct vc4_cl_out **cl, void *ptr)
 {
-        assert(cl_offset(cl) + sizeof(void *) <= cl->size);
-
-        *(void **)cl->next = ptr;
-        cl->next += sizeof(void *);
+        *(struct vc4_cl_out **)(*cl) = ptr;
+        cl_advance(cl, sizeof(void *));
 }
 
 static inline void
-cl_f(struct vc4_cl *cl, float f)
+cl_f(struct vc4_cl_out **cl, float f)
 {
         cl_u32(cl, fui(f));
 }
 
 static inline void
-cl_aligned_f(struct vc4_cl *cl, float f)
+cl_aligned_f(struct vc4_cl_out **cl, float f)
 {
         cl_aligned_u32(cl, fui(f));
 }
 
 static inline void
-cl_start_reloc(struct vc4_cl *cl, uint32_t n)
+cl_start_reloc(struct vc4_cl *cl, struct vc4_cl_out **out, uint32_t n)
 {
         assert(n == 1 || n == 2);
         assert(cl->reloc_count == 0);
         cl->reloc_count = n;
 
-        cl_u8(cl, VC4_PACKET_GEM_HANDLES);
-        cl->reloc_next = cl->next;
-        cl_u32(cl, 0); /* Space where hindex will be written. */
-        cl_u32(cl, 0); /* Space where hindex will be written. */
+        cl_u8(out, VC4_PACKET_GEM_HANDLES);
+        cl->reloc_next = *out;
+        cl_u32(out, 0); /* Space where hindex will be written. */
+        cl_u32(out, 0); /* Space where hindex will be written. */
 }
 
-static inline void
+static inline struct vc4_cl_out *
 cl_start_shader_reloc(struct vc4_cl *cl, uint32_t n)
 {
         assert(cl->reloc_count == 0);
         cl->reloc_count = n;
         cl->reloc_next = cl->next;
 
-        /* Space where hindex will be written. */
-        cl->next += n * 4;
+        /* Reserve the space where hindex will be written. */
+        cl_advance(&cl->next, n * 4);
+
+        return cl->next;
 }
 
 static inline void
-cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
+cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl, struct vc4_cl_out **cl_out,
          struct vc4_bo *bo, uint32_t offset)
 {
         *(uint32_t *)cl->reloc_next = vc4_gem_hindex(vc4, bo);
-        cl->reloc_next += 4;
+        cl_advance(&cl->reloc_next, 4);
         cl->reloc_count--;
 
-        cl_u32(cl, offset);
+        cl_u32(cl_out, offset);
 }
 
 static inline void
 cl_aligned_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
-                 struct vc4_bo *bo, uint32_t offset)
+                 struct vc4_cl_out **cl_out,
+                 struct vc4_bo *bo, uint32_t offset)
 {
         *(uint32_t *)cl->reloc_next = vc4_gem_hindex(vc4, bo);
-        cl->reloc_next += 4;
+        cl_advance(&cl->reloc_next, 4);
         cl->reloc_count--;
 
-        cl_aligned_u32(cl, offset);
+        cl_aligned_u32(cl_out, offset);
 }
 
 void cl_ensure_space(struct vc4_cl *cl, uint32_t size);
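
Since struct vc4_cl_out is only forward-declared, it can never be
dereferenced outside these helpers, and handing an emitter the CL itself
instead of a cursor is a compile-time type error. A sketch of the misuse
the opaque type catches (hypothetical call site, not part of this commit):

    struct vc4_cl_out *out = cl_start(&vc4->bcl);
    cl_u32(&vc4->bcl, 0); /* rejected: struct vc4_cl * is not struct vc4_cl_out ** */
    cl_u32(&out, 0);      /* OK */
    cl_end(&vc4->bcl, out);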

@@ -61,9 +61,11 @@ vc4_flush(struct pipe_context *pctx)
          * FLUSH completes.
          */
         cl_ensure_space(&vc4->bcl, 8);
-        cl_u8(&vc4->bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
+        struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
+        cl_u8(&bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
         /* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */
-        cl_u8(&vc4->bcl, VC4_PACKET_FLUSH);
+        cl_u8(&bcl, VC4_PACKET_FLUSH);
+        cl_end(&vc4->bcl, bcl);
 
         if (cbuf && (vc4->resolve & PIPE_CLEAR_COLOR0)) {
                 pipe_surface_reference(&vc4->color_write, cbuf);
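
Note the ordering in this hunk: cl_ensure_space() runs before cl_start().
Growing the CL can presumably reallocate its backing storage, which would
leave an already-opened cursor dangling, so the worst-case space is reserved
before the cursor is taken (an inference from the ordering here, not
something the commit states):

    cl_ensure_space(&vc4->bcl, 8);                /* may move the buffer */
    struct vc4_cl_out *bcl = cl_start(&vc4->bcl); /* take the cursor after */
    cl_u8(&bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
    cl_u8(&bcl, VC4_PACKET_FLUSH);
    cl_end(&vc4->bcl, bcl);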

@@ -71,37 +71,40 @@ vc4_start_draw(struct vc4_context *vc4)
         uint32_t height = vc4->framebuffer.height;
         uint32_t tilew = align(width, 64) / 64;
         uint32_t tileh = align(height, 64) / 64;
+        struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
 
         //   Tile state data is 48 bytes per tile, I think it can be thrown away
         //   as soon as binning is finished.
-        cl_u8(&vc4->bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
-        cl_u32(&vc4->bcl, 0); /* tile alloc addr, filled by kernel */
-        cl_u32(&vc4->bcl, 0); /* tile alloc size, filled by kernel */
-        cl_u32(&vc4->bcl, 0); /* tile state addr, filled by kernel */
-        cl_u8(&vc4->bcl, tilew);
-        cl_u8(&vc4->bcl, tileh);
-        cl_u8(&vc4->bcl, 0); /* flags, filled by kernel. */
+        cl_u8(&bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
+        cl_u32(&bcl, 0); /* tile alloc addr, filled by kernel */
+        cl_u32(&bcl, 0); /* tile alloc size, filled by kernel */
+        cl_u32(&bcl, 0); /* tile state addr, filled by kernel */
+        cl_u8(&bcl, tilew);
+        cl_u8(&bcl, tileh);
+        cl_u8(&bcl, 0); /* flags, filled by kernel. */
 
         /* START_TILE_BINNING resets the statechange counters in the hardware,
          * which are what is used when a primitive is binned to a tile to
          * figure out what new state packets need to be written to that tile's
          * command list.
          */
-        cl_u8(&vc4->bcl, VC4_PACKET_START_TILE_BINNING);
+        cl_u8(&bcl, VC4_PACKET_START_TILE_BINNING);
 
         /* Reset the current compressed primitives format.  This gets modified
          * by VC4_PACKET_GL_INDEXED_PRIMITIVE and
          * VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
          * of every tile.
          */
-        cl_u8(&vc4->bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
-        cl_u8(&vc4->bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
-                          VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
+        cl_u8(&bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
+        cl_u8(&bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
+                     VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
 
         vc4->needs_flush = true;
         vc4->draw_call_queued = true;
         vc4->draw_width = width;
         vc4->draw_height = height;
+
+        cl_end(&vc4->bcl, bcl);
 }
 
 static void
@@ -167,28 +170,29 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
          */
         uint32_t num_elements_emit = MAX2(vtx->num_elements, 1);
         /* Emit the shader record. */
-        cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit);
-        cl_u16(&vc4->shader_rec,
+        struct vc4_cl_out *shader_rec =
+                cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit);
+        cl_u16(&shader_rec,
                VC4_SHADER_FLAG_ENABLE_CLIPPING |
                ((info->mode == PIPE_PRIM_POINTS &&
                  vc4->rasterizer->base.point_size_per_vertex) ?
                 VC4_SHADER_FLAG_VS_POINT_SIZE : 0));
-        cl_u8(&vc4->shader_rec, 0); /* fs num uniforms (unused) */
-        cl_u8(&vc4->shader_rec, vc4->prog.fs->num_inputs);
-        cl_reloc(vc4, &vc4->shader_rec, vc4->prog.fs->bo, 0);
-        cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
+        cl_u8(&shader_rec, 0); /* fs num uniforms (unused) */
+        cl_u8(&shader_rec, vc4->prog.fs->num_inputs);
+        cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.fs->bo, 0);
+        cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
 
-        cl_u16(&vc4->shader_rec, 0); /* vs num uniforms */
-        cl_u8(&vc4->shader_rec, vc4->prog.vs->vattrs_live);
-        cl_u8(&vc4->shader_rec, vc4->prog.vs->vattr_offsets[8]);
-        cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo, 0);
-        cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
+        cl_u16(&shader_rec, 0); /* vs num uniforms */
+        cl_u8(&shader_rec, vc4->prog.vs->vattrs_live);
+        cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[8]);
+        cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.vs->bo, 0);
+        cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
 
-        cl_u16(&vc4->shader_rec, 0); /* cs num uniforms */
-        cl_u8(&vc4->shader_rec, vc4->prog.cs->vattrs_live);
-        cl_u8(&vc4->shader_rec, vc4->prog.cs->vattr_offsets[8]);
-        cl_reloc(vc4, &vc4->shader_rec, vc4->prog.cs->bo, 0);
-        cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
+        cl_u16(&shader_rec, 0); /* cs num uniforms */
+        cl_u8(&shader_rec, vc4->prog.cs->vattrs_live);
+        cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[8]);
+        cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.cs->bo, 0);
+        cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
 
         uint32_t max_index = 0xffff;
         uint32_t vpm_offset = 0;
@@ -202,11 +206,11 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                 uint32_t elem_size =
                         util_format_get_blocksize(elem->src_format);
 
-                cl_reloc(vc4, &vc4->shader_rec, rsc->bo, offset);
-                cl_u8(&vc4->shader_rec, elem_size - 1);
-                cl_u8(&vc4->shader_rec, vb->stride);
-                cl_u8(&vc4->shader_rec, vc4->prog.vs->vattr_offsets[i]);
-                cl_u8(&vc4->shader_rec, vc4->prog.cs->vattr_offsets[i]);
+                cl_reloc(vc4, &vc4->shader_rec, &shader_rec, rsc->bo, offset);
+                cl_u8(&shader_rec, elem_size - 1);
+                cl_u8(&shader_rec, vb->stride);
+                cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[i]);
+                cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[i]);
 
                 vpm_offset += align(elem_size, 4);
@@ -219,21 +223,23 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
         if (vtx->num_elements == 0) {
                 assert(num_elements_emit == 1);
                 struct vc4_bo *bo = vc4_bo_alloc(vc4->screen, 4096, "scratch VBO");
-                cl_reloc(vc4, &vc4->shader_rec, bo, 0);
-                cl_u8(&vc4->shader_rec, 16 - 1); /* element size */
-                cl_u8(&vc4->shader_rec, 0); /* stride */
-                cl_u8(&vc4->shader_rec, 0); /* VS VPM offset */
-                cl_u8(&vc4->shader_rec, 0); /* CS VPM offset */
+                cl_reloc(vc4, &vc4->shader_rec, &shader_rec, bo, 0);
+                cl_u8(&shader_rec, 16 - 1); /* element size */
+                cl_u8(&shader_rec, 0); /* stride */
+                cl_u8(&shader_rec, 0); /* VS VPM offset */
+                cl_u8(&shader_rec, 0); /* CS VPM offset */
                 vc4_bo_unreference(&bo);
         }
+        cl_end(&vc4->shader_rec, shader_rec);
 
+        struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
         /* the actual draw call. */
-        cl_u8(&vc4->bcl, VC4_PACKET_GL_SHADER_STATE);
+        cl_u8(&bcl, VC4_PACKET_GL_SHADER_STATE);
         assert(vtx->num_elements <= 8);
         /* Note that number of attributes == 0 in the packet means 8
          * attributes.  This field also contains the offset into shader_rec.
          */
-        cl_u32(&vc4->bcl, num_elements_emit & 0x7);
+        cl_u32(&bcl, num_elements_emit & 0x7);
 
         /* Note that the primitive type fields match with OpenGL/gallium
          * definitions, up to but not including QUADS.
@@ -251,25 +257,26 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                 }
 
                 struct vc4_resource *rsc = vc4_resource(prsc);
 
-                cl_start_reloc(&vc4->bcl, 1);
-                cl_u8(&vc4->bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
-                cl_u8(&vc4->bcl,
+                cl_start_reloc(&vc4->bcl, &bcl, 1);
+                cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
+                cl_u8(&bcl,
                       info->mode |
                       (index_size == 2 ?
                        VC4_INDEX_BUFFER_U16:
                        VC4_INDEX_BUFFER_U8));
-                cl_u32(&vc4->bcl, info->count);
-                cl_reloc(vc4, &vc4->bcl, rsc->bo, offset);
-                cl_u32(&vc4->bcl, max_index);
+                cl_u32(&bcl, info->count);
+                cl_reloc(vc4, &vc4->bcl, &bcl, rsc->bo, offset);
+                cl_u32(&bcl, max_index);
 
                 if (vc4->indexbuf.index_size == 4)
                         pipe_resource_reference(&prsc, NULL);
         } else {
-                cl_u8(&vc4->bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
-                cl_u8(&vc4->bcl, info->mode);
-                cl_u32(&vc4->bcl, info->count);
-                cl_u32(&vc4->bcl, info->start);
+                cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
+                cl_u8(&bcl, info->mode);
+                cl_u32(&bcl, info->count);
+                cl_u32(&bcl, info->start);
         }
+        cl_end(&vc4->bcl, bcl);
 
         if (vc4->zsa && vc4->zsa->base.depth.enabled) {
                 vc4->resolve |= PIPE_CLEAR_DEPTH;
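
The shader-record hunks above also show how relocations fit the split
cursor: cl_start_shader_reloc() remembers where the BO handle indices belong
(cl->reloc_next), reserves n * 4 bytes for them, and returns a cursor
positioned just past that block; each later cl_reloc() backfills one
reserved slot through the CL while emitting the 4-byte offset at the
caller's cursor. Condensed from the hunks above (flags stands in for the
real packed field):

    struct vc4_cl_out *shader_rec =
            cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit);
    cl_u16(&shader_rec, flags);   /* fixed-size fields go through the cursor */
    cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.fs->bo, 0);
    /* ^ writes the hindex into a reserved slot, the offset at the cursor */
    cl_end(&vc4->shader_rec, shader_rec);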

@@ -28,6 +28,7 @@ vc4_emit_state(struct pipe_context *pctx)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
 
+        struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
         if (vc4->dirty & (VC4_DIRTY_SCISSOR | VC4_DIRTY_VIEWPORT)) {
                 float *vpscale = vc4->viewport.scale;
                 float *vptranslate = vc4->viewport.translate;
@@ -40,11 +41,11 @@ vc4_emit_state(struct pipe_context *pctx)
                 uint32_t maxx = MIN2(vc4->scissor.maxx, vp_maxx);
                 uint32_t maxy = MIN2(vc4->scissor.maxy, vp_maxy);
 
-                cl_u8(&vc4->bcl, VC4_PACKET_CLIP_WINDOW);
-                cl_u16(&vc4->bcl, minx);
-                cl_u16(&vc4->bcl, miny);
-                cl_u16(&vc4->bcl, maxx - minx);
-                cl_u16(&vc4->bcl, maxy - miny);
+                cl_u8(&bcl, VC4_PACKET_CLIP_WINDOW);
+                cl_u16(&bcl, minx);
+                cl_u16(&bcl, miny);
+                cl_u16(&bcl, maxx - minx);
+                cl_u16(&bcl, maxy - miny);
 
                 vc4->draw_min_x = MIN2(vc4->draw_min_x, minx);
                 vc4->draw_min_y = MIN2(vc4->draw_min_y, miny);
@@ -53,47 +54,49 @@ vc4_emit_state(struct pipe_context *pctx)
         }
 
         if (vc4->dirty & (VC4_DIRTY_RASTERIZER | VC4_DIRTY_ZSA)) {
-                cl_u8(&vc4->bcl, VC4_PACKET_CONFIGURATION_BITS);
-                cl_u8(&vc4->bcl,
+                cl_u8(&bcl, VC4_PACKET_CONFIGURATION_BITS);
+                cl_u8(&bcl,
                       vc4->rasterizer->config_bits[0] |
                       vc4->zsa->config_bits[0]);
-                cl_u8(&vc4->bcl,
+                cl_u8(&bcl,
                       vc4->rasterizer->config_bits[1] |
                       vc4->zsa->config_bits[1]);
-                cl_u8(&vc4->bcl,
+                cl_u8(&bcl,
                       vc4->rasterizer->config_bits[2] |
                       vc4->zsa->config_bits[2]);
         }
 
         if (vc4->dirty & VC4_DIRTY_RASTERIZER) {
-                cl_u8(&vc4->bcl, VC4_PACKET_DEPTH_OFFSET);
-                cl_u16(&vc4->bcl, vc4->rasterizer->offset_factor);
-                cl_u16(&vc4->bcl, vc4->rasterizer->offset_units);
+                cl_u8(&bcl, VC4_PACKET_DEPTH_OFFSET);
+                cl_u16(&bcl, vc4->rasterizer->offset_factor);
+                cl_u16(&bcl, vc4->rasterizer->offset_units);
 
-                cl_u8(&vc4->bcl, VC4_PACKET_POINT_SIZE);
-                cl_f(&vc4->bcl, vc4->rasterizer->point_size);
+                cl_u8(&bcl, VC4_PACKET_POINT_SIZE);
+                cl_f(&bcl, vc4->rasterizer->point_size);
 
-                cl_u8(&vc4->bcl, VC4_PACKET_LINE_WIDTH);
-                cl_f(&vc4->bcl, vc4->rasterizer->base.line_width);
+                cl_u8(&bcl, VC4_PACKET_LINE_WIDTH);
+                cl_f(&bcl, vc4->rasterizer->base.line_width);
         }
 
         if (vc4->dirty & VC4_DIRTY_VIEWPORT) {
-                cl_u8(&vc4->bcl, VC4_PACKET_CLIPPER_XY_SCALING);
-                cl_f(&vc4->bcl, vc4->viewport.scale[0] * 16.0f);
-                cl_f(&vc4->bcl, vc4->viewport.scale[1] * 16.0f);
+                cl_u8(&bcl, VC4_PACKET_CLIPPER_XY_SCALING);
+                cl_f(&bcl, vc4->viewport.scale[0] * 16.0f);
+                cl_f(&bcl, vc4->viewport.scale[1] * 16.0f);
 
-                cl_u8(&vc4->bcl, VC4_PACKET_CLIPPER_Z_SCALING);
-                cl_f(&vc4->bcl, vc4->viewport.translate[2]);
-                cl_f(&vc4->bcl, vc4->viewport.scale[2]);
+                cl_u8(&bcl, VC4_PACKET_CLIPPER_Z_SCALING);
+                cl_f(&bcl, vc4->viewport.translate[2]);
+                cl_f(&bcl, vc4->viewport.scale[2]);
 
-                cl_u8(&vc4->bcl, VC4_PACKET_VIEWPORT_OFFSET);
-                cl_u16(&vc4->bcl, 16 * vc4->viewport.translate[0]);
-                cl_u16(&vc4->bcl, 16 * vc4->viewport.translate[1]);
+                cl_u8(&bcl, VC4_PACKET_VIEWPORT_OFFSET);
+                cl_u16(&bcl, 16 * vc4->viewport.translate[0]);
+                cl_u16(&bcl, 16 * vc4->viewport.translate[1]);
         }
 
         if (vc4->dirty & VC4_DIRTY_FLAT_SHADE_FLAGS) {
-                cl_u8(&vc4->bcl, VC4_PACKET_FLAT_SHADE_FLAGS);
-                cl_u32(&vc4->bcl, vc4->rasterizer->base.flatshade ?
+                cl_u8(&bcl, VC4_PACKET_FLAT_SHADE_FLAGS);
+                cl_u32(&bcl, vc4->rasterizer->base.flatshade ?
                        vc4->prog.fs->color_inputs : 0);
         }
+
+        cl_end(&vc4->bcl, bcl);
 }

@@ -2530,13 +2530,14 @@ static uint32_t translate_wrap(uint32_t p_wrap, bool using_nearest)
 
 static void
 write_texture_p0(struct vc4_context *vc4,
+                 struct vc4_cl_out **uniforms,
                  struct vc4_texture_stateobj *texstate,
                  uint32_t unit)
 {
         struct pipe_sampler_view *texture = texstate->textures[unit];
         struct vc4_resource *rsc = vc4_resource(texture->texture);
 
-        cl_reloc(vc4, &vc4->uniforms, rsc->bo,
+        cl_reloc(vc4, &vc4->uniforms, uniforms, rsc->bo,
                  VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
                  VC4_SET_FIELD(texture->u.tex.last_level -
                                texture->u.tex.first_level, VC4_TEX_P0_MIPLVLS) |
@@ -2547,6 +2548,7 @@ write_texture_p0(struct vc4_context *vc4,
 
 static void
 write_texture_p1(struct vc4_context *vc4,
+                 struct vc4_cl_out **uniforms,
                  struct vc4_texture_stateobj *texstate,
                  uint32_t unit)
 {
@@ -2570,7 +2572,7 @@ write_texture_p1(struct vc4_context *vc4,
                 (sampler->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST ||
                  sampler->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST);
 
-        cl_aligned_u32(&vc4->uniforms,
+        cl_aligned_u32(uniforms,
                        VC4_SET_FIELD(rsc->vc4_format >> 4, VC4_TEX_P1_TYPE4) |
                        VC4_SET_FIELD(texture->texture->height0 & 2047,
                                      VC4_TEX_P1_HEIGHT) |
@@ -2589,6 +2591,7 @@ write_texture_p1(struct vc4_context *vc4,
 
 static void
 write_texture_p2(struct vc4_context *vc4,
+                 struct vc4_cl_out **uniforms,
                  struct vc4_texture_stateobj *texstate,
                  uint32_t data)
 {
@@ -2596,7 +2599,7 @@ write_texture_p2(struct vc4_context *vc4,
         struct pipe_sampler_view *texture = texstate->textures[unit];
         struct vc4_resource *rsc = vc4_resource(texture->texture);
 
-        cl_aligned_u32(&vc4->uniforms,
+        cl_aligned_u32(uniforms,
                        VC4_SET_FIELD(VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE,
                                      VC4_TEX_P2_PTYPE) |
                        VC4_SET_FIELD(rsc->cube_map_stride >> 12, VC4_TEX_P2_CMST) |
@@ -2613,6 +2616,7 @@ write_texture_p2(struct vc4_context *vc4,
 
 static void
 write_texture_border_color(struct vc4_context *vc4,
+                           struct vc4_cl_out **uniforms,
                            struct vc4_texture_stateobj *texstate,
                            uint32_t unit)
 {
@@ -2673,7 +2677,7 @@ write_texture_border_color(struct vc4_context *vc4,
                 }
         }
 
-        cl_aligned_u32(&vc4->uniforms, uc.ui[0]);
+        cl_aligned_u32(uniforms, uc.ui[0]);
 }
 
 static uint32_t
@@ -2693,7 +2697,8 @@ get_texrect_scale(struct vc4_texture_stateobj *texstate,
 }
 
 static struct vc4_bo *
-vc4_upload_ubo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
+vc4_upload_ubo(struct vc4_context *vc4,
+               struct vc4_compiled_shader *shader,
                const uint32_t *gallium_uniforms)
 {
         if (!shader->ubo_size)
@@ -2722,72 +2727,78 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
         cl_ensure_space(&vc4->uniforms, (uinfo->count +
                                          uinfo->num_texture_samples) * 4);
 
-        cl_start_shader_reloc(&vc4->uniforms, uinfo->num_texture_samples);
+        struct vc4_cl_out *uniforms =
+                cl_start_shader_reloc(&vc4->uniforms,
+                                      uinfo->num_texture_samples);
 
         for (int i = 0; i < uinfo->count; i++) {
                 switch (uinfo->contents[i]) {
                 case QUNIFORM_CONSTANT:
-                        cl_aligned_u32(&vc4->uniforms, uinfo->data[i]);
+                        cl_aligned_u32(&uniforms, uinfo->data[i]);
                         break;
                 case QUNIFORM_UNIFORM:
-                        cl_aligned_u32(&vc4->uniforms,
+                        cl_aligned_u32(&uniforms,
                                        gallium_uniforms[uinfo->data[i]]);
                         break;
                 case QUNIFORM_VIEWPORT_X_SCALE:
-                        cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[0] * 16.0f);
+                        cl_aligned_f(&uniforms, vc4->viewport.scale[0] * 16.0f);
                         break;
                 case QUNIFORM_VIEWPORT_Y_SCALE:
-                        cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[1] * 16.0f);
+                        cl_aligned_f(&uniforms, vc4->viewport.scale[1] * 16.0f);
                         break;
                 case QUNIFORM_VIEWPORT_Z_OFFSET:
-                        cl_aligned_f(&vc4->uniforms, vc4->viewport.translate[2]);
+                        cl_aligned_f(&uniforms, vc4->viewport.translate[2]);
                         break;
                 case QUNIFORM_VIEWPORT_Z_SCALE:
-                        cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[2]);
+                        cl_aligned_f(&uniforms, vc4->viewport.scale[2]);
                         break;
                 case QUNIFORM_USER_CLIP_PLANE:
-                        cl_aligned_f(&vc4->uniforms,
+                        cl_aligned_f(&uniforms,
                                      vc4->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
                         break;
                 case QUNIFORM_TEXTURE_CONFIG_P0:
-                        write_texture_p0(vc4, texstate, uinfo->data[i]);
+                        write_texture_p0(vc4, &uniforms, texstate,
+                                         uinfo->data[i]);
                         break;
                 case QUNIFORM_TEXTURE_CONFIG_P1:
-                        write_texture_p1(vc4, texstate, uinfo->data[i]);
+                        write_texture_p1(vc4, &uniforms, texstate,
+                                         uinfo->data[i]);
                         break;
                 case QUNIFORM_TEXTURE_CONFIG_P2:
-                        write_texture_p2(vc4, texstate, uinfo->data[i]);
+                        write_texture_p2(vc4, &uniforms, texstate,
+                                         uinfo->data[i]);
                         break;
                 case QUNIFORM_UBO_ADDR:
-                        cl_aligned_reloc(vc4, &vc4->uniforms, ubo, 0);
+                        cl_aligned_reloc(vc4, &vc4->uniforms, &uniforms, ubo, 0);
                         break;
                 case QUNIFORM_TEXTURE_BORDER_COLOR:
-                        write_texture_border_color(vc4, texstate, uinfo->data[i]);
+                        write_texture_border_color(vc4, &uniforms,
                                                    texstate, uinfo->data[i]);
                         break;
                 case QUNIFORM_TEXRECT_SCALE_X:
                 case QUNIFORM_TEXRECT_SCALE_Y:
-                        cl_aligned_u32(&vc4->uniforms,
+                        cl_aligned_u32(&uniforms,
                                        get_texrect_scale(texstate,
                                                          uinfo->contents[i],
                                                          uinfo->data[i]));
                         break;
                 case QUNIFORM_BLEND_CONST_COLOR:
-                        cl_aligned_f(&vc4->uniforms,
+                        cl_aligned_f(&uniforms,
                                      CLAMP(vc4->blend_color.color[uinfo->data[i]], 0, 1));
                         break;
                 case QUNIFORM_STENCIL:
-                        cl_aligned_u32(&vc4->uniforms,
+                        cl_aligned_u32(&uniforms,
                                        vc4->zsa->stencil_uniforms[uinfo->data[i]] |
                                        (uinfo->data[i] <= 1 ?
                                         (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
@@ -2795,16 +2806,18 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                         break;
                 case QUNIFORM_ALPHA_REF:
-                        cl_aligned_f(&vc4->uniforms,
+                        cl_aligned_f(&uniforms,
                                      vc4->zsa->base.alpha.ref_value);
                         break;
                 }
 
 #if 0
-                uint32_t written_val = *(uint32_t *)(vc4->uniforms.next - 4);
+                uint32_t written_val = *((uint32_t *)uniforms - 1);
                 fprintf(stderr, "%p: %d / 0x%08x (%f)\n",
                         shader, i, written_val, uif(written_val));
 #endif
         }
+
+        cl_end(&vc4->uniforms, uniforms);
 }
 
 static void