vc4: Rework cl handling to be friendlier to the compiler.
Drops 680 bytes of code by avoiding a bunch of extra updates to the `next` pointer in struct vc4_cl.
This commit is contained in:
parent
a0d3915663
commit
7432017f65
|
@ -66,8 +66,15 @@ vc4_gem_hindex(struct vc4_context *vc4, struct vc4_bo *bo)
|
|||
return hindex;
|
||||
}
|
||||
|
||||
cl_u32(&vc4->bo_handles, bo->handle);
|
||||
cl_ptr(&vc4->bo_pointers, vc4_bo_reference(bo));
|
||||
struct vc4_cl_out *out;
|
||||
|
||||
out = cl_start(&vc4->bo_handles);
|
||||
cl_u32(&out, bo->handle);
|
||||
cl_end(&vc4->bo_handles, out);
|
||||
|
||||
out = cl_start(&vc4->bo_pointers);
|
||||
cl_ptr(&out, vc4_bo_reference(bo));
|
||||
cl_end(&vc4->bo_pointers, out);
|
||||
|
||||
return hindex;
|
||||
}
|
||||
|
|
|
@ -33,10 +33,16 @@
|
|||
|
||||
struct vc4_bo;
|
||||
|
||||
/**
|
||||
* Undefined structure, used for typechecking that you're passing the pointers
|
||||
* to these functions correctly.
|
||||
*/
|
||||
struct vc4_cl_out;
|
||||
|
||||
struct vc4_cl {
|
||||
void *base;
|
||||
void *next;
|
||||
void *reloc_next;
|
||||
struct vc4_cl_out *next;
|
||||
struct vc4_cl_out *reloc_next;
|
||||
uint32_t size;
|
||||
uint32_t reloc_count;
|
||||
};
|
||||
|
@ -55,122 +61,135 @@ static inline uint32_t cl_offset(struct vc4_cl *cl)
|
|||
}
|
||||
|
||||
static inline void
|
||||
put_unaligned_32(void *ptr, uint32_t val)
|
||||
cl_advance(struct vc4_cl_out **cl, uint32_t n)
|
||||
{
|
||||
struct unaligned_32 *p = ptr;
|
||||
(*cl) = (struct vc4_cl_out *)((char *)(*cl) + n);
|
||||
}
|
||||
|
||||
/**
 * Begins a run of writes to a command list by returning its current write
 * pointer (cl->next).
 *
 * The caller keeps the returned pointer in a local, advances it through the
 * cl_u8()/cl_u16()/cl_u32()/... helpers that take a struct vc4_cl_out **, and
 * must store it back with cl_end() when done; cl->next itself is not modified
 * here.
 */
static inline struct vc4_cl_out *
|
||||
cl_start(struct vc4_cl *cl)
|
||||
{
|
||||
return cl->next;
|
||||
}
|
||||
|
||||
/**
 * Finishes a run of writes started with cl_start() by storing the caller's
 * advanced write pointer back into cl->next.
 *
 * @param cl    command list being written
 * @param next  write pointer after the caller's last emitted value
 */
static inline void
|
||||
cl_end(struct vc4_cl *cl, struct vc4_cl_out *next)
|
||||
{
|
||||
cl->next = next;
|
||||
/* The pointer-based emit helpers only see a struct vc4_cl_out ** and so
 * cannot check the bound themselves; the overrun check happens here, after
 * the writes have already been made.  cl_offset() is defined outside this
 * view -- presumably the number of bytes used so far; confirm.
 */
|
||||
assert(cl_offset(cl) <= cl->size);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Stores a 32-bit value at ptr by writing the x member of a
 * struct unaligned_32 overlaid on that address.
 *
 * NOTE(review): struct unaligned_32 is declared outside this view; it is
 * presumably a packed wrapper so the store is valid at any alignment --
 * confirm against its declaration.
 */
static inline void
|
||||
put_unaligned_32(struct vc4_cl_out *ptr, uint32_t val)
|
||||
{
/* struct vc4_cl_out is an incomplete type, so go through void * to
 * reinterpret the address.
 */
|
||||
struct unaligned_32 *p = (void *)ptr;
|
||||
p->x = val;
|
||||
}
|
||||
|
||||
static inline void
|
||||
put_unaligned_16(void *ptr, uint16_t val)
|
||||
put_unaligned_16(struct vc4_cl_out *ptr, uint16_t val)
|
||||
{
|
||||
struct unaligned_16 *p = ptr;
|
||||
struct unaligned_16 *p = (void *)ptr;
|
||||
p->x = val;
|
||||
}
|
||||
|
||||
static inline void
|
||||
cl_u8(struct vc4_cl *cl, uint8_t n)
|
||||
cl_u8(struct vc4_cl_out **cl, uint8_t n)
|
||||
{
|
||||
assert(cl_offset(cl) + 1 <= cl->size);
|
||||
|
||||
*(uint8_t *)cl->next = n;
|
||||
cl->next++;
|
||||
*(uint8_t *)(*cl) = n;
|
||||
cl_advance(cl, 1);
|
||||
}
|
||||
|
||||
static inline void
|
||||
cl_u16(struct vc4_cl *cl, uint16_t n)
|
||||
cl_u16(struct vc4_cl_out **cl, uint16_t n)
|
||||
{
|
||||
assert(cl_offset(cl) + 2 <= cl->size);
|
||||
|
||||
put_unaligned_16(cl->next, n);
|
||||
cl->next += 2;
|
||||
put_unaligned_16(*cl, n);
|
||||
cl_advance(cl, 2);
|
||||
}
|
||||
|
||||
static inline void
|
||||
cl_u32(struct vc4_cl *cl, uint32_t n)
|
||||
cl_u32(struct vc4_cl_out **cl, uint32_t n)
|
||||
{
|
||||
assert(cl_offset(cl) + 4 <= cl->size);
|
||||
|
||||
put_unaligned_32(cl->next, n);
|
||||
cl->next += 4;
|
||||
put_unaligned_32(*cl, n);
|
||||
cl_advance(cl, 4);
|
||||
}
|
||||
|
||||
static inline void
|
||||
cl_aligned_u32(struct vc4_cl *cl, uint32_t n)
|
||||
cl_aligned_u32(struct vc4_cl_out **cl, uint32_t n)
|
||||
{
|
||||
assert(cl_offset(cl) + 4 <= cl->size);
|
||||
|
||||
*(uint32_t *)cl->next = n;
|
||||
cl->next += 4;
|
||||
*(uint32_t *)(*cl) = n;
|
||||
cl_advance(cl, 4);
|
||||
}
|
||||
|
||||
static inline void
|
||||
cl_ptr(struct vc4_cl *cl, void *ptr)
|
||||
cl_ptr(struct vc4_cl_out **cl, void *ptr)
|
||||
{
|
||||
assert(cl_offset(cl) + sizeof(void *) <= cl->size);
|
||||
|
||||
*(void **)cl->next = ptr;
|
||||
cl->next += sizeof(void *);
|
||||
*(struct vc4_cl_out **)(*cl) = ptr;
|
||||
cl_advance(cl, sizeof(void *));
|
||||
}
|
||||
|
||||
static inline void
|
||||
cl_f(struct vc4_cl *cl, float f)
|
||||
cl_f(struct vc4_cl_out **cl, float f)
|
||||
{
|
||||
cl_u32(cl, fui(f));
|
||||
}
|
||||
|
||||
static inline void
|
||||
cl_aligned_f(struct vc4_cl *cl, float f)
|
||||
cl_aligned_f(struct vc4_cl_out **cl, float f)
|
||||
{
|
||||
cl_aligned_u32(cl, fui(f));
|
||||
}
|
||||
|
||||
static inline void
|
||||
cl_start_reloc(struct vc4_cl *cl, uint32_t n)
|
||||
cl_start_reloc(struct vc4_cl *cl, struct vc4_cl_out **out, uint32_t n)
|
||||
{
|
||||
assert(n == 1 || n == 2);
|
||||
assert(cl->reloc_count == 0);
|
||||
cl->reloc_count = n;
|
||||
|
||||
cl_u8(cl, VC4_PACKET_GEM_HANDLES);
|
||||
cl->reloc_next = cl->next;
|
||||
cl_u32(cl, 0); /* Space where hindex will be written. */
|
||||
cl_u32(cl, 0); /* Space where hindex will be written. */
|
||||
cl_u8(out, VC4_PACKET_GEM_HANDLES);
|
||||
cl->reloc_next = *out;
|
||||
cl_u32(out, 0); /* Space where hindex will be written. */
|
||||
cl_u32(out, 0); /* Space where hindex will be written. */
|
||||
}
|
||||
|
||||
static inline void
|
||||
static inline struct vc4_cl_out *
|
||||
cl_start_shader_reloc(struct vc4_cl *cl, uint32_t n)
|
||||
{
|
||||
assert(cl->reloc_count == 0);
|
||||
cl->reloc_count = n;
|
||||
cl->reloc_next = cl->next;
|
||||
|
||||
/* Space where hindex will be written. */
|
||||
cl->next += n * 4;
|
||||
/* Reserve the space where hindex will be written. */
|
||||
cl_advance(&cl->next, n * 4);
|
||||
|
||||
return cl->next;
|
||||
}
|
||||
|
||||
static inline void
|
||||
cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
|
||||
cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl, struct vc4_cl_out **cl_out,
|
||||
struct vc4_bo *bo, uint32_t offset)
|
||||
{
|
||||
*(uint32_t *)cl->reloc_next = vc4_gem_hindex(vc4, bo);
|
||||
cl->reloc_next += 4;
|
||||
cl_advance(&cl->reloc_next, 4);
|
||||
|
||||
cl->reloc_count--;
|
||||
|
||||
cl_u32(cl, offset);
|
||||
cl_u32(cl_out, offset);
|
||||
}
|
||||
|
||||
static inline void
|
||||
cl_aligned_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
|
||||
struct vc4_bo *bo, uint32_t offset)
|
||||
struct vc4_cl_out **cl_out,
|
||||
struct vc4_bo *bo, uint32_t offset)
|
||||
{
|
||||
*(uint32_t *)cl->reloc_next = vc4_gem_hindex(vc4, bo);
|
||||
cl->reloc_next += 4;
|
||||
cl_advance(&cl->reloc_next, 4);
|
||||
|
||||
cl->reloc_count--;
|
||||
|
||||
cl_aligned_u32(cl, offset);
|
||||
cl_aligned_u32(cl_out, offset);
|
||||
}
|
||||
|
||||
void cl_ensure_space(struct vc4_cl *cl, uint32_t size);
|
||||
|
|
|
@ -61,9 +61,11 @@ vc4_flush(struct pipe_context *pctx)
|
|||
* FLUSH completes.
|
||||
*/
|
||||
cl_ensure_space(&vc4->bcl, 8);
|
||||
cl_u8(&vc4->bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
|
||||
struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
|
||||
cl_u8(&bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
|
||||
/* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */
|
||||
cl_u8(&vc4->bcl, VC4_PACKET_FLUSH);
|
||||
cl_u8(&bcl, VC4_PACKET_FLUSH);
|
||||
cl_end(&vc4->bcl, bcl);
|
||||
|
||||
if (cbuf && (vc4->resolve & PIPE_CLEAR_COLOR0)) {
|
||||
pipe_surface_reference(&vc4->color_write, cbuf);
|
||||
|
|
|
@ -71,37 +71,40 @@ vc4_start_draw(struct vc4_context *vc4)
|
|||
uint32_t height = vc4->framebuffer.height;
|
||||
uint32_t tilew = align(width, 64) / 64;
|
||||
uint32_t tileh = align(height, 64) / 64;
|
||||
struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
|
||||
|
||||
// Tile state data is 48 bytes per tile, I think it can be thrown away
|
||||
// as soon as binning is finished.
|
||||
cl_u8(&vc4->bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
|
||||
cl_u32(&vc4->bcl, 0); /* tile alloc addr, filled by kernel */
|
||||
cl_u32(&vc4->bcl, 0); /* tile alloc size, filled by kernel */
|
||||
cl_u32(&vc4->bcl, 0); /* tile state addr, filled by kernel */
|
||||
cl_u8(&vc4->bcl, tilew);
|
||||
cl_u8(&vc4->bcl, tileh);
|
||||
cl_u8(&vc4->bcl, 0); /* flags, filled by kernel. */
|
||||
cl_u8(&bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
|
||||
cl_u32(&bcl, 0); /* tile alloc addr, filled by kernel */
|
||||
cl_u32(&bcl, 0); /* tile alloc size, filled by kernel */
|
||||
cl_u32(&bcl, 0); /* tile state addr, filled by kernel */
|
||||
cl_u8(&bcl, tilew);
|
||||
cl_u8(&bcl, tileh);
|
||||
cl_u8(&bcl, 0); /* flags, filled by kernel. */
|
||||
|
||||
/* START_TILE_BINNING resets the statechange counters in the hardware,
|
||||
* which are what is used when a primitive is binned to a tile to
|
||||
* figure out what new state packets need to be written to that tile's
|
||||
* command list.
|
||||
*/
|
||||
cl_u8(&vc4->bcl, VC4_PACKET_START_TILE_BINNING);
|
||||
cl_u8(&bcl, VC4_PACKET_START_TILE_BINNING);
|
||||
|
||||
/* Reset the current compressed primitives format. This gets modified
|
||||
* by VC4_PACKET_GL_INDEXED_PRIMITIVE and
|
||||
* VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
|
||||
* of every tile.
|
||||
*/
|
||||
cl_u8(&vc4->bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
|
||||
cl_u8(&vc4->bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
|
||||
VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
|
||||
cl_u8(&bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
|
||||
cl_u8(&bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
|
||||
VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
|
||||
|
||||
vc4->needs_flush = true;
|
||||
vc4->draw_call_queued = true;
|
||||
vc4->draw_width = width;
|
||||
vc4->draw_height = height;
|
||||
|
||||
cl_end(&vc4->bcl, bcl);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -167,28 +170,29 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
|
|||
*/
|
||||
uint32_t num_elements_emit = MAX2(vtx->num_elements, 1);
|
||||
/* Emit the shader record. */
|
||||
cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit);
|
||||
cl_u16(&vc4->shader_rec,
|
||||
struct vc4_cl_out *shader_rec =
|
||||
cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit);
|
||||
cl_u16(&shader_rec,
|
||||
VC4_SHADER_FLAG_ENABLE_CLIPPING |
|
||||
((info->mode == PIPE_PRIM_POINTS &&
|
||||
vc4->rasterizer->base.point_size_per_vertex) ?
|
||||
VC4_SHADER_FLAG_VS_POINT_SIZE : 0));
|
||||
cl_u8(&vc4->shader_rec, 0); /* fs num uniforms (unused) */
|
||||
cl_u8(&vc4->shader_rec, vc4->prog.fs->num_inputs);
|
||||
cl_reloc(vc4, &vc4->shader_rec, vc4->prog.fs->bo, 0);
|
||||
cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
|
||||
cl_u8(&shader_rec, 0); /* fs num uniforms (unused) */
|
||||
cl_u8(&shader_rec, vc4->prog.fs->num_inputs);
|
||||
cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.fs->bo, 0);
|
||||
cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
|
||||
|
||||
cl_u16(&vc4->shader_rec, 0); /* vs num uniforms */
|
||||
cl_u8(&vc4->shader_rec, vc4->prog.vs->vattrs_live);
|
||||
cl_u8(&vc4->shader_rec, vc4->prog.vs->vattr_offsets[8]);
|
||||
cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo, 0);
|
||||
cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
|
||||
cl_u16(&shader_rec, 0); /* vs num uniforms */
|
||||
cl_u8(&shader_rec, vc4->prog.vs->vattrs_live);
|
||||
cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[8]);
|
||||
cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.vs->bo, 0);
|
||||
cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
|
||||
|
||||
cl_u16(&vc4->shader_rec, 0); /* cs num uniforms */
|
||||
cl_u8(&vc4->shader_rec, vc4->prog.cs->vattrs_live);
|
||||
cl_u8(&vc4->shader_rec, vc4->prog.cs->vattr_offsets[8]);
|
||||
cl_reloc(vc4, &vc4->shader_rec, vc4->prog.cs->bo, 0);
|
||||
cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
|
||||
cl_u16(&shader_rec, 0); /* cs num uniforms */
|
||||
cl_u8(&shader_rec, vc4->prog.cs->vattrs_live);
|
||||
cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[8]);
|
||||
cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.cs->bo, 0);
|
||||
cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
|
||||
|
||||
uint32_t max_index = 0xffff;
|
||||
uint32_t vpm_offset = 0;
|
||||
|
@ -202,11 +206,11 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
|
|||
uint32_t elem_size =
|
||||
util_format_get_blocksize(elem->src_format);
|
||||
|
||||
cl_reloc(vc4, &vc4->shader_rec, rsc->bo, offset);
|
||||
cl_u8(&vc4->shader_rec, elem_size - 1);
|
||||
cl_u8(&vc4->shader_rec, vb->stride);
|
||||
cl_u8(&vc4->shader_rec, vc4->prog.vs->vattr_offsets[i]);
|
||||
cl_u8(&vc4->shader_rec, vc4->prog.cs->vattr_offsets[i]);
|
||||
cl_reloc(vc4, &vc4->shader_rec, &shader_rec, rsc->bo, offset);
|
||||
cl_u8(&shader_rec, elem_size - 1);
|
||||
cl_u8(&shader_rec, vb->stride);
|
||||
cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[i]);
|
||||
cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[i]);
|
||||
|
||||
vpm_offset += align(elem_size, 4);
|
||||
|
||||
|
@ -219,21 +223,23 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
|
|||
if (vtx->num_elements == 0) {
|
||||
assert(num_elements_emit == 1);
|
||||
struct vc4_bo *bo = vc4_bo_alloc(vc4->screen, 4096, "scratch VBO");
|
||||
cl_reloc(vc4, &vc4->shader_rec, bo, 0);
|
||||
cl_u8(&vc4->shader_rec, 16 - 1); /* element size */
|
||||
cl_u8(&vc4->shader_rec, 0); /* stride */
|
||||
cl_u8(&vc4->shader_rec, 0); /* VS VPM offset */
|
||||
cl_u8(&vc4->shader_rec, 0); /* CS VPM offset */
|
||||
cl_reloc(vc4, &vc4->shader_rec, &shader_rec, bo, 0);
|
||||
cl_u8(&shader_rec, 16 - 1); /* element size */
|
||||
cl_u8(&shader_rec, 0); /* stride */
|
||||
cl_u8(&shader_rec, 0); /* VS VPM offset */
|
||||
cl_u8(&shader_rec, 0); /* CS VPM offset */
|
||||
vc4_bo_unreference(&bo);
|
||||
}
|
||||
cl_end(&vc4->shader_rec, shader_rec);
|
||||
|
||||
struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
|
||||
/* the actual draw call. */
|
||||
cl_u8(&vc4->bcl, VC4_PACKET_GL_SHADER_STATE);
|
||||
cl_u8(&bcl, VC4_PACKET_GL_SHADER_STATE);
|
||||
assert(vtx->num_elements <= 8);
|
||||
/* Note that number of attributes == 0 in the packet means 8
|
||||
* attributes. This field also contains the offset into shader_rec.
|
||||
*/
|
||||
cl_u32(&vc4->bcl, num_elements_emit & 0x7);
|
||||
cl_u32(&bcl, num_elements_emit & 0x7);
|
||||
|
||||
/* Note that the primitive type fields match with OpenGL/gallium
|
||||
* definitions, up to but not including QUADS.
|
||||
|
@ -251,25 +257,26 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
|
|||
}
|
||||
struct vc4_resource *rsc = vc4_resource(prsc);
|
||||
|
||||
cl_start_reloc(&vc4->bcl, 1);
|
||||
cl_u8(&vc4->bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
|
||||
cl_u8(&vc4->bcl,
|
||||
cl_start_reloc(&vc4->bcl, &bcl, 1);
|
||||
cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
|
||||
cl_u8(&bcl,
|
||||
info->mode |
|
||||
(index_size == 2 ?
|
||||
VC4_INDEX_BUFFER_U16:
|
||||
VC4_INDEX_BUFFER_U8));
|
||||
cl_u32(&vc4->bcl, info->count);
|
||||
cl_reloc(vc4, &vc4->bcl, rsc->bo, offset);
|
||||
cl_u32(&vc4->bcl, max_index);
|
||||
cl_u32(&bcl, info->count);
|
||||
cl_reloc(vc4, &vc4->bcl, &bcl, rsc->bo, offset);
|
||||
cl_u32(&bcl, max_index);
|
||||
|
||||
if (vc4->indexbuf.index_size == 4)
|
||||
pipe_resource_reference(&prsc, NULL);
|
||||
} else {
|
||||
cl_u8(&vc4->bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
|
||||
cl_u8(&vc4->bcl, info->mode);
|
||||
cl_u32(&vc4->bcl, info->count);
|
||||
cl_u32(&vc4->bcl, info->start);
|
||||
cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
|
||||
cl_u8(&bcl, info->mode);
|
||||
cl_u32(&bcl, info->count);
|
||||
cl_u32(&bcl, info->start);
|
||||
}
|
||||
cl_end(&vc4->bcl, bcl);
|
||||
|
||||
if (vc4->zsa && vc4->zsa->base.depth.enabled) {
|
||||
vc4->resolve |= PIPE_CLEAR_DEPTH;
|
||||
|
|
|
@ -28,6 +28,7 @@ vc4_emit_state(struct pipe_context *pctx)
|
|||
{
|
||||
struct vc4_context *vc4 = vc4_context(pctx);
|
||||
|
||||
struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
|
||||
if (vc4->dirty & (VC4_DIRTY_SCISSOR | VC4_DIRTY_VIEWPORT)) {
|
||||
float *vpscale = vc4->viewport.scale;
|
||||
float *vptranslate = vc4->viewport.translate;
|
||||
|
@ -40,11 +41,11 @@ vc4_emit_state(struct pipe_context *pctx)
|
|||
uint32_t maxx = MIN2(vc4->scissor.maxx, vp_maxx);
|
||||
uint32_t maxy = MIN2(vc4->scissor.maxy, vp_maxy);
|
||||
|
||||
cl_u8(&vc4->bcl, VC4_PACKET_CLIP_WINDOW);
|
||||
cl_u16(&vc4->bcl, minx);
|
||||
cl_u16(&vc4->bcl, miny);
|
||||
cl_u16(&vc4->bcl, maxx - minx);
|
||||
cl_u16(&vc4->bcl, maxy - miny);
|
||||
cl_u8(&bcl, VC4_PACKET_CLIP_WINDOW);
|
||||
cl_u16(&bcl, minx);
|
||||
cl_u16(&bcl, miny);
|
||||
cl_u16(&bcl, maxx - minx);
|
||||
cl_u16(&bcl, maxy - miny);
|
||||
|
||||
vc4->draw_min_x = MIN2(vc4->draw_min_x, minx);
|
||||
vc4->draw_min_y = MIN2(vc4->draw_min_y, miny);
|
||||
|
@ -53,47 +54,49 @@ vc4_emit_state(struct pipe_context *pctx)
|
|||
}
|
||||
|
||||
if (vc4->dirty & (VC4_DIRTY_RASTERIZER | VC4_DIRTY_ZSA)) {
|
||||
cl_u8(&vc4->bcl, VC4_PACKET_CONFIGURATION_BITS);
|
||||
cl_u8(&vc4->bcl,
|
||||
cl_u8(&bcl, VC4_PACKET_CONFIGURATION_BITS);
|
||||
cl_u8(&bcl,
|
||||
vc4->rasterizer->config_bits[0] |
|
||||
vc4->zsa->config_bits[0]);
|
||||
cl_u8(&vc4->bcl,
|
||||
cl_u8(&bcl,
|
||||
vc4->rasterizer->config_bits[1] |
|
||||
vc4->zsa->config_bits[1]);
|
||||
cl_u8(&vc4->bcl,
|
||||
cl_u8(&bcl,
|
||||
vc4->rasterizer->config_bits[2] |
|
||||
vc4->zsa->config_bits[2]);
|
||||
}
|
||||
|
||||
if (vc4->dirty & VC4_DIRTY_RASTERIZER) {
|
||||
cl_u8(&vc4->bcl, VC4_PACKET_DEPTH_OFFSET);
|
||||
cl_u16(&vc4->bcl, vc4->rasterizer->offset_factor);
|
||||
cl_u16(&vc4->bcl, vc4->rasterizer->offset_units);
|
||||
cl_u8(&bcl, VC4_PACKET_DEPTH_OFFSET);
|
||||
cl_u16(&bcl, vc4->rasterizer->offset_factor);
|
||||
cl_u16(&bcl, vc4->rasterizer->offset_units);
|
||||
|
||||
cl_u8(&vc4->bcl, VC4_PACKET_POINT_SIZE);
|
||||
cl_f(&vc4->bcl, vc4->rasterizer->point_size);
|
||||
cl_u8(&bcl, VC4_PACKET_POINT_SIZE);
|
||||
cl_f(&bcl, vc4->rasterizer->point_size);
|
||||
|
||||
cl_u8(&vc4->bcl, VC4_PACKET_LINE_WIDTH);
|
||||
cl_f(&vc4->bcl, vc4->rasterizer->base.line_width);
|
||||
cl_u8(&bcl, VC4_PACKET_LINE_WIDTH);
|
||||
cl_f(&bcl, vc4->rasterizer->base.line_width);
|
||||
}
|
||||
|
||||
if (vc4->dirty & VC4_DIRTY_VIEWPORT) {
|
||||
cl_u8(&vc4->bcl, VC4_PACKET_CLIPPER_XY_SCALING);
|
||||
cl_f(&vc4->bcl, vc4->viewport.scale[0] * 16.0f);
|
||||
cl_f(&vc4->bcl, vc4->viewport.scale[1] * 16.0f);
|
||||
cl_u8(&bcl, VC4_PACKET_CLIPPER_XY_SCALING);
|
||||
cl_f(&bcl, vc4->viewport.scale[0] * 16.0f);
|
||||
cl_f(&bcl, vc4->viewport.scale[1] * 16.0f);
|
||||
|
||||
cl_u8(&vc4->bcl, VC4_PACKET_CLIPPER_Z_SCALING);
|
||||
cl_f(&vc4->bcl, vc4->viewport.translate[2]);
|
||||
cl_f(&vc4->bcl, vc4->viewport.scale[2]);
|
||||
cl_u8(&bcl, VC4_PACKET_CLIPPER_Z_SCALING);
|
||||
cl_f(&bcl, vc4->viewport.translate[2]);
|
||||
cl_f(&bcl, vc4->viewport.scale[2]);
|
||||
|
||||
cl_u8(&vc4->bcl, VC4_PACKET_VIEWPORT_OFFSET);
|
||||
cl_u16(&vc4->bcl, 16 * vc4->viewport.translate[0]);
|
||||
cl_u16(&vc4->bcl, 16 * vc4->viewport.translate[1]);
|
||||
cl_u8(&bcl, VC4_PACKET_VIEWPORT_OFFSET);
|
||||
cl_u16(&bcl, 16 * vc4->viewport.translate[0]);
|
||||
cl_u16(&bcl, 16 * vc4->viewport.translate[1]);
|
||||
}
|
||||
|
||||
if (vc4->dirty & VC4_DIRTY_FLAT_SHADE_FLAGS) {
|
||||
cl_u8(&vc4->bcl, VC4_PACKET_FLAT_SHADE_FLAGS);
|
||||
cl_u32(&vc4->bcl, vc4->rasterizer->base.flatshade ?
|
||||
cl_u8(&bcl, VC4_PACKET_FLAT_SHADE_FLAGS);
|
||||
cl_u32(&bcl, vc4->rasterizer->base.flatshade ?
|
||||
vc4->prog.fs->color_inputs : 0);
|
||||
}
|
||||
|
||||
cl_end(&vc4->bcl, bcl);
|
||||
}
|
||||
|
|
|
@ -2530,13 +2530,14 @@ static uint32_t translate_wrap(uint32_t p_wrap, bool using_nearest)
|
|||
|
||||
static void
|
||||
write_texture_p0(struct vc4_context *vc4,
|
||||
struct vc4_cl_out **uniforms,
|
||||
struct vc4_texture_stateobj *texstate,
|
||||
uint32_t unit)
|
||||
{
|
||||
struct pipe_sampler_view *texture = texstate->textures[unit];
|
||||
struct vc4_resource *rsc = vc4_resource(texture->texture);
|
||||
|
||||
cl_reloc(vc4, &vc4->uniforms, rsc->bo,
|
||||
cl_reloc(vc4, &vc4->uniforms, uniforms, rsc->bo,
|
||||
VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
|
||||
VC4_SET_FIELD(texture->u.tex.last_level -
|
||||
texture->u.tex.first_level, VC4_TEX_P0_MIPLVLS) |
|
||||
|
@ -2547,6 +2548,7 @@ write_texture_p0(struct vc4_context *vc4,
|
|||
|
||||
static void
|
||||
write_texture_p1(struct vc4_context *vc4,
|
||||
struct vc4_cl_out **uniforms,
|
||||
struct vc4_texture_stateobj *texstate,
|
||||
uint32_t unit)
|
||||
{
|
||||
|
@ -2570,7 +2572,7 @@ write_texture_p1(struct vc4_context *vc4,
|
|||
(sampler->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST ||
|
||||
sampler->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST);
|
||||
|
||||
cl_aligned_u32(&vc4->uniforms,
|
||||
cl_aligned_u32(uniforms,
|
||||
VC4_SET_FIELD(rsc->vc4_format >> 4, VC4_TEX_P1_TYPE4) |
|
||||
VC4_SET_FIELD(texture->texture->height0 & 2047,
|
||||
VC4_TEX_P1_HEIGHT) |
|
||||
|
@ -2589,6 +2591,7 @@ write_texture_p1(struct vc4_context *vc4,
|
|||
|
||||
static void
|
||||
write_texture_p2(struct vc4_context *vc4,
|
||||
struct vc4_cl_out **uniforms,
|
||||
struct vc4_texture_stateobj *texstate,
|
||||
uint32_t data)
|
||||
{
|
||||
|
@ -2596,7 +2599,7 @@ write_texture_p2(struct vc4_context *vc4,
|
|||
struct pipe_sampler_view *texture = texstate->textures[unit];
|
||||
struct vc4_resource *rsc = vc4_resource(texture->texture);
|
||||
|
||||
cl_aligned_u32(&vc4->uniforms,
|
||||
cl_aligned_u32(uniforms,
|
||||
VC4_SET_FIELD(VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE,
|
||||
VC4_TEX_P2_PTYPE) |
|
||||
VC4_SET_FIELD(rsc->cube_map_stride >> 12, VC4_TEX_P2_CMST) |
|
||||
|
@ -2613,6 +2616,7 @@ write_texture_p2(struct vc4_context *vc4,
|
|||
|
||||
static void
|
||||
write_texture_border_color(struct vc4_context *vc4,
|
||||
struct vc4_cl_out **uniforms,
|
||||
struct vc4_texture_stateobj *texstate,
|
||||
uint32_t unit)
|
||||
{
|
||||
|
@ -2673,7 +2677,7 @@ write_texture_border_color(struct vc4_context *vc4,
|
|||
}
|
||||
}
|
||||
|
||||
cl_aligned_u32(&vc4->uniforms, uc.ui[0]);
|
||||
cl_aligned_u32(uniforms, uc.ui[0]);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
|
@ -2693,7 +2697,8 @@ get_texrect_scale(struct vc4_texture_stateobj *texstate,
|
|||
}
|
||||
|
||||
static struct vc4_bo *
|
||||
vc4_upload_ubo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
|
||||
vc4_upload_ubo(struct vc4_context *vc4,
|
||||
struct vc4_compiled_shader *shader,
|
||||
const uint32_t *gallium_uniforms)
|
||||
{
|
||||
if (!shader->ubo_size)
|
||||
|
@ -2722,72 +2727,78 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
|
|||
cl_ensure_space(&vc4->uniforms, (uinfo->count +
|
||||
uinfo->num_texture_samples) * 4);
|
||||
|
||||
cl_start_shader_reloc(&vc4->uniforms, uinfo->num_texture_samples);
|
||||
struct vc4_cl_out *uniforms =
|
||||
cl_start_shader_reloc(&vc4->uniforms,
|
||||
uinfo->num_texture_samples);
|
||||
|
||||
for (int i = 0; i < uinfo->count; i++) {
|
||||
|
||||
switch (uinfo->contents[i]) {
|
||||
case QUNIFORM_CONSTANT:
|
||||
cl_aligned_u32(&vc4->uniforms, uinfo->data[i]);
|
||||
cl_aligned_u32(&uniforms, uinfo->data[i]);
|
||||
break;
|
||||
case QUNIFORM_UNIFORM:
|
||||
cl_aligned_u32(&vc4->uniforms,
|
||||
cl_aligned_u32(&uniforms,
|
||||
gallium_uniforms[uinfo->data[i]]);
|
||||
break;
|
||||
case QUNIFORM_VIEWPORT_X_SCALE:
|
||||
cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[0] * 16.0f);
|
||||
cl_aligned_f(&uniforms, vc4->viewport.scale[0] * 16.0f);
|
||||
break;
|
||||
case QUNIFORM_VIEWPORT_Y_SCALE:
|
||||
cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[1] * 16.0f);
|
||||
cl_aligned_f(&uniforms, vc4->viewport.scale[1] * 16.0f);
|
||||
break;
|
||||
|
||||
case QUNIFORM_VIEWPORT_Z_OFFSET:
|
||||
cl_aligned_f(&vc4->uniforms, vc4->viewport.translate[2]);
|
||||
cl_aligned_f(&uniforms, vc4->viewport.translate[2]);
|
||||
break;
|
||||
case QUNIFORM_VIEWPORT_Z_SCALE:
|
||||
cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[2]);
|
||||
cl_aligned_f(&uniforms, vc4->viewport.scale[2]);
|
||||
break;
|
||||
|
||||
case QUNIFORM_USER_CLIP_PLANE:
|
||||
cl_aligned_f(&vc4->uniforms,
|
||||
cl_aligned_f(&uniforms,
|
||||
vc4->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
|
||||
break;
|
||||
|
||||
case QUNIFORM_TEXTURE_CONFIG_P0:
|
||||
write_texture_p0(vc4, texstate, uinfo->data[i]);
|
||||
write_texture_p0(vc4, &uniforms, texstate,
|
||||
uinfo->data[i]);
|
||||
break;
|
||||
|
||||
case QUNIFORM_TEXTURE_CONFIG_P1:
|
||||
write_texture_p1(vc4, texstate, uinfo->data[i]);
|
||||
write_texture_p1(vc4, &uniforms, texstate,
|
||||
uinfo->data[i]);
|
||||
break;
|
||||
|
||||
case QUNIFORM_TEXTURE_CONFIG_P2:
|
||||
write_texture_p2(vc4, texstate, uinfo->data[i]);
|
||||
write_texture_p2(vc4, &uniforms, texstate,
|
||||
uinfo->data[i]);
|
||||
break;
|
||||
|
||||
case QUNIFORM_UBO_ADDR:
|
||||
cl_aligned_reloc(vc4, &vc4->uniforms, ubo, 0);
|
||||
cl_aligned_reloc(vc4, &vc4->uniforms, &uniforms, ubo, 0);
|
||||
break;
|
||||
|
||||
case QUNIFORM_TEXTURE_BORDER_COLOR:
|
||||
write_texture_border_color(vc4, texstate, uinfo->data[i]);
|
||||
write_texture_border_color(vc4, &uniforms,
|
||||
texstate, uinfo->data[i]);
|
||||
break;
|
||||
|
||||
case QUNIFORM_TEXRECT_SCALE_X:
|
||||
case QUNIFORM_TEXRECT_SCALE_Y:
|
||||
cl_aligned_u32(&vc4->uniforms,
|
||||
cl_aligned_u32(&uniforms,
|
||||
get_texrect_scale(texstate,
|
||||
uinfo->contents[i],
|
||||
uinfo->data[i]));
|
||||
break;
|
||||
|
||||
case QUNIFORM_BLEND_CONST_COLOR:
|
||||
cl_aligned_f(&vc4->uniforms,
|
||||
cl_aligned_f(&uniforms,
|
||||
CLAMP(vc4->blend_color.color[uinfo->data[i]], 0, 1));
|
||||
break;
|
||||
|
||||
case QUNIFORM_STENCIL:
|
||||
cl_aligned_u32(&vc4->uniforms,
|
||||
cl_aligned_u32(&uniforms,
|
||||
vc4->zsa->stencil_uniforms[uinfo->data[i]] |
|
||||
(uinfo->data[i] <= 1 ?
|
||||
(vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
|
||||
|
@ -2795,16 +2806,18 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
|
|||
break;
|
||||
|
||||
case QUNIFORM_ALPHA_REF:
|
||||
cl_aligned_f(&vc4->uniforms,
|
||||
cl_aligned_f(&uniforms,
|
||||
vc4->zsa->base.alpha.ref_value);
|
||||
break;
|
||||
}
|
||||
#if 0
|
||||
uint32_t written_val = *(uint32_t *)(vc4->uniforms.next - 4);
|
||||
uint32_t written_val = *((uint32_t *)uniforms - 1);
|
||||
fprintf(stderr, "%p: %d / 0x%08x (%f)\n",
|
||||
shader, i, written_val, uif(written_val));
|
||||
#endif
|
||||
}
|
||||
|
||||
cl_end(&vc4->uniforms, uniforms);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
Loading…
Reference in New Issue