radeonsi: implement ARB_draw_indirect
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
This commit is contained in:
parent
887b69a233
commit
2a7b57ad42
|
@ -98,7 +98,7 @@ GL 4.0:
|
|||
|
||||
GLSL 4.0 not started
|
||||
GL_ARB_draw_buffers_blend DONE (i965, nv50, nvc0, r600, radeonsi, softpipe)
|
||||
GL_ARB_draw_indirect DONE (i965, nvc0)
|
||||
GL_ARB_draw_indirect DONE (i965, nvc0, radeonsi, softpipe, llvmpipe)
|
||||
GL_ARB_gpu_shader5 started
|
||||
- 'precise' qualifier DONE
|
||||
- Dynamically uniform sampler array indices started (Chris)
|
||||
|
@ -165,7 +165,7 @@ GL 4.3:
|
|||
GL_ARB_framebuffer_no_attachments not started
|
||||
GL_ARB_internalformat_query2 not started
|
||||
GL_ARB_invalidate_subdata DONE (all drivers)
|
||||
GL_ARB_multi_draw_indirect DONE (i965, nvc0)
|
||||
GL_ARB_multi_draw_indirect DONE (i965, nvc0, radeonsi, softpipe, llvmpipe)
|
||||
GL_ARB_program_interface_query not started
|
||||
GL_ARB_robust_buffer_access_behavior not started
|
||||
GL_ARB_shader_image_size not started
|
||||
|
|
|
@ -45,7 +45,9 @@ Note: some of the new features are only available with certain drivers.
|
|||
|
||||
<ul>
|
||||
<li>GL_ARB_compressed_texture_pixel_storage on all drivers</li>
|
||||
<li>GL_ARB_draw_indirect on nvc0, radeonsi</li>
|
||||
<li>GL_ARB_explicit_uniform_location (all drivers that support GLSL)</li>
|
||||
<li>GL_ARB_multi_draw_indirect on nvc0, radeonsi</li>
|
||||
<li>GL_ARB_sample_shading on radeonsi</li>
|
||||
<li>GL_ARB_stencil_texturing on nv50, nvc0, r600, and radeonsi</li>
|
||||
<li>GL_ARB_texture_cube_map_array on radeonsi</li>
|
||||
|
@ -56,7 +58,6 @@ Note: some of the new features are only available with certain drivers.
|
|||
<li>GL_ARB_seamless_cubemap_per_texture on i965, llvmpipe, nvc0, r600, radeonsi, softpipe</li>
|
||||
<li>GL_ARB_fragment_layer_viewport on nv50, nvc0, llvmpipe, r600</li>
|
||||
<li>GL_AMD_vertex_shader_viewport_index on i965/gen7+, r600</li>
|
||||
<li>GL_ARB_(multi_)draw_indirect on nvc0</li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
|
|
@ -57,6 +57,59 @@ void si_cmd_draw_index_auto(struct si_pm4_state *pm4, uint32_t count,
|
|||
si_pm4_cmd_end(pm4, predicate);
|
||||
}
|
||||
|
||||
void si_cmd_draw_indirect(struct si_pm4_state *pm4, uint64_t indirect_va,
|
||||
uint32_t indirect_offset, uint32_t base_vtx_loc,
|
||||
uint32_t start_inst_loc, bool predicate)
|
||||
{
|
||||
assert(indirect_va % 8 == 0);
|
||||
assert(indirect_offset % 4 == 0);
|
||||
|
||||
si_pm4_cmd_begin(pm4, PKT3_SET_BASE);
|
||||
si_pm4_cmd_add(pm4, 1);
|
||||
si_pm4_cmd_add(pm4, indirect_va);
|
||||
si_pm4_cmd_add(pm4, indirect_va >> 32);
|
||||
si_pm4_cmd_end(pm4, predicate);
|
||||
|
||||
si_pm4_cmd_begin(pm4, PKT3_DRAW_INDIRECT);
|
||||
si_pm4_cmd_add(pm4, indirect_offset);
|
||||
si_pm4_cmd_add(pm4, (base_vtx_loc - SI_SH_REG_OFFSET) >> 2);
|
||||
si_pm4_cmd_add(pm4, (start_inst_loc - SI_SH_REG_OFFSET) >> 2);
|
||||
si_pm4_cmd_add(pm4, V_0287F0_DI_SRC_SEL_AUTO_INDEX);
|
||||
si_pm4_cmd_end(pm4, predicate);
|
||||
}
|
||||
|
||||
void si_cmd_draw_index_indirect(struct si_pm4_state *pm4, uint64_t indirect_va,
|
||||
uint64_t index_va, uint32_t index_max_size,
|
||||
uint32_t indirect_offset, uint32_t base_vtx_loc,
|
||||
uint32_t start_inst_loc, bool predicate)
|
||||
{
|
||||
assert(indirect_va % 8 == 0);
|
||||
assert(index_va % 2 == 0);
|
||||
assert(indirect_offset % 4 == 0);
|
||||
|
||||
si_pm4_cmd_begin(pm4, PKT3_SET_BASE);
|
||||
si_pm4_cmd_add(pm4, 1);
|
||||
si_pm4_cmd_add(pm4, indirect_va);
|
||||
si_pm4_cmd_add(pm4, indirect_va >> 32);
|
||||
si_pm4_cmd_end(pm4, predicate);
|
||||
|
||||
si_pm4_cmd_begin(pm4, PKT3_INDEX_BASE);
|
||||
si_pm4_cmd_add(pm4, index_va);
|
||||
si_pm4_cmd_add(pm4, index_va >> 32);
|
||||
si_pm4_cmd_end(pm4, predicate);
|
||||
|
||||
si_pm4_cmd_begin(pm4, PKT3_INDEX_BUFFER_SIZE);
|
||||
si_pm4_cmd_add(pm4, index_max_size);
|
||||
si_pm4_cmd_end(pm4, predicate);
|
||||
|
||||
si_pm4_cmd_begin(pm4, PKT3_DRAW_INDEX_INDIRECT);
|
||||
si_pm4_cmd_add(pm4, indirect_offset);
|
||||
si_pm4_cmd_add(pm4, (base_vtx_loc - SI_SH_REG_OFFSET) >> 2);
|
||||
si_pm4_cmd_add(pm4, (start_inst_loc - SI_SH_REG_OFFSET) >> 2);
|
||||
si_pm4_cmd_add(pm4, V_0287F0_DI_SRC_SEL_DMA);
|
||||
si_pm4_cmd_end(pm4, predicate);
|
||||
}
|
||||
|
||||
void si_cmd_surface_sync(struct si_pm4_state *pm4, uint32_t cp_coher_cntl)
|
||||
{
|
||||
if (pm4->chip_class >= CIK) {
|
||||
|
|
|
@ -214,6 +214,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
|
||||
case PIPE_CAP_CUBE_MAP_ARRAY:
|
||||
case PIPE_CAP_SAMPLE_SHADING:
|
||||
case PIPE_CAP_DRAW_INDIRECT:
|
||||
return 1;
|
||||
|
||||
case PIPE_CAP_TEXTURE_MULTISAMPLE:
|
||||
|
|
|
@ -265,6 +265,13 @@ void si_cmd_draw_index_2(struct si_pm4_state *pm4, uint32_t max_size,
|
|||
uint32_t initiator, bool predicate);
|
||||
void si_cmd_draw_index_auto(struct si_pm4_state *pm4, uint32_t count,
|
||||
uint32_t initiator, bool predicate);
|
||||
void si_cmd_draw_indirect(struct si_pm4_state *pm4, uint64_t indirect_va,
|
||||
uint32_t indirect_offset, uint32_t base_vtx_loc,
|
||||
uint32_t start_inst_loc, bool predicate);
|
||||
void si_cmd_draw_index_indirect(struct si_pm4_state *pm4, uint64_t indirect_va,
|
||||
uint64_t index_va, uint32_t index_max_size,
|
||||
uint32_t indirect_offset, uint32_t base_vtx_loc,
|
||||
uint32_t start_inst_loc, bool predicate);
|
||||
void si_cmd_surface_sync(struct si_pm4_state *pm4, uint32_t cp_coher_cntl);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -783,15 +783,18 @@ static void si_state_draw(struct si_context *sctx,
|
|||
}
|
||||
si_pm4_cmd_end(pm4, sctx->b.predicate_drawing);
|
||||
|
||||
if (!info->indirect) {
|
||||
si_pm4_cmd_begin(pm4, PKT3_NUM_INSTANCES);
|
||||
si_pm4_cmd_add(pm4, info->instance_count);
|
||||
si_pm4_cmd_end(pm4, sctx->b.predicate_drawing);
|
||||
|
||||
if (!info->indirect) {
|
||||
si_pm4_set_reg(pm4, sh_base_reg + SI_SGPR_BASE_VERTEX * 4,
|
||||
info->indexed ? info->index_bias : info->start);
|
||||
si_pm4_set_reg(pm4, sh_base_reg + SI_SGPR_START_INSTANCE * 4,
|
||||
info->start_instance);
|
||||
} else {
|
||||
si_pm4_add_bo(pm4, (struct r600_resource *)info->indirect,
|
||||
RADEON_USAGE_READ, RADEON_PRIO_MIN);
|
||||
}
|
||||
|
||||
if (info->indexed) {
|
||||
|
@ -803,14 +806,35 @@ static void si_state_draw(struct si_context *sctx,
|
|||
|
||||
si_pm4_add_bo(pm4, (struct r600_resource *)ib->buffer, RADEON_USAGE_READ,
|
||||
RADEON_PRIO_MIN);
|
||||
|
||||
if (info->indirect) {
|
||||
uint64_t indirect_va = r600_resource_va(&sctx->screen->b.b,
|
||||
info->indirect);
|
||||
si_cmd_draw_index_indirect(pm4, indirect_va, va, max_size,
|
||||
info->indirect_offset,
|
||||
sh_base_reg + SI_SGPR_BASE_VERTEX * 4,
|
||||
sh_base_reg + SI_SGPR_START_INSTANCE * 4,
|
||||
sctx->b.predicate_drawing);
|
||||
} else {
|
||||
va += info->start * ib->index_size;
|
||||
si_cmd_draw_index_2(pm4, max_size, va, info->count,
|
||||
V_0287F0_DI_SRC_SEL_DMA,
|
||||
sctx->b.predicate_drawing);
|
||||
}
|
||||
} else {
|
||||
uint32_t initiator = V_0287F0_DI_SRC_SEL_AUTO_INDEX;
|
||||
initiator |= S_0287F0_USE_OPAQUE(!!info->count_from_stream_output);
|
||||
si_cmd_draw_index_auto(pm4, info->count, initiator, sctx->b.predicate_drawing);
|
||||
if (info->indirect) {
|
||||
uint64_t indirect_va = r600_resource_va(&sctx->screen->b.b,
|
||||
info->indirect);
|
||||
si_cmd_draw_indirect(pm4, indirect_va, info->indirect_offset,
|
||||
sh_base_reg + SI_SGPR_BASE_VERTEX * 4,
|
||||
sh_base_reg + SI_SGPR_START_INSTANCE * 4,
|
||||
sctx->b.predicate_drawing);
|
||||
} else {
|
||||
si_cmd_draw_index_auto(pm4, info->count,
|
||||
V_0287F0_DI_SRC_SEL_AUTO_INDEX |
|
||||
S_0287F0_USE_OPAQUE(!!info->count_from_stream_output),
|
||||
sctx->b.predicate_drawing);
|
||||
}
|
||||
}
|
||||
|
||||
si_pm4_set_state(sctx, draw, pm4);
|
||||
|
@ -898,13 +922,32 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
|
|||
|
||||
const struct r600_atom si_atom_cache_flush = { si_emit_cache_flush, 13 }; /* number of CS dwords */
|
||||
|
||||
static void si_get_draw_start_count(struct si_context *sctx,
|
||||
const struct pipe_draw_info *info,
|
||||
unsigned *start, unsigned *count)
|
||||
{
|
||||
if (info->indirect) {
|
||||
struct r600_resource *indirect =
|
||||
(struct r600_resource*)info->indirect;
|
||||
int *data = r600_buffer_map_sync_with_rings(&sctx->b,
|
||||
indirect, PIPE_TRANSFER_READ);
|
||||
data += info->indirect_offset/sizeof(int);
|
||||
*start = data[2];
|
||||
*count = data[0];
|
||||
} else {
|
||||
*start = info->start;
|
||||
*count = info->count;
|
||||
}
|
||||
}
|
||||
|
||||
void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
|
||||
{
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
struct pipe_index_buffer ib = {};
|
||||
uint32_t i;
|
||||
|
||||
if (!info->count && (info->indexed || !info->count_from_stream_output))
|
||||
if (!info->count && !info->indirect &&
|
||||
(info->indexed || !info->count_from_stream_output))
|
||||
return;
|
||||
|
||||
if (!sctx->ps_shader || !sctx->vs_shader)
|
||||
|
@ -926,8 +969,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
|
|||
unsigned out_offset, start, count, start_offset;
|
||||
void *ptr;
|
||||
|
||||
start = info->start;
|
||||
count = info->count;
|
||||
si_get_draw_start_count(sctx, info, &start, &count);
|
||||
start_offset = start * ib.index_size;
|
||||
|
||||
u_upload_alloc(sctx->b.uploader, start_offset, count * 2,
|
||||
|
@ -946,8 +988,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
|
|||
} else if (ib.user_buffer && !ib.buffer) {
|
||||
unsigned start, count, start_offset;
|
||||
|
||||
start = info->start;
|
||||
count = info->count;
|
||||
si_get_draw_start_count(sctx, info, &start, &count);
|
||||
start_offset = start * ib.index_size;
|
||||
|
||||
u_upload_data(sctx->b.uploader, start_offset, count * ib.index_size,
|
||||
|
|
|
@ -70,18 +70,27 @@
|
|||
#define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7
|
||||
|
||||
#define PKT3_NOP 0x10
|
||||
#define PKT3_SET_BASE 0x11
|
||||
#define PKT3_CLEAR_STATE 0x12
|
||||
#define PKT3_INDEX_BUFFER_SIZE 0x13
|
||||
#define PKT3_DISPATCH_DIRECT 0x15
|
||||
#define PKT3_DISPATCH_INDIRECT 0x16
|
||||
#define PKT3_OCCLUSION_QUERY 0x1F /* new for CIK */
|
||||
#define PKT3_SET_PREDICATION 0x20
|
||||
#define PKT3_COND_EXEC 0x22
|
||||
#define PKT3_PRED_EXEC 0x23
|
||||
#define PKT3_DRAW_INDIRECT 0x24
|
||||
#define PKT3_DRAW_INDEX_INDIRECT 0x25
|
||||
#define PKT3_INDEX_BASE 0x26
|
||||
#define PKT3_DRAW_INDEX_2 0x27
|
||||
#define PKT3_CONTEXT_CONTROL 0x28
|
||||
#define PKT3_INDEX_TYPE 0x2A
|
||||
#define PKT3_DRAW_INDIRECT_MULTI 0x2C
|
||||
#define PKT3_DRAW_INDEX_AUTO 0x2D
|
||||
#define PKT3_DRAW_INDEX_IMMD 0x2E /* not on CIK */
|
||||
#define PKT3_NUM_INSTANCES 0x2F
|
||||
#define PKT3_DRAW_INDEX_MULTI_AUTO 0x30
|
||||
#define PKT3_INDIRECT_BUFFER 0x32
|
||||
#define PKT3_STRMOUT_BUFFER_UPDATE 0x34
|
||||
#define PKT3_DRAW_INDEX_OFFSET_2 0x35
|
||||
#define PKT3_DRAW_PREAMBLE 0x36 /* new on CIK, required on GFX7.2 and later */
|
||||
|
@ -99,12 +108,12 @@
|
|||
#define PKT3_WRITE_DATA_ENGINE_SEL_ME 0
|
||||
#define PKT3_WRITE_DATA_ENGINE_SEL_PFP 1
|
||||
#define PKT3_WRITE_DATA_ENGINE_SEL_CE 2
|
||||
#define PKT3_DRAW_INDEX_INDIRECT_MULTI 0x38
|
||||
#define PKT3_MEM_SEMAPHORE 0x39
|
||||
#define PKT3_MPEG_INDEX 0x3A /* not on CIK */
|
||||
#define PKT3_WAIT_REG_MEM 0x3C
|
||||
#define WAIT_REG_MEM_EQUAL 3
|
||||
#define PKT3_MEM_WRITE 0x3D /* not on CIK */
|
||||
#define PKT3_INDIRECT_BUFFER 0x32
|
||||
#define PKT3_COPY_DATA 0x40
|
||||
#define COPY_DATA_SRC_SEL(x) ((x) & 0xf)
|
||||
#define COPY_DATA_REG 0
|
||||
|
|
Loading…
Reference in New Issue