gallium/tgsi: start adding hw atomics (v3.2)
This adds support for hw atomic counters to TGSI. A new register file for storing atomic counters is added, along with a new atomic counter semantic, along with docs for both. v2: drop semantic, move hw counter to backend, Ilia pointed out SSO would have busted my plan, and he was right. v3: drop BUFFER decls. (Marek) v3.1: minor fixups for whitespace, set ureg error if we overflow the hw atomic limits. (nha) v3.2: fix some docs inconsistencies (Ilia) Reviewed-by: Marek Olšák <marek.olsak@amd.com> Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com> Tested-By: Gert Wollny <gw.fossdev@gmail.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
parent
2a06423c00
commit
4b0b82770a
|
@ -58,6 +58,7 @@ static const char *tgsi_file_names[] =
|
|||
"BUFFER",
|
||||
"MEMORY",
|
||||
"CONSTBUF",
|
||||
"HWATOMIC",
|
||||
};
|
||||
|
||||
const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
|
||||
|
|
|
@ -80,6 +80,7 @@ struct ureg_tokens {
|
|||
#define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS
|
||||
#define UREG_MAX_OUTPUT (4 * PIPE_MAX_SHADER_OUTPUTS)
|
||||
#define UREG_MAX_CONSTANT_RANGE 32
|
||||
#define UREG_MAX_HW_ATOMIC_RANGE 32
|
||||
#define UREG_MAX_IMMEDIATE 4096
|
||||
#define UREG_MAX_ADDR 3
|
||||
#define UREG_MAX_ARRAY_TEMPS 256
|
||||
|
@ -92,6 +93,15 @@ struct const_decl {
|
|||
unsigned nr_constant_ranges;
|
||||
};
|
||||
|
||||
struct hw_atomic_decl {
|
||||
struct {
|
||||
unsigned first;
|
||||
unsigned last;
|
||||
unsigned array_id;
|
||||
} hw_atomic_range[UREG_MAX_HW_ATOMIC_RANGE];
|
||||
unsigned nr_hw_atomic_ranges;
|
||||
};
|
||||
|
||||
#define DOMAIN_DECL 0
|
||||
#define DOMAIN_INSN 1
|
||||
|
||||
|
@ -182,6 +192,8 @@ struct ureg_program
|
|||
|
||||
struct const_decl const_decls[PIPE_MAX_CONSTANT_BUFFERS];
|
||||
|
||||
struct hw_atomic_decl hw_atomic_decls[PIPE_MAX_HW_ATOMIC_BUFFERS];
|
||||
|
||||
unsigned properties[TGSI_PROPERTY_COUNT];
|
||||
|
||||
unsigned nr_addrs;
|
||||
|
@ -583,6 +595,30 @@ out:
|
|||
return ureg_src_dimension(src, 0);
|
||||
}
|
||||
|
||||
|
||||
/* Returns a new hw atomic register. Keep track of which have been
|
||||
* referred to so that we can emit decls later.
|
||||
*/
|
||||
void
|
||||
ureg_DECL_hw_atomic(struct ureg_program *ureg,
|
||||
unsigned first,
|
||||
unsigned last,
|
||||
unsigned buffer_id,
|
||||
unsigned array_id)
|
||||
{
|
||||
struct hw_atomic_decl *decl = &ureg->hw_atomic_decls[buffer_id];
|
||||
|
||||
if (decl->nr_hw_atomic_ranges < UREG_MAX_HW_ATOMIC_RANGE) {
|
||||
uint i = decl->nr_hw_atomic_ranges++;
|
||||
|
||||
decl->hw_atomic_range[i].first = first;
|
||||
decl->hw_atomic_range[i].last = last;
|
||||
decl->hw_atomic_range[i].array_id = array_id;
|
||||
} else {
|
||||
set_bad(ureg);
|
||||
}
|
||||
}
|
||||
|
||||
static struct ureg_dst alloc_temporary( struct ureg_program *ureg,
|
||||
boolean local )
|
||||
{
|
||||
|
@ -1501,6 +1537,35 @@ emit_decl_semantic(struct ureg_program *ureg,
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_decl_atomic_2d(struct ureg_program *ureg,
|
||||
unsigned first,
|
||||
unsigned last,
|
||||
unsigned index2D,
|
||||
unsigned array_id)
|
||||
{
|
||||
union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 4 : 3);
|
||||
|
||||
out[0].value = 0;
|
||||
out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
|
||||
out[0].decl.NrTokens = 3;
|
||||
out[0].decl.File = TGSI_FILE_HW_ATOMIC;
|
||||
out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
|
||||
out[0].decl.Dimension = 1;
|
||||
out[0].decl.Array = array_id != 0;
|
||||
|
||||
out[1].value = 0;
|
||||
out[1].decl_range.First = first;
|
||||
out[1].decl_range.Last = last;
|
||||
|
||||
out[2].value = 0;
|
||||
out[2].decl_dim.Index2D = index2D;
|
||||
|
||||
if (array_id) {
|
||||
out[3].value = 0;
|
||||
out[3].array.ArrayID = array_id;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_decl_fs(struct ureg_program *ureg,
|
||||
|
@ -1908,6 +1973,22 @@ static void emit_decls( struct ureg_program *ureg )
|
|||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < PIPE_MAX_HW_ATOMIC_BUFFERS; i++) {
|
||||
struct hw_atomic_decl *decl = &ureg->hw_atomic_decls[i];
|
||||
|
||||
if (decl->nr_hw_atomic_ranges) {
|
||||
uint j;
|
||||
|
||||
for (j = 0; j < decl->nr_hw_atomic_ranges; j++) {
|
||||
emit_decl_atomic_2d(ureg,
|
||||
decl->hw_atomic_range[j].first,
|
||||
decl->hw_atomic_range[j].last,
|
||||
i,
|
||||
decl->hw_atomic_range[j].array_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (ureg->nr_temps) {
|
||||
unsigned array = 0;
|
||||
for (i = 0; i < ureg->nr_temps;) {
|
||||
|
|
|
@ -316,6 +316,13 @@ struct ureg_src
|
|||
ureg_DECL_constant( struct ureg_program *,
|
||||
unsigned index );
|
||||
|
||||
/**
 * Record the hw atomic counter range [first, last] for buffer buffer_id,
 * tagged with array_id, so that its declaration can be emitted later.
 */
void
ureg_DECL_hw_atomic(struct ureg_program *ureg,
                    unsigned first, unsigned last,
                    unsigned buffer_id, unsigned array_id);
|
||||
|
||||
struct ureg_dst
|
||||
ureg_DECL_temporary( struct ureg_program * );
|
||||
|
||||
|
|
|
@ -2638,9 +2638,11 @@ logical operations. In this context atomicity means that another
|
|||
concurrent memory access operation that affects the same memory
|
||||
location is guaranteed to be performed strictly before or after the
|
||||
entire execution of the atomic operation. The resource may be a BUFFER,
|
||||
IMAGE, or MEMORY. In the case of an image, the offset works the same as for
|
||||
``LOAD`` and ``STORE``, specified above. These atomic operations may
|
||||
only be used with 32-bit integer image formats.
|
||||
IMAGE, HWATOMIC, or MEMORY. In the case of an image, the offset works
|
||||
the same as for ``LOAD`` and ``STORE``, specified above. For atomic
|
||||
counters, the offset is an immediate index to the base hw atomic
|
||||
counter for this operation.
|
||||
These atomic operations may only be used with 32-bit integer image formats.
|
||||
|
||||
.. opcode:: ATOMUADD - Atomic integer addition
|
||||
|
||||
|
@ -3517,6 +3519,31 @@ accessing a misaligned address is undefined.
|
|||
Usage of the STORE opcode is only allowed if the WR (writable) flag
|
||||
is set.
|
||||
|
||||
Hardware Atomic Register File
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Hardware atomics are declared as a 2D array with an optional array id.
|
||||
|
||||
The first member of the dimension is the buffer resource the atomic
|
||||
is located in.
|
||||
The second member is a range into the buffer resource, either for
|
||||
one or multiple counters. If this is an array, the declaration will have
|
||||
a unique array id.
|
||||
|
||||
Each counter is 4 bytes in size; indices and ranges are expressed in counters, not bytes.
|
||||
DCL HWATOMIC[0][0]
|
||||
DCL HWATOMIC[0][1]
|
||||
|
||||
This declares two atomics, one at the start of the buffer and one in the
|
||||
second 4 bytes.
|
||||
|
||||
DCL HWATOMIC[0][0]
|
||||
DCL HWATOMIC[1][0]
|
||||
DCL HWATOMIC[1][1..3], ARRAY(1)
|
||||
|
||||
This declares 5 atomics, one in buffer 0 at 0,
|
||||
one in buffer 1 at 0, and an array of 3 atomics in
|
||||
buffer 1, starting at 1.
|
||||
|
||||
Properties
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
|
|
@ -75,6 +75,7 @@ enum tgsi_file_type {
|
|||
TGSI_FILE_BUFFER,
|
||||
TGSI_FILE_MEMORY,
|
||||
TGSI_FILE_CONSTBUF,
|
||||
TGSI_FILE_HW_ATOMIC,
|
||||
TGSI_FILE_COUNT, /**< how many TGSI_FILE_ types */
|
||||
};
|
||||
|
||||
|
|
|
@ -75,6 +75,7 @@ extern "C" {
|
|||
#define PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT 2
|
||||
#define PIPE_MAX_WINDOW_RECTANGLES 8
|
||||
|
||||
#define PIPE_MAX_HW_ATOMIC_BUFFERS 32
|
||||
|
||||
struct pipe_reference
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue