gallium/tgsi: start adding hw atomics (v3.2)
This adds support for hw atomic counters to TGSI. A new register file for storing atomic counters is added, along with a new atomic counter semantic, along with docs for both. v2: drop semantic, move hw counter to backend, Ilia pointed out SSO would have busted my plan, and he was right. v3: drop BUFFER decls. (Marek) v3.1: minor fixups for whitespace, set ureg error if we overflow the hw atomic limits. (nha) v3.2: fix some docs inconsistencies (Ilia) Reviewed-by: Marek Olšák <marek.olsak@amd.com> Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com> Tested-By: Gert Wollny <gw.fossdev@gmail.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
parent
2a06423c00
commit
4b0b82770a
|
@ -58,6 +58,7 @@ static const char *tgsi_file_names[] =
|
||||||
"BUFFER",
|
"BUFFER",
|
||||||
"MEMORY",
|
"MEMORY",
|
||||||
"CONSTBUF",
|
"CONSTBUF",
|
||||||
|
"HWATOMIC",
|
||||||
};
|
};
|
||||||
|
|
||||||
const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
|
const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
|
||||||
|
|
|
@ -80,6 +80,7 @@ struct ureg_tokens {
|
||||||
#define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS
|
#define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS
|
||||||
#define UREG_MAX_OUTPUT (4 * PIPE_MAX_SHADER_OUTPUTS)
|
#define UREG_MAX_OUTPUT (4 * PIPE_MAX_SHADER_OUTPUTS)
|
||||||
#define UREG_MAX_CONSTANT_RANGE 32
|
#define UREG_MAX_CONSTANT_RANGE 32
|
||||||
|
#define UREG_MAX_HW_ATOMIC_RANGE 32
|
||||||
#define UREG_MAX_IMMEDIATE 4096
|
#define UREG_MAX_IMMEDIATE 4096
|
||||||
#define UREG_MAX_ADDR 3
|
#define UREG_MAX_ADDR 3
|
||||||
#define UREG_MAX_ARRAY_TEMPS 256
|
#define UREG_MAX_ARRAY_TEMPS 256
|
||||||
|
@ -92,6 +93,15 @@ struct const_decl {
|
||||||
unsigned nr_constant_ranges;
|
unsigned nr_constant_ranges;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Ranges of hw atomic counters recorded for a single atomic buffer.
 *
 * Holds up to UREG_MAX_HW_ATOMIC_RANGE ranges; nr_hw_atomic_ranges is the
 * number of entries of hw_atomic_range[] that are in use.
 */
struct hw_atomic_decl {
|
||||||
|
struct {
|
||||||
|
unsigned first; /* first counter index of the range */
|
||||||
|
unsigned last; /* last counter index of the range */
|
||||||
|
unsigned array_id; /* array id of the range; 0 means "not an array" */
|
||||||
|
} hw_atomic_range[UREG_MAX_HW_ATOMIC_RANGE];
|
||||||
|
unsigned nr_hw_atomic_ranges; /* number of valid hw_atomic_range[] entries */
|
||||||
|
};
|
||||||
|
|
||||||
#define DOMAIN_DECL 0
|
#define DOMAIN_DECL 0
|
||||||
#define DOMAIN_INSN 1
|
#define DOMAIN_INSN 1
|
||||||
|
|
||||||
|
@ -182,6 +192,8 @@ struct ureg_program
|
||||||
|
|
||||||
struct const_decl const_decls[PIPE_MAX_CONSTANT_BUFFERS];
|
struct const_decl const_decls[PIPE_MAX_CONSTANT_BUFFERS];
|
||||||
|
|
||||||
|
struct hw_atomic_decl hw_atomic_decls[PIPE_MAX_HW_ATOMIC_BUFFERS];
|
||||||
|
|
||||||
unsigned properties[TGSI_PROPERTY_COUNT];
|
unsigned properties[TGSI_PROPERTY_COUNT];
|
||||||
|
|
||||||
unsigned nr_addrs;
|
unsigned nr_addrs;
|
||||||
|
@ -583,6 +595,30 @@ out:
|
||||||
return ureg_src_dimension(src, 0);
|
return ureg_src_dimension(src, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Returns a new hw atomic register. Keep track of which have been
|
||||||
|
* referred to so that we can emit decls later.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
ureg_DECL_hw_atomic(struct ureg_program *ureg,
|
||||||
|
unsigned first,
|
||||||
|
unsigned last,
|
||||||
|
unsigned buffer_id,
|
||||||
|
unsigned array_id)
|
||||||
|
{
|
||||||
|
struct hw_atomic_decl *decl = &ureg->hw_atomic_decls[buffer_id];
|
||||||
|
|
||||||
|
if (decl->nr_hw_atomic_ranges < UREG_MAX_HW_ATOMIC_RANGE) {
|
||||||
|
uint i = decl->nr_hw_atomic_ranges++;
|
||||||
|
|
||||||
|
decl->hw_atomic_range[i].first = first;
|
||||||
|
decl->hw_atomic_range[i].last = last;
|
||||||
|
decl->hw_atomic_range[i].array_id = array_id;
|
||||||
|
} else {
|
||||||
|
set_bad(ureg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static struct ureg_dst alloc_temporary( struct ureg_program *ureg,
|
static struct ureg_dst alloc_temporary( struct ureg_program *ureg,
|
||||||
boolean local )
|
boolean local )
|
||||||
{
|
{
|
||||||
|
@ -1501,6 +1537,35 @@ emit_decl_semantic(struct ureg_program *ureg,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_decl_atomic_2d(struct ureg_program *ureg,
|
||||||
|
unsigned first,
|
||||||
|
unsigned last,
|
||||||
|
unsigned index2D,
|
||||||
|
unsigned array_id)
|
||||||
|
{
|
||||||
|
union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 4 : 3);
|
||||||
|
|
||||||
|
out[0].value = 0;
|
||||||
|
out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
|
||||||
|
out[0].decl.NrTokens = 3;
|
||||||
|
out[0].decl.File = TGSI_FILE_HW_ATOMIC;
|
||||||
|
out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
|
||||||
|
out[0].decl.Dimension = 1;
|
||||||
|
out[0].decl.Array = array_id != 0;
|
||||||
|
|
||||||
|
out[1].value = 0;
|
||||||
|
out[1].decl_range.First = first;
|
||||||
|
out[1].decl_range.Last = last;
|
||||||
|
|
||||||
|
out[2].value = 0;
|
||||||
|
out[2].decl_dim.Index2D = index2D;
|
||||||
|
|
||||||
|
if (array_id) {
|
||||||
|
out[3].value = 0;
|
||||||
|
out[3].array.ArrayID = array_id;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
emit_decl_fs(struct ureg_program *ureg,
|
emit_decl_fs(struct ureg_program *ureg,
|
||||||
|
@ -1908,6 +1973,22 @@ static void emit_decls( struct ureg_program *ureg )
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < PIPE_MAX_HW_ATOMIC_BUFFERS; i++) {
|
||||||
|
struct hw_atomic_decl *decl = &ureg->hw_atomic_decls[i];
|
||||||
|
|
||||||
|
if (decl->nr_hw_atomic_ranges) {
|
||||||
|
uint j;
|
||||||
|
|
||||||
|
for (j = 0; j < decl->nr_hw_atomic_ranges; j++) {
|
||||||
|
emit_decl_atomic_2d(ureg,
|
||||||
|
decl->hw_atomic_range[j].first,
|
||||||
|
decl->hw_atomic_range[j].last,
|
||||||
|
i,
|
||||||
|
decl->hw_atomic_range[j].array_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (ureg->nr_temps) {
|
if (ureg->nr_temps) {
|
||||||
unsigned array = 0;
|
unsigned array = 0;
|
||||||
for (i = 0; i < ureg->nr_temps;) {
|
for (i = 0; i < ureg->nr_temps;) {
|
||||||
|
|
|
@ -316,6 +316,13 @@ struct ureg_src
|
||||||
ureg_DECL_constant( struct ureg_program *,
|
ureg_DECL_constant( struct ureg_program *,
|
||||||
unsigned index );
|
unsigned index );
|
||||||
|
|
||||||
|
/* Declare a range of hw atomic counters.
 *
 * first / last   - bounds of the counter range
 * buffer_id      - hw atomic buffer the range belongs to
 * array_id       - array id for the range, 0 when it is not an array
 *
 * Returns nothing.
 */
void
|
||||||
|
ureg_DECL_hw_atomic(struct ureg_program *ureg,
|
||||||
|
unsigned first,
|
||||||
|
unsigned last,
|
||||||
|
unsigned buffer_id,
|
||||||
|
unsigned array_id);
|
||||||
|
|
||||||
struct ureg_dst
|
struct ureg_dst
|
||||||
ureg_DECL_temporary( struct ureg_program * );
|
ureg_DECL_temporary( struct ureg_program * );
|
||||||
|
|
||||||
|
|
|
@ -2638,9 +2638,11 @@ logical operations. In this context atomicity means that another
|
||||||
concurrent memory access operation that affects the same memory
|
concurrent memory access operation that affects the same memory
|
||||||
location is guaranteed to be performed strictly before or after the
|
location is guaranteed to be performed strictly before or after the
|
||||||
entire execution of the atomic operation. The resource may be a BUFFER,
|
entire execution of the atomic operation. The resource may be a BUFFER,
|
||||||
IMAGE, or MEMORY. In the case of an image, the offset works the same as for
|
IMAGE, HWATOMIC, or MEMORY. In the case of an image, the offset works
|
||||||
``LOAD`` and ``STORE``, specified above. These atomic operations may
|
the same as for ``LOAD`` and ``STORE``, specified above. For atomic
|
||||||
only be used with 32-bit integer image formats.
|
counters, the offset is an immediate index to the base hw atomic
|
||||||
|
counter for this operation.
|
||||||
|
These atomic operations may only be used with 32-bit integer image formats.
|
||||||
|
|
||||||
.. opcode:: ATOMUADD - Atomic integer addition
|
.. opcode:: ATOMUADD - Atomic integer addition
|
||||||
|
|
||||||
|
@ -3517,6 +3519,31 @@ accessing a misaligned address is undefined.
|
||||||
Usage of the STORE opcode is only allowed if the WR (writable) flag
|
Usage of the STORE opcode is only allowed if the WR (writable) flag
|
||||||
is set.
|
is set.
|
||||||
|
|
||||||
|
Hardware Atomic Register File
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
Hardware atomics are declared as a 2D array with an optional array id.
|
||||||
|
|
||||||
|
The first member of the dimension is the buffer resource the atomic
|
||||||
|
is located in.
|
||||||
|
The second member is a range into the buffer resource, either for
|
||||||
|
one or multiple counters. If this is an array, the declaration will have
|
||||||
|
a unique array id.
|
||||||
|
|
||||||
|
Each counter is 4 bytes in size; indices and ranges are given in counters, not bytes.
|
||||||
|
DCL HWATOMIC[0][0]
|
||||||
|
DCL HWATOMIC[0][1]
|
||||||
|
|
||||||
|
This declares two atomics, one at the start of the buffer and one in the
|
||||||
|
second 4 bytes.
|
||||||
|
|
||||||
|
DCL HWATOMIC[0][0]
|
||||||
|
DCL HWATOMIC[1][0]
|
||||||
|
DCL HWATOMIC[1][1..3], ARRAY(1)
|
||||||
|
|
||||||
|
This declares 5 atomics, one in buffer 0 at 0,
|
||||||
|
one in buffer 1 at 0, and an array of 3 atomics in
|
||||||
|
buffer 1, starting at 1.
|
||||||
|
|
||||||
Properties
|
Properties
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
|
@ -75,6 +75,7 @@ enum tgsi_file_type {
|
||||||
TGSI_FILE_BUFFER,
|
TGSI_FILE_BUFFER,
|
||||||
TGSI_FILE_MEMORY,
|
TGSI_FILE_MEMORY,
|
||||||
TGSI_FILE_CONSTBUF,
|
TGSI_FILE_CONSTBUF,
|
||||||
|
TGSI_FILE_HW_ATOMIC,
|
||||||
TGSI_FILE_COUNT, /**< how many TGSI_FILE_ types */
|
TGSI_FILE_COUNT, /**< how many TGSI_FILE_ types */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -75,6 +75,7 @@ extern "C" {
|
||||||
#define PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT 2
|
#define PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT 2
|
||||||
#define PIPE_MAX_WINDOW_RECTANGLES 8
|
#define PIPE_MAX_WINDOW_RECTANGLES 8
|
||||||
|
|
||||||
|
#define PIPE_MAX_HW_ATOMIC_BUFFERS 32
|
||||||
|
|
||||||
struct pipe_reference
|
struct pipe_reference
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue