anv/gpu_memcpy: Add a lighter-weight GPU memcpy function
We'll be performing a GPU memcpy in more places to copy small amounts of data. Add an alternate function that thrashes less state.

v2: - Make a new function (Jason Ekstrand).
    - Move the #define into the function.
v3: - Update the function name (Jason).
    - Update comments.
v4: Use an indirect drawing register as TEMP_REG (Jason Ekstrand).

Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
This commit is contained in:
parent
dcff5ab9f1
commit
0b16600056
|
@ -69,5 +69,10 @@ void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
|
|||
struct anv_bo *src, uint32_t src_offset,
|
||||
uint32_t size);
|
||||
|
||||
void genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_bo *dst, uint32_t dst_offset,
|
||||
struct anv_bo *src, uint32_t src_offset,
|
||||
uint32_t size);
|
||||
|
||||
void genX(blorp_exec)(struct blorp_batch *batch,
|
||||
const struct blorp_params *params);
|
||||
|
|
|
@ -51,6 +51,46 @@ gcd_pow2_u64(uint64_t a, uint64_t b)
|
|||
return 1 << MIN2(a_log2, b_log2);
|
||||
}
|
||||
|
||||
void
|
||||
genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_bo *dst, uint32_t dst_offset,
|
||||
struct anv_bo *src, uint32_t src_offset,
|
||||
uint32_t size)
|
||||
{
|
||||
/* This memcpy operates in units of dwords. */
|
||||
assert(size % 4 == 0);
|
||||
assert(dst_offset % 4 == 0);
|
||||
assert(src_offset % 4 == 0);
|
||||
|
||||
for (uint32_t i = 0; i < size; i += 4) {
|
||||
const struct anv_address src_addr =
|
||||
(struct anv_address) { src, src_offset + i};
|
||||
const struct anv_address dst_addr =
|
||||
(struct anv_address) { dst, dst_offset + i};
|
||||
#if GEN_GEN >= 8
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_COPY_MEM_MEM), cp) {
|
||||
cp.DestinationMemoryAddress = dst_addr;
|
||||
cp.SourceMemoryAddress = src_addr;
|
||||
}
|
||||
#else
|
||||
/* IVB does not have a general purpose register for command streamer
|
||||
* commands. Therefore, we use an alternate temporary register.
|
||||
*/
|
||||
#define TEMP_REG 0x2440 /* GEN7_3DPRIM_BASE_VERTEX */
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), load) {
|
||||
load.RegisterAddress = TEMP_REG;
|
||||
load.MemoryAddress = src_addr;
|
||||
}
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), store) {
|
||||
store.RegisterAddress = TEMP_REG;
|
||||
store.MemoryAddress = dst_addr;
|
||||
}
|
||||
#undef TEMP_REG
|
||||
#endif
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
void
|
||||
genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_bo *dst, uint32_t dst_offset,
|
||||
|
|
Loading…
Reference in New Issue