1377 lines
39 KiB
C
1377 lines
39 KiB
C
/*
|
|
* Copyright © 2019 Intel Corporation
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#ifndef MI_BUILDER_H
|
|
#define MI_BUILDER_H
|
|
|
|
#include "dev/intel_device_info.h"
|
|
#include "genxml/genX_bits.h"
|
|
#include "util/bitscan.h"
|
|
#include "util/fast_idiv_by_const.h"
|
|
#include "util/u_math.h"
|
|
|
|
#ifndef MI_BUILDER_NUM_ALLOC_GPRS
/** Number of GPRs the MI builder's allocator is allowed to hand out
 *
 * A user of this API may define this macro before including the header so
 * that some GPRs at the top end stay reserved for its own use.
 */
#define MI_BUILDER_NUM_ALLOC_GPRS 16
#endif
|
|
|
|
/** These must be defined by the user of the builder
|
|
*
|
|
* void *__gen_get_batch_dwords(__gen_user_data *user_data,
|
|
* unsigned num_dwords);
|
|
*
|
|
* __gen_address_type
|
|
* __gen_address_offset(__gen_address_type addr, uint64_t offset);
|
|
*
|
|
*
|
|
* If self-modifying batches are supported, we must be able to pass batch
|
|
* addresses around as void*s so pinning as well as batch chaining or some
|
|
* other mechanism for ensuring batch pointers remain valid during building is
|
|
* required. The following function must also be defined, it returns an
|
|
* address in canonical form:
|
|
*
|
|
* __gen_address_type
|
|
* __gen_get_batch_address(__gen_user_data *user_data, void *location);
|
|
*
|
|
* Also, __gen_combine_address must accept a location value of NULL and return
|
|
* a fully valid 64-bit address.
|
|
*/
|
|
|
|
/*
|
|
* Start of the actual MI builder
|
|
*/
|
|
|
|
/* Token-pasting helpers: map a genxml command name to its generated
 * `_length` constant, `_header` initializer and `_pack` function.
 */
#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack

/* Opens a one-iteration `for` scope in which `name` is a `struct cmd`
 * pre-filled with the command header.  The statement following the macro
 * fills in the remaining fields; when that statement finishes, the struct is
 * packed into `dst` and `_dst` is set to NULL, which ends the loop.
 *
 * `_dst` is visible inside the body and points at the destination dwords.
 */
#define mi_builder_pack(b, cmd, dst, name)                               \
   for (struct cmd name = { __genxml_cmd_header(cmd) },                  \
        *_dst = (struct cmd *)(dst); __builtin_expect(_dst != NULL, 1);  \
        __genxml_cmd_pack(cmd)((b)->user_data, (void *)_dst, &name),     \
        _dst = NULL)

/* Same as mi_builder_pack() but the destination is freshly obtained from
 * __gen_get_batch_dwords() using the command's generated length.
 */
#define mi_builder_emit(b, cmd, name)                                    \
   mi_builder_pack((b), cmd, __gen_get_batch_dwords((b)->user_data, __genxml_cmd_length(cmd)), name)
|
|
|
|
|
|
/* Discriminator for struct mi_value: where the value lives and how wide it
 * is.
 */
enum mi_value_type {
   MI_VALUE_TYPE_IMM,   /* immediate constant */
   MI_VALUE_TYPE_MEM32, /* 32-bit memory location */
   MI_VALUE_TYPE_MEM64, /* 64-bit memory location */
   MI_VALUE_TYPE_REG32, /* 32-bit register */
   MI_VALUE_TYPE_REG64, /* 64-bit register */
};
|
|
|
|
struct mi_value {
|
|
enum mi_value_type type;
|
|
|
|
union {
|
|
uint64_t imm;
|
|
__gen_address_type addr;
|
|
uint32_t reg;
|
|
};
|
|
|
|
#if GFX_VERx10 >= 75
|
|
bool invert;
|
|
#endif
|
|
};
|
|
|
|
/* Register offset in the form expected by the MI register commands, as
 * produced by mi_adjust_reg_num().
 */
struct mi_reg_num {
   /* Register offset to program into the command. */
   uint32_t num;
#if GFX_VER >= 11
   /* True when `num` is relative to the CS MMIO start offset. */
   bool cs;
#endif
};
|
|
|
|
static inline struct mi_reg_num
|
|
mi_adjust_reg_num(uint32_t reg)
|
|
{
|
|
#if GFX_VER >= 11
|
|
bool cs = reg >= 0x2000 && reg < 0x4000;
|
|
return (struct mi_reg_num) {
|
|
.num = reg - (cs ? 0x2000 : 0),
|
|
.cs = cs,
|
|
};
|
|
#else
|
|
return (struct mi_reg_num) { .num = reg, };
|
|
#endif
|
|
}
|
|
|
|
/* Capacity, in dwords, of the builder's queue of pending MI_MATH ALU
 * instructions (see struct mi_builder::math_dwords).
 */
#if GFX_VER >= 9
#define MI_BUILDER_MAX_MATH_DWORDS 256
#else
#define MI_BUILDER_MAX_MATH_DWORDS 64
#endif
|
|
|
|
struct mi_builder {
|
|
const struct intel_device_info *devinfo;
|
|
__gen_user_data *user_data;
|
|
|
|
#if GFX_VERx10 >= 75
|
|
uint32_t gprs;
|
|
uint8_t gpr_refs[MI_BUILDER_NUM_ALLOC_GPRS];
|
|
|
|
unsigned num_math_dwords;
|
|
uint32_t math_dwords[MI_BUILDER_MAX_MATH_DWORDS];
|
|
#endif
|
|
};
|
|
|
|
static inline void
|
|
mi_builder_init(struct mi_builder *b,
|
|
const struct intel_device_info *devinfo,
|
|
__gen_user_data *user_data)
|
|
{
|
|
memset(b, 0, sizeof(*b));
|
|
b->devinfo = devinfo;
|
|
b->user_data = user_data;
|
|
|
|
#if GFX_VERx10 >= 75
|
|
b->gprs = 0;
|
|
b->num_math_dwords = 0;
|
|
#endif
|
|
}
|
|
|
|
/* Emit all queued ALU instructions as a single MI_MATH command.
 *
 * Does nothing when the queue is empty (and on hardware without MI_MATH).
 * The queue is empty on return.
 */
static inline void
mi_builder_flush_math(struct mi_builder *b)
{
#if GFX_VERx10 >= 75
   const unsigned num_alu_dwords = b->num_math_dwords;
   if (num_alu_dwords == 0)
      return;

   /* One header dword followed by the queued ALU instructions. */
   const unsigned total_dwords = 1 + num_alu_dwords;
   uint32_t *cmd = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                     total_dwords);
   mi_builder_pack(b, GENX(MI_MATH), cmd, math) {
      math.DWordLength = total_dwords - GENX(MI_MATH_length_bias);
   }
   memcpy(cmd + 1, b->math_dwords, num_alu_dwords * sizeof(uint32_t));

   b->num_math_dwords = 0;
#endif
}
|
|
|
|
/* Register offset of GPR 0; GPR N lives at base + N * 8. */
#define _MI_BUILDER_GPR_BASE 0x2600
/* The actual hardware limit on GPRs */
#define _MI_BUILDER_NUM_HW_GPRS 16
|
|
|
|
#if GFX_VERx10 >= 75
|
|
|
|
static inline bool
|
|
mi_value_is_reg(struct mi_value val)
|
|
{
|
|
return val.type == MI_VALUE_TYPE_REG32 ||
|
|
val.type == MI_VALUE_TYPE_REG64;
|
|
}
|
|
|
|
static inline bool
|
|
mi_value_is_gpr(struct mi_value val)
|
|
{
|
|
return mi_value_is_reg(val) &&
|
|
val.reg >= _MI_BUILDER_GPR_BASE &&
|
|
val.reg < _MI_BUILDER_GPR_BASE +
|
|
_MI_BUILDER_NUM_HW_GPRS * 8;
|
|
}
|
|
|
|
static inline bool
|
|
_mi_value_is_allocated_gpr(struct mi_value val)
|
|
{
|
|
return mi_value_is_reg(val) &&
|
|
val.reg >= _MI_BUILDER_GPR_BASE &&
|
|
val.reg < _MI_BUILDER_GPR_BASE +
|
|
MI_BUILDER_NUM_ALLOC_GPRS * 8;
|
|
}
|
|
|
|
static inline uint32_t
|
|
_mi_value_as_gpr(struct mi_value val)
|
|
{
|
|
assert(mi_value_is_gpr(val));
|
|
assert(val.reg % 8 == 0);
|
|
return (val.reg - _MI_BUILDER_GPR_BASE) / 8;
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_new_gpr(struct mi_builder *b)
|
|
{
|
|
unsigned gpr = ffs(~b->gprs) - 1;
|
|
assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
|
|
assert(b->gpr_refs[gpr] == 0);
|
|
b->gprs |= (1u << gpr);
|
|
b->gpr_refs[gpr] = 1;
|
|
|
|
return (struct mi_value) {
|
|
.type = MI_VALUE_TYPE_REG64,
|
|
.reg = _MI_BUILDER_GPR_BASE + gpr * 8,
|
|
};
|
|
}
|
|
#endif /* GFX_VERx10 >= 75 */
|
|
|
|
/** Take a reference to a mi_value
|
|
*
|
|
* The MI builder uses reference counting to automatically free ALU GPRs for
|
|
* re-use in calculations. All mi_* math functions consume the reference
|
|
* they are handed for each source and return a reference to a value which the
|
|
* caller must consume. In particular, if you pas the same value into a
|
|
* single mi_* math function twice (say to add a number to itself), you
|
|
* are responsible for calling mi_value_ref() to get a second reference
|
|
* because the mi_* math function will consume it twice.
|
|
*/
|
|
static inline struct mi_value
|
|
mi_value_ref(struct mi_builder *b, struct mi_value val)
|
|
{
|
|
#if GFX_VERx10 >= 75
|
|
if (_mi_value_is_allocated_gpr(val)) {
|
|
unsigned gpr = _mi_value_as_gpr(val);
|
|
assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
|
|
assert(b->gprs & (1u << gpr));
|
|
assert(b->gpr_refs[gpr] < UINT8_MAX);
|
|
b->gpr_refs[gpr]++;
|
|
}
|
|
#endif /* GFX_VERx10 >= 75 */
|
|
|
|
return val;
|
|
}
|
|
|
|
/** Drop a reference to a mi_value
|
|
*
|
|
* See also mi_value_ref.
|
|
*/
|
|
static inline void
|
|
mi_value_unref(struct mi_builder *b, struct mi_value val)
|
|
{
|
|
#if GFX_VERx10 >= 75
|
|
if (_mi_value_is_allocated_gpr(val)) {
|
|
unsigned gpr = _mi_value_as_gpr(val);
|
|
assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
|
|
assert(b->gprs & (1u << gpr));
|
|
assert(b->gpr_refs[gpr] > 0);
|
|
if (--b->gpr_refs[gpr] == 0)
|
|
b->gprs &= ~(1u << gpr);
|
|
}
|
|
#endif /* GFX_VERx10 >= 75 */
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_imm(uint64_t imm)
|
|
{
|
|
return (struct mi_value) {
|
|
.type = MI_VALUE_TYPE_IMM,
|
|
.imm = imm,
|
|
};
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_reg32(uint32_t reg)
|
|
{
|
|
struct mi_value val = {
|
|
.type = MI_VALUE_TYPE_REG32,
|
|
.reg = reg,
|
|
};
|
|
#if GFX_VERx10 >= 75
|
|
assert(!_mi_value_is_allocated_gpr(val));
|
|
#endif
|
|
return val;
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_reg64(uint32_t reg)
|
|
{
|
|
struct mi_value val = {
|
|
.type = MI_VALUE_TYPE_REG64,
|
|
.reg = reg,
|
|
};
|
|
#if GFX_VERx10 >= 75
|
|
assert(!_mi_value_is_allocated_gpr(val));
|
|
#endif
|
|
return val;
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_mem32(__gen_address_type addr)
|
|
{
|
|
return (struct mi_value) {
|
|
.type = MI_VALUE_TYPE_MEM32,
|
|
.addr = addr,
|
|
};
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_mem64(__gen_address_type addr)
|
|
{
|
|
return (struct mi_value) {
|
|
.type = MI_VALUE_TYPE_MEM64,
|
|
.addr = addr,
|
|
};
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_value_half(struct mi_value value, bool top_32_bits)
|
|
{
|
|
switch (value.type) {
|
|
case MI_VALUE_TYPE_IMM:
|
|
if (top_32_bits)
|
|
value.imm >>= 32;
|
|
else
|
|
value.imm &= 0xffffffffu;
|
|
return value;
|
|
|
|
case MI_VALUE_TYPE_MEM32:
|
|
assert(!top_32_bits);
|
|
return value;
|
|
|
|
case MI_VALUE_TYPE_MEM64:
|
|
if (top_32_bits)
|
|
value.addr = __gen_address_offset(value.addr, 4);
|
|
value.type = MI_VALUE_TYPE_MEM32;
|
|
return value;
|
|
|
|
case MI_VALUE_TYPE_REG32:
|
|
assert(!top_32_bits);
|
|
return value;
|
|
|
|
case MI_VALUE_TYPE_REG64:
|
|
if (top_32_bits)
|
|
value.reg += 4;
|
|
value.type = MI_VALUE_TYPE_REG32;
|
|
return value;
|
|
}
|
|
|
|
unreachable("Invalid mi_value type");
|
|
}
|
|
|
|
static inline void
|
|
_mi_copy_no_unref(struct mi_builder *b,
|
|
struct mi_value dst, struct mi_value src)
|
|
{
|
|
#if GFX_VERx10 >= 75
|
|
/* TODO: We could handle src.invert by emitting a bit of math if we really
|
|
* wanted to.
|
|
*/
|
|
assert(!dst.invert && !src.invert);
|
|
#endif
|
|
mi_builder_flush_math(b);
|
|
|
|
switch (dst.type) {
|
|
case MI_VALUE_TYPE_IMM:
|
|
unreachable("Cannot copy to an immediate");
|
|
|
|
case MI_VALUE_TYPE_MEM64:
|
|
case MI_VALUE_TYPE_REG64:
|
|
switch (src.type) {
|
|
case MI_VALUE_TYPE_IMM:
|
|
if (dst.type == MI_VALUE_TYPE_REG64) {
|
|
uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
|
|
GENX(MI_LOAD_REGISTER_IMM_length) + 2);
|
|
struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
|
|
mi_builder_pack(b, GENX(MI_LOAD_REGISTER_IMM), dw, lri) {
|
|
lri.DWordLength = GENX(MI_LOAD_REGISTER_IMM_length) + 2 -
|
|
GENX(MI_LOAD_REGISTER_IMM_length_bias);
|
|
#if GFX_VER >= 11
|
|
lri.AddCSMMIOStartOffset = reg.cs;
|
|
#endif
|
|
}
|
|
dw[1] = reg.num;
|
|
dw[2] = src.imm;
|
|
dw[3] = reg.num + 4;
|
|
dw[4] = src.imm >> 32;
|
|
} else {
|
|
#if GFX_VER >= 8
|
|
assert(dst.type == MI_VALUE_TYPE_MEM64);
|
|
uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
|
|
GENX(MI_STORE_DATA_IMM_length) + 1);
|
|
mi_builder_pack(b, GENX(MI_STORE_DATA_IMM), dw, sdm) {
|
|
sdm.DWordLength = GENX(MI_STORE_DATA_IMM_length) + 1 -
|
|
GENX(MI_STORE_DATA_IMM_length_bias);
|
|
sdm.StoreQword = true;
|
|
sdm.Address = dst.addr;
|
|
}
|
|
dw[3] = src.imm;
|
|
dw[4] = src.imm >> 32;
|
|
#else
|
|
_mi_copy_no_unref(b, mi_value_half(dst, false),
|
|
mi_value_half(src, false));
|
|
_mi_copy_no_unref(b, mi_value_half(dst, true),
|
|
mi_value_half(src, true));
|
|
#endif
|
|
}
|
|
break;
|
|
case MI_VALUE_TYPE_REG32:
|
|
case MI_VALUE_TYPE_MEM32:
|
|
_mi_copy_no_unref(b, mi_value_half(dst, false),
|
|
mi_value_half(src, false));
|
|
_mi_copy_no_unref(b, mi_value_half(dst, true),
|
|
mi_imm(0));
|
|
break;
|
|
case MI_VALUE_TYPE_REG64:
|
|
case MI_VALUE_TYPE_MEM64:
|
|
_mi_copy_no_unref(b, mi_value_half(dst, false),
|
|
mi_value_half(src, false));
|
|
_mi_copy_no_unref(b, mi_value_half(dst, true),
|
|
mi_value_half(src, true));
|
|
break;
|
|
default:
|
|
unreachable("Invalid mi_value type");
|
|
}
|
|
break;
|
|
|
|
case MI_VALUE_TYPE_MEM32:
|
|
switch (src.type) {
|
|
case MI_VALUE_TYPE_IMM:
|
|
mi_builder_emit(b, GENX(MI_STORE_DATA_IMM), sdi) {
|
|
sdi.Address = dst.addr;
|
|
#if GFX_VER >= 12
|
|
sdi.ForceWriteCompletionCheck = true;
|
|
#endif
|
|
sdi.ImmediateData = src.imm;
|
|
}
|
|
break;
|
|
|
|
case MI_VALUE_TYPE_MEM32:
|
|
case MI_VALUE_TYPE_MEM64:
|
|
#if GFX_VER >= 8
|
|
mi_builder_emit(b, GENX(MI_COPY_MEM_MEM), cmm) {
|
|
cmm.DestinationMemoryAddress = dst.addr;
|
|
cmm.SourceMemoryAddress = src.addr;
|
|
}
|
|
#elif GFX_VERx10 == 75
|
|
{
|
|
struct mi_value tmp = mi_new_gpr(b);
|
|
_mi_copy_no_unref(b, tmp, src);
|
|
_mi_copy_no_unref(b, dst, tmp);
|
|
mi_value_unref(b, tmp);
|
|
}
|
|
#else
|
|
unreachable("Cannot do mem <-> mem copy on IVB and earlier");
|
|
#endif
|
|
break;
|
|
|
|
case MI_VALUE_TYPE_REG32:
|
|
case MI_VALUE_TYPE_REG64:
|
|
mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
|
|
struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
|
|
srm.RegisterAddress = reg.num;
|
|
#if GFX_VER >= 11
|
|
srm.AddCSMMIOStartOffset = reg.cs;
|
|
#endif
|
|
srm.MemoryAddress = dst.addr;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
unreachable("Invalid mi_value type");
|
|
}
|
|
break;
|
|
|
|
case MI_VALUE_TYPE_REG32:
|
|
switch (src.type) {
|
|
case MI_VALUE_TYPE_IMM:
|
|
mi_builder_emit(b, GENX(MI_LOAD_REGISTER_IMM), lri) {
|
|
struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
|
|
lri.RegisterOffset = reg.num;
|
|
#if GFX_VER >= 11
|
|
lri.AddCSMMIOStartOffset = reg.cs;
|
|
#endif
|
|
lri.DataDWord = src.imm;
|
|
}
|
|
break;
|
|
|
|
case MI_VALUE_TYPE_MEM32:
|
|
case MI_VALUE_TYPE_MEM64:
|
|
#if GFX_VER >= 7
|
|
mi_builder_emit(b, GENX(MI_LOAD_REGISTER_MEM), lrm) {
|
|
struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
|
|
lrm.RegisterAddress = reg.num;
|
|
#if GFX_VER >= 11
|
|
lrm.AddCSMMIOStartOffset = reg.cs;
|
|
#endif
|
|
lrm.MemoryAddress = src.addr;
|
|
}
|
|
#else
|
|
unreachable("Cannot load do mem -> reg copy on SNB and earlier");
|
|
#endif
|
|
break;
|
|
|
|
case MI_VALUE_TYPE_REG32:
|
|
case MI_VALUE_TYPE_REG64:
|
|
#if GFX_VERx10 >= 75
|
|
if (src.reg != dst.reg) {
|
|
mi_builder_emit(b, GENX(MI_LOAD_REGISTER_REG), lrr) {
|
|
struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
|
|
lrr.SourceRegisterAddress = reg.num;
|
|
#if GFX_VER >= 11
|
|
lrr.AddCSMMIOStartOffsetSource = reg.cs;
|
|
#endif
|
|
reg = mi_adjust_reg_num(dst.reg);
|
|
lrr.DestinationRegisterAddress = reg.num;
|
|
#if GFX_VER >= 11
|
|
lrr.AddCSMMIOStartOffsetDestination = reg.cs;
|
|
#endif
|
|
}
|
|
}
|
|
#else
|
|
unreachable("Cannot do reg <-> reg copy on IVB and earlier");
|
|
#endif
|
|
break;
|
|
|
|
default:
|
|
unreachable("Invalid mi_value type");
|
|
}
|
|
break;
|
|
|
|
default:
|
|
unreachable("Invalid mi_value type");
|
|
}
|
|
}
|
|
|
|
#if GFX_VERx10 >= 75
|
|
static inline struct mi_value
|
|
mi_resolve_invert(struct mi_builder *b, struct mi_value src);
|
|
#endif
|
|
|
|
/** Store the value in src to the value represented by dst
|
|
*
|
|
* If the bit size of src and dst mismatch, this function does an unsigned
|
|
* integer cast. If src has more bits than dst, it takes the bottom bits. If
|
|
* src has fewer bits then dst, it fills the top bits with zeros.
|
|
*
|
|
* This function consumes one reference for each of src and dst.
|
|
*/
|
|
static inline void
|
|
mi_store(struct mi_builder *b, struct mi_value dst, struct mi_value src)
|
|
{
|
|
#if GFX_VERx10 >= 75
|
|
src = mi_resolve_invert(b, src);
|
|
#endif
|
|
_mi_copy_no_unref(b, dst, src);
|
|
mi_value_unref(b, src);
|
|
mi_value_unref(b, dst);
|
|
}
|
|
|
|
static inline void
|
|
mi_memset(struct mi_builder *b, __gen_address_type dst,
|
|
uint32_t value, uint32_t size)
|
|
{
|
|
#if GFX_VERx10 >= 75
|
|
assert(b->num_math_dwords == 0);
|
|
#endif
|
|
|
|
/* This memset operates in units of dwords. */
|
|
assert(size % 4 == 0);
|
|
|
|
for (uint32_t i = 0; i < size; i += 4) {
|
|
mi_store(b, mi_mem32(__gen_address_offset(dst, i)),
|
|
mi_imm(value));
|
|
}
|
|
}
|
|
|
|
/* NOTE: On IVB, this function stomps GFX7_3DPRIM_BASE_VERTEX */
|
|
static inline void
|
|
mi_memcpy(struct mi_builder *b, __gen_address_type dst,
|
|
__gen_address_type src, uint32_t size)
|
|
{
|
|
#if GFX_VERx10 >= 75
|
|
assert(b->num_math_dwords == 0);
|
|
#endif
|
|
|
|
/* This memcpy operates in units of dwords. */
|
|
assert(size % 4 == 0);
|
|
|
|
for (uint32_t i = 0; i < size; i += 4) {
|
|
struct mi_value dst_val = mi_mem32(__gen_address_offset(dst, i));
|
|
struct mi_value src_val = mi_mem32(__gen_address_offset(src, i));
|
|
#if GFX_VERx10 >= 75
|
|
mi_store(b, dst_val, src_val);
|
|
#else
|
|
/* IVB does not have a general purpose register for command streamer
|
|
* commands. Therefore, we use an alternate temporary register.
|
|
*/
|
|
struct mi_value tmp_reg = mi_reg32(0x2440); /* GFX7_3DPRIM_BASE_VERTEX */
|
|
mi_store(b, tmp_reg, src_val);
|
|
mi_store(b, dst_val, tmp_reg);
|
|
#endif
|
|
}
|
|
}
|
|
|
|
/*
|
|
* MI_MATH Section. Only available on Haswell+
|
|
*/
|
|
|
|
#if GFX_VERx10 >= 75
|
|
|
|
/**
|
|
* Perform a predicated store (assuming the condition is already loaded
|
|
* in the MI_PREDICATE_RESULT register) of the value in src to the memory
|
|
* location specified by dst. Non-memory destinations are not supported.
|
|
*
|
|
* This function consumes one reference for each of src and dst.
|
|
*/
|
|
static inline void
|
|
mi_store_if(struct mi_builder *b, struct mi_value dst, struct mi_value src)
|
|
{
|
|
assert(!dst.invert && !src.invert);
|
|
|
|
mi_builder_flush_math(b);
|
|
|
|
/* We can only predicate MI_STORE_REGISTER_MEM, so restrict the
|
|
* destination to be memory, and resolve the source to a temporary
|
|
* register if it isn't in one already.
|
|
*/
|
|
assert(dst.type == MI_VALUE_TYPE_MEM64 ||
|
|
dst.type == MI_VALUE_TYPE_MEM32);
|
|
|
|
if (src.type != MI_VALUE_TYPE_REG32 &&
|
|
src.type != MI_VALUE_TYPE_REG64) {
|
|
struct mi_value tmp = mi_new_gpr(b);
|
|
_mi_copy_no_unref(b, tmp, src);
|
|
src = tmp;
|
|
}
|
|
|
|
if (dst.type == MI_VALUE_TYPE_MEM64) {
|
|
mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
|
|
struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
|
|
srm.RegisterAddress = reg.num;
|
|
#if GFX_VER >= 11
|
|
srm.AddCSMMIOStartOffset = reg.cs;
|
|
#endif
|
|
srm.MemoryAddress = dst.addr;
|
|
srm.PredicateEnable = true;
|
|
}
|
|
mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
|
|
struct mi_reg_num reg = mi_adjust_reg_num(src.reg + 4);
|
|
srm.RegisterAddress = reg.num;
|
|
#if GFX_VER >= 11
|
|
srm.AddCSMMIOStartOffset = reg.cs;
|
|
#endif
|
|
srm.MemoryAddress = __gen_address_offset(dst.addr, 4);
|
|
srm.PredicateEnable = true;
|
|
}
|
|
} else {
|
|
mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
|
|
struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
|
|
srm.RegisterAddress = reg.num;
|
|
#if GFX_VER >= 11
|
|
srm.AddCSMMIOStartOffset = reg.cs;
|
|
#endif
|
|
srm.MemoryAddress = dst.addr;
|
|
srm.PredicateEnable = true;
|
|
}
|
|
}
|
|
|
|
mi_value_unref(b, src);
|
|
mi_value_unref(b, dst);
|
|
}
|
|
|
|
static inline void
|
|
_mi_builder_push_math(struct mi_builder *b,
|
|
const uint32_t *dwords,
|
|
unsigned num_dwords)
|
|
{
|
|
assert(num_dwords < MI_BUILDER_MAX_MATH_DWORDS);
|
|
if (b->num_math_dwords + num_dwords > MI_BUILDER_MAX_MATH_DWORDS)
|
|
mi_builder_flush_math(b);
|
|
|
|
memcpy(&b->math_dwords[b->num_math_dwords],
|
|
dwords, num_dwords * sizeof(*dwords));
|
|
b->num_math_dwords += num_dwords;
|
|
}
|
|
|
|
static inline uint32_t
|
|
_mi_pack_alu(uint32_t opcode, uint32_t operand1, uint32_t operand2)
|
|
{
|
|
struct GENX(MI_MATH_ALU_INSTRUCTION) instr = {
|
|
.Operand2 = operand2,
|
|
.Operand1 = operand1,
|
|
.ALUOpcode = opcode,
|
|
};
|
|
|
|
uint32_t dw;
|
|
GENX(MI_MATH_ALU_INSTRUCTION_pack)(NULL, &dw, &instr);
|
|
|
|
return dw;
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_value_to_gpr(struct mi_builder *b, struct mi_value val)
|
|
{
|
|
if (mi_value_is_gpr(val))
|
|
return val;
|
|
|
|
/* Save off the invert flag because it makes copy() grumpy */
|
|
bool invert = val.invert;
|
|
val.invert = false;
|
|
|
|
struct mi_value tmp = mi_new_gpr(b);
|
|
_mi_copy_no_unref(b, tmp, val);
|
|
tmp.invert = invert;
|
|
|
|
return tmp;
|
|
}
|
|
|
|
static inline uint64_t
|
|
mi_value_to_u64(struct mi_value val)
|
|
{
|
|
assert(val.type == MI_VALUE_TYPE_IMM);
|
|
return val.invert ? ~val.imm : val.imm;
|
|
}
|
|
|
|
static inline uint32_t
|
|
_mi_math_load_src(struct mi_builder *b, unsigned src, struct mi_value *val)
|
|
{
|
|
if (val->type == MI_VALUE_TYPE_IMM &&
|
|
(val->imm == 0 || val->imm == UINT64_MAX)) {
|
|
uint64_t imm = val->invert ? ~val->imm : val->imm;
|
|
return _mi_pack_alu(imm ? MI_ALU_LOAD1 : MI_ALU_LOAD0, src, 0);
|
|
} else {
|
|
*val = mi_value_to_gpr(b, *val);
|
|
return _mi_pack_alu(val->invert ? MI_ALU_LOADINV : MI_ALU_LOAD,
|
|
src, _mi_value_as_gpr(*val));
|
|
}
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_math_binop(struct mi_builder *b, uint32_t opcode,
|
|
struct mi_value src0, struct mi_value src1,
|
|
uint32_t store_op, uint32_t store_src)
|
|
{
|
|
struct mi_value dst = mi_new_gpr(b);
|
|
|
|
uint32_t dw[4];
|
|
dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &src0);
|
|
dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &src1);
|
|
dw[2] = _mi_pack_alu(opcode, 0, 0);
|
|
dw[3] = _mi_pack_alu(store_op, _mi_value_as_gpr(dst), store_src);
|
|
_mi_builder_push_math(b, dw, 4);
|
|
|
|
mi_value_unref(b, src0);
|
|
mi_value_unref(b, src1);
|
|
|
|
return dst;
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_inot(struct mi_builder *b, struct mi_value val)
|
|
{
|
|
if (val.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(~mi_value_to_u64(val));
|
|
|
|
val.invert = !val.invert;
|
|
return val;
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_resolve_invert(struct mi_builder *b, struct mi_value src)
|
|
{
|
|
if (!src.invert)
|
|
return src;
|
|
|
|
assert(src.type != MI_VALUE_TYPE_IMM);
|
|
return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
|
|
MI_ALU_STORE, MI_ALU_ACCU);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_iadd(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src0) + mi_value_to_u64(src1));
|
|
|
|
return mi_math_binop(b, MI_ALU_ADD, src0, src1,
|
|
MI_ALU_STORE, MI_ALU_ACCU);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_iadd_imm(struct mi_builder *b,
|
|
struct mi_value src, uint64_t N)
|
|
{
|
|
if (N == 0)
|
|
return src;
|
|
|
|
return mi_iadd(b, src, mi_imm(N));
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_isub(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src0) - mi_value_to_u64(src1));
|
|
|
|
return mi_math_binop(b, MI_ALU_SUB, src0, src1,
|
|
MI_ALU_STORE, MI_ALU_ACCU);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_ieq(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src0) == mi_value_to_u64(src1) ? ~0ull : 0);
|
|
|
|
/* Compute "equal" by subtracting and storing the zero bit */
|
|
return mi_math_binop(b, MI_ALU_SUB, src0, src1,
|
|
MI_ALU_STORE, MI_ALU_ZF);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_ine(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src0) != mi_value_to_u64(src1) ? ~0ull : 0);
|
|
|
|
/* Compute "not equal" by subtracting and storing the inverse zero bit */
|
|
return mi_math_binop(b, MI_ALU_SUB, src0, src1,
|
|
MI_ALU_STOREINV, MI_ALU_ZF);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_ult(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src0) < mi_value_to_u64(src1) ? ~0ull : 0);
|
|
|
|
/* Compute "less than" by subtracting and storing the carry bit */
|
|
return mi_math_binop(b, MI_ALU_SUB, src0, src1,
|
|
MI_ALU_STORE, MI_ALU_CF);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_uge(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src0) >= mi_value_to_u64(src1) ? ~0ull : 0);
|
|
|
|
/* Compute "less than" by subtracting and storing the carry bit */
|
|
return mi_math_binop(b, MI_ALU_SUB, src0, src1,
|
|
MI_ALU_STOREINV, MI_ALU_CF);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_iand(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src0) & mi_value_to_u64(src1));
|
|
|
|
return mi_math_binop(b, MI_ALU_AND, src0, src1,
|
|
MI_ALU_STORE, MI_ALU_ACCU);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_nz(struct mi_builder *b, struct mi_value src)
|
|
{
|
|
if (src.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src) != 0 ? ~0ull : 0);
|
|
|
|
return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
|
|
MI_ALU_STOREINV, MI_ALU_ZF);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_z(struct mi_builder *b, struct mi_value src)
|
|
{
|
|
if (src.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src) == 0 ? ~0ull : 0);
|
|
|
|
return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
|
|
MI_ALU_STORE, MI_ALU_ZF);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_ior(struct mi_builder *b,
|
|
struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src0) | mi_value_to_u64(src1));
|
|
|
|
return mi_math_binop(b, MI_ALU_OR, src0, src1,
|
|
MI_ALU_STORE, MI_ALU_ACCU);
|
|
}
|
|
|
|
#if GFX_VERx10 >= 125
|
|
static inline struct mi_value
|
|
mi_ishl(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src1.type == MI_VALUE_TYPE_IMM) {
|
|
assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
|
|
assert(mi_value_to_u64(src1) <= 32);
|
|
}
|
|
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src0) << mi_value_to_u64(src1));
|
|
|
|
return mi_math_binop(b, MI_ALU_SHL, src0, src1,
|
|
MI_ALU_STORE, MI_ALU_ACCU);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_ushr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src1.type == MI_VALUE_TYPE_IMM) {
|
|
assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
|
|
assert(mi_value_to_u64(src1) <= 32);
|
|
}
|
|
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src0) >> mi_value_to_u64(src1));
|
|
|
|
return mi_math_binop(b, MI_ALU_SHR, src0, src1,
|
|
MI_ALU_STORE, MI_ALU_ACCU);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_ushr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
|
|
{
|
|
if (shift == 0)
|
|
return src;
|
|
|
|
if (shift >= 64)
|
|
return mi_imm(0);
|
|
|
|
if (src.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src) >> shift);
|
|
|
|
struct mi_value res = mi_value_to_gpr(b, src);
|
|
|
|
/* Annoyingly, we only have power-of-two shifts */
|
|
while (shift) {
|
|
int bit = u_bit_scan(&shift);
|
|
assert(bit <= 5);
|
|
res = mi_ushr(b, res, mi_imm(1 << bit));
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_ishr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src1.type == MI_VALUE_TYPE_IMM) {
|
|
assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
|
|
assert(mi_value_to_u64(src1) <= 32);
|
|
}
|
|
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm((int64_t)mi_value_to_u64(src0) >> mi_value_to_u64(src1));
|
|
|
|
return mi_math_binop(b, MI_ALU_SAR, src0, src1,
|
|
MI_ALU_STORE, MI_ALU_ACCU);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_ishr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
|
|
{
|
|
if (shift == 0)
|
|
return src;
|
|
|
|
if (shift >= 64)
|
|
return mi_imm(0);
|
|
|
|
if (src.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm((int64_t)mi_value_to_u64(src) >> shift);
|
|
|
|
struct mi_value res = mi_value_to_gpr(b, src);
|
|
|
|
/* Annoyingly, we only have power-of-two shifts */
|
|
while (shift) {
|
|
int bit = u_bit_scan(&shift);
|
|
assert(bit <= 5);
|
|
res = mi_ishr(b, res, mi_imm(1 << bit));
|
|
}
|
|
|
|
return res;
|
|
}
|
|
#endif /* if GFX_VERx10 >= 125 */
|
|
|
|
static inline struct mi_value
|
|
mi_imul_imm(struct mi_builder *b, struct mi_value src, uint32_t N)
|
|
{
|
|
if (src.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src) * N);
|
|
|
|
if (N == 0) {
|
|
mi_value_unref(b, src);
|
|
return mi_imm(0);
|
|
}
|
|
|
|
if (N == 1)
|
|
return src;
|
|
|
|
src = mi_value_to_gpr(b, src);
|
|
|
|
struct mi_value res = mi_value_ref(b, src);
|
|
|
|
unsigned top_bit = 31 - __builtin_clz(N);
|
|
for (int i = top_bit - 1; i >= 0; i--) {
|
|
res = mi_iadd(b, res, mi_value_ref(b, res));
|
|
if (N & (1 << i))
|
|
res = mi_iadd(b, res, mi_value_ref(b, src));
|
|
}
|
|
|
|
mi_value_unref(b, src);
|
|
|
|
return res;
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_ishl_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
|
|
{
|
|
if (shift == 0)
|
|
return src;
|
|
|
|
if (shift >= 64)
|
|
return mi_imm(0);
|
|
|
|
if (src.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src) << shift);
|
|
|
|
struct mi_value res = mi_value_to_gpr(b, src);
|
|
|
|
#if GFX_VERx10 >= 125
|
|
/* Annoyingly, we only have power-of-two shifts */
|
|
while (shift) {
|
|
int bit = u_bit_scan(&shift);
|
|
assert(bit <= 5);
|
|
res = mi_ishl(b, res, mi_imm(1 << bit));
|
|
}
|
|
#else
|
|
for (unsigned i = 0; i < shift; i++)
|
|
res = mi_iadd(b, res, mi_value_ref(b, res));
|
|
#endif
|
|
|
|
return res;
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_ushr32_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
|
|
{
|
|
if (shift == 0)
|
|
return src;
|
|
|
|
if (shift >= 64)
|
|
return mi_imm(0);
|
|
|
|
/* We right-shift by left-shifting by 32 - shift and taking the top 32 bits
|
|
* of the result.
|
|
*/
|
|
if (src.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm((mi_value_to_u64(src) >> shift) & UINT32_MAX);
|
|
|
|
if (shift > 32) {
|
|
struct mi_value tmp = mi_new_gpr(b);
|
|
_mi_copy_no_unref(b, mi_value_half(tmp, false),
|
|
mi_value_half(src, true));
|
|
_mi_copy_no_unref(b, mi_value_half(tmp, true), mi_imm(0));
|
|
mi_value_unref(b, src);
|
|
src = tmp;
|
|
shift -= 32;
|
|
}
|
|
assert(shift <= 32);
|
|
struct mi_value tmp = mi_ishl_imm(b, src, 32 - shift);
|
|
struct mi_value dst = mi_new_gpr(b);
|
|
_mi_copy_no_unref(b, mi_value_half(dst, false),
|
|
mi_value_half(tmp, true));
|
|
_mi_copy_no_unref(b, mi_value_half(dst, true), mi_imm(0));
|
|
mi_value_unref(b, tmp);
|
|
return dst;
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_udiv32_imm(struct mi_builder *b, struct mi_value N, uint32_t D)
|
|
{
|
|
if (N.type == MI_VALUE_TYPE_IMM) {
|
|
assert(mi_value_to_u64(N) <= UINT32_MAX);
|
|
return mi_imm(mi_value_to_u64(N) / D);
|
|
}
|
|
|
|
/* We implicitly assume that N is only a 32-bit value */
|
|
if (D == 0) {
|
|
/* This is invalid but we should do something */
|
|
return mi_imm(0);
|
|
} else if (util_is_power_of_two_or_zero(D)) {
|
|
return mi_ushr32_imm(b, N, util_logbase2(D));
|
|
} else {
|
|
struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32, 32);
|
|
assert(m.multiplier <= UINT32_MAX);
|
|
|
|
if (m.pre_shift)
|
|
N = mi_ushr32_imm(b, N, m.pre_shift);
|
|
|
|
/* Do the 32x32 multiply into gpr0 */
|
|
N = mi_imul_imm(b, N, m.multiplier);
|
|
|
|
if (m.increment)
|
|
N = mi_iadd(b, N, mi_imm(m.multiplier));
|
|
|
|
N = mi_ushr32_imm(b, N, 32);
|
|
|
|
if (m.post_shift)
|
|
N = mi_ushr32_imm(b, N, m.post_shift);
|
|
|
|
return N;
|
|
}
|
|
}
|
|
|
|
#endif /* MI_MATH section */
|
|
|
|
/* This assumes addresses wider than 32 bits (i.e. Gfx8+). */
|
|
#if MI_BUILDER_CAN_WRITE_BATCH
|
|
|
|
/* Token produced by mi_store_address() and consumed by
 * _mi_resolve_address_token(): it remembers where the two not-yet-known
 * memory addresses sit inside the batch so they can be written later.
 */
struct mi_address_token {
   /* One pointer per emitted MI_STORE_REGISTER_MEM, each aimed at that
    * command's memory-address field in the batch.
    */
   uint64_t *ptrs[2];
};
|
|
|
|
static inline struct mi_address_token
|
|
mi_store_address(struct mi_builder *b, struct mi_value addr_reg)
|
|
{
|
|
mi_builder_flush_math(b);
|
|
|
|
assert(addr_reg.type == MI_VALUE_TYPE_REG64);
|
|
|
|
struct mi_address_token token = {};
|
|
|
|
for (unsigned i = 0; i < 2; i++) {
|
|
mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
|
|
srm.RegisterAddress = addr_reg.reg + (i * 4);
|
|
|
|
const unsigned addr_dw =
|
|
GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8;
|
|
token.ptrs[i] = (void *)_dst + addr_dw;
|
|
}
|
|
}
|
|
|
|
mi_value_unref(b, addr_reg);
|
|
return token;
|
|
}
|
|
|
|
static inline void
|
|
mi_self_mod_barrier(struct mi_builder *b)
|
|
{
|
|
/* First make sure all the memory writes from previous modifying commands
|
|
* have landed. We want to do this before going through the CS cache,
|
|
* otherwise we could be fetching memory that hasn't been written to yet.
|
|
*/
|
|
mi_builder_emit(b, GENX(PIPE_CONTROL), pc) {
|
|
pc.CommandStreamerStallEnable = true;
|
|
}
|
|
/* Documentation says Gfx11+ should be able to invalidate the command cache
|
|
* but experiment show it doesn't work properly, so for now just get over
|
|
* the CS prefetch.
|
|
*/
|
|
for (uint32_t i = 0; i < (b->devinfo->cs_prefetch_size / 4); i++)
|
|
mi_builder_emit(b, GENX(MI_NOOP), noop);
|
|
}
|
|
|
|
static inline void
|
|
_mi_resolve_address_token(struct mi_builder *b,
|
|
struct mi_address_token token,
|
|
void *batch_location)
|
|
{
|
|
__gen_address_type addr = __gen_get_batch_address(b->user_data,
|
|
batch_location);
|
|
uint64_t addr_addr_u64 = __gen_combine_address(b->user_data, batch_location,
|
|
addr, 0);
|
|
*(token.ptrs[0]) = addr_addr_u64;
|
|
*(token.ptrs[1]) = addr_addr_u64 + 4;
|
|
}
|
|
|
|
#endif /* MI_BUILDER_CAN_WRITE_BATCH */
|
|
|
|
#if GFX_VERx10 >= 125
|
|
|
|
/*
|
|
* Indirect load/store. Only available on XE_HP+
|
|
*/
|
|
|
|
MUST_CHECK static inline struct mi_value
|
|
mi_load_mem64_offset(struct mi_builder *b,
|
|
__gen_address_type addr, struct mi_value offset)
|
|
{
|
|
uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);
|
|
struct mi_value addr_val = mi_imm(addr_u64);
|
|
|
|
struct mi_value dst = mi_new_gpr(b);
|
|
|
|
uint32_t dw[5];
|
|
dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);
|
|
dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);
|
|
dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);
|
|
dw[3] = _mi_pack_alu(MI_ALU_LOADIND, _mi_value_as_gpr(dst), MI_ALU_ACCU);
|
|
dw[4] = _mi_pack_alu(MI_ALU_FENCE_RD, 0, 0);
|
|
_mi_builder_push_math(b, dw, 5);
|
|
|
|
mi_value_unref(b, addr_val);
|
|
mi_value_unref(b, offset);
|
|
|
|
return dst;
|
|
}
|
|
|
|
static inline void
|
|
mi_store_mem64_offset(struct mi_builder *b,
|
|
__gen_address_type addr, struct mi_value offset,
|
|
struct mi_value data)
|
|
{
|
|
uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);
|
|
struct mi_value addr_val = mi_imm(addr_u64);
|
|
|
|
data = mi_value_to_gpr(b, mi_resolve_invert(b, data));
|
|
|
|
uint32_t dw[5];
|
|
dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);
|
|
dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);
|
|
dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);
|
|
dw[3] = _mi_pack_alu(MI_ALU_STOREIND, MI_ALU_ACCU, _mi_value_as_gpr(data));
|
|
dw[4] = _mi_pack_alu(MI_ALU_FENCE_WR, 0, 0);
|
|
_mi_builder_push_math(b, dw, 5);
|
|
|
|
mi_value_unref(b, addr_val);
|
|
mi_value_unref(b, offset);
|
|
mi_value_unref(b, data);
|
|
|
|
/* This is the only math case which has side-effects outside of regular
|
|
* registers to flush math afterwards so we don't confuse anyone.
|
|
*/
|
|
mi_builder_flush_math(b);
|
|
}
|
|
|
|
/*
|
|
* Control-flow Section. Only available on XE_HP+
|
|
*/
|
|
|
|
/* A jump emitted by mi_goto_if() before its target was placed. */
struct _mi_goto {
   /* Value to program into PredicationEnable once the jump is packed. */
   bool predicated;

   /* Batch space reserved for the MI_BATCH_BUFFER_START of this jump. */
   void *mi_bbs;
};
|
|
|
|
struct mi_goto_target {
|
|
bool placed;
|
|
unsigned num_gotos;
|
|
struct _mi_goto gotos[8];
|
|
__gen_address_type addr;
|
|
};
|
|
|
|
/* Initial value for a struct mi_goto_target: not placed, no recorded jumps. */
#define MI_GOTO_TARGET_INIT ((struct mi_goto_target) {})
|
|
|
|
/* Register number that mi_goto_if() treats as MI_PREDICATE_RESULT. */
#define MI_BUILDER_MI_PREDICATE_RESULT_num 0x2418
|
|
|
|
static inline void
|
|
mi_goto_if(struct mi_builder *b, struct mi_value cond,
|
|
struct mi_goto_target *t)
|
|
{
|
|
/* First, set up the predicate, if any */
|
|
bool predicated;
|
|
if (cond.type == MI_VALUE_TYPE_IMM) {
|
|
/* If it's an immediate, the goto either doesn't happen or happens
|
|
* unconditionally.
|
|
*/
|
|
if (mi_value_to_u64(cond) == 0)
|
|
return;
|
|
|
|
assert(mi_value_to_u64(cond) == ~0ull);
|
|
predicated = false;
|
|
} else if (mi_value_is_reg(cond) &&
|
|
cond.reg == MI_BUILDER_MI_PREDICATE_RESULT_num) {
|
|
/* If it's MI_PREDICATE_RESULT, we use whatever predicate the client
|
|
* provided us with
|
|
*/
|
|
assert(cond.type == MI_VALUE_TYPE_REG32);
|
|
predicated = true;
|
|
} else {
|
|
mi_store(b, mi_reg32(MI_BUILDER_MI_PREDICATE_RESULT_num), cond);
|
|
predicated = true;
|
|
}
|
|
|
|
if (predicated) {
|
|
mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
|
|
sp.PredicateEnable = NOOPOnResultClear;
|
|
}
|
|
}
|
|
if (t->placed) {
|
|
mi_builder_emit(b, GENX(MI_BATCH_BUFFER_START), bbs) {
|
|
bbs.PredicationEnable = predicated;
|
|
bbs.AddressSpaceIndicator = ASI_PPGTT;
|
|
bbs.BatchBufferStartAddress = t->addr;
|
|
}
|
|
} else {
|
|
assert(t->num_gotos < ARRAY_SIZE(t->gotos));
|
|
struct _mi_goto g = {
|
|
.predicated = predicated,
|
|
.mi_bbs = __gen_get_batch_dwords(b->user_data,
|
|
GENX(MI_BATCH_BUFFER_START_length)),
|
|
};
|
|
memset(g.mi_bbs, 0, 4 * GENX(MI_BATCH_BUFFER_START_length));
|
|
t->gotos[t->num_gotos++] = g;
|
|
}
|
|
if (predicated) {
|
|
mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
|
|
sp.PredicateEnable = NOOPNever;
|
|
}
|
|
}
|
|
}
|
|
|
|
static inline void
|
|
mi_goto(struct mi_builder *b, struct mi_goto_target *t)
|
|
{
|
|
mi_goto_if(b, mi_imm(-1), t);
|
|
}
|
|
|
|
static inline void
|
|
mi_goto_target(struct mi_builder *b, struct mi_goto_target *t)
|
|
{
|
|
mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
|
|
sp.PredicateEnable = NOOPNever;
|
|
t->addr = __gen_get_batch_address(b->user_data, _dst);
|
|
}
|
|
t->placed = true;
|
|
|
|
struct GENX(MI_BATCH_BUFFER_START) bbs = { GENX(MI_BATCH_BUFFER_START_header) };
|
|
bbs.AddressSpaceIndicator = ASI_PPGTT;
|
|
bbs.BatchBufferStartAddress = t->addr;
|
|
|
|
for (unsigned i = 0; i < t->num_gotos; i++) {
|
|
bbs.PredicationEnable = t->gotos[i].predicated;
|
|
GENX(MI_BATCH_BUFFER_START_pack)(b->user_data, t->gotos[i].mi_bbs, &bbs);
|
|
}
|
|
}
|
|
|
|
static inline struct mi_goto_target
|
|
mi_goto_target_init_and_place(struct mi_builder *b)
|
|
{
|
|
struct mi_goto_target t = MI_GOTO_TARGET_INIT;
|
|
mi_goto_target(b, &t);
|
|
return t;
|
|
}
|
|
|
|
/* Emits a GPU-side loop around the statement that follows.
 *
 * __continue is placed at the top of the loop.  On the host, the for body
 * runs exactly once: the step expression emits the jump back to __continue
 * and then places __break, which makes the loop condition false.
 */
#define mi_loop(b)                                                           \
   for (struct mi_goto_target __break = MI_GOTO_TARGET_INIT,                 \
                              __continue = mi_goto_target_init_and_place(b); \
        !__break.placed;                                                     \
        mi_goto(b, &__continue), mi_goto_target(b, &__break))
|
|
|
|
/* Jump past the end of the enclosing mi_loop(); only valid inside one. */
#define mi_break(b) mi_goto(b, &__break)
|
|
/* Leave the enclosing mi_loop() when cond is true; only valid inside one. */
#define mi_break_if(b, cond) mi_goto_if(b, cond, &__break)
|
|
/* Jump back to the top of the enclosing mi_loop(); only valid inside one. */
#define mi_continue(b) mi_goto(b, &__continue)
|
|
/* Restart the enclosing mi_loop() when cond is true; only valid inside one. */
#define mi_continue_if(b, cond) mi_goto_if(b, cond, &__continue)
|
|
|
|
#endif /* GFX_VERx10 >= 125 */
|
|
|
|
#endif /* MI_BUILDER_H */
|