intel: Rename genx keyword to gfxx in source files
Commands used to do the changes: export SEARCH_PATH="src/intel src/gallium/drivers/iris src/mesa/drivers/dri/i965" grep -E "gen[[:digit:]]+" -rIl $SEARCH_PATH | xargs sed -ie "s/gen\([[:digit:]]\+\)/gfx\1/g" Exclude pack.h and xml changes in this patch: grep -E "gfx[[:digit:]]+_pack\.h" -rIl $SEARCH_PATH | xargs sed -ie "s/gfx\([[:digit:]]\+_pack\.h\)/gen\1/g" grep -E "gfx[[:digit:]]+\.xml" -rIl $SEARCH_PATH | xargs sed -ie "s/gfx\([[:digit:]]\+\.xml\)/gen\1/g" Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9936>
This commit is contained in:
parent
66f6535974
commit
b75f095bc7
|
@ -579,7 +579,7 @@ get_copy_region_aux_settings(struct iris_context *ice,
|
|||
* for adjusting the clear color, so clear support may only be enabled
|
||||
* in some cases:
|
||||
*
|
||||
* - On gen11+, the clear color is indirect and comes in two forms: a
|
||||
* - On gfx11+, the clear color is indirect and comes in two forms: a
|
||||
* 32bpc representation used for rendering and a pixel representation
|
||||
* used for sampling. blorp_copy doesn't change indirect clear colors,
|
||||
* so clears are only supported in the sampling case.
|
||||
|
|
|
@ -1860,7 +1860,7 @@ iris_bufmgr_create(struct gen_device_info *devinfo, int fd, bool bo_reuse)
|
|||
util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_SURFACE],
|
||||
IRIS_MEMZONE_SURFACE_START,
|
||||
_4GB_minus_1 - IRIS_MAX_BINDERS * IRIS_BINDER_SIZE);
|
||||
/* TODO: Why does limiting to 2GB help some state items on gen12?
|
||||
/* TODO: Why does limiting to 2GB help some state items on gfx12?
|
||||
* - CC Viewport Pointer
|
||||
* - Blend State Pointer
|
||||
* - Color Calc State Pointer
|
||||
|
|
|
@ -256,19 +256,19 @@ iris_destroy_context(struct pipe_context *ctx)
|
|||
#define genX_call(devinfo, func, ...) \
|
||||
switch ((devinfo)->verx10) { \
|
||||
case 125: \
|
||||
gen125_##func(__VA_ARGS__); \
|
||||
gfx125_##func(__VA_ARGS__); \
|
||||
break; \
|
||||
case 120: \
|
||||
gen12_##func(__VA_ARGS__); \
|
||||
gfx12_##func(__VA_ARGS__); \
|
||||
break; \
|
||||
case 110: \
|
||||
gen11_##func(__VA_ARGS__); \
|
||||
gfx11_##func(__VA_ARGS__); \
|
||||
break; \
|
||||
case 90: \
|
||||
gen9_##func(__VA_ARGS__); \
|
||||
gfx9_##func(__VA_ARGS__); \
|
||||
break; \
|
||||
case 80: \
|
||||
gen8_##func(__VA_ARGS__); \
|
||||
gfx8_##func(__VA_ARGS__); \
|
||||
break; \
|
||||
default: \
|
||||
unreachable("Unknown hardware generation"); \
|
||||
|
|
|
@ -993,7 +993,7 @@ int iris_get_driver_query_group_info(struct pipe_screen *pscreen,
|
|||
struct pipe_driver_query_group_info *info);
|
||||
|
||||
/* iris_state.c */
|
||||
void gen9_toggle_preemption(struct iris_context *ice,
|
||||
void gfx9_toggle_preemption(struct iris_context *ice,
|
||||
struct iris_batch *batch,
|
||||
const struct pipe_draw_info *draw);
|
||||
|
||||
|
@ -1002,34 +1002,34 @@ void gen9_toggle_preemption(struct iris_context *ice,
|
|||
#ifdef genX
|
||||
# include "iris_genx_protos.h"
|
||||
#else
|
||||
# define genX(x) gen4_##x
|
||||
# define genX(x) gfx4_##x
|
||||
# include "iris_genx_protos.h"
|
||||
# undef genX
|
||||
# define genX(x) gen5_##x
|
||||
# define genX(x) gfx5_##x
|
||||
# include "iris_genx_protos.h"
|
||||
# undef genX
|
||||
# define genX(x) gen6_##x
|
||||
# define genX(x) gfx6_##x
|
||||
# include "iris_genx_protos.h"
|
||||
# undef genX
|
||||
# define genX(x) gen7_##x
|
||||
# define genX(x) gfx7_##x
|
||||
# include "iris_genx_protos.h"
|
||||
# undef genX
|
||||
# define genX(x) gen75_##x
|
||||
# define genX(x) gfx75_##x
|
||||
# include "iris_genx_protos.h"
|
||||
# undef genX
|
||||
# define genX(x) gen8_##x
|
||||
# define genX(x) gfx8_##x
|
||||
# include "iris_genx_protos.h"
|
||||
# undef genX
|
||||
# define genX(x) gen9_##x
|
||||
# define genX(x) gfx9_##x
|
||||
# include "iris_genx_protos.h"
|
||||
# undef genX
|
||||
# define genX(x) gen11_##x
|
||||
# define genX(x) gfx11_##x
|
||||
# include "iris_genx_protos.h"
|
||||
# undef genX
|
||||
# define genX(x) gen12_##x
|
||||
# define genX(x) gfx12_##x
|
||||
# include "iris_genx_protos.h"
|
||||
# undef genX
|
||||
# define genX(x) gen125_##x
|
||||
# define genX(x) gfx125_##x
|
||||
# include "iris_genx_protos.h"
|
||||
# undef genX
|
||||
#endif
|
||||
|
|
|
@ -55,7 +55,7 @@
|
|||
/* The number of bits in our TIMESTAMP queries. */
|
||||
#define TIMESTAMP_BITS 36
|
||||
|
||||
/* For gen12 we set the streamout buffers using 4 separate commands
|
||||
/* For gfx12 we set the streamout buffers using 4 separate commands
|
||||
* (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout
|
||||
* of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of
|
||||
* 3DSTATE_SO_BUFFER apart from the SOBufferIndex field, so for now we use the
|
||||
|
|
|
@ -268,7 +268,7 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info,
|
|||
iris_update_draw_info(ice, info);
|
||||
|
||||
if (devinfo->ver == 9)
|
||||
gen9_toggle_preemption(ice, batch, info);
|
||||
gfx9_toggle_preemption(ice, batch, info);
|
||||
|
||||
iris_update_compiled_shaders(ice);
|
||||
|
||||
|
|
|
@ -210,7 +210,7 @@ iris_is_format_supported(struct pipe_screen *pscreen,
|
|||
}
|
||||
|
||||
/* TODO: Support ASTC 5x5 on Gen9 properly. This means implementing
|
||||
* a complex sampler workaround (see i965's gen9_apply_astc5x5_wa_flush).
|
||||
* a complex sampler workaround (see i965's gfx9_apply_astc5x5_wa_flush).
|
||||
* Without it, st/mesa will emulate ASTC 5x5 via uncompressed textures.
|
||||
*/
|
||||
if (devinfo->ver == 9 && (format == ISL_FORMAT_ASTC_LDR_2D_5X5_FLT16 ||
|
||||
|
|
|
@ -351,7 +351,7 @@ iris_cache_flush_for_render(struct iris_batch *batch,
|
|||
*
|
||||
* Even though it's not obvious, this can easily happen in practice.
|
||||
* Suppose a client is blending on a surface with sRGB encode enabled on
|
||||
* gen9. This implies that you get AUX_USAGE_CCS_D at best. If the client
|
||||
* gfx9. This implies that you get AUX_USAGE_CCS_D at best. If the client
|
||||
* then disables sRGB decode and continues blending we will flip on
|
||||
* AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is
|
||||
* perfectly valid since CCS_E is a subset of CCS_D). However, this means
|
||||
|
@ -897,11 +897,11 @@ iris_image_view_aux_usage(struct iris_context *ice,
|
|||
static bool
|
||||
isl_formats_are_fast_clear_compatible(enum isl_format a, enum isl_format b)
|
||||
{
|
||||
/* On gen8 and earlier, the hardware was only capable of handling 0/1 clear
|
||||
/* On gfx8 and earlier, the hardware was only capable of handling 0/1 clear
|
||||
* values so sRGB curve application was a no-op for all fast-clearable
|
||||
* formats.
|
||||
*
|
||||
* On gen9+, the hardware supports arbitrary clear values. For sRGB clear
|
||||
* On gfx9+, the hardware supports arbitrary clear values. For sRGB clear
|
||||
* values, the hardware interprets the floats, not as what would be
|
||||
* returned from the sampler (or written by the shader), but as being
|
||||
* between format conversion and sRGB curve application. This means that
|
||||
|
@ -985,7 +985,7 @@ iris_resource_render_aux_usage(struct iris_context *ice,
|
|||
case ISL_AUX_USAGE_CCS_D:
|
||||
case ISL_AUX_USAGE_CCS_E:
|
||||
case ISL_AUX_USAGE_GEN12_CCS_E:
|
||||
/* Disable CCS for some cases of texture-view rendering. On gen12, HW
|
||||
/* Disable CCS for some cases of texture-view rendering. On gfx12, HW
|
||||
* may convert some subregions of shader output to fast-cleared blocks
|
||||
* if CCS is enabled and the shader output matches the clear color.
|
||||
* Existing fast-cleared blocks are correctly interpreted by the clear
|
||||
|
|
|
@ -61,19 +61,19 @@
|
|||
#define genX_call(devinfo, func, ...) \
|
||||
switch ((devinfo)->verx10) { \
|
||||
case 125: \
|
||||
gen125_##func(__VA_ARGS__); \
|
||||
gfx125_##func(__VA_ARGS__); \
|
||||
break; \
|
||||
case 120: \
|
||||
gen12_##func(__VA_ARGS__); \
|
||||
gfx12_##func(__VA_ARGS__); \
|
||||
break; \
|
||||
case 110: \
|
||||
gen11_##func(__VA_ARGS__); \
|
||||
gfx11_##func(__VA_ARGS__); \
|
||||
break; \
|
||||
case 90: \
|
||||
gen9_##func(__VA_ARGS__); \
|
||||
gfx9_##func(__VA_ARGS__); \
|
||||
break; \
|
||||
case 80: \
|
||||
gen8_##func(__VA_ARGS__); \
|
||||
gfx8_##func(__VA_ARGS__); \
|
||||
break; \
|
||||
default: \
|
||||
unreachable("Unknown hardware generation"); \
|
||||
|
|
|
@ -836,7 +836,7 @@ calculate_pixel_hashing_table(unsigned n, unsigned m,
|
|||
|
||||
#if GFX_VER == 11
|
||||
static void
|
||||
gen11_upload_pixel_hashing_tables(struct iris_batch *batch)
|
||||
gfx11_upload_pixel_hashing_tables(struct iris_batch *batch)
|
||||
{
|
||||
const struct gen_device_info *devinfo = &batch->screen->devinfo;
|
||||
assert(devinfo->ppipe_subslices[2] == 0);
|
||||
|
@ -872,7 +872,7 @@ gen11_upload_pixel_hashing_tables(struct iris_batch *batch)
|
|||
}
|
||||
#elif GFX_VERx10 == 120
|
||||
static void
|
||||
gen12_upload_pixel_hashing_tables(struct iris_batch *batch)
|
||||
gfx12_upload_pixel_hashing_tables(struct iris_batch *batch)
|
||||
{
|
||||
const struct gen_device_info *devinfo = &batch->screen->devinfo;
|
||||
/* For each n calculate ppipes_of[n], equal to the number of pixel pipes
|
||||
|
@ -1033,11 +1033,11 @@ iris_init_render_context(struct iris_batch *batch)
|
|||
}
|
||||
}
|
||||
|
||||
gen11_upload_pixel_hashing_tables(batch);
|
||||
gfx11_upload_pixel_hashing_tables(batch);
|
||||
#endif
|
||||
|
||||
#if GFX_VERx10 == 120
|
||||
gen12_upload_pixel_hashing_tables(batch);
|
||||
gfx12_upload_pixel_hashing_tables(batch);
|
||||
#endif
|
||||
|
||||
/* 3DSTATE_DRAWING_RECTANGLE is non-pipelined, so we want to avoid
|
||||
|
@ -2787,7 +2787,7 @@ iris_set_shader_images(struct pipe_context *ctx,
|
|||
|
||||
enum isl_format isl_fmt = iris_image_view_get_format(ice, img);
|
||||
|
||||
/* Render compression with images supported on gen12+ only. */
|
||||
/* Render compression with images supported on gfx12+ only. */
|
||||
unsigned aux_usages = GFX_VER >= 12 ? res->aux.possible_usages :
|
||||
1 << ISL_AUX_USAGE_NONE;
|
||||
|
||||
|
@ -4581,7 +4581,7 @@ iris_store_cs_state(const struct gen_device_info *devinfo,
|
|||
* preemption.
|
||||
*
|
||||
* We still have issues with mid-thread preemption (it was already
|
||||
* disabled by the kernel on gen11, due to missing workarounds). It's
|
||||
* disabled by the kernel on gfx11, due to missing workarounds). It's
|
||||
* possible that we are just missing some workarounds, and could enable
|
||||
* it later, but for now let's disable it to fix a GPU in compute in Car
|
||||
* Chase (and possibly more).
|
||||
|
@ -4745,8 +4745,8 @@ update_clear_value(struct iris_context *ice,
|
|||
UNUSED struct isl_device *isl_dev = &batch->screen->isl_dev;
|
||||
UNUSED unsigned aux_modes = all_aux_modes;
|
||||
|
||||
/* We only need to update the clear color in the surface state for gen8 and
|
||||
* gen9. Newer gens can read it directly from the clear color state buffer.
|
||||
/* We only need to update the clear color in the surface state for gfx8 and
|
||||
* gfx9. Newer gens can read it directly from the clear color state buffer.
|
||||
*/
|
||||
#if GFX_VER == 9
|
||||
/* Skip updating the ISL_AUX_USAGE_NONE surface state */
|
||||
|
@ -6824,7 +6824,7 @@ iris_upload_compute_walker(struct iris_context *ice,
|
|||
|
||||
if (stage_dirty & IRIS_STAGE_DIRTY_CS) {
|
||||
iris_emit_cmd(batch, GENX(CFE_STATE), cfe) {
|
||||
/* TODO: Enable gen12-hp scratch support*/
|
||||
/* TODO: Enable gfx12-hp scratch support*/
|
||||
assert(prog_data->total_scratch == 0);
|
||||
|
||||
cfe.MaximumNumberofThreads =
|
||||
|
@ -7802,7 +7802,7 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
|
|||
* We don't put this in the vtable because it's only used on Gen9.
|
||||
*/
|
||||
void
|
||||
gen9_toggle_preemption(struct iris_context *ice,
|
||||
gfx9_toggle_preemption(struct iris_context *ice,
|
||||
struct iris_batch *batch,
|
||||
const struct pipe_draw_info *draw)
|
||||
{
|
||||
|
|
|
@ -239,10 +239,10 @@ LOCAL_GENERATED_SOURCES := $(addprefix $(intermediates)/,$(VULKAN_GENERATED_FILE
|
|||
|
||||
ANV_VK_ENTRYPOINTS_GEN_ARGS= \
|
||||
--proto --weak --prefix anv \
|
||||
--device-prefix gen7 --device-prefix gen75 \
|
||||
--device-prefix gen8 --device-prefix gen9 \
|
||||
--device-prefix gen11 --device-prefix gen12 \
|
||||
--device-prefix gen125
|
||||
--device-prefix gfx7 --device-prefix gfx75 \
|
||||
--device-prefix gfx8 --device-prefix gfx9 \
|
||||
--device-prefix gfx11 --device-prefix gfx12 \
|
||||
--device-prefix gfx125
|
||||
|
||||
$(intermediates)/vulkan/anv_entrypoints.c: $(VK_ENTRYPOINTS_GEN_SCRIPT) \
|
||||
$(VULKAN_API_XML)
|
||||
|
|
|
@ -39,7 +39,7 @@ blorp_shader_type_to_name(enum blorp_shader_type type)
|
|||
[BLORP_SHADER_TYPE_CLEAR] = "BLORP-clear",
|
||||
[BLORP_SHADER_TYPE_MCS_PARTIAL_RESOLVE] = "BLORP-mcs-partial-resolve",
|
||||
[BLORP_SHADER_TYPE_LAYER_OFFSET_VS] = "BLORP-layer-offset-vs",
|
||||
[BLORP_SHADER_TYPE_GEN4_SF] = "BLORP-gen4-sf",
|
||||
[BLORP_SHADER_TYPE_GEN4_SF] = "BLORP-gfx4-sf",
|
||||
};
|
||||
assert(type < ARRAY_SIZE(shader_name));
|
||||
|
||||
|
|
|
@ -110,7 +110,7 @@ struct blorp_surf
|
|||
|
||||
/**
|
||||
* If set (bo != NULL), clear_color is ignored and the actual clear color
|
||||
* is fetched from this address. On gen7-8, this is all of dword 7 of
|
||||
* is fetched from this address. On gfx7-8, this is all of dword 7 of
|
||||
* RENDER_SURFACE_STATE and is the responsibility of the caller to ensure
|
||||
* that it contains a swizzle of RGBA and resource min LOD of 0.
|
||||
*/
|
||||
|
@ -205,7 +205,7 @@ blorp_hiz_clear_depth_stencil(struct blorp_batch *batch,
|
|||
|
||||
|
||||
void
|
||||
blorp_gen8_hiz_clear_attachments(struct blorp_batch *batch,
|
||||
blorp_gfx8_hiz_clear_attachments(struct blorp_batch *batch,
|
||||
uint32_t num_samples,
|
||||
uint32_t x0, uint32_t y0,
|
||||
uint32_t x1, uint32_t y1,
|
||||
|
|
|
@ -1361,7 +1361,7 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx,
|
|||
src_pos = nir_f2i32(&b, nir_channels(&b, src_pos, 0x3));
|
||||
|
||||
if (devinfo->ver == 6) {
|
||||
/* Because gen6 only supports 4x interleved MSAA, we can do all the
|
||||
/* Because gfx6 only supports 4x interleved MSAA, we can do all the
|
||||
* blending we need with a single linear-interpolated texture lookup
|
||||
* at the center of the sample. The texture coordinates to be odd
|
||||
* integers so that they correspond to the center of a 2x2 block
|
||||
|
@ -1637,10 +1637,10 @@ blorp_surf_retile_w_to_y(const struct isl_device *isl_dev,
|
|||
/* First, we need to convert it to a simple 1-level 1-layer 2-D surface */
|
||||
blorp_surf_convert_to_single_slice(isl_dev, info);
|
||||
|
||||
/* On gen7+, we don't have interleaved multisampling for color render
|
||||
/* On gfx7+, we don't have interleaved multisampling for color render
|
||||
* targets so we have to fake it.
|
||||
*
|
||||
* TODO: Are we sure we don't also need to fake it on gen6?
|
||||
* TODO: Are we sure we don't also need to fake it on gfx6?
|
||||
*/
|
||||
if (isl_dev->info->ver > 6 &&
|
||||
info->surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) {
|
||||
|
@ -1868,7 +1868,7 @@ try_blorp_blit(struct blorp_batch *batch,
|
|||
|
||||
if (devinfo->ver == 4) {
|
||||
/* The MinLOD and MinimumArrayElement don't work properly for cube maps.
|
||||
* Convert them to a single slice on gen4.
|
||||
* Convert them to a single slice on gfx4.
|
||||
*/
|
||||
if (params->dst.surf.usage & ISL_SURF_USAGE_CUBE_BIT) {
|
||||
blorp_surf_convert_to_single_slice(batch->blorp->isl_dev, &params->dst);
|
||||
|
|
|
@ -435,7 +435,7 @@ blorp_clear(struct blorp_batch *batch,
|
|||
if (surf->surf->tiling == ISL_TILING_LINEAR)
|
||||
use_simd16_replicated_data = false;
|
||||
|
||||
/* Replicated clears don't work yet before gen6 */
|
||||
/* Replicated clears don't work yet before gfx6 */
|
||||
if (batch->blorp->isl_dev->info->ver < 6)
|
||||
use_simd16_replicated_data = false;
|
||||
|
||||
|
@ -478,7 +478,7 @@ blorp_clear(struct blorp_batch *batch,
|
|||
}
|
||||
|
||||
/* The MinLOD and MinimumArrayElement don't work properly for cube maps.
|
||||
* Convert them to a single slice on gen4.
|
||||
* Convert them to a single slice on gfx4.
|
||||
*/
|
||||
if (batch->blorp->isl_dev->info->ver == 4 &&
|
||||
(params.dst.surf.usage & ISL_SURF_USAGE_CUBE_BIT)) {
|
||||
|
@ -498,7 +498,7 @@ blorp_clear(struct blorp_batch *batch,
|
|||
}
|
||||
|
||||
if (params.dst.tile_x_sa || params.dst.tile_y_sa) {
|
||||
/* Either we're on gen4 where there is no multisampling or the
|
||||
/* Either we're on gfx4 where there is no multisampling or the
|
||||
* surface is compressed which also implies no multisampling.
|
||||
* Therefore, sa == px and we don't need to do a conversion.
|
||||
*/
|
||||
|
@ -762,7 +762,7 @@ blorp_can_hiz_clear_depth(const struct gen_device_info *devinfo,
|
|||
uint32_t level, uint32_t layer,
|
||||
uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1)
|
||||
{
|
||||
/* This function currently doesn't support any gen prior to gen8 */
|
||||
/* This function currently doesn't support any gen prior to gfx8 */
|
||||
assert(devinfo->ver >= 8);
|
||||
|
||||
if (devinfo->ver == 8 && surf->format == ISL_FORMAT_R16_UNORM) {
|
||||
|
@ -895,7 +895,7 @@ blorp_hiz_clear_depth_stencil(struct blorp_batch *batch,
|
|||
blorp_params_init(&params);
|
||||
params.snapshot_type = INTEL_SNAPSHOT_HIZ_CLEAR;
|
||||
|
||||
/* This requires WM_HZ_OP which only exists on gen8+ */
|
||||
/* This requires WM_HZ_OP which only exists on gfx8+ */
|
||||
assert(ISL_GFX_VER(batch->blorp->isl_dev) >= 8);
|
||||
|
||||
params.hiz_op = ISL_AUX_OP_FAST_CLEAR;
|
||||
|
@ -948,7 +948,7 @@ blorp_hiz_clear_depth_stencil(struct blorp_batch *batch,
|
|||
* tagged as cleared so the depth clear value is not actually needed.
|
||||
*/
|
||||
void
|
||||
blorp_gen8_hiz_clear_attachments(struct blorp_batch *batch,
|
||||
blorp_gfx8_hiz_clear_attachments(struct blorp_batch *batch,
|
||||
uint32_t num_samples,
|
||||
uint32_t x0, uint32_t y0,
|
||||
uint32_t x1, uint32_t y1,
|
||||
|
@ -1273,7 +1273,7 @@ blorp_ccs_ambiguate(struct blorp_batch *batch,
|
|||
uint32_t level, uint32_t layer)
|
||||
{
|
||||
if (ISL_GFX_VER(batch->blorp->isl_dev) >= 10) {
|
||||
/* On gen10 and above, we have a hardware resolve op for this */
|
||||
/* On gfx10 and above, we have a hardware resolve op for this */
|
||||
return blorp_ccs_resolve(batch, surf, level, layer, 1,
|
||||
surf->surf->format, ISL_AUX_OP_AMBIGUATE);
|
||||
}
|
||||
|
@ -1366,7 +1366,7 @@ blorp_ccs_ambiguate(struct blorp_batch *batch,
|
|||
width_cl = DIV_ROUND_UP(width_el, x_el_per_cl);
|
||||
height_cl = DIV_ROUND_UP(height_el, y_el_per_cl);
|
||||
} else {
|
||||
/* On gen7, the CCS tiling is not so nice. However, there we are
|
||||
/* On gfx7, the CCS tiling is not so nice. However, there we are
|
||||
* guaranteed that we only have a single level and slice so we don't
|
||||
* have to worry about it and can just align to a whole tile.
|
||||
*/
|
||||
|
|
|
@ -653,7 +653,7 @@ blorp_emit_sampler_state(struct blorp_batch *batch)
|
|||
}
|
||||
|
||||
/* What follows is the code for setting up a "pipeline" on Sandy Bridge and
|
||||
* later hardware. This file will be included by i965 for gen4-5 as well, so
|
||||
* later hardware. This file will be included by i965 for gfx4-5 as well, so
|
||||
* this code is guarded by GFX_VER >= 6.
|
||||
*/
|
||||
#if GFX_VER >= 6
|
||||
|
@ -861,7 +861,7 @@ blorp_emit_ps_config(struct blorp_batch *batch,
|
|||
}
|
||||
|
||||
/* 3DSTATE_PS expects the number of threads per PSD, which is always 64
|
||||
* for pre Gen11 and 128 for gen11+; On gen11+ If a programmed value is
|
||||
* for pre Gen11 and 128 for gfx11+; On gfx11+ If a programmed value is
|
||||
* k, it implies 2(k+1) threads. It implicitly scales for different GT
|
||||
* levels (which have some # of PSDs).
|
||||
*
|
||||
|
@ -1286,7 +1286,7 @@ blorp_emit_pipeline(struct blorp_batch *batch,
|
|||
* The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
|
||||
*
|
||||
* The dynamic state emit helpers emit their own STATE_POINTERS packets on
|
||||
* gen7+. However, on gen6 and earlier, they're all lumpped together in
|
||||
* gfx7+. However, on gfx6 and earlier, they're all lumpped together in
|
||||
* one CC_STATE_POINTERS packet so we have to emit that here.
|
||||
*/
|
||||
blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), cc) {
|
||||
|
@ -1427,7 +1427,7 @@ blorp_emit_surface_state(struct blorp_batch *batch,
|
|||
|
||||
enum isl_aux_usage aux_usage = surface->aux_usage;
|
||||
|
||||
/* On gen12, implicit CCS has no aux buffer */
|
||||
/* On gfx12, implicit CCS has no aux buffer */
|
||||
bool use_aux_address = (aux_usage != ISL_AUX_USAGE_NONE) &&
|
||||
(surface->aux_addr.buffer != NULL);
|
||||
|
||||
|
@ -1465,7 +1465,7 @@ blorp_emit_surface_state(struct blorp_batch *batch,
|
|||
surface->addr, 0);
|
||||
|
||||
if (use_aux_address) {
|
||||
/* On gen7 and prior, the bottom 12 bits of the MCS base address are
|
||||
/* On gfx7 and prior, the bottom 12 bits of the MCS base address are
|
||||
* used to store other information. This should be ok, however, because
|
||||
* surface buffer addresses are always 4K page alinged.
|
||||
*/
|
||||
|
@ -1493,7 +1493,7 @@ blorp_emit_surface_state(struct blorp_batch *batch,
|
|||
isl_dev->ss.clear_value_size);
|
||||
}
|
||||
#else
|
||||
unreachable("Fast clears are only supported on gen7+");
|
||||
unreachable("Fast clears are only supported on gfx7+");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -1718,7 +1718,7 @@ blorp_emit_depth_stencil_config(struct blorp_batch *batch,
|
|||
* clearing operations without such information.
|
||||
* */
|
||||
static void
|
||||
blorp_emit_gen8_hiz_op(struct blorp_batch *batch,
|
||||
blorp_emit_gfx8_hiz_op(struct blorp_batch *batch,
|
||||
const struct blorp_params *params)
|
||||
{
|
||||
/* We should be performing an operation on a depth or stencil buffer.
|
||||
|
@ -1966,7 +1966,7 @@ blorp_exec(struct blorp_batch *batch, const struct blorp_params *params)
|
|||
|
||||
#if GFX_VER >= 8
|
||||
if (params->hiz_op != ISL_AUX_OP_NONE) {
|
||||
blorp_emit_gen8_hiz_op(batch, params);
|
||||
blorp_emit_gfx8_hiz_op(batch, params);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -359,7 +359,7 @@ struct brw_blorp_blit_prog_key
|
|||
* \name BLORP internals
|
||||
* \{
|
||||
*
|
||||
* Used internally by gen6_blorp_exec() and gen7_blorp_exec().
|
||||
* Used internally by gfx6_blorp_exec() and gfx7_blorp_exec().
|
||||
*/
|
||||
|
||||
void brw_blorp_init_wm_prog_key(struct brw_wm_prog_key *wm_key);
|
||||
|
|
|
@ -749,7 +749,7 @@ decode_3dstate_constant(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
|
|||
}
|
||||
|
||||
static void
|
||||
decode_gen6_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
|
||||
decode_gfx6_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
|
||||
const uint32_t *p)
|
||||
{
|
||||
fprintf(ctx->fp, "VS Binding Table:\n");
|
||||
|
@ -777,7 +777,7 @@ decode_3dstate_sampler_state_pointers(struct intel_batch_decode_ctx *ctx,
|
|||
}
|
||||
|
||||
static void
|
||||
decode_3dstate_sampler_state_pointers_gen6(struct intel_batch_decode_ctx *ctx,
|
||||
decode_3dstate_sampler_state_pointers_gfx6(struct intel_batch_decode_ctx *ctx,
|
||||
const uint32_t *p)
|
||||
{
|
||||
dump_samplers(ctx, p[1], 1);
|
||||
|
@ -1136,7 +1136,7 @@ struct custom_decoder {
|
|||
{ "3DSTATE_CONSTANT_DS", decode_3dstate_constant },
|
||||
{ "3DSTATE_CONSTANT_ALL", decode_3dstate_constant_all },
|
||||
|
||||
{ "3DSTATE_BINDING_TABLE_POINTERS", decode_gen6_3dstate_binding_table_pointers },
|
||||
{ "3DSTATE_BINDING_TABLE_POINTERS", decode_gfx6_3dstate_binding_table_pointers },
|
||||
{ "3DSTATE_BINDING_TABLE_POINTERS_VS", decode_3dstate_binding_table_pointers },
|
||||
{ "3DSTATE_BINDING_TABLE_POINTERS_HS", decode_3dstate_binding_table_pointers },
|
||||
{ "3DSTATE_BINDING_TABLE_POINTERS_DS", decode_3dstate_binding_table_pointers },
|
||||
|
@ -1148,7 +1148,7 @@ struct custom_decoder {
|
|||
{ "3DSTATE_SAMPLER_STATE_POINTERS_DS", decode_3dstate_sampler_state_pointers },
|
||||
{ "3DSTATE_SAMPLER_STATE_POINTERS_GS", decode_3dstate_sampler_state_pointers },
|
||||
{ "3DSTATE_SAMPLER_STATE_POINTERS_PS", decode_3dstate_sampler_state_pointers },
|
||||
{ "3DSTATE_SAMPLER_STATE_POINTERS", decode_3dstate_sampler_state_pointers_gen6 },
|
||||
{ "3DSTATE_SAMPLER_STATE_POINTERS", decode_3dstate_sampler_state_pointers_gfx6 },
|
||||
|
||||
{ "3DSTATE_VIEWPORT_STATE_POINTERS_CC", decode_3dstate_viewport_state_pointers_cc },
|
||||
{ "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP", decode_3dstate_viewport_state_pointers_sf_clip },
|
||||
|
|
|
@ -40,7 +40,7 @@ struct intel_l3_list {
|
|||
|
||||
/**
|
||||
* IVB/HSW validated L3 configurations. The first entry will be used as
|
||||
* default by gen7_restore_default_l3_config(), otherwise the ordering is
|
||||
* default by gfx7_restore_default_l3_config(), otherwise the ordering is
|
||||
* unimportant.
|
||||
*/
|
||||
static const struct intel_l3_config ivb_l3_configs[] = {
|
||||
|
|
|
@ -213,7 +213,7 @@ mi_builder_test::SetUp()
|
|||
ctx_id = ctx_create.ctx_id;
|
||||
|
||||
if (GFX_VER >= 8) {
|
||||
/* On gen8+, we require softpin */
|
||||
/* On gfx8+, we require softpin */
|
||||
int has_softpin;
|
||||
drm_i915_getparam getparam = drm_i915_getparam();
|
||||
getparam.param = I915_PARAM_HAS_EXEC_SOFTPIN;
|
||||
|
@ -309,7 +309,7 @@ mi_builder_test::submit_batch()
|
|||
objects[0].handle = data_bo_handle;
|
||||
objects[0].relocation_count = 0;
|
||||
objects[0].relocs_ptr = 0;
|
||||
#if GFX_VER >= 8 /* On gen8+, we pin everything */
|
||||
#if GFX_VER >= 8 /* On gfx8+, we pin everything */
|
||||
objects[0].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
|
||||
EXEC_OBJECT_PINNED |
|
||||
EXEC_OBJECT_WRITE;
|
||||
|
@ -320,7 +320,7 @@ mi_builder_test::submit_batch()
|
|||
#endif
|
||||
|
||||
objects[1].handle = batch_bo_handle;
|
||||
#if GFX_VER >= 8 /* On gen8+, we don't use relocations */
|
||||
#if GFX_VER >= 8 /* On gfx8+, we don't use relocations */
|
||||
objects[1].relocation_count = 0;
|
||||
objects[1].relocs_ptr = 0;
|
||||
objects[1].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
|
||||
|
|
|
@ -345,7 +345,7 @@ static void invert_det( struct brw_sf_compile *c)
|
|||
/* Looks like we invert all 8 elements just to get 1/det in
|
||||
* position 2 !?!
|
||||
*/
|
||||
gen4_math(&c->func,
|
||||
gfx4_math(&c->func,
|
||||
c->inv_det,
|
||||
BRW_MATH_FUNCTION_INV,
|
||||
0,
|
||||
|
@ -633,7 +633,7 @@ static void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
|
|||
if (pc_coord_replace) {
|
||||
set_predicate_control_flag_value(p, c, pc_coord_replace);
|
||||
/* Caculate 1.0/PointWidth */
|
||||
gen4_math(&c->func,
|
||||
gfx4_math(&c->func,
|
||||
c->tmp,
|
||||
BRW_MATH_FUNCTION_INV,
|
||||
0,
|
||||
|
|
|
@ -179,7 +179,7 @@ brw_shader_stage_is_bindless(gl_shader_stage stage)
|
|||
* @{
|
||||
*/
|
||||
|
||||
enum PACKED gen6_gather_sampler_wa {
|
||||
enum PACKED gfx6_gather_sampler_wa {
|
||||
WA_SIGN = 1, /* whether we need to sign extend */
|
||||
WA_8BIT = 2, /* if we have an 8bit format needing wa */
|
||||
WA_16BIT = 4, /* if we have a 16bit format needing wa */
|
||||
|
@ -216,7 +216,7 @@ struct brw_sampler_prog_key_data {
|
|||
/**
|
||||
* For Sandybridge, which shader w/a we need for gather quirks.
|
||||
*/
|
||||
enum gen6_gather_sampler_wa gen6_gather_wa[MAX_SAMPLERS];
|
||||
enum gfx6_gather_sampler_wa gfx6_gather_wa[MAX_SAMPLERS];
|
||||
|
||||
/**
|
||||
* Texture units that have a YUV image bound.
|
||||
|
@ -575,7 +575,7 @@ struct brw_image_param {
|
|||
#define BRW_MAX_SOL_BINDINGS 64
|
||||
|
||||
/**
|
||||
* Binding table index for the first gen6 SOL binding.
|
||||
* Binding table index for the first gfx6 SOL binding.
|
||||
*/
|
||||
#define BRW_GEN6_SOL_BINDING_START 0
|
||||
|
||||
|
@ -852,7 +852,7 @@ struct brw_wm_prog_data {
|
|||
|
||||
/**
|
||||
* Mask of which interpolation modes are required by the fragment shader.
|
||||
* Used in hardware setup on gen6+.
|
||||
* Used in hardware setup on gfx6+.
|
||||
*/
|
||||
uint32_t barycentric_interp_modes;
|
||||
|
||||
|
@ -1054,8 +1054,8 @@ typedef enum
|
|||
/**
|
||||
* We always program SF to start reading at an offset of 1 (2 varying slots)
|
||||
* from the start of the vertex URB entry. This causes it to skip:
|
||||
* - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5
|
||||
* - VARYING_SLOT_PSIZ and VARYING_SLOT_POS on gen6+
|
||||
* - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gfx4-5
|
||||
* - VARYING_SLOT_PSIZ and VARYING_SLOT_POS on gfx6+
|
||||
*/
|
||||
#define BRW_SF_URB_ENTRY_READ_OFFSET 1
|
||||
|
||||
|
|
|
@ -75,7 +75,7 @@ debug_sampler_recompile(const struct brw_compiler *c, void *log,
|
|||
|
||||
for (unsigned i = 0; i < MAX_SAMPLERS; i++) {
|
||||
found |= check("EXT_texture_swizzle or DEPTH_TEXTURE_MODE", swizzles[i]);
|
||||
found |= check("textureGather workarounds", gen6_gather_wa[i]);
|
||||
found |= check("textureGather workarounds", gfx6_gather_wa[i]);
|
||||
found |= check_float("scale factor", scale_factors[i]);
|
||||
}
|
||||
|
||||
|
|
|
@ -289,7 +289,7 @@ static const char *const end_of_thread[2] = {
|
|||
};
|
||||
|
||||
/* SFIDs on Gen4-5 */
|
||||
static const char *const gen4_sfid[16] = {
|
||||
static const char *const gfx4_sfid[16] = {
|
||||
[BRW_SFID_NULL] = "null",
|
||||
[BRW_SFID_MATH] = "math",
|
||||
[BRW_SFID_SAMPLER] = "sampler",
|
||||
|
@ -301,7 +301,7 @@ static const char *const gen4_sfid[16] = {
|
|||
[BRW_SFID_VME] = "vme",
|
||||
};
|
||||
|
||||
static const char *const gen6_sfid[16] = {
|
||||
static const char *const gfx6_sfid[16] = {
|
||||
[BRW_SFID_NULL] = "null",
|
||||
[BRW_SFID_MATH] = "math",
|
||||
[BRW_SFID_SAMPLER] = "sampler",
|
||||
|
@ -318,7 +318,7 @@ static const char *const gen6_sfid[16] = {
|
|||
[GEN_RT_SFID_RAY_TRACE_ACCELERATOR] = "rt accel",
|
||||
};
|
||||
|
||||
static const char *const gen7_gateway_subfuncid[8] = {
|
||||
static const char *const gfx7_gateway_subfuncid[8] = {
|
||||
[BRW_MESSAGE_GATEWAY_SFID_OPEN_GATEWAY] = "open",
|
||||
[BRW_MESSAGE_GATEWAY_SFID_CLOSE_GATEWAY] = "close",
|
||||
[BRW_MESSAGE_GATEWAY_SFID_FORWARD_MSG] = "forward msg",
|
||||
|
@ -328,7 +328,7 @@ static const char *const gen7_gateway_subfuncid[8] = {
|
|||
[BRW_MESSAGE_GATEWAY_SFID_MMIO_READ_WRITE] = "mmio read/write",
|
||||
};
|
||||
|
||||
static const char *const gen4_dp_read_port_msg_type[4] = {
|
||||
static const char *const gfx4_dp_read_port_msg_type[4] = {
|
||||
[0b00] = "OWord Block Read",
|
||||
[0b01] = "OWord Dual Block Read",
|
||||
[0b10] = "Media Block Read",
|
||||
|
@ -355,7 +355,7 @@ static const char *const dp_write_port_msg_type[8] = {
|
|||
[0b111] = "flush render cache",
|
||||
};
|
||||
|
||||
static const char *const dp_rc_msg_type_gen6[16] = {
|
||||
static const char *const dp_rc_msg_type_gfx6[16] = {
|
||||
[BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read",
|
||||
[GFX6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read",
|
||||
[GFX6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read",
|
||||
|
@ -375,7 +375,7 @@ static const char *const dp_rc_msg_type_gen6[16] = {
|
|||
[GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORM write",
|
||||
};
|
||||
|
||||
static const char *const dp_rc_msg_type_gen7[16] = {
|
||||
static const char *const dp_rc_msg_type_gfx7[16] = {
|
||||
[GFX7_DATAPORT_RC_MEDIA_BLOCK_READ] = "media block read",
|
||||
[GFX7_DATAPORT_RC_TYPED_SURFACE_READ] = "typed surface read",
|
||||
[GFX7_DATAPORT_RC_TYPED_ATOMIC_OP] = "typed atomic op",
|
||||
|
@ -385,7 +385,7 @@ static const char *const dp_rc_msg_type_gen7[16] = {
|
|||
[GFX7_DATAPORT_RC_TYPED_SURFACE_WRITE] = "typed surface write"
|
||||
};
|
||||
|
||||
static const char *const dp_rc_msg_type_gen9[16] = {
|
||||
static const char *const dp_rc_msg_type_gfx9[16] = {
|
||||
[GFX9_DATAPORT_RC_RENDER_TARGET_WRITE] = "RT write",
|
||||
[GFX9_DATAPORT_RC_RENDER_TARGET_READ] = "RT read"
|
||||
};
|
||||
|
@ -393,9 +393,9 @@ static const char *const dp_rc_msg_type_gen9[16] = {
|
|||
static const char *const *
|
||||
dp_rc_msg_type(const struct gen_device_info *devinfo)
|
||||
{
|
||||
return (devinfo->ver >= 9 ? dp_rc_msg_type_gen9 :
|
||||
devinfo->ver >= 7 ? dp_rc_msg_type_gen7 :
|
||||
devinfo->ver >= 6 ? dp_rc_msg_type_gen6 :
|
||||
return (devinfo->ver >= 9 ? dp_rc_msg_type_gfx9 :
|
||||
devinfo->ver >= 7 ? dp_rc_msg_type_gfx7 :
|
||||
devinfo->ver >= 6 ? dp_rc_msg_type_gfx6 :
|
||||
dp_write_port_msg_type);
|
||||
}
|
||||
|
||||
|
@ -409,7 +409,7 @@ static const char *const m_rt_write_subtype[] = {
|
|||
[0b111] = "SIMD16/RepData-111", /* no idea how this is different than 1 */
|
||||
};
|
||||
|
||||
static const char *const dp_dc0_msg_type_gen7[16] = {
|
||||
static const char *const dp_dc0_msg_type_gfx7[16] = {
|
||||
[GFX7_DATAPORT_DC_OWORD_BLOCK_READ] = "DC OWORD block read",
|
||||
[GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ] =
|
||||
"DC unaligned OWORD block read",
|
||||
|
@ -535,12 +535,12 @@ static const char *const math_precision[2] = {
|
|||
[1] = "partial_precision"
|
||||
};
|
||||
|
||||
static const char *const gen5_urb_opcode[] = {
|
||||
static const char *const gfx5_urb_opcode[] = {
|
||||
[0] = "urb_write",
|
||||
[1] = "ff_sync",
|
||||
};
|
||||
|
||||
static const char *const gen7_urb_opcode[] = {
|
||||
static const char *const gfx7_urb_opcode[] = {
|
||||
[BRW_URB_OPCODE_WRITE_HWORD] = "write HWord",
|
||||
[BRW_URB_OPCODE_WRITE_OWORD] = "write OWord",
|
||||
[BRW_URB_OPCODE_READ_HWORD] = "read HWord",
|
||||
|
@ -574,7 +574,7 @@ static const char *const urb_complete[2] = {
|
|||
[1] = "complete"
|
||||
};
|
||||
|
||||
static const char *const gen5_sampler_msg_type[] = {
|
||||
static const char *const gfx5_sampler_msg_type[] = {
|
||||
[GFX5_SAMPLER_MESSAGE_SAMPLE] = "sample",
|
||||
[GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS] = "sample_b",
|
||||
[GFX5_SAMPLER_MESSAGE_SAMPLE_LOD] = "sample_l",
|
||||
|
@ -600,7 +600,7 @@ static const char *const gen5_sampler_msg_type[] = {
|
|||
[GFX7_SAMPLER_MESSAGE_SAMPLE_LD2DSS] = "ld2dss",
|
||||
};
|
||||
|
||||
static const char *const gen5_sampler_simd_mode[4] = {
|
||||
static const char *const gfx5_sampler_simd_mode[4] = {
|
||||
[BRW_SAMPLER_SIMD_MODE_SIMD4X2] = "SIMD4x2",
|
||||
[BRW_SAMPLER_SIMD_MODE_SIMD8] = "SIMD8",
|
||||
[BRW_SAMPLER_SIMD_MODE_SIMD16] = "SIMD16",
|
||||
|
@ -1018,7 +1018,7 @@ src_da16(FILE *file,
|
|||
|
||||
static enum brw_vertical_stride
|
||||
vstride_from_align1_3src_vstride(const struct gen_device_info *devinfo,
|
||||
enum gen10_align1_3src_vertical_stride vstride)
|
||||
enum gfx10_align1_3src_vertical_stride vstride)
|
||||
{
|
||||
switch (vstride) {
|
||||
case BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0: return BRW_VERTICAL_STRIDE_0;
|
||||
|
@ -1035,7 +1035,7 @@ vstride_from_align1_3src_vstride(const struct gen_device_info *devinfo,
|
|||
}
|
||||
|
||||
static enum brw_horizontal_stride
|
||||
hstride_from_align1_3src_hstride(enum gen10_align1_3src_src_horizontal_stride hstride)
|
||||
hstride_from_align1_3src_hstride(enum gfx10_align1_3src_src_horizontal_stride hstride)
|
||||
{
|
||||
switch (hstride) {
|
||||
case BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0: return BRW_HORIZONTAL_STRIDE_0;
|
||||
|
@ -1048,7 +1048,7 @@ hstride_from_align1_3src_hstride(enum gen10_align1_3src_src_horizontal_stride hs
|
|||
}
|
||||
|
||||
static enum brw_vertical_stride
|
||||
vstride_from_align1_3src_hstride(enum gen10_align1_3src_src_horizontal_stride hstride)
|
||||
vstride_from_align1_3src_hstride(enum gfx10_align1_3src_src_horizontal_stride hstride)
|
||||
{
|
||||
switch (hstride) {
|
||||
case BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0: return BRW_VERTICAL_STRIDE_0;
|
||||
|
@ -1732,7 +1732,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
|
|||
brw_inst_cond_modifier(devinfo, inst), NULL);
|
||||
|
||||
/* If we're using the conditional modifier, print which flags reg is
|
||||
* used for it. Note that on gen6+, the embedded-condition SEL and
|
||||
* used for it. Note that on gfx6+, the embedded-condition SEL and
|
||||
* control flow doesn't update flags.
|
||||
*/
|
||||
if (brw_inst_cond_modifier(devinfo, inst) &&
|
||||
|
@ -1770,7 +1770,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
|
|||
if (devinfo->ver >= 7) {
|
||||
jip = brw_inst_jip(devinfo, inst);
|
||||
} else {
|
||||
jip = brw_inst_gen6_jump_count(devinfo, inst);
|
||||
jip = brw_inst_gfx6_jump_count(devinfo, inst);
|
||||
}
|
||||
|
||||
pad(file, 16);
|
||||
|
@ -1780,18 +1780,18 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
|
|||
opcode == BRW_OPCODE_CONTINUE ||
|
||||
opcode == BRW_OPCODE_ELSE)) {
|
||||
pad(file, 16);
|
||||
format(file, "Jump: %d", brw_inst_gen4_jump_count(devinfo, inst));
|
||||
format(file, "Jump: %d", brw_inst_gfx4_jump_count(devinfo, inst));
|
||||
pad(file, 32);
|
||||
format(file, "Pop: %"PRIu64, brw_inst_gen4_pop_count(devinfo, inst));
|
||||
format(file, "Pop: %"PRIu64, brw_inst_gfx4_pop_count(devinfo, inst));
|
||||
} else if (devinfo->ver < 6 && (opcode == BRW_OPCODE_IF ||
|
||||
opcode == BRW_OPCODE_IFF ||
|
||||
opcode == BRW_OPCODE_HALT ||
|
||||
opcode == BRW_OPCODE_WHILE)) {
|
||||
pad(file, 16);
|
||||
format(file, "Jump: %d", brw_inst_gen4_jump_count(devinfo, inst));
|
||||
format(file, "Jump: %d", brw_inst_gfx4_jump_count(devinfo, inst));
|
||||
} else if (devinfo->ver < 6 && opcode == BRW_OPCODE_ENDIF) {
|
||||
pad(file, 16);
|
||||
format(file, "Pop: %"PRIu64, brw_inst_gen4_pop_count(devinfo, inst));
|
||||
format(file, "Pop: %"PRIu64, brw_inst_gfx4_pop_count(devinfo, inst));
|
||||
} else if (opcode == BRW_OPCODE_JMPI) {
|
||||
pad(file, 16);
|
||||
err |= src1(file, devinfo, inst);
|
||||
|
@ -1871,7 +1871,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
|
|||
space = 0;
|
||||
|
||||
fprintf(file, " ");
|
||||
err |= control(file, "SFID", devinfo->ver >= 6 ? gen6_sfid : gen4_sfid,
|
||||
err |= control(file, "SFID", devinfo->ver >= 6 ? gfx6_sfid : gfx4_sfid,
|
||||
sfid, &space);
|
||||
string(file, " MsgDesc:");
|
||||
|
||||
|
@ -1894,10 +1894,10 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
|
|||
break;
|
||||
case BRW_SFID_SAMPLER:
|
||||
if (devinfo->ver >= 5) {
|
||||
err |= control(file, "sampler message", gen5_sampler_msg_type,
|
||||
err |= control(file, "sampler message", gfx5_sampler_msg_type,
|
||||
brw_sampler_desc_msg_type(devinfo, imm_desc),
|
||||
&space);
|
||||
err |= control(file, "sampler simd mode", gen5_sampler_simd_mode,
|
||||
err |= control(file, "sampler simd mode", gfx5_sampler_simd_mode,
|
||||
brw_sampler_desc_simd_mode(devinfo, imm_desc),
|
||||
&space);
|
||||
format(file, " Surface = %u Sampler = %u",
|
||||
|
@ -1930,7 +1930,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
|
|||
} else {
|
||||
bool is_965 = devinfo->ver == 4 && !devinfo->is_g4x;
|
||||
err |= control(file, "DP read message type",
|
||||
is_965 ? gen4_dp_read_port_msg_type :
|
||||
is_965 ? gfx4_dp_read_port_msg_type :
|
||||
g45_dp_read_port_msg_type,
|
||||
brw_dp_read_desc_msg_type(devinfo, imm_desc),
|
||||
&space);
|
||||
|
@ -1982,8 +1982,8 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
|
|||
space = 1;
|
||||
|
||||
err |= control(file, "urb opcode",
|
||||
devinfo->ver >= 7 ? gen7_urb_opcode
|
||||
: gen5_urb_opcode,
|
||||
devinfo->ver >= 7 ? gfx7_urb_opcode
|
||||
: gfx5_urb_opcode,
|
||||
opcode, &space);
|
||||
|
||||
if (devinfo->ver >= 7 &&
|
||||
|
@ -2018,7 +2018,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
|
|||
|
||||
case BRW_SFID_MESSAGE_GATEWAY:
|
||||
format(file, " (%s)",
|
||||
gen7_gateway_subfuncid[brw_inst_gateway_subfuncid(devinfo, inst)]);
|
||||
gfx7_gateway_subfuncid[brw_inst_gateway_subfuncid(devinfo, inst)]);
|
||||
break;
|
||||
|
||||
case GFX7_SFID_DATAPORT_DATA_CACHE:
|
||||
|
@ -2026,7 +2026,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
|
|||
format(file, " (");
|
||||
|
||||
err |= control(file, "DP DC0 message type",
|
||||
dp_dc0_msg_type_gen7,
|
||||
dp_dc0_msg_type_gfx7,
|
||||
brw_dp_desc_msg_type(devinfo, imm_desc), &space);
|
||||
|
||||
format(file, ", %u, ",
|
||||
|
|
|
@ -495,7 +495,7 @@ brw_label_assembly(const struct gen_device_info *devinfo,
|
|||
if (devinfo->ver >= 7) {
|
||||
jip = brw_inst_jip(devinfo, inst);
|
||||
} else {
|
||||
jip = brw_inst_gen6_jump_count(devinfo, inst);
|
||||
jip = brw_inst_gfx6_jump_count(devinfo, inst);
|
||||
}
|
||||
|
||||
brw_create_label(&root_label, offset + jip * to_bytes_scale, mem_ctx);
|
||||
|
|
|
@ -54,7 +54,7 @@ struct brw_insn_state {
|
|||
/* Group in units of channels */
|
||||
unsigned group:5;
|
||||
|
||||
/* Compression control on gen4-5 */
|
||||
/* Compression control on gfx4-5 */
|
||||
bool compressed:1;
|
||||
|
||||
/* One of BRW_MASK_* */
|
||||
|
@ -129,7 +129,7 @@ struct brw_codegen {
|
|||
*/
|
||||
int *loop_stack;
|
||||
/**
|
||||
* pre-gen6, the BREAK and CONT instructions had to tell how many IF/ENDIF
|
||||
* pre-gfx6, the BREAK and CONT instructions had to tell how many IF/ENDIF
|
||||
* blocks they were popping out of, to fix up the mask stack. This tracks
|
||||
* the IF/ENDIF nesting in each current nested loop level.
|
||||
*/
|
||||
|
@ -202,7 +202,7 @@ brw_inst *brw_next_insn(struct brw_codegen *p, unsigned opcode);
|
|||
void brw_set_dest(struct brw_codegen *p, brw_inst *insn, struct brw_reg dest);
|
||||
void brw_set_src0(struct brw_codegen *p, brw_inst *insn, struct brw_reg reg);
|
||||
|
||||
void gen6_resolve_implied_move(struct brw_codegen *p,
|
||||
void gfx6_resolve_implied_move(struct brw_codegen *p,
|
||||
struct brw_reg *src,
|
||||
unsigned msg_reg_nr);
|
||||
|
||||
|
@ -446,7 +446,7 @@ brw_dp_desc(const struct gen_device_info *devinfo,
|
|||
unsigned msg_type,
|
||||
unsigned msg_control)
|
||||
{
|
||||
/* Prior to gen6, things are too inconsistent; use the dp_read/write_desc
|
||||
/* Prior to gfx6, things are too inconsistent; use the dp_read/write_desc
|
||||
* helpers instead.
|
||||
*/
|
||||
assert(devinfo->ver >= 6);
|
||||
|
@ -1155,7 +1155,7 @@ brw_inst *brw_fb_WRITE(struct brw_codegen *p,
|
|||
bool last_render_target,
|
||||
bool header_present);
|
||||
|
||||
brw_inst *gen9_fb_READ(struct brw_codegen *p,
|
||||
brw_inst *gfx9_fb_READ(struct brw_codegen *p,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg payload,
|
||||
unsigned binding_table_index,
|
||||
|
@ -1180,14 +1180,14 @@ void brw_adjust_sampler_state_pointer(struct brw_codegen *p,
|
|||
struct brw_reg header,
|
||||
struct brw_reg sampler_index);
|
||||
|
||||
void gen4_math(struct brw_codegen *p,
|
||||
void gfx4_math(struct brw_codegen *p,
|
||||
struct brw_reg dest,
|
||||
unsigned function,
|
||||
unsigned msg_reg_nr,
|
||||
struct brw_reg src,
|
||||
unsigned precision );
|
||||
|
||||
void gen6_math(struct brw_codegen *p,
|
||||
void gfx6_math(struct brw_codegen *p,
|
||||
struct brw_reg dest,
|
||||
unsigned function,
|
||||
struct brw_reg src0,
|
||||
|
@ -1212,7 +1212,7 @@ void brw_oword_block_write_scratch(struct brw_codegen *p,
|
|||
int num_regs,
|
||||
unsigned offset);
|
||||
|
||||
void gen7_block_read_scratch(struct brw_codegen *p,
|
||||
void gfx7_block_read_scratch(struct brw_codegen *p,
|
||||
struct brw_reg dest,
|
||||
int num_regs,
|
||||
unsigned offset);
|
||||
|
@ -1251,7 +1251,7 @@ void brw_barrier(struct brw_codegen *p, struct brw_reg src);
|
|||
* channel.
|
||||
*/
|
||||
brw_inst *brw_IF(struct brw_codegen *p, unsigned execute_size);
|
||||
brw_inst *gen6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional,
|
||||
brw_inst *gfx6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional,
|
||||
struct brw_reg src0, struct brw_reg src1);
|
||||
|
||||
void brw_ELSE(struct brw_codegen *p);
|
||||
|
|
|
@ -223,7 +223,7 @@ static const uint16_t g45_src_index_table[32] = {
|
|||
0b111101110100,
|
||||
};
|
||||
|
||||
static const uint32_t gen6_control_index_table[32] = {
|
||||
static const uint32_t gfx6_control_index_table[32] = {
|
||||
0b00000000000000000,
|
||||
0b01000000000000000,
|
||||
0b00110000000000000,
|
||||
|
@ -258,7 +258,7 @@ static const uint32_t gen6_control_index_table[32] = {
|
|||
0b00100000000001001,
|
||||
};
|
||||
|
||||
static const uint32_t gen6_datatype_table[32] = {
|
||||
static const uint32_t gfx6_datatype_table[32] = {
|
||||
0b001001110000000000,
|
||||
0b001000110000100000,
|
||||
0b001001110000000001,
|
||||
|
@ -293,7 +293,7 @@ static const uint32_t gen6_datatype_table[32] = {
|
|||
0b001000001110111110,
|
||||
};
|
||||
|
||||
static const uint16_t gen6_subreg_table[32] = {
|
||||
static const uint16_t gfx6_subreg_table[32] = {
|
||||
0b000000000000000,
|
||||
0b000000000000100,
|
||||
0b000000110000000,
|
||||
|
@ -328,7 +328,7 @@ static const uint16_t gen6_subreg_table[32] = {
|
|||
0b000110000000000,
|
||||
};
|
||||
|
||||
static const uint16_t gen6_src_index_table[32] = {
|
||||
static const uint16_t gfx6_src_index_table[32] = {
|
||||
0b000000000000,
|
||||
0b010110001000,
|
||||
0b010001101000,
|
||||
|
@ -363,7 +363,7 @@ static const uint16_t gen6_src_index_table[32] = {
|
|||
0b001101010000,
|
||||
};
|
||||
|
||||
static const uint32_t gen7_control_index_table[32] = {
|
||||
static const uint32_t gfx7_control_index_table[32] = {
|
||||
0b0000000000000000010,
|
||||
0b0000100000000000000,
|
||||
0b0000100000000000001,
|
||||
|
@ -398,7 +398,7 @@ static const uint32_t gen7_control_index_table[32] = {
|
|||
0b0101000000100000000,
|
||||
};
|
||||
|
||||
static const uint32_t gen7_datatype_table[32] = {
|
||||
static const uint32_t gfx7_datatype_table[32] = {
|
||||
0b001000000000000001,
|
||||
0b001000000000100000,
|
||||
0b001000000000100001,
|
||||
|
@ -433,7 +433,7 @@ static const uint32_t gen7_datatype_table[32] = {
|
|||
0b001010110100101000,
|
||||
};
|
||||
|
||||
static const uint16_t gen7_subreg_table[32] = {
|
||||
static const uint16_t gfx7_subreg_table[32] = {
|
||||
0b000000000000000,
|
||||
0b000000000000001,
|
||||
0b000000000001000,
|
||||
|
@ -468,7 +468,7 @@ static const uint16_t gen7_subreg_table[32] = {
|
|||
0b111000000011100,
|
||||
};
|
||||
|
||||
static const uint16_t gen7_src_index_table[32] = {
|
||||
static const uint16_t gfx7_src_index_table[32] = {
|
||||
0b000000000000,
|
||||
0b000000000010,
|
||||
0b000000010000,
|
||||
|
@ -503,7 +503,7 @@ static const uint16_t gen7_src_index_table[32] = {
|
|||
0b010110001000,
|
||||
};
|
||||
|
||||
static const uint32_t gen8_control_index_table[32] = {
|
||||
static const uint32_t gfx8_control_index_table[32] = {
|
||||
0b0000000000000000010,
|
||||
0b0000100000000000000,
|
||||
0b0000100000000000001,
|
||||
|
@ -538,7 +538,7 @@ static const uint32_t gen8_control_index_table[32] = {
|
|||
0b0101000000100000000,
|
||||
};
|
||||
|
||||
static const uint32_t gen8_datatype_table[32] = {
|
||||
static const uint32_t gfx8_datatype_table[32] = {
|
||||
0b001000000000000000001,
|
||||
0b001000000000001000000,
|
||||
0b001000000000001000001,
|
||||
|
@ -573,7 +573,7 @@ static const uint32_t gen8_datatype_table[32] = {
|
|||
0b001001011001001001000,
|
||||
};
|
||||
|
||||
static const uint16_t gen8_subreg_table[32] = {
|
||||
static const uint16_t gfx8_subreg_table[32] = {
|
||||
0b000000000000000,
|
||||
0b000000000000001,
|
||||
0b000000000001000,
|
||||
|
@ -608,7 +608,7 @@ static const uint16_t gen8_subreg_table[32] = {
|
|||
0b111000000011100,
|
||||
};
|
||||
|
||||
static const uint16_t gen8_src_index_table[32] = {
|
||||
static const uint16_t gfx8_src_index_table[32] = {
|
||||
0b000000000000,
|
||||
0b000000000010,
|
||||
0b000000010000,
|
||||
|
@ -643,7 +643,7 @@ static const uint16_t gen8_src_index_table[32] = {
|
|||
0b010110001000,
|
||||
};
|
||||
|
||||
static const uint32_t gen11_datatype_table[32] = {
|
||||
static const uint32_t gfx11_datatype_table[32] = {
|
||||
0b001000000000000000001,
|
||||
0b001000000000001000000,
|
||||
0b001000000000001000001,
|
||||
|
@ -678,7 +678,7 @@ static const uint32_t gen11_datatype_table[32] = {
|
|||
0b001001011001001001000,
|
||||
};
|
||||
|
||||
static const uint32_t gen12_control_index_table[32] = {
|
||||
static const uint32_t gfx12_control_index_table[32] = {
|
||||
0b000000000000000000100, /* (16|M0) */
|
||||
0b000000000000000000011, /* (8|M0) */
|
||||
0b000000010000000000000, /* (W) (1|M0) */
|
||||
|
@ -713,7 +713,7 @@ static const uint32_t gen12_control_index_table[32] = {
|
|||
0b000000010000000100100, /* (W) (16|M16) */
|
||||
};
|
||||
|
||||
static const uint32_t gen12_datatype_table[32] = {
|
||||
static const uint32_t gfx12_datatype_table[32] = {
|
||||
0b11010110100101010100, /* grf<1>:f grf:f grf:f */
|
||||
0b00000110100101010100, /* grf<1>:f grf:f arf:ub */
|
||||
0b00000010101101010100, /* grf<1>:f imm:f arf:ub */
|
||||
|
@ -748,7 +748,7 @@ static const uint32_t gen12_datatype_table[32] = {
|
|||
0b00000010100101010100, /* grf<1>:f arf:f arf:ub */
|
||||
};
|
||||
|
||||
static const uint16_t gen12_subreg_table[32] = {
|
||||
static const uint16_t gfx12_subreg_table[32] = {
|
||||
0b000000000000000, /* .0 .0 .0 */
|
||||
0b100000000000000, /* .0 .0 .16 */
|
||||
0b001000000000000, /* .0 .0 .4 */
|
||||
|
@ -783,7 +783,7 @@ static const uint16_t gen12_subreg_table[32] = {
|
|||
0b110001100000000, /* .0 .24 .24 */
|
||||
};
|
||||
|
||||
static const uint16_t gen12_src0_index_table[16] = {
|
||||
static const uint16_t gfx12_src0_index_table[16] = {
|
||||
0b010001100100, /* r<8;8,1> */
|
||||
0b000000000000, /* r<0;1,0> */
|
||||
0b010001100110, /* -r<8;8,1> */
|
||||
|
@ -802,7 +802,7 @@ static const uint16_t gen12_src0_index_table[16] = {
|
|||
0b111100010000, /* r[a]<1,0> */
|
||||
};
|
||||
|
||||
static const uint16_t gen12_src1_index_table[16] = {
|
||||
static const uint16_t gfx12_src1_index_table[16] = {
|
||||
0b000100011001, /* r<8;8,1> */
|
||||
0b000000000000, /* r<0;1,0> */
|
||||
0b100100011001, /* -r<8;8,1> */
|
||||
|
@ -827,7 +827,7 @@ static const uint16_t gen12_src1_index_table[16] = {
|
|||
*
|
||||
* The low 24 bits have the same mappings on both hardware.
|
||||
*/
|
||||
static const uint32_t gen8_3src_control_index_table[4] = {
|
||||
static const uint32_t gfx8_3src_control_index_table[4] = {
|
||||
0b00100000000110000000000001,
|
||||
0b00000000000110000000000001,
|
||||
0b00000000001000000000000001,
|
||||
|
@ -841,14 +841,14 @@ static const uint32_t gen8_3src_control_index_table[4] = {
|
|||
* The low 44 bits have the same mappings on both hardware, and since the high
|
||||
* three bits on Broadwell are zero, we can reuse Cherryview's table.
|
||||
*/
|
||||
static const uint64_t gen8_3src_source_index_table[4] = {
|
||||
static const uint64_t gfx8_3src_source_index_table[4] = {
|
||||
0b0000001110010011100100111001000001111000000000000,
|
||||
0b0000001110010011100100111001000001111000000000010,
|
||||
0b0000001110010011100100111001000001111000000001000,
|
||||
0b0000001110010011100100111001000001111000000100000,
|
||||
};
|
||||
|
||||
static const uint64_t gen12_3src_control_index_table[32] = {
|
||||
static const uint64_t gfx12_3src_control_index_table[32] = {
|
||||
0b000001001010010101000000000000000100, /* (16|M0) grf<1>:f :f :f :f */
|
||||
0b000001001010010101000000000000000011, /* (8|M0) grf<1>:f :f :f :f */
|
||||
0b000001001000010101000000000000000011, /* (8|M0) arf<1>:f :f :f :f */
|
||||
|
@ -883,7 +883,7 @@ static const uint64_t gen12_3src_control_index_table[32] = {
|
|||
0b000001001000010101010000000000000011, /* (8|M0) (sat)arf<1>:f :f :f :f */
|
||||
};
|
||||
|
||||
static const uint32_t gen12_3src_source_index_table[32] = {
|
||||
static const uint32_t gfx12_3src_source_index_table[32] = {
|
||||
0b100101100001100000000, /* grf<0;0> grf<8;1> grf<0> */
|
||||
0b100101100001001000010, /* arf<4;1> grf<8;1> grf<0> */
|
||||
0b101101100001101000011, /* grf<8;1> grf<8;1> grf<1> */
|
||||
|
@ -918,7 +918,7 @@ static const uint32_t gen12_3src_source_index_table[32] = {
|
|||
0b101001100101101000011, /* grf<8;1> arf<8;1> -grf<1> */
|
||||
};
|
||||
|
||||
static const uint32_t gen12_3src_subreg_table[32] = {
|
||||
static const uint32_t gfx12_3src_subreg_table[32] = {
|
||||
0b00000000000000000000, /* .0 .0 .0 .0 */
|
||||
0b00100000000000000000, /* .0 .0 .0 .4 */
|
||||
0b00000000000110000000, /* .0 .12 .0 .0 */
|
||||
|
@ -993,7 +993,7 @@ set_control_index(const struct compaction_state *c,
|
|||
uncompacted = (brw_inst_bits(src, 31, 31) << 16) | /* 1b */
|
||||
(brw_inst_bits(src, 23, 8)); /* 16b */
|
||||
|
||||
/* On gen7, the flag register and subregister numbers are integrated into
|
||||
/* On gfx7, the flag register and subregister numbers are integrated into
|
||||
* the control index.
|
||||
*/
|
||||
if (devinfo->ver == 7)
|
||||
|
@ -1093,14 +1093,14 @@ set_src0_index(const struct compaction_state *c, brw_compact_inst *dst,
|
|||
int table_len;
|
||||
|
||||
if (devinfo->ver >= 12) {
|
||||
table_len = ARRAY_SIZE(gen12_src0_index_table);
|
||||
table_len = ARRAY_SIZE(gfx12_src0_index_table);
|
||||
uncompacted = (brw_inst_bits(src, 87, 84) << 8) | /* 4b */
|
||||
(brw_inst_bits(src, 83, 81) << 5) | /* 3b */
|
||||
(brw_inst_bits(src, 80, 80) << 4) | /* 1b */
|
||||
(brw_inst_bits(src, 65, 64) << 2) | /* 2b */
|
||||
(brw_inst_bits(src, 45, 44)); /* 2b */
|
||||
} else {
|
||||
table_len = ARRAY_SIZE(gen8_src_index_table);
|
||||
table_len = ARRAY_SIZE(gfx8_src_index_table);
|
||||
uncompacted = brw_inst_bits(src, 88, 77); /* 12b */
|
||||
}
|
||||
|
||||
|
@ -1133,14 +1133,14 @@ set_src1_index(const struct compaction_state *c, brw_compact_inst *dst,
|
|||
int table_len;
|
||||
|
||||
if (devinfo->ver >= 12) {
|
||||
table_len = ARRAY_SIZE(gen12_src0_index_table);
|
||||
table_len = ARRAY_SIZE(gfx12_src0_index_table);
|
||||
uncompacted = (brw_inst_bits(src, 121, 120) << 10) | /* 2b */
|
||||
(brw_inst_bits(src, 119, 116) << 6) | /* 4b */
|
||||
(brw_inst_bits(src, 115, 113) << 3) | /* 3b */
|
||||
(brw_inst_bits(src, 112, 112) << 2) | /* 1b */
|
||||
(brw_inst_bits(src, 97, 96)); /* 2b */
|
||||
} else {
|
||||
table_len = ARRAY_SIZE(gen8_src_index_table);
|
||||
table_len = ARRAY_SIZE(gfx8_src_index_table);
|
||||
uncompacted = brw_inst_bits(src, 120, 109); /* 12b */
|
||||
}
|
||||
|
||||
|
@ -1182,8 +1182,8 @@ set_3src_control_index(const struct gen_device_info *devinfo,
|
|||
(brw_inst_bits(src, 21, 19) << 3) | /* 3b */
|
||||
(brw_inst_bits(src, 18, 16)); /* 3b */
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(gen12_3src_control_index_table); i++) {
|
||||
if (gen12_3src_control_index_table[i] == uncompacted) {
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_control_index_table); i++) {
|
||||
if (gfx12_3src_control_index_table[i] == uncompacted) {
|
||||
brw_compact_inst_set_3src_control_index(devinfo, dst, i);
|
||||
return true;
|
||||
}
|
||||
|
@ -1198,8 +1198,8 @@ set_3src_control_index(const struct gen_device_info *devinfo,
|
|||
brw_inst_bits(src, 36, 35) << 24; /* 2b */
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
|
||||
if (gen8_3src_control_index_table[i] == uncompacted) {
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_control_index_table); i++) {
|
||||
if (gfx8_3src_control_index_table[i] == uncompacted) {
|
||||
brw_compact_inst_set_3src_control_index(devinfo, dst, i);
|
||||
return true;
|
||||
}
|
||||
|
@ -1233,8 +1233,8 @@ set_3src_source_index(const struct gen_device_info *devinfo,
|
|||
(brw_inst_bits(src, 43, 43) << 1) | /* 1b */
|
||||
(brw_inst_bits(src, 35, 35)); /* 1b */
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(gen12_3src_source_index_table); i++) {
|
||||
if (gen12_3src_source_index_table[i] == uncompacted) {
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_source_index_table); i++) {
|
||||
if (gfx12_3src_source_index_table[i] == uncompacted) {
|
||||
brw_compact_inst_set_3src_source_index(devinfo, dst, i);
|
||||
return true;
|
||||
}
|
||||
|
@ -1258,8 +1258,8 @@ set_3src_source_index(const struct gen_device_info *devinfo,
|
|||
(brw_inst_bits(src, 104, 104) << 44); /* 1b */
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {
|
||||
if (gen8_3src_source_index_table[i] == uncompacted) {
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_source_index_table); i++) {
|
||||
if (gfx8_3src_source_index_table[i] == uncompacted) {
|
||||
brw_compact_inst_set_3src_source_index(devinfo, dst, i);
|
||||
return true;
|
||||
}
|
||||
|
@ -1281,8 +1281,8 @@ set_3src_subreg_index(const struct gen_device_info *devinfo,
|
|||
(brw_inst_bits(src, 71, 67) << 5) | /* 5b */
|
||||
(brw_inst_bits(src, 55, 51)); /* 5b */
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(gen12_3src_subreg_table); i++) {
|
||||
if (gen12_3src_subreg_table[i] == uncompacted) {
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_subreg_table); i++) {
|
||||
if (gfx12_3src_subreg_table[i] == uncompacted) {
|
||||
brw_compact_inst_set_3src_subreg_index(devinfo, dst, i);
|
||||
return true;
|
||||
}
|
||||
|
@ -1887,7 +1887,7 @@ set_uncompacted_3src_control_index(const struct compaction_state *c,
|
|||
|
||||
if (devinfo->ver >= 12) {
|
||||
uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
|
||||
uint64_t uncompacted = gen12_3src_control_index_table[compacted];
|
||||
uint64_t uncompacted = gfx12_3src_control_index_table[compacted];
|
||||
|
||||
brw_inst_set_bits(dst, 95, 92, (uncompacted >> 32));
|
||||
brw_inst_set_bits(dst, 90, 88, (uncompacted >> 29) & 0x7);
|
||||
|
@ -1909,7 +1909,7 @@ set_uncompacted_3src_control_index(const struct compaction_state *c,
|
|||
brw_inst_set_bits(dst, 18, 16, (uncompacted >> 0) & 0x7);
|
||||
} else {
|
||||
uint32_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
|
||||
uint32_t uncompacted = gen8_3src_control_index_table[compacted];
|
||||
uint32_t uncompacted = gfx8_3src_control_index_table[compacted];
|
||||
|
||||
brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
|
||||
brw_inst_set_bits(dst, 28, 8, (uncompacted >> 0) & 0x1fffff);
|
||||
|
@ -1928,7 +1928,7 @@ set_uncompacted_3src_source_index(const struct gen_device_info *devinfo,
|
|||
uint32_t compacted = brw_compact_inst_3src_source_index(devinfo, src);
|
||||
|
||||
if (devinfo->ver >= 12) {
|
||||
uint32_t uncompacted = gen12_3src_source_index_table[compacted];
|
||||
uint32_t uncompacted = gfx12_3src_source_index_table[compacted];
|
||||
|
||||
brw_inst_set_bits(dst, 114, 114, (uncompacted >> 20));
|
||||
brw_inst_set_bits(dst, 113, 112, (uncompacted >> 18) & 0x3);
|
||||
|
@ -1946,7 +1946,7 @@ set_uncompacted_3src_source_index(const struct gen_device_info *devinfo,
|
|||
brw_inst_set_bits(dst, 43, 43, (uncompacted >> 1) & 0x1);
|
||||
brw_inst_set_bits(dst, 35, 35, (uncompacted >> 0) & 0x1);
|
||||
} else {
|
||||
uint64_t uncompacted = gen8_3src_source_index_table[compacted];
|
||||
uint64_t uncompacted = gfx8_3src_source_index_table[compacted];
|
||||
|
||||
brw_inst_set_bits(dst, 83, 83, (uncompacted >> 43) & 0x1);
|
||||
brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
|
||||
|
@ -1972,7 +1972,7 @@ set_uncompacted_3src_subreg_index(const struct gen_device_info *devinfo,
|
|||
assert(devinfo->ver >= 12);
|
||||
|
||||
uint32_t compacted = brw_compact_inst_3src_subreg_index(devinfo, src);
|
||||
uint32_t uncompacted = gen12_3src_subreg_table[compacted];
|
||||
uint32_t uncompacted = gfx12_3src_subreg_table[compacted];
|
||||
|
||||
brw_inst_set_bits(dst, 119, 115, (uncompacted >> 15));
|
||||
brw_inst_set_bits(dst, 103, 99, (uncompacted >> 10) & 0x1f);
|
||||
|
@ -2165,7 +2165,7 @@ update_uip_jip(const struct gen_device_info *devinfo, brw_inst *insn,
|
|||
}
|
||||
|
||||
static void
|
||||
update_gen4_jump_count(const struct gen_device_info *devinfo, brw_inst *insn,
|
||||
update_gfx4_jump_count(const struct gen_device_info *devinfo, brw_inst *insn,
|
||||
int this_old_ip, int *compacted_counts)
|
||||
{
|
||||
assert(devinfo->ver == 5 || devinfo->is_g4x);
|
||||
|
@ -2176,7 +2176,7 @@ update_gen4_jump_count(const struct gen_device_info *devinfo, brw_inst *insn,
|
|||
*/
|
||||
int shift = devinfo->is_g4x ? 1 : 0;
|
||||
|
||||
int jump_count_compacted = brw_inst_gen4_jump_count(devinfo, insn) << shift;
|
||||
int jump_count_compacted = brw_inst_gfx4_jump_count(devinfo, insn) << shift;
|
||||
|
||||
int target_old_ip = this_old_ip + (jump_count_compacted / 2);
|
||||
|
||||
|
@ -2184,7 +2184,7 @@ update_gen4_jump_count(const struct gen_device_info *devinfo, brw_inst *insn,
|
|||
int target_compacted_count = compacted_counts[target_old_ip];
|
||||
|
||||
jump_count_compacted -= (target_compacted_count - this_compacted_count);
|
||||
brw_inst_set_gen4_jump_count(devinfo, insn, jump_count_compacted >> shift);
|
||||
brw_inst_set_gfx4_jump_count(devinfo, insn, jump_count_compacted >> shift);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -2195,62 +2195,62 @@ compaction_state_init(struct compaction_state *c,
|
|||
assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);
|
||||
assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
|
||||
assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);
|
||||
assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
|
||||
assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
|
||||
assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
|
||||
assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
|
||||
assert(gen7_control_index_table[ARRAY_SIZE(gen7_control_index_table) - 1] != 0);
|
||||
assert(gen7_datatype_table[ARRAY_SIZE(gen7_datatype_table) - 1] != 0);
|
||||
assert(gen7_subreg_table[ARRAY_SIZE(gen7_subreg_table) - 1] != 0);
|
||||
assert(gen7_src_index_table[ARRAY_SIZE(gen7_src_index_table) - 1] != 0);
|
||||
assert(gen8_control_index_table[ARRAY_SIZE(gen8_control_index_table) - 1] != 0);
|
||||
assert(gen8_datatype_table[ARRAY_SIZE(gen8_datatype_table) - 1] != 0);
|
||||
assert(gen8_subreg_table[ARRAY_SIZE(gen8_subreg_table) - 1] != 0);
|
||||
assert(gen8_src_index_table[ARRAY_SIZE(gen8_src_index_table) - 1] != 0);
|
||||
assert(gen11_datatype_table[ARRAY_SIZE(gen11_datatype_table) - 1] != 0);
|
||||
assert(gen12_control_index_table[ARRAY_SIZE(gen12_control_index_table) - 1] != 0);
|
||||
assert(gen12_datatype_table[ARRAY_SIZE(gen12_datatype_table) - 1] != 0);
|
||||
assert(gen12_subreg_table[ARRAY_SIZE(gen12_subreg_table) - 1] != 0);
|
||||
assert(gen12_src0_index_table[ARRAY_SIZE(gen12_src0_index_table) - 1] != 0);
|
||||
assert(gen12_src1_index_table[ARRAY_SIZE(gen12_src1_index_table) - 1] != 0);
|
||||
assert(gfx6_control_index_table[ARRAY_SIZE(gfx6_control_index_table) - 1] != 0);
|
||||
assert(gfx6_datatype_table[ARRAY_SIZE(gfx6_datatype_table) - 1] != 0);
|
||||
assert(gfx6_subreg_table[ARRAY_SIZE(gfx6_subreg_table) - 1] != 0);
|
||||
assert(gfx6_src_index_table[ARRAY_SIZE(gfx6_src_index_table) - 1] != 0);
|
||||
assert(gfx7_control_index_table[ARRAY_SIZE(gfx7_control_index_table) - 1] != 0);
|
||||
assert(gfx7_datatype_table[ARRAY_SIZE(gfx7_datatype_table) - 1] != 0);
|
||||
assert(gfx7_subreg_table[ARRAY_SIZE(gfx7_subreg_table) - 1] != 0);
|
||||
assert(gfx7_src_index_table[ARRAY_SIZE(gfx7_src_index_table) - 1] != 0);
|
||||
assert(gfx8_control_index_table[ARRAY_SIZE(gfx8_control_index_table) - 1] != 0);
|
||||
assert(gfx8_datatype_table[ARRAY_SIZE(gfx8_datatype_table) - 1] != 0);
|
||||
assert(gfx8_subreg_table[ARRAY_SIZE(gfx8_subreg_table) - 1] != 0);
|
||||
assert(gfx8_src_index_table[ARRAY_SIZE(gfx8_src_index_table) - 1] != 0);
|
||||
assert(gfx11_datatype_table[ARRAY_SIZE(gfx11_datatype_table) - 1] != 0);
|
||||
assert(gfx12_control_index_table[ARRAY_SIZE(gfx12_control_index_table) - 1] != 0);
|
||||
assert(gfx12_datatype_table[ARRAY_SIZE(gfx12_datatype_table) - 1] != 0);
|
||||
assert(gfx12_subreg_table[ARRAY_SIZE(gfx12_subreg_table) - 1] != 0);
|
||||
assert(gfx12_src0_index_table[ARRAY_SIZE(gfx12_src0_index_table) - 1] != 0);
|
||||
assert(gfx12_src1_index_table[ARRAY_SIZE(gfx12_src1_index_table) - 1] != 0);
|
||||
|
||||
c->devinfo = devinfo;
|
||||
switch (devinfo->ver) {
|
||||
case 12:
|
||||
c->control_index_table = gen12_control_index_table;;
|
||||
c->datatype_table = gen12_datatype_table;
|
||||
c->subreg_table = gen12_subreg_table;
|
||||
c->src0_index_table = gen12_src0_index_table;
|
||||
c->src1_index_table = gen12_src1_index_table;
|
||||
c->control_index_table = gfx12_control_index_table;;
|
||||
c->datatype_table = gfx12_datatype_table;
|
||||
c->subreg_table = gfx12_subreg_table;
|
||||
c->src0_index_table = gfx12_src0_index_table;
|
||||
c->src1_index_table = gfx12_src1_index_table;
|
||||
break;
|
||||
case 11:
|
||||
c->control_index_table = gen8_control_index_table;
|
||||
c->datatype_table = gen11_datatype_table;
|
||||
c->subreg_table = gen8_subreg_table;
|
||||
c->src0_index_table = gen8_src_index_table;
|
||||
c->src1_index_table = gen8_src_index_table;
|
||||
c->control_index_table = gfx8_control_index_table;
|
||||
c->datatype_table = gfx11_datatype_table;
|
||||
c->subreg_table = gfx8_subreg_table;
|
||||
c->src0_index_table = gfx8_src_index_table;
|
||||
c->src1_index_table = gfx8_src_index_table;
|
||||
break;
|
||||
case 9:
|
||||
case 8:
|
||||
c->control_index_table = gen8_control_index_table;
|
||||
c->datatype_table = gen8_datatype_table;
|
||||
c->subreg_table = gen8_subreg_table;
|
||||
c->src0_index_table = gen8_src_index_table;
|
||||
c->src1_index_table = gen8_src_index_table;
|
||||
c->control_index_table = gfx8_control_index_table;
|
||||
c->datatype_table = gfx8_datatype_table;
|
||||
c->subreg_table = gfx8_subreg_table;
|
||||
c->src0_index_table = gfx8_src_index_table;
|
||||
c->src1_index_table = gfx8_src_index_table;
|
||||
break;
|
||||
case 7:
|
||||
c->control_index_table = gen7_control_index_table;
|
||||
c->datatype_table = gen7_datatype_table;
|
||||
c->subreg_table = gen7_subreg_table;
|
||||
c->src0_index_table = gen7_src_index_table;
|
||||
c->src1_index_table = gen7_src_index_table;
|
||||
c->control_index_table = gfx7_control_index_table;
|
||||
c->datatype_table = gfx7_datatype_table;
|
||||
c->subreg_table = gfx7_subreg_table;
|
||||
c->src0_index_table = gfx7_src_index_table;
|
||||
c->src1_index_table = gfx7_src_index_table;
|
||||
break;
|
||||
case 6:
|
||||
c->control_index_table = gen6_control_index_table;
|
||||
c->datatype_table = gen6_datatype_table;
|
||||
c->subreg_table = gen6_subreg_table;
|
||||
c->src0_index_table = gen6_src_index_table;
|
||||
c->src1_index_table = gen6_src_index_table;
|
||||
c->control_index_table = gfx6_control_index_table;
|
||||
c->datatype_table = gfx6_datatype_table;
|
||||
c->subreg_table = gfx6_subreg_table;
|
||||
c->src0_index_table = gfx6_src_index_table;
|
||||
c->src1_index_table = gfx6_src_index_table;
|
||||
break;
|
||||
case 5:
|
||||
case 4:
|
||||
|
@ -2362,7 +2362,7 @@ brw_compact_instructions(struct brw_codegen *p, int start_offset,
|
|||
if (devinfo->ver >= 6) {
|
||||
update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
|
||||
} else {
|
||||
update_gen4_jump_count(devinfo, insn, this_old_ip,
|
||||
update_gfx4_jump_count(devinfo, insn, this_old_ip,
|
||||
compacted_counts);
|
||||
}
|
||||
break;
|
||||
|
@ -2391,14 +2391,14 @@ brw_compact_instructions(struct brw_codegen *p, int start_offset,
|
|||
assert(!brw_inst_cmpt_control(devinfo, insn));
|
||||
|
||||
/* Jump Count is in units of compacted instructions on Gen6. */
|
||||
int jump_count_compacted = brw_inst_gen6_jump_count(devinfo, insn);
|
||||
int jump_count_compacted = brw_inst_gfx6_jump_count(devinfo, insn);
|
||||
|
||||
int target_old_ip = this_old_ip + (jump_count_compacted / 2);
|
||||
int target_compacted_count = compacted_counts[target_old_ip];
|
||||
jump_count_compacted -= (target_compacted_count - this_compacted_count);
|
||||
brw_inst_set_gen6_jump_count(devinfo, insn, jump_count_compacted);
|
||||
brw_inst_set_gfx6_jump_count(devinfo, insn, jump_count_compacted);
|
||||
} else {
|
||||
update_gen4_jump_count(devinfo, insn, this_old_ip,
|
||||
update_gfx4_jump_count(devinfo, insn, this_old_ip,
|
||||
compacted_counts);
|
||||
}
|
||||
break;
|
||||
|
|
|
@ -157,14 +157,14 @@ enum PACKED brw_horizontal_stride {
|
|||
BRW_HORIZONTAL_STRIDE_4 = 3,
|
||||
};
|
||||
|
||||
enum PACKED gen10_align1_3src_src_horizontal_stride {
|
||||
enum PACKED gfx10_align1_3src_src_horizontal_stride {
|
||||
BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0 = 0,
|
||||
BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1 = 1,
|
||||
BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2 = 2,
|
||||
BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4 = 3,
|
||||
};
|
||||
|
||||
enum PACKED gen10_align1_3src_dst_horizontal_stride {
|
||||
enum PACKED gfx10_align1_3src_dst_horizontal_stride {
|
||||
BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1 = 0,
|
||||
BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_2 = 1,
|
||||
};
|
||||
|
@ -610,7 +610,7 @@ enum opcode {
|
|||
|
||||
/**
|
||||
* Write geometry shader output data to the URB and request a new URB
|
||||
* handle (gen6).
|
||||
* handle (gfx6).
|
||||
*
|
||||
* This opcode doesn't do an implied move from R0 to the first MRF.
|
||||
*/
|
||||
|
@ -690,7 +690,7 @@ enum opcode {
|
|||
GS_OPCODE_GET_INSTANCE_ID,
|
||||
|
||||
/**
|
||||
* Send a FF_SYNC message to allocate initial URB handles (gen6).
|
||||
* Send a FF_SYNC message to allocate initial URB handles (gfx6).
|
||||
*
|
||||
* - dst will be used as the writeback register for the FF_SYNC operation.
|
||||
*
|
||||
|
@ -709,7 +709,7 @@ enum opcode {
|
|||
GS_OPCODE_FF_SYNC,
|
||||
|
||||
/**
|
||||
* Move r0.1 (which holds PrimitiveID information in gen6) to a separate
|
||||
* Move r0.1 (which holds PrimitiveID information in gfx6) to a separate
|
||||
* register.
|
||||
*
|
||||
* - dst is the GRF where PrimitiveID information will be moved.
|
||||
|
@ -718,7 +718,7 @@ enum opcode {
|
|||
|
||||
/**
|
||||
* Write transform feedback data to the SVB by sending a SVB WRITE message.
|
||||
* Used in gen6.
|
||||
* Used in gfx6.
|
||||
*
|
||||
* - dst is the MRF register containing the message header.
|
||||
*
|
||||
|
@ -730,7 +730,7 @@ enum opcode {
|
|||
|
||||
/**
|
||||
* Set destination index in the SVB write message payload (M0.5). Used
|
||||
* in gen6 for transform feedback.
|
||||
* in gfx6 for transform feedback.
|
||||
*
|
||||
* - dst is the header to save the destination indices for SVB WRITE.
|
||||
* - src is the register that holds the destination indices value.
|
||||
|
@ -739,7 +739,7 @@ enum opcode {
|
|||
|
||||
/**
|
||||
* Prepare Mx.0 subregister for being used in the FF_SYNC message header.
|
||||
* Used in gen6 for transform feedback.
|
||||
* Used in gfx6 for transform feedback.
|
||||
*
|
||||
* - dst will hold the register with the final Mx.0 value.
|
||||
*
|
||||
|
@ -987,7 +987,7 @@ enum PACKED brw_reg_file {
|
|||
BAD_FILE,
|
||||
};
|
||||
|
||||
enum PACKED gen10_align1_3src_reg_file {
|
||||
enum PACKED gfx10_align1_3src_reg_file {
|
||||
BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE = 0,
|
||||
BRW_ALIGN1_3SRC_IMMEDIATE_VALUE = 1, /* src0, src2 */
|
||||
BRW_ALIGN1_3SRC_ACCUMULATOR = 1, /* dest, src1 */
|
||||
|
@ -998,7 +998,7 @@ enum PACKED gen10_align1_3src_reg_file {
|
|||
* on float or integer types. The register arguments have fields that offer
|
||||
* more fine control over their respective types.
|
||||
*/
|
||||
enum PACKED gen10_align1_3src_exec_type {
|
||||
enum PACKED gfx10_align1_3src_exec_type {
|
||||
BRW_ALIGN1_3SRC_EXEC_TYPE_INT = 0,
|
||||
BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT = 1,
|
||||
};
|
||||
|
@ -1041,7 +1041,7 @@ enum PACKED brw_vertical_stride {
|
|||
BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL = 0xF,
|
||||
};
|
||||
|
||||
enum PACKED gen10_align1_3src_vertical_stride {
|
||||
enum PACKED gfx10_align1_3src_vertical_stride {
|
||||
BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0 = 0,
|
||||
BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1 = 1,
|
||||
BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2 = 1,
|
||||
|
@ -1530,8 +1530,8 @@ enum brw_message_target {
|
|||
#define BRW_MATH_FUNCTION_RSQ 5
|
||||
#define BRW_MATH_FUNCTION_SIN 6
|
||||
#define BRW_MATH_FUNCTION_COS 7
|
||||
#define BRW_MATH_FUNCTION_SINCOS 8 /* gen4, gen5 */
|
||||
#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */
|
||||
#define BRW_MATH_FUNCTION_SINCOS 8 /* gfx4, gfx5 */
|
||||
#define BRW_MATH_FUNCTION_FDIV 9 /* gfx6+ */
|
||||
#define BRW_MATH_FUNCTION_POW 10
|
||||
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
|
||||
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12
|
||||
|
|
|
@ -43,7 +43,7 @@
|
|||
* explicit move; it should be called before emitting a SEND instruction.
|
||||
*/
|
||||
void
|
||||
gen6_resolve_implied_move(struct brw_codegen *p,
|
||||
gfx6_resolve_implied_move(struct brw_codegen *p,
|
||||
struct brw_reg *src,
|
||||
unsigned msg_reg_nr)
|
||||
{
|
||||
|
@ -68,7 +68,7 @@ gen6_resolve_implied_move(struct brw_codegen *p,
|
|||
}
|
||||
|
||||
static void
|
||||
gen7_convert_mrf_to_grf(struct brw_codegen *p, struct brw_reg *reg)
|
||||
gfx7_convert_mrf_to_grf(struct brw_codegen *p, struct brw_reg *reg)
|
||||
{
|
||||
/* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
|
||||
* "The send with EOT should use register space R112-R127 for <src>. This is
|
||||
|
@ -107,7 +107,7 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
|
|||
dest.hstride = BRW_HORIZONTAL_STRIDE_2;
|
||||
}
|
||||
|
||||
gen7_convert_mrf_to_grf(p, &dest);
|
||||
gfx7_convert_mrf_to_grf(p, &dest);
|
||||
|
||||
if (devinfo->ver >= 12 &&
|
||||
(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
|
||||
|
@ -215,7 +215,7 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
|
|||
else if (reg.file == BRW_GENERAL_REGISTER_FILE)
|
||||
assert(reg.nr < 128);
|
||||
|
||||
gen7_convert_mrf_to_grf(p, &reg);
|
||||
gfx7_convert_mrf_to_grf(p, &reg);
|
||||
|
||||
if (devinfo->ver >= 6 &&
|
||||
(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
|
||||
|
@ -373,7 +373,7 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
|
|||
assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
|
||||
reg.nr != BRW_ARF_ACCUMULATOR);
|
||||
|
||||
gen7_convert_mrf_to_grf(p, &reg);
|
||||
gfx7_convert_mrf_to_grf(p, &reg);
|
||||
assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
|
||||
|
||||
brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type);
|
||||
|
@ -580,7 +580,7 @@ static void brw_set_urb_message( struct brw_codegen *p,
|
|||
}
|
||||
|
||||
static void
|
||||
gen7_set_dp_scratch_message(struct brw_codegen *p,
|
||||
gfx7_set_dp_scratch_message(struct brw_codegen *p,
|
||||
brw_inst *inst,
|
||||
bool write,
|
||||
bool dword,
|
||||
|
@ -741,7 +741,7 @@ get_3src_subreg_nr(struct brw_reg reg)
|
|||
return reg.subnr / 4;
|
||||
}
|
||||
|
||||
static enum gen10_align1_3src_vertical_stride
|
||||
static enum gfx10_align1_3src_vertical_stride
|
||||
to_3src_align1_vstride(const struct gen_device_info *devinfo,
|
||||
enum brw_vertical_stride vstride)
|
||||
{
|
||||
|
@ -765,7 +765,7 @@ to_3src_align1_vstride(const struct gen_device_info *devinfo,
|
|||
}
|
||||
|
||||
|
||||
static enum gen10_align1_3src_src_horizontal_stride
|
||||
static enum gfx10_align1_3src_src_horizontal_stride
|
||||
to_3src_align1_hstride(enum brw_horizontal_stride hstride)
|
||||
{
|
||||
switch (hstride) {
|
||||
|
@ -789,7 +789,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
|
|||
const struct gen_device_info *devinfo = p->devinfo;
|
||||
brw_inst *inst = next_insn(p, opcode);
|
||||
|
||||
gen7_convert_mrf_to_grf(p, &dest);
|
||||
gfx7_convert_mrf_to_grf(p, &dest);
|
||||
|
||||
assert(dest.nr < 128);
|
||||
|
||||
|
@ -1410,7 +1410,7 @@ brw_IF(struct brw_codegen *p, unsigned execute_size)
|
|||
brw_set_src1(p, insn, brw_imm_d(0x0));
|
||||
} else if (devinfo->ver == 6) {
|
||||
brw_set_dest(p, insn, brw_imm_w(0));
|
||||
brw_inst_set_gen6_jump_count(devinfo, insn, 0);
|
||||
brw_inst_set_gfx6_jump_count(devinfo, insn, 0);
|
||||
brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
|
||||
brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
|
||||
} else if (devinfo->ver == 7) {
|
||||
|
@ -1439,11 +1439,11 @@ brw_IF(struct brw_codegen *p, unsigned execute_size)
|
|||
return insn;
|
||||
}
|
||||
|
||||
/* This function is only used for gen6-style IF instructions with an
|
||||
* embedded comparison (conditional modifier). It is not used on gen7.
|
||||
/* This function is only used for gfx6-style IF instructions with an
|
||||
* embedded comparison (conditional modifier). It is not used on gfx7.
|
||||
*/
|
||||
brw_inst *
|
||||
gen6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional,
|
||||
gfx6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional,
|
||||
struct brw_reg src0, struct brw_reg src1)
|
||||
{
|
||||
const struct gen_device_info *devinfo = p->devinfo;
|
||||
|
@ -1453,7 +1453,7 @@ gen6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional,
|
|||
|
||||
brw_set_dest(p, insn, brw_imm_w(0));
|
||||
brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
|
||||
brw_inst_set_gen6_jump_count(devinfo, insn, 0);
|
||||
brw_inst_set_gfx6_jump_count(devinfo, insn, 0);
|
||||
brw_set_src0(p, insn, src0);
|
||||
brw_set_src1(p, insn, src1);
|
||||
|
||||
|
@ -1546,12 +1546,12 @@ patch_IF_ELSE(struct brw_codegen *p,
|
|||
* all-false and jumping past the ENDIF.
|
||||
*/
|
||||
brw_inst_set_opcode(devinfo, if_inst, BRW_OPCODE_IFF);
|
||||
brw_inst_set_gen4_jump_count(devinfo, if_inst,
|
||||
brw_inst_set_gfx4_jump_count(devinfo, if_inst,
|
||||
br * (endif_inst - if_inst + 1));
|
||||
brw_inst_set_gen4_pop_count(devinfo, if_inst, 0);
|
||||
brw_inst_set_gfx4_pop_count(devinfo, if_inst, 0);
|
||||
} else if (devinfo->ver == 6) {
|
||||
/* As of gen6, there is no IFF and IF must point to the ENDIF. */
|
||||
brw_inst_set_gen6_jump_count(devinfo, if_inst, br*(endif_inst - if_inst));
|
||||
/* As of gfx6, there is no IFF and IF must point to the ENDIF. */
|
||||
brw_inst_set_gfx6_jump_count(devinfo, if_inst, br*(endif_inst - if_inst));
|
||||
} else {
|
||||
brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
|
||||
brw_inst_set_jip(devinfo, if_inst, br * (endif_inst - if_inst));
|
||||
|
@ -1561,25 +1561,25 @@ patch_IF_ELSE(struct brw_codegen *p,
|
|||
|
||||
/* Patch IF -> ELSE */
|
||||
if (devinfo->ver < 6) {
|
||||
brw_inst_set_gen4_jump_count(devinfo, if_inst,
|
||||
brw_inst_set_gfx4_jump_count(devinfo, if_inst,
|
||||
br * (else_inst - if_inst));
|
||||
brw_inst_set_gen4_pop_count(devinfo, if_inst, 0);
|
||||
brw_inst_set_gfx4_pop_count(devinfo, if_inst, 0);
|
||||
} else if (devinfo->ver == 6) {
|
||||
brw_inst_set_gen6_jump_count(devinfo, if_inst,
|
||||
brw_inst_set_gfx6_jump_count(devinfo, if_inst,
|
||||
br * (else_inst - if_inst + 1));
|
||||
}
|
||||
|
||||
/* Patch ELSE -> ENDIF */
|
||||
if (devinfo->ver < 6) {
|
||||
/* BRW_OPCODE_ELSE pre-gen6 should point just past the
|
||||
/* BRW_OPCODE_ELSE pre-gfx6 should point just past the
|
||||
* matching ENDIF.
|
||||
*/
|
||||
brw_inst_set_gen4_jump_count(devinfo, else_inst,
|
||||
brw_inst_set_gfx4_jump_count(devinfo, else_inst,
|
||||
br * (endif_inst - else_inst + 1));
|
||||
brw_inst_set_gen4_pop_count(devinfo, else_inst, 1);
|
||||
brw_inst_set_gfx4_pop_count(devinfo, else_inst, 1);
|
||||
} else if (devinfo->ver == 6) {
|
||||
/* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
|
||||
brw_inst_set_gen6_jump_count(devinfo, else_inst,
|
||||
/* BRW_OPCODE_ELSE on gfx6 should point to the matching ENDIF. */
|
||||
brw_inst_set_gfx6_jump_count(devinfo, else_inst,
|
||||
br * (endif_inst - else_inst));
|
||||
} else {
|
||||
/* The IF instruction's JIP should point just past the ELSE */
|
||||
|
@ -1611,7 +1611,7 @@ brw_ELSE(struct brw_codegen *p)
|
|||
brw_set_src1(p, insn, brw_imm_d(0x0));
|
||||
} else if (devinfo->ver == 6) {
|
||||
brw_set_dest(p, insn, brw_imm_w(0));
|
||||
brw_inst_set_gen6_jump_count(devinfo, insn, 0);
|
||||
brw_inst_set_gfx6_jump_count(devinfo, insn, 0);
|
||||
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
|
||||
brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
|
||||
} else if (devinfo->ver == 7) {
|
||||
|
@ -1707,10 +1707,10 @@ brw_ENDIF(struct brw_codegen *p)
|
|||
|
||||
/* Also pop item off the stack in the endif instruction: */
|
||||
if (devinfo->ver < 6) {
|
||||
brw_inst_set_gen4_jump_count(devinfo, insn, 0);
|
||||
brw_inst_set_gen4_pop_count(devinfo, insn, 1);
|
||||
brw_inst_set_gfx4_jump_count(devinfo, insn, 0);
|
||||
brw_inst_set_gfx4_pop_count(devinfo, insn, 1);
|
||||
} else if (devinfo->ver == 6) {
|
||||
brw_inst_set_gen6_jump_count(devinfo, insn, 2);
|
||||
brw_inst_set_gfx6_jump_count(devinfo, insn, 2);
|
||||
} else {
|
||||
brw_inst_set_jip(devinfo, insn, 2);
|
||||
}
|
||||
|
@ -1735,7 +1735,7 @@ brw_BREAK(struct brw_codegen *p)
|
|||
brw_set_dest(p, insn, brw_ip_reg());
|
||||
brw_set_src0(p, insn, brw_ip_reg());
|
||||
brw_set_src1(p, insn, brw_imm_d(0x0));
|
||||
brw_inst_set_gen4_pop_count(devinfo, insn,
|
||||
brw_inst_set_gfx4_pop_count(devinfo, insn,
|
||||
p->if_depth_in_loop[p->loop_stack_depth]);
|
||||
}
|
||||
brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
|
||||
|
@ -1760,7 +1760,7 @@ brw_CONT(struct brw_codegen *p)
|
|||
}
|
||||
|
||||
if (devinfo->ver < 6) {
|
||||
brw_inst_set_gen4_pop_count(devinfo, insn,
|
||||
brw_inst_set_gfx4_pop_count(devinfo, insn,
|
||||
p->if_depth_in_loop[p->loop_stack_depth]);
|
||||
}
|
||||
brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
|
||||
|
@ -1806,11 +1806,11 @@ brw_HALT(struct brw_codegen *p)
|
|||
* For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
|
||||
* jip and no DO instruction.
|
||||
*
|
||||
* For non-uniform control flow pre-gen6, there's a DO instruction to
|
||||
* For non-uniform control flow pre-gfx6, there's a DO instruction to
|
||||
* push the mask, and a WHILE to jump back, and BREAK to get out and
|
||||
* pop the mask.
|
||||
*
|
||||
* For gen6, there's no more mask stack, so no need for DO. WHILE
|
||||
* For gfx6, there's no more mask stack, so no need for DO. WHILE
|
||||
* just points back to the first instruction of the loop.
|
||||
*/
|
||||
brw_inst *
|
||||
|
@ -1841,10 +1841,10 @@ brw_DO(struct brw_codegen *p, unsigned execute_size)
|
|||
}
|
||||
|
||||
/**
|
||||
* For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
|
||||
* For pre-gfx6, we patch BREAK/CONT instructions to point at the WHILE
|
||||
* instruction here.
|
||||
*
|
||||
* For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
|
||||
* For gfx6+, see brw_set_uip_jip(), which doesn't care so much about the loop
|
||||
* nesting, since it can always just point to the end of the block/current loop.
|
||||
*/
|
||||
static void
|
||||
|
@ -1863,11 +1863,11 @@ brw_patch_break_cont(struct brw_codegen *p, brw_inst *while_inst)
|
|||
* patching.
|
||||
*/
|
||||
if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_BREAK &&
|
||||
brw_inst_gen4_jump_count(devinfo, inst) == 0) {
|
||||
brw_inst_set_gen4_jump_count(devinfo, inst, br*((while_inst - inst) + 1));
|
||||
brw_inst_gfx4_jump_count(devinfo, inst) == 0) {
|
||||
brw_inst_set_gfx4_jump_count(devinfo, inst, br*((while_inst - inst) + 1));
|
||||
} else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_CONTINUE &&
|
||||
brw_inst_gen4_jump_count(devinfo, inst) == 0) {
|
||||
brw_inst_set_gen4_jump_count(devinfo, inst, br * (while_inst - inst));
|
||||
brw_inst_gfx4_jump_count(devinfo, inst) == 0) {
|
||||
brw_inst_set_gfx4_jump_count(devinfo, inst, br * (while_inst - inst));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1895,7 +1895,7 @@ brw_WHILE(struct brw_codegen *p)
|
|||
brw_inst_set_jip(devinfo, insn, br * (do_insn - insn));
|
||||
} else {
|
||||
brw_set_dest(p, insn, brw_imm_w(0));
|
||||
brw_inst_set_gen6_jump_count(devinfo, insn, br * (do_insn - insn));
|
||||
brw_inst_set_gfx6_jump_count(devinfo, insn, br * (do_insn - insn));
|
||||
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
|
||||
brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
|
||||
}
|
||||
|
@ -1922,8 +1922,8 @@ brw_WHILE(struct brw_codegen *p)
|
|||
brw_set_src1(p, insn, brw_imm_d(0));
|
||||
|
||||
brw_inst_set_exec_size(devinfo, insn, brw_inst_exec_size(devinfo, do_insn));
|
||||
brw_inst_set_gen4_jump_count(devinfo, insn, br * (do_insn - insn + 1));
|
||||
brw_inst_set_gen4_pop_count(devinfo, insn, 0);
|
||||
brw_inst_set_gfx4_jump_count(devinfo, insn, br * (do_insn - insn + 1));
|
||||
brw_inst_set_gfx4_pop_count(devinfo, insn, 0);
|
||||
|
||||
brw_patch_break_cont(p, insn);
|
||||
}
|
||||
|
@ -1949,7 +1949,7 @@ void brw_land_fwd_jump(struct brw_codegen *p, int jmp_insn_idx)
|
|||
assert(brw_inst_opcode(devinfo, jmp_insn) == BRW_OPCODE_JMPI);
|
||||
assert(brw_inst_src1_reg_file(devinfo, jmp_insn) == BRW_IMMEDIATE_VALUE);
|
||||
|
||||
brw_inst_set_gen4_jump_count(devinfo, jmp_insn,
|
||||
brw_inst_set_gfx4_jump_count(devinfo, jmp_insn,
|
||||
jmpi * (p->nr_insn - jmp_insn_idx - 1));
|
||||
}
|
||||
|
||||
|
@ -2022,7 +2022,7 @@ void brw_CMPN(struct brw_codegen *p,
|
|||
|
||||
/** Extended math function, float[8].
|
||||
*/
|
||||
void gen4_math(struct brw_codegen *p,
|
||||
void gfx4_math(struct brw_codegen *p,
|
||||
struct brw_reg dest,
|
||||
unsigned function,
|
||||
unsigned msg_reg_nr,
|
||||
|
@ -2056,7 +2056,7 @@ void gen4_math(struct brw_codegen *p,
|
|||
data_type);
|
||||
}
|
||||
|
||||
void gen6_math(struct brw_codegen *p,
|
||||
void gfx6_math(struct brw_codegen *p,
|
||||
struct brw_reg dest,
|
||||
unsigned function,
|
||||
struct brw_reg src0,
|
||||
|
@ -2190,13 +2190,13 @@ void brw_oword_block_write_scratch(struct brw_codegen *p,
|
|||
if (devinfo->ver < 6)
|
||||
brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
|
||||
|
||||
/* Until gen6, writes followed by reads from the same location
|
||||
/* Until gfx6, writes followed by reads from the same location
|
||||
* are not guaranteed to be ordered unless write_commit is set.
|
||||
* If set, then a no-op write is issued to the destination
|
||||
* register to set a dependency, and a read from the destination
|
||||
* can be used to ensure the ordering.
|
||||
*
|
||||
* For gen6, only writes between different threads need ordering
|
||||
* For gfx6, only writes between different threads need ordering
|
||||
* protection. Our use of DP writes is all about register
|
||||
* spilling within a thread.
|
||||
*/
|
||||
|
@ -2313,7 +2313,7 @@ brw_oword_block_read_scratch(struct brw_codegen *p,
|
|||
}
|
||||
|
||||
void
|
||||
gen7_block_read_scratch(struct brw_codegen *p,
|
||||
gfx7_block_read_scratch(struct brw_codegen *p,
|
||||
struct brw_reg dest,
|
||||
int num_regs,
|
||||
unsigned offset)
|
||||
|
@ -2335,7 +2335,7 @@ gen7_block_read_scratch(struct brw_codegen *p,
|
|||
offset /= REG_SIZE;
|
||||
assert(offset < (1 << 12));
|
||||
|
||||
gen7_set_dp_scratch_message(p, insn,
|
||||
gfx7_set_dp_scratch_message(p, insn,
|
||||
false, /* scratch read */
|
||||
false, /* OWords */
|
||||
false, /* invalidate after read */
|
||||
|
@ -2477,7 +2477,7 @@ brw_fb_WRITE(struct brw_codegen *p,
|
|||
}
|
||||
|
||||
brw_inst *
|
||||
gen9_fb_READ(struct brw_codegen *p,
|
||||
gfx9_fb_READ(struct brw_codegen *p,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg payload,
|
||||
unsigned binding_table_index,
|
||||
|
@ -2528,7 +2528,7 @@ void brw_SAMPLE(struct brw_codegen *p,
|
|||
brw_inst *insn;
|
||||
|
||||
if (msg_reg_nr != -1)
|
||||
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
|
||||
gfx6_resolve_implied_move(p, &src0, msg_reg_nr);
|
||||
|
||||
insn = next_insn(p, BRW_OPCODE_SEND);
|
||||
brw_inst_set_sfid(devinfo, insn, BRW_SFID_SAMPLER);
|
||||
|
@ -2626,7 +2626,7 @@ void brw_urb_WRITE(struct brw_codegen *p,
|
|||
const struct gen_device_info *devinfo = p->devinfo;
|
||||
brw_inst *insn;
|
||||
|
||||
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
|
||||
gfx6_resolve_implied_move(p, &src0, msg_reg_nr);
|
||||
|
||||
if (devinfo->ver >= 7 && !(flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
|
||||
/* Enable Channel Masks in the URB_WRITE_HWORD message header */
|
||||
|
@ -2873,7 +2873,7 @@ while_jumps_before_offset(const struct gen_device_info *devinfo,
|
|||
brw_inst *insn, int while_offset, int start_offset)
|
||||
{
|
||||
int scale = 16 / brw_jump_scale(devinfo);
|
||||
int jip = devinfo->ver == 6 ? brw_inst_gen6_jump_count(devinfo, insn)
|
||||
int jip = devinfo->ver == 6 ? brw_inst_gfx6_jump_count(devinfo, insn)
|
||||
: brw_inst_jip(devinfo, insn);
|
||||
assert(jip < 0);
|
||||
return while_offset + jip * scale <= start_offset;
|
||||
|
@ -2922,7 +2922,7 @@ brw_find_next_block_end(struct brw_codegen *p, int start_offset)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* There is no DO instruction on gen6, so to find the end of the loop
|
||||
/* There is no DO instruction on gfx6, so to find the end of the loop
|
||||
* we have to see if the loop is jumping back before our start
|
||||
* instruction.
|
||||
*/
|
||||
|
@ -2997,7 +2997,7 @@ brw_set_uip_jip(struct brw_codegen *p, int start_offset)
|
|||
if (devinfo->ver >= 7)
|
||||
brw_inst_set_jip(devinfo, insn, jump);
|
||||
else
|
||||
brw_inst_set_gen6_jump_count(devinfo, insn, jump);
|
||||
brw_inst_set_gfx6_jump_count(devinfo, insn, jump);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -3039,7 +3039,7 @@ void brw_ff_sync(struct brw_codegen *p,
|
|||
const struct gen_device_info *devinfo = p->devinfo;
|
||||
brw_inst *insn;
|
||||
|
||||
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
|
||||
gfx6_resolve_implied_move(p, &src0, msg_reg_nr);
|
||||
|
||||
insn = next_insn(p, BRW_OPCODE_SEND);
|
||||
brw_set_dest(p, insn, dest);
|
||||
|
@ -3082,7 +3082,7 @@ brw_svb_write(struct brw_codegen *p,
|
|||
BRW_SFID_DATAPORT_WRITE);
|
||||
brw_inst *insn;
|
||||
|
||||
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
|
||||
gfx6_resolve_implied_move(p, &src0, msg_reg_nr);
|
||||
|
||||
insn = next_insn(p, BRW_OPCODE_SEND);
|
||||
brw_inst_set_sfid(devinfo, insn, target_cache);
|
||||
|
|
|
@ -38,7 +38,7 @@ void brw_math_invert( struct brw_codegen *p,
|
|||
struct brw_reg dst,
|
||||
struct brw_reg src)
|
||||
{
|
||||
gen4_math(p,
|
||||
gfx4_math(p,
|
||||
dst,
|
||||
BRW_MATH_FUNCTION_INV,
|
||||
0,
|
||||
|
|
|
@ -1628,7 +1628,7 @@ region_alignment_rules(const struct gen_device_info *devinfo,
|
|||
* destination must be an integer DWord, the hardware allows at least a
|
||||
* float destination type as well. We emit such instructions from
|
||||
*
|
||||
* fs_visitor::emit_interpolation_setup_gen6
|
||||
* fs_visitor::emit_interpolation_setup_gfx6
|
||||
* fs_visitor::emit_fragcoord_interpolation
|
||||
*
|
||||
* and have for years with no ill effects.
|
||||
|
|
|
@ -411,7 +411,7 @@ fs_inst::has_source_and_destination_hazard() const
|
|||
*
|
||||
* Now our destination for the first instruction overwrote the
|
||||
* second instruction's src0, and we get garbage for those 8
|
||||
* pixels. There's a similar issue for the pre-gen6
|
||||
* pixels. There's a similar issue for the pre-gfx6
|
||||
* pixel_x/pixel_y, which are registers of 16-bit values and thus
|
||||
* would get stomped by the first decode as well.
|
||||
*/
|
||||
|
@ -1490,10 +1490,10 @@ fs_visitor::emit_sampleid_setup()
|
|||
* can assume 4x MSAA. Disallow it on IVB+
|
||||
*
|
||||
* FINISHME: One day, we could come up with a way to do this that
|
||||
* actually works on gen7.
|
||||
* actually works on gfx7.
|
||||
*/
|
||||
if (devinfo->ver >= 7)
|
||||
limit_dispatch_width(16, "gl_SampleId is unsupported in SIMD32 on gen7");
|
||||
limit_dispatch_width(16, "gl_SampleId is unsupported in SIMD32 on gfx7");
|
||||
abld.exec_all().group(8, 0).MOV(t2, brw_imm_v(0x32103210));
|
||||
|
||||
/* This special instruction takes care of setting vstride=1,
|
||||
|
@ -3221,7 +3221,7 @@ fs_visitor::compute_to_mrf()
|
|||
break;
|
||||
|
||||
if (devinfo->ver == 6) {
|
||||
/* gen6 math instructions must have the destination be
|
||||
/* gfx6 math instructions must have the destination be
|
||||
* GRF, so no compute-to-MRF for them.
|
||||
*/
|
||||
if (scan_inst->is_math()) {
|
||||
|
@ -3626,7 +3626,7 @@ clear_deps_for_inst_src(fs_inst *inst, bool *deps, int first_grf, int grf_len)
|
|||
* same time that both consider ‘r3’ as the target of their final writes.
|
||||
*/
|
||||
void
|
||||
fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
|
||||
fs_visitor::insert_gfx4_pre_send_dependency_workarounds(bblock_t *block,
|
||||
fs_inst *inst)
|
||||
{
|
||||
int write_len = regs_written(inst);
|
||||
|
@ -3698,7 +3698,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
|
|||
* instruction with a different destination register.
|
||||
*/
|
||||
void
|
||||
fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_inst *inst)
|
||||
fs_visitor::insert_gfx4_post_send_dependency_workarounds(bblock_t *block, fs_inst *inst)
|
||||
{
|
||||
int write_len = regs_written(inst);
|
||||
unsigned first_write_grf = inst->dst.nr;
|
||||
|
@ -3748,7 +3748,7 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins
|
|||
}
|
||||
|
||||
void
|
||||
fs_visitor::insert_gen4_send_dependency_workarounds()
|
||||
fs_visitor::insert_gfx4_send_dependency_workarounds()
|
||||
{
|
||||
if (devinfo->ver != 4 || devinfo->is_g4x)
|
||||
return;
|
||||
|
@ -3757,8 +3757,8 @@ fs_visitor::insert_gen4_send_dependency_workarounds()
|
|||
|
||||
foreach_block_and_inst(block, fs_inst, inst, cfg) {
|
||||
if (inst->mlen != 0 && inst->dst.file == VGRF) {
|
||||
insert_gen4_pre_send_dependency_workarounds(block, inst);
|
||||
insert_gen4_post_send_dependency_workarounds(block, inst);
|
||||
insert_gfx4_pre_send_dependency_workarounds(block, inst);
|
||||
insert_gfx4_post_send_dependency_workarounds(block, inst);
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
@ -4476,13 +4476,13 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
|
|||
unsigned length = 0;
|
||||
|
||||
if (devinfo->ver < 6) {
|
||||
/* TODO: Support SIMD32 on gen4-5 */
|
||||
/* TODO: Support SIMD32 on gfx4-5 */
|
||||
assert(bld.group() < 16);
|
||||
|
||||
/* For gen4-5, we always have a header consisting of g0 and g1. We have
|
||||
/* For gfx4-5, we always have a header consisting of g0 and g1. We have
|
||||
* an implied MOV from g0,g1 to the start of the message. The MOV from
|
||||
* g0 is handled by the hardware and the MOV from g1 is provided by the
|
||||
* generator. This is required because, on gen4-5, the generator may
|
||||
* generator. This is required because, on gfx4-5, the generator may
|
||||
* generate two write messages with different message lengths in order
|
||||
* to handle AA data properly.
|
||||
*
|
||||
|
@ -4634,8 +4634,8 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
|
|||
assert(devinfo->ver >= 9);
|
||||
assert(bld.dispatch_width() == 8);
|
||||
|
||||
/* XXX: src_stencil is only available on gen9+. dst_depth is never
|
||||
* available on gen9+. As such it's impossible to have both enabled at the
|
||||
/* XXX: src_stencil is only available on gfx9+. dst_depth is never
|
||||
* available on gfx9+. As such it's impossible to have both enabled at the
|
||||
* same time and therefore length cannot overrun the array.
|
||||
*/
|
||||
assert(length < 15);
|
||||
|
@ -4752,7 +4752,7 @@ lower_fb_read_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||
}
|
||||
|
||||
static void
|
||||
lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
lower_sampler_logical_send_gfx4(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
const fs_reg &coordinate,
|
||||
const fs_reg &shadow_c,
|
||||
const fs_reg &lod, const fs_reg &lod2,
|
||||
|
@ -4859,7 +4859,7 @@ lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||
}
|
||||
|
||||
static void
|
||||
lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
lower_sampler_logical_send_gfx5(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
const fs_reg &coordinate,
|
||||
const fs_reg &shadow_c,
|
||||
const fs_reg &lod, const fs_reg &lod2,
|
||||
|
@ -5025,7 +5025,7 @@ sampler_msg_type(const gen_device_info *devinfo,
|
|||
}
|
||||
|
||||
static void
|
||||
lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
|
||||
const fs_reg &coordinate,
|
||||
const fs_reg &shadow_c,
|
||||
fs_reg lod, const fs_reg &lod2,
|
||||
|
@ -5336,7 +5336,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||
sampler.file == IMM ? sampler.ud % 16 : 0,
|
||||
msg_type,
|
||||
simd_mode,
|
||||
0 /* return_format unused on gen7+ */);
|
||||
0 /* return_format unused on gfx7+ */);
|
||||
inst->src[0] = brw_imm_ud(0);
|
||||
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
||||
} else if (surface_handle.file != BAD_FILE) {
|
||||
|
@ -5347,7 +5347,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||
sampler.file == IMM ? sampler.ud % 16 : 0,
|
||||
msg_type,
|
||||
simd_mode,
|
||||
0 /* return_format unused on gen7+ */);
|
||||
0 /* return_format unused on gfx7+ */);
|
||||
|
||||
/* For bindless samplers, the entire address is included in the message
|
||||
* header so we can leave the portion in the message descriptor 0.
|
||||
|
@ -5372,7 +5372,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||
0, /* sampler */
|
||||
msg_type,
|
||||
simd_mode,
|
||||
0 /* return_format unused on gen7+ */);
|
||||
0 /* return_format unused on gfx7+ */);
|
||||
const fs_builder ubld = bld.group(1, 0).exec_all();
|
||||
fs_reg desc = ubld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
if (surface.equals(sampler)) {
|
||||
|
@ -5435,7 +5435,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
|
|||
const unsigned grad_components = inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud;
|
||||
|
||||
if (devinfo->ver >= 7) {
|
||||
lower_sampler_logical_send_gen7(bld, inst, op, coordinate,
|
||||
lower_sampler_logical_send_gfx7(bld, inst, op, coordinate,
|
||||
shadow_c, lod, lod2, min_lod,
|
||||
sample_index,
|
||||
mcs, surface, sampler,
|
||||
|
@ -5443,12 +5443,12 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
|
|||
tg4_offset,
|
||||
coord_components, grad_components);
|
||||
} else if (devinfo->ver >= 5) {
|
||||
lower_sampler_logical_send_gen5(bld, inst, op, coordinate,
|
||||
lower_sampler_logical_send_gfx5(bld, inst, op, coordinate,
|
||||
shadow_c, lod, lod2, sample_index,
|
||||
surface, sampler,
|
||||
coord_components, grad_components);
|
||||
} else {
|
||||
lower_sampler_logical_send_gen4(bld, inst, op, coordinate,
|
||||
lower_sampler_logical_send_gfx4(bld, inst, op, coordinate,
|
||||
shadow_c, lod, lod2,
|
||||
surface, sampler,
|
||||
coord_components, grad_components);
|
||||
|
@ -5598,7 +5598,7 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||
unsigned mlen, ex_mlen = 0;
|
||||
if (devinfo->ver >= 9 &&
|
||||
(src.file == BAD_FILE || header.file == BAD_FILE)) {
|
||||
/* We have split sends on gen9 and above */
|
||||
/* We have split sends on gfx9 and above */
|
||||
if (header.file == BAD_FILE) {
|
||||
payload = bld.move_to_vgrf(addr, addr_sz);
|
||||
payload2 = bld.move_to_vgrf(src, src_sz);
|
||||
|
@ -6431,7 +6431,7 @@ static bool
|
|||
is_mixed_float_with_fp32_dst(const fs_inst *inst)
|
||||
{
|
||||
/* This opcode sometimes uses :W type on the source even if the operand is
|
||||
* a :HF, because in gen7 there is no support for :HF, and thus it uses :W.
|
||||
* a :HF, because in gfx7 there is no support for :HF, and thus it uses :W.
|
||||
*/
|
||||
if (inst->opcode == BRW_OPCODE_F16TO32)
|
||||
return true;
|
||||
|
@ -6451,7 +6451,7 @@ static bool
|
|||
is_mixed_float_with_packed_fp16_dst(const fs_inst *inst)
|
||||
{
|
||||
/* This opcode sometimes uses :W type on the destination even if the
|
||||
* destination is a :HF, because in gen7 there is no support for :HF, and
|
||||
* destination is a :HF, because in gfx7 there is no support for :HF, and
|
||||
* thus it uses :W.
|
||||
*/
|
||||
if (inst->opcode == BRW_OPCODE_F32TO16 &&
|
||||
|
@ -7669,7 +7669,7 @@ fs_visitor::dump_instruction(const backend_instruction *be_inst, FILE *file) con
|
|||
}
|
||||
|
||||
void
|
||||
fs_visitor::setup_fs_payload_gen6()
|
||||
fs_visitor::setup_fs_payload_gfx6()
|
||||
{
|
||||
assert(stage == MESA_SHADER_FRAGMENT);
|
||||
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
|
||||
|
@ -8276,7 +8276,7 @@ fs_visitor::allocate_registers(bool allow_spilling)
|
|||
* it inserts dead code that happens to have side effects, and it does
|
||||
* so based on the actual physical registers in use.
|
||||
*/
|
||||
insert_gen4_send_dependency_workarounds();
|
||||
insert_gfx4_send_dependency_workarounds();
|
||||
|
||||
if (failed)
|
||||
return;
|
||||
|
@ -8574,7 +8574,7 @@ fs_visitor::run_gs()
|
|||
* overhead.
|
||||
*/
|
||||
static void
|
||||
gen9_ps_header_only_workaround(struct brw_wm_prog_data *wm_prog_data)
|
||||
gfx9_ps_header_only_workaround(struct brw_wm_prog_data *wm_prog_data)
|
||||
{
|
||||
if (wm_prog_data->num_varying_inputs)
|
||||
return;
|
||||
|
@ -8597,9 +8597,9 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
|
|||
assert(stage == MESA_SHADER_FRAGMENT);
|
||||
|
||||
if (devinfo->ver >= 6)
|
||||
setup_fs_payload_gen6();
|
||||
setup_fs_payload_gfx6();
|
||||
else
|
||||
setup_fs_payload_gen4();
|
||||
setup_fs_payload_gfx4();
|
||||
|
||||
if (0) {
|
||||
emit_dummy_fs();
|
||||
|
@ -8614,9 +8614,9 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
|
|||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
|
||||
(nir->info.outputs_read > 0 && !wm_key->coherent_fb_fetch)) {
|
||||
if (devinfo->ver < 6)
|
||||
emit_interpolation_setup_gen4();
|
||||
emit_interpolation_setup_gfx4();
|
||||
else
|
||||
emit_interpolation_setup_gen6();
|
||||
emit_interpolation_setup_gfx6();
|
||||
}
|
||||
|
||||
/* We handle discards by keeping track of the still-live pixels in f0.1.
|
||||
|
@ -8657,7 +8657,7 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
|
|||
assign_curb_setup();
|
||||
|
||||
if (devinfo->ver >= 9)
|
||||
gen9_ps_header_only_workaround(wm_prog_data);
|
||||
gfx9_ps_header_only_workaround(wm_prog_data);
|
||||
|
||||
assign_urb_setup();
|
||||
|
||||
|
@ -9044,7 +9044,7 @@ brw_nir_populate_wm_prog_data(const nir_shader *shader,
|
|||
}
|
||||
|
||||
/**
|
||||
* Pre-gen6, the register file of the EUs was shared between threads,
|
||||
* Pre-gfx6, the register file of the EUs was shared between threads,
|
||||
* and each thread used some subset allocated on a 16-register block
|
||||
* granularity. The unit states wanted these block counts.
|
||||
*/
|
||||
|
@ -9121,13 +9121,13 @@ brw_compile_fs(const struct brw_compiler *compiler,
|
|||
allow_spilling = false;
|
||||
}
|
||||
|
||||
/* Limit dispatch width to simd8 with dual source blending on gen8.
|
||||
/* Limit dispatch width to simd8 with dual source blending on gfx8.
|
||||
* See: https://gitlab.freedesktop.org/mesa/mesa/-/issues/1917
|
||||
*/
|
||||
if (devinfo->ver == 8 && prog_data->dual_src_blend &&
|
||||
!(INTEL_DEBUG & DEBUG_NO8)) {
|
||||
assert(!params->use_rep_send);
|
||||
v8->limit_dispatch_width(8, "gen8 workaround: "
|
||||
v8->limit_dispatch_width(8, "gfx8 workaround: "
|
||||
"using SIMD8 when dual src blending.\n");
|
||||
}
|
||||
|
||||
|
|
|
@ -132,8 +132,8 @@ public:
|
|||
bool run_bs(bool allow_spilling);
|
||||
void optimize();
|
||||
void allocate_registers(bool allow_spilling);
|
||||
void setup_fs_payload_gen4();
|
||||
void setup_fs_payload_gen6();
|
||||
void setup_fs_payload_gfx4();
|
||||
void setup_fs_payload_gfx6();
|
||||
void setup_vs_payload();
|
||||
void setup_gs_payload();
|
||||
void setup_cs_payload();
|
||||
|
@ -180,10 +180,10 @@ public:
|
|||
bool remove_extra_rounding_modes();
|
||||
|
||||
void schedule_instructions(instruction_scheduler_mode mode);
|
||||
void insert_gen4_send_dependency_workarounds();
|
||||
void insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
|
||||
void insert_gfx4_send_dependency_workarounds();
|
||||
void insert_gfx4_pre_send_dependency_workarounds(bblock_t *block,
|
||||
fs_inst *inst);
|
||||
void insert_gen4_post_send_dependency_workarounds(bblock_t *block,
|
||||
void insert_gfx4_post_send_dependency_workarounds(bblock_t *block,
|
||||
fs_inst *inst);
|
||||
void vfail(const char *msg, va_list args);
|
||||
void fail(const char *msg, ...);
|
||||
|
@ -208,13 +208,13 @@ public:
|
|||
fs_reg *emit_samplepos_setup();
|
||||
fs_reg *emit_sampleid_setup();
|
||||
fs_reg *emit_samplemaskin_setup();
|
||||
void emit_interpolation_setup_gen4();
|
||||
void emit_interpolation_setup_gen6();
|
||||
void emit_interpolation_setup_gfx4();
|
||||
void emit_interpolation_setup_gfx6();
|
||||
void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
|
||||
fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
|
||||
const fs_reg &texture,
|
||||
const fs_reg &texture_handle);
|
||||
void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
|
||||
void emit_gfx6_gather_wa(uint8_t wa, fs_reg dst);
|
||||
fs_reg resolve_source_modifiers(const fs_reg &src);
|
||||
void emit_fsign(const class brw::fs_builder &, const nir_alu_instr *instr,
|
||||
fs_reg result, fs_reg *op, unsigned fsign_src);
|
||||
|
@ -513,16 +513,16 @@ private:
|
|||
struct brw_reg dst, struct brw_reg src);
|
||||
void generate_scratch_write(fs_inst *inst, struct brw_reg src);
|
||||
void generate_scratch_read(fs_inst *inst, struct brw_reg dst);
|
||||
void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst);
|
||||
void generate_scratch_read_gfx7(fs_inst *inst, struct brw_reg dst);
|
||||
void generate_scratch_header(fs_inst *inst, struct brw_reg dst);
|
||||
void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst,
|
||||
struct brw_reg index,
|
||||
struct brw_reg offset);
|
||||
void generate_uniform_pull_constant_load_gen7(fs_inst *inst,
|
||||
void generate_uniform_pull_constant_load_gfx7(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg surf_index,
|
||||
struct brw_reg payload);
|
||||
void generate_varying_pull_constant_load_gen4(fs_inst *inst,
|
||||
void generate_varying_pull_constant_load_gfx4(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg index);
|
||||
void generate_mov_dispatch_to_flags(fs_inst *inst);
|
||||
|
|
|
@ -663,7 +663,7 @@ namespace brw {
|
|||
*
|
||||
* CMP null<d> src0<f> src1<f>
|
||||
*
|
||||
* Original gen4 does type conversion to the destination type
|
||||
* Original gfx4 does type conversion to the destination type
|
||||
* before comparison, producing garbage results for floating
|
||||
* point comparisons.
|
||||
*
|
||||
|
@ -688,7 +688,7 @@ namespace brw {
|
|||
*
|
||||
* CMP null<d> src0<f> src1<f>
|
||||
*
|
||||
* Original gen4 does type conversion to the destination type
|
||||
* Original gfx4 does type conversion to the destination type
|
||||
* before comparison, producing garbage results for floating
|
||||
* point comparisons.
|
||||
*
|
||||
|
@ -844,7 +844,7 @@ namespace brw {
|
|||
src_reg
|
||||
fix_math_operand(const src_reg &src) const
|
||||
{
|
||||
/* Can't do hstride == 0 args on gen6 math, so expand it out. We
|
||||
/* Can't do hstride == 0 args on gfx6 math, so expand it out. We
|
||||
* might be able to do better by doing execsize = 1 math and then
|
||||
* expanding that result out, but we would need to be careful with
|
||||
* masking.
|
||||
|
|
|
@ -446,7 +446,7 @@ fs_generator::generate_fb_read(fs_inst *inst, struct brw_reg dst,
|
|||
/* We assume that render targets start at binding table index 0. */
|
||||
const unsigned surf_index = inst->target;
|
||||
|
||||
gen9_fb_READ(p, dst, payload, surf_index,
|
||||
gfx9_fb_READ(p, dst, payload, surf_index,
|
||||
inst->header_size, inst->size_written / REG_SIZE,
|
||||
prog_data->persample_dispatch);
|
||||
}
|
||||
|
@ -609,7 +609,7 @@ fs_generator::generate_shuffle(fs_inst *inst,
|
|||
assert(devinfo->ver >= 8 || devinfo->is_haswell || type_sz(src.type) <= 4);
|
||||
|
||||
/* Because we're using the address register, we're limited to 8-wide
|
||||
* execution on gen7. On gen8, we're limited to 16-wide by the address
|
||||
* execution on gfx7. On gfx8, we're limited to 16-wide by the address
|
||||
* register file and 8-wide for 64-bit types. We could try and make this
|
||||
* instruction splittable higher up in the compiler but that gets weird
|
||||
* because it reads all of the channels regardless of execution size. It's
|
||||
|
@ -946,7 +946,7 @@ fs_generator::generate_linterp(fs_inst *inst,
|
|||
* |(x0, x1)|(x2, x3)|(y0, y1)|(y2, y3)| in SIMD16
|
||||
* -----------------------------------
|
||||
*
|
||||
* See also: emit_interpolation_setup_gen4().
|
||||
* See also: emit_interpolation_setup_gfx4().
|
||||
*/
|
||||
struct brw_reg delta_x = src[0];
|
||||
struct brw_reg delta_y = offset(src[0], inst->exec_size / 8);
|
||||
|
@ -954,7 +954,7 @@ fs_generator::generate_linterp(fs_inst *inst,
|
|||
brw_inst *i[2];
|
||||
|
||||
/* nir_lower_interpolation() will do the lowering to MAD instructions for
|
||||
* us on gen11+
|
||||
* us on gfx11+
|
||||
*/
|
||||
assert(devinfo->ver < 11);
|
||||
|
||||
|
@ -968,7 +968,7 @@ fs_generator::generate_linterp(fs_inst *inst,
|
|||
*
|
||||
* This means that we need to split PLN into LINE+MAC on-the-fly.
|
||||
* Unfortunately, the inputs are laid out for PLN and not LINE+MAC so
|
||||
* we have to split into SIMD8 pieces. For gen4 (!has_pln), the
|
||||
* we have to split into SIMD8 pieces. For gfx4 (!has_pln), the
|
||||
* coordinate registers are laid out differently so we leave it as a
|
||||
* SIMD16 instruction.
|
||||
*/
|
||||
|
@ -986,7 +986,7 @@ fs_generator::generate_linterp(fs_inst *inst,
|
|||
offset(delta_x, g * 2));
|
||||
brw_inst_set_group(devinfo, line, inst->group + g * 8);
|
||||
|
||||
/* LINE writes the accumulator automatically on gen4-5. On Sandy
|
||||
/* LINE writes the accumulator automatically on gfx4-5. On Sandy
|
||||
* Bridge and later, we have to explicitly enable it.
|
||||
*/
|
||||
if (devinfo->ver >= 6)
|
||||
|
@ -1101,11 +1101,11 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst,
|
|||
}
|
||||
|
||||
/* Stomp the resinfo output type to UINT32. On gens 4-5, the output type
|
||||
* is set as part of the message descriptor. On gen4, the PRM seems to
|
||||
* is set as part of the message descriptor. On gfx4, the PRM seems to
|
||||
* allow UINT32 and FLOAT32 (i965 PRM, Vol. 4 Section 4.8.1.1), but on
|
||||
* later gens UINT32 is required. Once you hit Sandy Bridge, the bit is
|
||||
* gone from the message descriptor entirely and you just get UINT32 all
|
||||
* the time regasrdless. Since we can really only do non-UINT32 on gen4,
|
||||
* the time regasrdless. Since we can really only do non-UINT32 on gfx4,
|
||||
* just stomp it to UINT32 all the time.
|
||||
*/
|
||||
if (inst->opcode == SHADER_OPCODE_TXS)
|
||||
|
@ -1369,7 +1369,7 @@ fs_generator::generate_ddx(const fs_inst *inst,
|
|||
* correctly for compressed instructions. At least on Haswell and
|
||||
* Iron Lake, compressed ALIGN16 instructions do work. Since we
|
||||
* would have to split to SIMD8 no matter which method we choose, we
|
||||
* may as well use ALIGN16 on all platforms gen7 and earlier.
|
||||
* may as well use ALIGN16 on all platforms gfx7 and earlier.
|
||||
*/
|
||||
struct brw_reg src0 = stride(src, 4, 4, 1);
|
||||
struct brw_reg src1 = stride(src, 4, 4, 1);
|
||||
|
@ -1449,7 +1449,7 @@ fs_generator::generate_ddy(const fs_inst *inst,
|
|||
* correctly for compressed instructions. At least on Haswell and
|
||||
* Iron Lake, compressed ALIGN16 instructions do work. Since we
|
||||
* would have to split to SIMD8 no matter which method we choose, we
|
||||
* may as well use ALIGN16 on all platforms gen7 and earlier.
|
||||
* may as well use ALIGN16 on all platforms gfx7 and earlier.
|
||||
*/
|
||||
struct brw_reg src0 = stride(src, 4, 4, 1);
|
||||
struct brw_reg src1 = stride(src, 4, 4, 1);
|
||||
|
@ -1526,11 +1526,11 @@ fs_generator::generate_scratch_read(fs_inst *inst, struct brw_reg dst)
|
|||
}
|
||||
|
||||
void
|
||||
fs_generator::generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst)
|
||||
fs_generator::generate_scratch_read_gfx7(fs_inst *inst, struct brw_reg dst)
|
||||
{
|
||||
assert(inst->exec_size <= 16 || inst->force_writemask_all);
|
||||
|
||||
gen7_block_read_scratch(p, dst, inst->exec_size / 8, inst->offset);
|
||||
gfx7_block_read_scratch(p, dst, inst->exec_size / 8, inst->offset);
|
||||
}
|
||||
|
||||
/* The A32 messages take a buffer base address in header.5:[31:0] (See
|
||||
|
@ -1625,7 +1625,7 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
|
|||
}
|
||||
|
||||
void
|
||||
fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
|
||||
fs_generator::generate_uniform_pull_constant_load_gfx7(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg index,
|
||||
struct brw_reg payload)
|
||||
|
@ -1687,11 +1687,11 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
|
|||
}
|
||||
|
||||
void
|
||||
fs_generator::generate_varying_pull_constant_load_gen4(fs_inst *inst,
|
||||
fs_generator::generate_varying_pull_constant_load_gfx4(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg index)
|
||||
{
|
||||
assert(devinfo->ver < 7); /* Should use the gen7 variant. */
|
||||
assert(devinfo->ver < 7); /* Should use the gfx7 variant. */
|
||||
assert(inst->header_size != 0);
|
||||
assert(inst->mlen);
|
||||
|
||||
|
@ -1723,7 +1723,7 @@ fs_generator::generate_varying_pull_constant_load_gen4(fs_inst *inst,
|
|||
}
|
||||
|
||||
struct brw_reg header = brw_vec8_grf(0, 0);
|
||||
gen6_resolve_implied_move(p, &header, inst->base_mrf);
|
||||
gfx6_resolve_implied_move(p, &header, inst->base_mrf);
|
||||
|
||||
brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
|
||||
brw_inst_set_compression(devinfo, send, false);
|
||||
|
@ -2000,7 +2000,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
||||
brw_set_default_predicate_control(p, inst->predicate);
|
||||
brw_set_default_predicate_inverse(p, inst->predicate_inverse);
|
||||
/* On gen7 and above, hardware automatically adds the group onto the
|
||||
/* On gfx7 and above, hardware automatically adds the group onto the
|
||||
* flag subregister number. On Sandy Bridge and older, we have to do it
|
||||
* ourselves.
|
||||
*/
|
||||
|
@ -2203,9 +2203,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||
|
||||
case BRW_OPCODE_IF:
|
||||
if (inst->src[0].file != BAD_FILE) {
|
||||
/* The instruction has an embedded compare (only allowed on gen6) */
|
||||
/* The instruction has an embedded compare (only allowed on gfx6) */
|
||||
assert(devinfo->ver == 6);
|
||||
gen6_IF(p, inst->conditional_mod, src[0], src[1]);
|
||||
gfx6_IF(p, inst->conditional_mod, src[0], src[1]);
|
||||
} else {
|
||||
brw_IF(p, brw_get_default_exec_size(p));
|
||||
}
|
||||
|
@ -2245,12 +2245,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||
if (devinfo->ver >= 6) {
|
||||
assert(inst->mlen == 0);
|
||||
assert(devinfo->ver >= 7 || inst->exec_size == 8);
|
||||
gen6_math(p, dst, brw_math_function(inst->opcode),
|
||||
gfx6_math(p, dst, brw_math_function(inst->opcode),
|
||||
src[0], brw_null_reg());
|
||||
} else {
|
||||
assert(inst->mlen >= 1);
|
||||
assert(devinfo->ver == 5 || devinfo->is_g4x || inst->exec_size == 8);
|
||||
gen4_math(p, dst,
|
||||
gfx4_math(p, dst,
|
||||
brw_math_function(inst->opcode),
|
||||
inst->base_mrf, src[0],
|
||||
BRW_MATH_PRECISION_FULL);
|
||||
|
@ -2265,11 +2265,11 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||
assert(inst->mlen == 0);
|
||||
assert((devinfo->ver >= 7 && inst->opcode == SHADER_OPCODE_POW) ||
|
||||
inst->exec_size == 8);
|
||||
gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]);
|
||||
gfx6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]);
|
||||
} else {
|
||||
assert(inst->mlen >= 1);
|
||||
assert(inst->exec_size == 8);
|
||||
gen4_math(p, dst, brw_math_function(inst->opcode),
|
||||
gfx4_math(p, dst, brw_math_function(inst->opcode),
|
||||
inst->base_mrf, src[0],
|
||||
BRW_MATH_PRECISION_FULL);
|
||||
send_count++;
|
||||
|
@ -2342,7 +2342,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||
break;
|
||||
|
||||
case SHADER_OPCODE_GEN7_SCRATCH_READ:
|
||||
generate_scratch_read_gen7(inst, dst);
|
||||
generate_scratch_read_gfx7(inst, dst);
|
||||
fill_count++;
|
||||
break;
|
||||
|
||||
|
@ -2381,12 +2381,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||
|
||||
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
|
||||
assert(inst->force_writemask_all);
|
||||
generate_uniform_pull_constant_load_gen7(inst, dst, src[0], src[1]);
|
||||
generate_uniform_pull_constant_load_gfx7(inst, dst, src[0], src[1]);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4:
|
||||
generate_varying_pull_constant_load_gen4(inst, dst, src[0]);
|
||||
generate_varying_pull_constant_load_gfx4(inst, dst, src[0]);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
|
|
|
@ -1030,8 +1030,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
|
|||
*
|
||||
* But if we want to use that opcode, we need to provide support on
|
||||
* different optimizations and lowerings. As right now HF support is
|
||||
* only for gen8+, it will be better to use directly the MOV, and use
|
||||
* BRW_OPCODE_F32TO16 when/if we work for HF support on gen7.
|
||||
* only for gfx8+, it will be better to use directly the MOV, and use
|
||||
* BRW_OPCODE_F32TO16 when/if we work for HF support on gfx7.
|
||||
*/
|
||||
assert(type_sz(op[0].type) < 8); /* brw_nir_lower_conversions */
|
||||
inst = bld.MOV(result, op[0]);
|
||||
|
@ -1895,7 +1895,7 @@ fs_visitor::nir_emit_load_const(const fs_builder &bld,
|
|||
case 64:
|
||||
assert(devinfo->ver >= 7);
|
||||
if (devinfo->ver == 7) {
|
||||
/* We don't get 64-bit integer types until gen8 */
|
||||
/* We don't get 64-bit integer types until gfx8 */
|
||||
for (unsigned i = 0; i < instr->def.num_components; i++) {
|
||||
bld.MOV(retype(offset(reg, bld, i), BRW_REGISTER_TYPE_DF),
|
||||
setup_imm_df(bld, instr->value[i].f64));
|
||||
|
@ -1933,7 +1933,7 @@ fs_visitor::get_nir_src(const nir_src &src)
|
|||
}
|
||||
|
||||
if (nir_src_bit_size(src) == 64 && devinfo->ver == 7) {
|
||||
/* The only 64-bit type available on gen7 is DF, so use that. */
|
||||
/* The only 64-bit type available on gfx7 is DF, so use that. */
|
||||
reg.type = BRW_REGISTER_TYPE_DF;
|
||||
} else {
|
||||
/* To avoid floating-point denorm flushing problems, set the type by
|
||||
|
@ -1951,8 +1951,8 @@ fs_visitor::get_nir_src(const nir_src &src)
|
|||
* Return an IMM for constants; otherwise call get_nir_src() as normal.
|
||||
*
|
||||
* This function should not be called on any value which may be 64 bits.
|
||||
* We could theoretically support 64-bit on gen8+ but we choose not to
|
||||
* because it wouldn't work in general (no gen7 support) and there are
|
||||
* We could theoretically support 64-bit on gfx8+ but we choose not to
|
||||
* because it wouldn't work in general (no gfx7 support) and there are
|
||||
* enough restrictions in 64-bit immediates that you can't take the return
|
||||
* value and treat it the same as the result of get_nir_src().
|
||||
*/
|
||||
|
@ -5187,9 +5187,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
const fs_reg tmp = bld.vgrf(value.type);
|
||||
if (devinfo->ver <= 7) {
|
||||
/* The hardware doesn't seem to support these crazy regions with
|
||||
* compressed instructions on gen7 and earlier so we fall back to
|
||||
* compressed instructions on gfx7 and earlier so we fall back to
|
||||
* using quad swizzles. Fortunately, we don't support 64-bit
|
||||
* anything in Vulkan on gen7.
|
||||
* anything in Vulkan on gfx7.
|
||||
*/
|
||||
assert(nir_src_bit_size(instr->src[0]) == 32);
|
||||
const fs_builder ubld = bld.exec_all();
|
||||
|
@ -6033,7 +6033,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
|
|||
inst->shadow_compare = true;
|
||||
|
||||
if (instr->op == nir_texop_tg4 && devinfo->ver == 6)
|
||||
emit_gen6_gather_wa(key_tex->gen6_gather_wa[texture], dst);
|
||||
emit_gfx6_gather_wa(key_tex->gfx6_gather_wa[texture], dst);
|
||||
|
||||
fs_reg nir_dest[5];
|
||||
for (unsigned i = 0; i < dest_size; i++)
|
||||
|
@ -6203,7 +6203,7 @@ setup_imm_df(const fs_builder &bld, double v)
|
|||
if (devinfo->ver >= 8)
|
||||
return brw_imm_df(v);
|
||||
|
||||
/* gen7.5 does not support DF immediates straighforward but the DIM
|
||||
/* gfx7.5 does not support DF immediates straighforward but the DIM
|
||||
* instruction allows to set the 64-bit immediate value.
|
||||
*/
|
||||
if (devinfo->is_haswell) {
|
||||
|
@ -6213,13 +6213,13 @@ setup_imm_df(const fs_builder &bld, double v)
|
|||
return component(dst, 0);
|
||||
}
|
||||
|
||||
/* gen7 does not support DF immediates, so we generate a 64-bit constant by
|
||||
/* gfx7 does not support DF immediates, so we generate a 64-bit constant by
|
||||
* writing the low 32-bit of the constant to suboffset 0 of a VGRF and
|
||||
* the high 32-bit to suboffset 4 and then applying a stride of 0.
|
||||
*
|
||||
* Alternatively, we could also produce a normal VGRF (without stride 0)
|
||||
* by writing to all the channels in the VGRF, however, that would hit the
|
||||
* gen7 bug where we have to split writes that span more than 1 register
|
||||
* gfx7 bug where we have to split writes that span more than 1 register
|
||||
* into instructions with a width of 4 (otherwise the write to the second
|
||||
* register written runs into an execmask hardware bug) which isn't very
|
||||
* nice.
|
||||
|
|
|
@ -109,8 +109,8 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int dispatch_width)
|
|||
* to write into. We currently always ask for 4 registers, but we may
|
||||
* convert that to use less some day.
|
||||
*
|
||||
* Additionally, on gen5 we need aligned pairs of registers for the PLN
|
||||
* instruction, and on gen4 we need 8 contiguous regs for workaround simd16
|
||||
* Additionally, on gfx5 we need aligned pairs of registers for the PLN
|
||||
* instruction, and on gfx4 we need 8 contiguous regs for workaround simd16
|
||||
* texturing.
|
||||
*/
|
||||
const int class_count = MAX_VGRF_SIZE;
|
||||
|
@ -525,7 +525,7 @@ private:
|
|||
* Sets the mrf_used array to indicate which MRFs are used by the shader IR
|
||||
*
|
||||
* This is used in assign_regs() to decide which of the GRFs that we use as
|
||||
* MRFs on gen7 get normally register allocated, and in register spilling to
|
||||
* MRFs on gfx7 get normally register allocated, and in register spilling to
|
||||
* see if we can actually use MRFs to do spills without overwriting normal MRF
|
||||
* contents.
|
||||
*/
|
||||
|
@ -1140,8 +1140,8 @@ fs_reg_alloc::spill_reg(unsigned spill_reg)
|
|||
|
||||
/* Spills may use MRFs 13-15 in the SIMD16 case. Our texturing is done
|
||||
* using up to 11 MRFs starting from either m1 or m2, and fb writes can use
|
||||
* up to m13 (gen6+ simd16: 2 header + 8 color + 2 src0alpha + 2 omask) or
|
||||
* m15 (gen4-5 simd16: 2 header + 8 color + 1 aads + 2 src depth + 2 dst
|
||||
* up to m13 (gfx6+ simd16: 2 header + 8 color + 2 src0alpha + 2 omask) or
|
||||
* m15 (gfx4-5 simd16: 2 header + 8 color + 1 aads + 2 src depth + 2 dst
|
||||
* depth), starting from m1. In summary: We may not be able to spill in
|
||||
* SIMD16 mode, because we'd stomp the FB writes.
|
||||
*/
|
||||
|
|
|
@ -63,7 +63,7 @@ fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
|
|||
* Apply workarounds for Gen6 gather with UINT/SINT
|
||||
*/
|
||||
void
|
||||
fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst)
|
||||
fs_visitor::emit_gfx6_gather_wa(uint8_t wa, fs_reg dst)
|
||||
{
|
||||
if (!wa)
|
||||
return;
|
||||
|
@ -153,7 +153,7 @@ fs_visitor::interp_reg(int location, int channel)
|
|||
|
||||
/** Emits the interpolation for the varying inputs. */
|
||||
void
|
||||
fs_visitor::emit_interpolation_setup_gen4()
|
||||
fs_visitor::emit_interpolation_setup_gfx4()
|
||||
{
|
||||
struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
|
||||
|
||||
|
@ -267,7 +267,7 @@ fs_visitor::emit_shader_float_controls_execution_mode()
|
|||
|
||||
/** Emits the interpolation for the varying inputs. */
|
||||
void
|
||||
fs_visitor::emit_interpolation_setup_gen6()
|
||||
fs_visitor::emit_interpolation_setup_gfx6()
|
||||
{
|
||||
fs_builder abld = bld.annotate("compute pixel centers");
|
||||
|
||||
|
@ -318,7 +318,7 @@ fs_visitor::emit_interpolation_setup_gen6()
|
|||
fs_reg(stride(suboffset(gi_uw, 5), 2, 4, 0)),
|
||||
fs_reg(brw_imm_v(0x11001100)));
|
||||
|
||||
/* As of gen6, we can no longer mix float and int sources. We have
|
||||
/* As of gfx6, we can no longer mix float and int sources. We have
|
||||
* to turn the integer pixel centers into floats for their actual
|
||||
* use.
|
||||
*/
|
||||
|
@ -479,7 +479,7 @@ fs_visitor::emit_fb_writes()
|
|||
fs_inst *inst = NULL;
|
||||
|
||||
if (source_depth_to_render_target && devinfo->ver == 6) {
|
||||
/* For outputting oDepth on gen6, SIMD8 writes have to be used. This
|
||||
/* For outputting oDepth on gfx6, SIMD8 writes have to be used. This
|
||||
* would require SIMD8 moves of each half to message regs, e.g. by using
|
||||
* the SIMD lowering pass. Unfortunately this is more difficult than it
|
||||
* sounds because the SIMD8 single-source message lacks channel selects
|
||||
|
|
|
@ -123,16 +123,16 @@ brw_inst_##name(const struct gen_device_info *devinfo, const brw_inst *inst) \
|
|||
}
|
||||
|
||||
/* A macro for fields which moved as of Gen8+. */
|
||||
#define F8(name, gen4_high, gen4_low, gen8_high, gen8_low, \
|
||||
gen12_high, gen12_low) \
|
||||
#define F8(name, gfx4_high, gfx4_low, gfx8_high, gfx8_low, \
|
||||
gfx12_high, gfx12_low) \
|
||||
FF(name, \
|
||||
/* 4: */ gen4_high, gen4_low, \
|
||||
/* 4.5: */ gen4_high, gen4_low, \
|
||||
/* 5: */ gen4_high, gen4_low, \
|
||||
/* 6: */ gen4_high, gen4_low, \
|
||||
/* 7: */ gen4_high, gen4_low, \
|
||||
/* 8: */ gen8_high, gen8_low, \
|
||||
/* 12: */ gen12_high, gen12_low);
|
||||
/* 4: */ gfx4_high, gfx4_low, \
|
||||
/* 4.5: */ gfx4_high, gfx4_low, \
|
||||
/* 5: */ gfx4_high, gfx4_low, \
|
||||
/* 6: */ gfx4_high, gfx4_low, \
|
||||
/* 7: */ gfx4_high, gfx4_low, \
|
||||
/* 8: */ gfx8_high, gfx8_low, \
|
||||
/* 12: */ gfx12_high, gfx12_low);
|
||||
|
||||
/* Macro for fields that gained extra discontiguous MSBs in Gen12 (specified
|
||||
* by hi12ex-lo12ex).
|
||||
|
@ -444,8 +444,8 @@ static inline void \
|
|||
brw_inst_set_3src_a1_##reg##_type(const struct gen_device_info *devinfo, \
|
||||
brw_inst *inst, enum brw_reg_type type) \
|
||||
{ \
|
||||
UNUSED enum gen10_align1_3src_exec_type exec_type = \
|
||||
(enum gen10_align1_3src_exec_type) brw_inst_3src_a1_exec_type(devinfo, \
|
||||
UNUSED enum gfx10_align1_3src_exec_type exec_type = \
|
||||
(enum gfx10_align1_3src_exec_type) brw_inst_3src_a1_exec_type(devinfo, \
|
||||
inst); \
|
||||
if (brw_reg_type_is_floating_point(type)) { \
|
||||
assert(exec_type == BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT); \
|
||||
|
@ -460,8 +460,8 @@ static inline enum brw_reg_type \
|
|||
brw_inst_3src_a1_##reg##_type(const struct gen_device_info *devinfo, \
|
||||
const brw_inst *inst) \
|
||||
{ \
|
||||
enum gen10_align1_3src_exec_type exec_type = \
|
||||
(enum gen10_align1_3src_exec_type) brw_inst_3src_a1_exec_type(devinfo, \
|
||||
enum gfx10_align1_3src_exec_type exec_type = \
|
||||
(enum gfx10_align1_3src_exec_type) brw_inst_3src_a1_exec_type(devinfo, \
|
||||
inst); \
|
||||
unsigned hw_type = brw_inst_3src_a1_##reg##_hw_type(devinfo, inst); \
|
||||
return brw_a1_hw_3src_type_to_reg_type(devinfo, hw_type, exec_type); \
|
||||
|
@ -603,9 +603,9 @@ brw_inst_##name(const struct gen_device_info *devinfo, const brw_inst *inst) \
|
|||
return brw_inst_bits(inst, high, low); \
|
||||
}
|
||||
|
||||
FJ(gen6_jump_count, 63, 48, devinfo->ver == 6)
|
||||
FJ(gen4_jump_count, 111, 96, devinfo->ver < 6)
|
||||
FC(gen4_pop_count, /* 4+ */ 115, 112, /* 12+ */ -1, -1, devinfo->ver < 6)
|
||||
FJ(gfx6_jump_count, 63, 48, devinfo->ver == 6)
|
||||
FJ(gfx4_jump_count, 111, 96, devinfo->ver < 6)
|
||||
FC(gfx4_pop_count, /* 4+ */ 115, 112, /* 12+ */ -1, -1, devinfo->ver < 6)
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
|
@ -1354,14 +1354,14 @@ brw_compact_inst_set_bits(brw_compact_inst *inst, unsigned high, unsigned low,
|
|||
inst->data = (inst->data & ~mask) | (value << low);
|
||||
}
|
||||
|
||||
#define FC(name, high, low, gen12_high, gen12_low, assertions) \
|
||||
#define FC(name, high, low, gfx12_high, gfx12_low, assertions) \
|
||||
static inline void \
|
||||
brw_compact_inst_set_##name(const struct gen_device_info *devinfo, \
|
||||
brw_compact_inst *inst, unsigned v) \
|
||||
{ \
|
||||
assert(assertions); \
|
||||
if (devinfo->ver >= 12) \
|
||||
brw_compact_inst_set_bits(inst, gen12_high, gen12_low, v); \
|
||||
brw_compact_inst_set_bits(inst, gfx12_high, gfx12_low, v); \
|
||||
else \
|
||||
brw_compact_inst_set_bits(inst, high, low, v); \
|
||||
} \
|
||||
|
@ -1371,7 +1371,7 @@ brw_compact_inst_##name(const struct gen_device_info *devinfo, \
|
|||
{ \
|
||||
assert(assertions); \
|
||||
if (devinfo->ver >= 12) \
|
||||
return brw_compact_inst_bits(inst, gen12_high, gen12_low); \
|
||||
return brw_compact_inst_bits(inst, gfx12_high, gfx12_low); \
|
||||
else \
|
||||
return brw_compact_inst_bits(inst, high, low); \
|
||||
}
|
||||
|
@ -1379,8 +1379,8 @@ brw_compact_inst_##name(const struct gen_device_info *devinfo, \
|
|||
/* A simple macro for fields which stay in the same place on all generations
|
||||
* except for Gen12.
|
||||
*/
|
||||
#define F(name, high, low, gen12_high, gen12_low) \
|
||||
FC(name, high, low, gen12_high, gen12_low, true)
|
||||
#define F(name, high, low, gfx12_high, gfx12_low) \
|
||||
FC(name, high, low, gfx12_high, gfx12_low, true)
|
||||
|
||||
F(src1_reg_nr, /* 4+ */ 63, 56, /* 12+ */ 63, 56)
|
||||
F(src0_reg_nr, /* 4+ */ 55, 48, /* 12+ */ 47, 40)
|
||||
|
|
|
@ -36,7 +36,7 @@ static char const *get_qual_name(int mode)
|
|||
}
|
||||
|
||||
static void
|
||||
gen4_frag_prog_set_interp_modes(struct brw_wm_prog_data *prog_data,
|
||||
gfx4_frag_prog_set_interp_modes(struct brw_wm_prog_data *prog_data,
|
||||
const struct brw_vue_map *vue_map,
|
||||
unsigned location, unsigned slot_count,
|
||||
enum glsl_interp_mode interp)
|
||||
|
@ -79,12 +79,12 @@ brw_setup_vue_interpolation(const struct brw_vue_map *vue_map, nir_shader *nir,
|
|||
unsigned location = var->data.location;
|
||||
unsigned slot_count = glsl_count_attribute_slots(var->type, false);
|
||||
|
||||
gen4_frag_prog_set_interp_modes(prog_data, vue_map, location, slot_count,
|
||||
gfx4_frag_prog_set_interp_modes(prog_data, vue_map, location, slot_count,
|
||||
var->data.interpolation);
|
||||
|
||||
if (location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1) {
|
||||
location = location + VARYING_SLOT_BFC0 - VARYING_SLOT_COL0;
|
||||
gen4_frag_prog_set_interp_modes(prog_data, vue_map, location,
|
||||
gfx4_frag_prog_set_interp_modes(prog_data, vue_map, location,
|
||||
slot_count, var->data.interpolation);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -281,9 +281,9 @@ public:
|
|||
|
||||
enum brw_urb_write_flags urb_write_flags;
|
||||
|
||||
unsigned sol_binding; /**< gen6: SOL binding table index */
|
||||
bool sol_final_write; /**< gen6: send commit message */
|
||||
unsigned sol_vertex; /**< gen6: used for setting dst index in SVB header */
|
||||
unsigned sol_binding; /**< gfx6: SOL binding table index */
|
||||
bool sol_final_write; /**< gfx6: send commit message */
|
||||
unsigned sol_vertex; /**< gfx6: used for setting dst index in SVB header */
|
||||
|
||||
bool is_send_from_grf() const;
|
||||
unsigned size_read(unsigned arg) const;
|
||||
|
|
|
@ -542,8 +542,8 @@ brw_nir_no_indirect_mask(const struct brw_compiler *compiler,
|
|||
* using nir_lower_vars_to_explicit_types and nir_lower_explicit_io in
|
||||
* brw_postprocess_nir.
|
||||
*
|
||||
* We haven't plumbed through the indirect scratch messages on gen6 or
|
||||
* earlier so doing indirects via scratch doesn't work there. On gen7 and
|
||||
* We haven't plumbed through the indirect scratch messages on gfx6 or
|
||||
* earlier so doing indirects via scratch doesn't work there. On gfx7 and
|
||||
* earlier the scratch space size is limited to 12kB. If we allowed
|
||||
* indirects as scratch all the time, we may easily exceed this limit
|
||||
* without having any fallback.
|
||||
|
|
|
@ -61,7 +61,7 @@ struct gen_device_info;
|
|||
/**
|
||||
* First GRF used for the MRF hack.
|
||||
*
|
||||
* On gen7, MRFs are no longer used, and contiguous GRFs are used instead. We
|
||||
* On gfx7, MRFs are no longer used, and contiguous GRFs are used instead. We
|
||||
* haven't converted our compiler to be aware of this, so it asks for MRFs and
|
||||
* brw_eu_emit.c quietly converts them to be accesses of the top GRFs. The
|
||||
* register allocators have to be careful of this to avoid corrupting the "MRF"s
|
||||
|
@ -418,7 +418,7 @@ brw_reg(enum brw_reg_file file,
|
|||
else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
|
||||
assert(nr <= BRW_ARF_TIMESTAMP);
|
||||
/* Asserting on the MRF register number requires to know the hardware gen
|
||||
* (gen6 has 24 MRF registers), which we don't know here, so we assert
|
||||
* (gfx6 has 24 MRF registers), which we don't know here, so we assert
|
||||
* for that in the generators and in brw_eu_emit.c
|
||||
*/
|
||||
|
||||
|
|
|
@ -91,7 +91,7 @@ enum hw_imm_type {
|
|||
static const struct hw_type {
|
||||
enum hw_reg_type reg_type;
|
||||
enum hw_imm_type imm_type;
|
||||
} gen4_hw_type[] = {
|
||||
} gfx4_hw_type[] = {
|
||||
[0 ... BRW_REGISTER_TYPE_LAST] = { INVALID, INVALID },
|
||||
|
||||
[BRW_REGISTER_TYPE_F] = { BRW_HW_REG_TYPE_F, BRW_HW_IMM_TYPE_F },
|
||||
|
@ -104,7 +104,7 @@ static const struct hw_type {
|
|||
[BRW_REGISTER_TYPE_B] = { BRW_HW_REG_TYPE_B, INVALID },
|
||||
[BRW_REGISTER_TYPE_UB] = { BRW_HW_REG_TYPE_UB, INVALID },
|
||||
[BRW_REGISTER_TYPE_V] = { INVALID, BRW_HW_IMM_TYPE_V },
|
||||
}, gen6_hw_type[] = {
|
||||
}, gfx6_hw_type[] = {
|
||||
[0 ... BRW_REGISTER_TYPE_LAST] = { INVALID, INVALID },
|
||||
|
||||
[BRW_REGISTER_TYPE_F] = { BRW_HW_REG_TYPE_F, BRW_HW_IMM_TYPE_F },
|
||||
|
@ -118,7 +118,7 @@ static const struct hw_type {
|
|||
[BRW_REGISTER_TYPE_UB] = { BRW_HW_REG_TYPE_UB, INVALID },
|
||||
[BRW_REGISTER_TYPE_V] = { INVALID, BRW_HW_IMM_TYPE_V },
|
||||
[BRW_REGISTER_TYPE_UV] = { INVALID, BRW_HW_IMM_TYPE_UV },
|
||||
}, gen7_hw_type[] = {
|
||||
}, gfx7_hw_type[] = {
|
||||
[0 ... BRW_REGISTER_TYPE_LAST] = { INVALID, INVALID },
|
||||
|
||||
[BRW_REGISTER_TYPE_DF] = { GFX7_HW_REG_TYPE_DF, INVALID },
|
||||
|
@ -133,7 +133,7 @@ static const struct hw_type {
|
|||
[BRW_REGISTER_TYPE_UB] = { BRW_HW_REG_TYPE_UB, INVALID },
|
||||
[BRW_REGISTER_TYPE_V] = { INVALID, BRW_HW_IMM_TYPE_V },
|
||||
[BRW_REGISTER_TYPE_UV] = { INVALID, BRW_HW_IMM_TYPE_UV },
|
||||
}, gen8_hw_type[] = {
|
||||
}, gfx8_hw_type[] = {
|
||||
[0 ... BRW_REGISTER_TYPE_LAST] = { INVALID, INVALID },
|
||||
|
||||
[BRW_REGISTER_TYPE_DF] = { GFX7_HW_REG_TYPE_DF, GFX8_HW_IMM_TYPE_DF },
|
||||
|
@ -151,7 +151,7 @@ static const struct hw_type {
|
|||
[BRW_REGISTER_TYPE_UB] = { BRW_HW_REG_TYPE_UB, INVALID },
|
||||
[BRW_REGISTER_TYPE_V] = { INVALID, BRW_HW_IMM_TYPE_V },
|
||||
[BRW_REGISTER_TYPE_UV] = { INVALID, BRW_HW_IMM_TYPE_UV },
|
||||
}, gen11_hw_type[] = {
|
||||
}, gfx11_hw_type[] = {
|
||||
[0 ... BRW_REGISTER_TYPE_LAST] = { INVALID, INVALID },
|
||||
|
||||
[BRW_REGISTER_TYPE_NF] = { GFX11_HW_REG_TYPE_NF, INVALID },
|
||||
|
@ -167,7 +167,7 @@ static const struct hw_type {
|
|||
[BRW_REGISTER_TYPE_UB] = { GFX11_HW_REG_TYPE_UB, INVALID },
|
||||
[BRW_REGISTER_TYPE_V] = { INVALID, GFX11_HW_IMM_TYPE_V },
|
||||
[BRW_REGISTER_TYPE_UV] = { INVALID, GFX11_HW_IMM_TYPE_UV },
|
||||
}, gen12_hw_type[] = {
|
||||
}, gfx12_hw_type[] = {
|
||||
[0 ... BRW_REGISTER_TYPE_LAST] = { INVALID, INVALID },
|
||||
|
||||
[BRW_REGISTER_TYPE_F] = { GFX12_HW_REG_TYPE_FLOAT(2), GFX12_HW_REG_TYPE_FLOAT(2) },
|
||||
|
@ -219,19 +219,19 @@ enum hw_3src_reg_type {
|
|||
|
||||
static const struct hw_3src_type {
|
||||
enum hw_3src_reg_type reg_type;
|
||||
enum gen10_align1_3src_exec_type exec_type;
|
||||
} gen6_hw_3src_type[] = {
|
||||
enum gfx10_align1_3src_exec_type exec_type;
|
||||
} gfx6_hw_3src_type[] = {
|
||||
[0 ... BRW_REGISTER_TYPE_LAST] = { INVALID },
|
||||
|
||||
[BRW_REGISTER_TYPE_F] = { GFX7_3SRC_TYPE_F },
|
||||
}, gen7_hw_3src_type[] = {
|
||||
}, gfx7_hw_3src_type[] = {
|
||||
[0 ... BRW_REGISTER_TYPE_LAST] = { INVALID },
|
||||
|
||||
[BRW_REGISTER_TYPE_F] = { GFX7_3SRC_TYPE_F },
|
||||
[BRW_REGISTER_TYPE_D] = { GFX7_3SRC_TYPE_D },
|
||||
[BRW_REGISTER_TYPE_UD] = { GFX7_3SRC_TYPE_UD },
|
||||
[BRW_REGISTER_TYPE_DF] = { GFX7_3SRC_TYPE_DF },
|
||||
}, gen8_hw_3src_type[] = {
|
||||
}, gfx8_hw_3src_type[] = {
|
||||
[0 ... BRW_REGISTER_TYPE_LAST] = { INVALID },
|
||||
|
||||
[BRW_REGISTER_TYPE_F] = { GFX7_3SRC_TYPE_F },
|
||||
|
@ -239,7 +239,7 @@ static const struct hw_3src_type {
|
|||
[BRW_REGISTER_TYPE_UD] = { GFX7_3SRC_TYPE_UD },
|
||||
[BRW_REGISTER_TYPE_DF] = { GFX7_3SRC_TYPE_DF },
|
||||
[BRW_REGISTER_TYPE_HF] = { GFX8_3SRC_TYPE_HF },
|
||||
}, gen10_hw_3src_align1_type[] = {
|
||||
}, gfx10_hw_3src_align1_type[] = {
|
||||
#define E(x) BRW_ALIGN1_3SRC_EXEC_TYPE_##x
|
||||
[0 ... BRW_REGISTER_TYPE_LAST] = { INVALID },
|
||||
|
||||
|
@ -253,7 +253,7 @@ static const struct hw_3src_type {
|
|||
[BRW_REGISTER_TYPE_UW] = { GFX10_ALIGN1_3SRC_REG_TYPE_UW, E(INT) },
|
||||
[BRW_REGISTER_TYPE_B] = { GFX10_ALIGN1_3SRC_REG_TYPE_B, E(INT) },
|
||||
[BRW_REGISTER_TYPE_UB] = { GFX10_ALIGN1_3SRC_REG_TYPE_UB, E(INT) },
|
||||
}, gen11_hw_3src_type[] = {
|
||||
}, gfx11_hw_3src_type[] = {
|
||||
[0 ... BRW_REGISTER_TYPE_LAST] = { INVALID },
|
||||
|
||||
[BRW_REGISTER_TYPE_NF] = { GFX11_ALIGN1_3SRC_REG_TYPE_NF, E(FLOAT) },
|
||||
|
@ -266,7 +266,7 @@ static const struct hw_3src_type {
|
|||
[BRW_REGISTER_TYPE_UW] = { GFX10_ALIGN1_3SRC_REG_TYPE_UW, E(INT) },
|
||||
[BRW_REGISTER_TYPE_B] = { GFX10_ALIGN1_3SRC_REG_TYPE_B, E(INT) },
|
||||
[BRW_REGISTER_TYPE_UB] = { GFX10_ALIGN1_3SRC_REG_TYPE_UB, E(INT) },
|
||||
}, gen12_hw_3src_type[] = {
|
||||
}, gfx12_hw_3src_type[] = {
|
||||
[0 ... BRW_REGISTER_TYPE_LAST] = { INVALID },
|
||||
|
||||
[BRW_REGISTER_TYPE_F] = { GFX12_HW_REG_TYPE_UINT(2), E(FLOAT), },
|
||||
|
@ -294,23 +294,23 @@ brw_reg_type_to_hw_type(const struct gen_device_info *devinfo,
|
|||
const struct hw_type *table;
|
||||
|
||||
if (devinfo->ver >= 12) {
|
||||
assert(type < ARRAY_SIZE(gen12_hw_type));
|
||||
table = gen12_hw_type;
|
||||
assert(type < ARRAY_SIZE(gfx12_hw_type));
|
||||
table = gfx12_hw_type;
|
||||
} else if (devinfo->ver >= 11) {
|
||||
assert(type < ARRAY_SIZE(gen11_hw_type));
|
||||
table = gen11_hw_type;
|
||||
assert(type < ARRAY_SIZE(gfx11_hw_type));
|
||||
table = gfx11_hw_type;
|
||||
} else if (devinfo->ver >= 8) {
|
||||
assert(type < ARRAY_SIZE(gen8_hw_type));
|
||||
table = gen8_hw_type;
|
||||
assert(type < ARRAY_SIZE(gfx8_hw_type));
|
||||
table = gfx8_hw_type;
|
||||
} else if (devinfo->ver >= 7) {
|
||||
assert(type < ARRAY_SIZE(gen7_hw_type));
|
||||
table = gen7_hw_type;
|
||||
assert(type < ARRAY_SIZE(gfx7_hw_type));
|
||||
table = gfx7_hw_type;
|
||||
} else if (devinfo->ver >= 6) {
|
||||
assert(type < ARRAY_SIZE(gen6_hw_type));
|
||||
table = gen6_hw_type;
|
||||
assert(type < ARRAY_SIZE(gfx6_hw_type));
|
||||
table = gfx6_hw_type;
|
||||
} else {
|
||||
assert(type < ARRAY_SIZE(gen4_hw_type));
|
||||
table = gen4_hw_type;
|
||||
assert(type < ARRAY_SIZE(gfx4_hw_type));
|
||||
table = gfx4_hw_type;
|
||||
}
|
||||
|
||||
if (file == BRW_IMMEDIATE_VALUE) {
|
||||
|
@ -334,17 +334,17 @@ brw_hw_type_to_reg_type(const struct gen_device_info *devinfo,
|
|||
const struct hw_type *table;
|
||||
|
||||
if (devinfo->ver >= 12) {
|
||||
table = gen12_hw_type;
|
||||
table = gfx12_hw_type;
|
||||
} else if (devinfo->ver >= 11) {
|
||||
table = gen11_hw_type;
|
||||
table = gfx11_hw_type;
|
||||
} else if (devinfo->ver >= 8) {
|
||||
table = gen8_hw_type;
|
||||
table = gfx8_hw_type;
|
||||
} else if (devinfo->ver >= 7) {
|
||||
table = gen7_hw_type;
|
||||
table = gfx7_hw_type;
|
||||
} else if (devinfo->ver >= 6) {
|
||||
table = gen6_hw_type;
|
||||
table = gfx6_hw_type;
|
||||
} else {
|
||||
table = gen4_hw_type;
|
||||
table = gfx4_hw_type;
|
||||
}
|
||||
|
||||
if (file == BRW_IMMEDIATE_VALUE) {
|
||||
|
@ -374,14 +374,14 @@ brw_reg_type_to_a16_hw_3src_type(const struct gen_device_info *devinfo,
|
|||
const struct hw_3src_type *table;
|
||||
|
||||
if (devinfo->ver >= 8) {
|
||||
assert(type < ARRAY_SIZE(gen8_hw_3src_type));
|
||||
table = gen8_hw_3src_type;
|
||||
assert(type < ARRAY_SIZE(gfx8_hw_3src_type));
|
||||
table = gfx8_hw_3src_type;
|
||||
} else if (devinfo->ver >= 7) {
|
||||
assert(type < ARRAY_SIZE(gen7_hw_3src_type));
|
||||
table = gen7_hw_3src_type;
|
||||
assert(type < ARRAY_SIZE(gfx7_hw_3src_type));
|
||||
table = gfx7_hw_3src_type;
|
||||
} else {
|
||||
assert(type < ARRAY_SIZE(gen6_hw_3src_type));
|
||||
table = gen6_hw_3src_type;
|
||||
assert(type < ARRAY_SIZE(gfx6_hw_3src_type));
|
||||
table = gfx6_hw_3src_type;
|
||||
}
|
||||
|
||||
assert(table[type].reg_type != (enum hw_3src_reg_type)INVALID);
|
||||
|
@ -397,14 +397,14 @@ brw_reg_type_to_a1_hw_3src_type(const struct gen_device_info *devinfo,
|
|||
enum brw_reg_type type)
|
||||
{
|
||||
if (devinfo->ver >= 12) {
|
||||
assert(type < ARRAY_SIZE(gen12_hw_3src_type));
|
||||
return gen12_hw_3src_type[type].reg_type;
|
||||
assert(type < ARRAY_SIZE(gfx12_hw_3src_type));
|
||||
return gfx12_hw_3src_type[type].reg_type;
|
||||
} else if (devinfo->ver >= 11) {
|
||||
assert(type < ARRAY_SIZE(gen11_hw_3src_type));
|
||||
return gen11_hw_3src_type[type].reg_type;
|
||||
assert(type < ARRAY_SIZE(gfx11_hw_3src_type));
|
||||
return gfx11_hw_3src_type[type].reg_type;
|
||||
} else {
|
||||
assert(type < ARRAY_SIZE(gen10_hw_3src_align1_type));
|
||||
return gen10_hw_3src_align1_type[type].reg_type;
|
||||
assert(type < ARRAY_SIZE(gfx10_hw_3src_align1_type));
|
||||
return gfx10_hw_3src_align1_type[type].reg_type;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -419,11 +419,11 @@ brw_a16_hw_3src_type_to_reg_type(const struct gen_device_info *devinfo,
|
|||
const struct hw_3src_type *table = NULL;
|
||||
|
||||
if (devinfo->ver >= 8) {
|
||||
table = gen8_hw_3src_type;
|
||||
table = gfx8_hw_3src_type;
|
||||
} else if (devinfo->ver >= 7) {
|
||||
table = gen7_hw_3src_type;
|
||||
table = gfx7_hw_3src_type;
|
||||
} else if (devinfo->ver >= 6) {
|
||||
table = gen6_hw_3src_type;
|
||||
table = gfx6_hw_3src_type;
|
||||
}
|
||||
|
||||
for (enum brw_reg_type i = 0; i <= BRW_REGISTER_TYPE_LAST; i++) {
|
||||
|
@ -442,9 +442,9 @@ enum brw_reg_type
|
|||
brw_a1_hw_3src_type_to_reg_type(const struct gen_device_info *devinfo,
|
||||
unsigned hw_type, unsigned exec_type)
|
||||
{
|
||||
const struct hw_3src_type *table = (devinfo->ver >= 12 ? gen12_hw_3src_type :
|
||||
devinfo->ver >= 11 ? gen11_hw_3src_type :
|
||||
gen10_hw_3src_align1_type);
|
||||
const struct hw_3src_type *table = (devinfo->ver >= 12 ? gfx12_hw_3src_type :
|
||||
devinfo->ver >= 11 ? gfx11_hw_3src_type :
|
||||
gfx10_hw_3src_align1_type);
|
||||
|
||||
for (enum brw_reg_type i = 0; i <= BRW_REGISTER_TYPE_LAST; i++) {
|
||||
if (table[i].reg_type == hw_type &&
|
||||
|
|
|
@ -63,8 +63,8 @@ class schedule_node : public exec_node
|
|||
{
|
||||
public:
|
||||
schedule_node(backend_instruction *inst, instruction_scheduler *sched);
|
||||
void set_latency_gen4();
|
||||
void set_latency_gen7(bool is_haswell);
|
||||
void set_latency_gfx4();
|
||||
void set_latency_gfx7(bool is_haswell);
|
||||
|
||||
backend_instruction *inst;
|
||||
schedule_node **children;
|
||||
|
@ -115,7 +115,7 @@ exit_unblocked_time(const schedule_node *n)
|
|||
}
|
||||
|
||||
void
|
||||
schedule_node::set_latency_gen4()
|
||||
schedule_node::set_latency_gfx4()
|
||||
{
|
||||
int chans = 8;
|
||||
int math_latency = 22;
|
||||
|
@ -153,7 +153,7 @@ schedule_node::set_latency_gen4()
|
|||
}
|
||||
|
||||
void
|
||||
schedule_node::set_latency_gen7(bool is_haswell)
|
||||
schedule_node::set_latency_gfx7(bool is_haswell)
|
||||
{
|
||||
switch (inst->opcode) {
|
||||
case BRW_OPCODE_MAD:
|
||||
|
@ -935,9 +935,9 @@ schedule_node::schedule_node(backend_instruction *inst,
|
|||
if (!sched->post_reg_alloc)
|
||||
this->latency = 1;
|
||||
else if (devinfo->ver >= 6)
|
||||
set_latency_gen7(devinfo->is_haswell);
|
||||
set_latency_gfx7(devinfo->is_haswell);
|
||||
else
|
||||
set_latency_gen4();
|
||||
set_latency_gfx4();
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1784,7 +1784,7 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
|
|||
cand_generation++;
|
||||
|
||||
/* Shared resource: the mathbox. There's one mathbox per EU on Gen6+
|
||||
* but it's more limited pre-gen6, so if we send something off to it then
|
||||
* but it's more limited pre-gfx6, so if we send something off to it then
|
||||
* the next math instruction isn't going to make progress until the first
|
||||
* is done.
|
||||
*/
|
||||
|
|
|
@ -360,21 +360,21 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
|
|||
return "pack";
|
||||
|
||||
case SHADER_OPCODE_GEN4_SCRATCH_READ:
|
||||
return "gen4_scratch_read";
|
||||
return "gfx4_scratch_read";
|
||||
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
|
||||
return "gen4_scratch_write";
|
||||
return "gfx4_scratch_write";
|
||||
case SHADER_OPCODE_GEN7_SCRATCH_READ:
|
||||
return "gen7_scratch_read";
|
||||
return "gfx7_scratch_read";
|
||||
case SHADER_OPCODE_SCRATCH_HEADER:
|
||||
return "scratch_header";
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8:
|
||||
return "gen8_urb_write_simd8";
|
||||
return "gfx8_urb_write_simd8";
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
|
||||
return "gen8_urb_write_simd8_per_slot";
|
||||
return "gfx8_urb_write_simd8_per_slot";
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
|
||||
return "gen8_urb_write_simd8_masked";
|
||||
return "gfx8_urb_write_simd8_masked";
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
|
||||
return "gen8_urb_write_simd8_masked_per_slot";
|
||||
return "gfx8_urb_write_simd8_masked_per_slot";
|
||||
case SHADER_OPCODE_URB_READ_SIMD8:
|
||||
return "urb_read_simd8";
|
||||
case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
|
||||
|
@ -442,9 +442,9 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
|
|||
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
|
||||
return "uniform_pull_const";
|
||||
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
|
||||
return "uniform_pull_const_gen7";
|
||||
return "uniform_pull_const_gfx7";
|
||||
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4:
|
||||
return "varying_pull_const_gen4";
|
||||
return "varying_pull_const_gfx4";
|
||||
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
|
||||
return "varying_pull_const_logical";
|
||||
|
||||
|
@ -469,7 +469,7 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
|
|||
case VS_OPCODE_PULL_CONSTANT_LOAD:
|
||||
return "pull_constant_load";
|
||||
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
|
||||
return "pull_constant_load_gen7";
|
||||
return "pull_constant_load_gfx7";
|
||||
|
||||
case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
|
||||
return "unpack_flags_simd4x2";
|
||||
|
|
|
@ -920,7 +920,7 @@ vec4_visitor::move_push_constants_to_pull_constants()
|
|||
int pull_constant_loc[this->uniforms];
|
||||
|
||||
/* Only allow 32 registers (256 uniform components) as push constants,
|
||||
* which is the limit on gen6.
|
||||
* which is the limit on gfx6.
|
||||
*
|
||||
* If changing this value, note the limitation about total_regs in
|
||||
* brw_curbe.c.
|
||||
|
@ -1304,7 +1304,7 @@ vec4_visitor::opt_register_coalesce()
|
|||
break;
|
||||
|
||||
if (devinfo->ver == 6) {
|
||||
/* gen6 math instructions must have the destination be
|
||||
/* gfx6 math instructions must have the destination be
|
||||
* VGRF, so no compute-to-MRF for them.
|
||||
*/
|
||||
if (scan_inst->is_math()) {
|
||||
|
@ -1812,7 +1812,7 @@ vec4_visitor::setup_uniforms(int reg)
|
|||
{
|
||||
prog_data->base.dispatch_grf_start_reg = reg;
|
||||
|
||||
/* The pre-gen6 VS requires that some push constants get loaded no
|
||||
/* The pre-gfx6 VS requires that some push constants get loaded no
|
||||
* matter what, or the GPU would hang.
|
||||
*/
|
||||
if (devinfo->ver < 6 && this->uniforms == 0) {
|
||||
|
@ -2189,7 +2189,7 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
|
|||
unsigned lowered_width = MIN2(16, inst->exec_size);
|
||||
|
||||
/* We need to split some cases of double-precision instructions that write
|
||||
* 2 registers. We only need to care about this in gen7 because that is the
|
||||
* 2 registers. We only need to care about this in gfx7 because that is the
|
||||
* only hardware that implements fp64 in Align16.
|
||||
*/
|
||||
if (devinfo->ver == 7 && inst->size_written > REG_SIZE) {
|
||||
|
@ -2212,7 +2212,7 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
|
|||
lowered_width = MIN2(lowered_width, 4);
|
||||
|
||||
/* Interleaved attribute setups use a vertical stride of 0, which
|
||||
* makes them hit the associated instruction decompression bug in gen7.
|
||||
* makes them hit the associated instruction decompression bug in gfx7.
|
||||
* Split them to prevent this.
|
||||
*/
|
||||
if (inst->src[i].file == ATTR &&
|
||||
|
@ -2224,7 +2224,7 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
|
|||
/* IvyBridge can manage a maximum of 4 DFs per SIMD4x2 instruction, since
|
||||
* it doesn't support compression in Align16 mode, no matter if it has
|
||||
* force_writemask_all enabled or disabled (the latter is affected by the
|
||||
* compressed instruction bug in gen7, which is another reason to enforce
|
||||
* compressed instruction bug in gfx7, which is another reason to enforce
|
||||
* this limit).
|
||||
*/
|
||||
if (devinfo->ver == 7 && !devinfo->is_haswell &&
|
||||
|
@ -2384,7 +2384,7 @@ scalarize_predicate(brw_predicate predicate, unsigned writemask)
|
|||
* handful of additional swizzles natively.
|
||||
*/
|
||||
static bool
|
||||
is_gen7_supported_64bit_swizzle(vec4_instruction *inst, unsigned arg)
|
||||
is_gfx7_supported_64bit_swizzle(vec4_instruction *inst, unsigned arg)
|
||||
{
|
||||
switch (inst->src[arg].swizzle) {
|
||||
case BRW_SWIZZLE_XXXX:
|
||||
|
@ -2438,7 +2438,7 @@ vec4_visitor::is_supported_64bit_region(vec4_instruction *inst, unsigned arg)
|
|||
case BRW_SWIZZLE_YXWZ:
|
||||
return true;
|
||||
default:
|
||||
return devinfo->ver == 7 && is_gen7_supported_64bit_swizzle(inst, arg);
|
||||
return devinfo->ver == 7 && is_gfx7_supported_64bit_swizzle(inst, arg);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2599,7 +2599,7 @@ vec4_visitor::apply_logical_swizzle(struct brw_reg *hw_reg,
|
|||
hw_reg->width = BRW_WIDTH_2;
|
||||
|
||||
if (is_supported_64bit_region(inst, arg) &&
|
||||
!is_gen7_supported_64bit_swizzle(inst, arg)) {
|
||||
!is_gfx7_supported_64bit_swizzle(inst, arg)) {
|
||||
/* Supported 64-bit swizzles are those such that their first two
|
||||
* components, when expanded to 32-bit swizzles, match the semantics
|
||||
* of the original 64-bit swizzle with 2-wide row regioning.
|
||||
|
@ -2614,9 +2614,9 @@ vec4_visitor::apply_logical_swizzle(struct brw_reg *hw_reg,
|
|||
* 1. An unsupported swizzle, which should be single-value thanks to the
|
||||
* scalarization pass.
|
||||
*
|
||||
* 2. A gen7 supported swizzle. These can be single-value or double-value
|
||||
* 2. A gfx7 supported swizzle. These can be single-value or double-value
|
||||
* swizzles. If the latter, they are never cross-dvec2 channels. For
|
||||
* these we always need to activate the gen7 vstride=0 exploit.
|
||||
* these we always need to activate the gfx7 vstride=0 exploit.
|
||||
*/
|
||||
unsigned swizzle0 = BRW_GET_SWZ(reg.swizzle, 0);
|
||||
unsigned swizzle1 = BRW_GET_SWZ(reg.swizzle, 1);
|
||||
|
@ -2631,15 +2631,15 @@ vec4_visitor::apply_logical_swizzle(struct brw_reg *hw_reg,
|
|||
swizzle1 -= 2;
|
||||
}
|
||||
|
||||
/* All gen7-specific supported swizzles require the vstride=0 exploit */
|
||||
if (devinfo->ver == 7 && is_gen7_supported_64bit_swizzle(inst, arg))
|
||||
/* All gfx7-specific supported swizzles require the vstride=0 exploit */
|
||||
if (devinfo->ver == 7 && is_gfx7_supported_64bit_swizzle(inst, arg))
|
||||
hw_reg->vstride = BRW_VERTICAL_STRIDE_0;
|
||||
|
||||
/* Any 64-bit source with an offset at 16B is intended to address the
|
||||
* second half of a register and needs a vertical stride of 0 so we:
|
||||
*
|
||||
* 1. Don't violate register region restrictions.
|
||||
* 2. Activate the gen7 instruction decompression bug exploit when
|
||||
* 2. Activate the gfx7 instruction decompression bug exploit when
|
||||
* execsize > 4
|
||||
*/
|
||||
if (hw_reg->subnr % REG_SIZE == 16) {
|
||||
|
|
|
@ -270,7 +270,7 @@ public:
|
|||
|
||||
src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
|
||||
src_reg surface);
|
||||
void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
|
||||
void emit_gfx6_gather_wa(uint8_t wa, dst_reg dst);
|
||||
|
||||
void emit_ndc_computation();
|
||||
void emit_psiz_and_flags(dst_reg reg);
|
||||
|
|
|
@ -455,7 +455,7 @@ namespace brw {
|
|||
*
|
||||
* CMP null<d> src0<f> src1<f>
|
||||
*
|
||||
* Original gen4 does type conversion to the destination type
|
||||
* Original gfx4 does type conversion to the destination type
|
||||
* before comparison, producing garbage results for floating
|
||||
* point comparisons.
|
||||
*
|
||||
|
@ -480,7 +480,7 @@ namespace brw {
|
|||
*
|
||||
* CMPN null<d> src0<f> src1<f>
|
||||
*
|
||||
* Original gen4 does type conversion to the destination type
|
||||
* Original gfx4 does type conversion to the destination type
|
||||
* before comparison, producing garbage results for floating
|
||||
* point comparisons.
|
||||
*
|
||||
|
@ -587,14 +587,14 @@ namespace brw {
|
|||
src_reg
|
||||
fix_math_operand(const src_reg &src) const
|
||||
{
|
||||
/* The gen6 math instruction ignores the source modifiers --
|
||||
/* The gfx6 math instruction ignores the source modifiers --
|
||||
* swizzle, abs, negate, and at least some parts of the register
|
||||
* region description.
|
||||
*
|
||||
* Rather than trying to enumerate all these cases, *always* expand the
|
||||
* operand to a temp GRF for gen6.
|
||||
* operand to a temp GRF for gfx6.
|
||||
*
|
||||
* For gen7, keep the operand as-is, except if immediate, which gen7 still
|
||||
* For gfx7, keep the operand as-is, except if immediate, which gfx7 still
|
||||
* can't use.
|
||||
*/
|
||||
if (shader->devinfo->ver == 6 ||
|
||||
|
|
|
@ -346,7 +346,7 @@ try_copy_propagate(const struct gen_device_info *devinfo,
|
|||
|
||||
bool has_source_modifiers = value.negate || value.abs;
|
||||
|
||||
/* gen6 math and gen7+ SENDs from GRFs ignore source modifiers on
|
||||
/* gfx6 math and gfx7+ SENDs from GRFs ignore source modifiers on
|
||||
* instructions.
|
||||
*/
|
||||
if (has_source_modifiers && !inst->can_do_source_mods(devinfo))
|
||||
|
|
|
@ -29,12 +29,12 @@
|
|||
using namespace brw;
|
||||
|
||||
static void
|
||||
generate_math1_gen4(struct brw_codegen *p,
|
||||
generate_math1_gfx4(struct brw_codegen *p,
|
||||
vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src)
|
||||
{
|
||||
gen4_math(p,
|
||||
gfx4_math(p,
|
||||
dst,
|
||||
brw_math_function(inst->opcode),
|
||||
inst->base_mrf,
|
||||
|
@ -43,7 +43,7 @@ generate_math1_gen4(struct brw_codegen *p,
|
|||
}
|
||||
|
||||
static void
|
||||
check_gen6_math_src_arg(struct brw_reg src)
|
||||
check_gfx6_math_src_arg(struct brw_reg src)
|
||||
{
|
||||
/* Source swizzles are ignored. */
|
||||
assert(!src.abs);
|
||||
|
@ -52,7 +52,7 @@ check_gen6_math_src_arg(struct brw_reg src)
|
|||
}
|
||||
|
||||
static void
|
||||
generate_math_gen6(struct brw_codegen *p,
|
||||
generate_math_gfx6(struct brw_codegen *p,
|
||||
vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src0,
|
||||
|
@ -61,17 +61,17 @@ generate_math_gen6(struct brw_codegen *p,
|
|||
/* Can't do writemask because math can't be align16. */
|
||||
assert(dst.writemask == WRITEMASK_XYZW);
|
||||
/* Source swizzles are ignored. */
|
||||
check_gen6_math_src_arg(src0);
|
||||
check_gfx6_math_src_arg(src0);
|
||||
if (src1.file == BRW_GENERAL_REGISTER_FILE)
|
||||
check_gen6_math_src_arg(src1);
|
||||
check_gfx6_math_src_arg(src1);
|
||||
|
||||
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
||||
gen6_math(p, dst, brw_math_function(inst->opcode), src0, src1);
|
||||
gfx6_math(p, dst, brw_math_function(inst->opcode), src0, src1);
|
||||
brw_set_default_access_mode(p, BRW_ALIGN_16);
|
||||
}
|
||||
|
||||
static void
|
||||
generate_math2_gen4(struct brw_codegen *p,
|
||||
generate_math2_gfx4(struct brw_codegen *p,
|
||||
vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src0,
|
||||
|
@ -96,7 +96,7 @@ generate_math2_gen4(struct brw_codegen *p,
|
|||
brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), op1.type), op1);
|
||||
brw_pop_insn_state(p);
|
||||
|
||||
gen4_math(p,
|
||||
gfx4_math(p,
|
||||
dst,
|
||||
brw_math_function(inst->opcode),
|
||||
inst->base_mrf,
|
||||
|
@ -261,11 +261,11 @@ generate_tex(struct brw_codegen *p,
|
|||
}
|
||||
|
||||
/* Stomp the resinfo output type to UINT32. On gens 4-5, the output type
|
||||
* is set as part of the message descriptor. On gen4, the PRM seems to
|
||||
* is set as part of the message descriptor. On gfx4, the PRM seems to
|
||||
* allow UINT32 and FLOAT32 (i965 PRM, Vol. 4 Section 4.8.1.1), but on
|
||||
* later gens UINT32 is required. Once you hit Sandy Bridge, the bit is
|
||||
* gone from the message descriptor entirely and you just get UINT32 all
|
||||
* the time regardless. Since we can really only do non-UINT32 on gen4,
|
||||
* the time regardless. Since we can really only do non-UINT32 on gfx4,
|
||||
* just stomp it to UINT32 all the time.
|
||||
*/
|
||||
if (inst->opcode == SHADER_OPCODE_TXS)
|
||||
|
@ -321,7 +321,7 @@ generate_tex(struct brw_codegen *p,
|
|||
brw_pop_insn_state(p);
|
||||
|
||||
if (inst->base_mrf != -1)
|
||||
gen6_resolve_implied_move(p, &src, inst->base_mrf);
|
||||
gfx6_resolve_implied_move(p, &src, inst->base_mrf);
|
||||
|
||||
/* dst = send(offset, a0.0 | <descriptor>) */
|
||||
brw_send_indirect_message(
|
||||
|
@ -725,7 +725,7 @@ generate_gs_ff_sync(struct brw_codegen *p,
|
|||
static void
|
||||
generate_gs_set_primitive_id(struct brw_codegen *p, struct brw_reg dst)
|
||||
{
|
||||
/* In gen6, PrimitiveID is delivered in R0.1 of the payload */
|
||||
/* In gfx6, PrimitiveID is delivered in R0.1 of the payload */
|
||||
struct brw_reg src = brw_vec8_grf(0, 0);
|
||||
brw_push_insn_state(p);
|
||||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
|
@ -1149,7 +1149,7 @@ generate_scratch_read(struct brw_codegen *p,
|
|||
const struct gen_device_info *devinfo = p->devinfo;
|
||||
struct brw_reg header = brw_vec8_grf(0, 0);
|
||||
|
||||
gen6_resolve_implied_move(p, &header, inst->base_mrf);
|
||||
gfx6_resolve_implied_move(p, &header, inst->base_mrf);
|
||||
|
||||
generate_oword_dual_block_offsets(p, brw_message_reg(inst->base_mrf + 1),
|
||||
index);
|
||||
|
@ -1205,7 +1205,7 @@ generate_scratch_write(struct brw_codegen *p,
|
|||
*/
|
||||
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
|
||||
gen6_resolve_implied_move(p, &header, inst->base_mrf);
|
||||
gfx6_resolve_implied_move(p, &header, inst->base_mrf);
|
||||
|
||||
generate_oword_dual_block_offsets(p, brw_message_reg(inst->base_mrf + 1),
|
||||
index);
|
||||
|
@ -1225,7 +1225,7 @@ generate_scratch_write(struct brw_codegen *p,
|
|||
|
||||
brw_set_default_predicate_control(p, inst->predicate);
|
||||
|
||||
/* Pre-gen6, we have to specify write commits to ensure ordering
|
||||
/* Pre-gfx6, we have to specify write commits to ensure ordering
|
||||
* between reads and writes within a thread. Afterwards, that's
|
||||
* guaranteed and write commits only matter for inter-thread
|
||||
* synchronization.
|
||||
|
@ -1280,7 +1280,7 @@ generate_pull_constant_load(struct brw_codegen *p,
|
|||
|
||||
struct brw_reg header = brw_vec8_grf(0, 0);
|
||||
|
||||
gen6_resolve_implied_move(p, &header, inst->base_mrf);
|
||||
gfx6_resolve_implied_move(p, &header, inst->base_mrf);
|
||||
|
||||
if (devinfo->ver >= 6) {
|
||||
if (offset.file == BRW_IMMEDIATE_VALUE) {
|
||||
|
@ -1350,7 +1350,7 @@ generate_get_buffer_size(struct brw_codegen *p,
|
|||
}
|
||||
|
||||
static void
|
||||
generate_pull_constant_load_gen7(struct brw_codegen *p,
|
||||
generate_pull_constant_load_gfx7(struct brw_codegen *p,
|
||||
vec4_instruction *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg surf_index,
|
||||
|
@ -1681,9 +1681,9 @@ generate_code(struct brw_codegen *p,
|
|||
|
||||
case BRW_OPCODE_IF:
|
||||
if (!inst->src[0].is_null()) {
|
||||
/* The instruction has an embedded compare (only allowed on gen6) */
|
||||
/* The instruction has an embedded compare (only allowed on gfx6) */
|
||||
assert(devinfo->ver == 6);
|
||||
gen6_IF(p, inst->conditional_mod, src[0], src[1]);
|
||||
gfx6_IF(p, inst->conditional_mod, src[0], src[1]);
|
||||
} else {
|
||||
brw_inst *if_inst = brw_IF(p, BRW_EXECUTE_8);
|
||||
brw_inst_set_pred_control(p->devinfo, if_inst, inst->predicate);
|
||||
|
@ -1724,12 +1724,12 @@ generate_code(struct brw_codegen *p,
|
|||
case SHADER_OPCODE_COS:
|
||||
assert(inst->conditional_mod == BRW_CONDITIONAL_NONE);
|
||||
if (devinfo->ver >= 7) {
|
||||
gen6_math(p, dst, brw_math_function(inst->opcode), src[0],
|
||||
gfx6_math(p, dst, brw_math_function(inst->opcode), src[0],
|
||||
brw_null_reg());
|
||||
} else if (devinfo->ver == 6) {
|
||||
generate_math_gen6(p, inst, dst, src[0], brw_null_reg());
|
||||
generate_math_gfx6(p, inst, dst, src[0], brw_null_reg());
|
||||
} else {
|
||||
generate_math1_gen4(p, inst, dst, src[0]);
|
||||
generate_math1_gfx4(p, inst, dst, src[0]);
|
||||
send_count++;
|
||||
}
|
||||
break;
|
||||
|
@ -1739,11 +1739,11 @@ generate_code(struct brw_codegen *p,
|
|||
case SHADER_OPCODE_INT_REMAINDER:
|
||||
assert(inst->conditional_mod == BRW_CONDITIONAL_NONE);
|
||||
if (devinfo->ver >= 7) {
|
||||
gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]);
|
||||
gfx6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]);
|
||||
} else if (devinfo->ver == 6) {
|
||||
generate_math_gen6(p, inst, dst, src[0], src[1]);
|
||||
generate_math_gfx6(p, inst, dst, src[0], src[1]);
|
||||
} else {
|
||||
generate_math2_gen4(p, inst, dst, src[0], src[1]);
|
||||
generate_math2_gfx4(p, inst, dst, src[0], src[1]);
|
||||
send_count++;
|
||||
}
|
||||
break;
|
||||
|
@ -1790,7 +1790,7 @@ generate_code(struct brw_codegen *p,
|
|||
break;
|
||||
|
||||
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
|
||||
generate_pull_constant_load_gen7(p, inst, dst, src[0], src[1]);
|
||||
generate_pull_constant_load_gfx7(p, inst, dst, src[0], src[1]);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
|
|
|
@ -664,7 +664,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
|
|||
nir->info.gs.uses_end_primitive ? 1 : 0;
|
||||
}
|
||||
} else {
|
||||
/* There are no control data bits in gen6. */
|
||||
/* There are no control data bits in gfx6. */
|
||||
c.control_data_bits_per_vertex = 0;
|
||||
}
|
||||
c.control_data_header_size_bits =
|
||||
|
@ -755,10 +755,10 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
|
|||
* number of output vertices. So we'll just calculate the amount of space
|
||||
* we need, and if it's too large, fail to compile.
|
||||
*
|
||||
* The above is for gen7+ where we have a single URB entry that will hold
|
||||
* all the output. In gen6, we will have to allocate URB entries for every
|
||||
* The above is for gfx7+ where we have a single URB entry that will hold
|
||||
* all the output. In gfx6, we will have to allocate URB entries for every
|
||||
* vertex we emit, so our URB entries only need to be large enough to hold
|
||||
* a single vertex. Also, gen6 does not have a control data header.
|
||||
* a single vertex. Also, gfx6 does not have a control data header.
|
||||
*/
|
||||
unsigned output_size_bytes;
|
||||
if (compiler->devinfo->ver >= 7) {
|
||||
|
@ -789,8 +789,8 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
|
|||
return NULL;
|
||||
|
||||
|
||||
/* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
|
||||
* a multiple of 128 bytes in gen6.
|
||||
/* URB entry sizes are stored as a multiple of 64 bytes in gfx7+ and
|
||||
* a multiple of 128 bytes in gfx6.
|
||||
*/
|
||||
if (compiler->devinfo->ver >= 7) {
|
||||
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
|
||||
|
@ -931,7 +931,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
|
|||
nir, mem_ctx, false /* no_spills */,
|
||||
shader_time_index, debug_enabled);
|
||||
else
|
||||
gs = new brw::gen6_gs_visitor(compiler, log_data, &c, prog_data, prog,
|
||||
gs = new brw::gfx6_gs_visitor(compiler, log_data, &c, prog_data, prog,
|
||||
nir, mem_ctx, false /* no_spills */,
|
||||
shader_time_index, debug_enabled);
|
||||
|
||||
|
|
|
@ -285,7 +285,7 @@ setup_imm_df(const vec4_builder &bld, double v)
|
|||
const gen_device_info *devinfo = bld.shader->devinfo;
|
||||
assert(devinfo->ver == 7);
|
||||
|
||||
/* gen7.5 does not support DF immediates straightforwardly but the DIM
|
||||
/* gfx7.5 does not support DF immediates straightforwardly but the DIM
|
||||
* instruction allows to set the 64-bit immediate value.
|
||||
*/
|
||||
if (devinfo->is_haswell) {
|
||||
|
@ -295,7 +295,7 @@ setup_imm_df(const vec4_builder &bld, double v)
|
|||
return swizzle(src_reg(dst), BRW_SWIZZLE_XXXX);
|
||||
}
|
||||
|
||||
/* gen7 does not support DF immediates */
|
||||
/* gfx7 does not support DF immediates */
|
||||
union {
|
||||
double d;
|
||||
struct {
|
||||
|
|
|
@ -403,7 +403,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
|
|||
2 + has_primitive_id + key->input_vertices <= (devinfo->ver >= 12 ? 63 : 31)) {
|
||||
/* 3DSTATE_HS imposes two constraints on using 8_PATCH mode. First, the
|
||||
* "Instance" field limits the number of output vertices to [1, 16] on
|
||||
* gen11 and below, or [1, 32] on gen12 and above. Secondly, the
|
||||
* gfx11 and below, or [1, 32] on gfx12 and above. Secondly, the
|
||||
* "Dispatch GRF Start Register for URB Data" field is limited to [0,
|
||||
* 31] - which imposes a limit on the input vertices.
|
||||
*/
|
||||
|
|
|
@ -235,7 +235,7 @@ vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1,
|
|||
*
|
||||
* CMP null<d> src0<f> src1<f>
|
||||
*
|
||||
* Original gen4 does type conversion to the destination type before
|
||||
* Original gfx4 does type conversion to the destination type before
|
||||
* comparison, producing garbage results for floating point comparisons.
|
||||
*
|
||||
* The destination type doesn't matter on newer generations, so we set the
|
||||
|
@ -311,14 +311,14 @@ vec4_visitor::fix_math_operand(const src_reg &src)
|
|||
if (devinfo->ver < 6 || src.file == BAD_FILE)
|
||||
return src;
|
||||
|
||||
/* The gen6 math instruction ignores the source modifiers --
|
||||
/* The gfx6 math instruction ignores the source modifiers --
|
||||
* swizzle, abs, negate, and at least some parts of the register
|
||||
* region description.
|
||||
*
|
||||
* Rather than trying to enumerate all these cases, *always* expand the
|
||||
* operand to a temp GRF for gen6.
|
||||
* operand to a temp GRF for gfx6.
|
||||
*
|
||||
* For gen7, keep the operand as-is, except if immediate, which gen7 still
|
||||
* For gfx7, keep the operand as-is, except if immediate, which gfx7 still
|
||||
* can't use.
|
||||
*/
|
||||
|
||||
|
@ -383,7 +383,7 @@ vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
|
|||
* code, I chose instead to remain in align16 mode in defiance of the hw
|
||||
* docs).
|
||||
*
|
||||
* I've [chadv] experimentally confirmed that, on gen7 hardware and the
|
||||
* I've [chadv] experimentally confirmed that, on gfx7 hardware and the
|
||||
* simulator, emitting a f32to16 in align16 mode with UD as destination
|
||||
* data type is safe. The behavior differs from that specified in the PRM
|
||||
* in that the upper word of each destination channel is cleared to 0.
|
||||
|
@ -450,7 +450,7 @@ vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0)
|
|||
* emitting align1 instructions for unpackHalf2x16 failed to pass the
|
||||
* Piglit tests, so I gave up.
|
||||
*
|
||||
* I've verified that, on gen7 hardware and the simulator, it is safe to
|
||||
* I've verified that, on gfx7 hardware and the simulator, it is safe to
|
||||
* emit f16to32 in align16 mode with UD as source data type.
|
||||
*/
|
||||
|
||||
|
@ -1006,7 +1006,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
|
|||
}
|
||||
|
||||
if (devinfo->ver == 6 && op == ir_tg4) {
|
||||
emit_gen6_gather_wa(key_tex->gen6_gather_wa[surface], inst->dst);
|
||||
emit_gfx6_gather_wa(key_tex->gfx6_gather_wa[surface], inst->dst);
|
||||
}
|
||||
|
||||
if (op == ir_query_levels) {
|
||||
|
@ -1022,7 +1022,7 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
|
|||
* Apply workarounds for Gen6 gather with UINT/SINT
|
||||
*/
|
||||
void
|
||||
vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst)
|
||||
vec4_visitor::emit_gfx6_gather_wa(uint8_t wa, dst_reg dst)
|
||||
{
|
||||
if (!wa)
|
||||
return;
|
||||
|
@ -1285,7 +1285,7 @@ vec4_visitor::emit_vertex()
|
|||
int max_usable_mrf = FIRST_SPILL_MRF(devinfo->ver);
|
||||
|
||||
/* The following assertion verifies that max_usable_mrf causes an
|
||||
* even-numbered amount of URB write data, which will meet gen6's
|
||||
* even-numbered amount of URB write data, which will meet gfx6's
|
||||
* requirements for length alignment.
|
||||
*/
|
||||
assert ((max_usable_mrf - base_mrf) % 2 == 0);
|
||||
|
@ -1344,7 +1344,7 @@ vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst,
|
|||
*/
|
||||
int message_header_scale = 2;
|
||||
|
||||
/* Pre-gen6, the message header uses byte offsets instead of vec4
|
||||
/* Pre-gfx6, the message header uses byte offsets instead of vec4
|
||||
* (16-byte) offset units.
|
||||
*/
|
||||
if (devinfo->ver < 6)
|
||||
|
|
|
@ -119,7 +119,7 @@ static const struct {
|
|||
* \param line_aa BRW_WM_AA_NEVER, BRW_WM_AA_ALWAYS or BRW_WM_AA_SOMETIMES
|
||||
* \param lookup bitmask of BRW_WM_IZ_* flags
|
||||
*/
|
||||
void fs_visitor::setup_fs_payload_gen4()
|
||||
void fs_visitor::setup_fs_payload_gfx4()
|
||||
{
|
||||
assert(stage == MESA_SHADER_FRAGMENT);
|
||||
assert(dispatch_width <= 16);
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
namespace brw {
|
||||
|
||||
void
|
||||
gen6_gs_visitor::emit_prolog()
|
||||
gfx6_gs_visitor::emit_prolog()
|
||||
{
|
||||
vec4_gs_visitor::emit_prolog();
|
||||
|
||||
|
@ -44,7 +44,7 @@ gen6_gs_visitor::emit_prolog()
|
|||
* can write to the URB simultaneously and the FF_SYNC message provides the
|
||||
* synchronization mechanism for this, so using this message effectively
|
||||
* stalls the thread until it is its turn to write to the URB. Because of
|
||||
* this, the best way to implement geometry shader algorithms in gen6 is to
|
||||
* this, the best way to implement geometry shader algorithms in gfx6 is to
|
||||
* execute the algorithm before the FF_SYNC message to maximize parallelism.
|
||||
*
|
||||
* To achieve this we buffer the geometry shader outputs for each emitted
|
||||
|
@ -60,7 +60,7 @@ gen6_gs_visitor::emit_prolog()
|
|||
* flags for the next vertex come right after the data items and flags for
|
||||
* the previous vertex.
|
||||
*/
|
||||
this->current_annotation = "gen6 prolog";
|
||||
this->current_annotation = "gfx6 prolog";
|
||||
this->vertex_output = src_reg(this,
|
||||
glsl_type::uint_type,
|
||||
(prog_data->vue_map.num_slots + 1) *
|
||||
|
@ -137,9 +137,9 @@ gen6_gs_visitor::emit_prolog()
|
|||
}
|
||||
|
||||
void
|
||||
gen6_gs_visitor::gs_emit_vertex(int stream_id)
|
||||
gfx6_gs_visitor::gs_emit_vertex(int stream_id)
|
||||
{
|
||||
this->current_annotation = "gen6 emit vertex";
|
||||
this->current_annotation = "gfx6 emit vertex";
|
||||
|
||||
/* Buffer all output slots for this vertex in vertex_output */
|
||||
for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
|
||||
|
@ -201,9 +201,9 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id)
|
|||
}
|
||||
|
||||
void
|
||||
gen6_gs_visitor::gs_end_primitive()
|
||||
gfx6_gs_visitor::gs_end_primitive()
|
||||
{
|
||||
this->current_annotation = "gen6 end primitive";
|
||||
this->current_annotation = "gfx6 end primitive";
|
||||
/* Calling EndPrimitive() is optional for point output. In this case we set
|
||||
* the PrimEnd flag when we process EmitVertex().
|
||||
*/
|
||||
|
@ -251,9 +251,9 @@ gen6_gs_visitor::gs_end_primitive()
|
|||
}
|
||||
|
||||
void
|
||||
gen6_gs_visitor::emit_urb_write_header(int mrf)
|
||||
gfx6_gs_visitor::emit_urb_write_header(int mrf)
|
||||
{
|
||||
this->current_annotation = "gen6 urb header";
|
||||
this->current_annotation = "gfx6 urb header";
|
||||
/* Compute offset of the flags for the current vertex in vertex_output and
|
||||
* write them in dw2 of the message header.
|
||||
*
|
||||
|
@ -287,7 +287,7 @@ align_interleaved_urb_mlen(unsigned mlen)
|
|||
}
|
||||
|
||||
void
|
||||
gen6_gs_visitor::emit_urb_write_opcode(bool complete, int base_mrf,
|
||||
gfx6_gs_visitor::emit_urb_write_opcode(bool complete, int base_mrf,
|
||||
int last_mrf, int urb_offset)
|
||||
{
|
||||
vec4_instruction *inst = NULL;
|
||||
|
@ -317,7 +317,7 @@ gen6_gs_visitor::emit_urb_write_opcode(bool complete, int base_mrf,
|
|||
}
|
||||
|
||||
void
|
||||
gen6_gs_visitor::emit_thread_end()
|
||||
gfx6_gs_visitor::emit_thread_end()
|
||||
{
|
||||
/* Make sure the current primitive is ended: we know it is not ended when
|
||||
* first_vertex is not zero. This is only relevant for outputs other than
|
||||
|
@ -350,7 +350,7 @@ gen6_gs_visitor::emit_thread_end()
|
|||
int max_usable_mrf = FIRST_SPILL_MRF(devinfo->ver);
|
||||
|
||||
/* Issue the FF_SYNC message and obtain the initial VUE handle. */
|
||||
this->current_annotation = "gen6 thread end: ff_sync";
|
||||
this->current_annotation = "gfx6 thread end: ff_sync";
|
||||
|
||||
vec4_instruction *inst = NULL;
|
||||
if (prog->info.has_transform_feedback_varyings) {
|
||||
|
@ -372,12 +372,12 @@ gen6_gs_visitor::emit_thread_end()
|
|||
emit(IF(BRW_PREDICATE_NORMAL));
|
||||
{
|
||||
/* Loop over all buffered vertices and emit URB write messages */
|
||||
this->current_annotation = "gen6 thread end: urb writes init";
|
||||
this->current_annotation = "gfx6 thread end: urb writes init";
|
||||
src_reg vertex(this, glsl_type::uint_type);
|
||||
emit(MOV(dst_reg(vertex), brw_imm_ud(0u)));
|
||||
emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_ud(0u)));
|
||||
|
||||
this->current_annotation = "gen6 thread end: urb writes";
|
||||
this->current_annotation = "gfx6 thread end: urb writes";
|
||||
emit(BRW_OPCODE_DO);
|
||||
{
|
||||
emit(CMP(dst_null_d(), vertex, this->vertex_count, BRW_CONDITIONAL_GE));
|
||||
|
@ -453,7 +453,7 @@ gen6_gs_visitor::emit_thread_end()
|
|||
|
||||
/* Finally, emit EOT message.
|
||||
*
|
||||
* In gen6 we need to end the thread differently depending on whether we have
|
||||
* In gfx6 we need to end the thread differently depending on whether we have
|
||||
* emitted at least one vertex or not. In case we did, the EOT message must
|
||||
* always include the COMPLETE flag or else the GPU hangs. If we have not
|
||||
* produced any output we can't use the COMPLETE flag.
|
||||
|
@ -466,7 +466,7 @@ gen6_gs_visitor::emit_thread_end()
|
|||
* which works for both cases by setting the COMPLETE and UNUSED flags in
|
||||
* the EOT message.
|
||||
*/
|
||||
this->current_annotation = "gen6 thread end: EOT";
|
||||
this->current_annotation = "gfx6 thread end: EOT";
|
||||
|
||||
if (prog->info.has_transform_feedback_varyings) {
|
||||
/* When emitting EOT, set SONumPrimsWritten Increment Value. */
|
||||
|
@ -483,7 +483,7 @@ gen6_gs_visitor::emit_thread_end()
|
|||
}
|
||||
|
||||
void
|
||||
gen6_gs_visitor::setup_payload()
|
||||
gfx6_gs_visitor::setup_payload()
|
||||
{
|
||||
int attribute_map[BRW_VARYING_SLOT_COUNT * MAX_GS_INPUT_VERTICES];
|
||||
|
||||
|
@ -522,7 +522,7 @@ gen6_gs_visitor::setup_payload()
|
|||
}
|
||||
|
||||
void
|
||||
gen6_gs_visitor::xfb_setup()
|
||||
gfx6_gs_visitor::xfb_setup()
|
||||
{
|
||||
static const unsigned swizzle_for_offset[4] = {
|
||||
BRW_SWIZZLE4(0, 1, 2, 3),
|
||||
|
@ -556,7 +556,7 @@ gen6_gs_visitor::xfb_setup()
|
|||
}
|
||||
|
||||
void
|
||||
gen6_gs_visitor::xfb_write()
|
||||
gfx6_gs_visitor::xfb_write()
|
||||
{
|
||||
unsigned num_verts;
|
||||
|
||||
|
@ -587,7 +587,7 @@ gen6_gs_visitor::xfb_write()
|
|||
unreachable("Unexpected primitive type in Gen6 SOL program.");
|
||||
}
|
||||
|
||||
this->current_annotation = "gen6 thread end: svb writes init";
|
||||
this->current_annotation = "gfx6 thread end: svb writes init";
|
||||
|
||||
emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_ud(0u)));
|
||||
emit(MOV(dst_reg(this->sol_prim_written), brw_imm_ud(0u)));
|
||||
|
@ -604,7 +604,7 @@ gen6_gs_visitor::xfb_write()
|
|||
emit(ADD(dst_reg(sol_temp), this->svbi, brw_imm_ud(num_verts)));
|
||||
|
||||
/* Compare SVBI calculated number with the maximum value, which is
|
||||
* in R1.4 (previously saved in this->max_svbi) for gen6.
|
||||
* in R1.4 (previously saved in this->max_svbi) for gfx6.
|
||||
*/
|
||||
emit(CMP(dst_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
|
||||
emit(IF(BRW_PREDICATE_NORMAL));
|
||||
|
@ -636,7 +636,7 @@ gen6_gs_visitor::xfb_write()
|
|||
}
|
||||
|
||||
void
|
||||
gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
|
||||
gfx6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
|
||||
{
|
||||
unsigned binding;
|
||||
unsigned num_bindings = gs_prog_data->num_transform_feedback_bindings;
|
||||
|
@ -654,7 +654,7 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
|
|||
/* Avoid overwriting MRF 1 as it is used as URB write message header */
|
||||
dst_reg mrf_reg(MRF, 2);
|
||||
|
||||
this->current_annotation = "gen6: emit SOL vertex data";
|
||||
this->current_annotation = "gfx6: emit SOL vertex data";
|
||||
/* For each vertex, generate code to output each varying using the
|
||||
* appropriate binding table entry.
|
||||
*/
|
||||
|
@ -712,7 +712,7 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
|
|||
}
|
||||
|
||||
int
|
||||
gen6_gs_visitor::get_vertex_output_offset_for_varying(int vertex, int varying)
|
||||
gfx6_gs_visitor::get_vertex_output_offset_for_varying(int vertex, int varying)
|
||||
{
|
||||
/* Find the output slot assigned to this varying.
|
||||
*
|
||||
|
|
|
@ -32,10 +32,10 @@
|
|||
|
||||
namespace brw {
|
||||
|
||||
class gen6_gs_visitor : public vec4_gs_visitor
|
||||
class gfx6_gs_visitor : public vec4_gs_visitor
|
||||
{
|
||||
public:
|
||||
gen6_gs_visitor(const struct brw_compiler *comp,
|
||||
gfx6_gs_visitor(const struct brw_compiler *comp,
|
||||
void *log_data,
|
||||
struct brw_gs_compile *c,
|
||||
struct brw_gs_prog_data *prog_data,
|
||||
|
|
|
@ -252,7 +252,7 @@ gen_f0_0_MOV_GRF_GRF(struct brw_codegen *p)
|
|||
brw_pop_insn_state(p);
|
||||
}
|
||||
|
||||
/* The handling of f0.1 vs f0.0 changes between gen6 and gen7. Explicitly test
|
||||
/* The handling of f0.1 vs f0.0 changes between gfx6 and gfx7. Explicitly test
|
||||
* it, so that the fuzzing can run over all the other bits that might
|
||||
* interact with it.
|
||||
*/
|
||||
|
|
|
@ -153,9 +153,9 @@ TEST_P(validation_test, src1_null_reg)
|
|||
TEST_P(validation_test, math_src0_null_reg)
|
||||
{
|
||||
if (devinfo.ver >= 6) {
|
||||
gen6_math(p, g0, BRW_MATH_FUNCTION_SIN, null, null);
|
||||
gfx6_math(p, g0, BRW_MATH_FUNCTION_SIN, null, null);
|
||||
} else {
|
||||
gen4_math(p, g0, BRW_MATH_FUNCTION_SIN, 0, null, BRW_MATH_PRECISION_FULL);
|
||||
gfx4_math(p, g0, BRW_MATH_FUNCTION_SIN, 0, null, BRW_MATH_PRECISION_FULL);
|
||||
}
|
||||
|
||||
EXPECT_FALSE(validate(p));
|
||||
|
@ -164,11 +164,11 @@ TEST_P(validation_test, math_src0_null_reg)
|
|||
TEST_P(validation_test, math_src1_null_reg)
|
||||
{
|
||||
if (devinfo.ver >= 6) {
|
||||
gen6_math(p, g0, BRW_MATH_FUNCTION_POW, g0, null);
|
||||
gfx6_math(p, g0, BRW_MATH_FUNCTION_POW, g0, null);
|
||||
EXPECT_FALSE(validate(p));
|
||||
} else {
|
||||
/* Math instructions on Gen4/5 are actually SEND messages with payloads.
|
||||
* src1 is an immediate message descriptor set by gen4_math.
|
||||
* src1 is an immediate message descriptor set by gfx4_math.
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
@ -247,9 +247,9 @@ TEST_P(validation_test, invalid_file_encoding)
|
|||
clear_instructions(p);
|
||||
|
||||
if (devinfo.ver < 6) {
|
||||
gen4_math(p, g0, BRW_MATH_FUNCTION_SIN, 0, g0, BRW_MATH_PRECISION_FULL);
|
||||
gfx4_math(p, g0, BRW_MATH_FUNCTION_SIN, 0, g0, BRW_MATH_PRECISION_FULL);
|
||||
} else {
|
||||
gen6_math(p, g0, BRW_MATH_FUNCTION_SIN, g0, null);
|
||||
gfx6_math(p, g0, BRW_MATH_FUNCTION_SIN, g0, null);
|
||||
}
|
||||
brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_MESSAGE_REGISTER_FILE, BRW_REGISTER_TYPE_F);
|
||||
|
||||
|
@ -1016,13 +1016,13 @@ TEST_P(validation_test, dst_elements_must_be_evenly_split_between_registers)
|
|||
clear_instructions(p);
|
||||
|
||||
if (devinfo.ver >= 6) {
|
||||
gen6_math(p, g0, BRW_MATH_FUNCTION_SIN, g0, null);
|
||||
gfx6_math(p, g0, BRW_MATH_FUNCTION_SIN, g0, null);
|
||||
|
||||
EXPECT_TRUE(validate(p));
|
||||
|
||||
clear_instructions(p);
|
||||
|
||||
gen6_math(p, g0, BRW_MATH_FUNCTION_SIN, g0, null);
|
||||
gfx6_math(p, g0, BRW_MATH_FUNCTION_SIN, g0, null);
|
||||
brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 4);
|
||||
|
||||
EXPECT_FALSE(validate(p));
|
||||
|
@ -1310,7 +1310,7 @@ TEST_P(validation_test, half_float_conversion)
|
|||
unsigned dst_stride;
|
||||
unsigned dst_subnr;
|
||||
bool expected_result_bdw;
|
||||
bool expected_result_chv_gen9;
|
||||
bool expected_result_chv_gfx9;
|
||||
} inst[] = {
|
||||
#define INST_C(dst_type, src_type, dst_stride, dst_subnr, expected_result) \
|
||||
{ \
|
||||
|
@ -1322,14 +1322,14 @@ TEST_P(validation_test, half_float_conversion)
|
|||
expected_result, \
|
||||
}
|
||||
#define INST_S(dst_type, src_type, dst_stride, dst_subnr, \
|
||||
expected_result_bdw, expected_result_chv_gen9) \
|
||||
expected_result_bdw, expected_result_chv_gfx9) \
|
||||
{ \
|
||||
BRW_REGISTER_TYPE_##dst_type, \
|
||||
BRW_REGISTER_TYPE_##src_type, \
|
||||
BRW_HORIZONTAL_STRIDE_##dst_stride, \
|
||||
dst_subnr, \
|
||||
expected_result_bdw, \
|
||||
expected_result_chv_gen9, \
|
||||
expected_result_chv_gfx9, \
|
||||
}
|
||||
|
||||
/* MOV to half-float destination */
|
||||
|
@ -1417,7 +1417,7 @@ TEST_P(validation_test, half_float_conversion)
|
|||
}
|
||||
|
||||
if (devinfo.is_cherryview || devinfo.ver >= 9)
|
||||
EXPECT_EQ(inst[i].expected_result_chv_gen9, validate(p));
|
||||
EXPECT_EQ(inst[i].expected_result_chv_gfx9, validate(p));
|
||||
else
|
||||
EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
|
||||
|
||||
|
@ -1733,12 +1733,12 @@ TEST_P(validation_test, mixed_float_align1_math_strided_fp16_inputs)
|
|||
#undef INST
|
||||
};
|
||||
|
||||
/* No half-float math in gen8 */
|
||||
/* No half-float math in gfx8 */
|
||||
if (devinfo.ver < 9)
|
||||
return;
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
|
||||
gen6_math(p, retype(g0, inst[i].dst_type),
|
||||
gfx6_math(p, retype(g0, inst[i].dst_type),
|
||||
BRW_MATH_FUNCTION_POW,
|
||||
retype(g0, inst[i].src0_type),
|
||||
retype(g0, inst[i].src1_type));
|
||||
|
@ -2034,14 +2034,14 @@ TEST_P(validation_test, mixed_float_align16_math_packed_format)
|
|||
#undef INST
|
||||
};
|
||||
|
||||
/* Align16 Math for mixed float mode is not supported in gen8 */
|
||||
/* Align16 Math for mixed float mode is not supported in gfx8 */
|
||||
if (devinfo.ver < 9 || devinfo.ver >= 11)
|
||||
return;
|
||||
|
||||
brw_set_default_access_mode(p, BRW_ALIGN_16);
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
|
||||
gen6_math(p, retype(g0, inst[i].dst_type),
|
||||
gfx6_math(p, retype(g0, inst[i].dst_type),
|
||||
BRW_MATH_FUNCTION_POW,
|
||||
retype(g0, inst[i].src0_type),
|
||||
retype(g0, inst[i].src1_type));
|
||||
|
@ -2819,7 +2819,7 @@ TEST_P(validation_test, qword_low_power_no_depctrl)
|
|||
}
|
||||
}
|
||||
|
||||
TEST_P(validation_test, gen11_no_byte_src_1_2)
|
||||
TEST_P(validation_test, gfx11_no_byte_src_1_2)
|
||||
{
|
||||
static const struct {
|
||||
enum opcode opcode;
|
||||
|
|
|
@ -83,7 +83,7 @@ gen_device_name_to_pci_device_id(const char *name)
|
|||
return -1;
|
||||
}
|
||||
|
||||
static const struct gen_device_info gen_device_info_gen3 = {
|
||||
static const struct gen_device_info gen_device_info_gfx3 = {
|
||||
.ver = 3,
|
||||
.simulator_id = -1,
|
||||
.cs_prefetch_size = 512,
|
||||
|
@ -1236,7 +1236,7 @@ gen_get_device_info_from_pci_id(int pci_id,
|
|||
|
||||
#undef CHIPSET
|
||||
#define CHIPSET(id, fam_str, name) \
|
||||
case id: *devinfo = gen_device_info_gen3; break;
|
||||
case id: *devinfo = gen_device_info_gfx3; break;
|
||||
#include "pci_ids/i915_pci_ids.h"
|
||||
|
||||
default:
|
||||
|
@ -1300,7 +1300,7 @@ gen_get_device_name(int devid)
|
|||
}
|
||||
|
||||
/**
|
||||
* for gen8/gen9, SLICE_MASK/SUBSLICE_MASK can be used to compute the topology
|
||||
* for gfx8/gfx9, SLICE_MASK/SUBSLICE_MASK can be used to compute the topology
|
||||
* (kernel 4.13+)
|
||||
*/
|
||||
static bool
|
||||
|
@ -1471,7 +1471,7 @@ gen_get_device_info_from_fd(int fd, struct gen_device_info *devinfo)
|
|||
return false;
|
||||
}
|
||||
|
||||
/* else use the kernel 4.13+ api for gen8+. For older kernels, topology
|
||||
/* else use the kernel 4.13+ api for gfx8+. For older kernels, topology
|
||||
* will be wrong, affecting GPU metrics. In this case, fail silently.
|
||||
*/
|
||||
getparam_topology(devinfo, fd);
|
||||
|
|
|
@ -36,10 +36,10 @@ extern "C" {
|
|||
|
||||
struct drm_i915_query_topology_info;
|
||||
|
||||
#define GEN_DEVICE_MAX_SLICES (6) /* Maximum on gen10 */
|
||||
#define GEN_DEVICE_MAX_SUBSLICES (8) /* Maximum on gen11 */
|
||||
#define GEN_DEVICE_MAX_EUS_PER_SUBSLICE (16) /* Maximum on gen12 */
|
||||
#define GEN_DEVICE_MAX_PIXEL_PIPES (3) /* Maximum on gen12 */
|
||||
#define GEN_DEVICE_MAX_SLICES (6) /* Maximum on gfx10 */
|
||||
#define GEN_DEVICE_MAX_SUBSLICES (8) /* Maximum on gfx11 */
|
||||
#define GEN_DEVICE_MAX_EUS_PER_SUBSLICE (16) /* Maximum on gfx12 */
|
||||
#define GEN_DEVICE_MAX_PIXEL_PIPES (3) /* Maximum on gfx12 */
|
||||
|
||||
/**
|
||||
* Intel hardware information and quirks
|
||||
|
|
|
@ -61,37 +61,37 @@
|
|||
/* Prefixing macros */
|
||||
#if (GFX_VERx10 == 40)
|
||||
# define GENX(X) GFX4_##X
|
||||
# define genX(x) gen4_##x
|
||||
# define genX(x) gfx4_##x
|
||||
#elif (GFX_VERx10 == 45)
|
||||
# define GENX(X) GFX45_##X
|
||||
# define genX(x) gen45_##x
|
||||
# define genX(x) gfx45_##x
|
||||
#elif (GFX_VERx10 == 50)
|
||||
# define GENX(X) GFX5_##X
|
||||
# define genX(x) gen5_##x
|
||||
# define genX(x) gfx5_##x
|
||||
#elif (GFX_VERx10 == 60)
|
||||
# define GENX(X) GFX6_##X
|
||||
# define genX(x) gen6_##x
|
||||
# define genX(x) gfx6_##x
|
||||
#elif (GFX_VERx10 == 70)
|
||||
# define GENX(X) GFX7_##X
|
||||
# define genX(x) gen7_##x
|
||||
# define genX(x) gfx7_##x
|
||||
#elif (GFX_VERx10 == 75)
|
||||
# define GENX(X) GFX75_##X
|
||||
# define genX(x) gen75_##x
|
||||
# define genX(x) gfx75_##x
|
||||
#elif (GFX_VERx10 == 80)
|
||||
# define GENX(X) GFX8_##X
|
||||
# define genX(x) gen8_##x
|
||||
# define genX(x) gfx8_##x
|
||||
#elif (GFX_VERx10 == 90)
|
||||
# define GENX(X) GFX9_##X
|
||||
# define genX(x) gen9_##x
|
||||
# define genX(x) gfx9_##x
|
||||
#elif (GFX_VERx10 == 110)
|
||||
# define GENX(X) GFX11_##X
|
||||
# define genX(x) gen11_##x
|
||||
# define genX(x) gfx11_##x
|
||||
#elif (GFX_VERx10 == 120)
|
||||
# define GENX(X) GFX12_##X
|
||||
# define genX(x) gen12_##x
|
||||
# define genX(x) gfx12_##x
|
||||
#elif (GFX_VERx10 == 125)
|
||||
# define GENX(X) GFX125_##X
|
||||
# define genX(x) gen125_##x
|
||||
# define genX(x) gfx125_##x
|
||||
#else
|
||||
# error "Need to add prefixing macros for this gen"
|
||||
#endif
|
||||
|
|
|
@ -523,9 +523,9 @@ isl_surf_choose_tiling(const struct isl_device *dev,
|
|||
}
|
||||
|
||||
if (ISL_GFX_VER(dev) >= 6) {
|
||||
isl_gen6_filter_tiling(dev, info, &tiling_flags);
|
||||
isl_gfx6_filter_tiling(dev, info, &tiling_flags);
|
||||
} else {
|
||||
isl_gen4_filter_tiling(dev, info, &tiling_flags);
|
||||
isl_gfx4_filter_tiling(dev, info, &tiling_flags);
|
||||
}
|
||||
|
||||
#define CHOOSE(__tiling) \
|
||||
|
@ -569,13 +569,13 @@ isl_choose_msaa_layout(const struct isl_device *dev,
|
|||
enum isl_msaa_layout *msaa_layout)
|
||||
{
|
||||
if (ISL_GFX_VER(dev) >= 8) {
|
||||
return isl_gen8_choose_msaa_layout(dev, info, tiling, msaa_layout);
|
||||
return isl_gfx8_choose_msaa_layout(dev, info, tiling, msaa_layout);
|
||||
} else if (ISL_GFX_VER(dev) >= 7) {
|
||||
return isl_gen7_choose_msaa_layout(dev, info, tiling, msaa_layout);
|
||||
return isl_gfx7_choose_msaa_layout(dev, info, tiling, msaa_layout);
|
||||
} else if (ISL_GFX_VER(dev) >= 6) {
|
||||
return isl_gen6_choose_msaa_layout(dev, info, tiling, msaa_layout);
|
||||
return isl_gfx6_choose_msaa_layout(dev, info, tiling, msaa_layout);
|
||||
} else {
|
||||
return isl_gen4_choose_msaa_layout(dev, info, tiling, msaa_layout);
|
||||
return isl_gfx4_choose_msaa_layout(dev, info, tiling, msaa_layout);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -717,7 +717,7 @@ isl_choose_array_pitch_span(const struct isl_device *dev,
|
|||
return ISL_ARRAY_PITCH_SPAN_COMPACT;
|
||||
|
||||
case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
|
||||
/* Each array image in the gen6 stencil or HiZ surface is compact in the
|
||||
/* Each array image in the gfx6 stencil or HiZ surface is compact in the
|
||||
* sense that every LOD is a compact array of the same size as LOD0.
|
||||
*/
|
||||
return ISL_ARRAY_PITCH_SPAN_COMPACT;
|
||||
|
@ -756,12 +756,12 @@ isl_choose_image_alignment_el(const struct isl_device *dev,
|
|||
/* HiZ surfaces on Sandy Bridge are packed tightly. */
|
||||
*image_align_el = isl_extent3d(1, 1, 1);
|
||||
} else if (ISL_GFX_VER(dev) < 12) {
|
||||
/* On gen7+, HiZ surfaces are always aligned to 16x8 pixels in the
|
||||
/* On gfx7+, HiZ surfaces are always aligned to 16x8 pixels in the
|
||||
* primary surface which works out to 2x2 HiZ elements.
|
||||
*/
|
||||
*image_align_el = isl_extent3d(2, 2, 1);
|
||||
} else {
|
||||
/* On gen12+, HiZ surfaces are always aligned to 16x16 pixels in the
|
||||
/* On gfx12+, HiZ surfaces are always aligned to 16x16 pixels in the
|
||||
* primary surface which works out to 2x4 HiZ elements.
|
||||
* TODO: Verify
|
||||
*/
|
||||
|
@ -771,22 +771,22 @@ isl_choose_image_alignment_el(const struct isl_device *dev,
|
|||
}
|
||||
|
||||
if (ISL_GFX_VER(dev) >= 12) {
|
||||
isl_gen12_choose_image_alignment_el(dev, info, tiling, dim_layout,
|
||||
isl_gfx12_choose_image_alignment_el(dev, info, tiling, dim_layout,
|
||||
msaa_layout, image_align_el);
|
||||
} else if (ISL_GFX_VER(dev) >= 9) {
|
||||
isl_gen9_choose_image_alignment_el(dev, info, tiling, dim_layout,
|
||||
isl_gfx9_choose_image_alignment_el(dev, info, tiling, dim_layout,
|
||||
msaa_layout, image_align_el);
|
||||
} else if (ISL_GFX_VER(dev) >= 8) {
|
||||
isl_gen8_choose_image_alignment_el(dev, info, tiling, dim_layout,
|
||||
isl_gfx8_choose_image_alignment_el(dev, info, tiling, dim_layout,
|
||||
msaa_layout, image_align_el);
|
||||
} else if (ISL_GFX_VER(dev) >= 7) {
|
||||
isl_gen7_choose_image_alignment_el(dev, info, tiling, dim_layout,
|
||||
isl_gfx7_choose_image_alignment_el(dev, info, tiling, dim_layout,
|
||||
msaa_layout, image_align_el);
|
||||
} else if (ISL_GFX_VER(dev) >= 6) {
|
||||
isl_gen6_choose_image_alignment_el(dev, info, tiling, dim_layout,
|
||||
isl_gfx6_choose_image_alignment_el(dev, info, tiling, dim_layout,
|
||||
msaa_layout, image_align_el);
|
||||
} else {
|
||||
isl_gen4_choose_image_alignment_el(dev, info, tiling, dim_layout,
|
||||
isl_gfx4_choose_image_alignment_el(dev, info, tiling, dim_layout,
|
||||
msaa_layout, image_align_el);
|
||||
}
|
||||
}
|
||||
|
@ -991,7 +991,7 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev,
|
|||
* surface elements.
|
||||
*/
|
||||
static uint32_t
|
||||
isl_calc_array_pitch_el_rows_gen4_2d(
|
||||
isl_calc_array_pitch_el_rows_gfx4_2d(
|
||||
const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
const struct isl_tile_info *tile_info,
|
||||
|
@ -1096,7 +1096,7 @@ isl_calc_array_pitch_el_rows_gen4_2d(
|
|||
* ISL_DIM_LAYOUT_GEN4_2D.
|
||||
*/
|
||||
static void
|
||||
isl_calc_phys_slice0_extent_sa_gen4_2d(
|
||||
isl_calc_phys_slice0_extent_sa_gfx4_2d(
|
||||
const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
enum isl_msaa_layout msaa_layout,
|
||||
|
@ -1165,7 +1165,7 @@ isl_calc_phys_slice0_extent_sa_gen4_2d(
|
|||
}
|
||||
|
||||
static void
|
||||
isl_calc_phys_total_extent_el_gen4_2d(
|
||||
isl_calc_phys_total_extent_el_gfx4_2d(
|
||||
const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
const struct isl_tile_info *tile_info,
|
||||
|
@ -1179,11 +1179,11 @@ isl_calc_phys_total_extent_el_gen4_2d(
|
|||
const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
|
||||
|
||||
struct isl_extent2d phys_slice0_sa;
|
||||
isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout,
|
||||
isl_calc_phys_slice0_extent_sa_gfx4_2d(dev, info, msaa_layout,
|
||||
image_align_sa, phys_level0_sa,
|
||||
&phys_slice0_sa);
|
||||
*array_pitch_el_rows =
|
||||
isl_calc_array_pitch_el_rows_gen4_2d(dev, info, tile_info,
|
||||
isl_calc_array_pitch_el_rows_gfx4_2d(dev, info, tile_info,
|
||||
image_align_sa, phys_level0_sa,
|
||||
array_pitch_span,
|
||||
&phys_slice0_sa);
|
||||
|
@ -1199,7 +1199,7 @@ isl_calc_phys_total_extent_el_gen4_2d(
|
|||
* ISL_DIM_LAYOUT_GEN4_3D.
|
||||
*/
|
||||
static void
|
||||
isl_calc_phys_total_extent_el_gen4_3d(
|
||||
isl_calc_phys_total_extent_el_gfx4_3d(
|
||||
const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
const struct isl_extent3d *image_align_sa,
|
||||
|
@ -1263,7 +1263,7 @@ isl_calc_phys_total_extent_el_gen4_3d(
|
|||
* ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ.
|
||||
*/
|
||||
static void
|
||||
isl_calc_phys_total_extent_el_gen6_stencil_hiz(
|
||||
isl_calc_phys_total_extent_el_gfx6_stencil_hiz(
|
||||
const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
const struct isl_tile_info *tile_info,
|
||||
|
@ -1324,7 +1324,7 @@ isl_calc_phys_total_extent_el_gen6_stencil_hiz(
|
|||
* ISL_DIM_LAYOUT_GEN9_1D.
|
||||
*/
|
||||
static void
|
||||
isl_calc_phys_total_extent_el_gen9_1d(
|
||||
isl_calc_phys_total_extent_el_gfx9_1d(
|
||||
const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
const struct isl_extent3d *image_align_sa,
|
||||
|
@ -1375,13 +1375,13 @@ isl_calc_phys_total_extent_el(const struct isl_device *dev,
|
|||
switch (dim_layout) {
|
||||
case ISL_DIM_LAYOUT_GEN9_1D:
|
||||
assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
|
||||
isl_calc_phys_total_extent_el_gen9_1d(dev, info,
|
||||
isl_calc_phys_total_extent_el_gfx9_1d(dev, info,
|
||||
image_align_sa, phys_level0_sa,
|
||||
array_pitch_el_rows,
|
||||
total_extent_el);
|
||||
return;
|
||||
case ISL_DIM_LAYOUT_GEN4_2D:
|
||||
isl_calc_phys_total_extent_el_gen4_2d(dev, info, tile_info, msaa_layout,
|
||||
isl_calc_phys_total_extent_el_gfx4_2d(dev, info, tile_info, msaa_layout,
|
||||
image_align_sa, phys_level0_sa,
|
||||
array_pitch_span,
|
||||
array_pitch_el_rows,
|
||||
|
@ -1389,7 +1389,7 @@ isl_calc_phys_total_extent_el(const struct isl_device *dev,
|
|||
return;
|
||||
case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
|
||||
assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
|
||||
isl_calc_phys_total_extent_el_gen6_stencil_hiz(dev, info, tile_info,
|
||||
isl_calc_phys_total_extent_el_gfx6_stencil_hiz(dev, info, tile_info,
|
||||
image_align_sa,
|
||||
phys_level0_sa,
|
||||
array_pitch_el_rows,
|
||||
|
@ -1397,7 +1397,7 @@ isl_calc_phys_total_extent_el(const struct isl_device *dev,
|
|||
return;
|
||||
case ISL_DIM_LAYOUT_GEN4_3D:
|
||||
assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
|
||||
isl_calc_phys_total_extent_el_gen4_3d(dev, info,
|
||||
isl_calc_phys_total_extent_el_gfx4_3d(dev, info,
|
||||
image_align_sa, phys_level0_sa,
|
||||
array_pitch_el_rows,
|
||||
total_extent_el);
|
||||
|
@ -1736,7 +1736,7 @@ isl_surf_init_s(const struct isl_device *dev,
|
|||
* surfaces are also restricted to a maximum size in bytes. This
|
||||
* maximum is 2 GB for all products and all surface types."
|
||||
*
|
||||
* This comment is applicable to all Pre-gen9 platforms.
|
||||
* This comment is applicable to all Pre-gfx9 platforms.
|
||||
*/
|
||||
if (size_B > (uint64_t) 1 << 31)
|
||||
return false;
|
||||
|
@ -1750,7 +1750,7 @@ isl_surf_init_s(const struct isl_device *dev,
|
|||
if (size_B > (uint64_t) 1 << 38)
|
||||
return false;
|
||||
} else {
|
||||
/* gen11+ platforms raised this limit to 2^44 bytes. */
|
||||
/* gfx11+ platforms raised this limit to 2^44 bytes. */
|
||||
if (size_B > (uint64_t) 1 << 44)
|
||||
return false;
|
||||
}
|
||||
|
@ -2047,7 +2047,7 @@ isl_surf_supports_ccs(const struct isl_device *dev,
|
|||
return false;
|
||||
|
||||
/* The PRM doesn't say this explicitly, but fast-clears don't appear to
|
||||
* work for 3D textures until gen9 where the layout of 3D textures
|
||||
* work for 3D textures until gfx9 where the layout of 3D textures
|
||||
* changes to match 2D array textures.
|
||||
*/
|
||||
if (ISL_GFX_VER(dev) <= 8 && surf->dim != ISL_SURF_DIM_2D)
|
||||
|
@ -2058,7 +2058,7 @@ isl_surf_supports_ccs(const struct isl_device *dev,
|
|||
*
|
||||
* "Support is for non-mip-mapped and non-array surface types only."
|
||||
*
|
||||
* This restriction is lifted on gen8+. Technically, it may be possible
|
||||
* This restriction is lifted on gfx8+. Technically, it may be possible
|
||||
* to create a CCS for an arrayed or mipmapped image and only enable
|
||||
* CCS_D when rendering to the base slice. However, there is no
|
||||
* documentation telling us what the hardware would do in that case or what
|
||||
|
@ -2193,36 +2193,36 @@ isl_surf_get_ccs_surf(const struct isl_device *dev,
|
|||
#define isl_genX_call(dev, func, ...) \
|
||||
switch (ISL_GFX_VERX10(dev)) { \
|
||||
case 40: \
|
||||
isl_gen4_##func(__VA_ARGS__); \
|
||||
isl_gfx4_##func(__VA_ARGS__); \
|
||||
break; \
|
||||
case 45: \
|
||||
/* G45 surface state is the same as gen5 */ \
|
||||
/* G45 surface state is the same as gfx5 */ \
|
||||
case 50: \
|
||||
isl_gen5_##func(__VA_ARGS__); \
|
||||
isl_gfx5_##func(__VA_ARGS__); \
|
||||
break; \
|
||||
case 60: \
|
||||
isl_gen6_##func(__VA_ARGS__); \
|
||||
isl_gfx6_##func(__VA_ARGS__); \
|
||||
break; \
|
||||
case 70: \
|
||||
isl_gen7_##func(__VA_ARGS__); \
|
||||
isl_gfx7_##func(__VA_ARGS__); \
|
||||
break; \
|
||||
case 75: \
|
||||
isl_gen75_##func(__VA_ARGS__); \
|
||||
isl_gfx75_##func(__VA_ARGS__); \
|
||||
break; \
|
||||
case 80: \
|
||||
isl_gen8_##func(__VA_ARGS__); \
|
||||
isl_gfx8_##func(__VA_ARGS__); \
|
||||
break; \
|
||||
case 90: \
|
||||
isl_gen9_##func(__VA_ARGS__); \
|
||||
isl_gfx9_##func(__VA_ARGS__); \
|
||||
break; \
|
||||
case 110: \
|
||||
isl_gen11_##func(__VA_ARGS__); \
|
||||
isl_gfx11_##func(__VA_ARGS__); \
|
||||
break; \
|
||||
case 120: \
|
||||
isl_gen12_##func(__VA_ARGS__); \
|
||||
isl_gfx12_##func(__VA_ARGS__); \
|
||||
break; \
|
||||
case 125: \
|
||||
isl_gen125_##func(__VA_ARGS__); \
|
||||
isl_gfx125_##func(__VA_ARGS__); \
|
||||
break; \
|
||||
default: \
|
||||
assert(!"Unknown hardware generation"); \
|
||||
|
@ -2310,7 +2310,7 @@ isl_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch,
|
|||
* ISL_DIM_LAYOUT_GEN4_2D.
|
||||
*/
|
||||
static void
|
||||
get_image_offset_sa_gen4_2d(const struct isl_surf *surf,
|
||||
get_image_offset_sa_gfx4_2d(const struct isl_surf *surf,
|
||||
uint32_t level, uint32_t logical_array_layer,
|
||||
uint32_t *x_offset_sa,
|
||||
uint32_t *y_offset_sa)
|
||||
|
@ -2352,7 +2352,7 @@ get_image_offset_sa_gen4_2d(const struct isl_surf *surf,
|
|||
* ISL_DIM_LAYOUT_GEN4_3D.
|
||||
*/
|
||||
static void
|
||||
get_image_offset_sa_gen4_3d(const struct isl_surf *surf,
|
||||
get_image_offset_sa_gfx4_3d(const struct isl_surf *surf,
|
||||
uint32_t level, uint32_t logical_z_offset_px,
|
||||
uint32_t *x_offset_sa,
|
||||
uint32_t *y_offset_sa)
|
||||
|
@ -2405,7 +2405,7 @@ get_image_offset_sa_gen4_3d(const struct isl_surf *surf,
|
|||
}
|
||||
|
||||
static void
|
||||
get_image_offset_sa_gen6_stencil_hiz(const struct isl_surf *surf,
|
||||
get_image_offset_sa_gfx6_stencil_hiz(const struct isl_surf *surf,
|
||||
uint32_t level,
|
||||
uint32_t logical_array_layer,
|
||||
uint32_t *x_offset_sa,
|
||||
|
@ -2468,7 +2468,7 @@ get_image_offset_sa_gen6_stencil_hiz(const struct isl_surf *surf,
|
|||
* ISL_DIM_LAYOUT_GEN9_1D.
|
||||
*/
|
||||
static void
|
||||
get_image_offset_sa_gen9_1d(const struct isl_surf *surf,
|
||||
get_image_offset_sa_gfx9_1d(const struct isl_surf *surf,
|
||||
uint32_t level, uint32_t layer,
|
||||
uint32_t *x_offset_sa,
|
||||
uint32_t *y_offset_sa)
|
||||
|
@ -2519,21 +2519,21 @@ isl_surf_get_image_offset_sa(const struct isl_surf *surf,
|
|||
|
||||
switch (surf->dim_layout) {
|
||||
case ISL_DIM_LAYOUT_GEN9_1D:
|
||||
get_image_offset_sa_gen9_1d(surf, level, logical_array_layer,
|
||||
get_image_offset_sa_gfx9_1d(surf, level, logical_array_layer,
|
||||
x_offset_sa, y_offset_sa);
|
||||
break;
|
||||
case ISL_DIM_LAYOUT_GEN4_2D:
|
||||
get_image_offset_sa_gen4_2d(surf, level, logical_array_layer
|
||||
get_image_offset_sa_gfx4_2d(surf, level, logical_array_layer
|
||||
+ logical_z_offset_px,
|
||||
x_offset_sa, y_offset_sa);
|
||||
break;
|
||||
case ISL_DIM_LAYOUT_GEN4_3D:
|
||||
get_image_offset_sa_gen4_3d(surf, level, logical_array_layer +
|
||||
get_image_offset_sa_gfx4_3d(surf, level, logical_array_layer +
|
||||
logical_z_offset_px,
|
||||
x_offset_sa, y_offset_sa);
|
||||
break;
|
||||
case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
|
||||
get_image_offset_sa_gen6_stencil_hiz(surf, level, logical_array_layer +
|
||||
get_image_offset_sa_gfx6_stencil_hiz(surf, level, logical_array_layer +
|
||||
logical_z_offset_px,
|
||||
x_offset_sa, y_offset_sa);
|
||||
break;
|
||||
|
@ -2752,10 +2752,10 @@ uint32_t
|
|||
isl_surf_get_depth_format(const struct isl_device *dev,
|
||||
const struct isl_surf *surf)
|
||||
{
|
||||
/* Support for separate stencil buffers began in gen5. Support for
|
||||
* interleaved depthstencil buffers ceased in gen7. The intermediate gens,
|
||||
* those that supported separate and interleaved stencil, were gen5 and
|
||||
* gen6.
|
||||
/* Support for separate stencil buffers began in gfx5. Support for
|
||||
* interleaved depthstencil buffers ceased in gfx7. The intermediate gens,
|
||||
* those that supported separate and interleaved stencil, were gfx5 and
|
||||
* gfx6.
|
||||
*
|
||||
* For a list of all available formats, see the Sandybridge PRM >> Volume
|
||||
* 2 Part 1: 3D/Media - 3D Pipeline >> 3DSTATE_DEPTH_BUFFER >> Surface
|
||||
|
|
|
@ -548,8 +548,8 @@ enum isl_dim_layout {
|
|||
|
||||
/**
|
||||
* Special layout used for HiZ and stencil on Sandy Bridge to work around
|
||||
* the hardware's lack of mipmap support. On gen6, HiZ and stencil buffers
|
||||
* work the same as on gen7+ except that they don't technically support
|
||||
* the hardware's lack of mipmap support. On gfx6, HiZ and stencil buffers
|
||||
* work the same as on gfx7+ except that they don't technically support
|
||||
* mipmapping. That does not, however, stop us from doing it. As far as
|
||||
* Sandy Bridge hardware is concerned, HiZ and stencil always operates on a
|
||||
* single miplevel 2D (possibly array) image. The dimensions of that image
|
||||
|
@ -1418,7 +1418,7 @@ struct isl_surf_fill_state_info {
|
|||
uint64_t clear_address;
|
||||
|
||||
/**
|
||||
* Surface write disables for gen4-5
|
||||
* Surface write disables for gfx4-5
|
||||
*/
|
||||
isl_channel_mask_t write_disables;
|
||||
|
||||
|
|
|
@ -132,7 +132,7 @@ uint32_t
|
|||
isl_drm_modifier_get_score(const struct gen_device_info *devinfo,
|
||||
uint64_t modifier)
|
||||
{
|
||||
/* FINISHME: Add gen12 modifiers */
|
||||
/* FINISHME: Add gfx12 modifiers */
|
||||
switch (modifier) {
|
||||
default:
|
||||
return 0;
|
||||
|
|
|
@ -69,9 +69,9 @@ struct surface_format_info {
|
|||
*
|
||||
* Y*: 45
|
||||
* Y+: 45 (g45/gm45)
|
||||
* Y~: 50 (gen5)
|
||||
* Y^: 60 (gen6)
|
||||
* Y#: 70 (gen7)
|
||||
* Y~: 50 (gfx5)
|
||||
* Y^: 60 (gfx6)
|
||||
* Y#: 70 (gfx7)
|
||||
*
|
||||
* The abbreviations in the header below are:
|
||||
* smpl - Sampling Engine
|
||||
|
@ -83,7 +83,7 @@ struct surface_format_info {
|
|||
* VB - Input Vertex Buffer
|
||||
* SO - Streamed Output Vertex Buffers (transform feedback)
|
||||
* color - Color Processing
|
||||
* ccs_e - Lossless Compression Support (gen9+ only)
|
||||
* ccs_e - Lossless Compression Support (gfx9+ only)
|
||||
* sf - Surface Format
|
||||
*
|
||||
* See page 88 of the Sandybridge PRM VOL4_Part1 PDF.
|
||||
|
@ -297,7 +297,7 @@ static const struct surface_format_info format_info[] = {
|
|||
SF( x, x, x, x, x, x, x, x, x, x, x, x, PLANAR_420_8)
|
||||
/* The format enum for R8G8B8_UNORM_SRGB first shows up in the HSW PRM but
|
||||
* empirical testing indicates that it doesn't actually sRGB decode and
|
||||
* acts identical to R8G8B8_UNORM. It does work on gen8+.
|
||||
* acts identical to R8G8B8_UNORM. It does work on gfx8+.
|
||||
*/
|
||||
SF( 80, 80, x, x, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB)
|
||||
SF( 80, 80, x, x, x, x, x, x, x, x, x, x, ETC1_RGB8)
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
#include "isl_priv.h"
|
||||
|
||||
void
|
||||
isl_gen12_choose_image_alignment_el(const struct isl_device *dev,
|
||||
isl_gfx12_choose_image_alignment_el(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_dim_layout dim_layout,
|
||||
|
@ -66,7 +66,7 @@ isl_gen12_choose_image_alignment_el(const struct isl_device *dev,
|
|||
} else if (isl_surf_usage_is_stencil(info->usage)) {
|
||||
*image_align_el = isl_extent3d(16, 8, 1);
|
||||
} else {
|
||||
isl_gen9_choose_image_alignment_el(dev, info, tiling, dim_layout,
|
||||
isl_gfx9_choose_image_alignment_el(dev, info, tiling, dim_layout,
|
||||
msaa_layout, image_align_el);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,7 +31,7 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
void
|
||||
isl_gen12_choose_image_alignment_el(const struct isl_device *dev,
|
||||
isl_gfx12_choose_image_alignment_el(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_dim_layout dim_layout,
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
#include "isl_priv.h"
|
||||
|
||||
bool
|
||||
isl_gen4_choose_msaa_layout(const struct isl_device *dev,
|
||||
isl_gfx4_choose_msaa_layout(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_msaa_layout *msaa_layout)
|
||||
|
@ -38,7 +38,7 @@ isl_gen4_choose_msaa_layout(const struct isl_device *dev,
|
|||
}
|
||||
|
||||
void
|
||||
isl_gen4_filter_tiling(const struct isl_device *dev,
|
||||
isl_gfx4_filter_tiling(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
isl_tiling_flags_t *flags)
|
||||
{
|
||||
|
@ -56,7 +56,7 @@ isl_gen4_filter_tiling(const struct isl_device *dev,
|
|||
* BWT014 The Depth Buffer Must be Tiled, it cannot be linear. This
|
||||
* field must be set to 1 on DevBW-A. [DevBW -A,B]
|
||||
*
|
||||
* In testing, the linear configuration doesn't seem to work on gen4.
|
||||
* In testing, the linear configuration doesn't seem to work on gfx4.
|
||||
*/
|
||||
*flags &= (ISL_GFX_VER(dev) == 4 && !ISL_DEV_IS_G4X(dev)) ?
|
||||
ISL_TILING_Y0_BIT : (ISL_TILING_Y0_BIT | ISL_TILING_LINEAR_BIT);
|
||||
|
@ -96,7 +96,7 @@ isl_gen4_filter_tiling(const struct isl_device *dev,
|
|||
}
|
||||
|
||||
void
|
||||
isl_gen4_choose_image_alignment_el(const struct isl_device *dev,
|
||||
isl_gfx4_choose_image_alignment_el(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_dim_layout dim_layout,
|
||||
|
@ -108,7 +108,7 @@ isl_gen4_choose_image_alignment_el(const struct isl_device *dev,
|
|||
assert(!isl_tiling_is_std_y(tiling));
|
||||
|
||||
/* Note that neither the surface's horizontal nor vertical image alignment
|
||||
* is programmable on gen4 nor gen5.
|
||||
* is programmable on gfx4 nor gfx5.
|
||||
*
|
||||
* From the G35 PRM (2008-01), Volume 1 Graphics Core, Section 6.17.3.4
|
||||
* Alignment Unit Size:
|
||||
|
|
|
@ -31,18 +31,18 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
bool
|
||||
isl_gen4_choose_msaa_layout(const struct isl_device *dev,
|
||||
isl_gfx4_choose_msaa_layout(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_msaa_layout *msaa_layout);
|
||||
|
||||
void
|
||||
isl_gen4_filter_tiling(const struct isl_device *dev,
|
||||
isl_gfx4_filter_tiling(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
isl_tiling_flags_t *flags);
|
||||
|
||||
void
|
||||
isl_gen4_choose_image_alignment_el(const struct isl_device *dev,
|
||||
isl_gfx4_choose_image_alignment_el(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_dim_layout dim_layout,
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
#include "isl_priv.h"
|
||||
|
||||
bool
|
||||
isl_gen6_choose_msaa_layout(const struct isl_device *dev,
|
||||
isl_gfx6_choose_msaa_layout(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_msaa_layout *msaa_layout)
|
||||
|
@ -66,7 +66,7 @@ isl_gen6_choose_msaa_layout(const struct isl_device *dev,
|
|||
}
|
||||
|
||||
void
|
||||
isl_gen6_choose_image_alignment_el(const struct isl_device *dev,
|
||||
isl_gfx6_choose_image_alignment_el(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_dim_layout dim_layout,
|
||||
|
|
|
@ -31,13 +31,13 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
bool
|
||||
isl_gen6_choose_msaa_layout(const struct isl_device *dev,
|
||||
isl_gfx6_choose_msaa_layout(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_msaa_layout *msaa_layout);
|
||||
|
||||
void
|
||||
isl_gen6_choose_image_alignment_el(const struct isl_device *dev,
|
||||
isl_gfx6_choose_image_alignment_el(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_dim_layout dim_layout,
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
#include "isl_priv.h"
|
||||
|
||||
static bool
|
||||
gen7_format_needs_valign2(const struct isl_device *dev,
|
||||
gfx7_format_needs_valign2(const struct isl_device *dev,
|
||||
enum isl_format format)
|
||||
{
|
||||
assert(ISL_GFX_VER(dev) == 7);
|
||||
|
@ -46,7 +46,7 @@ gen7_format_needs_valign2(const struct isl_device *dev,
|
|||
}
|
||||
|
||||
bool
|
||||
isl_gen7_choose_msaa_layout(const struct isl_device *dev,
|
||||
isl_gfx7_choose_msaa_layout(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_msaa_layout *msaa_layout)
|
||||
|
@ -103,7 +103,7 @@ isl_gen7_choose_msaa_layout(const struct isl_device *dev,
|
|||
*/
|
||||
|
||||
/* Multisampling requires vertical alignment of four. */
|
||||
if (info->samples > 1 && gen7_format_needs_valign2(dev, info->format))
|
||||
if (info->samples > 1 && gfx7_format_needs_valign2(dev, info->format))
|
||||
return false;
|
||||
|
||||
/* More obvious restrictions */
|
||||
|
@ -189,7 +189,7 @@ isl_gen7_choose_msaa_layout(const struct isl_device *dev,
|
|||
* flags except ISL_TILING_X_BIT and ISL_TILING_LINEAR_BIT.
|
||||
*/
|
||||
void
|
||||
isl_gen6_filter_tiling(const struct isl_device *dev,
|
||||
isl_gfx6_filter_tiling(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
isl_tiling_flags_t *flags)
|
||||
{
|
||||
|
@ -286,7 +286,7 @@ isl_gen6_filter_tiling(const struct isl_device *dev,
|
|||
|
||||
/* workaround */
|
||||
if (ISL_GFX_VER(dev) == 7 &&
|
||||
gen7_format_needs_valign2(dev, info->format) &&
|
||||
gfx7_format_needs_valign2(dev, info->format) &&
|
||||
(info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) &&
|
||||
info->samples == 1) {
|
||||
/* Y tiling is illegal. From the Ivybridge PRM, Vol4 Part1 2.12.2.1,
|
||||
|
@ -333,7 +333,7 @@ isl_gen6_filter_tiling(const struct isl_device *dev,
|
|||
}
|
||||
|
||||
void
|
||||
isl_gen7_choose_image_alignment_el(const struct isl_device *dev,
|
||||
isl_gfx7_choose_image_alignment_el(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_dim_layout dim_layout,
|
||||
|
@ -418,7 +418,7 @@ isl_gen7_choose_image_alignment_el(const struct isl_device *dev,
|
|||
(info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT))
|
||||
require_valign4 = true;
|
||||
|
||||
assert(!(require_valign4 && gen7_format_needs_valign2(dev, info->format)));
|
||||
assert(!(require_valign4 && gfx7_format_needs_valign2(dev, info->format)));
|
||||
|
||||
/* We default to VALIGN_2 because it uses the least memory. */
|
||||
const uint32_t valign = require_valign4 ? 4 : 2;
|
||||
|
|
|
@ -31,18 +31,18 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
void
|
||||
isl_gen6_filter_tiling(const struct isl_device *dev,
|
||||
isl_gfx6_filter_tiling(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
isl_tiling_flags_t *flags);
|
||||
|
||||
bool
|
||||
isl_gen7_choose_msaa_layout(const struct isl_device *dev,
|
||||
isl_gfx7_choose_msaa_layout(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_msaa_layout *msaa_layout);
|
||||
|
||||
void
|
||||
isl_gen7_choose_image_alignment_el(const struct isl_device *dev,
|
||||
isl_gfx7_choose_image_alignment_el(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_dim_layout dim_layout,
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
#include "isl_priv.h"
|
||||
|
||||
bool
|
||||
isl_gen8_choose_msaa_layout(const struct isl_device *dev,
|
||||
isl_gfx8_choose_msaa_layout(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_msaa_layout *msaa_layout)
|
||||
|
@ -88,7 +88,7 @@ isl_gen8_choose_msaa_layout(const struct isl_device *dev,
|
|||
}
|
||||
|
||||
void
|
||||
isl_gen8_choose_image_alignment_el(const struct isl_device *dev,
|
||||
isl_gfx8_choose_image_alignment_el(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_dim_layout dim_layout,
|
||||
|
|
|
@ -31,13 +31,13 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
bool
|
||||
isl_gen8_choose_msaa_layout(const struct isl_device *dev,
|
||||
isl_gfx8_choose_msaa_layout(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_msaa_layout *msaa_layout);
|
||||
|
||||
void
|
||||
isl_gen8_choose_image_alignment_el(const struct isl_device *dev,
|
||||
isl_gfx8_choose_image_alignment_el(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_dim_layout dim_layout,
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
* for the standard tiling formats Yf and Ys.
|
||||
*/
|
||||
static void
|
||||
gen9_calc_std_image_alignment_sa(const struct isl_device *dev,
|
||||
gfx9_calc_std_image_alignment_sa(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_msaa_layout msaa_layout,
|
||||
|
@ -97,7 +97,7 @@ gen9_calc_std_image_alignment_sa(const struct isl_device *dev,
|
|||
}
|
||||
|
||||
void
|
||||
isl_gen9_choose_image_alignment_el(const struct isl_device *dev,
|
||||
isl_gfx9_choose_image_alignment_el(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_dim_layout dim_layout,
|
||||
|
@ -167,7 +167,7 @@ isl_gen9_choose_image_alignment_el(const struct isl_device *dev,
|
|||
|
||||
if (isl_tiling_is_std_y(tiling)) {
|
||||
struct isl_extent3d image_align_sa;
|
||||
gen9_calc_std_image_alignment_sa(dev, info, tiling, msaa_layout,
|
||||
gfx9_calc_std_image_alignment_sa(dev, info, tiling, msaa_layout,
|
||||
&image_align_sa);
|
||||
|
||||
*image_align_el = isl_extent3d_sa_to_el(info->format, image_align_sa);
|
||||
|
@ -196,6 +196,6 @@ isl_gen9_choose_image_alignment_el(const struct isl_device *dev,
|
|||
return;
|
||||
}
|
||||
|
||||
isl_gen8_choose_image_alignment_el(dev, info, tiling, dim_layout,
|
||||
isl_gfx8_choose_image_alignment_el(dev, info, tiling, dim_layout,
|
||||
msaa_layout, image_align_el);
|
||||
}
|
||||
|
|
|
@ -31,7 +31,7 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
void
|
||||
isl_gen9_choose_image_alignment_el(const struct isl_device *dev,
|
||||
isl_gfx9_choose_image_alignment_el(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_dim_layout dim_layout,
|
||||
|
|
|
@ -205,34 +205,34 @@ _isl_memcpy_tiled_to_linear_sse41(uint32_t xt1, uint32_t xt2,
|
|||
#ifdef genX
|
||||
# include "isl_genX_priv.h"
|
||||
#else
|
||||
# define genX(x) gen4_##x
|
||||
# define genX(x) gfx4_##x
|
||||
# include "isl_genX_priv.h"
|
||||
# undef genX
|
||||
# define genX(x) gen5_##x
|
||||
# define genX(x) gfx5_##x
|
||||
# include "isl_genX_priv.h"
|
||||
# undef genX
|
||||
# define genX(x) gen6_##x
|
||||
# define genX(x) gfx6_##x
|
||||
# include "isl_genX_priv.h"
|
||||
# undef genX
|
||||
# define genX(x) gen7_##x
|
||||
# define genX(x) gfx7_##x
|
||||
# include "isl_genX_priv.h"
|
||||
# undef genX
|
||||
# define genX(x) gen75_##x
|
||||
# define genX(x) gfx75_##x
|
||||
# include "isl_genX_priv.h"
|
||||
# undef genX
|
||||
# define genX(x) gen8_##x
|
||||
# define genX(x) gfx8_##x
|
||||
# include "isl_genX_priv.h"
|
||||
# undef genX
|
||||
# define genX(x) gen9_##x
|
||||
# define genX(x) gfx9_##x
|
||||
# include "isl_genX_priv.h"
|
||||
# undef genX
|
||||
# define genX(x) gen11_##x
|
||||
# define genX(x) gfx11_##x
|
||||
# include "isl_genX_priv.h"
|
||||
# undef genX
|
||||
# define genX(x) gen12_##x
|
||||
# define genX(x) gfx12_##x
|
||||
# include "isl_genX_priv.h"
|
||||
# undef genX
|
||||
# define genX(x) gen125_##x
|
||||
# define genX(x) gfx125_##x
|
||||
# include "isl_genX_priv.h"
|
||||
# undef genX
|
||||
#endif
|
||||
|
|
|
@ -335,7 +335,7 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
|
|||
s.Width = info->surf->logical_level0_px.width - 1;
|
||||
s.Height = info->surf->logical_level0_px.height - 1;
|
||||
|
||||
/* In the gen6 PRM Volume 1 Part 1: Graphics Core, Section 7.18.3.7.1
|
||||
/* In the gfx6 PRM Volume 1 Part 1: Graphics Core, Section 7.18.3.7.1
|
||||
* (Surface Arrays For all surfaces other than separate stencil buffer):
|
||||
*
|
||||
* "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the value
|
||||
|
@ -477,7 +477,7 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
|
|||
#endif
|
||||
|
||||
if (info->surf->dim_layout == ISL_DIM_LAYOUT_GEN9_1D) {
|
||||
/* For gen9 1-D textures, surface pitch is ignored */
|
||||
/* For gfx9 1-D textures, surface pitch is ignored */
|
||||
s.SurfacePitch = 0;
|
||||
} else {
|
||||
s.SurfacePitch = info->surf->row_pitch_B - 1;
|
||||
|
@ -614,7 +614,7 @@ isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
|
|||
* and the data port. Testing seems to indicate that the data port
|
||||
* completely ignores the AuxiliarySurfaceMode field.
|
||||
*
|
||||
* On gen12 HDC supports compression.
|
||||
* On gfx12 HDC supports compression.
|
||||
*/
|
||||
if (GFX_VER < 12)
|
||||
assert(!(info->view->usage & ISL_SURF_USAGE_STORAGE_BIT));
|
||||
|
|
|
@ -101,7 +101,7 @@ t_assert_phys_level0_sa(const struct isl_surf *surf, uint32_t width,
|
|||
}
|
||||
|
||||
static void
|
||||
t_assert_gen4_3d_layer(const struct isl_surf *surf,
|
||||
t_assert_gfx4_3d_layer(const struct isl_surf *surf,
|
||||
uint32_t level,
|
||||
uint32_t aligned_width,
|
||||
uint32_t aligned_height,
|
||||
|
@ -261,15 +261,15 @@ test_bdw_3d_r8g8b8a8_unorm_256x256x256_levels09_tiley0(void)
|
|||
|
||||
uint32_t base_y = 0;
|
||||
|
||||
t_assert_gen4_3d_layer(&surf, 0, 256, 256, 256, 1, 256, &base_y);
|
||||
t_assert_gen4_3d_layer(&surf, 1, 128, 128, 128, 2, 64, &base_y);
|
||||
t_assert_gen4_3d_layer(&surf, 2, 64, 64, 64, 4, 16, &base_y);
|
||||
t_assert_gen4_3d_layer(&surf, 3, 32, 32, 32, 8, 4, &base_y);
|
||||
t_assert_gen4_3d_layer(&surf, 4, 16, 16, 16, 16, 1, &base_y);
|
||||
t_assert_gen4_3d_layer(&surf, 5, 8, 8, 8, 32, 1, &base_y);
|
||||
t_assert_gen4_3d_layer(&surf, 6, 4, 4, 4, 64, 1, &base_y);
|
||||
t_assert_gen4_3d_layer(&surf, 7, 4, 4, 2, 128, 1, &base_y);
|
||||
t_assert_gen4_3d_layer(&surf, 8, 4, 4, 1, 256, 1, &base_y);
|
||||
t_assert_gfx4_3d_layer(&surf, 0, 256, 256, 256, 1, 256, &base_y);
|
||||
t_assert_gfx4_3d_layer(&surf, 1, 128, 128, 128, 2, 64, &base_y);
|
||||
t_assert_gfx4_3d_layer(&surf, 2, 64, 64, 64, 4, 16, &base_y);
|
||||
t_assert_gfx4_3d_layer(&surf, 3, 32, 32, 32, 8, 4, &base_y);
|
||||
t_assert_gfx4_3d_layer(&surf, 4, 16, 16, 16, 16, 1, &base_y);
|
||||
t_assert_gfx4_3d_layer(&surf, 5, 8, 8, 8, 32, 1, &base_y);
|
||||
t_assert_gfx4_3d_layer(&surf, 6, 4, 4, 4, 64, 1, &base_y);
|
||||
t_assert_gfx4_3d_layer(&surf, 7, 4, 4, 2, 128, 1, &base_y);
|
||||
t_assert_gfx4_3d_layer(&surf, 8, 4, 4, 1, 256, 1, &base_y);
|
||||
}
|
||||
|
||||
int main(void)
|
||||
|
|
|
@ -984,7 +984,7 @@ accumulate_uint40(int a_index,
|
|||
}
|
||||
|
||||
static void
|
||||
gen8_read_report_clock_ratios(const uint32_t *report,
|
||||
gfx8_read_report_clock_ratios(const uint32_t *report,
|
||||
uint64_t *slice_freq_hz,
|
||||
uint64_t *unslice_freq_hz)
|
||||
{
|
||||
|
@ -1029,10 +1029,10 @@ gen_perf_query_result_read_frequencies(struct gen_perf_query_result *result,
|
|||
if (devinfo->ver < 8)
|
||||
return;
|
||||
|
||||
gen8_read_report_clock_ratios(start,
|
||||
gfx8_read_report_clock_ratios(start,
|
||||
&result->slice_frequency[0],
|
||||
&result->unslice_frequency[0]);
|
||||
gen8_read_report_clock_ratios(end,
|
||||
gfx8_read_report_clock_ratios(end,
|
||||
&result->slice_frequency[1],
|
||||
&result->unslice_frequency[1]);
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
|
|||
{
|
||||
switch (devinfo->ver) {
|
||||
case 7: {
|
||||
struct gen7_mdapi_metrics *mdapi_data = (struct gen7_mdapi_metrics *) data;
|
||||
struct gfx7_mdapi_metrics *mdapi_data = (struct gfx7_mdapi_metrics *) data;
|
||||
|
||||
if (data_size < sizeof(*mdapi_data))
|
||||
return 0;
|
||||
|
@ -66,7 +66,7 @@ gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
|
|||
return sizeof(*mdapi_data);
|
||||
}
|
||||
case 8: {
|
||||
struct gen8_mdapi_metrics *mdapi_data = (struct gen8_mdapi_metrics *) data;
|
||||
struct gfx8_mdapi_metrics *mdapi_data = (struct gfx8_mdapi_metrics *) data;
|
||||
|
||||
if (data_size < sizeof(*mdapi_data))
|
||||
return 0;
|
||||
|
@ -100,7 +100,7 @@ gen_perf_query_result_write_mdapi(void *data, uint32_t data_size,
|
|||
case 9:
|
||||
case 11:
|
||||
case 12:{
|
||||
struct gen9_mdapi_metrics *mdapi_data = (struct gen9_mdapi_metrics *) data;
|
||||
struct gfx9_mdapi_metrics *mdapi_data = (struct gfx9_mdapi_metrics *) data;
|
||||
|
||||
if (data_size < sizeof(*mdapi_data))
|
||||
return 0;
|
||||
|
@ -245,7 +245,7 @@ gen_perf_register_mdapi_oa_query(struct gen_perf_config *perf,
|
|||
query = gen_perf_append_query_info(perf, 1 + 45 + 16 + 7);
|
||||
query->oa_format = I915_OA_FORMAT_A45_B8_C8;
|
||||
|
||||
struct gen7_mdapi_metrics metric_data;
|
||||
struct gfx7_mdapi_metrics metric_data;
|
||||
query->data_size = sizeof(metric_data);
|
||||
|
||||
MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
|
||||
|
@ -270,7 +270,7 @@ gen_perf_register_mdapi_oa_query(struct gen_perf_config *perf,
|
|||
query = gen_perf_append_query_info(perf, 2 + 36 + 16 + 16);
|
||||
query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
|
||||
|
||||
struct gen8_mdapi_metrics metric_data;
|
||||
struct gfx8_mdapi_metrics metric_data;
|
||||
query->data_size = sizeof(metric_data);
|
||||
|
||||
MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
|
||||
|
@ -307,7 +307,7 @@ gen_perf_register_mdapi_oa_query(struct gen_perf_config *perf,
|
|||
query = gen_perf_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
|
||||
query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
|
||||
|
||||
struct gen9_mdapi_metrics metric_data;
|
||||
struct gfx9_mdapi_metrics metric_data;
|
||||
query->data_size = sizeof(metric_data);
|
||||
|
||||
MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
|
||||
|
|
|
@ -37,7 +37,7 @@ struct gen_perf_query_result;
|
|||
* Data format expected by MDAPI.
|
||||
*/
|
||||
|
||||
struct gen7_mdapi_metrics {
|
||||
struct gfx7_mdapi_metrics {
|
||||
uint64_t TotalTime;
|
||||
|
||||
uint64_t ACounters[45];
|
||||
|
@ -55,7 +55,7 @@ struct gen7_mdapi_metrics {
|
|||
#define GTDI_QUERY_BDW_METRICS_OA_COUNT 36
|
||||
#define GTDI_QUERY_BDW_METRICS_OA_40b_COUNT 32
|
||||
#define GTDI_QUERY_BDW_METRICS_NOA_COUNT 16
|
||||
struct gen8_mdapi_metrics {
|
||||
struct gfx8_mdapi_metrics {
|
||||
uint64_t TotalTime;
|
||||
uint64_t GPUTicks;
|
||||
uint64_t OaCntr[GTDI_QUERY_BDW_METRICS_OA_COUNT];
|
||||
|
@ -81,7 +81,7 @@ struct gen8_mdapi_metrics {
|
|||
|
||||
#define GTDI_MAX_READ_REGS 16
|
||||
|
||||
struct gen9_mdapi_metrics {
|
||||
struct gfx9_mdapi_metrics {
|
||||
uint64_t TotalTime;
|
||||
uint64_t GPUTicks;
|
||||
uint64_t OaCntr[GTDI_QUERY_BDW_METRICS_OA_COUNT];
|
||||
|
@ -110,7 +110,7 @@ struct gen9_mdapi_metrics {
|
|||
};
|
||||
|
||||
/* Add new definition */
|
||||
#define gen11_mdapi_metrics gen9_mdapi_metrics
|
||||
#define gfx11_mdapi_metrics gfx9_mdapi_metrics
|
||||
|
||||
struct mdapi_pipeline_metrics {
|
||||
uint64_t IAVertices;
|
||||
|
@ -138,17 +138,17 @@ static inline void gen_perf_query_mdapi_write_marker(void *data, uint32_t data_s
|
|||
{
|
||||
switch (devinfo->ver) {
|
||||
case 8: {
|
||||
if (data_size < sizeof(struct gen8_mdapi_metrics))
|
||||
if (data_size < sizeof(struct gfx8_mdapi_metrics))
|
||||
return;
|
||||
struct gen8_mdapi_metrics *mdapi_data = data;
|
||||
struct gfx8_mdapi_metrics *mdapi_data = data;
|
||||
mdapi_data->MarkerUser = value;
|
||||
break;
|
||||
}
|
||||
case 9:
|
||||
case 11: {
|
||||
if (data_size < sizeof(struct gen9_mdapi_metrics))
|
||||
if (data_size < sizeof(struct gfx9_mdapi_metrics))
|
||||
return;
|
||||
struct gen9_mdapi_metrics *mdapi_data = data;
|
||||
struct gfx9_mdapi_metrics *mdapi_data = data;
|
||||
mdapi_data->MarkerUser = value;
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -40,7 +40,7 @@
|
|||
#define GFX9_RPSTAT0_PREV_GT_FREQ_MASK INTEL_MASK(8, 0)
|
||||
|
||||
/* Programmable perf 64bits counters (used for GTRequestQueueFull counter on
|
||||
* gen7-11)
|
||||
* gfx7-11)
|
||||
*/
|
||||
#define PERF_CNT_1_DW0 0x91b8
|
||||
#define PERF_CNT_2_DW0 0x91c0
|
||||
|
|
|
@ -467,23 +467,23 @@ get_context_init(const struct gen_device_info *devinfo,
|
|||
uint32_t *data,
|
||||
uint32_t *size)
|
||||
{
|
||||
static const gen_context_init_t gen8_contexts[] = {
|
||||
[I915_ENGINE_CLASS_RENDER] = gen8_render_context_init,
|
||||
[I915_ENGINE_CLASS_COPY] = gen8_blitter_context_init,
|
||||
[I915_ENGINE_CLASS_VIDEO] = gen8_video_context_init,
|
||||
static const gen_context_init_t gfx8_contexts[] = {
|
||||
[I915_ENGINE_CLASS_RENDER] = gfx8_render_context_init,
|
||||
[I915_ENGINE_CLASS_COPY] = gfx8_blitter_context_init,
|
||||
[I915_ENGINE_CLASS_VIDEO] = gfx8_video_context_init,
|
||||
};
|
||||
static const gen_context_init_t gen10_contexts[] = {
|
||||
[I915_ENGINE_CLASS_RENDER] = gen10_render_context_init,
|
||||
[I915_ENGINE_CLASS_COPY] = gen10_blitter_context_init,
|
||||
[I915_ENGINE_CLASS_VIDEO] = gen10_video_context_init,
|
||||
static const gen_context_init_t gfx10_contexts[] = {
|
||||
[I915_ENGINE_CLASS_RENDER] = gfx10_render_context_init,
|
||||
[I915_ENGINE_CLASS_COPY] = gfx10_blitter_context_init,
|
||||
[I915_ENGINE_CLASS_VIDEO] = gfx10_video_context_init,
|
||||
};
|
||||
|
||||
assert(devinfo->ver >= 8);
|
||||
|
||||
if (devinfo->ver <= 10)
|
||||
gen8_contexts[engine_class](params, data, size);
|
||||
gfx8_contexts[engine_class](params, data, size);
|
||||
else
|
||||
gen10_contexts[engine_class](params, data, size);
|
||||
gfx10_contexts[engine_class](params, data, size);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
|
|
|
@ -618,7 +618,7 @@ decode_3dstate_sampler_state_pointers(struct aub_viewer_decode_ctx *ctx,
|
|||
}
|
||||
|
||||
static void
|
||||
decode_3dstate_sampler_state_pointers_gen6(struct aub_viewer_decode_ctx *ctx,
|
||||
decode_3dstate_sampler_state_pointers_gfx6(struct aub_viewer_decode_ctx *ctx,
|
||||
struct intel_group *inst,
|
||||
const uint32_t *p)
|
||||
{
|
||||
|
@ -864,7 +864,7 @@ struct custom_decoder {
|
|||
{ "3DSTATE_SAMPLER_STATE_POINTERS_DS", decode_3dstate_sampler_state_pointers, AUB_DECODE_STAGE_DS, },
|
||||
{ "3DSTATE_SAMPLER_STATE_POINTERS_HS", decode_3dstate_sampler_state_pointers, AUB_DECODE_STAGE_HS, },
|
||||
{ "3DSTATE_SAMPLER_STATE_POINTERS_PS", decode_3dstate_sampler_state_pointers, AUB_DECODE_STAGE_PS, },
|
||||
{ "3DSTATE_SAMPLER_STATE_POINTERS", decode_3dstate_sampler_state_pointers_gen6 },
|
||||
{ "3DSTATE_SAMPLER_STATE_POINTERS", decode_3dstate_sampler_state_pointers_gfx6 },
|
||||
|
||||
{ "3DSTATE_VIEWPORT_STATE_POINTERS_CC", decode_3dstate_viewport_state_pointers_cc },
|
||||
{ "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP", decode_3dstate_viewport_state_pointers_sf_clip },
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
#ifndef GFX10_CONTEXT_H
|
||||
#define GFX10_CONTEXT_H
|
||||
|
||||
static inline void gen10_render_context_init(const struct gen_context_parameters *params,
|
||||
static inline void gfx10_render_context_init(const struct gen_context_parameters *params,
|
||||
uint32_t *data, uint32_t *size)
|
||||
{
|
||||
*size = CONTEXT_RENDER_SIZE;
|
||||
|
@ -75,7 +75,7 @@ static inline void gen10_render_context_init(const struct gen_context_parameters
|
|||
*data++ = MI_BATCH_BUFFER_END | 1 /* End Context */;
|
||||
}
|
||||
|
||||
static inline void gen10_blitter_context_init(const struct gen_context_parameters *params,
|
||||
static inline void gfx10_blitter_context_init(const struct gen_context_parameters *params,
|
||||
uint32_t *data, uint32_t *size)
|
||||
{
|
||||
*size = CONTEXT_OTHER_SIZE;
|
||||
|
@ -125,7 +125,7 @@ static inline void gen10_blitter_context_init(const struct gen_context_parameter
|
|||
*data++ = MI_BATCH_BUFFER_END | 1 /* End Context */;
|
||||
}
|
||||
|
||||
static inline void gen10_video_context_init(const struct gen_context_parameters *params,
|
||||
static inline void gfx10_video_context_init(const struct gen_context_parameters *params,
|
||||
uint32_t *data, uint32_t *size)
|
||||
{
|
||||
*size = CONTEXT_OTHER_SIZE;
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
#ifndef GFX8_CONTEXT_H
|
||||
#define GFX8_CONTEXT_H
|
||||
|
||||
static inline void gen8_render_context_init(const struct gen_context_parameters *params,
|
||||
static inline void gfx8_render_context_init(const struct gen_context_parameters *params,
|
||||
uint32_t *data, uint32_t *size)
|
||||
{
|
||||
*size = CONTEXT_RENDER_SIZE;
|
||||
|
@ -73,7 +73,7 @@ static inline void gen8_render_context_init(const struct gen_context_parameters
|
|||
*data++ = MI_BATCH_BUFFER_END;
|
||||
}
|
||||
|
||||
static inline void gen8_blitter_context_init(const struct gen_context_parameters *params,
|
||||
static inline void gfx8_blitter_context_init(const struct gen_context_parameters *params,
|
||||
uint32_t *data, uint32_t *size)
|
||||
{
|
||||
*size = CONTEXT_OTHER_SIZE;
|
||||
|
@ -115,7 +115,7 @@ static inline void gen8_blitter_context_init(const struct gen_context_parameters
|
|||
*data++ = MI_BATCH_BUFFER_END;
|
||||
}
|
||||
|
||||
static inline void gen8_video_context_init(const struct gen_context_parameters *params,
|
||||
static inline void gfx8_video_context_init(const struct gen_context_parameters *params,
|
||||
uint32_t *data, uint32_t *size)
|
||||
{
|
||||
*size = CONTEXT_OTHER_SIZE;
|
||||
|
|
|
@ -153,7 +153,7 @@ i965_postprocess_labels()
|
|||
if (p->devinfo->ver >= 7) {
|
||||
brw_inst_set_jip(p->devinfo, inst, relative_offset);
|
||||
} else if (p->devinfo->ver == 6) {
|
||||
brw_inst_set_gen6_jump_count(p->devinfo, inst, relative_offset);
|
||||
brw_inst_set_gfx6_jump_count(p->devinfo, inst, relative_offset);
|
||||
}
|
||||
break;
|
||||
case BRW_OPCODE_BREAK:
|
||||
|
|
|
@ -829,7 +829,7 @@ mathinstruction:
|
|||
predicate MATH saturate math_function execsize dst src srcimm instoptions
|
||||
{
|
||||
brw_set_default_access_mode(p, $9.access_mode);
|
||||
gen6_math(p, $6, $4, $7, $8);
|
||||
gfx6_math(p, $6, $4, $7, $8);
|
||||
i965_asm_set_instruction_options(p, $9);
|
||||
brw_inst_set_exec_size(p->devinfo, brw_last_inst, $5);
|
||||
brw_inst_set_saturate(p->devinfo, brw_last_inst, $3);
|
||||
|
@ -1137,7 +1137,7 @@ branchinstruction:
|
|||
brw_set_src0(p, brw_last_inst, retype(brw_null_reg(),
|
||||
BRW_REGISTER_TYPE_D));
|
||||
brw_set_src1(p, brw_last_inst, brw_imm_d(0x0));
|
||||
brw_inst_set_gen4_pop_count(p->devinfo, brw_last_inst, $4);
|
||||
brw_inst_set_gfx4_pop_count(p->devinfo, brw_last_inst, $4);
|
||||
|
||||
brw_inst_set_thread_control(p->devinfo, brw_last_inst,
|
||||
BRW_THREAD_SWITCH);
|
||||
|
@ -1181,8 +1181,8 @@ branchinstruction:
|
|||
brw_set_dest(p, brw_last_inst, brw_ip_reg());
|
||||
brw_set_src0(p, brw_last_inst, brw_ip_reg());
|
||||
brw_set_src1(p, brw_last_inst, brw_imm_d(0x0));
|
||||
brw_inst_set_gen4_jump_count(p->devinfo, brw_last_inst, $3);
|
||||
brw_inst_set_gen4_pop_count(p->devinfo, brw_last_inst, $4);
|
||||
brw_inst_set_gfx4_jump_count(p->devinfo, brw_last_inst, $3);
|
||||
brw_inst_set_gfx4_pop_count(p->devinfo, brw_last_inst, $4);
|
||||
|
||||
if (!p->single_program_flow)
|
||||
brw_inst_set_thread_control(p->devinfo, brw_last_inst,
|
||||
|
@ -1232,8 +1232,8 @@ branchinstruction:
|
|||
brw_set_dest(p, brw_last_inst, brw_ip_reg());
|
||||
brw_set_src0(p, brw_last_inst, brw_ip_reg());
|
||||
brw_set_src1(p, brw_last_inst, brw_imm_d(0x0));
|
||||
brw_inst_set_gen4_jump_count(p->devinfo, brw_last_inst, $4);
|
||||
brw_inst_set_gen4_pop_count(p->devinfo, brw_last_inst, $5);
|
||||
brw_inst_set_gfx4_jump_count(p->devinfo, brw_last_inst, $4);
|
||||
brw_inst_set_gfx4_pop_count(p->devinfo, brw_last_inst, $5);
|
||||
|
||||
if (!p->single_program_flow)
|
||||
brw_inst_set_thread_control(p->devinfo, brw_last_inst,
|
||||
|
@ -1282,7 +1282,7 @@ branchinstruction:
|
|||
|
||||
brw_set_dest(p, brw_last_inst, brw_ip_reg());
|
||||
brw_set_src0(p, brw_last_inst, brw_ip_reg());
|
||||
brw_inst_set_gen4_jump_count(p->devinfo, brw_last_inst, $4);
|
||||
brw_inst_set_gfx4_jump_count(p->devinfo, brw_last_inst, $4);
|
||||
brw_set_src1(p, brw_last_inst, brw_imm_d($4));
|
||||
|
||||
if (!p->single_program_flow)
|
||||
|
@ -1327,8 +1327,8 @@ breakinstruction:
|
|||
brw_set_dest(p, brw_last_inst, brw_ip_reg());
|
||||
brw_set_src0(p, brw_last_inst, brw_ip_reg());
|
||||
brw_set_src1(p, brw_last_inst, brw_imm_d(0x0));
|
||||
brw_inst_set_gen4_jump_count(p->devinfo, brw_last_inst, $4);
|
||||
brw_inst_set_gen4_pop_count(p->devinfo, brw_last_inst, $5);
|
||||
brw_inst_set_gfx4_jump_count(p->devinfo, brw_last_inst, $4);
|
||||
brw_inst_set_gfx4_pop_count(p->devinfo, brw_last_inst, $5);
|
||||
|
||||
brw_pop_insn_state(p);
|
||||
}
|
||||
|
@ -1383,8 +1383,8 @@ breakinstruction:
|
|||
brw_set_src0(p, brw_last_inst, brw_ip_reg());
|
||||
brw_set_src1(p, brw_last_inst, brw_imm_d(0x0));
|
||||
|
||||
brw_inst_set_gen4_jump_count(p->devinfo, brw_last_inst, $4);
|
||||
brw_inst_set_gen4_pop_count(p->devinfo, brw_last_inst, $5);
|
||||
brw_inst_set_gfx4_jump_count(p->devinfo, brw_last_inst, $4);
|
||||
brw_inst_set_gfx4_pop_count(p->devinfo, brw_last_inst, $5);
|
||||
|
||||
brw_pop_insn_state(p);
|
||||
}
|
||||
|
@ -1435,8 +1435,8 @@ loopinstruction:
|
|||
brw_set_dest(p, brw_last_inst, brw_ip_reg());
|
||||
brw_set_src0(p, brw_last_inst, brw_ip_reg());
|
||||
brw_set_src1(p, brw_last_inst, brw_imm_d(0x0));
|
||||
brw_inst_set_gen4_jump_count(p->devinfo, brw_last_inst, $4);
|
||||
brw_inst_set_gen4_pop_count(p->devinfo, brw_last_inst, 0);
|
||||
brw_inst_set_gfx4_jump_count(p->devinfo, brw_last_inst, $4);
|
||||
brw_inst_set_gfx4_pop_count(p->devinfo, brw_last_inst, 0);
|
||||
|
||||
brw_pop_insn_state(p);
|
||||
}
|
||||
|
|
|
@ -160,17 +160,17 @@ i915_ioctl_get_param(int fd, unsigned long request, void *arg)
|
|||
return 0;
|
||||
|
||||
case I915_PARAM_NUM_FENCES_AVAIL:
|
||||
*gp->value = 8; /* gen2/3 value, unused in brw/iris */
|
||||
*gp->value = 8; /* gfx2/3 value, unused in brw/iris */
|
||||
return 0;
|
||||
|
||||
case I915_PARAM_HAS_BLT:
|
||||
*gp->value = 1; /* gen2/3 value, unused in brw/iris */
|
||||
*gp->value = 1; /* gfx2/3 value, unused in brw/iris */
|
||||
return 0;
|
||||
|
||||
case I915_PARAM_HAS_BSD:
|
||||
case I915_PARAM_HAS_LLC:
|
||||
case I915_PARAM_HAS_VEBOX:
|
||||
*gp->value = 0; /* gen2/3 value, unused in brw/iris */
|
||||
*gp->value = 0; /* gfx2/3 value, unused in brw/iris */
|
||||
return 0;
|
||||
|
||||
case I915_PARAM_HAS_GEM:
|
||||
|
|
|
@ -6,7 +6,7 @@ Missing Features:
|
|||
- Sparse memory
|
||||
|
||||
Performance:
|
||||
- Multi-{sampled/gen8,LOD} HiZ
|
||||
- Multi-{sampled/gfx8,LOD} HiZ
|
||||
- MSAA fast clears
|
||||
- Pushing pieces of UBOs?
|
||||
- Enable guardband clipping
|
||||
|
|
|
@ -569,8 +569,8 @@ static void
|
|||
emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_bo *bo, uint32_t offset)
|
||||
{
|
||||
/* In gen8+ the address field grew to two dwords to accomodate 48 bit
|
||||
* offsets. The high 16 bits are in the last dword, so we can use the gen8
|
||||
/* In gfx8+ the address field grew to two dwords to accomodate 48 bit
|
||||
* offsets. The high 16 bits are in the last dword, so we can use the gfx8
|
||||
* version in either case, as long as we set the instruction length in the
|
||||
* header accordingly. This means that we always emit three dwords here
|
||||
* and all the padding and adjustment we do in this file works for all
|
||||
|
@ -580,14 +580,14 @@ emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer,
|
|||
#define GFX7_MI_BATCH_BUFFER_START_length 2
|
||||
#define GFX7_MI_BATCH_BUFFER_START_length_bias 2
|
||||
|
||||
const uint32_t gen7_length =
|
||||
const uint32_t gfx7_length =
|
||||
GFX7_MI_BATCH_BUFFER_START_length - GFX7_MI_BATCH_BUFFER_START_length_bias;
|
||||
const uint32_t gen8_length =
|
||||
const uint32_t gfx8_length =
|
||||
GFX8_MI_BATCH_BUFFER_START_length - GFX8_MI_BATCH_BUFFER_START_length_bias;
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GFX8_MI_BATCH_BUFFER_START, bbs) {
|
||||
bbs.DWordLength = cmd_buffer->device->info.ver < 8 ?
|
||||
gen7_length : gen8_length;
|
||||
gfx7_length : gfx8_length;
|
||||
bbs.SecondLevelBatchBuffer = Firstlevelbatch;
|
||||
bbs.AddressSpaceIndicator = ASI_PPGTT;
|
||||
bbs.BatchBufferStartAddress = (struct anv_address) { bo, offset };
|
||||
|
|
|
@ -98,25 +98,25 @@ anv_device_init_blorp(struct anv_device *device)
|
|||
device->blorp.upload_shader = upload_blorp_shader;
|
||||
switch (device->info.verx10) {
|
||||
case 70:
|
||||
device->blorp.exec = gen7_blorp_exec;
|
||||
device->blorp.exec = gfx7_blorp_exec;
|
||||
break;
|
||||
case 75:
|
||||
device->blorp.exec = gen75_blorp_exec;
|
||||
device->blorp.exec = gfx75_blorp_exec;
|
||||
break;
|
||||
case 80:
|
||||
device->blorp.exec = gen8_blorp_exec;
|
||||
device->blorp.exec = gfx8_blorp_exec;
|
||||
break;
|
||||
case 90:
|
||||
device->blorp.exec = gen9_blorp_exec;
|
||||
device->blorp.exec = gfx9_blorp_exec;
|
||||
break;
|
||||
case 110:
|
||||
device->blorp.exec = gen11_blorp_exec;
|
||||
device->blorp.exec = gfx11_blorp_exec;
|
||||
break;
|
||||
case 120:
|
||||
device->blorp.exec = gen12_blorp_exec;
|
||||
device->blorp.exec = gfx12_blorp_exec;
|
||||
break;
|
||||
case 125:
|
||||
device->blorp.exec = gen125_blorp_exec;
|
||||
device->blorp.exec = gfx125_blorp_exec;
|
||||
break;
|
||||
default:
|
||||
unreachable("Unknown hardware generation");
|
||||
|
|
|
@ -95,7 +95,7 @@ anv_descriptor_data_for_type(const struct anv_physical_device *device,
|
|||
unreachable("Unsupported descriptor type");
|
||||
}
|
||||
|
||||
/* On gen8 and above when we have softpin enabled, we also need to push
|
||||
/* On gfx8 and above when we have softpin enabled, we also need to push
|
||||
* SSBO address ranges so that we can use A64 messages in the shader.
|
||||
*/
|
||||
if (device->has_a64_buffer_access &&
|
||||
|
|
|
@ -811,7 +811,7 @@ anv_physical_device_try_create(struct anv_instance *instance,
|
|||
/* We've had bindless samplers since Ivy Bridge (forever in Vulkan terms)
|
||||
* because it's just a matter of setting the sampler address in the sample
|
||||
* message header. However, we've not bothered to wire it up for vec4 so
|
||||
* we leave it disabled on gen7.
|
||||
* we leave it disabled on gfx7.
|
||||
*/
|
||||
device->has_bindless_samplers = device->info.ver >= 8;
|
||||
|
||||
|
@ -1486,7 +1486,7 @@ void anv_GetPhysicalDeviceFeatures2(
|
|||
(VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
|
||||
features->rectangularLines = true;
|
||||
features->bresenhamLines = true;
|
||||
/* Support for Smooth lines with MSAA was removed on gen11. From the
|
||||
/* Support for Smooth lines with MSAA was removed on gfx11. From the
|
||||
* BSpec section "Multisample ModesState" table for "AA Line Support
|
||||
* Requirements":
|
||||
*
|
||||
|
@ -1958,7 +1958,7 @@ anv_get_physical_device_properties_1_1(struct anv_physical_device *pdevice,
|
|||
VK_SUBGROUP_FEATURE_QUAD_BIT;
|
||||
if (pdevice->info.ver >= 8) {
|
||||
/* TODO: There's no technical reason why these can't be made to
|
||||
* work on gen7 but they don't at the moment so it's best to leave
|
||||
* work on gfx7 but they don't at the moment so it's best to leave
|
||||
* the feature disabled than enabled and broken.
|
||||
*/
|
||||
p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
|
||||
|
@ -2716,7 +2716,7 @@ anv_device_init_border_colors(struct anv_device *device)
|
|||
anv_state_pool_emit_data(&device->dynamic_state_pool,
|
||||
sizeof(border_colors), 512, border_colors);
|
||||
} else {
|
||||
static const struct gen8_border_color border_colors[] = {
|
||||
static const struct gfx8_border_color border_colors[] = {
|
||||
[VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
|
||||
[VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
|
||||
[VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
|
||||
|
@ -3186,7 +3186,7 @@ VkResult anv_CreateDevice(
|
|||
anv_state_reserved_pool_init(&device->custom_border_colors,
|
||||
&device->dynamic_state_pool,
|
||||
MAX_CUSTOM_BORDER_COLORS,
|
||||
sizeof(struct gen8_border_color), 64);
|
||||
sizeof(struct gfx8_border_color), 64);
|
||||
}
|
||||
|
||||
result = anv_state_pool_init(&device->instruction_state_pool, device,
|
||||
|
|
|
@ -1113,8 +1113,8 @@ anv_get_image_format_properties(
|
|||
}
|
||||
|
||||
/* From the bspec section entitled "Surface Layout and Tiling",
|
||||
* pre-gen9 has a 2 GB limitation of the size in bytes,
|
||||
* gen9 and gen10 have a 256 GB limitation and gen11+
|
||||
* pre-gfx9 has a 2 GB limitation of the size in bytes,
|
||||
* gfx9 and gfx10 have a 256 GB limitation and gfx11+
|
||||
* has a 16 TB limitation.
|
||||
*/
|
||||
uint64_t maxResourceSize = 0;
|
||||
|
|
|
@ -54,13 +54,13 @@ void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
|
|||
|
||||
void genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer);
|
||||
|
||||
void genX(cmd_buffer_emit_gen7_depth_flush)(struct anv_cmd_buffer *cmd_buffer);
|
||||
void genX(cmd_buffer_emit_gfx7_depth_flush)(struct anv_cmd_buffer *cmd_buffer);
|
||||
|
||||
void genX(cmd_buffer_set_binding_for_gen8_vb_flush)(struct anv_cmd_buffer *cmd_buffer,
|
||||
void genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(struct anv_cmd_buffer *cmd_buffer,
|
||||
int vb_index,
|
||||
struct anv_address vb_address,
|
||||
uint32_t vb_size);
|
||||
void genX(cmd_buffer_update_dirty_vbs_for_gen8_vb_flush)(struct anv_cmd_buffer *cmd_buffer,
|
||||
void genX(cmd_buffer_update_dirty_vbs_for_gfx8_vb_flush)(struct anv_cmd_buffer *cmd_buffer,
|
||||
uint32_t access_type,
|
||||
uint64_t vb_used);
|
||||
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue