2010-10-10 23:42:37 +01:00
|
|
|
/*
|
|
|
|
* Copyright © 2010 Intel Corporation
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
* Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
|
|
* IN THE SOFTWARE.
|
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Eric Anholt <eric@anholt.net>
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2017-03-20 16:04:38 +00:00
|
|
|
#ifndef BRW_FS_H
|
|
|
|
#define BRW_FS_H
|
2012-04-10 20:01:50 +01:00
|
|
|
|
2011-05-03 18:55:50 +01:00
|
|
|
#include "brw_shader.h"
|
2015-02-05 23:11:18 +00:00
|
|
|
#include "brw_ir_fs.h"
|
2015-06-03 17:59:44 +01:00
|
|
|
#include "brw_fs_builder.h"
|
2016-03-10 01:03:57 +00:00
|
|
|
#include "brw_fs_live_variables.h"
|
2020-03-26 21:59:02 +00:00
|
|
|
#include "brw_ir_performance.h"
|
2016-01-18 10:54:03 +00:00
|
|
|
#include "compiler/nir/nir.h"
|
2010-10-10 23:42:37 +01:00
|
|
|
|
2014-06-15 06:53:40 +01:00
|
|
|
struct bblock_t;
|
2012-06-06 18:57:54 +01:00
|
|
|
/* Forward declaration of the copy-propagation ACP entry type, defined in
 * brw_fs_copy_propagation.cpp; only used here via pointer in fs_visitor's
 * try_copy_propagate()/try_constant_propagate() prototypes.
 *
 * NOTE(review): an anonymous namespace in a header gives each translation
 * unit a distinct acp_entry type (ODR hazard if it were ever defined in
 * more than one TU) — confirm this is intentional and only ever completed
 * in a single .cpp file.
 */
namespace {
   struct acp_entry;
}
|
2012-05-11 00:10:15 +01:00
|
|
|
|
2016-03-13 23:35:49 +00:00
|
|
|
class fs_visitor;
|
|
|
|
|
2012-06-05 19:37:22 +01:00
|
|
|
namespace brw {
|
2016-03-13 23:35:49 +00:00
|
|
|
/**
 * Register pressure analysis of a shader.  Estimates how many registers
 * are live at any point of the program in GRF units.
 */
struct register_pressure {
   /* Runs the analysis over the program currently held by \p v. */
   register_pressure(const fs_visitor *v);
   ~register_pressure();

   /* Kinds of IR changes that invalidate this analysis result: any change
    * to instruction identity, instruction data flow, or the set of
    * variables requires recomputation.
    */
   analysis_dependency_class
   dependency_class() const
   {
      return (DEPENDENCY_INSTRUCTION_IDENTITY |
              DEPENDENCY_INSTRUCTION_DATA_FLOW |
              DEPENDENCY_VARIABLES);
   }

   /* Self-consistency check against the current program state.  Currently
    * unimplemented (FINISHME) and always reports success.
    */
   bool
   validate(const fs_visitor *) const
   {
      /* FINISHME */
      return true;
   }

   /* Per-instruction pressure estimate in GRF units — presumably indexed
    * by instruction IP (one entry per instruction); confirm against the
    * analysis implementation.
    */
   unsigned *regs_live_at_ip;
};
|
2012-06-05 19:37:22 +01:00
|
|
|
}
|
|
|
|
|
2015-06-30 06:50:28 +01:00
|
|
|
struct brw_gs_compile;
|
|
|
|
|
2015-06-25 18:55:51 +01:00
|
|
|
/* Convenience overload of offset(): advances \p reg by \p delta logical
 * components, taking the SIMD width from the builder's dispatch width
 * instead of requiring the caller to pass it explicitly.
 */
static inline fs_reg
offset(const fs_reg &reg, const brw::fs_builder &bld, unsigned delta)
{
   return offset(reg, bld.dispatch_width(), delta);
}
|
|
|
|
|
2016-11-29 13:20:20 +00:00
|
|
|
#define UBO_START ((1 << 16) - 4)
|
|
|
|
|
2016-10-17 22:10:26 +01:00
|
|
|
/* Miscellaneous per-compile statistics gathered by the visitor, reported
 * alongside the generated code.
 */
struct shader_stats {
   /* Name of the instruction-scheduler heuristic used for this compile —
    * presumably one of the instruction_scheduler_mode strings; confirm
    * against schedule_instructions().
    */
   const char *scheduler_mode;
   /* Number of constants promoted — presumably uniform values promoted by
    * opt_combine_constants(); confirm against the setter.
    */
   unsigned promoted_constants;
};
|
|
|
|
|
2012-11-09 09:05:47 +00:00
|
|
|
/**
|
|
|
|
* The fragment shader front-end.
|
|
|
|
*
|
|
|
|
* Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR.
|
|
|
|
*/
|
2015-05-20 18:35:34 +01:00
|
|
|
class fs_visitor : public backend_shader
|
2010-10-10 23:42:37 +01:00
|
|
|
{
|
|
|
|
public:
|
2015-06-23 01:17:56 +01:00
|
|
|
fs_visitor(const struct brw_compiler *compiler, void *log_data,
|
2014-05-14 09:21:02 +01:00
|
|
|
void *mem_ctx,
|
2019-02-21 23:20:39 +00:00
|
|
|
const brw_base_prog_key *key,
|
2015-03-12 05:41:49 +00:00
|
|
|
struct brw_stage_prog_data *prog_data,
|
2015-10-06 03:26:02 +01:00
|
|
|
const nir_shader *shader,
|
2015-06-19 23:40:09 +01:00
|
|
|
unsigned dispatch_width,
|
2015-11-10 22:35:27 +00:00
|
|
|
int shader_time_index,
|
2021-04-06 23:23:08 +01:00
|
|
|
bool debug_enabled);
|
2015-06-30 06:50:28 +01:00
|
|
|
fs_visitor(const struct brw_compiler *compiler, void *log_data,
|
|
|
|
void *mem_ctx,
|
|
|
|
struct brw_gs_compile *gs_compile,
|
|
|
|
struct brw_gs_prog_data *prog_data,
|
2015-11-03 20:51:32 +00:00
|
|
|
const nir_shader *shader,
|
2021-03-23 18:31:51 +00:00
|
|
|
int shader_time_index,
|
|
|
|
bool debug_enabled);
|
2015-06-30 06:50:28 +01:00
|
|
|
void init();
|
2012-07-04 21:12:50 +01:00
|
|
|
~fs_visitor();
|
2010-10-10 23:42:37 +01:00
|
|
|
|
2014-05-16 10:21:51 +01:00
|
|
|
fs_reg vgrf(const glsl_type *const type);
|
2011-07-26 02:13:04 +01:00
|
|
|
void import_uniforms(fs_visitor *v);
|
2010-10-10 23:42:37 +01:00
|
|
|
|
2015-06-03 20:22:39 +01:00
|
|
|
void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld,
|
|
|
|
const fs_reg &dst,
|
|
|
|
const fs_reg &surf_index,
|
|
|
|
const fs_reg &varying_offset,
|
2020-02-21 16:59:38 +00:00
|
|
|
uint32_t const_offset,
|
|
|
|
uint8_t alignment);
|
2015-06-03 20:22:10 +01:00
|
|
|
void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf);
|
2012-11-09 00:06:24 +00:00
|
|
|
|
2016-05-16 22:30:25 +01:00
|
|
|
bool run_fs(bool allow_spilling, bool do_rep_send);
|
2017-09-29 00:25:31 +01:00
|
|
|
bool run_vs();
|
intel/compiler: Implement TCS 8_PATCH mode and INTEL_DEBUG=tcs8
Our tessellation control shaders can be dispatched in several modes.
- SINGLE_PATCH (Gen7+) processes a single patch per thread, with each
channel corresponding to a different patch vertex. PATCHLIST_N will
launch (N / 8) threads. If N is less than 8, some channels will be
disabled, leaving some untapped hardware capabilities. Conditionals
based on gl_InvocationID are non-uniform, which means that they'll
often have to execute both paths. However, if there are fewer than
8 vertices, all invocations will happen within a single thread, so
barriers can become no-ops, which is nice. We also burn a maximum
of 4 registers for ICP handles, so we can compile without regard for
the value of N. It also works in all cases.
- DUAL_PATCH mode processes up to two patches at a time, where the first
four channels come from patch 1, and the second group of four come
from patch 2. This tries to provide better EU utilization for small
patches (N <= 4). It cannot be used in all cases.
- 8_PATCH mode processes 8 patches at a time, with a thread launched per
vertex in the patch. Each channel corresponds to the same vertex, but
in each of the 8 patches. This utilizes all channels even for small
patches. It also makes conditions on gl_InvocationID uniform, leading
to proper jumps. Barriers, unfortunately, become real. Worse, for
PATCHLIST_N, the thread payload burns N registers for ICP handles.
This can burn up to 32 registers, or 1/4 of our register file, for
URB handles. For Vulkan (and DX), we know the number of vertices at
compile time, so we can limit the amount of waste. In GL, the patch
dimension is dynamic state, so we either would have to waste all 32
(not reasonable) or guess (badly) and recompile. This is unfortunate.
Because we can only spawn 16 thread instances, we can only use this
mode for PATCHLIST_16 and smaller. The rest must use SINGLE_PATCH.
This patch implements the new 8_PATCH TCS mode, but leaves us using
SINGLE_PATCH by default. A new INTEL_DEBUG=tcs8 flag will switch to
using 8_PATCH mode for testing and benchmarking purposes. We may
want to consider using 8_PATCH mode in Vulkan in some cases.
The data I've seen shows that 8_PATCH mode can be more efficient in
some cases, but SINGLE_PATCH mode (the one we use today) is faster
in other cases. Ultimately, the TES matters much more than the TCS
for performance, so the decision may not matter much.
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2019-05-03 22:57:54 +01:00
|
|
|
bool run_tcs();
|
2015-11-10 22:35:27 +00:00
|
|
|
bool run_tes();
|
2015-03-12 06:14:31 +00:00
|
|
|
bool run_gs();
|
2020-05-19 22:37:44 +01:00
|
|
|
bool run_cs(bool allow_spilling);
|
2020-10-21 20:46:50 +01:00
|
|
|
bool run_bs(bool allow_spilling);
|
2014-11-14 00:28:18 +00:00
|
|
|
void optimize();
|
2020-05-19 22:37:44 +01:00
|
|
|
void allocate_registers(bool allow_spilling);
|
2021-03-29 23:40:04 +01:00
|
|
|
void setup_fs_payload_gfx4();
|
|
|
|
void setup_fs_payload_gfx6();
|
2014-10-28 05:42:50 +00:00
|
|
|
void setup_vs_payload();
|
2015-03-12 06:14:31 +00:00
|
|
|
void setup_gs_payload();
|
2014-08-31 03:57:39 +01:00
|
|
|
void setup_cs_payload();
|
2018-11-16 03:05:08 +00:00
|
|
|
bool fixup_sends_duplicate_payload();
|
2014-12-30 04:33:12 +00:00
|
|
|
void fixup_3src_null_dest();
|
2020-01-24 06:55:33 +00:00
|
|
|
bool fixup_nomask_control_flow();
|
2010-10-10 23:42:37 +01:00
|
|
|
void assign_curb_setup();
|
|
|
|
void assign_urb_setup();
|
2015-03-12 06:14:31 +00:00
|
|
|
void convert_attr_sources_to_hw_regs(fs_inst *inst);
|
2014-10-28 05:42:50 +00:00
|
|
|
void assign_vs_urb_setup();
|
intel/compiler: Implement TCS 8_PATCH mode and INTEL_DEBUG=tcs8
Our tessellation control shaders can be dispatched in several modes.
- SINGLE_PATCH (Gen7+) processes a single patch per thread, with each
channel corresponding to a different patch vertex. PATCHLIST_N will
launch (N / 8) threads. If N is less than 8, some channels will be
disabled, leaving some untapped hardware capabilities. Conditionals
based on gl_InvocationID are non-uniform, which means that they'll
often have to execute both paths. However, if there are fewer than
8 vertices, all invocations will happen within a single thread, so
barriers can become no-ops, which is nice. We also burn a maximum
of 4 registers for ICP handles, so we can compile without regard for
the value of N. It also works in all cases.
- DUAL_PATCH mode processes up to two patches at a time, where the first
four channels come from patch 1, and the second group of four come
from patch 2. This tries to provide better EU utilization for small
patches (N <= 4). It cannot be used in all cases.
- 8_PATCH mode processes 8 patches at a time, with a thread launched per
vertex in the patch. Each channel corresponds to the same vertex, but
in each of the 8 patches. This utilizes all channels even for small
patches. It also makes conditions on gl_InvocationID uniform, leading
to proper jumps. Barriers, unfortunately, become real. Worse, for
PATCHLIST_N, the thread payload burns N registers for ICP handles.
This can burn up to 32 registers, or 1/4 of our register file, for
URB handles. For Vulkan (and DX), we know the number of vertices at
compile time, so we can limit the amount of waste. In GL, the patch
dimension is dynamic state, so we either would have to waste all 32
(not reasonable) or guess (badly) and recompile. This is unfortunate.
Because we can only spawn 16 thread instances, we can only use this
mode for PATCHLIST_16 and smaller. The rest must use SINGLE_PATCH.
This patch implements the new 8_PATCH TCS mode, but leaves us using
SINGLE_PATCH by default. A new INTEL_DEBUG=tcs8 flag will switch to
using 8_PATCH mode for testing and benchmarking purposes. We may
want to consider using 8_PATCH mode in Vulkan in some cases.
The data I've seen shows that 8_PATCH mode can be more efficient in
some cases, but SINGLE_PATCH mode (the one we use today) is faster
in other cases. Ultimately, the TES matters much more than the TCS
for performance, so the decision may not matter much.
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2019-05-03 22:57:54 +01:00
|
|
|
void assign_tcs_urb_setup();
|
2015-11-10 22:35:27 +00:00
|
|
|
void assign_tes_urb_setup();
|
2015-03-12 06:14:31 +00:00
|
|
|
void assign_gs_urb_setup();
|
2016-05-16 22:30:25 +01:00
|
|
|
bool assign_regs(bool allow_spilling, bool spill_all);
|
2010-10-10 23:42:37 +01:00
|
|
|
void assign_regs_trivial();
|
2015-06-12 20:01:35 +01:00
|
|
|
void calculate_payload_ranges(int payload_node_count,
|
2016-09-23 13:15:33 +01:00
|
|
|
int *payload_last_use_ip) const;
|
2010-10-14 04:17:15 +01:00
|
|
|
void split_virtual_grfs();
|
2014-09-16 21:14:09 +01:00
|
|
|
bool compact_virtual_grfs();
|
2014-03-11 21:35:27 +00:00
|
|
|
void assign_constant_locations();
|
2017-06-02 17:54:31 +01:00
|
|
|
bool get_pull_locs(const fs_reg &src, unsigned *out_surf_index,
|
|
|
|
unsigned *out_pull_index);
|
2015-12-09 01:14:49 +00:00
|
|
|
void lower_constant_loads();
|
2016-03-13 02:50:24 +00:00
|
|
|
virtual void invalidate_analysis(brw::analysis_dependency_class c);
|
2015-07-02 23:41:02 +01:00
|
|
|
void validate();
|
2011-07-23 00:45:15 +01:00
|
|
|
bool opt_algebraic();
|
2020-11-30 23:24:51 +00:00
|
|
|
bool opt_redundant_halt();
|
2012-05-11 00:10:15 +01:00
|
|
|
bool opt_cse();
|
2016-03-13 23:25:57 +00:00
|
|
|
bool opt_cse_local(const brw::fs_live_variables &live, bblock_t *block, int &ip);
|
|
|
|
|
2016-11-28 18:45:08 +00:00
|
|
|
bool opt_copy_propagation();
|
2012-06-06 18:57:54 +01:00
|
|
|
bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
|
2014-09-24 01:22:09 +01:00
|
|
|
bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
|
2016-11-28 18:45:08 +00:00
|
|
|
bool opt_copy_propagation_local(void *mem_ctx, bblock_t *block,
|
|
|
|
exec_list *acp);
|
2016-04-20 22:22:53 +01:00
|
|
|
bool opt_drop_redundant_mov_to_flags();
|
2014-04-14 23:01:37 +01:00
|
|
|
bool opt_register_renaming();
|
2017-06-15 23:23:57 +01:00
|
|
|
bool opt_bank_conflicts();
|
2013-11-30 06:16:14 +00:00
|
|
|
bool register_coalesce();
|
2010-10-08 22:00:14 +01:00
|
|
|
bool compute_to_mrf();
|
2015-02-20 18:25:04 +00:00
|
|
|
bool eliminate_find_live_channel();
|
2010-10-10 23:42:37 +01:00
|
|
|
bool dead_code_eliminate();
|
2010-11-19 07:57:05 +00:00
|
|
|
bool remove_duplicate_mrf_writes();
|
2017-07-01 07:14:56 +01:00
|
|
|
bool remove_extra_rounding_modes();
|
2015-02-08 21:59:57 +00:00
|
|
|
|
2013-11-07 01:38:23 +00:00
|
|
|
void schedule_instructions(instruction_scheduler_mode mode);
|
2021-03-29 23:40:04 +01:00
|
|
|
void insert_gfx4_send_dependency_workarounds();
|
|
|
|
void insert_gfx4_pre_send_dependency_workarounds(bblock_t *block,
|
2014-08-25 03:07:01 +01:00
|
|
|
fs_inst *inst);
|
2021-03-29 23:40:04 +01:00
|
|
|
void insert_gfx4_post_send_dependency_workarounds(bblock_t *block,
|
2014-08-25 03:07:01 +01:00
|
|
|
fs_inst *inst);
|
i965: Accurately bail on SIMD16 compiles.
Ideally, we'd like to never even attempt the SIMD16 compile if we could
know ahead of time that it won't succeed---it's purely a waste of time.
This is especially important for state-based recompiles, which happen at
draw time.
The fragment shader compiler has a number of checks like:
if (dispatch_width == 16)
fail("...some reason...");
This patch introduces a new no16() function which replaces the above
pattern. In the SIMD8 compile, it sets a "SIMD16 will never work" flag.
Then, brw_wm_fs_emit can check that flag, skip the SIMD16 compile, and
issue a helpful performance warning if INTEL_DEBUG=perf is set. (In
SIMD16 mode, no16() calls fail(), for safety's sake.)
The great part is that this is not a heuristic---if the flag is set, we
know with 100% certainty that the SIMD16 compile would fail. (It might
fail anyway if we run out of registers, but it's always worth trying.)
v2: Fix missing va_end in early-return case (caught by Ilia Mirkin).
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz> [v1]
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> [v1]
Reviewed-by: Eric Anholt <eric@anholt.net>
2014-03-07 08:49:45 +00:00
|
|
|
void vfail(const char *msg, va_list args);
|
2011-03-13 20:43:05 +00:00
|
|
|
void fail(const char *msg, ...);
|
2016-05-18 22:39:52 +01:00
|
|
|
void limit_dispatch_width(unsigned n, const char *msg);
|
2013-02-16 03:26:48 +00:00
|
|
|
void lower_uniform_pull_constant_loads();
|
2014-04-18 19:56:46 +01:00
|
|
|
bool lower_load_payload();
|
2016-05-05 10:45:19 +01:00
|
|
|
bool lower_pack();
|
2018-12-29 12:00:13 +00:00
|
|
|
bool lower_regioning();
|
2015-07-13 15:44:58 +01:00
|
|
|
bool lower_logical_sends();
|
2015-05-11 17:29:56 +01:00
|
|
|
bool lower_integer_multiplication();
|
2016-02-11 20:27:02 +00:00
|
|
|
bool lower_minmax();
|
2015-07-13 19:15:31 +01:00
|
|
|
bool lower_simd_width();
|
2020-01-04 00:12:23 +00:00
|
|
|
bool lower_barycentrics();
|
2019-01-11 04:23:53 +00:00
|
|
|
bool lower_derivatives();
|
2018-11-09 22:13:37 +00:00
|
|
|
bool lower_scoreboard();
|
2018-09-19 09:28:06 +01:00
|
|
|
bool lower_sub_sat();
|
2014-02-12 19:00:46 +00:00
|
|
|
bool opt_combine_constants();
|
2011-01-19 01:16:49 +00:00
|
|
|
|
2010-10-10 23:42:37 +01:00
|
|
|
void emit_dummy_fs();
|
2014-09-26 22:47:03 +01:00
|
|
|
void emit_repclear_shader();
|
2016-07-15 00:52:10 +01:00
|
|
|
void emit_fragcoord_interpolation(fs_reg wpos);
|
2014-08-05 18:29:00 +01:00
|
|
|
fs_reg *emit_frontfacing_interpolation();
|
2014-08-05 19:10:07 +01:00
|
|
|
fs_reg *emit_samplepos_setup();
|
2014-10-17 20:59:18 +01:00
|
|
|
fs_reg *emit_sampleid_setup();
|
i965: Fix gl_SampleMaskIn[] in per-sample shading mode.
The coverage mask is not sufficient - in per-sample mode, we also need
to AND with a mask representing the samples being processed by the
current fragment shader invocation.
Fixes 18 dEQP-GLES31.functional.shaders.sample_variables tests:
sample_mask_in.bit_count_per_sample.multisample_{rbo,texture}_{1,2,4,8}
sample_mask_in.bit_count_per_two_samples.multisample_{rbo,texture}_{4,8}
sample_mask_in.bits_unique_per_sample.multisample_{rbo,texture}_{1,2,4,8}
sample_mask_in.bits_unique_per_two_samples.multisample_{rbo,texture}_{4,8}
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2016-04-06 04:14:22 +01:00
|
|
|
fs_reg *emit_samplemaskin_setup();
|
2021-03-29 23:40:04 +01:00
|
|
|
void emit_interpolation_setup_gfx4();
|
|
|
|
void emit_interpolation_setup_gfx6();
|
2013-10-24 23:53:05 +01:00
|
|
|
void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
|
2015-07-17 16:23:31 +01:00
|
|
|
fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
|
2019-02-06 21:42:17 +00:00
|
|
|
const fs_reg &texture,
|
|
|
|
const fs_reg &texture_handle);
|
2021-03-29 23:40:04 +01:00
|
|
|
void emit_gfx6_gather_wa(uint8_t wa, fs_reg dst);
|
2015-08-10 19:52:50 +01:00
|
|
|
fs_reg resolve_source_modifiers(const fs_reg &src);
|
2018-06-26 03:50:56 +01:00
|
|
|
void emit_fsign(const class brw::fs_builder &, const nir_alu_instr *instr,
|
2018-06-26 03:53:38 +01:00
|
|
|
fs_reg result, fs_reg *op, unsigned fsign_src);
|
2018-06-01 11:36:47 +01:00
|
|
|
void emit_shader_float_controls_execution_mode();
|
2013-10-23 01:51:28 +01:00
|
|
|
bool opt_peephole_sel();
|
2013-10-23 01:51:28 +01:00
|
|
|
bool opt_peephole_predicated_break();
|
2013-12-12 08:30:16 +00:00
|
|
|
bool opt_saturate_propagation();
|
2014-08-22 18:54:43 +01:00
|
|
|
bool opt_cmod_propagation();
|
2015-04-24 00:56:53 +01:00
|
|
|
bool opt_zero_samples();
|
2010-10-08 22:35:34 +01:00
|
|
|
|
2019-05-03 22:20:00 +01:00
|
|
|
void set_tcs_invocation_id();
|
|
|
|
|
2014-08-15 18:32:07 +01:00
|
|
|
void emit_nir_code();
|
2015-10-01 20:23:53 +01:00
|
|
|
void nir_setup_outputs();
|
|
|
|
void nir_setup_uniforms();
|
|
|
|
void nir_emit_system_values();
|
2014-08-15 18:32:07 +01:00
|
|
|
void nir_emit_impl(nir_function_impl *impl);
|
|
|
|
void nir_emit_cf_list(exec_list *list);
|
|
|
|
void nir_emit_if(nir_if *if_stmt);
|
|
|
|
void nir_emit_loop(nir_loop *loop);
|
|
|
|
void nir_emit_block(nir_block *block);
|
|
|
|
void nir_emit_instr(nir_instr *instr);
|
2019-05-21 20:09:42 +01:00
|
|
|
void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr,
|
|
|
|
bool need_dest);
|
2018-12-03 23:53:36 +00:00
|
|
|
bool try_emit_b2fi_of_inot(const brw::fs_builder &bld, fs_reg result,
|
|
|
|
nir_alu_instr *instr);
|
2015-06-26 00:22:26 +01:00
|
|
|
void nir_emit_load_const(const brw::fs_builder &bld,
|
|
|
|
nir_load_const_instr *instr);
|
2015-11-05 07:05:07 +00:00
|
|
|
void nir_emit_vs_intrinsic(const brw::fs_builder &bld,
|
|
|
|
nir_intrinsic_instr *instr);
|
2015-11-15 01:40:43 +00:00
|
|
|
void nir_emit_tcs_intrinsic(const brw::fs_builder &bld,
|
|
|
|
nir_intrinsic_instr *instr);
|
2015-11-05 07:05:07 +00:00
|
|
|
void nir_emit_gs_intrinsic(const brw::fs_builder &bld,
|
|
|
|
nir_intrinsic_instr *instr);
|
|
|
|
void nir_emit_fs_intrinsic(const brw::fs_builder &bld,
|
|
|
|
nir_intrinsic_instr *instr);
|
|
|
|
void nir_emit_cs_intrinsic(const brw::fs_builder &bld,
|
|
|
|
nir_intrinsic_instr *instr);
|
2020-10-21 20:46:50 +01:00
|
|
|
void nir_emit_bs_intrinsic(const brw::fs_builder &bld,
|
|
|
|
nir_intrinsic_instr *instr);
|
2018-08-16 22:23:10 +01:00
|
|
|
fs_reg get_nir_image_intrinsic_image(const brw::fs_builder &bld,
|
|
|
|
nir_intrinsic_instr *instr);
|
2018-10-20 16:05:33 +01:00
|
|
|
fs_reg get_nir_ssbo_intrinsic_index(const brw::fs_builder &bld,
|
|
|
|
nir_intrinsic_instr *instr);
|
2019-02-28 14:15:30 +00:00
|
|
|
fs_reg swizzle_nir_scratch_addr(const brw::fs_builder &bld,
|
|
|
|
const fs_reg &addr,
|
|
|
|
bool in_dwords);
|
2015-06-03 19:01:32 +01:00
|
|
|
void nir_emit_intrinsic(const brw::fs_builder &bld,
|
|
|
|
nir_intrinsic_instr *instr);
|
2015-11-10 22:35:27 +00:00
|
|
|
void nir_emit_tes_intrinsic(const brw::fs_builder &bld,
|
|
|
|
nir_intrinsic_instr *instr);
|
2015-06-01 08:41:47 +01:00
|
|
|
void nir_emit_ssbo_atomic(const brw::fs_builder &bld,
|
|
|
|
int op, nir_intrinsic_instr *instr);
|
2018-04-18 22:02:33 +01:00
|
|
|
void nir_emit_ssbo_atomic_float(const brw::fs_builder &bld,
|
|
|
|
int op, nir_intrinsic_instr *instr);
|
2015-10-10 21:01:03 +01:00
|
|
|
void nir_emit_shared_atomic(const brw::fs_builder &bld,
|
|
|
|
int op, nir_intrinsic_instr *instr);
|
2018-04-18 22:02:33 +01:00
|
|
|
void nir_emit_shared_atomic_float(const brw::fs_builder &bld,
|
|
|
|
int op, nir_intrinsic_instr *instr);
|
2018-11-26 21:15:04 +00:00
|
|
|
void nir_emit_global_atomic(const brw::fs_builder &bld,
|
|
|
|
int op, nir_intrinsic_instr *instr);
|
|
|
|
void nir_emit_global_atomic_float(const brw::fs_builder &bld,
|
|
|
|
int op, nir_intrinsic_instr *instr);
|
2015-06-03 19:02:57 +01:00
|
|
|
void nir_emit_texture(const brw::fs_builder &bld,
|
|
|
|
nir_tex_instr *instr);
|
2015-06-03 18:57:12 +01:00
|
|
|
void nir_emit_jump(const brw::fs_builder &bld,
|
|
|
|
nir_jump_instr *instr);
|
2016-05-19 22:43:23 +01:00
|
|
|
fs_reg get_nir_src(const nir_src &src);
|
|
|
|
fs_reg get_nir_src_imm(const nir_src &src);
|
|
|
|
fs_reg get_nir_dest(const nir_dest &dest);
|
2015-11-10 22:35:27 +00:00
|
|
|
fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
|
2019-05-03 22:37:11 +01:00
|
|
|
fs_reg get_tcs_single_patch_icp_handle(const brw::fs_builder &bld,
|
|
|
|
nir_intrinsic_instr *instr);
|
intel/compiler: Implement TCS 8_PATCH mode and INTEL_DEBUG=tcs8
Our tessellation control shaders can be dispatched in several modes.
- SINGLE_PATCH (Gen7+) processes a single patch per thread, with each
channel corresponding to a different patch vertex. PATCHLIST_N will
launch (N / 8) threads. If N is less than 8, some channels will be
disabled, leaving some untapped hardware capabilities. Conditionals
based on gl_InvocationID are non-uniform, which means that they'll
often have to execute both paths. However, if there are fewer than
8 vertices, all invocations will happen within a single thread, so
barriers can become no-ops, which is nice. We also burn a maximum
of 4 registers for ICP handles, so we can compile without regard for
the value of N. It also works in all cases.
- DUAL_PATCH mode processes up to two patches at a time, where the first
four channels come from patch 1, and the second group of four come
from patch 2. This tries to provide better EU utilization for small
patches (N <= 4). It cannot be used in all cases.
- 8_PATCH mode processes 8 patches at a time, with a thread launched per
vertex in the patch. Each channel corresponds to the same vertex, but
in each of the 8 patches. This utilizes all channels even for small
patches. It also makes conditions on gl_InvocationID uniform, leading
to proper jumps. Barriers, unfortunately, become real. Worse, for
PATCHLIST_N, the thread payload burns N registers for ICP handles.
This can burn up to 32 registers, or 1/4 of our register file, for
URB handles. For Vulkan (and DX), we know the number of vertices at
compile time, so we can limit the amount of waste. In GL, the patch
dimension is dynamic state, so we either would have to waste all 32
(not reasonable) or guess (badly) and recompile. This is unfortunate.
Because we can only spawn 16 thread instances, we can only use this
mode for PATCHLIST_16 and smaller. The rest must use SINGLE_PATCH.
This patch implements the new 8_PATCH TCS mode, but leaves us using
SINGLE_PATCH by default. A new INTEL_DEBUG=tcs8 flag will switch to
using 8_PATCH mode for testing and benchmarking purposes. We may
want to consider using 8_PATCH mode in Vulkan in some cases.
The data I've seen shows that 8_PATCH mode can be more efficient in
some cases, but SINGLE_PATCH mode (the one we use today) is faster
in other cases. Ultimately, the TES matters much more than the TCS
for performance, so the decision may not matter much.
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2019-05-03 22:57:54 +01:00
|
|
|
fs_reg get_tcs_eight_patch_icp_handle(const brw::fs_builder &bld,
|
|
|
|
nir_intrinsic_instr *instr);
|
|
|
|
struct brw_reg get_tcs_output_urb_handle();
|
2019-05-03 22:37:11 +01:00
|
|
|
|
2015-06-03 19:12:49 +01:00
|
|
|
void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
|
|
|
|
unsigned wr_mask);
|
2014-08-15 18:32:07 +01:00
|
|
|
|
2016-01-21 17:10:09 +00:00
|
|
|
bool optimize_extract_to_float(nir_alu_instr *instr,
|
|
|
|
const fs_reg &result);
|
2015-02-15 21:45:04 +00:00
|
|
|
bool optimize_frontfacing_ternary(nir_alu_instr *instr,
|
|
|
|
const fs_reg &result);
|
|
|
|
|
2013-10-27 00:32:03 +01:00
|
|
|
void emit_alpha_test();
|
2015-06-03 19:07:52 +01:00
|
|
|
fs_inst *emit_single_fb_write(const brw::fs_builder &bld,
|
|
|
|
fs_reg color1, fs_reg color2,
|
2015-07-16 14:12:48 +01:00
|
|
|
fs_reg src0_alpha, unsigned components);
|
i965,iris,anv: Make alpha to coverage work with sample mask
From "Alpha Coverage" section of SKL PRM Volume 7:
"If Pixel Shader outputs oMask, AlphaToCoverage is disabled in
hardware, regardless of the state setting for this feature."
From OpenGL spec 4.6, "15.2 Shader Execution":
"The built-in integer array gl_SampleMask can be used to change
the sample coverage for a fragment from within the shader."
From OpenGL spec 4.6, "17.3.1 Alpha To Coverage":
"If SAMPLE_ALPHA_TO_COVERAGE is enabled, a temporary coverage value
is generated where each bit is determined by the alpha value at the
corresponding sample location. The temporary coverage value is then
ANDed with the fragment coverage value to generate a new fragment
coverage value."
Similar wording could be found in Vulkan spec 1.1.100
"25.6. Multisample Coverage"
Thus we need to compute alpha to coverage dithering manually in shader
and replace sample mask store with the bitwise-AND of sample mask and
alpha to coverage dithering.
The following formula is used to compute final sample mask:
m = int(16.0 * clamp(src0_alpha, 0.0, 1.0))
dither_mask = 0x1111 * ((0xfea80 >> (m & ~3)) & 0xf) |
0x0808 * (m & 2) | 0x0100 * (m & 1)
sample_mask = sample_mask & dither_mask
Credits to Francisco Jerez <currojerez@riseup.net> for creating it.
It gives a number of ones proportional to the alpha for 2, 4, 8 or 16
least significant bits of the result.
GEN6 hardware does not have issue with simultaneous usage of sample mask
and alpha to coverage however due to the wrong sending order of oMask
and src0_alpha it is still affected by it.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109743
Signed-off-by: Danylo Piliaiev <danylo.piliaiev@globallogic.com>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
2019-02-20 17:39:18 +00:00
|
|
|
void emit_alpha_to_coverage_workaround(const fs_reg &src0_alpha);
|
2010-10-10 23:42:37 +01:00
|
|
|
void emit_fb_writes();
|
2016-07-22 04:25:28 +01:00
|
|
|
fs_inst *emit_non_coherent_fb_read(const brw::fs_builder &bld,
|
|
|
|
const fs_reg &dst, unsigned target);
|
2015-03-12 06:14:31 +00:00
|
|
|
void emit_urb_writes(const fs_reg &gs_vertex_count = fs_reg());
|
|
|
|
void set_gs_stream_control_data_bits(const fs_reg &vertex_count,
|
|
|
|
unsigned stream_id);
|
|
|
|
void emit_gs_control_data_bits(const fs_reg &vertex_count);
|
|
|
|
void emit_gs_end_primitive(const nir_src &vertex_count_nir_src);
|
|
|
|
void emit_gs_vertex(const nir_src &vertex_count_nir_src,
|
|
|
|
unsigned stream_id);
|
|
|
|
void emit_gs_thread_end();
|
|
|
|
void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src,
|
2015-11-25 22:14:05 +00:00
|
|
|
unsigned base_offset, const nir_src &offset_src,
|
2016-05-19 06:58:51 +01:00
|
|
|
unsigned num_components, unsigned first_component);
|
2015-04-12 10:06:57 +01:00
|
|
|
void emit_cs_terminate();
|
2014-10-10 16:28:24 +01:00
|
|
|
fs_reg *emit_cs_work_group_id_setup();
|
2012-11-27 22:10:52 +00:00
|
|
|
|
2014-08-27 19:32:08 +01:00
|
|
|
void emit_barrier();
|
|
|
|
|
2012-11-27 22:10:52 +00:00
|
|
|
void emit_shader_time_begin();
|
|
|
|
void emit_shader_time_end();
|
2015-06-03 18:43:09 +01:00
|
|
|
void SHADER_TIME_ADD(const brw::fs_builder &bld,
|
2015-06-19 23:40:09 +01:00
|
|
|
int shader_time_subindex,
|
2015-06-19 22:46:03 +01:00
|
|
|
fs_reg value);
|
2012-11-27 22:10:52 +00:00
|
|
|
|
2015-06-03 18:43:09 +01:00
|
|
|
fs_reg get_timestamp(const brw::fs_builder &bld);
|
2012-11-27 22:10:52 +00:00
|
|
|
|
2016-04-26 02:33:22 +01:00
|
|
|
fs_reg interp_reg(int location, int channel);
|
2015-06-28 19:04:17 +01:00
|
|
|
|
2016-09-23 13:15:33 +01:00
|
|
|
virtual void dump_instructions() const;
|
|
|
|
virtual void dump_instructions(const char *name) const;
|
|
|
|
void dump_instruction(const backend_instruction *inst) const;
|
|
|
|
void dump_instruction(const backend_instruction *inst, FILE *file) const;
|
2012-10-30 22:35:44 +00:00
|
|
|
|
2019-02-21 23:20:39 +00:00
|
|
|
const brw_base_prog_key *const key;
|
2015-03-09 08:58:51 +00:00
|
|
|
const struct brw_sampler_prog_key_data *key_tex;
|
|
|
|
|
2015-06-30 06:50:28 +01:00
|
|
|
struct brw_gs_compile *gs_compile;
|
|
|
|
|
2014-08-29 20:50:46 +01:00
|
|
|
struct brw_stage_prog_data *prog_data;
|
2010-10-10 23:42:37 +01:00
|
|
|
|
2020-10-29 19:13:50 +00:00
|
|
|
brw_analysis<brw::fs_live_variables, backend_shader> live_analysis;
|
|
|
|
brw_analysis<brw::register_pressure, fs_visitor> regpressure_analysis;
|
|
|
|
brw_analysis<brw::performance, fs_visitor> performance_analysis;
|
2013-08-05 07:27:14 +01:00
|
|
|
|
2014-02-19 14:27:01 +00:00
|
|
|
/** Number of uniform variable components visited. */
|
|
|
|
unsigned uniforms;
|
|
|
|
|
2014-05-14 05:00:35 +01:00
|
|
|
/** Byte-offset for the next available spot in the scratch space buffer. */
|
|
|
|
unsigned last_scratch;
|
|
|
|
|
2014-03-07 10:10:14 +00:00
|
|
|
/**
|
|
|
|
* Array mapping UNIFORM register numbers to the pull parameter index,
|
|
|
|
* or -1 if this uniform register isn't being uploaded as a pull constant.
|
|
|
|
*/
|
|
|
|
int *pull_constant_loc;
|
|
|
|
|
2014-03-11 21:35:27 +00:00
|
|
|
/**
|
|
|
|
* Array mapping UNIFORM register numbers to the push parameter index,
|
|
|
|
* or -1 if this uniform register isn't being uploaded as a push constant.
|
2011-07-26 02:13:04 +01:00
|
|
|
*/
|
2014-03-11 21:35:27 +00:00
|
|
|
int *push_constant_loc;
|
2011-07-26 02:13:04 +01:00
|
|
|
|
2017-08-24 19:40:31 +01:00
|
|
|
fs_reg subgroup_id;
|
2018-11-12 14:29:51 +00:00
|
|
|
fs_reg group_size[3];
|
2019-02-28 14:15:30 +00:00
|
|
|
fs_reg scratch_base;
|
2012-09-18 17:12:48 +01:00
|
|
|
fs_reg frag_depth;
|
2015-10-20 22:29:39 +01:00
|
|
|
fs_reg frag_stencil;
|
2013-10-25 00:21:13 +01:00
|
|
|
fs_reg sample_mask;
|
2014-10-28 05:42:50 +00:00
|
|
|
fs_reg outputs[VARYING_SLOT_MAX];
|
2012-04-25 21:58:07 +01:00
|
|
|
fs_reg dual_src_output;
|
2010-10-10 23:42:37 +01:00
|
|
|
int first_non_payload_grf;
|
2021-03-29 23:16:59 +01:00
|
|
|
/** Either BRW_MAX_GRF or GFX7_MRF_HACK_START */
|
2015-02-10 13:51:34 +00:00
|
|
|
unsigned max_grf;
|
2010-10-10 23:42:37 +01:00
|
|
|
|
2014-11-12 19:05:51 +00:00
|
|
|
fs_reg *nir_locals;
|
2015-06-24 20:28:47 +01:00
|
|
|
fs_reg *nir_ssa_values;
|
2014-12-17 20:34:27 +00:00
|
|
|
fs_reg *nir_system_values;
|
2014-08-15 18:32:07 +01:00
|
|
|
|
2011-03-13 20:43:05 +00:00
|
|
|
bool failed;
|
2011-05-16 23:10:26 +01:00
|
|
|
char *fail_msg;
|
2010-10-10 23:42:37 +01:00
|
|
|
|
2014-05-14 05:52:51 +01:00
|
|
|
/** Register numbers for thread payload fields. */
|
2015-07-27 14:14:36 +01:00
|
|
|
struct thread_payload {
|
2017-01-13 23:36:51 +00:00
|
|
|
uint8_t subspan_coord_reg[2];
|
|
|
|
uint8_t source_depth_reg[2];
|
|
|
|
uint8_t source_w_reg[2];
|
|
|
|
uint8_t aa_dest_stencil_reg[2];
|
|
|
|
uint8_t dest_depth_reg[2];
|
|
|
|
uint8_t sample_pos_reg[2];
|
|
|
|
uint8_t sample_mask_in_reg[2];
|
|
|
|
uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT][2];
|
|
|
|
uint8_t local_invocation_id_reg[2];
|
2014-05-14 05:52:51 +01:00
|
|
|
|
|
|
|
/** The number of thread payload registers the hardware will supply. */
|
|
|
|
uint8_t num_regs;
|
|
|
|
} payload;
|
|
|
|
|
2014-05-14 08:08:58 +01:00
|
|
|
bool source_depth_to_render_target;
|
|
|
|
bool runtime_check_aads_emit;
|
|
|
|
|
2010-10-10 23:42:37 +01:00
|
|
|
fs_reg pixel_x;
|
|
|
|
fs_reg pixel_y;
|
|
|
|
fs_reg wpos_w;
|
|
|
|
fs_reg pixel_w;
|
2016-07-12 00:24:12 +01:00
|
|
|
fs_reg delta_xy[BRW_BARYCENTRIC_MODE_COUNT];
|
2012-11-27 22:10:52 +00:00
|
|
|
fs_reg shader_start_time;
|
2015-03-12 06:14:31 +00:00
|
|
|
fs_reg final_gs_vertex_count;
|
|
|
|
fs_reg control_data_bits;
|
2015-11-15 01:40:43 +00:00
|
|
|
fs_reg invocation_id;
|
2010-10-10 23:42:37 +01:00
|
|
|
|
2015-02-10 13:51:34 +00:00
|
|
|
unsigned grf_used;
|
2013-10-29 19:46:18 +00:00
|
|
|
bool spilled_any_registers;
|
2011-03-12 03:19:01 +00:00
|
|
|
|
2016-05-18 22:39:52 +01:00
|
|
|
const unsigned dispatch_width; /**< 8, 16 or 32 */
|
|
|
|
unsigned max_dispatch_width;
|
2015-03-16 19:18:31 +00:00
|
|
|
|
2015-06-19 23:40:09 +01:00
|
|
|
int shader_time_index;
|
|
|
|
|
2016-10-17 22:10:26 +01:00
|
|
|
struct shader_stats shader_stats;
|
|
|
|
|
2015-06-03 17:59:44 +01:00
|
|
|
brw::fs_builder bld;
|
2018-12-05 19:35:37 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
fs_reg prepare_alu_destination_and_sources(const brw::fs_builder &bld,
|
|
|
|
nir_alu_instr *instr,
|
|
|
|
fs_reg *op,
|
|
|
|
bool need_dest);
|
2017-02-09 15:20:04 +00:00
|
|
|
|
|
|
|
void resolve_inot_sources(const brw::fs_builder &bld, nir_alu_instr *instr,
|
|
|
|
fs_reg *op);
|
2019-07-11 00:48:01 +01:00
|
|
|
void lower_mul_dword_inst(fs_inst *inst, bblock_t *block);
|
2019-07-11 23:08:03 +01:00
|
|
|
void lower_mul_qword_inst(fs_inst *inst, bblock_t *block);
|
2019-07-11 00:48:01 +01:00
|
|
|
void lower_mulh_inst(fs_inst *inst, bblock_t *block);
|
2020-01-14 20:22:47 +00:00
|
|
|
|
|
|
|
unsigned workgroup_size() const;
|
2010-10-10 23:42:37 +01:00
|
|
|
};
|
|
|
|
|
2020-01-04 22:32:09 +00:00
|
|
|
/**
|
|
|
|
* Return the flag register used in fragment shaders to keep track of live
|
2021-03-29 23:46:12 +01:00
|
|
|
* samples. On Gfx7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
|
2020-01-04 23:48:07 +00:00
|
|
|
* dispatch mode, while earlier generations are constrained to f0.1, which
|
|
|
|
* limits the dispatch width to SIMD16 for fragment shaders that use discard.
|
2020-01-04 22:32:09 +00:00
|
|
|
*/
|
|
|
|
static inline unsigned
|
|
|
|
sample_mask_flag_subreg(const fs_visitor *shader)
|
|
|
|
{
|
|
|
|
assert(shader->stage == MESA_SHADER_FRAGMENT);
|
2021-03-29 22:41:58 +01:00
|
|
|
return shader->devinfo->ver >= 7 ? 2 : 1;
|
2020-01-04 22:32:09 +00:00
|
|
|
}
|
|
|
|
|
2012-11-09 09:05:47 +00:00
|
|
|
/**
|
|
|
|
* The fragment shader code generator.
|
|
|
|
*
|
|
|
|
* Translates FS IR to actual i965 assembly code.
|
|
|
|
*/
|
|
|
|
class fs_generator
{
public:
   /**
    * \param compiler                compiler instance providing device info
    * \param log_data                opaque pointer forwarded to the
    *                               compiler->*_log callbacks
    * \param mem_ctx                 ralloc context owning generator
    *                               allocations — presumably; confirm against
    *                               the constructor definition
    * \param prog_data               stage program data filled in during
    *                               code generation
    * \param runtime_check_aads_emit whether the AA-dest-stencil emit must be
    *                               checked at runtime
    * \param stage                   shader stage being compiled
    */
   fs_generator(const struct brw_compiler *compiler, void *log_data,
                void *mem_ctx,
                struct brw_stage_prog_data *prog_data,
                bool runtime_check_aads_emit,
                gl_shader_stage stage);
   ~fs_generator();

   /** Enable debug output, labelling dumps with \p shader_name. */
   void enable_debug(const char *shader_name);

   /**
    * Translate the instructions in \p cfg into native assembly.
    *
    * Returns an integer result (start offset of the generated code —
    * TODO confirm against the definition) and fills in \p stats if
    * non-NULL.
    */
   int generate_code(const cfg_t *cfg, int dispatch_width,
                     struct shader_stats shader_stats,
                     const brw::performance &perf,
                     struct brw_compile_stats *stats);

   /** Append \p size bytes of constant data to the program. */
   void add_const_data(void *data, unsigned size);

   /** Return the finished machine code buffer. */
   const unsigned *get_assembly();

private:
   /* One generate_* helper per logical FS IR operation; each emits the
    * corresponding native instruction sequence through brw_codegen *p.
    */
   void fire_fb_write(fs_inst *inst,
                      struct brw_reg payload,
                      struct brw_reg implied_header,
                      GLuint nr);
   void generate_send(fs_inst *inst,
                      struct brw_reg dst,
                      struct brw_reg desc,
                      struct brw_reg ex_desc,
                      struct brw_reg payload,
                      struct brw_reg payload2);
   void generate_fb_write(fs_inst *inst, struct brw_reg payload);
   void generate_fb_read(fs_inst *inst, struct brw_reg dst,
                         struct brw_reg payload);
   void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload);
   void generate_urb_write(fs_inst *inst, struct brw_reg payload);
   void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
   void generate_barrier(fs_inst *inst, struct brw_reg src);
   /* Returns bool — presumably whether PLN/LINE+MAC emission succeeded;
    * confirm against the definition. */
   bool generate_linterp(fs_inst *inst, struct brw_reg dst,
                         struct brw_reg *src);
   void generate_tex(fs_inst *inst, struct brw_reg dst,
                     struct brw_reg surface_index,
                     struct brw_reg sampler_index);
   void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst,
                                 struct brw_reg src,
                                 struct brw_reg surf_index);
   /* Screen-space derivatives. */
   void generate_ddx(const fs_inst *inst,
                     struct brw_reg dst, struct brw_reg src);
   void generate_ddy(const fs_inst *inst,
                     struct brw_reg dst, struct brw_reg src);
   /* Register spill/fill to the scratch space buffer. */
   void generate_scratch_write(fs_inst *inst, struct brw_reg src);
   void generate_scratch_read(fs_inst *inst, struct brw_reg dst);
   void generate_scratch_read_gfx7(fs_inst *inst, struct brw_reg dst);
   void generate_scratch_header(fs_inst *inst, struct brw_reg dst);
   /* Pull-constant (UBO) loads; the _gfx7/_gfx4 variants are
    * generation-specific encodings. */
   void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst,
                                            struct brw_reg index,
                                            struct brw_reg offset);
   void generate_uniform_pull_constant_load_gfx7(fs_inst *inst,
                                                 struct brw_reg dst,
                                                 struct brw_reg surf_index,
                                                 struct brw_reg payload);
   void generate_varying_pull_constant_load_gfx4(fs_inst *inst,
                                                 struct brw_reg dst,
                                                 struct brw_reg index);
   void generate_mov_dispatch_to_flags(fs_inst *inst);

   void generate_pixel_interpolator_query(fs_inst *inst,
                                          struct brw_reg dst,
                                          struct brw_reg src,
                                          struct brw_reg msg_data,
                                          unsigned msg_type);

   void generate_set_sample_id(fs_inst *inst,
                               struct brw_reg dst,
                               struct brw_reg src0,
                               struct brw_reg src1);

   void generate_halt(fs_inst *inst);

   void generate_pack_half_2x16_split(fs_inst *inst,
                                      struct brw_reg dst,
                                      struct brw_reg x,
                                      struct brw_reg y);

   void generate_shader_time_add(fs_inst *inst,
                                 struct brw_reg payload,
                                 struct brw_reg offset,
                                 struct brw_reg value);

   void generate_mov_indirect(fs_inst *inst,
                              struct brw_reg dst,
                              struct brw_reg reg,
                              struct brw_reg indirect_byte_offset);

   void generate_shuffle(fs_inst *inst,
                         struct brw_reg dst,
                         struct brw_reg src,
                         struct brw_reg idx);

   void generate_quad_swizzle(const fs_inst *inst,
                              struct brw_reg dst, struct brw_reg src,
                              unsigned swiz);

   /* Patch the jump targets of previously emitted HALT instructions
    * collected in discard_halt_patches. */
   bool patch_halt_jumps();

   const struct brw_compiler *compiler;
   void *log_data; /* Passed to compiler->*_log functions */

   const struct gen_device_info *devinfo;

   /* Low-level assembler state. */
   struct brw_codegen *p;
   struct brw_stage_prog_data * const prog_data;

   unsigned dispatch_width; /**< 8, 16 or 32 */

   /* HALT instructions awaiting jump-target patching. */
   exec_list discard_halt_patches;
   bool runtime_check_aads_emit;
   /* When set, instruction dumps are printed; see enable_debug(). */
   bool debug_flag;
   const char *shader_name;
   gl_shader_stage stage;
   void *mem_ctx;
};
|
|
|
|
|
2017-01-13 23:23:48 +00:00
|
|
|
namespace brw {
|
|
|
|
inline fs_reg
|
2017-01-13 23:36:51 +00:00
|
|
|
fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2],
|
2020-01-03 22:41:15 +00:00
|
|
|
brw_reg_type type = BRW_REGISTER_TYPE_F)
|
2017-01-13 23:23:48 +00:00
|
|
|
{
|
2017-01-13 23:36:51 +00:00
|
|
|
if (!regs[0])
|
2017-01-13 23:23:48 +00:00
|
|
|
return fs_reg();
|
2017-01-13 23:36:51 +00:00
|
|
|
|
|
|
|
if (bld.dispatch_width() > 16) {
|
2020-01-03 22:41:15 +00:00
|
|
|
const fs_reg tmp = bld.vgrf(type);
|
2017-01-13 23:36:51 +00:00
|
|
|
const brw::fs_builder hbld = bld.exec_all().group(16, 0);
|
|
|
|
const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
|
2020-01-03 22:41:15 +00:00
|
|
|
fs_reg *const components = new fs_reg[m];
|
2017-01-13 23:36:51 +00:00
|
|
|
|
2020-01-03 22:41:15 +00:00
|
|
|
for (unsigned g = 0; g < m; g++)
|
|
|
|
components[g] = retype(brw_vec8_grf(regs[g], 0), type);
|
2017-01-13 23:36:51 +00:00
|
|
|
|
2020-01-03 22:41:15 +00:00
|
|
|
hbld.LOAD_PAYLOAD(tmp, components, m, 0);
|
2017-01-13 23:36:51 +00:00
|
|
|
|
|
|
|
delete[] components;
|
|
|
|
return tmp;
|
|
|
|
|
2017-01-13 23:23:48 +00:00
|
|
|
} else {
|
2017-01-13 23:36:51 +00:00
|
|
|
return fs_reg(retype(brw_vec8_grf(regs[0], 0), type));
|
2017-01-13 23:23:48 +00:00
|
|
|
}
|
|
|
|
}
|
2018-12-29 09:44:00 +00:00
|
|
|
|
2020-01-03 22:41:15 +00:00
|
|
|
inline fs_reg
|
|
|
|
fetch_barycentric_reg(const brw::fs_builder &bld, uint8_t regs[2])
|
|
|
|
{
|
|
|
|
if (!regs[0])
|
|
|
|
return fs_reg();
|
|
|
|
|
|
|
|
const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
|
2020-01-04 01:08:51 +00:00
|
|
|
const brw::fs_builder hbld = bld.exec_all().group(8, 0);
|
2020-01-03 22:41:15 +00:00
|
|
|
const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
|
|
|
|
fs_reg *const components = new fs_reg[2 * m];
|
|
|
|
|
|
|
|
for (unsigned c = 0; c < 2; c++) {
|
|
|
|
for (unsigned g = 0; g < m; g++)
|
2020-01-04 01:08:51 +00:00
|
|
|
components[c * m + g] = offset(brw_vec8_grf(regs[g / 2], 0),
|
|
|
|
hbld, c + 2 * (g % 2));
|
2020-01-03 22:41:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
hbld.LOAD_PAYLOAD(tmp, components, 2 * m, 0);
|
|
|
|
|
|
|
|
delete[] components;
|
|
|
|
return tmp;
|
|
|
|
}
|
|
|
|
|
2018-12-29 12:00:13 +00:00
|
|
|
bool
|
|
|
|
lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i);
|
2017-01-13 23:23:48 +00:00
|
|
|
}
|
|
|
|
|
2018-06-09 10:45:22 +01:00
|
|
|
/* Scatter \p components 32-bit components read starting at
 * \p first_component from \p src into \p dst — presumably handling
 * narrower-than-32-bit destination types; confirm against the
 * definition in brw_fs_nir.cpp.
 */
void shuffle_from_32bit_read(const brw::fs_builder &bld,
                             const fs_reg &dst,
                             const fs_reg &src,
                             uint32_t first_component,
                             uint32_t components);

/* Materialize a double-precision immediate \p v as an fs_reg. */
fs_reg setup_imm_df(const brw::fs_builder &bld,
                    double v);

/* Materialize a signed byte immediate \p v as an fs_reg. */
fs_reg setup_imm_b(const brw::fs_builder &bld,
                   int8_t v);

/* Materialize an unsigned byte immediate \p v as an fs_reg. */
fs_reg setup_imm_ub(const brw::fs_builder &bld,
                    uint8_t v);

/* Map a GLSL interpolation mode plus the interpolation intrinsic \p op
 * to the hardware barycentric mode enum.
 */
enum brw_barycentric_mode brw_barycentric_mode(enum glsl_interp_mode mode,
                                               nir_intrinsic_op op);

/* Compute the message-control bits for a framebuffer-write SEND from
 * the instruction and WM program data.
 */
uint32_t brw_fb_write_msg_control(const fs_inst *inst,
                                  const struct brw_wm_prog_data *prog_data);

/* Fill in wm_prog_data's URB setup index mapping from its URB setup
 * attribute array.
 */
void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data);
|
2019-08-26 07:59:25 +01:00
|
|
|
|
2017-03-20 16:04:38 +00:00
|
|
|
#endif /* BRW_FS_H */
|