Merge remote-tracking branch 'mesa-public/master' into vulkan
This pulls in nir_builder_init_simple_shader and allows us to delete anv_nir_builder.h entirely.
This commit is contained in:
commit
a33fcc0fd4
|
@ -112,7 +112,7 @@ GL 4.0, GLSL 4.00 --- all DONE: nvc0, r600, radeonsi
|
|||
GL_ARB_gpu_shader_fp64 DONE (llvmpipe, softpipe)
|
||||
GL_ARB_sample_shading DONE (i965, nv50)
|
||||
GL_ARB_shader_subroutine DONE (i965, nv50, llvmpipe, softpipe)
|
||||
GL_ARB_tessellation_shader DONE (i965/gen8+)
|
||||
GL_ARB_tessellation_shader DONE (i965)
|
||||
GL_ARB_texture_buffer_object_rgb32 DONE (i965, llvmpipe, softpipe)
|
||||
GL_ARB_texture_cube_map_array DONE (i965, nv50, llvmpipe, softpipe)
|
||||
GL_ARB_texture_gather DONE (i965, nv50, llvmpipe, softpipe)
|
||||
|
|
|
@ -47,7 +47,7 @@ Note: some of the new features are only available with certain drivers.
|
|||
<li>GL_ARB_base_instance on freedreno/a4xx</li>
|
||||
<li>GL_ARB_compute_shader on i965</li>
|
||||
<li>GL_ARB_copy_image on r600</li>
|
||||
<li>GL_ARB_tessellation_shader on i965/gen8+ and r600 (evergreen/cayman only)</li>
|
||||
<li>GL_ARB_tessellation_shader on i965 and r600 (evergreen/cayman only)</li>
|
||||
<li>GL_ARB_texture_buffer_object_rgb32 on freedreno/a4xx</li>
|
||||
<li>GL_ARB_texture_buffer_range on freedreno/a4xx</li>
|
||||
<li>GL_ARB_texture_query_lod on freedreno/a4xx</li>
|
||||
|
|
|
@ -1968,14 +1968,10 @@ tgsi_to_nir(const void *tgsi_tokens,
|
|||
tgsi_scan_shader(tgsi_tokens, &scan);
|
||||
c->scan = &scan;
|
||||
|
||||
s = nir_shader_create(NULL, tgsi_processor_to_shader_stage(scan.processor),
|
||||
options);
|
||||
|
||||
nir_function *func = nir_function_create(s, "main");
|
||||
nir_function_impl *impl = nir_function_impl_create(func);
|
||||
|
||||
nir_builder_init(&c->build, impl);
|
||||
c->build.cursor = nir_after_cf_list(&impl->body);
|
||||
nir_builder_init_simple_shader(&c->build, NULL,
|
||||
tgsi_processor_to_shader_stage(scan.processor),
|
||||
options);
|
||||
s = c->build.shader;
|
||||
|
||||
s->num_inputs = scan.file_max[TGSI_FILE_INPUT] + 1;
|
||||
s->num_uniforms = scan.const_file_max[0] + 1;
|
||||
|
|
|
@ -404,6 +404,19 @@ debug_get_flags_option(const char *name,
|
|||
const struct debug_named_value *flags,
|
||||
uint64_t dfault);
|
||||
|
||||
/**
 * Define a static accessor debug_get_option_<suffix>() returning a string.
 *
 * The first call forwards \p name and \p dfault to debug_get_option() and
 * stores the returned pointer in a function-local static; every later call
 * returns that stored pointer without calling debug_get_option() again.
 */
#define DEBUG_GET_ONCE_OPTION(suffix, name, dfault) \
static const char * \
debug_get_option_ ## suffix (void) \
{ \
   static boolean needs_lookup = TRUE; \
   static const char * cached; \
   if (!needs_lookup) \
      return cached; \
   needs_lookup = FALSE; \
   cached = debug_get_option(name, dfault); \
   return cached; \
}
|
||||
|
||||
#define DEBUG_GET_ONCE_BOOL_OPTION(sufix, name, dfault) \
|
||||
static boolean \
|
||||
debug_get_option_ ## sufix (void) \
|
||||
|
|
|
@ -1889,6 +1889,9 @@ AlgebraicOpt::handleCVT_EXTBF(Instruction *cvt)
|
|||
arg = shift->getSrc(0);
|
||||
offset = imm.reg.data.u32;
|
||||
}
|
||||
// We just AND'd the high bits away, which means this is effectively an
|
||||
// unsigned value.
|
||||
cvt->sType = TYPE_U32;
|
||||
} else if (insn->op == OP_SHR &&
|
||||
insn->sType == cvt->sType &&
|
||||
insn->src(1).getImmediate(imm)) {
|
||||
|
|
|
@ -1956,7 +1956,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
|
|||
|
||||
if (!gs_ring_buffer) {
|
||||
radeon_set_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4,
|
||||
ALIGN_DIVUP(cb->buffer_size >> 4, 16), pkt_flags);
|
||||
ALIGN_DIVUP(cb->buffer_size, 256), pkt_flags);
|
||||
radeon_set_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8,
|
||||
pkt_flags);
|
||||
}
|
||||
|
|
|
@ -1768,7 +1768,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
|
|||
|
||||
if (!gs_ring_buffer) {
|
||||
radeon_set_context_reg(cs, reg_alu_constbuf_size + buffer_index * 4,
|
||||
ALIGN_DIVUP(cb->buffer_size >> 4, 16));
|
||||
ALIGN_DIVUP(cb->buffer_size, 256));
|
||||
radeon_set_context_reg(cs, reg_alu_const_cache + buffer_index * 4, offset >> 8);
|
||||
}
|
||||
|
||||
|
|
|
@ -28,8 +28,11 @@
|
|||
#include "si_shader.h"
|
||||
#include "sid.h"
|
||||
#include "sid_tables.h"
|
||||
#include "radeon/radeon_elf_util.h"
|
||||
#include "ddebug/dd_util.h"
|
||||
#include "util/u_memory.h"
|
||||
|
||||
DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL)
|
||||
|
||||
static void si_dump_shader(struct si_shader_ctx_state *state, const char *name,
|
||||
FILE *f)
|
||||
|
@ -42,6 +45,98 @@ static void si_dump_shader(struct si_shader_ctx_state *state, const char *name,
|
|||
fprintf(f, "%s\n\n", state->current->binary.disasm_string);
|
||||
}
|
||||
|
||||
/**
 * Shader compiles can be overridden with arbitrary ELF objects by setting
 * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
 *
 * \param num     number of the shader compile to look up in the list
 * \param binary  passed to radeon_elf_read() together with the file contents
 * \return true if a file was listed for \p num, could be read completely and
 *         was handed to radeon_elf_read(); false if the variable is unset,
 *         no entry matches, or opening/reading/allocating failed
 *
 * A list entry whose number is not followed by ':' terminates the process
 * with exit(1).
 */
bool si_replace_shader(unsigned num, struct radeon_shader_binary *binary)
{
	const char *p = debug_get_option_replace_shaders();
	const char *semicolon;
	char *copy = NULL;
	FILE *f;
	long filesize;
	size_t nread;
	char *buf = NULL;
	bool replaced = false;

	if (!p)
		return false;

	/* Walk the "num:filename" entries until the one for this shader. */
	while (*p) {
		unsigned long i;
		char *endp;
		i = strtoul(p, &endp, 0);

		p = endp;
		if (*p != ':') {
			fprintf(stderr, "RADEON_REPLACE_SHADERS formatted badly.\n");
			exit(1);
		}
		++p;

		if (i == num)
			break;

		/* Not our entry: skip to the start of the next one, if any. */
		p = strchr(p, ';');
		if (!p)
			return false;
		++p;
	}
	if (!*p)
		return false;

	/* The filename ends at the next ';'; copy it so it is NUL-terminated. */
	semicolon = strchr(p, ';');
	if (semicolon) {
		p = copy = strndup(p, semicolon - p);
		if (!copy) {
			fprintf(stderr, "out of memory\n");
			return false;
		}
	}

	fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p);

	/* ELF objects are binary data, so open the file in binary mode. */
	f = fopen(p, "rb");
	if (!f) {
		perror("radeonsi: failed to open file");
		goto out_free;
	}

	/* Determine the file size by seeking to the end and back. */
	if (fseek(f, 0, SEEK_END) != 0)
		goto file_error;

	filesize = ftell(f);
	if (filesize < 0)
		goto file_error;

	if (fseek(f, 0, SEEK_SET) != 0)
		goto file_error;

	buf = MALLOC(filesize);
	if (!buf) {
		fprintf(stderr, "out of memory\n");
		goto out_close;
	}

	/* fread() returns a size_t count; compare it unsigned (filesize >= 0). */
	nread = fread(buf, 1, (size_t)filesize, f);
	if (nread != (size_t)filesize)
		goto file_error;

	radeon_elf_read(buf, filesize, binary);
	replaced = true;

out_close:
	fclose(f);
out_free:
	FREE(buf);
	free(copy);
	return replaced;

file_error:
	perror("radeonsi: reading shader");
	goto out_close;
}
|
||||
|
||||
/* Parsed IBs are difficult to read without colors. Use "less -R file" to
|
||||
* read them, or use "aha -b -f file" to convert them to html.
|
||||
*/
|
||||
|
|
|
@ -329,6 +329,7 @@ void si_init_cp_dma_functions(struct si_context *sctx);
|
|||
/* si_debug.c */
|
||||
void si_init_debug_functions(struct si_context *sctx);
|
||||
void si_check_vm_faults(struct si_context *sctx);
|
||||
bool si_replace_shader(unsigned num, struct radeon_shader_binary *binary);
|
||||
|
||||
/* si_dma.c */
|
||||
void si_dma_copy(struct pipe_context *ctx,
|
||||
|
|
|
@ -3884,11 +3884,17 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
|
|||
bool dump_asm = r600_can_dump_shader(&sscreen->b,
|
||||
shader->selector ? shader->selector->tokens : NULL);
|
||||
bool dump_ir = dump_asm && !(sscreen->b.debug_flags & DBG_NO_IR);
|
||||
unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations);
|
||||
|
||||
r = radeon_llvm_compile(mod, &shader->binary,
|
||||
r600_get_llvm_processor_name(sscreen->b.family), dump_ir, dump_asm, tm);
|
||||
if (r)
|
||||
return r;
|
||||
if (dump_ir || dump_asm)
|
||||
fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
|
||||
|
||||
if (!si_replace_shader(count, &shader->binary)) {
|
||||
r = radeon_llvm_compile(mod, &shader->binary,
|
||||
r600_get_llvm_processor_name(sscreen->b.family), dump_ir, dump_asm, tm);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
r = si_shader_binary_read(sscreen, shader);
|
||||
|
||||
|
|
|
@ -634,7 +634,6 @@ static int si_shader_select(struct pipe_context *ctx,
|
|||
sel->last_variant = shader;
|
||||
}
|
||||
state->current = shader;
|
||||
p_atomic_inc(&sctx->screen->b.num_compilations);
|
||||
pipe_mutex_unlock(sel->mutex);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -951,6 +951,11 @@ builtin_variable_generator::generate_vs_special_vars()
|
|||
add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceIDARB");
|
||||
if (state->ARB_draw_instanced_enable || state->is_version(140, 300))
|
||||
add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceID");
|
||||
if (state->ARB_shader_draw_parameters_enable) {
|
||||
add_system_value(SYSTEM_VALUE_BASE_VERTEX, int_t, "gl_BaseVertexARB");
|
||||
add_system_value(SYSTEM_VALUE_BASE_INSTANCE, int_t, "gl_BaseInstanceARB");
|
||||
add_system_value(SYSTEM_VALUE_DRAW_ID, int_t, "gl_DrawIDARB");
|
||||
}
|
||||
if (state->AMD_vertex_shader_layer_enable) {
|
||||
var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer");
|
||||
var->data.interpolation = INTERP_QUALIFIER_FLAT;
|
||||
|
|
|
@ -610,6 +610,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
|
|||
EXT(ARB_shader_atomic_counters, true, false, ARB_shader_atomic_counters),
|
||||
EXT(ARB_shader_bit_encoding, true, false, ARB_shader_bit_encoding),
|
||||
EXT(ARB_shader_clock, true, false, ARB_shader_clock),
|
||||
EXT(ARB_shader_draw_parameters, true, false, ARB_shader_draw_parameters),
|
||||
EXT(ARB_shader_image_load_store, true, false, ARB_shader_image_load_store),
|
||||
EXT(ARB_shader_image_size, true, false, ARB_shader_image_size),
|
||||
EXT(ARB_shader_precision, true, false, ARB_shader_precision),
|
||||
|
|
|
@ -536,6 +536,8 @@ struct _mesa_glsl_parse_state {
|
|||
bool ARB_shader_bit_encoding_warn;
|
||||
bool ARB_shader_clock_enable;
|
||||
bool ARB_shader_clock_warn;
|
||||
bool ARB_shader_draw_parameters_enable;
|
||||
bool ARB_shader_draw_parameters_warn;
|
||||
bool ARB_shader_image_load_store_enable;
|
||||
bool ARB_shader_image_load_store_warn;
|
||||
bool ARB_shader_image_size_enable;
|
||||
|
|
|
@ -1655,6 +1655,10 @@ nir_intrinsic_from_system_value(gl_system_value val)
|
|||
return nir_intrinsic_load_vertex_id;
|
||||
case SYSTEM_VALUE_INSTANCE_ID:
|
||||
return nir_intrinsic_load_instance_id;
|
||||
case SYSTEM_VALUE_DRAW_ID:
|
||||
return nir_intrinsic_load_draw_id;
|
||||
case SYSTEM_VALUE_BASE_INSTANCE:
|
||||
return nir_intrinsic_load_base_instance;
|
||||
case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
|
||||
return nir_intrinsic_load_vertex_id_zero_base;
|
||||
case SYSTEM_VALUE_BASE_VERTEX:
|
||||
|
@ -1700,6 +1704,10 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
|
|||
return SYSTEM_VALUE_VERTEX_ID;
|
||||
case nir_intrinsic_load_instance_id:
|
||||
return SYSTEM_VALUE_INSTANCE_ID;
|
||||
case nir_intrinsic_load_draw_id:
|
||||
return SYSTEM_VALUE_DRAW_ID;
|
||||
case nir_intrinsic_load_base_instance:
|
||||
return SYSTEM_VALUE_BASE_INSTANCE;
|
||||
case nir_intrinsic_load_vertex_id_zero_base:
|
||||
return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
|
||||
case nir_intrinsic_load_base_vertex:
|
||||
|
|
|
@ -43,6 +43,17 @@ nir_builder_init(nir_builder *build, nir_function_impl *impl)
|
|||
build->shader = impl->function->shader;
|
||||
}
|
||||
|
||||
static inline void
|
||||
nir_builder_init_simple_shader(nir_builder *build, void *mem_ctx,
|
||||
gl_shader_stage stage,
|
||||
const nir_shader_compiler_options *options)
|
||||
{
|
||||
build->shader = nir_shader_create(mem_ctx, stage, options);
|
||||
nir_function *func = nir_function_create(build->shader, "main");
|
||||
build->impl = nir_function_impl_create(func);
|
||||
build->cursor = nir_after_cf_list(&build->impl->body);
|
||||
}
|
||||
|
||||
static inline void
|
||||
nir_builder_instr_insert(nir_builder *build, nir_instr *instr)
|
||||
{
|
||||
|
|
|
@ -258,6 +258,8 @@ SYSTEM_VALUE(vertex_id, 1, 0)
|
|||
SYSTEM_VALUE(vertex_id_zero_base, 1, 0)
|
||||
SYSTEM_VALUE(base_vertex, 1, 0)
|
||||
SYSTEM_VALUE(instance_id, 1, 0)
|
||||
SYSTEM_VALUE(base_instance, 1, 0)
|
||||
SYSTEM_VALUE(draw_id, 1, 0)
|
||||
SYSTEM_VALUE(sample_id, 1, 0)
|
||||
SYSTEM_VALUE(sample_pos, 2, 0)
|
||||
SYSTEM_VALUE(sample_mask_in, 1, 0)
|
||||
|
|
|
@ -62,6 +62,10 @@ optimizations = [
|
|||
(('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
|
||||
(('fadd', ('fneg', a), a), 0.0),
|
||||
(('iadd', ('ineg', a), a), 0),
|
||||
(('iadd', ('ineg', a), ('iadd', a, b)), b),
|
||||
(('iadd', a, ('iadd', ('ineg', a), b)), b),
|
||||
(('fadd', ('fneg', a), ('fadd', a, b)), b),
|
||||
(('fadd', a, ('fadd', ('fneg', a), b)), b),
|
||||
(('fmul', a, 0.0), 0.0),
|
||||
(('imul', a, 0), 0),
|
||||
(('umul_unorm_4x8', a, 0), 0),
|
||||
|
|
|
@ -379,6 +379,26 @@ typedef enum
|
|||
* \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
|
||||
*/
|
||||
SYSTEM_VALUE_BASE_VERTEX,
|
||||
|
||||
/**
|
||||
* Value of \c baseinstance passed to instanced draw entry points
|
||||
*
|
||||
* \sa SYSTEM_VALUE_INSTANCE_ID
|
||||
*/
|
||||
SYSTEM_VALUE_BASE_INSTANCE,
|
||||
|
||||
/**
|
||||
* From _ARB_shader_draw_parameters:
|
||||
*
|
||||
* "Additionally, this extension adds a further built-in variable,
|
||||
* gl_DrawID to the shading language. This variable contains the index
|
||||
* of the draw currently being processed by a Multi* variant of a
|
||||
* drawing command (such as MultiDrawElements or
|
||||
* MultiDrawArraysIndirect)."
|
||||
*
|
||||
* If GL_ARB_multi_draw_indirect is not supported, this is always 0.
|
||||
*/
|
||||
SYSTEM_VALUE_DRAW_ID,
|
||||
/*@}*/
|
||||
|
||||
/**
|
||||
|
|
|
@ -30,24 +30,17 @@ protected:
|
|||
~nir_cf_test();
|
||||
|
||||
nir_builder b;
|
||||
nir_shader *shader;
|
||||
nir_function_impl *impl;
|
||||
};
|
||||
|
||||
nir_cf_test::nir_cf_test()
|
||||
{
|
||||
static const nir_shader_compiler_options options = { };
|
||||
shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, &options);
|
||||
nir_function *func = nir_function_create(shader, "main");
|
||||
nir_function_overload *overload = nir_function_overload_create(func);
|
||||
impl = nir_function_impl_create(overload);
|
||||
|
||||
nir_builder_init(&b, impl);
|
||||
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, &options);
|
||||
}
|
||||
|
||||
nir_cf_test::~nir_cf_test()
|
||||
{
|
||||
ralloc_free(shader);
|
||||
ralloc_free(b.shader);
|
||||
}
|
||||
|
||||
TEST_F(nir_cf_test, delete_break_in_loop)
|
||||
|
@ -56,12 +49,12 @@ TEST_F(nir_cf_test, delete_break_in_loop)
|
|||
*
|
||||
* while (...) { break; }
|
||||
*/
|
||||
nir_loop *loop = nir_loop_create(shader);
|
||||
nir_cf_node_insert(nir_after_cf_list(&impl->body), &loop->cf_node);
|
||||
nir_loop *loop = nir_loop_create(b.shader);
|
||||
nir_cf_node_insert(nir_after_cf_list(&b.impl->body), &loop->cf_node);
|
||||
|
||||
b.cursor = nir_after_cf_list(&loop->body);
|
||||
|
||||
nir_jump_instr *jump = nir_jump_instr_create(shader, nir_jump_break);
|
||||
nir_jump_instr *jump = nir_jump_instr_create(b.shader, nir_jump_break);
|
||||
nir_builder_instr_insert(&b, &jump->instr);
|
||||
|
||||
/* At this point, we should have:
|
||||
|
@ -82,10 +75,10 @@ TEST_F(nir_cf_test, delete_break_in_loop)
|
|||
* block block_3:
|
||||
* }
|
||||
*/
|
||||
nir_block *block_0 = nir_start_block(impl);
|
||||
nir_block *block_0 = nir_start_block(b.impl);
|
||||
nir_block *block_1 = nir_cf_node_as_block(nir_loop_first_cf_node(loop));
|
||||
nir_block *block_2 = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
|
||||
nir_block *block_3 = impl->end_block;
|
||||
nir_block *block_3 = b.impl->end_block;
|
||||
ASSERT_EQ(nir_cf_node_block, block_0->cf_node.type);
|
||||
ASSERT_EQ(nir_cf_node_block, block_1->cf_node.type);
|
||||
ASSERT_EQ(nir_cf_node_block, block_2->cf_node.type);
|
||||
|
@ -108,12 +101,12 @@ TEST_F(nir_cf_test, delete_break_in_loop)
|
|||
EXPECT_TRUE(_mesa_set_search(block_2->predecessors, block_1));
|
||||
EXPECT_TRUE(_mesa_set_search(block_3->predecessors, block_2));
|
||||
|
||||
nir_print_shader(shader, stderr);
|
||||
nir_print_shader(b.shader, stderr);
|
||||
|
||||
/* Now remove the break. */
|
||||
nir_instr_remove(&jump->instr);
|
||||
|
||||
nir_print_shader(shader, stderr);
|
||||
nir_print_shader(b.shader, stderr);
|
||||
|
||||
/* At this point, we should have:
|
||||
*
|
||||
|
@ -151,5 +144,5 @@ TEST_F(nir_cf_test, delete_break_in_loop)
|
|||
EXPECT_TRUE(_mesa_set_search(block_2->predecessors, block_1));
|
||||
EXPECT_TRUE(_mesa_set_search(block_3->predecessors, block_2));
|
||||
|
||||
nir_metadata_require(impl, nir_metadata_dominance);
|
||||
nir_metadata_require(b.impl, nir_metadata_dominance);
|
||||
}
|
||||
|
|
|
@ -155,6 +155,7 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api)
|
|||
ctx->Extensions.ARB_gpu_shader_fp64 = true;
|
||||
ctx->Extensions.ARB_sample_shading = true;
|
||||
ctx->Extensions.ARB_shader_bit_encoding = true;
|
||||
ctx->Extensions.ARB_shader_draw_parameters = true;
|
||||
ctx->Extensions.ARB_shader_stencil_export = true;
|
||||
ctx->Extensions.ARB_shader_subroutine = true;
|
||||
ctx->Extensions.ARB_shader_texture_lod = true;
|
||||
|
|
|
@ -77,6 +77,7 @@ i965_compiler_FILES = \
|
|||
brw_vec4_surface_builder.cpp \
|
||||
brw_vec4_surface_builder.h \
|
||||
brw_vec4_tcs.cpp \
|
||||
brw_vec4_tes.cpp \
|
||||
brw_vec4_visitor.cpp \
|
||||
brw_vec4_vs_visitor.cpp \
|
||||
brw_vue_map.c \
|
||||
|
|
|
@ -598,6 +598,9 @@ struct brw_vs_prog_data {
|
|||
|
||||
bool uses_vertexid;
|
||||
bool uses_instanceid;
|
||||
bool uses_basevertex;
|
||||
bool uses_baseinstance;
|
||||
bool uses_drawid;
|
||||
};
|
||||
|
||||
struct brw_tcs_prog_data
|
||||
|
|
|
@ -374,8 +374,8 @@ brw_initialize_context_constants(struct brw_context *brw)
|
|||
|
||||
const bool stage_exists[MESA_SHADER_STAGES] = {
|
||||
[MESA_SHADER_VERTEX] = true,
|
||||
[MESA_SHADER_TESS_CTRL] = brw->gen >= 8,
|
||||
[MESA_SHADER_TESS_EVAL] = brw->gen >= 8,
|
||||
[MESA_SHADER_TESS_CTRL] = brw->gen >= 7,
|
||||
[MESA_SHADER_TESS_EVAL] = brw->gen >= 7,
|
||||
[MESA_SHADER_GEOMETRY] = brw->gen >= 6,
|
||||
[MESA_SHADER_FRAGMENT] = true,
|
||||
[MESA_SHADER_COMPUTE] =
|
||||
|
|
|
@ -909,8 +909,13 @@ struct brw_context
|
|||
uint32_t pma_stall_bits;
|
||||
|
||||
struct {
|
||||
/** The value of gl_BaseVertex for the current _mesa_prim. */
|
||||
int gl_basevertex;
|
||||
struct {
|
||||
/** The value of gl_BaseVertex for the current _mesa_prim. */
|
||||
int gl_basevertex;
|
||||
|
||||
/** The value of gl_BaseInstance for the current _mesa_prim. */
|
||||
int gl_baseinstance;
|
||||
} params;
|
||||
|
||||
/**
|
||||
* Buffer and offset used for GL_ARB_shader_draw_parameters
|
||||
|
@ -918,6 +923,15 @@ struct brw_context
|
|||
*/
|
||||
drm_intel_bo *draw_params_bo;
|
||||
uint32_t draw_params_offset;
|
||||
|
||||
/**
|
||||
* The value of gl_DrawID for the current _mesa_prim. This always comes
|
||||
* in from its own vertex buffer since it's not part of the indirect
|
||||
* draw parameters.
|
||||
*/
|
||||
int gl_drawid;
|
||||
drm_intel_bo *draw_id_bo;
|
||||
uint32_t draw_id_offset;
|
||||
} draw;
|
||||
|
||||
struct {
|
||||
|
|
|
@ -1315,6 +1315,13 @@ enum opcode {
|
|||
TCS_OPCODE_SET_OUTPUT_URB_OFFSETS,
|
||||
TCS_OPCODE_GET_PRIMITIVE_ID,
|
||||
TCS_OPCODE_CREATE_BARRIER_HEADER,
|
||||
TCS_OPCODE_SRC0_010_IS_ZERO,
|
||||
TCS_OPCODE_RELEASE_INPUT,
|
||||
TCS_OPCODE_THREAD_END,
|
||||
|
||||
TES_OPCODE_GET_PRIMITIVE_ID,
|
||||
TES_OPCODE_CREATE_INPUT_READ_HEADER,
|
||||
TES_OPCODE_ADD_INDIRECT_URB_OFFSET,
|
||||
};
|
||||
|
||||
enum brw_urb_write_flags {
|
||||
|
|
|
@ -462,9 +462,29 @@ brw_try_draw_prims(struct gl_context *ctx,
|
|||
}
|
||||
}
|
||||
|
||||
brw->draw.gl_basevertex =
|
||||
/* Determine if we need to flag BRW_NEW_VERTICES for updating the
|
||||
* gl_BaseVertexARB or gl_BaseInstanceARB values. For indirect draw, we
|
||||
* always flag if the shader uses one of the values. For direct draws,
|
||||
* we only flag if the values change.
|
||||
*/
|
||||
const int new_basevertex =
|
||||
prims[i].indexed ? prims[i].basevertex : prims[i].start;
|
||||
const int new_baseinstance = prims[i].base_instance;
|
||||
if (i > 0) {
|
||||
const bool uses_draw_parameters =
|
||||
brw->vs.prog_data->uses_basevertex ||
|
||||
brw->vs.prog_data->uses_baseinstance;
|
||||
|
||||
if ((uses_draw_parameters && prims[i].is_indirect) ||
|
||||
(brw->vs.prog_data->uses_basevertex &&
|
||||
brw->draw.params.gl_basevertex != new_basevertex) ||
|
||||
(brw->vs.prog_data->uses_baseinstance &&
|
||||
brw->draw.params.gl_baseinstance != new_baseinstance))
|
||||
brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
|
||||
}
|
||||
|
||||
brw->draw.params.gl_basevertex = new_basevertex;
|
||||
brw->draw.params.gl_baseinstance = new_baseinstance;
|
||||
drm_intel_bo_unreference(brw->draw.draw_params_bo);
|
||||
|
||||
if (prims[i].is_indirect) {
|
||||
|
@ -482,6 +502,18 @@ brw_try_draw_prims(struct gl_context *ctx,
|
|||
brw->draw.draw_params_offset = 0;
|
||||
}
|
||||
|
||||
/* gl_DrawID always needs its own vertex buffer since it's not part of
|
||||
* the indirect parameter buffer. If the program uses gl_DrawID we need
|
||||
* to flag BRW_NEW_VERTICES. For the first iteration, we don't have
|
||||
* valid brw->vs.prog_data, but we always flag BRW_NEW_VERTICES before
|
||||
* the loop.
|
||||
*/
|
||||
brw->draw.gl_drawid = prims[i].draw_id;
|
||||
drm_intel_bo_unreference(brw->draw.draw_id_bo);
|
||||
brw->draw.draw_id_bo = NULL;
|
||||
if (i > 0 && brw->vs.prog_data->uses_drawid)
|
||||
brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
|
||||
|
||||
if (brw->gen < 6)
|
||||
brw_set_prim(brw, &prims[i]);
|
||||
else
|
||||
|
|
|
@ -592,11 +592,19 @@ void
|
|||
brw_prepare_shader_draw_parameters(struct brw_context *brw)
|
||||
{
|
||||
/* For non-indirect draws, upload gl_BaseVertex. */
|
||||
if (brw->vs.prog_data->uses_vertexid && brw->draw.draw_params_bo == NULL) {
|
||||
intel_upload_data(brw, &brw->draw.gl_basevertex, 4, 4,
|
||||
if ((brw->vs.prog_data->uses_basevertex ||
|
||||
brw->vs.prog_data->uses_baseinstance) &&
|
||||
brw->draw.draw_params_bo == NULL) {
|
||||
intel_upload_data(brw, &brw->draw.params, sizeof(brw->draw.params), 4,
|
||||
&brw->draw.draw_params_bo,
|
||||
&brw->draw.draw_params_offset);
|
||||
}
|
||||
|
||||
if (brw->vs.prog_data->uses_drawid) {
|
||||
intel_upload_data(brw, &brw->draw.gl_drawid, sizeof(brw->draw.gl_drawid), 4,
|
||||
&brw->draw.draw_id_bo,
|
||||
&brw->draw.draw_id_offset);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -658,8 +666,11 @@ brw_emit_vertices(struct brw_context *brw)
|
|||
brw_emit_query_begin(brw);
|
||||
|
||||
unsigned nr_elements = brw->vb.nr_enabled;
|
||||
if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid)
|
||||
if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid ||
|
||||
brw->vs.prog_data->uses_basevertex || brw->vs.prog_data->uses_baseinstance)
|
||||
++nr_elements;
|
||||
if (brw->vs.prog_data->uses_drawid)
|
||||
nr_elements++;
|
||||
|
||||
/* If the VS doesn't read any inputs (calculating vertex position from
|
||||
* a state variable for some reason, for example), emit a single pad
|
||||
|
@ -693,8 +704,11 @@ brw_emit_vertices(struct brw_context *brw)
|
|||
/* Now emit VB and VEP state packets.
|
||||
*/
|
||||
|
||||
unsigned nr_buffers =
|
||||
brw->vb.nr_buffers + brw->vs.prog_data->uses_vertexid;
|
||||
const bool uses_draw_params =
|
||||
brw->vs.prog_data->uses_basevertex ||
|
||||
brw->vs.prog_data->uses_baseinstance;
|
||||
const unsigned nr_buffers = brw->vb.nr_buffers +
|
||||
uses_draw_params + brw->vs.prog_data->uses_drawid;
|
||||
|
||||
if (nr_buffers) {
|
||||
if (brw->gen >= 6) {
|
||||
|
@ -713,7 +727,7 @@ brw_emit_vertices(struct brw_context *brw)
|
|||
|
||||
}
|
||||
|
||||
if (brw->vs.prog_data->uses_vertexid) {
|
||||
if (uses_draw_params) {
|
||||
EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers,
|
||||
brw->draw.draw_params_bo,
|
||||
brw->draw.draw_params_bo->size - 1,
|
||||
|
@ -721,6 +735,16 @@ brw_emit_vertices(struct brw_context *brw)
|
|||
0, /* stride */
|
||||
0); /* step rate */
|
||||
}
|
||||
|
||||
if (brw->vs.prog_data->uses_drawid) {
|
||||
EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers + 1,
|
||||
brw->draw.draw_id_bo,
|
||||
brw->draw.draw_id_bo->size - 1,
|
||||
brw->draw.draw_id_offset,
|
||||
0, /* stride */
|
||||
0); /* step rate */
|
||||
}
|
||||
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
|
||||
|
@ -790,21 +814,25 @@ brw_emit_vertices(struct brw_context *brw)
|
|||
((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
|
||||
}
|
||||
|
||||
if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
|
||||
if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid ||
|
||||
brw->vs.prog_data->uses_basevertex || brw->vs.prog_data->uses_baseinstance) {
|
||||
uint32_t dw0 = 0, dw1 = 0;
|
||||
uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0;
|
||||
uint32_t comp1 = BRW_VE1_COMPONENT_STORE_0;
|
||||
uint32_t comp2 = BRW_VE1_COMPONENT_STORE_0;
|
||||
uint32_t comp3 = BRW_VE1_COMPONENT_STORE_0;
|
||||
|
||||
if (brw->vs.prog_data->uses_vertexid) {
|
||||
if (brw->vs.prog_data->uses_basevertex)
|
||||
comp0 = BRW_VE1_COMPONENT_STORE_SRC;
|
||||
comp2 = BRW_VE1_COMPONENT_STORE_VID;
|
||||
}
|
||||
|
||||
if (brw->vs.prog_data->uses_instanceid) {
|
||||
if (brw->vs.prog_data->uses_baseinstance)
|
||||
comp1 = BRW_VE1_COMPONENT_STORE_SRC;
|
||||
|
||||
if (brw->vs.prog_data->uses_vertexid)
|
||||
comp2 = BRW_VE1_COMPONENT_STORE_VID;
|
||||
|
||||
if (brw->vs.prog_data->uses_instanceid)
|
||||
comp3 = BRW_VE1_COMPONENT_STORE_IID;
|
||||
}
|
||||
|
||||
dw1 = (comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
|
||||
(comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
|
||||
|
@ -814,11 +842,11 @@ brw_emit_vertices(struct brw_context *brw)
|
|||
if (brw->gen >= 6) {
|
||||
dw0 |= GEN6_VE0_VALID |
|
||||
brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
|
||||
BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT;
|
||||
BRW_SURFACEFORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT;
|
||||
} else {
|
||||
dw0 |= BRW_VE0_VALID |
|
||||
brw->vb.nr_buffers << BRW_VE0_INDEX_SHIFT |
|
||||
BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT;
|
||||
BRW_SURFACEFORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT;
|
||||
dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
|
||||
}
|
||||
|
||||
|
@ -830,6 +858,30 @@ brw_emit_vertices(struct brw_context *brw)
|
|||
OUT_BATCH(dw1);
|
||||
}
|
||||
|
||||
if (brw->vs.prog_data->uses_drawid) {
|
||||
uint32_t dw0 = 0, dw1 = 0;
|
||||
|
||||
dw1 = (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
|
||||
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
|
||||
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
|
||||
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT);
|
||||
|
||||
if (brw->gen >= 6) {
|
||||
dw0 |= GEN6_VE0_VALID |
|
||||
((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) |
|
||||
(BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
|
||||
} else {
|
||||
dw0 |= BRW_VE0_VALID |
|
||||
((brw->vb.nr_buffers + 1) << BRW_VE0_INDEX_SHIFT) |
|
||||
(BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
|
||||
|
||||
dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
|
||||
}
|
||||
|
||||
OUT_BATCH(dw0);
|
||||
OUT_BATCH(dw1);
|
||||
}
|
||||
|
||||
if (brw->gen >= 6 && gen6_edgeflag_input) {
|
||||
uint32_t format =
|
||||
brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
|
||||
|
|
|
@ -1672,7 +1672,10 @@ fs_visitor::assign_vs_urb_setup()
|
|||
|
||||
assert(stage == MESA_SHADER_VERTEX);
|
||||
int count = _mesa_bitcount_64(vs_prog_data->inputs_read);
|
||||
if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid)
|
||||
if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
|
||||
vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance)
|
||||
count++;
|
||||
if (vs_prog_data->uses_drawid)
|
||||
count++;
|
||||
|
||||
/* Each attribute is 4 regs. */
|
||||
|
|
|
@ -222,6 +222,20 @@ emit_system_values_block(nir_block *block, void *void_visitor)
|
|||
*reg = *v->emit_vs_system_value(SYSTEM_VALUE_INSTANCE_ID);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_base_instance:
|
||||
assert(v->stage == MESA_SHADER_VERTEX);
|
||||
reg = &v->nir_system_values[SYSTEM_VALUE_BASE_INSTANCE];
|
||||
if (reg->file == BAD_FILE)
|
||||
*reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_INSTANCE);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_draw_id:
|
||||
assert(v->stage == MESA_SHADER_VERTEX);
|
||||
reg = &v->nir_system_values[SYSTEM_VALUE_DRAW_ID];
|
||||
if (reg->file == BAD_FILE)
|
||||
*reg = *v->emit_vs_system_value(SYSTEM_VALUE_DRAW_ID);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_invocation_id:
|
||||
assert(v->stage == MESA_SHADER_GEOMETRY);
|
||||
reg = &v->nir_system_values[SYSTEM_VALUE_INVOCATION_ID];
|
||||
|
@ -1747,7 +1761,9 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
|
|||
|
||||
case nir_intrinsic_load_vertex_id_zero_base:
|
||||
case nir_intrinsic_load_base_vertex:
|
||||
case nir_intrinsic_load_instance_id: {
|
||||
case nir_intrinsic_load_instance_id:
|
||||
case nir_intrinsic_load_base_instance:
|
||||
case nir_intrinsic_load_draw_id: {
|
||||
gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
|
||||
fs_reg val = nir_system_values[sv];
|
||||
assert(val.file != BAD_FILE);
|
||||
|
|
|
@ -43,9 +43,14 @@ fs_visitor::emit_vs_system_value(int location)
|
|||
switch (location) {
|
||||
case SYSTEM_VALUE_BASE_VERTEX:
|
||||
reg->reg_offset = 0;
|
||||
vs_prog_data->uses_vertexid = true;
|
||||
vs_prog_data->uses_basevertex = true;
|
||||
break;
|
||||
case SYSTEM_VALUE_BASE_INSTANCE:
|
||||
reg->reg_offset = 1;
|
||||
vs_prog_data->uses_baseinstance = true;
|
||||
break;
|
||||
case SYSTEM_VALUE_VERTEX_ID:
|
||||
unreachable("should have been lowered");
|
||||
case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
|
||||
reg->reg_offset = 2;
|
||||
vs_prog_data->uses_vertexid = true;
|
||||
|
@ -54,6 +59,16 @@ fs_visitor::emit_vs_system_value(int location)
|
|||
reg->reg_offset = 3;
|
||||
vs_prog_data->uses_instanceid = true;
|
||||
break;
|
||||
case SYSTEM_VALUE_DRAW_ID:
|
||||
if (nir->info.system_values_read &
|
||||
(BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
|
||||
BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
|
||||
BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
|
||||
BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID)))
|
||||
reg->nr += 4;
|
||||
reg->reg_offset = 0;
|
||||
vs_prog_data->uses_drawid = true;
|
||||
break;
|
||||
default:
|
||||
unreachable("not reached");
|
||||
}
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include "brw_eu.h"
|
||||
#include "brw_fs.h"
|
||||
#include "brw_nir.h"
|
||||
#include "brw_vec4_tes.h"
|
||||
#include "glsl/glsl_parser_extras.h"
|
||||
#include "main/shaderobj.h"
|
||||
#include "main/uniforms.h"
|
||||
|
@ -86,7 +87,8 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
|
|||
compiler->scalar_stage[MESA_SHADER_VERTEX] =
|
||||
devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS);
|
||||
compiler->scalar_stage[MESA_SHADER_TESS_CTRL] = false;
|
||||
compiler->scalar_stage[MESA_SHADER_TESS_EVAL] = true;
|
||||
compiler->scalar_stage[MESA_SHADER_TESS_EVAL] =
|
||||
devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TES", true);
|
||||
compiler->scalar_stage[MESA_SHADER_GEOMETRY] =
|
||||
devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", false);
|
||||
compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true;
|
||||
|
@ -569,6 +571,18 @@ brw_instruction_name(enum opcode op)
|
|||
return "tcs_get_primitive_id";
|
||||
case TCS_OPCODE_CREATE_BARRIER_HEADER:
|
||||
return "tcs_create_barrier_header";
|
||||
case TCS_OPCODE_SRC0_010_IS_ZERO:
|
||||
return "tcs_src0<0,1,0>_is_zero";
|
||||
case TCS_OPCODE_RELEASE_INPUT:
|
||||
return "tcs_release_input";
|
||||
case TCS_OPCODE_THREAD_END:
|
||||
return "tcs_thread_end";
|
||||
case TES_OPCODE_CREATE_INPUT_READ_HEADER:
|
||||
return "tes_create_input_read_header";
|
||||
case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
|
||||
return "tes_add_indirect_urb_offset";
|
||||
case TES_OPCODE_GET_PRIMITIVE_ID:
|
||||
return "tes_get_primitive_id";
|
||||
}
|
||||
|
||||
unreachable("not reached");
|
||||
|
@ -1004,6 +1018,7 @@ backend_instruction::has_side_effects() const
|
|||
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
|
||||
case FS_OPCODE_FB_WRITE:
|
||||
case SHADER_OPCODE_BARRIER:
|
||||
case TCS_OPCODE_RELEASE_INPUT:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
@ -1403,6 +1418,19 @@ brw_compile_tes(const struct brw_compiler *compiler,
|
|||
|
||||
return g.get_assembly(final_assembly_size);
|
||||
} else {
|
||||
unreachable("XXX: vec4 tessellation evalation shaders not merged yet.");
|
||||
brw::vec4_tes_visitor v(compiler, log_data, key, prog_data,
|
||||
nir, mem_ctx, shader_time_index);
|
||||
if (!v.run()) {
|
||||
if (error_str)
|
||||
*error_str = ralloc_strdup(mem_ctx, v.fail_msg);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_TES))
|
||||
v.dump_instructions();
|
||||
|
||||
return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
|
||||
&prog_data->base, v.cfg,
|
||||
final_assembly_size);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -196,10 +196,14 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
|
|||
&gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
|
||||
|
||||
&brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
|
||||
&brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
|
||||
&brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
|
||||
&brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
|
||||
&brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
|
||||
|
||||
&gen6_vs_push_constants, /* Before vs_state */
|
||||
&gen7_tcs_push_constants,
|
||||
&gen7_tes_push_constants,
|
||||
&gen6_gs_push_constants, /* Before gs_state */
|
||||
&gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
|
||||
|
||||
|
@ -209,6 +213,12 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
|
|||
&brw_vs_pull_constants,
|
||||
&brw_vs_ubo_surfaces,
|
||||
&brw_vs_abo_surfaces,
|
||||
&brw_tcs_pull_constants,
|
||||
&brw_tcs_ubo_surfaces,
|
||||
&brw_tcs_abo_surfaces,
|
||||
&brw_tes_pull_constants,
|
||||
&brw_tes_ubo_surfaces,
|
||||
&brw_tes_abo_surfaces,
|
||||
&brw_gs_pull_constants,
|
||||
&brw_gs_ubo_surfaces,
|
||||
&brw_gs_abo_surfaces,
|
||||
|
@ -218,11 +228,15 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
|
|||
&gen6_renderbuffer_surfaces,
|
||||
&brw_texture_surfaces,
|
||||
&brw_vs_binding_table,
|
||||
&brw_tcs_binding_table,
|
||||
&brw_tes_binding_table,
|
||||
&brw_gs_binding_table,
|
||||
&brw_wm_binding_table,
|
||||
|
||||
&brw_fs_samplers,
|
||||
&brw_vs_samplers,
|
||||
&brw_tcs_samplers,
|
||||
&brw_tes_samplers,
|
||||
&brw_gs_samplers,
|
||||
&gen6_multisample_state,
|
||||
|
||||
|
|
|
@ -157,6 +157,7 @@ vec4_instruction::is_send_from_grf()
|
|||
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
|
||||
case VEC4_OPCODE_URB_READ:
|
||||
case TCS_OPCODE_URB_WRITE:
|
||||
case TCS_OPCODE_RELEASE_INPUT:
|
||||
case SHADER_OPCODE_BARRIER:
|
||||
return true;
|
||||
default:
|
||||
|
@ -189,6 +190,7 @@ vec4_instruction::has_source_and_destination_hazard() const
|
|||
switch (opcode) {
|
||||
case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
|
||||
case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
|
||||
case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
@ -274,6 +276,7 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
|
|||
case SHADER_OPCODE_POW:
|
||||
return 2;
|
||||
case VS_OPCODE_URB_WRITE:
|
||||
case TCS_OPCODE_THREAD_END:
|
||||
return 1;
|
||||
case VS_OPCODE_PULL_CONSTANT_LOAD:
|
||||
return 2;
|
||||
|
@ -1563,7 +1566,7 @@ int
|
|||
vec4_vs_visitor::setup_attributes(int payload_reg)
|
||||
{
|
||||
int nr_attributes;
|
||||
int attribute_map[VERT_ATTRIB_MAX + 1];
|
||||
int attribute_map[VERT_ATTRIB_MAX + 2];
|
||||
memset(attribute_map, 0, sizeof(attribute_map));
|
||||
|
||||
nr_attributes = 0;
|
||||
|
@ -1574,12 +1577,19 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
|
|||
}
|
||||
}
|
||||
|
||||
if (vs_prog_data->uses_drawid) {
|
||||
attribute_map[VERT_ATTRIB_MAX + 1] = payload_reg + nr_attributes;
|
||||
nr_attributes++;
|
||||
}
|
||||
|
||||
/* VertexID is stored by the VF as the last vertex element, but we
|
||||
* don't represent it with a flag in inputs_read, so we call it
|
||||
* VERT_ATTRIB_MAX.
|
||||
*/
|
||||
if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) {
|
||||
if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
|
||||
vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) {
|
||||
attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes;
|
||||
nr_attributes++;
|
||||
}
|
||||
|
||||
lower_attributes_to_hw_regs(attribute_map, false /* interleaved */);
|
||||
|
@ -1979,11 +1989,18 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
|
|||
* incoming vertex attribute. So, add an extra slot.
|
||||
*/
|
||||
if (shader->info.system_values_read &
|
||||
(BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
|
||||
(BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
|
||||
BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
|
||||
BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
|
||||
BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) {
|
||||
nr_attributes++;
|
||||
}
|
||||
|
||||
/* gl_DrawID has its very own vec4 */
|
||||
if (shader->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID)) {
|
||||
nr_attributes++;
|
||||
}
|
||||
|
||||
/* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB Entry
|
||||
* Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode. Empirically, in
|
||||
* vec4 mode, the hardware appears to wedge unless we read something.
|
||||
|
|
|
@ -47,6 +47,8 @@ can_do_writemask(const struct brw_device_info *devinfo,
|
|||
case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
|
||||
case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
|
||||
case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
|
||||
case TES_OPCODE_CREATE_INPUT_READ_HEADER:
|
||||
case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
|
||||
case VEC4_OPCODE_URB_READ:
|
||||
return false;
|
||||
default:
|
||||
|
|
|
@ -724,6 +724,9 @@ generate_gs_set_primitive_id(struct brw_codegen *p, struct brw_reg dst)
|
|||
static void
|
||||
generate_tcs_get_instance_id(struct brw_codegen *p, struct brw_reg dst)
|
||||
{
|
||||
const struct brw_device_info *devinfo = p->devinfo;
|
||||
const bool ivb = devinfo->is_ivybridge || devinfo->is_baytrail;
|
||||
|
||||
/* "Instance Count" comes as part of the payload in r0.2 bits 23:17.
|
||||
*
|
||||
* Since we operate in SIMD4x2 mode, we need to run half as many threads
|
||||
|
@ -736,8 +739,8 @@ generate_tcs_get_instance_id(struct brw_codegen *p, struct brw_reg dst)
|
|||
brw_push_insn_state(p);
|
||||
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
||||
|
||||
const int mask = INTEL_MASK(23, 17);
|
||||
const int shift = 17;
|
||||
const int mask = ivb ? INTEL_MASK(22, 16) : INTEL_MASK(23, 17);
|
||||
const int shift = ivb ? 16 : 17;
|
||||
|
||||
brw_AND(p, get_element_ud(dst, 0), get_element_ud(r0, 2), brw_imm_ud(mask));
|
||||
brw_SHR(p, get_element_ud(dst, 0), get_element_ud(dst, 0),
|
||||
|
@ -763,8 +766,12 @@ generate_tcs_urb_write(struct brw_codegen *p,
|
|||
true /* header */, false /* eot */);
|
||||
brw_inst_set_urb_opcode(devinfo, send, BRW_URB_OPCODE_WRITE_OWORD);
|
||||
brw_inst_set_urb_global_offset(devinfo, send, inst->offset);
|
||||
brw_inst_set_urb_per_slot_offset(devinfo, send, 1);
|
||||
brw_inst_set_urb_swizzle_control(devinfo, send, BRW_URB_SWIZZLE_INTERLEAVE);
|
||||
if (inst->urb_write_flags & BRW_URB_WRITE_EOT) {
|
||||
brw_inst_set_eot(devinfo, send, 1);
|
||||
} else {
|
||||
brw_inst_set_urb_per_slot_offset(devinfo, send, 1);
|
||||
brw_inst_set_urb_swizzle_control(devinfo, send, BRW_URB_SWIZZLE_INTERLEAVE);
|
||||
}
|
||||
|
||||
/* what happens to swizzles? */
|
||||
}
|
||||
|
@ -872,6 +879,46 @@ generate_tcs_output_urb_offsets(struct brw_codegen *p,
|
|||
brw_pop_insn_state(p);
|
||||
}
|
||||
|
||||
/**
 * Build the message header that TES URB reads start from, in \p dst.
 *
 * The register is zeroed, 0xff00 is written to dword 5, and g1.3 masked
 * with 0x1fff is written to dwords 0 and 1.  The sequence runs in Align1
 * mode with mask control disabled; the instruction state is pushed before
 * and popped after, so the caller's defaults are left untouched.
 */
static void
|
||||
generate_tes_create_input_read_header(struct brw_codegen *p,
|
||||
struct brw_reg dst)
|
||||
{
|
||||
brw_push_insn_state(p);
|
||||
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
||||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
|
||||
/* Initialize the register to 0 */
|
||||
brw_MOV(p, dst, brw_imm_ud(0));
|
||||
|
||||
/* Enable all the channels in m0.5 bits 15:8 */
|
||||
brw_MOV(p, get_element_ud(dst, 5), brw_imm_ud(0xff00));
|
||||
|
||||
/* Copy g1.3 (the patch URB handle) to m0.0 and m0.1. For safety,
|
||||
* mask out irrelevant "Reserved" bits, as they're not marked MBZ.
|
||||
*/
|
||||
brw_AND(p, vec2(get_element_ud(dst, 0)),
|
||||
retype(brw_vec1_grf(1, 3), BRW_REGISTER_TYPE_UD),
|
||||
brw_imm_ud(0x1fff));
|
||||
brw_pop_insn_state(p);
|
||||
}
|
||||
|
||||
/**
 * Produce a copy of a URB read header with indirect offsets filled in.
 *
 * \p header is copied to \p dst in full, then dwords 3 and 4 of \p dst are
 * overwritten from \p offset read with a <4,1,0> stride.  Runs in Align1
 * mode with mask control disabled, inside a push/pop of the instruction
 * state.
 */
static void
|
||||
generate_tes_add_indirect_urb_offset(struct brw_codegen *p,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg header,
|
||||
struct brw_reg offset)
|
||||
{
|
||||
brw_push_insn_state(p);
|
||||
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
||||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
|
||||
/* Start from the unmodified header; only the offset dwords change. */
brw_MOV(p, dst, header);
|
||||
/* m0.3-0.4: 128-bit-granular offsets into the URB from the handles */
|
||||
brw_MOV(p, vec2(get_element_ud(dst, 3)), stride(offset, 4, 1, 0));
|
||||
|
||||
brw_pop_insn_state(p);
|
||||
}
|
||||
|
||||
static void
|
||||
generate_vec4_urb_read(struct brw_codegen *p,
|
||||
vec4_instruction *inst,
|
||||
|
@ -897,6 +944,75 @@ generate_vec4_urb_read(struct brw_codegen *p,
|
|||
brw_inst_set_urb_global_offset(devinfo, send, inst->offset);
|
||||
}
|
||||
|
||||
/**
 * Emit a URB SEND that marks a pair of TCS input handles as complete.
 *
 * \param header       register that receives the one-register message header
 * \param vertex       immediate UD; selects which two consecutive payload
 *                     dwords (starting at g1) are used as the URB handles
 * \param is_unpaired  immediate; non-zero selects BRW_URB_SWIZZLE_NONE,
 *                     zero selects BRW_URB_SWIZZLE_INTERLEAVE
 *
 * The message is a READ_OWORD with the "complete" bit set, a null
 * destination and a response length of 0.
 */
static void
|
||||
generate_tcs_release_input(struct brw_codegen *p,
|
||||
struct brw_reg header,
|
||||
struct brw_reg vertex,
|
||||
struct brw_reg is_unpaired)
|
||||
{
|
||||
const struct brw_device_info *devinfo = p->devinfo;
|
||||
|
||||
/* The handle location is computed at code-generation time, so the
 * vertex index must be an unsigned immediate.
 */
assert(vertex.file == BRW_IMMEDIATE_VALUE);
|
||||
assert(vertex.type == BRW_REGISTER_TYPE_UD);
|
||||
|
||||
/* m0.0-0.1: URB handles */
|
||||
/* Eight dwords per GRF: vertex / 8 picks the register after g1,
 * vertex % 8 picks the starting dword within it.
 */
struct brw_reg urb_handles =
|
||||
retype(brw_vec2_grf(1 + (vertex.ud >> 3), vertex.ud & 7),
|
||||
BRW_REGISTER_TYPE_UD);
|
||||
|
||||
brw_push_insn_state(p);
|
||||
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
||||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
brw_MOV(p, header, brw_imm_ud(0));
|
||||
brw_MOV(p, vec2(get_element_ud(header, 0)), urb_handles);
|
||||
brw_pop_insn_state(p);
|
||||
|
||||
brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
|
||||
brw_set_dest(p, send, brw_null_reg());
|
||||
brw_set_src0(p, send, header);
|
||||
brw_set_message_descriptor(p, send, BRW_SFID_URB,
|
||||
1 /* mlen */, 0 /* rlen */,
|
||||
true /* header */, false /* eot */);
|
||||
brw_inst_set_urb_opcode(devinfo, send, BRW_URB_OPCODE_READ_OWORD);
|
||||
brw_inst_set_urb_complete(devinfo, send, 1);
|
||||
brw_inst_set_urb_swizzle_control(devinfo, send, is_unpaired.ud ?
|
||||
BRW_URB_SWIZZLE_NONE :
|
||||
BRW_URB_SWIZZLE_INTERLEAVE);
|
||||
}
|
||||
|
||||
/**
 * Emit the final URB write of a TCS thread.
 *
 * The header is built in the message register at inst->base_mrf: it is
 * zeroed and then dword 0 is loaded from g0.0.  The URB write that follows
 * always has BRW_URB_WRITE_EOT set, OR'd with any flags already present in
 * inst->urb_write_flags, and uses inst->mlen as the message length.
 */
static void
|
||||
generate_tcs_thread_end(struct brw_codegen *p, vec4_instruction *inst)
|
||||
{
|
||||
struct brw_reg header = brw_message_reg(inst->base_mrf);
|
||||
|
||||
brw_push_insn_state(p);
|
||||
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
||||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
brw_MOV(p, header, brw_imm_ud(0));
|
||||
brw_MOV(p, get_element_ud(header, 0),
|
||||
retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD));
|
||||
brw_pop_insn_state(p);
|
||||
|
||||
brw_urb_WRITE(p,
|
||||
brw_null_reg(), /* dest */
|
||||
inst->base_mrf, /* starting mrf reg nr */
|
||||
header,
|
||||
BRW_URB_WRITE_EOT | inst->urb_write_flags,
|
||||
inst->mlen,
|
||||
0, /* response len */
|
||||
0, /* urb destination offset */
|
||||
0);
|
||||
}
|
||||
|
||||
/**
 * Copy the TES primitive ID into \p dst.
 *
 * The value is read as a signed dword from g1.7 using Align1 access; the
 * instruction state is pushed before and popped after the MOV.
 */
static void
|
||||
generate_tes_get_primitive_id(struct brw_codegen *p, struct brw_reg dst)
|
||||
{
|
||||
brw_push_insn_state(p);
|
||||
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
||||
brw_MOV(p, dst, retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_D));
|
||||
brw_pop_insn_state(p);
|
||||
}
|
||||
|
||||
static void
|
||||
generate_tcs_get_primitive_id(struct brw_codegen *p, struct brw_reg dst)
|
||||
{
|
||||
|
@ -911,6 +1027,8 @@ generate_tcs_create_barrier_header(struct brw_codegen *p,
|
|||
struct brw_vue_prog_data *prog_data,
|
||||
struct brw_reg dst)
|
||||
{
|
||||
const struct brw_device_info *devinfo = p->devinfo;
|
||||
const bool ivb = devinfo->is_ivybridge || devinfo->is_baytrail;
|
||||
struct brw_reg m0_2 = get_element_ud(dst, 2);
|
||||
unsigned instances = ((struct brw_tcs_prog_data *) prog_data)->instances;
|
||||
|
||||
|
@ -921,13 +1039,13 @@ generate_tcs_create_barrier_header(struct brw_codegen *p,
|
|||
/* Zero the message header */
|
||||
brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
|
||||
|
||||
/* Copy "Barrier ID" from DW0 bits 16:13 */
|
||||
/* Copy "Barrier ID" from r0.2, bits 16:13 (Gen7.5+) or 15:12 (Gen7) */
|
||||
brw_AND(p, m0_2,
|
||||
retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
|
||||
brw_imm_ud(0x1e000));
|
||||
brw_imm_ud(ivb ? INTEL_MASK(15, 12) : INTEL_MASK(16, 13)));
|
||||
|
||||
/* Shift it into place */
|
||||
brw_SHL(p, m0_2, get_element_ud(dst, 2), brw_imm_ud(11));
|
||||
/* Shift it up to bits 27:24. */
|
||||
brw_SHL(p, m0_2, get_element_ud(dst, 2), brw_imm_ud(ivb ? 12 : 11));
|
||||
|
||||
/* Set the Barrier Count and the enable bit */
|
||||
brw_OR(p, m0_2, m0_2, brw_imm_ud(instances << 9 | (1 << 15)));
|
||||
|
@ -1788,6 +1906,32 @@ generate_code(struct brw_codegen *p,
|
|||
generate_tcs_create_barrier_header(p, prog_data, dst);
|
||||
break;
|
||||
|
||||
case TES_OPCODE_CREATE_INPUT_READ_HEADER:
|
||||
generate_tes_create_input_read_header(p, dst);
|
||||
break;
|
||||
|
||||
case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
|
||||
generate_tes_add_indirect_urb_offset(p, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case TES_OPCODE_GET_PRIMITIVE_ID:
|
||||
generate_tes_get_primitive_id(p, dst);
|
||||
break;
|
||||
|
||||
case TCS_OPCODE_SRC0_010_IS_ZERO:
|
||||
/* If src_reg had stride like fs_reg, we wouldn't need this. */
|
||||
brw_MOV(p, brw_null_reg(), stride(src[0], 0, 1, 0));
|
||||
brw_inst_set_cond_modifier(devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
|
||||
break;
|
||||
|
||||
case TCS_OPCODE_RELEASE_INPUT:
|
||||
generate_tcs_release_input(p, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case TCS_OPCODE_THREAD_END:
|
||||
generate_tcs_thread_end(p, inst);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_BARRIER:
|
||||
brw_barrier(p, src[0]);
|
||||
brw_WAIT(p);
|
||||
|
|
|
@ -78,6 +78,20 @@ vec4_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
|
|||
glsl_type::int_type);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_base_instance:
|
||||
reg = &nir_system_values[SYSTEM_VALUE_BASE_INSTANCE];
|
||||
if (reg->file == BAD_FILE)
|
||||
*reg = *make_reg_for_system_value(SYSTEM_VALUE_BASE_INSTANCE,
|
||||
glsl_type::int_type);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_draw_id:
|
||||
reg = &nir_system_values[SYSTEM_VALUE_DRAW_ID];
|
||||
if (reg->file == BAD_FILE)
|
||||
*reg = *make_reg_for_system_value(SYSTEM_VALUE_DRAW_ID,
|
||||
glsl_type::int_type);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -669,6 +683,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
|||
case nir_intrinsic_load_vertex_id_zero_base:
|
||||
case nir_intrinsic_load_base_vertex:
|
||||
case nir_intrinsic_load_instance_id:
|
||||
case nir_intrinsic_load_base_instance:
|
||||
case nir_intrinsic_load_draw_id:
|
||||
case nir_intrinsic_load_invocation_id:
|
||||
case nir_intrinsic_load_tess_level_inner:
|
||||
case nir_intrinsic_load_tess_level_outer: {
|
||||
|
|
|
@ -156,18 +156,56 @@ vec4_tcs_visitor::emit_prolog()
|
|||
void
|
||||
vec4_tcs_visitor::emit_thread_end()
|
||||
{
|
||||
vec4_instruction *inst;
|
||||
current_annotation = "thread end";
|
||||
|
||||
if (nir->info.tcs.vertices_out % 2) {
|
||||
emit(BRW_OPCODE_ENDIF);
|
||||
}
|
||||
|
||||
if (devinfo->gen == 7) {
|
||||
struct brw_tcs_prog_data *tcs_prog_data =
|
||||
(struct brw_tcs_prog_data *) prog_data;
|
||||
|
||||
current_annotation = "release input vertices";
|
||||
|
||||
/* Synchronize all threads, so we know that no one is still
|
||||
* using the input URB handles.
|
||||
*/
|
||||
if (tcs_prog_data->instances > 1) {
|
||||
dst_reg header = dst_reg(this, glsl_type::uvec4_type);
|
||||
emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header);
|
||||
emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header));
|
||||
}
|
||||
|
||||
/* Make thread 0 (invocations <1, 0>) release pairs of ICP handles.
|
||||
* We want to compare the bottom half of invocation_id with 0, but
|
||||
* use that truth value for the top half as well. Unfortunately,
|
||||
* we don't have stride in the vec4 world, nor UV immediates in
|
||||
* align16, so we need an opcode to get invocation_id<0,4,0>.
|
||||
*/
|
||||
emit(TCS_OPCODE_SRC0_010_IS_ZERO, dst_null_d(), invocation_id);
|
||||
emit(IF(BRW_PREDICATE_NORMAL));
|
||||
for (unsigned i = 0; i < key->input_vertices; i += 2) {
|
||||
/* If we have an odd number of input vertices, the last will be
|
||||
* unpaired. We don't want to use an interleaved URB write in
|
||||
* that case.
|
||||
*/
|
||||
const bool is_unpaired = i == key->input_vertices - 1;
|
||||
|
||||
dst_reg header(this, glsl_type::uvec4_type);
|
||||
emit(TCS_OPCODE_RELEASE_INPUT, header, brw_imm_ud(i),
|
||||
brw_imm_ud(is_unpaired));
|
||||
}
|
||||
emit(BRW_OPCODE_ENDIF);
|
||||
}
|
||||
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_SHADER_TIME))
|
||||
emit_shader_time_end();
|
||||
|
||||
vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
|
||||
inst->mlen = 1; /* just the header, no data. */
|
||||
inst->urb_write_flags = BRW_URB_WRITE_EOT_COMPLETE;
|
||||
inst = emit(TCS_OPCODE_THREAD_END);
|
||||
inst->base_mrf = 14;
|
||||
inst->mlen = 1;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,204 @@
|
|||
/*
|
||||
* Copyright © 2013 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file brw_vec4_tes.cpp
|
||||
*
|
||||
* Tessellation evaluation shader-specific code derived from the vec4_visitor class.
|
||||
*/
|
||||
|
||||
#include "brw_vec4_tes.h"
|
||||
|
||||
namespace brw {
|
||||
|
||||
/**
 * Construct a vec4 TES visitor.
 *
 * All arguments are forwarded to the vec4_visitor base constructor: the
 * sampler key is taken from key->tex, the VUE program data from
 * prog_data->base, and the boolean argument is passed as false.  The
 * constructor body itself does nothing.
 */
vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler,
|
||||
void *log_data,
|
||||
const struct brw_tes_prog_key *key,
|
||||
struct brw_tes_prog_data *prog_data,
|
||||
const nir_shader *shader,
|
||||
void *mem_ctx,
|
||||
int shader_time_index)
|
||||
: vec4_visitor(compiler, log_data, &key->tex, &prog_data->base,
|
||||
shader, mem_ctx, false, shader_time_index)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
/**
 * Always returns NULL; both parameters are ignored.
 *
 * NOTE(review): the TES system values visible in this file are set up in
 * nir_setup_system_value_intrinsic() and nir_emit_intrinsic() instead, so
 * this override is presumably never expected to supply a register --
 * confirm against the base-class callers.
 */
dst_reg *
|
||||
vec4_tes_visitor::make_reg_for_system_value(int location, const glsl_type *type)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
 * Set up the registers backing TES-specific system values.
 *
 * Handles the outer and inner tessellation levels by emitting a URB read
 * through input_read_header and copying the result into a fresh register
 * recorded in nir_system_values[].  Every other intrinsic is delegated to
 * vec4_visitor::nir_setup_system_value_intrinsic().
 */
void
|
||||
vec4_tes_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
|
||||
{
|
||||
const struct brw_tes_prog_data *tes_prog_data =
|
||||
(const struct brw_tes_prog_data *) prog_data;
|
||||
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_load_tess_level_outer: {
|
||||
dst_reg dst(this, glsl_type::vec4_type);
|
||||
nir_system_values[SYSTEM_VALUE_TESS_LEVEL_OUTER] = dst;
|
||||
|
||||
/* Read URB offset 1 and reverse the component order (WZYX). */
dst_reg temp(this, glsl_type::vec4_type);
|
||||
vec4_instruction *read =
|
||||
emit(VEC4_OPCODE_URB_READ, temp, input_read_header);
|
||||
read->offset = 1;
|
||||
read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
|
||||
emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX)));
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_tess_level_inner: {
|
||||
dst_reg dst(this, glsl_type::vec2_type);
|
||||
nir_system_values[SYSTEM_VALUE_TESS_LEVEL_INNER] = dst;
|
||||
|
||||
/* Set up the message header to reference the proper parts of the URB */
|
||||
dst_reg temp(this, glsl_type::vec4_type);
|
||||
vec4_instruction *read =
|
||||
emit(VEC4_OPCODE_URB_READ, temp, input_read_header);
|
||||
read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
|
||||
/* Quad domain: read->offset is left untouched and the result is
 * reversed (WZYX).  Other domains: read from offset 1 with no swizzle.
 */
if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
|
||||
emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX)));
|
||||
} else {
|
||||
read->offset = 1;
|
||||
emit(MOV(dst, src_reg(temp)));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
vec4_visitor::nir_setup_system_value_intrinsic(instr);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Lay out the thread payload registers.
 *
 * Two registers (r0 and r1) are reserved first, uniforms are placed after
 * them via setup_uniforms(), and the resulting register number is stored
 * in first_non_payload_grf.
 */
void
|
||||
vec4_tes_visitor::setup_payload()
|
||||
{
|
||||
int reg = 0;
|
||||
|
||||
/* The payload always contains important data in r0 and r1, which contains
|
||||
* the URB handles that are passed on to the URB write at the end
|
||||
* of the thread.
|
||||
*/
|
||||
reg += 2;
|
||||
|
||||
reg = setup_uniforms(reg);
|
||||
|
||||
this->first_non_payload_grf = reg;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Emit the code that runs before the shader body.
 *
 * Allocates input_read_header as a uvec4 register and fills it with
 * TES_OPCODE_CREATE_INPUT_READ_HEADER, then clears current_annotation.
 * The header is the one later passed to the URB reads emitted by
 * nir_setup_system_value_intrinsic() and nir_emit_intrinsic().
 */
void
|
||||
vec4_tes_visitor::emit_prolog()
|
||||
{
|
||||
input_read_header = src_reg(this, glsl_type::uvec4_type);
|
||||
emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header));
|
||||
|
||||
this->current_annotation = NULL;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Intentionally emits nothing; \p mrf is unused.
 */
void
|
||||
vec4_tes_visitor::emit_urb_write_header(int mrf)
|
||||
{
|
||||
/* No need to do anything for DS; an implied write to this MRF will be
|
||||
* performed by VS_OPCODE_URB_WRITE.
|
||||
*/
|
||||
(void) mrf;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Emit a VS_OPCODE_URB_WRITE and return the new instruction.
 *
 * \param complete  when true, the write is flagged
 *                  BRW_URB_WRITE_EOT_COMPLETE and, if shader-time debugging
 *                  is enabled, emit_shader_time_end() is called first; when
 *                  false, the write carries BRW_URB_WRITE_NO_FLAGS.
 */
vec4_instruction *
|
||||
vec4_tes_visitor::emit_urb_write_opcode(bool complete)
|
||||
{
|
||||
/* For DS, the URB writes end the thread. */
|
||||
if (complete) {
|
||||
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
|
||||
emit_shader_time_end();
|
||||
}
|
||||
|
||||
vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
|
||||
inst->urb_write_flags = complete ?
|
||||
BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;
|
||||
|
||||
return inst;
|
||||
}
|
||||
|
||||
/**
 * Emit code for the intrinsics that need TES-specific handling.
 *
 * Handles tess-coord loads (a MOV from g1), primitive-ID loads (via
 * TES_OPCODE_GET_PRIMITIVE_ID) and input loads (a URB read through the
 * input read header).  Anything else is delegated to
 * vec4_visitor::nir_emit_intrinsic().
 */
void
|
||||
vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
||||
{
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_load_tess_coord:
|
||||
/* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
|
||||
emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
|
||||
src_reg(brw_vec8_grf(1, 0))));
|
||||
break;
|
||||
case nir_intrinsic_load_primitive_id:
|
||||
emit(TES_OPCODE_GET_PRIMITIVE_ID,
|
||||
get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_input:
|
||||
case nir_intrinsic_load_per_vertex_input: {
|
||||
src_reg indirect_offset = get_indirect_offset(instr);
|
||||
unsigned imm_offset = instr->const_index[0];
|
||||
src_reg header = input_read_header;
|
||||
|
||||
/* A valid indirect offset needs its own header: a copy of the shared
 * one with the offset folded in.  Otherwise the shared header is used
 * as-is.
 */
if (indirect_offset.file != BAD_FILE) {
|
||||
header = src_reg(this, glsl_type::uvec4_type);
|
||||
emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
|
||||
input_read_header, indirect_offset);
|
||||
}
|
||||
|
||||
dst_reg temp(this, glsl_type::ivec4_type);
|
||||
vec4_instruction *read =
|
||||
emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
|
||||
read->offset = imm_offset;
|
||||
read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
|
||||
|
||||
/* Copy to target. We might end up with some funky writemasks landing
|
||||
* in here, but we really don't want them in the above pseudo-ops.
|
||||
*/
|
||||
dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
|
||||
dst.writemask = brw_writemask_for_size(instr->num_components);
|
||||
emit(MOV(dst, src_reg(temp)));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
vec4_visitor::nir_emit_intrinsic(instr);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * End the thread; the only action taken here is a call to emit_vertex().
 */
void
|
||||
vec4_tes_visitor::emit_thread_end()
|
||||
{
|
||||
/* For DS, we always end the thread by emitting a single vertex.
|
||||
* emit_urb_write_opcode() will take care of setting the eot flag on the
|
||||
* SEND instruction.
|
||||
*/
|
||||
emit_vertex();
|
||||
}
|
||||
|
||||
} /* namespace brw */
|
|
@ -0,0 +1,69 @@
|
|||
/*
|
||||
* Copyright © 2013 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file brw_vec4_tes.h
|
||||
*
|
||||
* The vec4 mode tessellation evaluation shader compiler backend.
|
||||
*/
|
||||
|
||||
#ifndef BRW_VEC4_TES_H
|
||||
#define BRW_VEC4_TES_H
|
||||
|
||||
#include "brw_vec4.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace brw {
|
||||
|
||||
/**
 * vec4_visitor subclass for tessellation evaluation shaders.
 *
 * It overrides the base-class hooks for system values, intrinsics, payload
 * setup, prolog, thread end and URB writes, and adds one private register
 * (input_read_header).
 */
class vec4_tes_visitor : public vec4_visitor
|
||||
{
|
||||
public:
|
||||
vec4_tes_visitor(const struct brw_compiler *compiler,
|
||||
void *log_data,
|
||||
const struct brw_tes_prog_key *key,
|
||||
struct brw_tes_prog_data *prog_data,
|
||||
const nir_shader *nir,
|
||||
void *mem_ctx,
|
||||
int shader_time_index);
|
||||
|
||||
protected:
|
||||
/* System-value and intrinsic handling. */
virtual dst_reg *make_reg_for_system_value(int location,
|
||||
const glsl_type *type);
|
||||
virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
|
||||
virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
|
||||
|
||||
/* Payload layout and code emitted before/after the shader body. */
virtual void setup_payload();
|
||||
virtual void emit_prolog();
|
||||
virtual void emit_thread_end();
|
||||
|
||||
/* URB write hooks. */
virtual void emit_urb_write_header(int mrf);
|
||||
virtual vec4_instruction *emit_urb_write_opcode(bool complete);
|
||||
|
||||
private:
|
||||
/* Message header register used as the source of this visitor's URB reads. */
src_reg input_read_header;
|
||||
};
|
||||
|
||||
} /* namespace brw */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif /* BRW_VEC4_TES_H */
|
|
@ -155,7 +155,11 @@ vec4_vs_visitor::make_reg_for_system_value(int location,
|
|||
switch (location) {
|
||||
case SYSTEM_VALUE_BASE_VERTEX:
|
||||
reg->writemask = WRITEMASK_X;
|
||||
vs_prog_data->uses_vertexid = true;
|
||||
vs_prog_data->uses_basevertex = true;
|
||||
break;
|
||||
case SYSTEM_VALUE_BASE_INSTANCE:
|
||||
reg->writemask = WRITEMASK_Y;
|
||||
vs_prog_data->uses_baseinstance = true;
|
||||
break;
|
||||
case SYSTEM_VALUE_VERTEX_ID:
|
||||
case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
|
||||
|
@ -166,6 +170,11 @@ vec4_vs_visitor::make_reg_for_system_value(int location,
|
|||
reg->writemask = WRITEMASK_W;
|
||||
vs_prog_data->uses_instanceid = true;
|
||||
break;
|
||||
case SYSTEM_VALUE_DRAW_ID:
|
||||
reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX + 1);
|
||||
reg->writemask = WRITEMASK_X;
|
||||
vs_prog_data->uses_drawid = true;
|
||||
break;
|
||||
default:
|
||||
unreachable("not reached");
|
||||
}
|
||||
|
|
|
@ -58,36 +58,64 @@ const struct brw_tracked_state gen7_tes_push_constants = {
|
|||
static void
|
||||
gen7_upload_ds_state(struct brw_context *brw)
|
||||
{
|
||||
/* Disable the DS Unit */
|
||||
BEGIN_BATCH(7);
|
||||
OUT_BATCH(_3DSTATE_CONSTANT_DS << 16 | (7 - 2));
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
const struct brw_stage_state *stage_state = &brw->tes.base;
|
||||
/* BRW_NEW_TESS_PROGRAMS */
|
||||
bool active = brw->tess_eval_program;
|
||||
|
||||
BEGIN_BATCH(6);
|
||||
OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2));
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
/* BRW_NEW_TES_PROG_DATA */
|
||||
const struct brw_tes_prog_data *tes_prog_data = brw->tes.prog_data;
|
||||
const struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
|
||||
const struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
|
||||
|
||||
BEGIN_BATCH(2);
|
||||
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2));
|
||||
OUT_BATCH(brw->hw_bt_pool.next_offset);
|
||||
ADVANCE_BATCH();
|
||||
const unsigned thread_count = (brw->max_ds_threads - 1) <<
|
||||
(brw->is_haswell ? HSW_DS_MAX_THREADS_SHIFT : GEN7_DS_MAX_THREADS_SHIFT);
|
||||
|
||||
if (active) {
|
||||
BEGIN_BATCH(6);
|
||||
OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2));
|
||||
OUT_BATCH(stage_state->prog_offset);
|
||||
OUT_BATCH(SET_FIELD(DIV_ROUND_UP(stage_state->sampler_count, 4),
|
||||
GEN7_DS_SAMPLER_COUNT) |
|
||||
SET_FIELD(prog_data->binding_table.size_bytes / 4,
|
||||
GEN7_DS_BINDING_TABLE_ENTRY_COUNT));
|
||||
if (prog_data->total_scratch) {
|
||||
OUT_RELOC(stage_state->scratch_bo,
|
||||
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
|
||||
ffs(prog_data->total_scratch) - 11);
|
||||
} else {
|
||||
OUT_BATCH(0);
|
||||
}
|
||||
OUT_BATCH(SET_FIELD(prog_data->dispatch_grf_start_reg,
|
||||
GEN7_DS_DISPATCH_START_GRF) |
|
||||
SET_FIELD(vue_prog_data->urb_read_length,
|
||||
GEN7_DS_URB_READ_LENGTH));
|
||||
|
||||
OUT_BATCH(GEN7_DS_ENABLE |
|
||||
GEN7_DS_STATISTICS_ENABLE |
|
||||
thread_count |
|
||||
(tes_prog_data->domain == BRW_TESS_DOMAIN_TRI ?
|
||||
GEN7_DS_COMPUTE_W_COORDINATE_ENABLE : 0));
|
||||
ADVANCE_BATCH();
|
||||
} else {
|
||||
BEGIN_BATCH(6);
|
||||
OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2));
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
brw->tes.enabled = active;
|
||||
}
|
||||
|
||||
const struct brw_tracked_state gen7_ds_state = {
|
||||
.dirty = {
|
||||
.mesa = 0,
|
||||
.brw = BRW_NEW_CONTEXT,
|
||||
.mesa = _NEW_TRANSFORM,
|
||||
.brw = BRW_NEW_BATCH |
|
||||
BRW_NEW_CONTEXT |
|
||||
BRW_NEW_TESS_PROGRAMS |
|
||||
BRW_NEW_TES_PROG_DATA,
|
||||
},
|
||||
.emit = gen7_upload_ds_state,
|
||||
};
|
||||
|
|
|
@ -60,37 +60,58 @@ const struct brw_tracked_state gen7_tcs_push_constants = {
|
|||
static void
|
||||
gen7_upload_hs_state(struct brw_context *brw)
|
||||
{
|
||||
/* Disable the HS Unit */
|
||||
BEGIN_BATCH(7);
|
||||
OUT_BATCH(_3DSTATE_CONSTANT_HS << 16 | (7 - 2));
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
const struct brw_stage_state *stage_state = &brw->tcs.base;
|
||||
/* BRW_NEW_TESS_PROGRAMS */
|
||||
bool active = brw->tess_eval_program;
|
||||
/* BRW_NEW_TCS_PROG_DATA */
|
||||
const struct brw_vue_prog_data *prog_data = &brw->tcs.prog_data->base;
|
||||
|
||||
BEGIN_BATCH(7);
|
||||
OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
|
||||
BEGIN_BATCH(2);
|
||||
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2));
|
||||
OUT_BATCH(brw->hw_bt_pool.next_offset);
|
||||
ADVANCE_BATCH();
|
||||
if (active) {
|
||||
BEGIN_BATCH(7);
|
||||
OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
|
||||
OUT_BATCH(SET_FIELD(DIV_ROUND_UP(stage_state->sampler_count, 4),
|
||||
GEN7_HS_SAMPLER_COUNT) |
|
||||
SET_FIELD(prog_data->base.binding_table.size_bytes / 4,
|
||||
GEN7_HS_BINDING_TABLE_ENTRY_COUNT) |
|
||||
(brw->max_hs_threads - 1));
|
||||
OUT_BATCH(GEN7_HS_ENABLE |
|
||||
GEN7_HS_STATISTICS_ENABLE |
|
||||
SET_FIELD(brw->tcs.prog_data->instances - 1,
|
||||
GEN7_HS_INSTANCE_COUNT));
|
||||
OUT_BATCH(stage_state->prog_offset);
|
||||
if (prog_data->base.total_scratch) {
|
||||
OUT_RELOC(stage_state->scratch_bo,
|
||||
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
|
||||
ffs(prog_data->base.total_scratch) - 11);
|
||||
} else {
|
||||
OUT_BATCH(0);
|
||||
}
|
||||
OUT_BATCH(GEN7_HS_INCLUDE_VERTEX_HANDLES |
|
||||
SET_FIELD(prog_data->base.dispatch_grf_start_reg,
|
||||
GEN7_HS_DISPATCH_START_GRF));
|
||||
/* Ignore URB semaphores */
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
} else {
|
||||
BEGIN_BATCH(7);
|
||||
OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
brw->tcs.enabled = active;
|
||||
}
|
||||
|
||||
const struct brw_tracked_state gen7_hs_state = {
|
||||
.dirty = {
|
||||
.mesa = 0,
|
||||
.brw = BRW_NEW_CONTEXT,
|
||||
.brw = BRW_NEW_BATCH |
|
||||
BRW_NEW_TCS_PROG_DATA |
|
||||
BRW_NEW_TESS_PROGRAMS,
|
||||
},
|
||||
.emit = gen7_upload_hs_state,
|
||||
};
|
||||
|
|
|
@ -115,7 +115,12 @@ gen8_emit_vertices(struct brw_context *brw)
|
|||
}
|
||||
|
||||
/* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */
|
||||
unsigned nr_buffers = brw->vb.nr_buffers + brw->vs.prog_data->uses_vertexid;
|
||||
const bool uses_draw_params =
|
||||
brw->vs.prog_data->uses_basevertex ||
|
||||
brw->vs.prog_data->uses_baseinstance;
|
||||
const unsigned nr_buffers = brw->vb.nr_buffers +
|
||||
uses_draw_params + brw->vs.prog_data->uses_drawid;
|
||||
|
||||
if (nr_buffers) {
|
||||
assert(nr_buffers <= 33);
|
||||
|
||||
|
@ -135,7 +140,7 @@ gen8_emit_vertices(struct brw_context *brw)
|
|||
OUT_BATCH(buffer->bo->size);
|
||||
}
|
||||
|
||||
if (brw->vs.prog_data->uses_vertexid) {
|
||||
if (uses_draw_params) {
|
||||
OUT_BATCH(brw->vb.nr_buffers << GEN6_VB0_INDEX_SHIFT |
|
||||
GEN7_VB0_ADDRESS_MODIFYENABLE |
|
||||
mocs_wb << 16);
|
||||
|
@ -143,21 +148,33 @@ gen8_emit_vertices(struct brw_context *brw)
|
|||
brw->draw.draw_params_offset);
|
||||
OUT_BATCH(brw->draw.draw_params_bo->size);
|
||||
}
|
||||
|
||||
if (brw->vs.prog_data->uses_drawid) {
|
||||
OUT_BATCH((brw->vb.nr_buffers + 1) << GEN6_VB0_INDEX_SHIFT |
|
||||
GEN7_VB0_ADDRESS_MODIFYENABLE |
|
||||
mocs_wb << 16);
|
||||
OUT_RELOC64(brw->draw.draw_id_bo, I915_GEM_DOMAIN_VERTEX, 0,
|
||||
brw->draw.draw_id_offset);
|
||||
OUT_BATCH(brw->draw.draw_id_bo->size);
|
||||
}
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
|
||||
/* Normally we don't need an element for the SGVS attribute because the
|
||||
* 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an
|
||||
* element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if the
|
||||
* vertex ID is used then it needs an element for the base vertex buffer.
|
||||
* Additionally if there is an edge flag element then the SGVS can't be
|
||||
* inserted past that so we need a dummy element to ensure that the edge
|
||||
* flag is the last one.
|
||||
* element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if
|
||||
* we're using draw parameters then we need an element for the those
|
||||
* values. Additionally if there is an edge flag element then the SGVS
|
||||
* can't be inserted past that so we need a dummy element to ensure that
|
||||
* the edge flag is the last one.
|
||||
*/
|
||||
bool needs_sgvs_element = (brw->vs.prog_data->uses_vertexid ||
|
||||
(brw->vs.prog_data->uses_instanceid &&
|
||||
uses_edge_flag));
|
||||
unsigned nr_elements = brw->vb.nr_enabled + needs_sgvs_element;
|
||||
const bool needs_sgvs_element = (brw->vs.prog_data->uses_basevertex ||
|
||||
brw->vs.prog_data->uses_baseinstance ||
|
||||
((brw->vs.prog_data->uses_instanceid ||
|
||||
brw->vs.prog_data->uses_vertexid) &&
|
||||
uses_edge_flag));
|
||||
const unsigned nr_elements =
|
||||
brw->vb.nr_enabled + needs_sgvs_element + brw->vs.prog_data->uses_drawid;
|
||||
|
||||
/* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
|
||||
* presumably for VertexID/InstanceID.
|
||||
|
@ -212,12 +229,13 @@ gen8_emit_vertices(struct brw_context *brw)
|
|||
}
|
||||
|
||||
if (needs_sgvs_element) {
|
||||
if (brw->vs.prog_data->uses_vertexid) {
|
||||
if (brw->vs.prog_data->uses_basevertex ||
|
||||
brw->vs.prog_data->uses_baseinstance) {
|
||||
OUT_BATCH(GEN6_VE0_VALID |
|
||||
brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
|
||||
BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
|
||||
BRW_SURFACEFORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT);
|
||||
OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
|
||||
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
|
||||
(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT) |
|
||||
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
|
||||
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
|
||||
} else {
|
||||
|
@ -229,6 +247,16 @@ gen8_emit_vertices(struct brw_context *brw)
|
|||
}
|
||||
}
|
||||
|
||||
if (brw->vs.prog_data->uses_drawid) {
|
||||
OUT_BATCH(GEN6_VE0_VALID |
|
||||
((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) |
|
||||
(BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT));
|
||||
OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
|
||||
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
|
||||
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
|
||||
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
|
||||
}
|
||||
|
||||
if (gen6_edgeflag_input) {
|
||||
uint32_t format =
|
||||
brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
|
||||
|
@ -266,6 +294,15 @@ gen8_emit_vertices(struct brw_context *brw)
|
|||
OUT_BATCH(buffer->step_rate);
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
|
||||
if (brw->vs.prog_data->uses_drawid) {
|
||||
const unsigned element = brw->vb.nr_enabled + needs_sgvs_element;
|
||||
BEGIN_BATCH(3);
|
||||
OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
|
||||
OUT_BATCH(element);
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
}
|
||||
|
||||
const struct brw_tracked_state gen8_vertices = {
|
||||
|
|
|
@ -203,6 +203,7 @@ intelInitExtensions(struct gl_context *ctx)
|
|||
ctx->Extensions.ARB_point_sprite = true;
|
||||
ctx->Extensions.ARB_seamless_cube_map = true;
|
||||
ctx->Extensions.ARB_shader_bit_encoding = true;
|
||||
ctx->Extensions.ARB_shader_draw_parameters = true;
|
||||
ctx->Extensions.ARB_shader_texture_lod = true;
|
||||
ctx->Extensions.ARB_shadow = true;
|
||||
ctx->Extensions.ARB_sync = true;
|
||||
|
@ -333,6 +334,7 @@ intelInitExtensions(struct gl_context *ctx)
|
|||
ctx->Extensions.ARB_shader_image_load_store = true;
|
||||
ctx->Extensions.ARB_shader_image_size = true;
|
||||
ctx->Extensions.ARB_shader_texture_image_samples = true;
|
||||
ctx->Extensions.ARB_tessellation_shader = true;
|
||||
ctx->Extensions.ARB_texture_compression_bptc = true;
|
||||
ctx->Extensions.ARB_texture_view = true;
|
||||
ctx->Extensions.ARB_shader_storage_buffer_object = true;
|
||||
|
@ -362,7 +364,6 @@ intelInitExtensions(struct gl_context *ctx)
|
|||
|
||||
if (brw->gen >= 8) {
|
||||
ctx->Extensions.ARB_stencil_texturing = true;
|
||||
ctx->Extensions.ARB_tessellation_shader = true;
|
||||
}
|
||||
|
||||
if (brw->gen >= 9) {
|
||||
|
|
|
@ -96,6 +96,7 @@ EXT(ARB_separate_shader_objects , dummy_true
|
|||
EXT(ARB_shader_atomic_counters , ARB_shader_atomic_counters , GLL, GLC, x , x , 2011)
|
||||
EXT(ARB_shader_bit_encoding , ARB_shader_bit_encoding , GLL, GLC, x , x , 2010)
|
||||
EXT(ARB_shader_clock , ARB_shader_clock , GLL, GLC, x , x , 2015)
|
||||
EXT(ARB_shader_draw_parameters , ARB_shader_draw_parameters , GLL, GLC, x , x , 2013)
|
||||
EXT(ARB_shader_image_load_store , ARB_shader_image_load_store , GLL, GLC, x , x , 2011)
|
||||
EXT(ARB_shader_image_size , ARB_shader_image_size , GLL, GLC, x , x , 2012)
|
||||
EXT(ARB_shader_objects , dummy_true , GLL, GLC, x , x , 2002)
|
||||
|
|
|
@ -3717,6 +3717,7 @@ struct gl_extensions
|
|||
GLboolean ARB_shader_atomic_counters;
|
||||
GLboolean ARB_shader_bit_encoding;
|
||||
GLboolean ARB_shader_clock;
|
||||
GLboolean ARB_shader_draw_parameters;
|
||||
GLboolean ARB_shader_image_load_store;
|
||||
GLboolean ARB_shader_image_size;
|
||||
GLboolean ARB_shader_precision;
|
||||
|
|
|
@ -1083,11 +1083,11 @@ prog_to_nir(const struct gl_program *prog,
|
|||
c = rzalloc(NULL, struct ptn_compile);
|
||||
if (!c)
|
||||
return NULL;
|
||||
s = nir_shader_create(NULL, stage, options);
|
||||
if (!s)
|
||||
goto fail;
|
||||
c->prog = prog;
|
||||
|
||||
nir_builder_init_simple_shader(&c->build, NULL, stage, options);
|
||||
s = c->build.shader;
|
||||
|
||||
if (prog->Parameters->NumParameters > 0) {
|
||||
c->parameters = rzalloc(s, nir_variable);
|
||||
c->parameters->type =
|
||||
|
@ -1098,13 +1098,6 @@ prog_to_nir(const struct gl_program *prog,
|
|||
exec_list_push_tail(&s->uniforms, &c->parameters->node);
|
||||
}
|
||||
|
||||
nir_function *func = nir_function_create(s, "main");
|
||||
nir_function_impl *impl = nir_function_impl_create(func);
|
||||
|
||||
c->build.shader = s;
|
||||
c->build.impl = impl;
|
||||
c->build.cursor = nir_after_cf_list(&impl->body);
|
||||
|
||||
setup_registers_and_variables(c);
|
||||
if (unlikely(c->error))
|
||||
goto fail;
|
||||
|
|
|
@ -4328,6 +4328,8 @@ const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
|
|||
TGSI_SEMANTIC_INSTANCEID,
|
||||
TGSI_SEMANTIC_VERTEXID_NOBASE,
|
||||
TGSI_SEMANTIC_BASEVERTEX,
|
||||
0, /* SYSTEM_VALUE_BASE_INSTANCE */
|
||||
0, /* SYSTEM_VALUE_DRAW_ID */
|
||||
|
||||
/* Geometry shader
|
||||
*/
|
||||
|
|
|
@ -58,6 +58,7 @@ struct _mesa_prim {
|
|||
GLint basevertex;
|
||||
GLuint num_instances;
|
||||
GLuint base_instance;
|
||||
GLuint draw_id;
|
||||
|
||||
GLsizeiptr indirect_offset;
|
||||
};
|
||||
|
|
|
@ -1341,6 +1341,7 @@ vbo_validated_multidrawelements(struct gl_context *ctx, GLenum mode,
|
|||
prim[i].indexed = 1;
|
||||
prim[i].num_instances = 1;
|
||||
prim[i].base_instance = 0;
|
||||
prim[i].draw_id = i;
|
||||
prim[i].is_indirect = 0;
|
||||
if (basevertex != NULL)
|
||||
prim[i].basevertex = basevertex[i];
|
||||
|
@ -1371,6 +1372,7 @@ vbo_validated_multidrawelements(struct gl_context *ctx, GLenum mode,
|
|||
prim[0].indexed = 1;
|
||||
prim[0].num_instances = 1;
|
||||
prim[0].base_instance = 0;
|
||||
prim[0].draw_id = i;
|
||||
prim[0].is_indirect = 0;
|
||||
if (basevertex != NULL)
|
||||
prim[0].basevertex = basevertex[i];
|
||||
|
@ -1598,6 +1600,7 @@ vbo_validated_multidrawarraysindirect(struct gl_context *ctx,
|
|||
prim[i].mode = mode;
|
||||
prim[i].indirect_offset = offset;
|
||||
prim[i].is_indirect = 1;
|
||||
prim[i].draw_id = i;
|
||||
}
|
||||
|
||||
check_buffers_are_unmapped(exec->array.inputs);
|
||||
|
@ -1684,6 +1687,7 @@ vbo_validated_multidrawelementsindirect(struct gl_context *ctx,
|
|||
prim[i].indexed = 1;
|
||||
prim[i].indirect_offset = offset;
|
||||
prim[i].is_indirect = 1;
|
||||
prim[i].draw_id = i;
|
||||
}
|
||||
|
||||
check_buffers_are_unmapped(exec->array.inputs);
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
#include "anv_meta.h"
|
||||
#include "anv_meta_clear.h"
|
||||
#include "anv_private.h"
|
||||
#include "anv_nir_builder.h"
|
||||
#include "glsl/nir/nir_builder.h"
|
||||
|
||||
struct anv_render_pass anv_meta_dummy_renderpass = {0};
|
||||
|
||||
|
@ -41,7 +41,7 @@ build_nir_vertex_shader(bool attr_flat)
|
|||
|
||||
const struct glsl_type *vertex_type = glsl_vec4_type();
|
||||
|
||||
nir_builder_init_simple_shader(&b, MESA_SHADER_VERTEX);
|
||||
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
|
||||
|
||||
nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
|
||||
vertex_type, "a_pos");
|
||||
|
@ -73,7 +73,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
|
|||
{
|
||||
nir_builder b;
|
||||
|
||||
nir_builder_init_simple_shader(&b, MESA_SHADER_FRAGMENT);
|
||||
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
|
||||
|
||||
const struct glsl_type *color_type = glsl_vec4_type();
|
||||
|
||||
|
|
|
@ -23,8 +23,8 @@
|
|||
|
||||
#include "anv_meta.h"
|
||||
#include "anv_meta_clear.h"
|
||||
#include "anv_nir_builder.h"
|
||||
#include "anv_private.h"
|
||||
#include "glsl/nir/nir_builder.h"
|
||||
|
||||
/** Vertex attributes for color clears. */
|
||||
struct color_clear_vattrs {
|
||||
|
@ -66,8 +66,8 @@ build_color_shaders(struct nir_shader **out_vs,
|
|||
nir_builder vs_b;
|
||||
nir_builder fs_b;
|
||||
|
||||
nir_builder_init_simple_shader(&vs_b, MESA_SHADER_VERTEX);
|
||||
nir_builder_init_simple_shader(&fs_b, MESA_SHADER_FRAGMENT);
|
||||
nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
|
||||
nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
|
||||
|
||||
const struct glsl_type *position_type = glsl_vec4_type();
|
||||
const struct glsl_type *color_type = glsl_vec4_type();
|
||||
|
@ -372,8 +372,8 @@ build_depthstencil_shaders(struct nir_shader **out_vs,
|
|||
nir_builder vs_b;
|
||||
nir_builder fs_b;
|
||||
|
||||
nir_builder_init_simple_shader(&vs_b, MESA_SHADER_VERTEX);
|
||||
nir_builder_init_simple_shader(&fs_b, MESA_SHADER_FRAGMENT);
|
||||
nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
|
||||
nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
|
||||
|
||||
const struct glsl_type *position_type = glsl_vec4_type();
|
||||
|
||||
|
|
|
@ -1,44 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2015 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "anv_nir.h"
|
||||
#include "glsl/nir/nir_builder.h"
|
||||
#include "util/ralloc.h"
|
||||
|
||||
/* This file includes NIR helpers used by meta shaders in the Vulkan
|
||||
* driver. Eventually, these will all be merged into nir_builder.
|
||||
* However, for now, keeping them in their own file helps to prevent merge
|
||||
* conflicts.
|
||||
*/
|
||||
|
||||
static inline void
|
||||
nir_builder_init_simple_shader(nir_builder *b, gl_shader_stage stage)
|
||||
{
|
||||
b->shader = nir_shader_create(NULL, stage, NULL);
|
||||
|
||||
nir_function *func =
|
||||
nir_function_create(b->shader, ralloc_strdup(b->shader, "main"));
|
||||
|
||||
b->impl = nir_function_impl_create(func);
|
||||
b->cursor = nir_after_cf_list(&b->impl->body);
|
||||
}
|
Loading…
Reference in New Issue