Merge remote-tracking branch 'mesa-public/master' into vulkan

This pulls in nir_builder_init_simple_shader and allows us to delete
anv_nir_builder.h entirely.
This commit is contained in:
Jason Ekstrand 2015-12-29 13:47:37 -08:00
commit a33fcc0fd4
54 changed files with 1088 additions and 199 deletions

View File

@ -112,7 +112,7 @@ GL 4.0, GLSL 4.00 --- all DONE: nvc0, r600, radeonsi
GL_ARB_gpu_shader_fp64 DONE (llvmpipe, softpipe)
GL_ARB_sample_shading DONE (i965, nv50)
GL_ARB_shader_subroutine DONE (i965, nv50, llvmpipe, softpipe)
GL_ARB_tessellation_shader DONE (i965/gen8+)
GL_ARB_tessellation_shader DONE (i965)
GL_ARB_texture_buffer_object_rgb32 DONE (i965, llvmpipe, softpipe)
GL_ARB_texture_cube_map_array DONE (i965, nv50, llvmpipe, softpipe)
GL_ARB_texture_gather DONE (i965, nv50, llvmpipe, softpipe)

View File

@ -47,7 +47,7 @@ Note: some of the new features are only available with certain drivers.
<li>GL_ARB_base_instance on freedreno/a4xx</li>
<li>GL_ARB_compute_shader on i965</li>
<li>GL_ARB_copy_image on r600</li>
<li>GL_ARB_tessellation_shader on i965/gen8+ and r600 (evergreen/cayman only)</li>
<li>GL_ARB_tessellation_shader on i965 and r600 (evergreen/cayman only)</li>
<li>GL_ARB_texture_buffer_object_rgb32 on freedreno/a4xx</li>
<li>GL_ARB_texture_buffer_range on freedreno/a4xx</li>
<li>GL_ARB_texture_query_lod on freedreno/a4xx</li>

View File

@ -1968,14 +1968,10 @@ tgsi_to_nir(const void *tgsi_tokens,
tgsi_scan_shader(tgsi_tokens, &scan);
c->scan = &scan;
s = nir_shader_create(NULL, tgsi_processor_to_shader_stage(scan.processor),
options);
nir_function *func = nir_function_create(s, "main");
nir_function_impl *impl = nir_function_impl_create(func);
nir_builder_init(&c->build, impl);
c->build.cursor = nir_after_cf_list(&impl->body);
nir_builder_init_simple_shader(&c->build, NULL,
tgsi_processor_to_shader_stage(scan.processor),
options);
s = c->build.shader;
s->num_inputs = scan.file_max[TGSI_FILE_INPUT] + 1;
s->num_uniforms = scan.const_file_max[0] + 1;

View File

@ -404,6 +404,19 @@ debug_get_flags_option(const char *name,
const struct debug_named_value *flags,
uint64_t dfault);
/* Define a static accessor function debug_get_option_<suffix>() that reads
 * the environment/debug option `name` exactly once and caches the result;
 * subsequent calls return the cached string. `dfault` is returned when the
 * option is unset.
 *
 * NOTE(review): the first/value caching is not thread-safe — presumably the
 * first call happens during single-threaded setup; confirm with callers.
 */
#define DEBUG_GET_ONCE_OPTION(suffix, name, dfault) \
static const char * \
debug_get_option_ ## suffix (void) \
{ \
static boolean first = TRUE; \
static const char * value; \
if (first) { \
first = FALSE; \
value = debug_get_option(name, dfault); \
} \
return value; \
}
#define DEBUG_GET_ONCE_BOOL_OPTION(sufix, name, dfault) \
static boolean \
debug_get_option_ ## sufix (void) \

View File

@ -1889,6 +1889,9 @@ AlgebraicOpt::handleCVT_EXTBF(Instruction *cvt)
arg = shift->getSrc(0);
offset = imm.reg.data.u32;
}
// We just AND'd the high bits away, which means this is effectively an
// unsigned value.
cvt->sType = TYPE_U32;
} else if (insn->op == OP_SHR &&
insn->sType == cvt->sType &&
insn->src(1).getImmediate(imm)) {

View File

@ -1956,7 +1956,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
if (!gs_ring_buffer) {
radeon_set_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4,
ALIGN_DIVUP(cb->buffer_size >> 4, 16), pkt_flags);
ALIGN_DIVUP(cb->buffer_size, 256), pkt_flags);
radeon_set_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8,
pkt_flags);
}

View File

@ -1768,7 +1768,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
if (!gs_ring_buffer) {
radeon_set_context_reg(cs, reg_alu_constbuf_size + buffer_index * 4,
ALIGN_DIVUP(cb->buffer_size >> 4, 16));
ALIGN_DIVUP(cb->buffer_size, 256));
radeon_set_context_reg(cs, reg_alu_const_cache + buffer_index * 4, offset >> 8);
}

View File

@ -28,8 +28,11 @@
#include "si_shader.h"
#include "sid.h"
#include "sid_tables.h"
#include "radeon/radeon_elf_util.h"
#include "ddebug/dd_util.h"
#include "util/u_memory.h"
DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL)
static void si_dump_shader(struct si_shader_ctx_state *state, const char *name,
FILE *f)
@ -42,6 +45,98 @@ static void si_dump_shader(struct si_shader_ctx_state *state, const char *name,
fprintf(f, "%s\n\n", state->current->binary.disasm_string);
}
/**
 * Shader compiles can be overridden with arbitrary ELF objects by setting
 * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
 *
 * \param num     compilation counter value identifying the shader
 * \param binary  receives the parsed ELF contents on success
 * \return true if the shader was replaced; false when no entry matches
 *         \p num or on any I/O / allocation failure. A malformed option
 *         string terminates the process (this is a developer debug aid).
 */
bool si_replace_shader(unsigned num, struct radeon_shader_binary *binary)
{
	const char *p = debug_get_option_replace_shaders();
	const char *semicolon;
	char *copy = NULL;
	FILE *f;
	long filesize, nread;
	char *buf = NULL;
	bool replaced = false;

	if (!p)
		return false;

	/* Scan the num:filename[;num:filename...] list for an entry
	 * whose number matches `num`. */
	while (*p) {
		unsigned long i;
		char *endp;
		i = strtoul(p, &endp, 0);

		p = endp;
		if (*p != ':') {
			fprintf(stderr, "RADEON_REPLACE_SHADERS formatted badly.\n");
			exit(1);
		}
		++p;

		if (i == num)
			break;

		p = strchr(p, ';');
		if (!p)
			return false;
		++p;
	}
	if (!*p)
		return false;

	/* If more entries follow, isolate this entry's filename. */
	semicolon = strchr(p, ';');
	if (semicolon) {
		p = copy = strndup(p, semicolon - p);
		if (!copy) {
			fprintf(stderr, "out of memory\n");
			return false;
		}
	}

	fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p);

	/* Open in binary mode: the replacement is an ELF object, and text
	 * mode would corrupt the bytes on platforms that translate line
	 * endings (e.g. Windows). */
	f = fopen(p, "rb");
	if (!f) {
		perror("radeonsi: failed to open file");
		goto out_free;
	}

	/* Determine the file size, then read the whole file. */
	if (fseek(f, 0, SEEK_END) != 0)
		goto file_error;

	filesize = ftell(f);
	if (filesize < 0)
		goto file_error;

	if (fseek(f, 0, SEEK_SET) != 0)
		goto file_error;

	buf = MALLOC(filesize);
	if (!buf) {
		fprintf(stderr, "out of memory\n");
		goto out_close;
	}

	nread = fread(buf, 1, filesize, f);
	if (nread != filesize)
		goto file_error;

	radeon_elf_read(buf, filesize, binary);
	replaced = true;

out_close:
	fclose(f);
out_free:
	/* FREE/free accept NULL, so the early-error paths are safe. */
	FREE(buf);
	free(copy);
	return replaced;

file_error:
	perror("radeonsi: reading shader");
	goto out_close;
}
/* Parsed IBs are difficult to read without colors. Use "less -R file" to
* read them, or use "aha -b -f file" to convert them to html.
*/

View File

@ -329,6 +329,7 @@ void si_init_cp_dma_functions(struct si_context *sctx);
/* si_debug.c */
void si_init_debug_functions(struct si_context *sctx);
void si_check_vm_faults(struct si_context *sctx);
bool si_replace_shader(unsigned num, struct radeon_shader_binary *binary);
/* si_dma.c */
void si_dma_copy(struct pipe_context *ctx,

View File

@ -3884,11 +3884,17 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
bool dump_asm = r600_can_dump_shader(&sscreen->b,
shader->selector ? shader->selector->tokens : NULL);
bool dump_ir = dump_asm && !(sscreen->b.debug_flags & DBG_NO_IR);
unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations);
r = radeon_llvm_compile(mod, &shader->binary,
r600_get_llvm_processor_name(sscreen->b.family), dump_ir, dump_asm, tm);
if (r)
return r;
if (dump_ir || dump_asm)
fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
if (!si_replace_shader(count, &shader->binary)) {
r = radeon_llvm_compile(mod, &shader->binary,
r600_get_llvm_processor_name(sscreen->b.family), dump_ir, dump_asm, tm);
if (r)
return r;
}
r = si_shader_binary_read(sscreen, shader);

View File

@ -634,7 +634,6 @@ static int si_shader_select(struct pipe_context *ctx,
sel->last_variant = shader;
}
state->current = shader;
p_atomic_inc(&sctx->screen->b.num_compilations);
pipe_mutex_unlock(sel->mutex);
return 0;
}

View File

@ -951,6 +951,11 @@ builtin_variable_generator::generate_vs_special_vars()
add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceIDARB");
if (state->ARB_draw_instanced_enable || state->is_version(140, 300))
add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceID");
if (state->ARB_shader_draw_parameters_enable) {
add_system_value(SYSTEM_VALUE_BASE_VERTEX, int_t, "gl_BaseVertexARB");
add_system_value(SYSTEM_VALUE_BASE_INSTANCE, int_t, "gl_BaseInstanceARB");
add_system_value(SYSTEM_VALUE_DRAW_ID, int_t, "gl_DrawIDARB");
}
if (state->AMD_vertex_shader_layer_enable) {
var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer");
var->data.interpolation = INTERP_QUALIFIER_FLAT;

View File

@ -610,6 +610,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
EXT(ARB_shader_atomic_counters, true, false, ARB_shader_atomic_counters),
EXT(ARB_shader_bit_encoding, true, false, ARB_shader_bit_encoding),
EXT(ARB_shader_clock, true, false, ARB_shader_clock),
EXT(ARB_shader_draw_parameters, true, false, ARB_shader_draw_parameters),
EXT(ARB_shader_image_load_store, true, false, ARB_shader_image_load_store),
EXT(ARB_shader_image_size, true, false, ARB_shader_image_size),
EXT(ARB_shader_precision, true, false, ARB_shader_precision),

View File

@ -536,6 +536,8 @@ struct _mesa_glsl_parse_state {
bool ARB_shader_bit_encoding_warn;
bool ARB_shader_clock_enable;
bool ARB_shader_clock_warn;
bool ARB_shader_draw_parameters_enable;
bool ARB_shader_draw_parameters_warn;
bool ARB_shader_image_load_store_enable;
bool ARB_shader_image_load_store_warn;
bool ARB_shader_image_size_enable;

View File

@ -1655,6 +1655,10 @@ nir_intrinsic_from_system_value(gl_system_value val)
return nir_intrinsic_load_vertex_id;
case SYSTEM_VALUE_INSTANCE_ID:
return nir_intrinsic_load_instance_id;
case SYSTEM_VALUE_DRAW_ID:
return nir_intrinsic_load_draw_id;
case SYSTEM_VALUE_BASE_INSTANCE:
return nir_intrinsic_load_base_instance;
case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
return nir_intrinsic_load_vertex_id_zero_base;
case SYSTEM_VALUE_BASE_VERTEX:
@ -1700,6 +1704,10 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
return SYSTEM_VALUE_VERTEX_ID;
case nir_intrinsic_load_instance_id:
return SYSTEM_VALUE_INSTANCE_ID;
case nir_intrinsic_load_draw_id:
return SYSTEM_VALUE_DRAW_ID;
case nir_intrinsic_load_base_instance:
return SYSTEM_VALUE_BASE_INSTANCE;
case nir_intrinsic_load_vertex_id_zero_base:
return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
case nir_intrinsic_load_base_vertex:

View File

@ -43,6 +43,17 @@ nir_builder_init(nir_builder *build, nir_function_impl *impl)
build->shader = impl->function->shader;
}
/* Create a fresh shader for the given stage with a single "main" function,
 * and point the builder's cursor at the end of its (empty) body. */
static inline void
nir_builder_init_simple_shader(nir_builder *build, void *mem_ctx,
                               gl_shader_stage stage,
                               const nir_shader_compiler_options *options)
{
   nir_shader *shader = nir_shader_create(mem_ctx, stage, options);
   nir_function *main_func = nir_function_create(shader, "main");

   build->shader = shader;
   build->impl = nir_function_impl_create(main_func);
   build->cursor = nir_after_cf_list(&build->impl->body);
}
static inline void
nir_builder_instr_insert(nir_builder *build, nir_instr *instr)
{

View File

@ -258,6 +258,8 @@ SYSTEM_VALUE(vertex_id, 1, 0)
SYSTEM_VALUE(vertex_id_zero_base, 1, 0)
SYSTEM_VALUE(base_vertex, 1, 0)
SYSTEM_VALUE(instance_id, 1, 0)
SYSTEM_VALUE(base_instance, 1, 0)
SYSTEM_VALUE(draw_id, 1, 0)
SYSTEM_VALUE(sample_id, 1, 0)
SYSTEM_VALUE(sample_pos, 2, 0)
SYSTEM_VALUE(sample_mask_in, 1, 0)

View File

@ -62,6 +62,10 @@ optimizations = [
(('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
(('fadd', ('fneg', a), a), 0.0),
(('iadd', ('ineg', a), a), 0),
(('iadd', ('ineg', a), ('iadd', a, b)), b),
(('iadd', a, ('iadd', ('ineg', a), b)), b),
(('fadd', ('fneg', a), ('fadd', a, b)), b),
(('fadd', a, ('fadd', ('fneg', a), b)), b),
(('fmul', a, 0.0), 0.0),
(('imul', a, 0), 0),
(('umul_unorm_4x8', a, 0), 0),

View File

@ -379,6 +379,26 @@ typedef enum
* \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
*/
SYSTEM_VALUE_BASE_VERTEX,
/**
* Value of \c baseinstance passed to instanced draw entry points
*
* \sa SYSTEM_VALUE_INSTANCE_ID
*/
SYSTEM_VALUE_BASE_INSTANCE,
/**
* From _ARB_shader_draw_parameters:
*
* "Additionally, this extension adds a further built-in variable,
* gl_DrawID to the shading language. This variable contains the index
* of the draw currently being processed by a Multi* variant of a
* drawing command (such as MultiDrawElements or
* MultiDrawArraysIndirect)."
*
* If GL_ARB_multi_draw_indirect is not supported, this is always 0.
*/
SYSTEM_VALUE_DRAW_ID,
/*@}*/
/**

View File

@ -30,24 +30,17 @@ protected:
~nir_cf_test();
nir_builder b;
nir_shader *shader;
nir_function_impl *impl;
};
nir_cf_test::nir_cf_test()
{
static const nir_shader_compiler_options options = { };
shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, &options);
nir_function *func = nir_function_create(shader, "main");
nir_function_overload *overload = nir_function_overload_create(func);
impl = nir_function_impl_create(overload);
nir_builder_init(&b, impl);
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, &options);
}
nir_cf_test::~nir_cf_test()
{
ralloc_free(shader);
ralloc_free(b.shader);
}
TEST_F(nir_cf_test, delete_break_in_loop)
@ -56,12 +49,12 @@ TEST_F(nir_cf_test, delete_break_in_loop)
*
* while (...) { break; }
*/
nir_loop *loop = nir_loop_create(shader);
nir_cf_node_insert(nir_after_cf_list(&impl->body), &loop->cf_node);
nir_loop *loop = nir_loop_create(b.shader);
nir_cf_node_insert(nir_after_cf_list(&b.impl->body), &loop->cf_node);
b.cursor = nir_after_cf_list(&loop->body);
nir_jump_instr *jump = nir_jump_instr_create(shader, nir_jump_break);
nir_jump_instr *jump = nir_jump_instr_create(b.shader, nir_jump_break);
nir_builder_instr_insert(&b, &jump->instr);
/* At this point, we should have:
@ -82,10 +75,10 @@ TEST_F(nir_cf_test, delete_break_in_loop)
* block block_3:
* }
*/
nir_block *block_0 = nir_start_block(impl);
nir_block *block_0 = nir_start_block(b.impl);
nir_block *block_1 = nir_cf_node_as_block(nir_loop_first_cf_node(loop));
nir_block *block_2 = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
nir_block *block_3 = impl->end_block;
nir_block *block_3 = b.impl->end_block;
ASSERT_EQ(nir_cf_node_block, block_0->cf_node.type);
ASSERT_EQ(nir_cf_node_block, block_1->cf_node.type);
ASSERT_EQ(nir_cf_node_block, block_2->cf_node.type);
@ -108,12 +101,12 @@ TEST_F(nir_cf_test, delete_break_in_loop)
EXPECT_TRUE(_mesa_set_search(block_2->predecessors, block_1));
EXPECT_TRUE(_mesa_set_search(block_3->predecessors, block_2));
nir_print_shader(shader, stderr);
nir_print_shader(b.shader, stderr);
/* Now remove the break. */
nir_instr_remove(&jump->instr);
nir_print_shader(shader, stderr);
nir_print_shader(b.shader, stderr);
/* At this point, we should have:
*
@ -151,5 +144,5 @@ TEST_F(nir_cf_test, delete_break_in_loop)
EXPECT_TRUE(_mesa_set_search(block_2->predecessors, block_1));
EXPECT_TRUE(_mesa_set_search(block_3->predecessors, block_2));
nir_metadata_require(impl, nir_metadata_dominance);
nir_metadata_require(b.impl, nir_metadata_dominance);
}

View File

@ -155,6 +155,7 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api)
ctx->Extensions.ARB_gpu_shader_fp64 = true;
ctx->Extensions.ARB_sample_shading = true;
ctx->Extensions.ARB_shader_bit_encoding = true;
ctx->Extensions.ARB_shader_draw_parameters = true;
ctx->Extensions.ARB_shader_stencil_export = true;
ctx->Extensions.ARB_shader_subroutine = true;
ctx->Extensions.ARB_shader_texture_lod = true;

View File

@ -77,6 +77,7 @@ i965_compiler_FILES = \
brw_vec4_surface_builder.cpp \
brw_vec4_surface_builder.h \
brw_vec4_tcs.cpp \
brw_vec4_tes.cpp \
brw_vec4_visitor.cpp \
brw_vec4_vs_visitor.cpp \
brw_vue_map.c \

View File

@ -598,6 +598,9 @@ struct brw_vs_prog_data {
bool uses_vertexid;
bool uses_instanceid;
bool uses_basevertex;
bool uses_baseinstance;
bool uses_drawid;
};
struct brw_tcs_prog_data

View File

@ -374,8 +374,8 @@ brw_initialize_context_constants(struct brw_context *brw)
const bool stage_exists[MESA_SHADER_STAGES] = {
[MESA_SHADER_VERTEX] = true,
[MESA_SHADER_TESS_CTRL] = brw->gen >= 8,
[MESA_SHADER_TESS_EVAL] = brw->gen >= 8,
[MESA_SHADER_TESS_CTRL] = brw->gen >= 7,
[MESA_SHADER_TESS_EVAL] = brw->gen >= 7,
[MESA_SHADER_GEOMETRY] = brw->gen >= 6,
[MESA_SHADER_FRAGMENT] = true,
[MESA_SHADER_COMPUTE] =

View File

@ -909,8 +909,13 @@ struct brw_context
uint32_t pma_stall_bits;
struct {
/** The value of gl_BaseVertex for the current _mesa_prim. */
int gl_basevertex;
struct {
/** The value of gl_BaseVertex for the current _mesa_prim. */
int gl_basevertex;
/** The value of gl_BaseInstance for the current _mesa_prim. */
int gl_baseinstance;
} params;
/**
* Buffer and offset used for GL_ARB_shader_draw_parameters
@ -918,6 +923,15 @@ struct brw_context
*/
drm_intel_bo *draw_params_bo;
uint32_t draw_params_offset;
/**
* The value of gl_DrawID for the current _mesa_prim. This always comes
in from its own vertex buffer since it's not part of the indirect
* draw parameters.
*/
int gl_drawid;
drm_intel_bo *draw_id_bo;
uint32_t draw_id_offset;
} draw;
struct {

View File

@ -1315,6 +1315,13 @@ enum opcode {
TCS_OPCODE_SET_OUTPUT_URB_OFFSETS,
TCS_OPCODE_GET_PRIMITIVE_ID,
TCS_OPCODE_CREATE_BARRIER_HEADER,
TCS_OPCODE_SRC0_010_IS_ZERO,
TCS_OPCODE_RELEASE_INPUT,
TCS_OPCODE_THREAD_END,
TES_OPCODE_GET_PRIMITIVE_ID,
TES_OPCODE_CREATE_INPUT_READ_HEADER,
TES_OPCODE_ADD_INDIRECT_URB_OFFSET,
};
enum brw_urb_write_flags {

View File

@ -462,9 +462,29 @@ brw_try_draw_prims(struct gl_context *ctx,
}
}
brw->draw.gl_basevertex =
/* Determine if we need to flag BRW_NEW_VERTICES for updating the
* gl_BaseVertexARB or gl_BaseInstanceARB values. For indirect draw, we
* always flag if the shader uses one of the values. For direct draws,
* we only flag if the values change.
*/
const int new_basevertex =
prims[i].indexed ? prims[i].basevertex : prims[i].start;
const int new_baseinstance = prims[i].base_instance;
if (i > 0) {
const bool uses_draw_parameters =
brw->vs.prog_data->uses_basevertex ||
brw->vs.prog_data->uses_baseinstance;
if ((uses_draw_parameters && prims[i].is_indirect) ||
(brw->vs.prog_data->uses_basevertex &&
brw->draw.params.gl_basevertex != new_basevertex) ||
(brw->vs.prog_data->uses_baseinstance &&
brw->draw.params.gl_baseinstance != new_baseinstance))
brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
}
brw->draw.params.gl_basevertex = new_basevertex;
brw->draw.params.gl_baseinstance = new_baseinstance;
drm_intel_bo_unreference(brw->draw.draw_params_bo);
if (prims[i].is_indirect) {
@ -482,6 +502,18 @@ brw_try_draw_prims(struct gl_context *ctx,
brw->draw.draw_params_offset = 0;
}
/* gl_DrawID always needs its own vertex buffer since it's not part of
* the indirect parameter buffer. If the program uses gl_DrawID we need
* to flag BRW_NEW_VERTICES. For the first iteration, we don't have
* valid brw->vs.prog_data, but we always flag BRW_NEW_VERTICES before
* the loop.
*/
brw->draw.gl_drawid = prims[i].draw_id;
drm_intel_bo_unreference(brw->draw.draw_id_bo);
brw->draw.draw_id_bo = NULL;
if (i > 0 && brw->vs.prog_data->uses_drawid)
brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
if (brw->gen < 6)
brw_set_prim(brw, &prims[i]);
else

View File

@ -592,11 +592,19 @@ void
brw_prepare_shader_draw_parameters(struct brw_context *brw)
{
/* For non-indirect draws, upload gl_BaseVertex. */
if (brw->vs.prog_data->uses_vertexid && brw->draw.draw_params_bo == NULL) {
intel_upload_data(brw, &brw->draw.gl_basevertex, 4, 4,
if ((brw->vs.prog_data->uses_basevertex ||
brw->vs.prog_data->uses_baseinstance) &&
brw->draw.draw_params_bo == NULL) {
intel_upload_data(brw, &brw->draw.params, sizeof(brw->draw.params), 4,
&brw->draw.draw_params_bo,
&brw->draw.draw_params_offset);
}
if (brw->vs.prog_data->uses_drawid) {
intel_upload_data(brw, &brw->draw.gl_drawid, sizeof(brw->draw.gl_drawid), 4,
&brw->draw.draw_id_bo,
&brw->draw.draw_id_offset);
}
}
/**
@ -658,8 +666,11 @@ brw_emit_vertices(struct brw_context *brw)
brw_emit_query_begin(brw);
unsigned nr_elements = brw->vb.nr_enabled;
if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid)
if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid ||
brw->vs.prog_data->uses_basevertex || brw->vs.prog_data->uses_baseinstance)
++nr_elements;
if (brw->vs.prog_data->uses_drawid)
nr_elements++;
/* If the VS doesn't read any inputs (calculating vertex position from
* a state variable for some reason, for example), emit a single pad
@ -693,8 +704,11 @@ brw_emit_vertices(struct brw_context *brw)
/* Now emit VB and VEP state packets.
*/
unsigned nr_buffers =
brw->vb.nr_buffers + brw->vs.prog_data->uses_vertexid;
const bool uses_draw_params =
brw->vs.prog_data->uses_basevertex ||
brw->vs.prog_data->uses_baseinstance;
const unsigned nr_buffers = brw->vb.nr_buffers +
uses_draw_params + brw->vs.prog_data->uses_drawid;
if (nr_buffers) {
if (brw->gen >= 6) {
@ -713,7 +727,7 @@ brw_emit_vertices(struct brw_context *brw)
}
if (brw->vs.prog_data->uses_vertexid) {
if (uses_draw_params) {
EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers,
brw->draw.draw_params_bo,
brw->draw.draw_params_bo->size - 1,
@ -721,6 +735,16 @@ brw_emit_vertices(struct brw_context *brw)
0, /* stride */
0); /* step rate */
}
if (brw->vs.prog_data->uses_drawid) {
EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers + 1,
brw->draw.draw_id_bo,
brw->draw.draw_id_bo->size - 1,
brw->draw.draw_id_offset,
0, /* stride */
0); /* step rate */
}
ADVANCE_BATCH();
}
@ -790,21 +814,25 @@ brw_emit_vertices(struct brw_context *brw)
((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
}
if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid ||
brw->vs.prog_data->uses_basevertex || brw->vs.prog_data->uses_baseinstance) {
uint32_t dw0 = 0, dw1 = 0;
uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0;
uint32_t comp1 = BRW_VE1_COMPONENT_STORE_0;
uint32_t comp2 = BRW_VE1_COMPONENT_STORE_0;
uint32_t comp3 = BRW_VE1_COMPONENT_STORE_0;
if (brw->vs.prog_data->uses_vertexid) {
if (brw->vs.prog_data->uses_basevertex)
comp0 = BRW_VE1_COMPONENT_STORE_SRC;
comp2 = BRW_VE1_COMPONENT_STORE_VID;
}
if (brw->vs.prog_data->uses_instanceid) {
if (brw->vs.prog_data->uses_baseinstance)
comp1 = BRW_VE1_COMPONENT_STORE_SRC;
if (brw->vs.prog_data->uses_vertexid)
comp2 = BRW_VE1_COMPONENT_STORE_VID;
if (brw->vs.prog_data->uses_instanceid)
comp3 = BRW_VE1_COMPONENT_STORE_IID;
}
dw1 = (comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
(comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
@ -814,11 +842,11 @@ brw_emit_vertices(struct brw_context *brw)
if (brw->gen >= 6) {
dw0 |= GEN6_VE0_VALID |
brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT;
BRW_SURFACEFORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT;
} else {
dw0 |= BRW_VE0_VALID |
brw->vb.nr_buffers << BRW_VE0_INDEX_SHIFT |
BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT;
BRW_SURFACEFORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT;
dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
}
@ -830,6 +858,30 @@ brw_emit_vertices(struct brw_context *brw)
OUT_BATCH(dw1);
}
if (brw->vs.prog_data->uses_drawid) {
uint32_t dw0 = 0, dw1 = 0;
dw1 = (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT);
if (brw->gen >= 6) {
dw0 |= GEN6_VE0_VALID |
((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) |
(BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
} else {
dw0 |= BRW_VE0_VALID |
((brw->vb.nr_buffers + 1) << BRW_VE0_INDEX_SHIFT) |
(BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
}
OUT_BATCH(dw0);
OUT_BATCH(dw1);
}
if (brw->gen >= 6 && gen6_edgeflag_input) {
uint32_t format =
brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);

View File

@ -1672,7 +1672,10 @@ fs_visitor::assign_vs_urb_setup()
assert(stage == MESA_SHADER_VERTEX);
int count = _mesa_bitcount_64(vs_prog_data->inputs_read);
if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid)
if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance)
count++;
if (vs_prog_data->uses_drawid)
count++;
/* Each attribute is 4 regs. */

View File

@ -222,6 +222,20 @@ emit_system_values_block(nir_block *block, void *void_visitor)
*reg = *v->emit_vs_system_value(SYSTEM_VALUE_INSTANCE_ID);
break;
case nir_intrinsic_load_base_instance:
assert(v->stage == MESA_SHADER_VERTEX);
reg = &v->nir_system_values[SYSTEM_VALUE_BASE_INSTANCE];
if (reg->file == BAD_FILE)
*reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_INSTANCE);
break;
case nir_intrinsic_load_draw_id:
assert(v->stage == MESA_SHADER_VERTEX);
reg = &v->nir_system_values[SYSTEM_VALUE_DRAW_ID];
if (reg->file == BAD_FILE)
*reg = *v->emit_vs_system_value(SYSTEM_VALUE_DRAW_ID);
break;
case nir_intrinsic_load_invocation_id:
assert(v->stage == MESA_SHADER_GEOMETRY);
reg = &v->nir_system_values[SYSTEM_VALUE_INVOCATION_ID];
@ -1747,7 +1761,9 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
case nir_intrinsic_load_vertex_id_zero_base:
case nir_intrinsic_load_base_vertex:
case nir_intrinsic_load_instance_id: {
case nir_intrinsic_load_instance_id:
case nir_intrinsic_load_base_instance:
case nir_intrinsic_load_draw_id: {
gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
fs_reg val = nir_system_values[sv];
assert(val.file != BAD_FILE);

View File

@ -43,9 +43,14 @@ fs_visitor::emit_vs_system_value(int location)
switch (location) {
case SYSTEM_VALUE_BASE_VERTEX:
reg->reg_offset = 0;
vs_prog_data->uses_vertexid = true;
vs_prog_data->uses_basevertex = true;
break;
case SYSTEM_VALUE_BASE_INSTANCE:
reg->reg_offset = 1;
vs_prog_data->uses_baseinstance = true;
break;
case SYSTEM_VALUE_VERTEX_ID:
unreachable("should have been lowered");
case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
reg->reg_offset = 2;
vs_prog_data->uses_vertexid = true;
@ -54,6 +59,16 @@ fs_visitor::emit_vs_system_value(int location)
reg->reg_offset = 3;
vs_prog_data->uses_instanceid = true;
break;
case SYSTEM_VALUE_DRAW_ID:
if (nir->info.system_values_read &
(BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID)))
reg->nr += 4;
reg->reg_offset = 0;
vs_prog_data->uses_drawid = true;
break;
default:
unreachable("not reached");
}

View File

@ -26,6 +26,7 @@
#include "brw_eu.h"
#include "brw_fs.h"
#include "brw_nir.h"
#include "brw_vec4_tes.h"
#include "glsl/glsl_parser_extras.h"
#include "main/shaderobj.h"
#include "main/uniforms.h"
@ -86,7 +87,8 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
compiler->scalar_stage[MESA_SHADER_VERTEX] =
devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS);
compiler->scalar_stage[MESA_SHADER_TESS_CTRL] = false;
compiler->scalar_stage[MESA_SHADER_TESS_EVAL] = true;
compiler->scalar_stage[MESA_SHADER_TESS_EVAL] =
devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TES", true);
compiler->scalar_stage[MESA_SHADER_GEOMETRY] =
devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", false);
compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true;
@ -569,6 +571,18 @@ brw_instruction_name(enum opcode op)
return "tcs_get_primitive_id";
case TCS_OPCODE_CREATE_BARRIER_HEADER:
return "tcs_create_barrier_header";
case TCS_OPCODE_SRC0_010_IS_ZERO:
return "tcs_src0<0,1,0>_is_zero";
case TCS_OPCODE_RELEASE_INPUT:
return "tcs_release_input";
case TCS_OPCODE_THREAD_END:
return "tcs_thread_end";
case TES_OPCODE_CREATE_INPUT_READ_HEADER:
return "tes_create_input_read_header";
case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
return "tes_add_indirect_urb_offset";
case TES_OPCODE_GET_PRIMITIVE_ID:
return "tes_get_primitive_id";
}
unreachable("not reached");
@ -1004,6 +1018,7 @@ backend_instruction::has_side_effects() const
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case FS_OPCODE_FB_WRITE:
case SHADER_OPCODE_BARRIER:
case TCS_OPCODE_RELEASE_INPUT:
return true;
default:
return false;
@ -1403,6 +1418,19 @@ brw_compile_tes(const struct brw_compiler *compiler,
return g.get_assembly(final_assembly_size);
} else {
unreachable("XXX: vec4 tessellation evalation shaders not merged yet.");
brw::vec4_tes_visitor v(compiler, log_data, key, prog_data,
nir, mem_ctx, shader_time_index);
if (!v.run()) {
if (error_str)
*error_str = ralloc_strdup(mem_ctx, v.fail_msg);
return NULL;
}
if (unlikely(INTEL_DEBUG & DEBUG_TES))
v.dump_instructions();
return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
&prog_data->base, v.cfg,
final_assembly_size);
}
}

View File

@ -196,10 +196,14 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
&gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
&brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
&brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
&brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
&brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
&brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
&gen6_vs_push_constants, /* Before vs_state */
&gen7_tcs_push_constants,
&gen7_tes_push_constants,
&gen6_gs_push_constants, /* Before gs_state */
&gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
@ -209,6 +213,12 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
&brw_vs_pull_constants,
&brw_vs_ubo_surfaces,
&brw_vs_abo_surfaces,
&brw_tcs_pull_constants,
&brw_tcs_ubo_surfaces,
&brw_tcs_abo_surfaces,
&brw_tes_pull_constants,
&brw_tes_ubo_surfaces,
&brw_tes_abo_surfaces,
&brw_gs_pull_constants,
&brw_gs_ubo_surfaces,
&brw_gs_abo_surfaces,
@ -218,11 +228,15 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
&gen6_renderbuffer_surfaces,
&brw_texture_surfaces,
&brw_vs_binding_table,
&brw_tcs_binding_table,
&brw_tes_binding_table,
&brw_gs_binding_table,
&brw_wm_binding_table,
&brw_fs_samplers,
&brw_vs_samplers,
&brw_tcs_samplers,
&brw_tes_samplers,
&brw_gs_samplers,
&gen6_multisample_state,

View File

@ -157,6 +157,7 @@ vec4_instruction::is_send_from_grf()
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
case VEC4_OPCODE_URB_READ:
case TCS_OPCODE_URB_WRITE:
case TCS_OPCODE_RELEASE_INPUT:
case SHADER_OPCODE_BARRIER:
return true;
default:
@ -189,6 +190,7 @@ vec4_instruction::has_source_and_destination_hazard() const
switch (opcode) {
case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
return true;
default:
return false;
@ -274,6 +276,7 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
case SHADER_OPCODE_POW:
return 2;
case VS_OPCODE_URB_WRITE:
case TCS_OPCODE_THREAD_END:
return 1;
case VS_OPCODE_PULL_CONSTANT_LOAD:
return 2;
@ -1563,7 +1566,7 @@ int
vec4_vs_visitor::setup_attributes(int payload_reg)
{
int nr_attributes;
int attribute_map[VERT_ATTRIB_MAX + 1];
int attribute_map[VERT_ATTRIB_MAX + 2];
memset(attribute_map, 0, sizeof(attribute_map));
nr_attributes = 0;
@ -1574,12 +1577,19 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
}
}
if (vs_prog_data->uses_drawid) {
attribute_map[VERT_ATTRIB_MAX + 1] = payload_reg + nr_attributes;
nr_attributes++;
}
/* VertexID is stored by the VF as the last vertex element, but we
* don't represent it with a flag in inputs_read, so we call it
* VERT_ATTRIB_MAX.
*/
if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) {
if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) {
attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes;
nr_attributes++;
}
lower_attributes_to_hw_regs(attribute_map, false /* interleaved */);
@ -1979,11 +1989,18 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
* incoming vertex attribute. So, add an extra slot.
*/
if (shader->info.system_values_read &
(BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
(BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) {
nr_attributes++;
}
/* gl_DrawID has its very own vec4 */
if (shader->info.system_values_read & BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID)) {
nr_attributes++;
}
/* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB Entry
* Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode. Empirically, in
* vec4 mode, the hardware appears to wedge unless we read something.

View File

@ -47,6 +47,8 @@ can_do_writemask(const struct brw_device_info *devinfo,
case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
case TES_OPCODE_CREATE_INPUT_READ_HEADER:
case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
case VEC4_OPCODE_URB_READ:
return false;
default:

View File

@ -724,6 +724,9 @@ generate_gs_set_primitive_id(struct brw_codegen *p, struct brw_reg dst)
static void
generate_tcs_get_instance_id(struct brw_codegen *p, struct brw_reg dst)
{
const struct brw_device_info *devinfo = p->devinfo;
const bool ivb = devinfo->is_ivybridge || devinfo->is_baytrail;
/* "Instance Count" comes as part of the payload in r0.2 bits 23:17.
*
* Since we operate in SIMD4x2 mode, we need run half as many threads
@ -736,8 +739,8 @@ generate_tcs_get_instance_id(struct brw_codegen *p, struct brw_reg dst)
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
const int mask = INTEL_MASK(23, 17);
const int shift = 17;
const int mask = ivb ? INTEL_MASK(22, 16) : INTEL_MASK(23, 17);
const int shift = ivb ? 16 : 17;
brw_AND(p, get_element_ud(dst, 0), get_element_ud(r0, 2), brw_imm_ud(mask));
brw_SHR(p, get_element_ud(dst, 0), get_element_ud(dst, 0),
@ -763,8 +766,12 @@ generate_tcs_urb_write(struct brw_codegen *p,
true /* header */, false /* eot */);
brw_inst_set_urb_opcode(devinfo, send, BRW_URB_OPCODE_WRITE_OWORD);
brw_inst_set_urb_global_offset(devinfo, send, inst->offset);
brw_inst_set_urb_per_slot_offset(devinfo, send, 1);
brw_inst_set_urb_swizzle_control(devinfo, send, BRW_URB_SWIZZLE_INTERLEAVE);
if (inst->urb_write_flags & BRW_URB_WRITE_EOT) {
brw_inst_set_eot(devinfo, send, 1);
} else {
brw_inst_set_urb_per_slot_offset(devinfo, send, 1);
brw_inst_set_urb_swizzle_control(devinfo, send, BRW_URB_SWIZZLE_INTERLEAVE);
}
/* what happens to swizzles? */
}
@ -872,6 +879,46 @@ generate_tcs_output_urb_offsets(struct brw_codegen *p,
brw_pop_insn_state(p);
}
static void
generate_tes_create_input_read_header(struct brw_codegen *p,
struct brw_reg dst)
{
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
/* Initialize the register to 0 */
brw_MOV(p, dst, brw_imm_ud(0));
/* Enable all the channels in m0.5 bits 15:8 */
brw_MOV(p, get_element_ud(dst, 5), brw_imm_ud(0xff00));
/* Copy g1.3 (the patch URB handle) to m0.0 and m0.1. For safety,
* mask out irrelevant "Reserved" bits, as they're not marked MBZ.
*/
brw_AND(p, vec2(get_element_ud(dst, 0)),
retype(brw_vec1_grf(1, 3), BRW_REGISTER_TYPE_UD),
brw_imm_ud(0x1fff));
brw_pop_insn_state(p);
}
static void
generate_tes_add_indirect_urb_offset(struct brw_codegen *p,
struct brw_reg dst,
struct brw_reg header,
struct brw_reg offset)
{
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, dst, header);
/* m0.3-0.4: 128-bit-granular offsets into the URB from the handles */
brw_MOV(p, vec2(get_element_ud(dst, 3)), stride(offset, 4, 1, 0));
brw_pop_insn_state(p);
}
static void
generate_vec4_urb_read(struct brw_codegen *p,
vec4_instruction *inst,
@ -897,6 +944,75 @@ generate_vec4_urb_read(struct brw_codegen *p,
brw_inst_set_urb_global_offset(devinfo, send, inst->offset);
}
static void
generate_tcs_release_input(struct brw_codegen *p,
struct brw_reg header,
struct brw_reg vertex,
struct brw_reg is_unpaired)
{
const struct brw_device_info *devinfo = p->devinfo;
assert(vertex.file == BRW_IMMEDIATE_VALUE);
assert(vertex.type == BRW_REGISTER_TYPE_UD);
/* m0.0-0.1: URB handles */
struct brw_reg urb_handles =
retype(brw_vec2_grf(1 + (vertex.ud >> 3), vertex.ud & 7),
BRW_REGISTER_TYPE_UD);
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, header, brw_imm_ud(0));
brw_MOV(p, vec2(get_element_ud(header, 0)), urb_handles);
brw_pop_insn_state(p);
brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, send, brw_null_reg());
brw_set_src0(p, send, header);
brw_set_message_descriptor(p, send, BRW_SFID_URB,
1 /* mlen */, 0 /* rlen */,
true /* header */, false /* eot */);
brw_inst_set_urb_opcode(devinfo, send, BRW_URB_OPCODE_READ_OWORD);
brw_inst_set_urb_complete(devinfo, send, 1);
brw_inst_set_urb_swizzle_control(devinfo, send, is_unpaired.ud ?
BRW_URB_SWIZZLE_NONE :
BRW_URB_SWIZZLE_INTERLEAVE);
}
static void
generate_tcs_thread_end(struct brw_codegen *p, vec4_instruction *inst)
{
struct brw_reg header = brw_message_reg(inst->base_mrf);
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, header, brw_imm_ud(0));
brw_MOV(p, get_element_ud(header, 0),
retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD));
brw_pop_insn_state(p);
brw_urb_WRITE(p,
brw_null_reg(), /* dest */
inst->base_mrf, /* starting mrf reg nr */
header,
BRW_URB_WRITE_EOT | inst->urb_write_flags,
inst->mlen,
0, /* response len */
0, /* urb destination offset */
0);
}
static void
generate_tes_get_primitive_id(struct brw_codegen *p, struct brw_reg dst)
{
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_MOV(p, dst, retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_D));
brw_pop_insn_state(p);
}
static void
generate_tcs_get_primitive_id(struct brw_codegen *p, struct brw_reg dst)
{
@ -911,6 +1027,8 @@ generate_tcs_create_barrier_header(struct brw_codegen *p,
struct brw_vue_prog_data *prog_data,
struct brw_reg dst)
{
const struct brw_device_info *devinfo = p->devinfo;
const bool ivb = devinfo->is_ivybridge || devinfo->is_baytrail;
struct brw_reg m0_2 = get_element_ud(dst, 2);
unsigned instances = ((struct brw_tcs_prog_data *) prog_data)->instances;
@ -921,13 +1039,13 @@ generate_tcs_create_barrier_header(struct brw_codegen *p,
/* Zero the message header */
brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
/* Copy "Barrier ID" from DW0 bits 16:13 */
/* Copy "Barrier ID" from r0.2, bits 16:13 (Gen7.5+) or 15:12 (Gen7) */
brw_AND(p, m0_2,
retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
brw_imm_ud(0x1e000));
brw_imm_ud(ivb ? INTEL_MASK(15, 12) : INTEL_MASK(16, 13)));
/* Shift it into place */
brw_SHL(p, m0_2, get_element_ud(dst, 2), brw_imm_ud(11));
/* Shift it up to bits 27:24. */
brw_SHL(p, m0_2, get_element_ud(dst, 2), brw_imm_ud(ivb ? 12 : 11));
/* Set the Barrier Count and the enable bit */
brw_OR(p, m0_2, m0_2, brw_imm_ud(instances << 9 | (1 << 15)));
@ -1788,6 +1906,32 @@ generate_code(struct brw_codegen *p,
generate_tcs_create_barrier_header(p, prog_data, dst);
break;
case TES_OPCODE_CREATE_INPUT_READ_HEADER:
generate_tes_create_input_read_header(p, dst);
break;
case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
generate_tes_add_indirect_urb_offset(p, dst, src[0], src[1]);
break;
case TES_OPCODE_GET_PRIMITIVE_ID:
generate_tes_get_primitive_id(p, dst);
break;
case TCS_OPCODE_SRC0_010_IS_ZERO:
/* If src_reg had stride like fs_reg, we wouldn't need this. */
brw_MOV(p, brw_null_reg(), stride(src[0], 0, 1, 0));
brw_inst_set_cond_modifier(devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
break;
case TCS_OPCODE_RELEASE_INPUT:
generate_tcs_release_input(p, dst, src[0], src[1]);
break;
case TCS_OPCODE_THREAD_END:
generate_tcs_thread_end(p, inst);
break;
case SHADER_OPCODE_BARRIER:
brw_barrier(p, src[0]);
brw_WAIT(p);

View File

@ -78,6 +78,20 @@ vec4_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
glsl_type::int_type);
break;
case nir_intrinsic_load_base_instance:
reg = &nir_system_values[SYSTEM_VALUE_BASE_INSTANCE];
if (reg->file == BAD_FILE)
*reg = *make_reg_for_system_value(SYSTEM_VALUE_BASE_INSTANCE,
glsl_type::int_type);
break;
case nir_intrinsic_load_draw_id:
reg = &nir_system_values[SYSTEM_VALUE_DRAW_ID];
if (reg->file == BAD_FILE)
*reg = *make_reg_for_system_value(SYSTEM_VALUE_DRAW_ID,
glsl_type::int_type);
break;
default:
break;
}
@ -669,6 +683,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
case nir_intrinsic_load_vertex_id_zero_base:
case nir_intrinsic_load_base_vertex:
case nir_intrinsic_load_instance_id:
case nir_intrinsic_load_base_instance:
case nir_intrinsic_load_draw_id:
case nir_intrinsic_load_invocation_id:
case nir_intrinsic_load_tess_level_inner:
case nir_intrinsic_load_tess_level_outer: {

View File

@ -156,18 +156,56 @@ vec4_tcs_visitor::emit_prolog()
void
vec4_tcs_visitor::emit_thread_end()
{
vec4_instruction *inst;
current_annotation = "thread end";
if (nir->info.tcs.vertices_out % 2) {
emit(BRW_OPCODE_ENDIF);
}
if (devinfo->gen == 7) {
struct brw_tcs_prog_data *tcs_prog_data =
(struct brw_tcs_prog_data *) prog_data;
current_annotation = "release input vertices";
/* Synchronize all threads, so we know that no one is still
* using the input URB handles.
*/
if (tcs_prog_data->instances > 1) {
dst_reg header = dst_reg(this, glsl_type::uvec4_type);
emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header);
emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header));
}
/* Make thread 0 (invocations <1, 0>) release pairs of ICP handles.
* We want to compare the bottom half of invocation_id with 0, but
* use that truth value for the top half as well. Unfortunately,
* we don't have stride in the vec4 world, nor UV immediates in
* align16, so we need an opcode to get invocation_id<0,4,0>.
*/
emit(TCS_OPCODE_SRC0_010_IS_ZERO, dst_null_d(), invocation_id);
emit(IF(BRW_PREDICATE_NORMAL));
for (unsigned i = 0; i < key->input_vertices; i += 2) {
/* If we have an odd number of input vertices, the last will be
* unpaired. We don't want to use an interleaved URB write in
* that case.
*/
const bool is_unpaired = i == key->input_vertices - 1;
dst_reg header(this, glsl_type::uvec4_type);
emit(TCS_OPCODE_RELEASE_INPUT, header, brw_imm_ud(i),
brw_imm_ud(is_unpaired));
}
emit(BRW_OPCODE_ENDIF);
}
if (unlikely(INTEL_DEBUG & DEBUG_SHADER_TIME))
emit_shader_time_end();
vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
inst->mlen = 1; /* just the header, no data. */
inst->urb_write_flags = BRW_URB_WRITE_EOT_COMPLETE;
inst = emit(TCS_OPCODE_THREAD_END);
inst->base_mrf = 14;
inst->mlen = 1;
}

View File

@ -0,0 +1,204 @@
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* \file brw_vec4_tes.cpp
*
* Tessellaton evaluation shader specific code derived from the vec4_visitor class.
*/
#include "brw_vec4_tes.h"
namespace brw {
vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler,
void *log_data,
const struct brw_tes_prog_key *key,
struct brw_tes_prog_data *prog_data,
const nir_shader *shader,
void *mem_ctx,
int shader_time_index)
: vec4_visitor(compiler, log_data, &key->tex, &prog_data->base,
shader, mem_ctx, false, shader_time_index)
{
}
dst_reg *
vec4_tes_visitor::make_reg_for_system_value(int location, const glsl_type *type)
{
return NULL;
}
void
vec4_tes_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
{
const struct brw_tes_prog_data *tes_prog_data =
(const struct brw_tes_prog_data *) prog_data;
switch (instr->intrinsic) {
case nir_intrinsic_load_tess_level_outer: {
dst_reg dst(this, glsl_type::vec4_type);
nir_system_values[SYSTEM_VALUE_TESS_LEVEL_OUTER] = dst;
dst_reg temp(this, glsl_type::vec4_type);
vec4_instruction *read =
emit(VEC4_OPCODE_URB_READ, temp, input_read_header);
read->offset = 1;
read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX)));
break;
}
case nir_intrinsic_load_tess_level_inner: {
dst_reg dst(this, glsl_type::vec2_type);
nir_system_values[SYSTEM_VALUE_TESS_LEVEL_INNER] = dst;
/* Set up the message header to reference the proper parts of the URB */
dst_reg temp(this, glsl_type::vec4_type);
vec4_instruction *read =
emit(VEC4_OPCODE_URB_READ, temp, input_read_header);
read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX)));
} else {
read->offset = 1;
emit(MOV(dst, src_reg(temp)));
}
break;
}
default:
vec4_visitor::nir_setup_system_value_intrinsic(instr);
}
}
void
vec4_tes_visitor::setup_payload()
{
int reg = 0;
/* The payload always contains important data in r0 and r1, which contains
* the URB handles that are passed on to the URB write at the end
* of the thread.
*/
reg += 2;
reg = setup_uniforms(reg);
this->first_non_payload_grf = reg;
}
void
vec4_tes_visitor::emit_prolog()
{
input_read_header = src_reg(this, glsl_type::uvec4_type);
emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header));
this->current_annotation = NULL;
}
void
vec4_tes_visitor::emit_urb_write_header(int mrf)
{
/* No need to do anything for DS; an implied write to this MRF will be
* performed by VS_OPCODE_URB_WRITE.
*/
(void) mrf;
}
vec4_instruction *
vec4_tes_visitor::emit_urb_write_opcode(bool complete)
{
/* For DS, the URB writes end the thread. */
if (complete) {
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
emit_shader_time_end();
}
vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
inst->urb_write_flags = complete ?
BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;
return inst;
}
void
vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
switch (instr->intrinsic) {
case nir_intrinsic_load_tess_coord:
/* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
src_reg(brw_vec8_grf(1, 0))));
break;
case nir_intrinsic_load_primitive_id:
emit(TES_OPCODE_GET_PRIMITIVE_ID,
get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
break;
case nir_intrinsic_load_input:
case nir_intrinsic_load_per_vertex_input: {
src_reg indirect_offset = get_indirect_offset(instr);
unsigned imm_offset = instr->const_index[0];
src_reg header = input_read_header;
if (indirect_offset.file != BAD_FILE) {
header = src_reg(this, glsl_type::uvec4_type);
emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
input_read_header, indirect_offset);
}
dst_reg temp(this, glsl_type::ivec4_type);
vec4_instruction *read =
emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
read->offset = imm_offset;
read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
/* Copy to target. We might end up with some funky writemasks landing
* in here, but we really don't want them in the above pseudo-ops.
*/
dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
dst.writemask = brw_writemask_for_size(instr->num_components);
emit(MOV(dst, src_reg(temp)));
break;
}
default:
vec4_visitor::nir_emit_intrinsic(instr);
}
}
void
vec4_tes_visitor::emit_thread_end()
{
/* For DS, we always end the thread by emitting a single vertex.
* emit_urb_write_opcode() will take care of setting the eot flag on the
* SEND instruction.
*/
emit_vertex();
}
} /* namespace brw */

View File

@ -0,0 +1,69 @@
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* \file brw_vec4_tes.h
*
* The vec4 mode tessellation evaluation shader compiler backend.
*/
#ifndef BRW_VEC4_TES_H
#define BRW_VEC4_TES_H
#include "brw_vec4.h"
#ifdef __cplusplus
namespace brw {
class vec4_tes_visitor : public vec4_visitor
{
public:
vec4_tes_visitor(const struct brw_compiler *compiler,
void *log_data,
const struct brw_tes_prog_key *key,
struct brw_tes_prog_data *prog_data,
const nir_shader *nir,
void *mem_ctx,
int shader_time_index);
protected:
virtual dst_reg *make_reg_for_system_value(int location,
const glsl_type *type);
virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
virtual void setup_payload();
virtual void emit_prolog();
virtual void emit_thread_end();
virtual void emit_urb_write_header(int mrf);
virtual vec4_instruction *emit_urb_write_opcode(bool complete);
private:
src_reg input_read_header;
};
} /* namespace brw */
#endif /* __cplusplus */
#endif /* BRW_VEC4_TES_H */

View File

@ -155,7 +155,11 @@ vec4_vs_visitor::make_reg_for_system_value(int location,
switch (location) {
case SYSTEM_VALUE_BASE_VERTEX:
reg->writemask = WRITEMASK_X;
vs_prog_data->uses_vertexid = true;
vs_prog_data->uses_basevertex = true;
break;
case SYSTEM_VALUE_BASE_INSTANCE:
reg->writemask = WRITEMASK_Y;
vs_prog_data->uses_baseinstance = true;
break;
case SYSTEM_VALUE_VERTEX_ID:
case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
@ -166,6 +170,11 @@ vec4_vs_visitor::make_reg_for_system_value(int location,
reg->writemask = WRITEMASK_W;
vs_prog_data->uses_instanceid = true;
break;
case SYSTEM_VALUE_DRAW_ID:
reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX + 1);
reg->writemask = WRITEMASK_X;
vs_prog_data->uses_drawid = true;
break;
default:
unreachable("not reached");
}

View File

@ -58,36 +58,64 @@ const struct brw_tracked_state gen7_tes_push_constants = {
static void
gen7_upload_ds_state(struct brw_context *brw)
{
/* Disable the DS Unit */
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_CONSTANT_DS << 16 | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
const struct brw_stage_state *stage_state = &brw->tes.base;
/* BRW_NEW_TESS_PROGRAMS */
bool active = brw->tess_eval_program;
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
/* BRW_NEW_TES_PROG_DATA */
const struct brw_tes_prog_data *tes_prog_data = brw->tes.prog_data;
const struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
const struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2));
OUT_BATCH(brw->hw_bt_pool.next_offset);
ADVANCE_BATCH();
const unsigned thread_count = (brw->max_ds_threads - 1) <<
(brw->is_haswell ? HSW_DS_MAX_THREADS_SHIFT : GEN7_DS_MAX_THREADS_SHIFT);
if (active) {
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2));
OUT_BATCH(stage_state->prog_offset);
OUT_BATCH(SET_FIELD(DIV_ROUND_UP(stage_state->sampler_count, 4),
GEN7_DS_SAMPLER_COUNT) |
SET_FIELD(prog_data->binding_table.size_bytes / 4,
GEN7_DS_BINDING_TABLE_ENTRY_COUNT));
if (prog_data->total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
ffs(prog_data->total_scratch) - 11);
} else {
OUT_BATCH(0);
}
OUT_BATCH(SET_FIELD(prog_data->dispatch_grf_start_reg,
GEN7_DS_DISPATCH_START_GRF) |
SET_FIELD(vue_prog_data->urb_read_length,
GEN7_DS_URB_READ_LENGTH));
OUT_BATCH(GEN7_DS_ENABLE |
GEN7_DS_STATISTICS_ENABLE |
thread_count |
(tes_prog_data->domain == BRW_TESS_DOMAIN_TRI ?
GEN7_DS_COMPUTE_W_COORDINATE_ENABLE : 0));
ADVANCE_BATCH();
} else {
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
brw->tes.enabled = active;
}
const struct brw_tracked_state gen7_ds_state = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_CONTEXT,
.mesa = _NEW_TRANSFORM,
.brw = BRW_NEW_BATCH |
BRW_NEW_CONTEXT |
BRW_NEW_TESS_PROGRAMS |
BRW_NEW_TES_PROG_DATA,
},
.emit = gen7_upload_ds_state,
};

View File

@ -60,37 +60,58 @@ const struct brw_tracked_state gen7_tcs_push_constants = {
static void
gen7_upload_hs_state(struct brw_context *brw)
{
/* Disable the HS Unit */
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_CONSTANT_HS << 16 | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
const struct brw_stage_state *stage_state = &brw->tcs.base;
/* BRW_NEW_TESS_PROGRAMS */
bool active = brw->tess_eval_program;
/* BRW_NEW_TCS_PROG_DATA */
const struct brw_vue_prog_data *prog_data = &brw->tcs.prog_data->base;
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2));
OUT_BATCH(brw->hw_bt_pool.next_offset);
ADVANCE_BATCH();
if (active) {
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
OUT_BATCH(SET_FIELD(DIV_ROUND_UP(stage_state->sampler_count, 4),
GEN7_HS_SAMPLER_COUNT) |
SET_FIELD(prog_data->base.binding_table.size_bytes / 4,
GEN7_HS_BINDING_TABLE_ENTRY_COUNT) |
(brw->max_hs_threads - 1));
OUT_BATCH(GEN7_HS_ENABLE |
GEN7_HS_STATISTICS_ENABLE |
SET_FIELD(brw->tcs.prog_data->instances - 1,
GEN7_HS_INSTANCE_COUNT));
OUT_BATCH(stage_state->prog_offset);
if (prog_data->base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
ffs(prog_data->base.total_scratch) - 11);
} else {
OUT_BATCH(0);
}
OUT_BATCH(GEN7_HS_INCLUDE_VERTEX_HANDLES |
SET_FIELD(prog_data->base.dispatch_grf_start_reg,
GEN7_HS_DISPATCH_START_GRF));
/* Ignore URB semaphores */
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
brw->tcs.enabled = active;
}
const struct brw_tracked_state gen7_hs_state = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_CONTEXT,
.brw = BRW_NEW_BATCH |
BRW_NEW_TCS_PROG_DATA |
BRW_NEW_TESS_PROGRAMS,
},
.emit = gen7_upload_hs_state,
};

View File

@ -115,7 +115,12 @@ gen8_emit_vertices(struct brw_context *brw)
}
/* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */
unsigned nr_buffers = brw->vb.nr_buffers + brw->vs.prog_data->uses_vertexid;
const bool uses_draw_params =
brw->vs.prog_data->uses_basevertex ||
brw->vs.prog_data->uses_baseinstance;
const unsigned nr_buffers = brw->vb.nr_buffers +
uses_draw_params + brw->vs.prog_data->uses_drawid;
if (nr_buffers) {
assert(nr_buffers <= 33);
@ -135,7 +140,7 @@ gen8_emit_vertices(struct brw_context *brw)
OUT_BATCH(buffer->bo->size);
}
if (brw->vs.prog_data->uses_vertexid) {
if (uses_draw_params) {
OUT_BATCH(brw->vb.nr_buffers << GEN6_VB0_INDEX_SHIFT |
GEN7_VB0_ADDRESS_MODIFYENABLE |
mocs_wb << 16);
@ -143,21 +148,33 @@ gen8_emit_vertices(struct brw_context *brw)
brw->draw.draw_params_offset);
OUT_BATCH(brw->draw.draw_params_bo->size);
}
if (brw->vs.prog_data->uses_drawid) {
OUT_BATCH((brw->vb.nr_buffers + 1) << GEN6_VB0_INDEX_SHIFT |
GEN7_VB0_ADDRESS_MODIFYENABLE |
mocs_wb << 16);
OUT_RELOC64(brw->draw.draw_id_bo, I915_GEM_DOMAIN_VERTEX, 0,
brw->draw.draw_id_offset);
OUT_BATCH(brw->draw.draw_id_bo->size);
}
ADVANCE_BATCH();
}
/* Normally we don't need an element for the SGVS attribute because the
* 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an
* element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if the
* vertex ID is used then it needs an element for the base vertex buffer.
* Additionally if there is an edge flag element then the SGVS can't be
* inserted past that so we need a dummy element to ensure that the edge
* flag is the last one.
* element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if
* we're using draw parameters then we need an element for the those
* values. Additionally if there is an edge flag element then the SGVS
* can't be inserted past that so we need a dummy element to ensure that
* the edge flag is the last one.
*/
bool needs_sgvs_element = (brw->vs.prog_data->uses_vertexid ||
(brw->vs.prog_data->uses_instanceid &&
uses_edge_flag));
unsigned nr_elements = brw->vb.nr_enabled + needs_sgvs_element;
const bool needs_sgvs_element = (brw->vs.prog_data->uses_basevertex ||
brw->vs.prog_data->uses_baseinstance ||
((brw->vs.prog_data->uses_instanceid ||
brw->vs.prog_data->uses_vertexid) &&
uses_edge_flag));
const unsigned nr_elements =
brw->vb.nr_enabled + needs_sgvs_element + brw->vs.prog_data->uses_drawid;
/* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
* presumably for VertexID/InstanceID.
@ -212,12 +229,13 @@ gen8_emit_vertices(struct brw_context *brw)
}
if (needs_sgvs_element) {
if (brw->vs.prog_data->uses_vertexid) {
if (brw->vs.prog_data->uses_basevertex ||
brw->vs.prog_data->uses_baseinstance) {
OUT_BATCH(GEN6_VE0_VALID |
brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
BRW_SURFACEFORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT);
OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
} else {
@ -229,6 +247,16 @@ gen8_emit_vertices(struct brw_context *brw)
}
}
if (brw->vs.prog_data->uses_drawid) {
OUT_BATCH(GEN6_VE0_VALID |
((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) |
(BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT));
OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
}
if (gen6_edgeflag_input) {
uint32_t format =
brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
@ -266,6 +294,15 @@ gen8_emit_vertices(struct brw_context *brw)
OUT_BATCH(buffer->step_rate);
ADVANCE_BATCH();
}
if (brw->vs.prog_data->uses_drawid) {
const unsigned element = brw->vb.nr_enabled + needs_sgvs_element;
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
OUT_BATCH(element);
OUT_BATCH(0);
ADVANCE_BATCH();
}
}
const struct brw_tracked_state gen8_vertices = {

View File

@ -203,6 +203,7 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.ARB_point_sprite = true;
ctx->Extensions.ARB_seamless_cube_map = true;
ctx->Extensions.ARB_shader_bit_encoding = true;
ctx->Extensions.ARB_shader_draw_parameters = true;
ctx->Extensions.ARB_shader_texture_lod = true;
ctx->Extensions.ARB_shadow = true;
ctx->Extensions.ARB_sync = true;
@ -333,6 +334,7 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.ARB_shader_image_load_store = true;
ctx->Extensions.ARB_shader_image_size = true;
ctx->Extensions.ARB_shader_texture_image_samples = true;
ctx->Extensions.ARB_tessellation_shader = true;
ctx->Extensions.ARB_texture_compression_bptc = true;
ctx->Extensions.ARB_texture_view = true;
ctx->Extensions.ARB_shader_storage_buffer_object = true;
@ -362,7 +364,6 @@ intelInitExtensions(struct gl_context *ctx)
if (brw->gen >= 8) {
ctx->Extensions.ARB_stencil_texturing = true;
ctx->Extensions.ARB_tessellation_shader = true;
}
if (brw->gen >= 9) {

View File

@ -96,6 +96,7 @@ EXT(ARB_separate_shader_objects , dummy_true
EXT(ARB_shader_atomic_counters , ARB_shader_atomic_counters , GLL, GLC, x , x , 2011)
EXT(ARB_shader_bit_encoding , ARB_shader_bit_encoding , GLL, GLC, x , x , 2010)
EXT(ARB_shader_clock , ARB_shader_clock , GLL, GLC, x , x , 2015)
EXT(ARB_shader_draw_parameters , ARB_shader_draw_parameters , GLL, GLC, x , x , 2013)
EXT(ARB_shader_image_load_store , ARB_shader_image_load_store , GLL, GLC, x , x , 2011)
EXT(ARB_shader_image_size , ARB_shader_image_size , GLL, GLC, x , x , 2012)
EXT(ARB_shader_objects , dummy_true , GLL, GLC, x , x , 2002)

View File

@ -3717,6 +3717,7 @@ struct gl_extensions
GLboolean ARB_shader_atomic_counters;
GLboolean ARB_shader_bit_encoding;
GLboolean ARB_shader_clock;
GLboolean ARB_shader_draw_parameters;
GLboolean ARB_shader_image_load_store;
GLboolean ARB_shader_image_size;
GLboolean ARB_shader_precision;

View File

@ -1083,11 +1083,11 @@ prog_to_nir(const struct gl_program *prog,
c = rzalloc(NULL, struct ptn_compile);
if (!c)
return NULL;
s = nir_shader_create(NULL, stage, options);
if (!s)
goto fail;
c->prog = prog;
nir_builder_init_simple_shader(&c->build, NULL, stage, options);
s = c->build.shader;
if (prog->Parameters->NumParameters > 0) {
c->parameters = rzalloc(s, nir_variable);
c->parameters->type =
@ -1098,13 +1098,6 @@ prog_to_nir(const struct gl_program *prog,
exec_list_push_tail(&s->uniforms, &c->parameters->node);
}
nir_function *func = nir_function_create(s, "main");
nir_function_impl *impl = nir_function_impl_create(func);
c->build.shader = s;
c->build.impl = impl;
c->build.cursor = nir_after_cf_list(&impl->body);
setup_registers_and_variables(c);
if (unlikely(c->error))
goto fail;

View File

@ -4328,6 +4328,8 @@ const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
TGSI_SEMANTIC_INSTANCEID,
TGSI_SEMANTIC_VERTEXID_NOBASE,
TGSI_SEMANTIC_BASEVERTEX,
0, /* SYSTEM_VALUE_BASE_INSTANCE */
0, /* SYSTEM_VALUE_DRAW_ID */
/* Geometry shader
*/

View File

@ -58,6 +58,7 @@ struct _mesa_prim {
GLint basevertex;
GLuint num_instances;
GLuint base_instance;
GLuint draw_id;
GLsizeiptr indirect_offset;
};

View File

@ -1341,6 +1341,7 @@ vbo_validated_multidrawelements(struct gl_context *ctx, GLenum mode,
prim[i].indexed = 1;
prim[i].num_instances = 1;
prim[i].base_instance = 0;
prim[i].draw_id = i;
prim[i].is_indirect = 0;
if (basevertex != NULL)
prim[i].basevertex = basevertex[i];
@ -1371,6 +1372,7 @@ vbo_validated_multidrawelements(struct gl_context *ctx, GLenum mode,
prim[0].indexed = 1;
prim[0].num_instances = 1;
prim[0].base_instance = 0;
prim[0].draw_id = i;
prim[0].is_indirect = 0;
if (basevertex != NULL)
prim[0].basevertex = basevertex[i];
@ -1598,6 +1600,7 @@ vbo_validated_multidrawarraysindirect(struct gl_context *ctx,
prim[i].mode = mode;
prim[i].indirect_offset = offset;
prim[i].is_indirect = 1;
prim[i].draw_id = i;
}
check_buffers_are_unmapped(exec->array.inputs);
@ -1684,6 +1687,7 @@ vbo_validated_multidrawelementsindirect(struct gl_context *ctx,
prim[i].indexed = 1;
prim[i].indirect_offset = offset;
prim[i].is_indirect = 1;
prim[i].draw_id = i;
}
check_buffers_are_unmapped(exec->array.inputs);

View File

@ -30,7 +30,7 @@
#include "anv_meta.h"
#include "anv_meta_clear.h"
#include "anv_private.h"
#include "anv_nir_builder.h"
#include "glsl/nir/nir_builder.h"
struct anv_render_pass anv_meta_dummy_renderpass = {0};
@ -41,7 +41,7 @@ build_nir_vertex_shader(bool attr_flat)
const struct glsl_type *vertex_type = glsl_vec4_type();
nir_builder_init_simple_shader(&b, MESA_SHADER_VERTEX);
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
vertex_type, "a_pos");
@ -73,7 +73,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
{
nir_builder b;
nir_builder_init_simple_shader(&b, MESA_SHADER_FRAGMENT);
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
const struct glsl_type *color_type = glsl_vec4_type();

View File

@ -23,8 +23,8 @@
#include "anv_meta.h"
#include "anv_meta_clear.h"
#include "anv_nir_builder.h"
#include "anv_private.h"
#include "glsl/nir/nir_builder.h"
/** Vertex attributes for color clears. */
struct color_clear_vattrs {
@ -66,8 +66,8 @@ build_color_shaders(struct nir_shader **out_vs,
nir_builder vs_b;
nir_builder fs_b;
nir_builder_init_simple_shader(&vs_b, MESA_SHADER_VERTEX);
nir_builder_init_simple_shader(&fs_b, MESA_SHADER_FRAGMENT);
nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
const struct glsl_type *position_type = glsl_vec4_type();
const struct glsl_type *color_type = glsl_vec4_type();
@ -372,8 +372,8 @@ build_depthstencil_shaders(struct nir_shader **out_vs,
nir_builder vs_b;
nir_builder fs_b;
nir_builder_init_simple_shader(&vs_b, MESA_SHADER_VERTEX);
nir_builder_init_simple_shader(&fs_b, MESA_SHADER_FRAGMENT);
nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
const struct glsl_type *position_type = glsl_vec4_type();

View File

@ -1,44 +0,0 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_nir.h"
#include "glsl/nir/nir_builder.h"
#include "util/ralloc.h"
/* This file includes NIR helpers used by meta shaders in the Vulkan
* driver. Eventually, these will all be merged into nir_builder.
* However, for now, keeping them in their own file helps to prevent merge
* conflicts.
*/
/* Set up a nir_builder with a bare single-entry-point shader: creates an
 * anonymous (NULL-parent) nir_shader for the given stage, adds one function
 * named "main", and parks the builder cursor at the end of its empty body so
 * callers can immediately start emitting instructions.
 *
 * NOTE(review): this helper belongs to the deleted anv_nir_builder.h — the
 * upstream nir_builder now provides nir_builder_init_simple_shader directly
 * (with extra mem_ctx and options parameters), which is why this file is
 * removed by the merge.
 */
static inline void
nir_builder_init_simple_shader(nir_builder *b, gl_shader_stage stage)
{
b->shader = nir_shader_create(NULL, stage, NULL);
nir_function *func =
nir_function_create(b->shader, ralloc_strdup(b->shader, "main"));
b->impl = nir_function_impl_create(func);
b->cursor = nir_after_cf_list(&b->impl->body);
}