Merge remote-tracking branch 'public/master' into vulkan
This commit is contained in:
commit
e26a978773
|
@ -172,7 +172,7 @@ GL 4.3, GLSL 4.30:
|
|||
GL_KHR_debug DONE (all drivers)
|
||||
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
|
||||
GL_ARB_fragment_layer_viewport DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe)
|
||||
GL_ARB_framebuffer_no_attachments DONE (i965)
|
||||
GL_ARB_framebuffer_no_attachments DONE (i965, r600, radeonsi)
|
||||
GL_ARB_internalformat_query2 DONE (all drivers)
|
||||
GL_ARB_invalidate_subdata DONE (all drivers)
|
||||
GL_ARB_multi_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
|
|
|
@ -44,6 +44,7 @@ Note: some of the new features are only available with certain drivers.
|
|||
</p>
|
||||
|
||||
<ul>
|
||||
<li>GL_ARB_framebuffer_no_attachments on r600, radeonsi</li>
|
||||
<li>GL_ARB_internalformat_query2 on all drivers</li>
|
||||
<li>GL_ARB_shader_atomic_counter_ops on nvc0</li>
|
||||
<li>GL_ARB_shader_image_load_store on radeonsi, softpipe</li>
|
||||
|
@ -53,6 +54,7 @@ Note: some of the new features are only available with certain drivers.
|
|||
<li>GL_OES_draw_buffers_indexed and GL_EXT_draw_buffers_indexed on all drivers that support GL_ARB_draw_buffers_blend</li>
|
||||
<li>GL_OES_shader_image_atomic on all drivers that support GL_ARB_shader_image_load_store</li>
|
||||
<li>GL_OES_texture_border_clamp and GL_EXT_texture_border_clamp on all drivers that support GL_ARB_texture_border_clamp</li>
|
||||
<li>EGL_KHR_reusable_sync on all drivers</li>
|
||||
</ul>
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
|
|
@ -216,7 +216,7 @@ process_block_array(struct uniform_block_array_elements *ub_array, char **name,
|
|||
{
|
||||
if (ub_array) {
|
||||
for (unsigned j = 0; j < ub_array->num_array_elements; j++) {
|
||||
size_t new_length = name_length;
|
||||
size_t new_length = name_length;
|
||||
|
||||
/* Append the subscript to the current variable name */
|
||||
ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]",
|
||||
|
@ -261,7 +261,6 @@ process_block_array(struct uniform_block_array_elements *ub_array, char **name,
|
|||
}
|
||||
blocks[i].NumUniforms =
|
||||
(unsigned)(ptrdiff_t)(&variables[parcel->index] - blocks[i].Uniforms);
|
||||
blocks[i].IsShaderStorage = b->is_shader_storage;
|
||||
|
||||
*block_index = *block_index + 1;
|
||||
*binding_offset = *binding_offset + 1;
|
||||
|
@ -291,13 +290,105 @@ resize_block_array(const glsl_type *type,
|
|||
}
|
||||
}
|
||||
|
||||
unsigned
|
||||
static void
|
||||
create_buffer_blocks(void *mem_ctx, struct gl_context *ctx,
|
||||
struct gl_shader_program *prog,
|
||||
struct gl_uniform_block **out_blks, unsigned num_blocks,
|
||||
struct hash_table *block_hash, unsigned num_variables,
|
||||
bool create_ubo_blocks)
|
||||
{
|
||||
if (num_blocks == 0) {
|
||||
assert(num_variables == 0);
|
||||
return;
|
||||
}
|
||||
|
||||
assert(num_variables != 0);
|
||||
|
||||
/* Allocate storage to hold all of the information related to uniform
|
||||
* blocks that can be queried through the API.
|
||||
*/
|
||||
struct gl_uniform_block *blocks = ralloc_array(mem_ctx, gl_uniform_block, num_blocks);
|
||||
gl_uniform_buffer_variable *variables =
|
||||
ralloc_array(blocks, gl_uniform_buffer_variable, num_variables);
|
||||
|
||||
/* Add each variable from each uniform block to the API tracking
|
||||
* structures.
|
||||
*/
|
||||
ubo_visitor parcel(blocks, variables, num_variables);
|
||||
|
||||
STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD140)
|
||||
== unsigned(ubo_packing_std140));
|
||||
STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_SHARED)
|
||||
== unsigned(ubo_packing_shared));
|
||||
STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_PACKED)
|
||||
== unsigned(ubo_packing_packed));
|
||||
STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD430)
|
||||
== unsigned(ubo_packing_std430));
|
||||
|
||||
unsigned i = 0;
|
||||
struct hash_entry *entry;
|
||||
hash_table_foreach (block_hash, entry) {
|
||||
const struct link_uniform_block_active *const b =
|
||||
(const struct link_uniform_block_active *) entry->data;
|
||||
const glsl_type *block_type = b->type;
|
||||
|
||||
if ((create_ubo_blocks && !b->is_shader_storage) ||
|
||||
(!create_ubo_blocks && b->is_shader_storage)) {
|
||||
|
||||
if (b->array != NULL) {
|
||||
unsigned binding_offset = 0;
|
||||
char *name = ralloc_strdup(NULL,
|
||||
block_type->without_array()->name);
|
||||
size_t name_length = strlen(name);
|
||||
|
||||
assert(b->has_instance_name);
|
||||
process_block_array(b->array, &name, name_length, blocks, &parcel,
|
||||
variables, b, &i, &binding_offset, ctx, prog);
|
||||
ralloc_free(name);
|
||||
} else {
|
||||
blocks[i].Name = ralloc_strdup(blocks, block_type->name);
|
||||
blocks[i].Uniforms = &variables[parcel.index];
|
||||
blocks[i].Binding = (b->has_binding) ? b->binding : 0;
|
||||
blocks[i].UniformBufferSize = 0;
|
||||
blocks[i]._Packing =
|
||||
gl_uniform_block_packing(block_type->interface_packing);
|
||||
|
||||
parcel.process(block_type,
|
||||
b->has_instance_name ? block_type->name : "");
|
||||
|
||||
blocks[i].UniformBufferSize = parcel.buffer_size;
|
||||
|
||||
/* Check SSBO size is lower than maximum supported size for SSBO
|
||||
*/
|
||||
if (b->is_shader_storage &&
|
||||
parcel.buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
|
||||
linker_error(prog, "shader storage block `%s' has size %d, "
|
||||
"which is larger than than the maximum allowed (%d)",
|
||||
block_type->name, parcel.buffer_size,
|
||||
ctx->Const.MaxShaderStorageBlockSize);
|
||||
}
|
||||
blocks[i].NumUniforms = (unsigned)(ptrdiff_t)
|
||||
(&variables[parcel.index] - blocks[i].Uniforms);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*out_blks = blocks;
|
||||
|
||||
assert(parcel.index == num_variables);
|
||||
}
|
||||
|
||||
void
|
||||
link_uniform_blocks(void *mem_ctx,
|
||||
struct gl_context *ctx,
|
||||
struct gl_shader_program *prog,
|
||||
struct gl_shader **shader_list,
|
||||
unsigned num_shaders,
|
||||
struct gl_uniform_block **blocks_ret)
|
||||
struct gl_uniform_block **ubo_blocks,
|
||||
unsigned *num_ubo_blocks,
|
||||
struct gl_uniform_block **ssbo_blocks,
|
||||
unsigned *num_ssbo_blocks)
|
||||
{
|
||||
/* This hash table will track all of the uniform blocks that have been
|
||||
* encountered. Since blocks with the same block-name must be the same,
|
||||
|
@ -310,7 +401,7 @@ link_uniform_blocks(void *mem_ctx,
|
|||
if (block_hash == NULL) {
|
||||
_mesa_error_no_memory(__func__);
|
||||
linker_error(prog, "out of memory\n");
|
||||
return 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Determine which uniform blocks are active.
|
||||
|
@ -323,8 +414,8 @@ link_uniform_blocks(void *mem_ctx,
|
|||
/* Count the number of active uniform blocks. Count the total number of
|
||||
* active slots in those uniform blocks.
|
||||
*/
|
||||
unsigned num_blocks = 0;
|
||||
unsigned num_variables = 0;
|
||||
unsigned num_ubo_variables = 0;
|
||||
unsigned num_ssbo_variables = 0;
|
||||
count_block_size block_size;
|
||||
struct hash_entry *entry;
|
||||
|
||||
|
@ -346,102 +437,36 @@ link_uniform_blocks(void *mem_ctx,
|
|||
|
||||
if (b->array != NULL) {
|
||||
unsigned aoa_size = b->type->arrays_of_arrays_size();
|
||||
num_blocks += aoa_size;
|
||||
num_variables += aoa_size * block_size.num_active_uniforms;
|
||||
} else {
|
||||
num_blocks++;
|
||||
num_variables += block_size.num_active_uniforms;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (num_blocks == 0) {
|
||||
assert(num_variables == 0);
|
||||
_mesa_hash_table_destroy(block_hash, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
assert(num_variables != 0);
|
||||
|
||||
/* Allocate storage to hold all of the informatation related to uniform
|
||||
* blocks that can be queried through the API.
|
||||
*/
|
||||
gl_uniform_block *blocks =
|
||||
ralloc_array(mem_ctx, gl_uniform_block, num_blocks);
|
||||
gl_uniform_buffer_variable *variables =
|
||||
ralloc_array(blocks, gl_uniform_buffer_variable, num_variables);
|
||||
|
||||
/* Add each variable from each uniform block to the API tracking
|
||||
* structures.
|
||||
*/
|
||||
unsigned i = 0;
|
||||
ubo_visitor parcel(blocks, variables, num_variables);
|
||||
|
||||
STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD140)
|
||||
== unsigned(ubo_packing_std140));
|
||||
STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_SHARED)
|
||||
== unsigned(ubo_packing_shared));
|
||||
STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_PACKED)
|
||||
== unsigned(ubo_packing_packed));
|
||||
STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD430)
|
||||
== unsigned(ubo_packing_std430));
|
||||
|
||||
hash_table_foreach (block_hash, entry) {
|
||||
const struct link_uniform_block_active *const b =
|
||||
(const struct link_uniform_block_active *) entry->data;
|
||||
const glsl_type *block_type = b->type;
|
||||
|
||||
if (b->array != NULL) {
|
||||
unsigned binding_offset = 0;
|
||||
char *name = ralloc_strdup(NULL, block_type->without_array()->name);
|
||||
size_t name_length = strlen(name);
|
||||
|
||||
assert(b->has_instance_name);
|
||||
process_block_array(b->array, &name, name_length, blocks, &parcel,
|
||||
variables, b, &i, &binding_offset, ctx, prog);
|
||||
ralloc_free(name);
|
||||
} else {
|
||||
blocks[i].Name = ralloc_strdup(blocks, block_type->name);
|
||||
blocks[i].Uniforms = &variables[parcel.index];
|
||||
blocks[i].Binding = (b->has_binding) ? b->binding : 0;
|
||||
blocks[i].UniformBufferSize = 0;
|
||||
blocks[i]._Packing =
|
||||
gl_uniform_block_packing(block_type->interface_packing);
|
||||
|
||||
parcel.process(block_type,
|
||||
b->has_instance_name ? block_type->name : "");
|
||||
|
||||
blocks[i].UniformBufferSize = parcel.buffer_size;
|
||||
|
||||
/* Check SSBO size is lower than maximum supported size for SSBO */
|
||||
if (b->is_shader_storage &&
|
||||
parcel.buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
|
||||
linker_error(prog, "shader storage block `%s' has size %d, "
|
||||
"which is larger than than the maximum allowed (%d)",
|
||||
block_type->name,
|
||||
parcel.buffer_size,
|
||||
ctx->Const.MaxShaderStorageBlockSize);
|
||||
if (b->is_shader_storage) {
|
||||
*num_ssbo_blocks += aoa_size;
|
||||
num_ssbo_variables += aoa_size * block_size.num_active_uniforms;
|
||||
} else {
|
||||
*num_ubo_blocks += aoa_size;
|
||||
num_ubo_variables += aoa_size * block_size.num_active_uniforms;
|
||||
}
|
||||
} else {
|
||||
if (b->is_shader_storage) {
|
||||
(*num_ssbo_blocks)++;
|
||||
num_ssbo_variables += block_size.num_active_uniforms;
|
||||
} else {
|
||||
(*num_ubo_blocks)++;
|
||||
num_ubo_variables += block_size.num_active_uniforms;
|
||||
}
|
||||
blocks[i].NumUniforms =
|
||||
(unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms);
|
||||
|
||||
blocks[i].IsShaderStorage = b->is_shader_storage;
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
assert(parcel.index == num_variables);
|
||||
create_buffer_blocks(mem_ctx, ctx, prog, ubo_blocks, *num_ubo_blocks,
|
||||
block_hash, num_ubo_variables, true);
|
||||
create_buffer_blocks(mem_ctx, ctx, prog, ssbo_blocks, *num_ssbo_blocks,
|
||||
block_hash, num_ssbo_variables, false);
|
||||
|
||||
_mesa_hash_table_destroy(block_hash, NULL);
|
||||
|
||||
*blocks_ret = blocks;
|
||||
return num_blocks;
|
||||
}
|
||||
|
||||
bool
|
||||
link_uniform_blocks_are_compatible(const gl_uniform_block *a,
|
||||
const gl_uniform_block *b)
|
||||
const gl_uniform_block *b)
|
||||
{
|
||||
assert(strcmp(a->Name, b->Name) == 0);
|
||||
|
||||
|
@ -464,13 +489,13 @@ link_uniform_blocks_are_compatible(const gl_uniform_block *a,
|
|||
|
||||
for (unsigned i = 0; i < a->NumUniforms; i++) {
|
||||
if (strcmp(a->Uniforms[i].Name, b->Uniforms[i].Name) != 0)
|
||||
return false;
|
||||
return false;
|
||||
|
||||
if (a->Uniforms[i].Type != b->Uniforms[i].Type)
|
||||
return false;
|
||||
return false;
|
||||
|
||||
if (a->Uniforms[i].RowMajor != b->Uniforms[i].RowMajor)
|
||||
return false;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
@ -154,11 +154,17 @@ set_opaque_binding(void *mem_ctx, gl_shader_program *prog,
|
|||
}
|
||||
|
||||
void
|
||||
set_block_binding(gl_shader_program *prog, const char *block_name, int binding)
|
||||
set_block_binding(gl_shader_program *prog, const char *block_name,
|
||||
unsigned mode, int binding)
|
||||
{
|
||||
for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
|
||||
if (!strcmp(prog->BufferInterfaceBlocks[i].Name, block_name)) {
|
||||
prog->BufferInterfaceBlocks[i].Binding = binding;
|
||||
unsigned num_blocks = mode == ir_var_uniform ? prog->NumUniformBlocks :
|
||||
prog->NumShaderStorageBlocks;
|
||||
struct gl_uniform_block *blks = mode == ir_var_uniform ?
|
||||
prog->UniformBlocks : prog->ShaderStorageBlocks;
|
||||
|
||||
for (unsigned i = 0; i < num_blocks; i++) {
|
||||
if (!strcmp(blks[i].Name, block_name)) {
|
||||
blks[i].Binding = binding;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -308,11 +314,12 @@ link_set_uniform_initializers(struct gl_shader_program *prog,
|
|||
* each subsequent element takes the next consecutive
|
||||
* uniform block binding point."
|
||||
*/
|
||||
linker::set_block_binding(prog, name,
|
||||
linker::set_block_binding(prog, name, var->data.mode,
|
||||
var->data.binding + i);
|
||||
}
|
||||
} else {
|
||||
linker::set_block_binding(prog, iface_type->name,
|
||||
var->data.mode,
|
||||
var->data.binding);
|
||||
}
|
||||
} else if (type->contains_atomic()) {
|
||||
|
|
|
@ -462,7 +462,7 @@ public:
|
|||
|
||||
buffer_block_index = -1;
|
||||
if (var->is_in_buffer_block()) {
|
||||
struct gl_uniform_block **blks = var->is_in_shader_storage_block() ?
|
||||
struct gl_uniform_block *blks = var->is_in_shader_storage_block() ?
|
||||
prog->ShaderStorageBlocks : prog->UniformBlocks;
|
||||
unsigned num_blks = var->is_in_shader_storage_block() ?
|
||||
prog->NumShaderStorageBlocks : prog->NumUniformBlocks;
|
||||
|
@ -471,15 +471,15 @@ public:
|
|||
unsigned l = strlen(var->get_interface_type()->name);
|
||||
|
||||
for (unsigned i = 0; i < num_blks; i++) {
|
||||
if (strncmp(var->get_interface_type()->name, blks[i]->Name, l)
|
||||
== 0 && blks[i]->Name[l] == '[') {
|
||||
if (strncmp(var->get_interface_type()->name, blks[i].Name, l)
|
||||
== 0 && blks[i].Name[l] == '[') {
|
||||
buffer_block_index = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (unsigned i = 0; i < num_blks; i++) {
|
||||
if (strcmp(var->get_interface_type()->name, blks[i]->Name) ==
|
||||
if (strcmp(var->get_interface_type()->name, blks[i].Name) ==
|
||||
0) {
|
||||
buffer_block_index = i;
|
||||
break;
|
||||
|
@ -500,7 +500,7 @@ public:
|
|||
var->get_interface_type()->name);
|
||||
} else {
|
||||
const struct gl_uniform_block *const block =
|
||||
blks[buffer_block_index];
|
||||
&blks[buffer_block_index];
|
||||
|
||||
assert(var->data.location != -1);
|
||||
|
||||
|
@ -960,11 +960,16 @@ link_update_uniform_buffer_variables(struct gl_shader *shader)
|
|||
sentinel = '[';
|
||||
}
|
||||
|
||||
unsigned num_blocks = var->data.mode == ir_var_uniform ?
|
||||
shader->NumUniformBlocks : shader->NumShaderStorageBlocks;
|
||||
struct gl_uniform_block **blks = var->data.mode == ir_var_uniform ?
|
||||
shader->UniformBlocks : shader->ShaderStorageBlocks;
|
||||
|
||||
const unsigned l = strlen(var->name);
|
||||
for (unsigned i = 0; i < shader->NumBufferInterfaceBlocks; i++) {
|
||||
for (unsigned j = 0; j < shader->BufferInterfaceBlocks[i]->NumUniforms; j++) {
|
||||
for (unsigned i = 0; i < num_blocks; i++) {
|
||||
for (unsigned j = 0; j < blks[i]->NumUniforms; j++) {
|
||||
if (sentinel) {
|
||||
const char *begin = shader->BufferInterfaceBlocks[i]->Uniforms[j].Name;
|
||||
const char *begin = blks[i]->Uniforms[j].Name;
|
||||
const char *end = strchr(begin, sentinel);
|
||||
|
||||
if (end == NULL)
|
||||
|
@ -978,8 +983,7 @@ link_update_uniform_buffer_variables(struct gl_shader *shader)
|
|||
var->data.location = j;
|
||||
break;
|
||||
}
|
||||
} else if (!strcmp(var->name,
|
||||
shader->BufferInterfaceBlocks[i]->Uniforms[j].Name)) {
|
||||
} else if (!strcmp(var->name, blks[i]->Uniforms[j].Name)) {
|
||||
found = true;
|
||||
var->data.location = j;
|
||||
break;
|
||||
|
@ -1104,11 +1108,9 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
|
|||
sh->num_uniform_components = uniform_size.num_shader_uniform_components;
|
||||
sh->num_combined_uniform_components = sh->num_uniform_components;
|
||||
|
||||
for (unsigned i = 0; i < sh->NumBufferInterfaceBlocks; i++) {
|
||||
if (!sh->BufferInterfaceBlocks[i]->IsShaderStorage) {
|
||||
sh->num_combined_uniform_components +=
|
||||
sh->BufferInterfaceBlocks[i]->UniformBufferSize / 4;
|
||||
}
|
||||
for (unsigned i = 0; i < sh->NumUniformBlocks; i++) {
|
||||
sh->num_combined_uniform_components +=
|
||||
sh->UniformBlocks[i]->UniformBufferSize / 4;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1165,39 +1165,58 @@ cross_validate_uniforms(struct gl_shader_program *prog)
|
|||
}
|
||||
|
||||
/**
|
||||
* Accumulates the array of prog->BufferInterfaceBlocks and checks that all
|
||||
* definitons of blocks agree on their contents.
|
||||
* Accumulates the array of buffer blocks and checks that all definitions of
|
||||
* blocks agree on their contents.
|
||||
*/
|
||||
static bool
|
||||
interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog)
|
||||
interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog,
|
||||
bool validate_ssbo)
|
||||
{
|
||||
int *InterfaceBlockStageIndex[MESA_SHADER_STAGES];
|
||||
struct gl_uniform_block *blks = NULL;
|
||||
unsigned *num_blks = validate_ssbo ? &prog->NumShaderStorageBlocks :
|
||||
&prog->NumUniformBlocks;
|
||||
|
||||
unsigned max_num_uniform_blocks = 0;
|
||||
unsigned max_num_buffer_blocks = 0;
|
||||
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||
if (prog->_LinkedShaders[i])
|
||||
max_num_uniform_blocks += prog->_LinkedShaders[i]->NumBufferInterfaceBlocks;
|
||||
if (prog->_LinkedShaders[i]) {
|
||||
if (validate_ssbo) {
|
||||
max_num_buffer_blocks +=
|
||||
prog->_LinkedShaders[i]->NumShaderStorageBlocks;
|
||||
} else {
|
||||
max_num_buffer_blocks +=
|
||||
prog->_LinkedShaders[i]->NumUniformBlocks;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||
struct gl_shader *sh = prog->_LinkedShaders[i];
|
||||
|
||||
InterfaceBlockStageIndex[i] = new int[max_num_uniform_blocks];
|
||||
for (unsigned int j = 0; j < max_num_uniform_blocks; j++)
|
||||
InterfaceBlockStageIndex[i] = new int[max_num_buffer_blocks];
|
||||
for (unsigned int j = 0; j < max_num_buffer_blocks; j++)
|
||||
InterfaceBlockStageIndex[i][j] = -1;
|
||||
|
||||
if (sh == NULL)
|
||||
continue;
|
||||
|
||||
for (unsigned int j = 0; j < sh->NumBufferInterfaceBlocks; j++) {
|
||||
int index = link_cross_validate_uniform_block(prog,
|
||||
&prog->BufferInterfaceBlocks,
|
||||
&prog->NumBufferInterfaceBlocks,
|
||||
sh->BufferInterfaceBlocks[j]);
|
||||
unsigned sh_num_blocks;
|
||||
struct gl_uniform_block **sh_blks;
|
||||
if (validate_ssbo) {
|
||||
sh_num_blocks = prog->_LinkedShaders[i]->NumShaderStorageBlocks;
|
||||
sh_blks = sh->ShaderStorageBlocks;
|
||||
} else {
|
||||
sh_num_blocks = prog->_LinkedShaders[i]->NumUniformBlocks;
|
||||
sh_blks = sh->UniformBlocks;
|
||||
}
|
||||
|
||||
for (unsigned int j = 0; j < sh_num_blocks; j++) {
|
||||
int index = link_cross_validate_uniform_block(prog, &blks, num_blks,
|
||||
sh_blks[j]);
|
||||
|
||||
if (index == -1) {
|
||||
linker_error(prog, "uniform block `%s' has mismatching definitions\n",
|
||||
sh->BufferInterfaceBlocks[j]->Name);
|
||||
linker_error(prog, "buffer block `%s' has mismatching "
|
||||
"definitions\n", sh_blks[j]->Name);
|
||||
|
||||
for (unsigned k = 0; k <= i; k++) {
|
||||
delete[] InterfaceBlockStageIndex[k];
|
||||
|
@ -1213,16 +1232,18 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog)
|
|||
* FIXME: We should be able to free the per stage blocks here.
|
||||
*/
|
||||
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||
for (unsigned j = 0; j < prog->NumBufferInterfaceBlocks; j++) {
|
||||
for (unsigned j = 0; j < *num_blks; j++) {
|
||||
int stage_index = InterfaceBlockStageIndex[i][j];
|
||||
|
||||
if (stage_index != -1) {
|
||||
struct gl_shader *sh = prog->_LinkedShaders[i];
|
||||
|
||||
prog->BufferInterfaceBlocks[j].stageref |= (1 << i);
|
||||
blks[j].stageref |= (1 << i);
|
||||
|
||||
sh->BufferInterfaceBlocks[stage_index] =
|
||||
&prog->BufferInterfaceBlocks[j];
|
||||
struct gl_uniform_block **sh_blks = validate_ssbo ?
|
||||
sh->ShaderStorageBlocks : sh->UniformBlocks;
|
||||
|
||||
sh_blks[stage_index] = &blks[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1231,6 +1252,11 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog)
|
|||
delete[] InterfaceBlockStageIndex[i];
|
||||
}
|
||||
|
||||
if (validate_ssbo)
|
||||
prog->ShaderStorageBlocks = blks;
|
||||
else
|
||||
prog->UniformBlocks = blks;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -2074,7 +2100,10 @@ link_intrastage_shaders(void *mem_ctx,
|
|||
struct gl_shader **shader_list,
|
||||
unsigned num_shaders)
|
||||
{
|
||||
struct gl_uniform_block *uniform_blocks = NULL;
|
||||
struct gl_uniform_block *ubo_blocks = NULL;
|
||||
struct gl_uniform_block *ssbo_blocks = NULL;
|
||||
unsigned num_ubo_blocks = 0;
|
||||
unsigned num_ssbo_blocks = 0;
|
||||
|
||||
/* Check that global variables defined in multiple shaders are consistent.
|
||||
*/
|
||||
|
@ -2090,9 +2119,10 @@ link_intrastage_shaders(void *mem_ctx,
|
|||
return NULL;
|
||||
|
||||
/* Link up uniform blocks defined within this stage. */
|
||||
const unsigned num_uniform_blocks =
|
||||
link_uniform_blocks(mem_ctx, ctx, prog, shader_list, num_shaders,
|
||||
&uniform_blocks);
|
||||
link_uniform_blocks(mem_ctx, ctx, prog, shader_list, num_shaders,
|
||||
&ubo_blocks, &num_ubo_blocks, &ssbo_blocks,
|
||||
&num_ssbo_blocks);
|
||||
|
||||
if (!prog->LinkStatus)
|
||||
return NULL;
|
||||
|
||||
|
@ -2159,15 +2189,23 @@ link_intrastage_shaders(void *mem_ctx,
|
|||
linked->ir = new(linked) exec_list;
|
||||
clone_ir_list(mem_ctx, linked->ir, main->ir);
|
||||
|
||||
linked->BufferInterfaceBlocks =
|
||||
ralloc_array(linked, gl_uniform_block *, num_uniform_blocks);
|
||||
|
||||
ralloc_steal(linked, uniform_blocks);
|
||||
for (unsigned i = 0; i < num_uniform_blocks; i++) {
|
||||
linked->BufferInterfaceBlocks[i] = &uniform_blocks[i];
|
||||
/* Copy ubo blocks to linked shader list */
|
||||
linked->UniformBlocks =
|
||||
ralloc_array(linked, gl_uniform_block *, num_ubo_blocks);
|
||||
ralloc_steal(linked, ubo_blocks);
|
||||
for (unsigned i = 0; i < num_ubo_blocks; i++) {
|
||||
linked->UniformBlocks[i] = &ubo_blocks[i];
|
||||
}
|
||||
linked->NumUniformBlocks = num_ubo_blocks;
|
||||
|
||||
linked->NumBufferInterfaceBlocks = num_uniform_blocks;
|
||||
/* Copy ssbo blocks to linked shader list */
|
||||
linked->ShaderStorageBlocks =
|
||||
ralloc_array(linked, gl_uniform_block *, num_ssbo_blocks);
|
||||
ralloc_steal(linked, ssbo_blocks);
|
||||
for (unsigned i = 0; i < num_ssbo_blocks; i++) {
|
||||
linked->ShaderStorageBlocks[i] = &ssbo_blocks[i];
|
||||
}
|
||||
linked->NumShaderStorageBlocks = num_ssbo_blocks;
|
||||
|
||||
link_fs_input_layout_qualifiers(prog, linked, shader_list, num_shaders);
|
||||
link_tcs_out_layout_qualifiers(prog, linked, shader_list, num_shaders);
|
||||
|
@ -2973,21 +3011,22 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
ctx->Const.MaxCombinedShaderStorageBlocks);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
|
||||
/* Don't check SSBOs for Uniform Block Size */
|
||||
if (!prog->BufferInterfaceBlocks[i].IsShaderStorage &&
|
||||
prog->BufferInterfaceBlocks[i].UniformBufferSize > ctx->Const.MaxUniformBlockSize) {
|
||||
for (unsigned i = 0; i < prog->NumUniformBlocks; i++) {
|
||||
if (prog->UniformBlocks[i].UniformBufferSize >
|
||||
ctx->Const.MaxUniformBlockSize) {
|
||||
linker_error(prog, "Uniform block %s too big (%d/%d)\n",
|
||||
prog->BufferInterfaceBlocks[i].Name,
|
||||
prog->BufferInterfaceBlocks[i].UniformBufferSize,
|
||||
prog->UniformBlocks[i].Name,
|
||||
prog->UniformBlocks[i].UniformBufferSize,
|
||||
ctx->Const.MaxUniformBlockSize);
|
||||
}
|
||||
}
|
||||
|
||||
if (prog->BufferInterfaceBlocks[i].IsShaderStorage &&
|
||||
prog->BufferInterfaceBlocks[i].UniformBufferSize > ctx->Const.MaxShaderStorageBlockSize) {
|
||||
for (unsigned i = 0; i < prog->NumShaderStorageBlocks; i++) {
|
||||
if (prog->ShaderStorageBlocks[i].UniformBufferSize >
|
||||
ctx->Const.MaxShaderStorageBlockSize) {
|
||||
linker_error(prog, "Shader storage block %s too big (%d/%d)\n",
|
||||
prog->BufferInterfaceBlocks[i].Name,
|
||||
prog->BufferInterfaceBlocks[i].UniformBufferSize,
|
||||
prog->ShaderStorageBlocks[i].Name,
|
||||
prog->ShaderStorageBlocks[i].UniformBufferSize,
|
||||
ctx->Const.MaxShaderStorageBlockSize);
|
||||
}
|
||||
}
|
||||
|
@ -3295,8 +3334,8 @@ should_add_buffer_variable(struct gl_shader_program *shProg,
|
|||
if (type != GL_BUFFER_VARIABLE)
|
||||
return true;
|
||||
|
||||
for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
|
||||
const char *block_name = shProg->BufferInterfaceBlocks[i].Name;
|
||||
for (unsigned i = 0; i < shProg->NumShaderStorageBlocks; i++) {
|
||||
const char *block_name = shProg->ShaderStorageBlocks[i].Name;
|
||||
block_name_len = strlen(block_name);
|
||||
|
||||
const char *block_square_bracket = strchr(block_name, '[');
|
||||
|
@ -3805,8 +3844,8 @@ calculate_array_size_and_stride(struct gl_shader_program *shProg,
|
|||
char *var_name = get_top_level_name(uni->name);
|
||||
char *interface_name =
|
||||
get_top_level_name(uni->is_shader_storage ?
|
||||
shProg->ShaderStorageBlocks[block_index]->Name :
|
||||
shProg->UniformBlocks[block_index]->Name);
|
||||
shProg->ShaderStorageBlocks[block_index].Name :
|
||||
shProg->UniformBlocks[block_index].Name);
|
||||
|
||||
if (strcmp(var_name, interface_name) == 0) {
|
||||
/* Deal with instanced array of SSBOs */
|
||||
|
@ -3947,8 +3986,8 @@ build_program_resource_list(struct gl_context *ctx,
|
|||
int block_index = shProg->UniformStorage[i].block_index;
|
||||
if (block_index != -1) {
|
||||
stageref |= is_shader_storage ?
|
||||
shProg->ShaderStorageBlocks[block_index]->stageref :
|
||||
shProg->UniformBlocks[block_index]->stageref;
|
||||
shProg->ShaderStorageBlocks[block_index].stageref :
|
||||
shProg->UniformBlocks[block_index].stageref;
|
||||
}
|
||||
|
||||
GLenum type = is_shader_storage ? GL_BUFFER_VARIABLE : GL_UNIFORM;
|
||||
|
@ -3965,12 +4004,17 @@ build_program_resource_list(struct gl_context *ctx,
|
|||
return;
|
||||
}
|
||||
|
||||
/* Add program uniform blocks and shader storage blocks. */
|
||||
for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
|
||||
bool is_shader_storage = shProg->BufferInterfaceBlocks[i].IsShaderStorage;
|
||||
GLenum type = is_shader_storage ? GL_SHADER_STORAGE_BLOCK : GL_UNIFORM_BLOCK;
|
||||
if (!add_program_resource(shProg, type,
|
||||
&shProg->BufferInterfaceBlocks[i], 0))
|
||||
/* Add program uniform blocks. */
|
||||
for (unsigned i = 0; i < shProg->NumUniformBlocks; i++) {
|
||||
if (!add_program_resource(shProg, GL_UNIFORM_BLOCK,
|
||||
&shProg->UniformBlocks[i], 0))
|
||||
return;
|
||||
}
|
||||
|
||||
/* Add program shader storage blocks. */
|
||||
for (unsigned i = 0; i < shProg->NumShaderStorageBlocks; i++) {
|
||||
if (!add_program_resource(shProg, GL_SHADER_STORAGE_BLOCK,
|
||||
&shProg->ShaderStorageBlocks[i], 0))
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -4115,49 +4159,6 @@ link_assign_subroutine_types(struct gl_shader_program *prog)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
split_ubos_and_ssbos(void *mem_ctx,
|
||||
struct gl_uniform_block **s_blks,
|
||||
struct gl_uniform_block *p_blks,
|
||||
unsigned num_blocks,
|
||||
struct gl_uniform_block ***ubos,
|
||||
unsigned *num_ubos,
|
||||
struct gl_uniform_block ***ssbos,
|
||||
unsigned *num_ssbos)
|
||||
{
|
||||
unsigned num_ubo_blocks = 0;
|
||||
unsigned num_ssbo_blocks = 0;
|
||||
|
||||
/* Are we splitting the list of blocks for the shader or the program? */
|
||||
bool is_shader = p_blks == NULL;
|
||||
|
||||
for (unsigned i = 0; i < num_blocks; i++) {
|
||||
if (is_shader ? s_blks[i]->IsShaderStorage : p_blks[i].IsShaderStorage)
|
||||
num_ssbo_blocks++;
|
||||
else
|
||||
num_ubo_blocks++;
|
||||
}
|
||||
|
||||
*ubos = ralloc_array(mem_ctx, gl_uniform_block *, num_ubo_blocks);
|
||||
*num_ubos = 0;
|
||||
|
||||
*ssbos = ralloc_array(mem_ctx, gl_uniform_block *, num_ssbo_blocks);
|
||||
*num_ssbos = 0;
|
||||
|
||||
for (unsigned i = 0; i < num_blocks; i++) {
|
||||
struct gl_uniform_block *blk = is_shader ? s_blks[i] : &p_blks[i];
|
||||
if (blk->IsShaderStorage) {
|
||||
(*ssbos)[*num_ssbos] = blk;
|
||||
(*num_ssbos)++;
|
||||
} else {
|
||||
(*ubos)[*num_ubos] = blk;
|
||||
(*num_ubos)++;
|
||||
}
|
||||
}
|
||||
|
||||
assert(*num_ubos + *num_ssbos == num_blocks);
|
||||
}
|
||||
|
||||
static void
|
||||
set_always_active_io(exec_list *ir, ir_variable_mode io_mode)
|
||||
{
|
||||
|
@ -4498,7 +4499,12 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
if (prog->SeparateShader)
|
||||
disable_varying_optimizations_for_sso(prog);
|
||||
|
||||
if (!interstage_cross_validate_uniform_blocks(prog))
|
||||
/* Process UBOs */
|
||||
if (!interstage_cross_validate_uniform_blocks(prog, false))
|
||||
goto done;
|
||||
|
||||
/* Process SSBOs */
|
||||
if (!interstage_cross_validate_uniform_blocks(prog, true))
|
||||
goto done;
|
||||
|
||||
/* Do common optimization before assigning storage for attributes,
|
||||
|
@ -4695,33 +4701,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
has_xfb_qualifiers))
|
||||
goto done;
|
||||
|
||||
/* Split BufferInterfaceBlocks into UniformBlocks and ShaderStorageBlocks
|
||||
* for gl_shader_program and gl_shader, so that drivers that need separate
|
||||
* index spaces for each set can have that.
|
||||
*/
|
||||
for (unsigned i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES; i++) {
|
||||
if (prog->_LinkedShaders[i] != NULL) {
|
||||
gl_shader *sh = prog->_LinkedShaders[i];
|
||||
split_ubos_and_ssbos(sh,
|
||||
sh->BufferInterfaceBlocks,
|
||||
NULL,
|
||||
sh->NumBufferInterfaceBlocks,
|
||||
&sh->UniformBlocks,
|
||||
&sh->NumUniformBlocks,
|
||||
&sh->ShaderStorageBlocks,
|
||||
&sh->NumShaderStorageBlocks);
|
||||
}
|
||||
}
|
||||
|
||||
split_ubos_and_ssbos(prog,
|
||||
NULL,
|
||||
prog->BufferInterfaceBlocks,
|
||||
prog->NumBufferInterfaceBlocks,
|
||||
&prog->UniformBlocks,
|
||||
&prog->NumUniformBlocks,
|
||||
&prog->ShaderStorageBlocks,
|
||||
&prog->NumShaderStorageBlocks);
|
||||
|
||||
update_array_sizes(prog);
|
||||
link_assign_uniform_locations(prog, ctx->Const.UniformBooleanTrue,
|
||||
num_explicit_uniform_locs,
|
||||
|
|
|
@ -53,13 +53,16 @@ extern bool
|
|||
link_uniform_blocks_are_compatible(const gl_uniform_block *a,
|
||||
const gl_uniform_block *b);
|
||||
|
||||
extern unsigned
|
||||
extern void
|
||||
link_uniform_blocks(void *mem_ctx,
|
||||
struct gl_context *ctx,
|
||||
struct gl_shader_program *prog,
|
||||
struct gl_shader **shader_list,
|
||||
unsigned num_shaders,
|
||||
struct gl_uniform_block **blocks_ret);
|
||||
struct gl_uniform_block **ubo_blocks,
|
||||
unsigned *num_ubo_blocks,
|
||||
struct gl_uniform_block **ssbo_blocks,
|
||||
unsigned *num_ssbo_blocks);
|
||||
|
||||
bool
|
||||
validate_intrastage_arrays(struct gl_shader_program *prog,
|
||||
|
|
|
@ -372,8 +372,7 @@ lower_ubo_reference_visitor::ubo_load(void *mem_ctx,
|
|||
static bool
|
||||
shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
|
||||
{
|
||||
return state->ARB_shader_storage_buffer_object_enable ||
|
||||
state->is_version(430, 310);
|
||||
return state->has_shader_storage_buffer_objects();
|
||||
}
|
||||
|
||||
uint32_t
|
||||
|
|
|
@ -385,6 +385,26 @@ public:
|
|||
case ir_var_const_in:
|
||||
return this->lower_temps;
|
||||
|
||||
case ir_var_system_value:
|
||||
/* There are only a few system values that have array types:
|
||||
*
|
||||
* gl_TessLevelInner[]
|
||||
* gl_TessLevelOuter[]
|
||||
* gl_SampleMaskIn[]
|
||||
*
|
||||
* The tessellation factor arrays are lowered to vec4/vec2s
|
||||
* by lower_tess_level() before this pass occurs, so we'll
|
||||
* never see them here.
|
||||
*
|
||||
* The only remaining case is gl_SampleMaskIn[], which has
|
||||
* a length of ceil(ctx->Const.MaxSamples / 32). Most hardware
|
||||
* supports no more than 32 samples, in which case our lowering
|
||||
* produces a single read of gl_SampleMaskIn[0]. Even with 64x
|
||||
* MSAA, the array length is only 2, so the lowering is fairly
|
||||
* efficient. Therefore, lower unconditionally.
|
||||
*/
|
||||
return true;
|
||||
|
||||
case ir_var_shader_in:
|
||||
/* The input array size is unknown at compile time for non-patch
|
||||
* inputs in TCS and TES. The arrays are sized to
|
||||
|
|
|
@ -105,10 +105,6 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
|
|||
ralloc_free(shProg->InfoLog);
|
||||
shProg->InfoLog = ralloc_strdup(shProg, "");
|
||||
|
||||
ralloc_free(shProg->BufferInterfaceBlocks);
|
||||
shProg->BufferInterfaceBlocks = NULL;
|
||||
shProg->NumBufferInterfaceBlocks = 0;
|
||||
|
||||
ralloc_free(shProg->UniformBlocks);
|
||||
shProg->UniformBlocks = NULL;
|
||||
shProg->NumUniformBlocks = 0;
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include <inttypes.h>
|
||||
#include "nir_search.h"
|
||||
|
||||
struct match_state {
|
||||
|
@ -494,7 +495,7 @@ construct_value(const nir_search_value *value,
|
|||
break;
|
||||
|
||||
case nir_type_int:
|
||||
load->def.name = ralloc_asprintf(load, "%ld", c->data.i);
|
||||
load->def.name = ralloc_asprintf(load, "%" PRIi64, c->data.i);
|
||||
switch (bitsize->dest_size) {
|
||||
case 32:
|
||||
load->value.i32[0] = c->data.i;
|
||||
|
@ -508,7 +509,7 @@ construct_value(const nir_search_value *value,
|
|||
break;
|
||||
|
||||
case nir_type_uint:
|
||||
load->def.name = ralloc_asprintf(load, "%lu", c->data.u);
|
||||
load->def.name = ralloc_asprintf(load, "%" PRIu64, c->data.u);
|
||||
switch (bitsize->dest_size) {
|
||||
case 32:
|
||||
load->value.u32[0] = c->data.u;
|
||||
|
|
|
@ -38,6 +38,8 @@
|
|||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <c11/threads.h>
|
||||
#include <time.h>
|
||||
#ifdef HAVE_LIBDRM
|
||||
#include <xf86drm.h>
|
||||
#include <drm_fourcc.h>
|
||||
|
@ -623,6 +625,8 @@ dri2_setup_screen(_EGLDisplay *disp)
|
|||
disp->Extensions.KHR_cl_event2 = EGL_TRUE;
|
||||
}
|
||||
|
||||
disp->Extensions.KHR_reusable_sync = EGL_TRUE;
|
||||
|
||||
if (dri2_dpy->image) {
|
||||
if (dri2_dpy->image->base.version >= 10 &&
|
||||
dri2_dpy->image->getCapabilities != NULL) {
|
||||
|
@ -2394,7 +2398,12 @@ dri2_egl_unref_sync(struct dri2_egl_display *dri2_dpy,
|
|||
struct dri2_egl_sync *dri2_sync)
|
||||
{
|
||||
if (p_atomic_dec_zero(&dri2_sync->refcount)) {
|
||||
dri2_dpy->fence->destroy_fence(dri2_dpy->dri_screen, dri2_sync->fence);
|
||||
if (dri2_sync->base.Type == EGL_SYNC_REUSABLE_KHR)
|
||||
cnd_destroy(&dri2_sync->cond);
|
||||
|
||||
if (dri2_sync->fence)
|
||||
dri2_dpy->fence->destroy_fence(dri2_dpy->dri_screen, dri2_sync->fence);
|
||||
|
||||
free(dri2_sync);
|
||||
}
|
||||
}
|
||||
|
@ -2408,6 +2417,8 @@ dri2_create_sync(_EGLDriver *drv, _EGLDisplay *dpy,
|
|||
struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
|
||||
struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
|
||||
struct dri2_egl_sync *dri2_sync;
|
||||
EGLint ret;
|
||||
pthread_condattr_t attr;
|
||||
|
||||
dri2_sync = calloc(1, sizeof(struct dri2_egl_sync));
|
||||
if (!dri2_sync) {
|
||||
|
@ -2450,6 +2461,37 @@ dri2_create_sync(_EGLDriver *drv, _EGLDisplay *dpy,
|
|||
dri2_sync->fence, 0, 0))
|
||||
dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR;
|
||||
break;
|
||||
|
||||
case EGL_SYNC_REUSABLE_KHR:
|
||||
/* initialize attr */
|
||||
ret = pthread_condattr_init(&attr);
|
||||
|
||||
if (ret) {
|
||||
_eglError(EGL_BAD_ACCESS, "eglCreateSyncKHR");
|
||||
free(dri2_sync);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* change clock attribute to CLOCK_MONOTONIC */
|
||||
ret = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
|
||||
|
||||
if (ret) {
|
||||
_eglError(EGL_BAD_ACCESS, "eglCreateSyncKHR");
|
||||
free(dri2_sync);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ret = pthread_cond_init(&dri2_sync->cond, &attr);
|
||||
|
||||
if (ret) {
|
||||
_eglError(EGL_BAD_ACCESS, "eglCreateSyncKHR");
|
||||
free(dri2_sync);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* initial status of reusable sync must be "unsignaled" */
|
||||
dri2_sync->base.SyncStatus = EGL_UNSIGNALED_KHR;
|
||||
break;
|
||||
}
|
||||
|
||||
p_atomic_set(&dri2_sync->refcount, 1);
|
||||
|
@ -2461,9 +2503,27 @@ dri2_destroy_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync)
|
|||
{
|
||||
struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
|
||||
struct dri2_egl_sync *dri2_sync = dri2_egl_sync(sync);
|
||||
EGLint ret = EGL_TRUE;
|
||||
EGLint err;
|
||||
|
||||
/* if type of sync is EGL_SYNC_REUSABLE_KHR and it is not signaled yet,
|
||||
* then unlock all threads possibly blocked by the reusable sync before
|
||||
* destroying it.
|
||||
*/
|
||||
if (dri2_sync->base.Type == EGL_SYNC_REUSABLE_KHR &&
|
||||
dri2_sync->base.SyncStatus == EGL_UNSIGNALED_KHR) {
|
||||
dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR;
|
||||
/* unblock all threads currently blocked by sync */
|
||||
err = cnd_broadcast(&dri2_sync->cond);
|
||||
|
||||
if (err) {
|
||||
_eglError(EGL_BAD_ACCESS, "eglDestroySyncKHR");
|
||||
ret = EGL_FALSE;
|
||||
}
|
||||
}
|
||||
dri2_egl_unref_sync(dri2_dpy, dri2_sync);
|
||||
return EGL_TRUE;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static EGLint
|
||||
|
@ -2471,10 +2531,16 @@ dri2_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
|
|||
EGLint flags, EGLTime timeout)
|
||||
{
|
||||
_EGLContext *ctx = _eglGetCurrentContext();
|
||||
struct dri2_egl_driver *dri2_drv = dri2_egl_driver(drv);
|
||||
struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
|
||||
struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
|
||||
struct dri2_egl_sync *dri2_sync = dri2_egl_sync(sync);
|
||||
unsigned wait_flags = 0;
|
||||
|
||||
/* timespecs for cnd_timedwait */
|
||||
struct timespec current;
|
||||
xtime expire;
|
||||
|
||||
EGLint ret = EGL_CONDITION_SATISFIED_KHR;
|
||||
|
||||
/* The EGL_KHR_fence_sync spec states:
|
||||
|
@ -2488,17 +2554,130 @@ dri2_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
|
|||
/* the sync object should take a reference while waiting */
|
||||
dri2_egl_ref_sync(dri2_sync);
|
||||
|
||||
if (dri2_dpy->fence->client_wait_sync(dri2_ctx ? dri2_ctx->dri_context : NULL,
|
||||
switch (sync->Type) {
|
||||
case EGL_SYNC_FENCE_KHR:
|
||||
case EGL_SYNC_CL_EVENT_KHR:
|
||||
if (dri2_dpy->fence->client_wait_sync(dri2_ctx ? dri2_ctx->dri_context : NULL,
|
||||
dri2_sync->fence, wait_flags,
|
||||
timeout))
|
||||
dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR;
|
||||
else
|
||||
ret = EGL_TIMEOUT_EXPIRED_KHR;
|
||||
dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR;
|
||||
else
|
||||
ret = EGL_TIMEOUT_EXPIRED_KHR;
|
||||
break;
|
||||
|
||||
case EGL_SYNC_REUSABLE_KHR:
|
||||
if (dri2_ctx && dri2_sync->base.SyncStatus == EGL_UNSIGNALED_KHR &&
|
||||
(flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR)) {
|
||||
/* flush context if EGL_SYNC_FLUSH_COMMANDS_BIT_KHR is set */
|
||||
if (dri2_drv->glFlush)
|
||||
dri2_drv->glFlush();
|
||||
}
|
||||
|
||||
/* if timeout is EGL_FOREVER_KHR, it should wait without any timeout.*/
|
||||
if (timeout == EGL_FOREVER_KHR) {
|
||||
if (mtx_lock(&dri2_sync->mutex)) {
|
||||
ret = EGL_FALSE;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
ret = cnd_wait(&dri2_sync->cond, &dri2_sync->mutex);
|
||||
|
||||
if (mtx_unlock(&dri2_sync->mutex)) {
|
||||
ret = EGL_FALSE;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
_eglError(EGL_BAD_PARAMETER, "eglClientWaitSyncKHR");
|
||||
ret = EGL_FALSE;
|
||||
}
|
||||
} else {
|
||||
/* if reusable sync has not yet been signaled */
|
||||
if (dri2_sync->base.SyncStatus != EGL_SIGNALED_KHR) {
|
||||
clock_gettime(CLOCK_MONOTONIC, &current);
|
||||
|
||||
/* calculating when to expire */
|
||||
expire.nsec = timeout % 1000000000L;
|
||||
expire.sec = timeout / 1000000000L;
|
||||
|
||||
expire.nsec += current.tv_nsec;
|
||||
expire.sec += current.tv_sec;
|
||||
|
||||
/* expire.nsec now is a number between 0 and 1999999998 */
|
||||
if (expire.nsec > 999999999L) {
|
||||
expire.sec++;
|
||||
expire.nsec -= 1000000000L;
|
||||
}
|
||||
|
||||
if (mtx_lock(&dri2_sync->mutex)) {
|
||||
ret = EGL_FALSE;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
ret = cnd_timedwait(&dri2_sync->cond, &dri2_sync->mutex, &expire);
|
||||
|
||||
if (mtx_unlock(&dri2_sync->mutex)) {
|
||||
ret = EGL_FALSE;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
if (ret == thrd_busy) {
|
||||
if (dri2_sync->base.SyncStatus == EGL_UNSIGNALED_KHR) {
|
||||
ret = EGL_TIMEOUT_EXPIRED_KHR;
|
||||
} else {
|
||||
_eglError(EGL_BAD_ACCESS, "eglClientWaitSyncKHR");
|
||||
ret = EGL_FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
cleanup:
|
||||
dri2_egl_unref_sync(dri2_dpy, dri2_sync);
|
||||
|
||||
if (ret == EGL_FALSE) {
|
||||
_eglError(EGL_BAD_ACCESS, "eglClientWaitSyncKHR");
|
||||
return EGL_FALSE;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static EGLBoolean
|
||||
dri2_signal_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
|
||||
EGLenum mode)
|
||||
{
|
||||
struct dri2_egl_sync *dri2_sync = dri2_egl_sync(sync);
|
||||
EGLint ret;
|
||||
|
||||
if (sync->Type != EGL_SYNC_REUSABLE_KHR) {
|
||||
_eglError(EGL_BAD_MATCH, "eglSignalSyncKHR");
|
||||
return EGL_FALSE;
|
||||
}
|
||||
|
||||
if (mode != EGL_SIGNALED_KHR && mode != EGL_UNSIGNALED_KHR) {
|
||||
_eglError(EGL_BAD_ATTRIBUTE, "eglSignalSyncKHR");
|
||||
return EGL_FALSE;
|
||||
}
|
||||
|
||||
dri2_sync->base.SyncStatus = mode;
|
||||
|
||||
if (mode == EGL_SIGNALED_KHR) {
|
||||
ret = cnd_broadcast(&dri2_sync->cond);
|
||||
|
||||
/* fail to broadcast */
|
||||
if (ret) {
|
||||
_eglError(EGL_BAD_ACCESS, "eglSignalSyncKHR");
|
||||
return EGL_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
return EGL_TRUE;
|
||||
}
|
||||
|
||||
static EGLint
|
||||
dri2_server_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync)
|
||||
{
|
||||
|
@ -2620,6 +2799,7 @@ _eglBuiltInDriverDRI2(const char *args)
|
|||
dri2_drv->base.API.GetSyncValuesCHROMIUM = dri2_get_sync_values_chromium;
|
||||
dri2_drv->base.API.CreateSyncKHR = dri2_create_sync;
|
||||
dri2_drv->base.API.ClientWaitSyncKHR = dri2_client_wait_sync;
|
||||
dri2_drv->base.API.SignalSyncKHR = dri2_signal_sync;
|
||||
dri2_drv->base.API.WaitSyncKHR = dri2_server_wait_sync;
|
||||
dri2_drv->base.API.DestroySyncKHR = dri2_destroy_sync;
|
||||
|
||||
|
|
|
@ -307,6 +307,8 @@ struct dri2_egl_image
|
|||
|
||||
struct dri2_egl_sync {
|
||||
_EGLSync base;
|
||||
mtx_t mutex;
|
||||
cnd_t cond;
|
||||
int refcount;
|
||||
void *fence;
|
||||
};
|
||||
|
|
|
@ -1469,9 +1469,24 @@ eglClientWaitSync(EGLDisplay dpy, EGLSync sync, EGLint flags, EGLTime timeout)
|
|||
if (s->SyncStatus == EGL_SIGNALED_KHR)
|
||||
RETURN_EGL_EVAL(disp, EGL_CONDITION_SATISFIED_KHR);
|
||||
|
||||
/* if sync type is EGL_SYNC_REUSABLE_KHR, dpy should be
|
||||
* unlocked here to allow other threads also to be able to
|
||||
* go into waiting state.
|
||||
*/
|
||||
|
||||
if (s->Type == EGL_SYNC_REUSABLE_KHR)
|
||||
_eglUnlockDisplay(dpy);
|
||||
|
||||
ret = drv->API.ClientWaitSyncKHR(drv, disp, s, flags, timeout);
|
||||
|
||||
RETURN_EGL_EVAL(disp, ret);
|
||||
/*
|
||||
* 'disp' is already unlocked for reusable sync type,
|
||||
* so passing 'NULL' to bypass unlocking display.
|
||||
*/
|
||||
if (s->Type == EGL_SYNC_REUSABLE_KHR)
|
||||
RETURN_EGL_EVAL(NULL, ret);
|
||||
else
|
||||
RETURN_EGL_EVAL(disp, ret);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -152,7 +152,8 @@ _eglGetSyncAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
|
|||
/* update the sync status */
|
||||
if (sync->SyncStatus != EGL_SIGNALED_KHR &&
|
||||
(sync->Type == EGL_SYNC_FENCE_KHR ||
|
||||
sync->Type == EGL_SYNC_CL_EVENT_KHR))
|
||||
sync->Type == EGL_SYNC_CL_EVENT_KHR ||
|
||||
sync->Type == EGL_SYNC_REUSABLE_KHR))
|
||||
drv->API.ClientWaitSyncKHR(drv, dpy, sync, 0, 0);
|
||||
|
||||
*value = sync->SyncStatus;
|
||||
|
|
|
@ -163,7 +163,7 @@ aa_transform_decl(struct tgsi_transform_context *ctx,
|
|||
uint i;
|
||||
for (i = decl->Range.First;
|
||||
i <= decl->Range.Last; i++) {
|
||||
aactx->samplersUsed |= 1 << i;
|
||||
aactx->samplersUsed |= 1u << i;
|
||||
}
|
||||
}
|
||||
else if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
|
||||
|
@ -208,9 +208,11 @@ aa_transform_prolog(struct tgsi_transform_context *ctx)
|
|||
struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
|
||||
uint i;
|
||||
|
||||
STATIC_ASSERT(sizeof(aactx->samplersUsed) * 8 >= PIPE_MAX_SAMPLERS);
|
||||
|
||||
/* find free sampler */
|
||||
aactx->freeSampler = free_bit(aactx->samplersUsed);
|
||||
if (aactx->freeSampler >= PIPE_MAX_SAMPLERS)
|
||||
if (aactx->freeSampler < 0 || aactx->freeSampler >= PIPE_MAX_SAMPLERS)
|
||||
aactx->freeSampler = PIPE_MAX_SAMPLERS - 1;
|
||||
|
||||
/* find two free temp regs */
|
||||
|
|
|
@ -464,7 +464,8 @@ scan_declaration(struct tgsi_shader_info *info,
|
|||
}
|
||||
}
|
||||
} else if (file == TGSI_FILE_SAMPLER) {
|
||||
info->samplers_declared |= 1 << reg;
|
||||
STATIC_ASSERT(sizeof(info->samplers_declared) * 8 >= PIPE_MAX_SAMPLERS);
|
||||
info->samplers_declared |= 1u << reg;
|
||||
} else if (file == TGSI_FILE_SAMPLER_VIEW) {
|
||||
unsigned target = fulldecl->SamplerView.Resource;
|
||||
assert(target < TGSI_TEXTURE_UNKNOWN);
|
||||
|
|
|
@ -645,6 +645,8 @@ util_dump_framebuffer_state(FILE *stream, const struct pipe_framebuffer_state *s
|
|||
|
||||
util_dump_member(stream, uint, state, width);
|
||||
util_dump_member(stream, uint, state, height);
|
||||
util_dump_member(stream, uint, state, samples);
|
||||
util_dump_member(stream, uint, state, layers);
|
||||
util_dump_member(stream, uint, state, nr_cbufs);
|
||||
util_dump_member_array(stream, ptr, state, cbufs);
|
||||
util_dump_member(stream, ptr, state, zsbuf);
|
||||
|
|
|
@ -55,6 +55,10 @@ util_framebuffer_state_equal(const struct pipe_framebuffer_state *dst,
|
|||
dst->height != src->height)
|
||||
return FALSE;
|
||||
|
||||
if (dst->samples != src->samples ||
|
||||
dst->layers != src->layers)
|
||||
return FALSE;
|
||||
|
||||
if (dst->nr_cbufs != src->nr_cbufs) {
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -85,6 +89,9 @@ util_copy_framebuffer_state(struct pipe_framebuffer_state *dst,
|
|||
dst->width = src->width;
|
||||
dst->height = src->height;
|
||||
|
||||
dst->samples = src->samples;
|
||||
dst->layers = src->layers;
|
||||
|
||||
for (i = 0; i < src->nr_cbufs; i++)
|
||||
pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]);
|
||||
|
||||
|
@ -109,6 +116,7 @@ util_unreference_framebuffer_state(struct pipe_framebuffer_state *fb)
|
|||
|
||||
pipe_surface_reference(&fb->zsbuf, NULL);
|
||||
|
||||
fb->samples = fb->layers = 0;
|
||||
fb->width = fb->height = 0;
|
||||
fb->nr_cbufs = 0;
|
||||
}
|
||||
|
@ -160,6 +168,14 @@ util_framebuffer_get_num_layers(const struct pipe_framebuffer_state *fb)
|
|||
{
|
||||
unsigned i, num_layers = 0;
|
||||
|
||||
/**
|
||||
* In the case of ARB_framebuffer_no_attachments
|
||||
* we obtain the number of layers directly from
|
||||
* the framebuffer state.
|
||||
*/
|
||||
if (!(fb->nr_cbufs || fb->zsbuf))
|
||||
return fb->layers;
|
||||
|
||||
for (i = 0; i < fb->nr_cbufs; i++) {
|
||||
if (fb->cbufs[i]) {
|
||||
unsigned num = fb->cbufs[i]->u.tex.last_layer -
|
||||
|
@ -184,6 +200,20 @@ util_framebuffer_get_num_samples(const struct pipe_framebuffer_state *fb)
|
|||
{
|
||||
unsigned i;
|
||||
|
||||
/**
|
||||
* In the case of ARB_framebuffer_no_attachments
|
||||
* we obtain the number of samples directly from
|
||||
* the framebuffer state.
|
||||
*
|
||||
* NOTE: fb->samples may wind up as zero due to memset()'s on internal
|
||||
* driver structures on their initialization and so we take the
|
||||
* MAX here to ensure we have a valid number of samples. However,
|
||||
* if samples is legitimately not getting set somewhere
|
||||
* multi-sampling will evidently break.
|
||||
*/
|
||||
if (!(fb->nr_cbufs || fb->zsbuf))
|
||||
return MAX2(fb->samples, 1);
|
||||
|
||||
for (i = 0; i < fb->nr_cbufs; i++) {
|
||||
if (fb->cbufs[i]) {
|
||||
return MAX2(1, fb->cbufs[i]->texture->nr_samples);
|
||||
|
|
|
@ -204,7 +204,7 @@ pstip_transform_decl(struct tgsi_transform_context *ctx,
|
|||
if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
|
||||
uint i;
|
||||
for (i = decl->Range.First; i <= decl->Range.Last; i++) {
|
||||
pctx->samplersUsed |= 1 << i;
|
||||
pctx->samplersUsed |= 1u << i;
|
||||
}
|
||||
}
|
||||
else if (decl->Declaration.File == pctx->wincoordFile) {
|
||||
|
@ -266,9 +266,11 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)
|
|||
int texTemp;
|
||||
int sampIdx;
|
||||
|
||||
STATIC_ASSERT(sizeof(pctx->samplersUsed) * 8 >= PIPE_MAX_SAMPLERS);
|
||||
|
||||
/* find free texture sampler */
|
||||
pctx->freeSampler = free_bit(pctx->samplersUsed);
|
||||
if (pctx->freeSampler >= PIPE_MAX_SAMPLERS)
|
||||
if (pctx->freeSampler < 0 || pctx->freeSampler >= PIPE_MAX_SAMPLERS)
|
||||
pctx->freeSampler = PIPE_MAX_SAMPLERS - 1;
|
||||
|
||||
if (pctx->wincoordInput < 0)
|
||||
|
|
|
@ -323,6 +323,14 @@ The integer capabilities:
|
|||
* ``PIPE_CAP_PCI_BUS``: Return the PCI bus number.
|
||||
* ``PIPE_CAP_PCI_DEVICE``: Return the PCI device number.
|
||||
* ``PIPE_CAP_PCI_FUNCTION``: Return the PCI function number.
|
||||
* ``PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT``:
|
||||
If non-zero, rendering to framebuffers with no surface attachments
|
||||
is supported. The context->is_format_supported function will be expected
|
||||
to be implemented with PIPE_FORMAT_NONE yielding the MSAA modes the hardware
|
||||
supports. N.B., The maximum number of layers supported for rasterizing a
|
||||
primitive on a layer is obtained from ``PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS``
|
||||
even though it can be larger than the number of layers supported by either
|
||||
rendering or textures.
|
||||
|
||||
|
||||
.. _pipe_capf:
|
||||
|
|
|
@ -255,6 +255,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_INVALIDATE_BUFFER:
|
||||
case PIPE_CAP_GENERATE_MIPMAP:
|
||||
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
|
||||
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_MAX_VIEWPORTS:
|
||||
|
|
|
@ -243,7 +243,7 @@ static void print_instr_cat2(instr_t *instr)
|
|||
"?6?",
|
||||
};
|
||||
|
||||
switch (cat2->opc) {
|
||||
switch (_OPC(2, cat2->opc)) {
|
||||
case OPC_CMPS_F:
|
||||
case OPC_CMPS_U:
|
||||
case OPC_CMPS_S:
|
||||
|
@ -274,7 +274,7 @@ static void print_instr_cat2(instr_t *instr)
|
|||
cat2->src1_abs, false);
|
||||
}
|
||||
|
||||
switch (cat2->opc) {
|
||||
switch (_OPC(2, cat2->opc)) {
|
||||
case OPC_ABSNEG_F:
|
||||
case OPC_ABSNEG_S:
|
||||
case OPC_CLZ_B:
|
||||
|
@ -382,34 +382,34 @@ static void print_instr_cat5(instr_t *instr)
|
|||
static const struct {
|
||||
bool src1, src2, samp, tex;
|
||||
} info[0x1f] = {
|
||||
[OPC_ISAM] = { true, false, true, true, },
|
||||
[OPC_ISAML] = { true, true, true, true, },
|
||||
[OPC_ISAMM] = { true, false, true, true, },
|
||||
[OPC_SAM] = { true, false, true, true, },
|
||||
[OPC_SAMB] = { true, true, true, true, },
|
||||
[OPC_SAML] = { true, true, true, true, },
|
||||
[OPC_SAMGQ] = { true, false, true, true, },
|
||||
[OPC_GETLOD] = { true, false, true, true, },
|
||||
[OPC_CONV] = { true, true, true, true, },
|
||||
[OPC_CONVM] = { true, true, true, true, },
|
||||
[OPC_GETSIZE] = { true, false, false, true, },
|
||||
[OPC_GETBUF] = { false, false, false, true, },
|
||||
[OPC_GETPOS] = { true, false, false, true, },
|
||||
[OPC_GETINFO] = { false, false, false, true, },
|
||||
[OPC_DSX] = { true, false, false, false, },
|
||||
[OPC_DSY] = { true, false, false, false, },
|
||||
[OPC_GATHER4R] = { true, false, true, true, },
|
||||
[OPC_GATHER4G] = { true, false, true, true, },
|
||||
[OPC_GATHER4B] = { true, false, true, true, },
|
||||
[OPC_GATHER4A] = { true, false, true, true, },
|
||||
[OPC_SAMGP0] = { true, false, true, true, },
|
||||
[OPC_SAMGP1] = { true, false, true, true, },
|
||||
[OPC_SAMGP2] = { true, false, true, true, },
|
||||
[OPC_SAMGP3] = { true, false, true, true, },
|
||||
[OPC_DSXPP_1] = { true, false, false, false, },
|
||||
[OPC_DSYPP_1] = { true, false, false, false, },
|
||||
[OPC_RGETPOS] = { false, false, false, false, },
|
||||
[OPC_RGETINFO] = { false, false, false, false, },
|
||||
[opc_op(OPC_ISAM)] = { true, false, true, true, },
|
||||
[opc_op(OPC_ISAML)] = { true, true, true, true, },
|
||||
[opc_op(OPC_ISAMM)] = { true, false, true, true, },
|
||||
[opc_op(OPC_SAM)] = { true, false, true, true, },
|
||||
[opc_op(OPC_SAMB)] = { true, true, true, true, },
|
||||
[opc_op(OPC_SAML)] = { true, true, true, true, },
|
||||
[opc_op(OPC_SAMGQ)] = { true, false, true, true, },
|
||||
[opc_op(OPC_GETLOD)] = { true, false, true, true, },
|
||||
[opc_op(OPC_CONV)] = { true, true, true, true, },
|
||||
[opc_op(OPC_CONVM)] = { true, true, true, true, },
|
||||
[opc_op(OPC_GETSIZE)] = { true, false, false, true, },
|
||||
[opc_op(OPC_GETBUF)] = { false, false, false, true, },
|
||||
[opc_op(OPC_GETPOS)] = { true, false, false, true, },
|
||||
[opc_op(OPC_GETINFO)] = { false, false, false, true, },
|
||||
[opc_op(OPC_DSX)] = { true, false, false, false, },
|
||||
[opc_op(OPC_DSY)] = { true, false, false, false, },
|
||||
[opc_op(OPC_GATHER4R)] = { true, false, true, true, },
|
||||
[opc_op(OPC_GATHER4G)] = { true, false, true, true, },
|
||||
[opc_op(OPC_GATHER4B)] = { true, false, true, true, },
|
||||
[opc_op(OPC_GATHER4A)] = { true, false, true, true, },
|
||||
[opc_op(OPC_SAMGP0)] = { true, false, true, true, },
|
||||
[opc_op(OPC_SAMGP1)] = { true, false, true, true, },
|
||||
[opc_op(OPC_SAMGP2)] = { true, false, true, true, },
|
||||
[opc_op(OPC_SAMGP3)] = { true, false, true, true, },
|
||||
[opc_op(OPC_DSXPP_1)] = { true, false, false, false, },
|
||||
[opc_op(OPC_DSYPP_1)] = { true, false, false, false, },
|
||||
[opc_op(OPC_RGETPOS)] = { false, false, false, false, },
|
||||
[opc_op(OPC_RGETINFO)] = { false, false, false, false, },
|
||||
};
|
||||
instr_cat5_t *cat5 = &instr->cat5;
|
||||
int i;
|
||||
|
@ -423,7 +423,7 @@ static void print_instr_cat5(instr_t *instr)
|
|||
|
||||
printf(" ");
|
||||
|
||||
switch (cat5->opc) {
|
||||
switch (_OPC(5, cat5->opc)) {
|
||||
case OPC_DSXPP_1:
|
||||
case OPC_DSYPP_1:
|
||||
break;
|
||||
|
@ -488,7 +488,7 @@ static void print_instr_cat6(instr_t *instr)
|
|||
memset(&src1, 0, sizeof(src1));
|
||||
memset(&src2, 0, sizeof(src2));
|
||||
|
||||
switch (cat6->opc) {
|
||||
switch (_OPC(6, cat6->opc)) {
|
||||
case OPC_RESINFO:
|
||||
case OPC_RESFMT:
|
||||
dst.full = type_size(cat6->type) == 32;
|
||||
|
@ -519,7 +519,7 @@ static void print_instr_cat6(instr_t *instr)
|
|||
break;
|
||||
}
|
||||
|
||||
switch (cat6->opc) {
|
||||
switch (_OPC(6, cat6->opc)) {
|
||||
case OPC_PREFETCH:
|
||||
case OPC_RESINFO:
|
||||
break;
|
||||
|
@ -545,7 +545,7 @@ static void print_instr_cat6(instr_t *instr)
|
|||
}
|
||||
printf(" ");
|
||||
|
||||
switch (cat6->opc) {
|
||||
switch (_OPC(6, cat6->opc)) {
|
||||
case OPC_STG:
|
||||
sd = 'g';
|
||||
break;
|
||||
|
@ -636,7 +636,7 @@ static void print_instr_cat6(instr_t *instr)
|
|||
if (ss)
|
||||
printf("]");
|
||||
|
||||
switch (cat6->opc) {
|
||||
switch (_OPC(6, cat6->opc)) {
|
||||
case OPC_RESINFO:
|
||||
case OPC_RESFMT:
|
||||
break;
|
||||
|
@ -656,7 +656,7 @@ static const struct opc_info {
|
|||
const char *name;
|
||||
void (*print)(instr_t *instr);
|
||||
} opcs[1 << (3+NOPC_BITS)] = {
|
||||
#define OPC(cat, opc, name) [((cat) << NOPC_BITS) | (opc)] = { (cat), (opc), #name, print_instr_cat##cat }
|
||||
#define OPC(cat, opc, name) [(opc)] = { (cat), (opc), #name, print_instr_cat##cat }
|
||||
/* category 0: */
|
||||
OPC(0, OPC_NOP, nop),
|
||||
OPC(0, OPC_BR, br),
|
||||
|
@ -672,7 +672,7 @@ static const struct opc_info {
|
|||
OPC(0, OPC_FLOW_REV, flow_rev),
|
||||
|
||||
/* category 1: */
|
||||
OPC(1, 0, ),
|
||||
OPC(1, OPC_MOV, ),
|
||||
|
||||
/* category 2: */
|
||||
OPC(2, OPC_ADD_F, add.f),
|
||||
|
@ -822,8 +822,8 @@ static const struct opc_info {
|
|||
#include "ir3.h"
|
||||
const char *ir3_instr_name(struct ir3_instruction *instr)
|
||||
{
|
||||
if (instr->category == -1) return "??meta??";
|
||||
return opcs[(instr->category << NOPC_BITS) | instr->opc].name;
|
||||
if (opc_cat(instr->opc) == -1) return "??meta??";
|
||||
return opcs[instr->opc].name;
|
||||
}
|
||||
|
||||
static void print_instr(uint32_t *dwords, int level, int n)
|
||||
|
|
|
@ -29,181 +29,189 @@
|
|||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
|
||||
/* size of largest OPC field of all the instruction categories: */
|
||||
#define NOPC_BITS 6
|
||||
|
||||
#define _OPC(cat, opc) (((cat) << NOPC_BITS) | opc)
|
||||
|
||||
typedef enum {
|
||||
/* category 0: */
|
||||
OPC_NOP = 0,
|
||||
OPC_BR = 1,
|
||||
OPC_JUMP = 2,
|
||||
OPC_CALL = 3,
|
||||
OPC_RET = 4,
|
||||
OPC_KILL = 5,
|
||||
OPC_END = 6,
|
||||
OPC_EMIT = 7,
|
||||
OPC_CUT = 8,
|
||||
OPC_CHMASK = 9,
|
||||
OPC_CHSH = 10,
|
||||
OPC_FLOW_REV = 11,
|
||||
OPC_NOP = _OPC(0, 0),
|
||||
OPC_BR = _OPC(0, 1),
|
||||
OPC_JUMP = _OPC(0, 2),
|
||||
OPC_CALL = _OPC(0, 3),
|
||||
OPC_RET = _OPC(0, 4),
|
||||
OPC_KILL = _OPC(0, 5),
|
||||
OPC_END = _OPC(0, 6),
|
||||
OPC_EMIT = _OPC(0, 7),
|
||||
OPC_CUT = _OPC(0, 8),
|
||||
OPC_CHMASK = _OPC(0, 9),
|
||||
OPC_CHSH = _OPC(0, 10),
|
||||
OPC_FLOW_REV = _OPC(0, 11),
|
||||
|
||||
/* category 1: */
|
||||
/* no opc.. all category 1 are variants of mov */
|
||||
OPC_MOV = _OPC(1, 0),
|
||||
|
||||
/* category 2: */
|
||||
OPC_ADD_F = 0,
|
||||
OPC_MIN_F = 1,
|
||||
OPC_MAX_F = 2,
|
||||
OPC_MUL_F = 3,
|
||||
OPC_SIGN_F = 4,
|
||||
OPC_CMPS_F = 5,
|
||||
OPC_ABSNEG_F = 6,
|
||||
OPC_CMPV_F = 7,
|
||||
OPC_ADD_F = _OPC(2, 0),
|
||||
OPC_MIN_F = _OPC(2, 1),
|
||||
OPC_MAX_F = _OPC(2, 2),
|
||||
OPC_MUL_F = _OPC(2, 3),
|
||||
OPC_SIGN_F = _OPC(2, 4),
|
||||
OPC_CMPS_F = _OPC(2, 5),
|
||||
OPC_ABSNEG_F = _OPC(2, 6),
|
||||
OPC_CMPV_F = _OPC(2, 7),
|
||||
/* 8 - invalid */
|
||||
OPC_FLOOR_F = 9,
|
||||
OPC_CEIL_F = 10,
|
||||
OPC_RNDNE_F = 11,
|
||||
OPC_RNDAZ_F = 12,
|
||||
OPC_TRUNC_F = 13,
|
||||
OPC_FLOOR_F = _OPC(2, 9),
|
||||
OPC_CEIL_F = _OPC(2, 10),
|
||||
OPC_RNDNE_F = _OPC(2, 11),
|
||||
OPC_RNDAZ_F = _OPC(2, 12),
|
||||
OPC_TRUNC_F = _OPC(2, 13),
|
||||
/* 14-15 - invalid */
|
||||
OPC_ADD_U = 16,
|
||||
OPC_ADD_S = 17,
|
||||
OPC_SUB_U = 18,
|
||||
OPC_SUB_S = 19,
|
||||
OPC_CMPS_U = 20,
|
||||
OPC_CMPS_S = 21,
|
||||
OPC_MIN_U = 22,
|
||||
OPC_MIN_S = 23,
|
||||
OPC_MAX_U = 24,
|
||||
OPC_MAX_S = 25,
|
||||
OPC_ABSNEG_S = 26,
|
||||
OPC_ADD_U = _OPC(2, 16),
|
||||
OPC_ADD_S = _OPC(2, 17),
|
||||
OPC_SUB_U = _OPC(2, 18),
|
||||
OPC_SUB_S = _OPC(2, 19),
|
||||
OPC_CMPS_U = _OPC(2, 20),
|
||||
OPC_CMPS_S = _OPC(2, 21),
|
||||
OPC_MIN_U = _OPC(2, 22),
|
||||
OPC_MIN_S = _OPC(2, 23),
|
||||
OPC_MAX_U = _OPC(2, 24),
|
||||
OPC_MAX_S = _OPC(2, 25),
|
||||
OPC_ABSNEG_S = _OPC(2, 26),
|
||||
/* 27 - invalid */
|
||||
OPC_AND_B = 28,
|
||||
OPC_OR_B = 29,
|
||||
OPC_NOT_B = 30,
|
||||
OPC_XOR_B = 31,
|
||||
OPC_AND_B = _OPC(2, 28),
|
||||
OPC_OR_B = _OPC(2, 29),
|
||||
OPC_NOT_B = _OPC(2, 30),
|
||||
OPC_XOR_B = _OPC(2, 31),
|
||||
/* 32 - invalid */
|
||||
OPC_CMPV_U = 33,
|
||||
OPC_CMPV_S = 34,
|
||||
OPC_CMPV_U = _OPC(2, 33),
|
||||
OPC_CMPV_S = _OPC(2, 34),
|
||||
/* 35-47 - invalid */
|
||||
OPC_MUL_U = 48,
|
||||
OPC_MUL_S = 49,
|
||||
OPC_MULL_U = 50,
|
||||
OPC_BFREV_B = 51,
|
||||
OPC_CLZ_S = 52,
|
||||
OPC_CLZ_B = 53,
|
||||
OPC_SHL_B = 54,
|
||||
OPC_SHR_B = 55,
|
||||
OPC_ASHR_B = 56,
|
||||
OPC_BARY_F = 57,
|
||||
OPC_MGEN_B = 58,
|
||||
OPC_GETBIT_B = 59,
|
||||
OPC_SETRM = 60,
|
||||
OPC_CBITS_B = 61,
|
||||
OPC_SHB = 62,
|
||||
OPC_MSAD = 63,
|
||||
OPC_MUL_U = _OPC(2, 48),
|
||||
OPC_MUL_S = _OPC(2, 49),
|
||||
OPC_MULL_U = _OPC(2, 50),
|
||||
OPC_BFREV_B = _OPC(2, 51),
|
||||
OPC_CLZ_S = _OPC(2, 52),
|
||||
OPC_CLZ_B = _OPC(2, 53),
|
||||
OPC_SHL_B = _OPC(2, 54),
|
||||
OPC_SHR_B = _OPC(2, 55),
|
||||
OPC_ASHR_B = _OPC(2, 56),
|
||||
OPC_BARY_F = _OPC(2, 57),
|
||||
OPC_MGEN_B = _OPC(2, 58),
|
||||
OPC_GETBIT_B = _OPC(2, 59),
|
||||
OPC_SETRM = _OPC(2, 60),
|
||||
OPC_CBITS_B = _OPC(2, 61),
|
||||
OPC_SHB = _OPC(2, 62),
|
||||
OPC_MSAD = _OPC(2, 63),
|
||||
|
||||
/* category 3: */
|
||||
OPC_MAD_U16 = 0,
|
||||
OPC_MADSH_U16 = 1,
|
||||
OPC_MAD_S16 = 2,
|
||||
OPC_MADSH_M16 = 3, /* should this be .s16? */
|
||||
OPC_MAD_U24 = 4,
|
||||
OPC_MAD_S24 = 5,
|
||||
OPC_MAD_F16 = 6,
|
||||
OPC_MAD_F32 = 7,
|
||||
OPC_SEL_B16 = 8,
|
||||
OPC_SEL_B32 = 9,
|
||||
OPC_SEL_S16 = 10,
|
||||
OPC_SEL_S32 = 11,
|
||||
OPC_SEL_F16 = 12,
|
||||
OPC_SEL_F32 = 13,
|
||||
OPC_SAD_S16 = 14,
|
||||
OPC_SAD_S32 = 15,
|
||||
OPC_MAD_U16 = _OPC(3, 0),
|
||||
OPC_MADSH_U16 = _OPC(3, 1),
|
||||
OPC_MAD_S16 = _OPC(3, 2),
|
||||
OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? */
|
||||
OPC_MAD_U24 = _OPC(3, 4),
|
||||
OPC_MAD_S24 = _OPC(3, 5),
|
||||
OPC_MAD_F16 = _OPC(3, 6),
|
||||
OPC_MAD_F32 = _OPC(3, 7),
|
||||
OPC_SEL_B16 = _OPC(3, 8),
|
||||
OPC_SEL_B32 = _OPC(3, 9),
|
||||
OPC_SEL_S16 = _OPC(3, 10),
|
||||
OPC_SEL_S32 = _OPC(3, 11),
|
||||
OPC_SEL_F16 = _OPC(3, 12),
|
||||
OPC_SEL_F32 = _OPC(3, 13),
|
||||
OPC_SAD_S16 = _OPC(3, 14),
|
||||
OPC_SAD_S32 = _OPC(3, 15),
|
||||
|
||||
/* category 4: */
|
||||
OPC_RCP = 0,
|
||||
OPC_RSQ = 1,
|
||||
OPC_LOG2 = 2,
|
||||
OPC_EXP2 = 3,
|
||||
OPC_SIN = 4,
|
||||
OPC_COS = 5,
|
||||
OPC_SQRT = 6,
|
||||
OPC_RCP = _OPC(4, 0),
|
||||
OPC_RSQ = _OPC(4, 1),
|
||||
OPC_LOG2 = _OPC(4, 2),
|
||||
OPC_EXP2 = _OPC(4, 3),
|
||||
OPC_SIN = _OPC(4, 4),
|
||||
OPC_COS = _OPC(4, 5),
|
||||
OPC_SQRT = _OPC(4, 6),
|
||||
// 7-63 - invalid
|
||||
|
||||
/* category 5: */
|
||||
OPC_ISAM = 0,
|
||||
OPC_ISAML = 1,
|
||||
OPC_ISAMM = 2,
|
||||
OPC_SAM = 3,
|
||||
OPC_SAMB = 4,
|
||||
OPC_SAML = 5,
|
||||
OPC_SAMGQ = 6,
|
||||
OPC_GETLOD = 7,
|
||||
OPC_CONV = 8,
|
||||
OPC_CONVM = 9,
|
||||
OPC_GETSIZE = 10,
|
||||
OPC_GETBUF = 11,
|
||||
OPC_GETPOS = 12,
|
||||
OPC_GETINFO = 13,
|
||||
OPC_DSX = 14,
|
||||
OPC_DSY = 15,
|
||||
OPC_GATHER4R = 16,
|
||||
OPC_GATHER4G = 17,
|
||||
OPC_GATHER4B = 18,
|
||||
OPC_GATHER4A = 19,
|
||||
OPC_SAMGP0 = 20,
|
||||
OPC_SAMGP1 = 21,
|
||||
OPC_SAMGP2 = 22,
|
||||
OPC_SAMGP3 = 23,
|
||||
OPC_DSXPP_1 = 24,
|
||||
OPC_DSYPP_1 = 25,
|
||||
OPC_RGETPOS = 26,
|
||||
OPC_RGETINFO = 27,
|
||||
OPC_ISAM = _OPC(5, 0),
|
||||
OPC_ISAML = _OPC(5, 1),
|
||||
OPC_ISAMM = _OPC(5, 2),
|
||||
OPC_SAM = _OPC(5, 3),
|
||||
OPC_SAMB = _OPC(5, 4),
|
||||
OPC_SAML = _OPC(5, 5),
|
||||
OPC_SAMGQ = _OPC(5, 6),
|
||||
OPC_GETLOD = _OPC(5, 7),
|
||||
OPC_CONV = _OPC(5, 8),
|
||||
OPC_CONVM = _OPC(5, 9),
|
||||
OPC_GETSIZE = _OPC(5, 10),
|
||||
OPC_GETBUF = _OPC(5, 11),
|
||||
OPC_GETPOS = _OPC(5, 12),
|
||||
OPC_GETINFO = _OPC(5, 13),
|
||||
OPC_DSX = _OPC(5, 14),
|
||||
OPC_DSY = _OPC(5, 15),
|
||||
OPC_GATHER4R = _OPC(5, 16),
|
||||
OPC_GATHER4G = _OPC(5, 17),
|
||||
OPC_GATHER4B = _OPC(5, 18),
|
||||
OPC_GATHER4A = _OPC(5, 19),
|
||||
OPC_SAMGP0 = _OPC(5, 20),
|
||||
OPC_SAMGP1 = _OPC(5, 21),
|
||||
OPC_SAMGP2 = _OPC(5, 22),
|
||||
OPC_SAMGP3 = _OPC(5, 23),
|
||||
OPC_DSXPP_1 = _OPC(5, 24),
|
||||
OPC_DSYPP_1 = _OPC(5, 25),
|
||||
OPC_RGETPOS = _OPC(5, 26),
|
||||
OPC_RGETINFO = _OPC(5, 27),
|
||||
|
||||
/* category 6: */
|
||||
OPC_LDG = 0, /* load-global */
|
||||
OPC_LDL = 1,
|
||||
OPC_LDP = 2,
|
||||
OPC_STG = 3, /* store-global */
|
||||
OPC_STL = 4,
|
||||
OPC_STP = 5,
|
||||
OPC_STI = 6,
|
||||
OPC_G2L = 7,
|
||||
OPC_L2G = 8,
|
||||
OPC_PREFETCH = 9,
|
||||
OPC_LDLW = 10,
|
||||
OPC_STLW = 11,
|
||||
OPC_RESFMT = 14,
|
||||
OPC_RESINFO = 15,
|
||||
OPC_ATOMIC_ADD = 16,
|
||||
OPC_ATOMIC_SUB = 17,
|
||||
OPC_ATOMIC_XCHG = 18,
|
||||
OPC_ATOMIC_INC = 19,
|
||||
OPC_ATOMIC_DEC = 20,
|
||||
OPC_ATOMIC_CMPXCHG = 21,
|
||||
OPC_ATOMIC_MIN = 22,
|
||||
OPC_ATOMIC_MAX = 23,
|
||||
OPC_ATOMIC_AND = 24,
|
||||
OPC_ATOMIC_OR = 25,
|
||||
OPC_ATOMIC_XOR = 26,
|
||||
OPC_LDGB_TYPED_4D = 27,
|
||||
OPC_STGB_4D_4 = 28,
|
||||
OPC_STIB = 29,
|
||||
OPC_LDC_4 = 30,
|
||||
OPC_LDLV = 31,
|
||||
OPC_LDG = _OPC(6, 0), /* load-global */
|
||||
OPC_LDL = _OPC(6, 1),
|
||||
OPC_LDP = _OPC(6, 2),
|
||||
OPC_STG = _OPC(6, 3), /* store-global */
|
||||
OPC_STL = _OPC(6, 4),
|
||||
OPC_STP = _OPC(6, 5),
|
||||
OPC_STI = _OPC(6, 6),
|
||||
OPC_G2L = _OPC(6, 7),
|
||||
OPC_L2G = _OPC(6, 8),
|
||||
OPC_PREFETCH = _OPC(6, 9),
|
||||
OPC_LDLW = _OPC(6, 10),
|
||||
OPC_STLW = _OPC(6, 11),
|
||||
OPC_RESFMT = _OPC(6, 14),
|
||||
OPC_RESINFO = _OPC(6, 15),
|
||||
OPC_ATOMIC_ADD = _OPC(6, 16),
|
||||
OPC_ATOMIC_SUB = _OPC(6, 17),
|
||||
OPC_ATOMIC_XCHG = _OPC(6, 18),
|
||||
OPC_ATOMIC_INC = _OPC(6, 19),
|
||||
OPC_ATOMIC_DEC = _OPC(6, 20),
|
||||
OPC_ATOMIC_CMPXCHG = _OPC(6, 21),
|
||||
OPC_ATOMIC_MIN = _OPC(6, 22),
|
||||
OPC_ATOMIC_MAX = _OPC(6, 23),
|
||||
OPC_ATOMIC_AND = _OPC(6, 24),
|
||||
OPC_ATOMIC_OR = _OPC(6, 25),
|
||||
OPC_ATOMIC_XOR = _OPC(6, 26),
|
||||
OPC_LDGB_TYPED_4D = _OPC(6, 27),
|
||||
OPC_STGB_4D_4 = _OPC(6, 28),
|
||||
OPC_STIB = _OPC(6, 29),
|
||||
OPC_LDC_4 = _OPC(6, 30),
|
||||
OPC_LDLV = _OPC(6, 31),
|
||||
|
||||
/* meta instructions (category -1): */
|
||||
/* placeholder instr to mark shader inputs: */
|
||||
OPC_META_INPUT = 0,
|
||||
OPC_META_PHI = 1,
|
||||
OPC_META_INPUT = _OPC(-1, 0),
|
||||
OPC_META_PHI = _OPC(-1, 1),
|
||||
/* The "fan-in" and "fan-out" instructions are used for keeping
|
||||
* track of instructions that write to multiple dst registers
|
||||
* (fan-out) like texture sample instructions, or read multiple
|
||||
* consecutive scalar registers (fan-in) (bary.f, texture samp)
|
||||
*/
|
||||
OPC_META_FO = 2,
|
||||
OPC_META_FI = 3,
|
||||
OPC_META_FO = _OPC(-1, 2),
|
||||
OPC_META_FI = _OPC(-1, 3),
|
||||
|
||||
} opc_t;
|
||||
|
||||
#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
|
||||
#define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
|
||||
|
||||
typedef enum {
|
||||
TYPE_F16 = 0,
|
||||
TYPE_F32 = 1,
|
||||
|
@ -472,7 +480,7 @@ typedef struct PACKED {
|
|||
|
||||
static inline bool instr_cat3_full(instr_cat3_t *cat3)
|
||||
{
|
||||
switch (cat3->opc) {
|
||||
switch (_OPC(3, cat3->opc)) {
|
||||
case OPC_MAD_F16:
|
||||
case OPC_MAD_U16:
|
||||
case OPC_MAD_S16:
|
||||
|
|
|
@ -612,7 +612,7 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
|
|||
|
||||
list_for_each_entry (struct ir3_block, block, &shader->block_list, node) {
|
||||
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
|
||||
int ret = emit[instr->category](instr, dwords, info);
|
||||
int ret = emit[opc_cat(instr->opc)](instr, dwords, info);
|
||||
if (ret)
|
||||
goto fail;
|
||||
info->instrs_count += 1 + instr->repeat;
|
||||
|
@ -683,23 +683,21 @@ static struct ir3_instruction *instr_create(struct ir3_block *block, int nreg)
|
|||
}
|
||||
|
||||
struct ir3_instruction * ir3_instr_create2(struct ir3_block *block,
|
||||
int category, opc_t opc, int nreg)
|
||||
opc_t opc, int nreg)
|
||||
{
|
||||
struct ir3_instruction *instr = instr_create(block, nreg);
|
||||
instr->block = block;
|
||||
instr->category = category;
|
||||
instr->opc = opc;
|
||||
insert_instr(block, instr);
|
||||
return instr;
|
||||
}
|
||||
|
||||
struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
|
||||
int category, opc_t opc)
|
||||
struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc)
|
||||
{
|
||||
/* NOTE: we could be slightly more clever, at least for non-meta,
|
||||
* and choose # of regs based on category.
|
||||
*/
|
||||
return ir3_instr_create2(block, category, opc, 4);
|
||||
return ir3_instr_create2(block, opc, 4);
|
||||
}
|
||||
|
||||
struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
|
||||
|
|
|
@ -130,7 +130,6 @@ struct ir3_register {
|
|||
|
||||
struct ir3_instruction {
|
||||
struct ir3_block *block;
|
||||
int category;
|
||||
opc_t opc;
|
||||
enum {
|
||||
/* (sy) flag is set on first instruction, and after sample
|
||||
|
@ -435,6 +434,16 @@ struct ir3_block {
|
|||
#endif
|
||||
};
|
||||
|
||||
static inline uint32_t
|
||||
block_id(struct ir3_block *block)
|
||||
{
|
||||
#ifdef DEBUG
|
||||
return block->serialno;
|
||||
#else
|
||||
return (uint32_t)(unsigned long)block;
|
||||
#endif
|
||||
}
|
||||
|
||||
struct ir3 * ir3_create(struct ir3_compiler *compiler,
|
||||
unsigned nin, unsigned nout);
|
||||
void ir3_destroy(struct ir3 *shader);
|
||||
|
@ -444,10 +453,9 @@ void * ir3_alloc(struct ir3 *shader, int sz);
|
|||
|
||||
struct ir3_block * ir3_block_create(struct ir3 *shader);
|
||||
|
||||
struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
|
||||
int category, opc_t opc);
|
||||
struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc);
|
||||
struct ir3_instruction * ir3_instr_create2(struct ir3_block *block,
|
||||
int category, opc_t opc, int nreg);
|
||||
opc_t opc, int nreg);
|
||||
struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr);
|
||||
const char *ir3_instr_name(struct ir3_instruction *instr);
|
||||
|
||||
|
@ -508,17 +516,17 @@ static inline uint32_t reg_comp(struct ir3_register *reg)
|
|||
|
||||
static inline bool is_flow(struct ir3_instruction *instr)
|
||||
{
|
||||
return (instr->category == 0);
|
||||
return (opc_cat(instr->opc) == 0);
|
||||
}
|
||||
|
||||
static inline bool is_kill(struct ir3_instruction *instr)
|
||||
{
|
||||
return is_flow(instr) && (instr->opc == OPC_KILL);
|
||||
return instr->opc == OPC_KILL;
|
||||
}
|
||||
|
||||
static inline bool is_nop(struct ir3_instruction *instr)
|
||||
{
|
||||
return is_flow(instr) && (instr->opc == OPC_NOP);
|
||||
return instr->opc == OPC_NOP;
|
||||
}
|
||||
|
||||
/* Is it a non-transformative (ie. not type changing) mov? This can
|
||||
|
@ -538,75 +546,71 @@ static inline bool is_same_type_mov(struct ir3_instruction *instr)
|
|||
if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
|
||||
return false;
|
||||
|
||||
if ((instr->category == 1) &&
|
||||
(instr->cat1.src_type == instr->cat1.dst_type))
|
||||
switch (instr->opc) {
|
||||
case OPC_MOV:
|
||||
return instr->cat1.src_type == instr->cat1.dst_type;
|
||||
case OPC_ABSNEG_F:
|
||||
case OPC_ABSNEG_S:
|
||||
return true;
|
||||
if ((instr->category == 2) && ((instr->opc == OPC_ABSNEG_F) ||
|
||||
(instr->opc == OPC_ABSNEG_S)))
|
||||
return true;
|
||||
return false;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool is_alu(struct ir3_instruction *instr)
|
||||
{
|
||||
return (1 <= instr->category) && (instr->category <= 3);
|
||||
return (1 <= opc_cat(instr->opc)) && (opc_cat(instr->opc) <= 3);
|
||||
}
|
||||
|
||||
static inline bool is_sfu(struct ir3_instruction *instr)
|
||||
{
|
||||
return (instr->category == 4);
|
||||
return (opc_cat(instr->opc) == 4);
|
||||
}
|
||||
|
||||
static inline bool is_tex(struct ir3_instruction *instr)
|
||||
{
|
||||
return (instr->category == 5);
|
||||
return (opc_cat(instr->opc) == 5);
|
||||
}
|
||||
|
||||
static inline bool is_mem(struct ir3_instruction *instr)
|
||||
{
|
||||
return (instr->category == 6);
|
||||
return (opc_cat(instr->opc) == 6);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
is_store(struct ir3_instruction *instr)
|
||||
{
|
||||
if (is_mem(instr)) {
|
||||
/* these instructions, the "destination" register is
|
||||
* actually a source, the address to store to.
|
||||
*/
|
||||
switch (instr->opc) {
|
||||
case OPC_STG:
|
||||
case OPC_STP:
|
||||
case OPC_STL:
|
||||
case OPC_STLW:
|
||||
case OPC_L2G:
|
||||
case OPC_G2L:
|
||||
return true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
/* these instructions, the "destination" register is
|
||||
* actually a source, the address to store to.
|
||||
*/
|
||||
switch (instr->opc) {
|
||||
case OPC_STG:
|
||||
case OPC_STP:
|
||||
case OPC_STL:
|
||||
case OPC_STLW:
|
||||
case OPC_L2G:
|
||||
case OPC_G2L:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool is_load(struct ir3_instruction *instr)
|
||||
{
|
||||
if (is_mem(instr)) {
|
||||
switch (instr->opc) {
|
||||
case OPC_LDG:
|
||||
case OPC_LDL:
|
||||
case OPC_LDP:
|
||||
case OPC_L2G:
|
||||
case OPC_LDLW:
|
||||
case OPC_LDC_4:
|
||||
case OPC_LDLV:
|
||||
switch (instr->opc) {
|
||||
case OPC_LDG:
|
||||
case OPC_LDL:
|
||||
case OPC_LDP:
|
||||
case OPC_L2G:
|
||||
case OPC_LDLW:
|
||||
case OPC_LDC_4:
|
||||
case OPC_LDLV:
|
||||
/* probably some others too.. */
|
||||
return true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool is_input(struct ir3_instruction *instr)
|
||||
|
@ -615,9 +619,25 @@ static inline bool is_input(struct ir3_instruction *instr)
|
|||
* interpolation.. fortunately inloc is the first src
|
||||
* register in either case
|
||||
*/
|
||||
if (is_mem(instr) && (instr->opc == OPC_LDLV))
|
||||
switch (instr->opc) {
|
||||
case OPC_LDLV:
|
||||
case OPC_BARY_F:
|
||||
return true;
|
||||
return (instr->category == 2) && (instr->opc == OPC_BARY_F);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool is_bool(struct ir3_instruction *instr)
|
||||
{
|
||||
switch (instr->opc) {
|
||||
case OPC_CMPS_F:
|
||||
case OPC_CMPS_S:
|
||||
case OPC_CMPS_U:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool is_meta(struct ir3_instruction *instr)
|
||||
|
@ -626,7 +646,7 @@ static inline bool is_meta(struct ir3_instruction *instr)
|
|||
* might actually contribute some instructions to the final
|
||||
* result?
|
||||
*/
|
||||
return (instr->category == -1);
|
||||
return (opc_cat(instr->opc) == -1);
|
||||
}
|
||||
|
||||
static inline bool writes_addr(struct ir3_instruction *instr)
|
||||
|
@ -901,8 +921,7 @@ void ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary);
|
|||
static inline struct ir3_instruction *
|
||||
ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type)
|
||||
{
|
||||
struct ir3_instruction *instr =
|
||||
ir3_instr_create(block, 1, 0);
|
||||
struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV);
|
||||
ir3_reg_create(instr, 0, 0); /* dst */
|
||||
if (src->regs[0]->flags & IR3_REG_ARRAY) {
|
||||
struct ir3_register *src_reg =
|
||||
|
@ -922,8 +941,7 @@ static inline struct ir3_instruction *
|
|||
ir3_COV(struct ir3_block *block, struct ir3_instruction *src,
|
||||
type_t src_type, type_t dst_type)
|
||||
{
|
||||
struct ir3_instruction *instr =
|
||||
ir3_instr_create(block, 1, 0);
|
||||
struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV);
|
||||
ir3_reg_create(instr, 0, 0); /* dst */
|
||||
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
|
||||
instr->cat1.src_type = src_type;
|
||||
|
@ -935,45 +953,45 @@ ir3_COV(struct ir3_block *block, struct ir3_instruction *src,
|
|||
static inline struct ir3_instruction *
|
||||
ir3_NOP(struct ir3_block *block)
|
||||
{
|
||||
return ir3_instr_create(block, 0, OPC_NOP);
|
||||
return ir3_instr_create(block, OPC_NOP);
|
||||
}
|
||||
|
||||
#define INSTR0(CAT, name) \
|
||||
#define INSTR0(name) \
|
||||
static inline struct ir3_instruction * \
|
||||
ir3_##name(struct ir3_block *block) \
|
||||
{ \
|
||||
struct ir3_instruction *instr = \
|
||||
ir3_instr_create(block, CAT, OPC_##name); \
|
||||
ir3_instr_create(block, OPC_##name); \
|
||||
return instr; \
|
||||
}
|
||||
|
||||
#define INSTR1(CAT, name) \
|
||||
#define INSTR1(name) \
|
||||
static inline struct ir3_instruction * \
|
||||
ir3_##name(struct ir3_block *block, \
|
||||
struct ir3_instruction *a, unsigned aflags) \
|
||||
{ \
|
||||
struct ir3_instruction *instr = \
|
||||
ir3_instr_create(block, CAT, OPC_##name); \
|
||||
ir3_instr_create(block, OPC_##name); \
|
||||
ir3_reg_create(instr, 0, 0); /* dst */ \
|
||||
ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \
|
||||
return instr; \
|
||||
}
|
||||
|
||||
#define INSTR2(CAT, name) \
|
||||
#define INSTR2(name) \
|
||||
static inline struct ir3_instruction * \
|
||||
ir3_##name(struct ir3_block *block, \
|
||||
struct ir3_instruction *a, unsigned aflags, \
|
||||
struct ir3_instruction *b, unsigned bflags) \
|
||||
{ \
|
||||
struct ir3_instruction *instr = \
|
||||
ir3_instr_create(block, CAT, OPC_##name); \
|
||||
ir3_instr_create(block, OPC_##name); \
|
||||
ir3_reg_create(instr, 0, 0); /* dst */ \
|
||||
ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \
|
||||
ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \
|
||||
return instr; \
|
||||
}
|
||||
|
||||
#define INSTR3(CAT, name) \
|
||||
#define INSTR3(name) \
|
||||
static inline struct ir3_instruction * \
|
||||
ir3_##name(struct ir3_block *block, \
|
||||
struct ir3_instruction *a, unsigned aflags, \
|
||||
|
@ -981,7 +999,7 @@ ir3_##name(struct ir3_block *block, \
|
|||
struct ir3_instruction *c, unsigned cflags) \
|
||||
{ \
|
||||
struct ir3_instruction *instr = \
|
||||
ir3_instr_create(block, CAT, OPC_##name); \
|
||||
ir3_instr_create(block, OPC_##name); \
|
||||
ir3_reg_create(instr, 0, 0); /* dst */ \
|
||||
ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \
|
||||
ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \
|
||||
|
@ -990,89 +1008,89 @@ ir3_##name(struct ir3_block *block, \
|
|||
}
|
||||
|
||||
/* cat0 instructions: */
|
||||
INSTR0(0, BR);
|
||||
INSTR0(0, JUMP);
|
||||
INSTR1(0, KILL);
|
||||
INSTR0(0, END);
|
||||
INSTR0(BR);
|
||||
INSTR0(JUMP);
|
||||
INSTR1(KILL);
|
||||
INSTR0(END);
|
||||
|
||||
/* cat2 instructions, most 2 src but some 1 src: */
|
||||
INSTR2(2, ADD_F)
|
||||
INSTR2(2, MIN_F)
|
||||
INSTR2(2, MAX_F)
|
||||
INSTR2(2, MUL_F)
|
||||
INSTR1(2, SIGN_F)
|
||||
INSTR2(2, CMPS_F)
|
||||
INSTR1(2, ABSNEG_F)
|
||||
INSTR2(2, CMPV_F)
|
||||
INSTR1(2, FLOOR_F)
|
||||
INSTR1(2, CEIL_F)
|
||||
INSTR1(2, RNDNE_F)
|
||||
INSTR1(2, RNDAZ_F)
|
||||
INSTR1(2, TRUNC_F)
|
||||
INSTR2(2, ADD_U)
|
||||
INSTR2(2, ADD_S)
|
||||
INSTR2(2, SUB_U)
|
||||
INSTR2(2, SUB_S)
|
||||
INSTR2(2, CMPS_U)
|
||||
INSTR2(2, CMPS_S)
|
||||
INSTR2(2, MIN_U)
|
||||
INSTR2(2, MIN_S)
|
||||
INSTR2(2, MAX_U)
|
||||
INSTR2(2, MAX_S)
|
||||
INSTR1(2, ABSNEG_S)
|
||||
INSTR2(2, AND_B)
|
||||
INSTR2(2, OR_B)
|
||||
INSTR1(2, NOT_B)
|
||||
INSTR2(2, XOR_B)
|
||||
INSTR2(2, CMPV_U)
|
||||
INSTR2(2, CMPV_S)
|
||||
INSTR2(2, MUL_U)
|
||||
INSTR2(2, MUL_S)
|
||||
INSTR2(2, MULL_U)
|
||||
INSTR1(2, BFREV_B)
|
||||
INSTR1(2, CLZ_S)
|
||||
INSTR1(2, CLZ_B)
|
||||
INSTR2(2, SHL_B)
|
||||
INSTR2(2, SHR_B)
|
||||
INSTR2(2, ASHR_B)
|
||||
INSTR2(2, BARY_F)
|
||||
INSTR2(2, MGEN_B)
|
||||
INSTR2(2, GETBIT_B)
|
||||
INSTR1(2, SETRM)
|
||||
INSTR1(2, CBITS_B)
|
||||
INSTR2(2, SHB)
|
||||
INSTR2(2, MSAD)
|
||||
INSTR2(ADD_F)
|
||||
INSTR2(MIN_F)
|
||||
INSTR2(MAX_F)
|
||||
INSTR2(MUL_F)
|
||||
INSTR1(SIGN_F)
|
||||
INSTR2(CMPS_F)
|
||||
INSTR1(ABSNEG_F)
|
||||
INSTR2(CMPV_F)
|
||||
INSTR1(FLOOR_F)
|
||||
INSTR1(CEIL_F)
|
||||
INSTR1(RNDNE_F)
|
||||
INSTR1(RNDAZ_F)
|
||||
INSTR1(TRUNC_F)
|
||||
INSTR2(ADD_U)
|
||||
INSTR2(ADD_S)
|
||||
INSTR2(SUB_U)
|
||||
INSTR2(SUB_S)
|
||||
INSTR2(CMPS_U)
|
||||
INSTR2(CMPS_S)
|
||||
INSTR2(MIN_U)
|
||||
INSTR2(MIN_S)
|
||||
INSTR2(MAX_U)
|
||||
INSTR2(MAX_S)
|
||||
INSTR1(ABSNEG_S)
|
||||
INSTR2(AND_B)
|
||||
INSTR2(OR_B)
|
||||
INSTR1(NOT_B)
|
||||
INSTR2(XOR_B)
|
||||
INSTR2(CMPV_U)
|
||||
INSTR2(CMPV_S)
|
||||
INSTR2(MUL_U)
|
||||
INSTR2(MUL_S)
|
||||
INSTR2(MULL_U)
|
||||
INSTR1(BFREV_B)
|
||||
INSTR1(CLZ_S)
|
||||
INSTR1(CLZ_B)
|
||||
INSTR2(SHL_B)
|
||||
INSTR2(SHR_B)
|
||||
INSTR2(ASHR_B)
|
||||
INSTR2(BARY_F)
|
||||
INSTR2(MGEN_B)
|
||||
INSTR2(GETBIT_B)
|
||||
INSTR1(SETRM)
|
||||
INSTR1(CBITS_B)
|
||||
INSTR2(SHB)
|
||||
INSTR2(MSAD)
|
||||
|
||||
/* cat3 instructions: */
|
||||
INSTR3(3, MAD_U16)
|
||||
INSTR3(3, MADSH_U16)
|
||||
INSTR3(3, MAD_S16)
|
||||
INSTR3(3, MADSH_M16)
|
||||
INSTR3(3, MAD_U24)
|
||||
INSTR3(3, MAD_S24)
|
||||
INSTR3(3, MAD_F16)
|
||||
INSTR3(3, MAD_F32)
|
||||
INSTR3(3, SEL_B16)
|
||||
INSTR3(3, SEL_B32)
|
||||
INSTR3(3, SEL_S16)
|
||||
INSTR3(3, SEL_S32)
|
||||
INSTR3(3, SEL_F16)
|
||||
INSTR3(3, SEL_F32)
|
||||
INSTR3(3, SAD_S16)
|
||||
INSTR3(3, SAD_S32)
|
||||
INSTR3(MAD_U16)
|
||||
INSTR3(MADSH_U16)
|
||||
INSTR3(MAD_S16)
|
||||
INSTR3(MADSH_M16)
|
||||
INSTR3(MAD_U24)
|
||||
INSTR3(MAD_S24)
|
||||
INSTR3(MAD_F16)
|
||||
INSTR3(MAD_F32)
|
||||
INSTR3(SEL_B16)
|
||||
INSTR3(SEL_B32)
|
||||
INSTR3(SEL_S16)
|
||||
INSTR3(SEL_S32)
|
||||
INSTR3(SEL_F16)
|
||||
INSTR3(SEL_F32)
|
||||
INSTR3(SAD_S16)
|
||||
INSTR3(SAD_S32)
|
||||
|
||||
/* cat4 instructions: */
|
||||
INSTR1(4, RCP)
|
||||
INSTR1(4, RSQ)
|
||||
INSTR1(4, LOG2)
|
||||
INSTR1(4, EXP2)
|
||||
INSTR1(4, SIN)
|
||||
INSTR1(4, COS)
|
||||
INSTR1(4, SQRT)
|
||||
INSTR1(RCP)
|
||||
INSTR1(RSQ)
|
||||
INSTR1(LOG2)
|
||||
INSTR1(EXP2)
|
||||
INSTR1(SIN)
|
||||
INSTR1(COS)
|
||||
INSTR1(SQRT)
|
||||
|
||||
/* cat5 instructions: */
|
||||
INSTR1(5, DSX)
|
||||
INSTR1(5, DSY)
|
||||
INSTR1(DSX)
|
||||
INSTR1(DSY)
|
||||
|
||||
static inline struct ir3_instruction *
|
||||
ir3_SAM(struct ir3_block *block, opc_t opc, type_t type,
|
||||
|
@ -1082,7 +1100,7 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type,
|
|||
struct ir3_instruction *sam;
|
||||
struct ir3_register *reg;
|
||||
|
||||
sam = ir3_instr_create(block, 5, opc);
|
||||
sam = ir3_instr_create(block, opc);
|
||||
sam->flags |= flags;
|
||||
ir3_reg_create(sam, 0, 0)->wrmask = wrmask;
|
||||
if (src0) {
|
||||
|
@ -1103,9 +1121,9 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type,
|
|||
}
|
||||
|
||||
/* cat6 instructions: */
|
||||
INSTR2(6, LDLV)
|
||||
INSTR2(6, LDG)
|
||||
INSTR3(6, STG)
|
||||
INSTR2(LDLV)
|
||||
INSTR2(LDG)
|
||||
INSTR3(STG)
|
||||
|
||||
/* ************************************************************************* */
|
||||
/* split this out or find some helper to use.. like main/bitset.h.. */
|
||||
|
|
|
@ -286,7 +286,7 @@ create_immed(struct ir3_block *block, uint32_t val)
|
|||
{
|
||||
struct ir3_instruction *mov;
|
||||
|
||||
mov = ir3_instr_create(block, 1, 0);
|
||||
mov = ir3_instr_create(block, OPC_MOV);
|
||||
mov->cat1.src_type = TYPE_U32;
|
||||
mov->cat1.dst_type = TYPE_U32;
|
||||
ir3_reg_create(mov, 0, 0);
|
||||
|
@ -366,7 +366,7 @@ create_uniform(struct ir3_compile *ctx, unsigned n)
|
|||
{
|
||||
struct ir3_instruction *mov;
|
||||
|
||||
mov = ir3_instr_create(ctx->block, 1, 0);
|
||||
mov = ir3_instr_create(ctx->block, OPC_MOV);
|
||||
/* TODO get types right? */
|
||||
mov->cat1.src_type = TYPE_F32;
|
||||
mov->cat1.dst_type = TYPE_F32;
|
||||
|
@ -382,7 +382,7 @@ create_uniform_indirect(struct ir3_compile *ctx, int n,
|
|||
{
|
||||
struct ir3_instruction *mov;
|
||||
|
||||
mov = ir3_instr_create(ctx->block, 1, 0);
|
||||
mov = ir3_instr_create(ctx->block, OPC_MOV);
|
||||
mov->cat1.src_type = TYPE_U32;
|
||||
mov->cat1.dst_type = TYPE_U32;
|
||||
ir3_reg_create(mov, 0, 0);
|
||||
|
@ -402,7 +402,7 @@ create_collect(struct ir3_block *block, struct ir3_instruction **arr,
|
|||
if (arrsz == 0)
|
||||
return NULL;
|
||||
|
||||
collect = ir3_instr_create2(block, -1, OPC_META_FI, 1 + arrsz);
|
||||
collect = ir3_instr_create2(block, OPC_META_FI, 1 + arrsz);
|
||||
ir3_reg_create(collect, 0, 0); /* dst */
|
||||
for (unsigned i = 0; i < arrsz; i++)
|
||||
ir3_reg_create(collect, 0, IR3_REG_SSA)->instr = arr[i];
|
||||
|
@ -418,7 +418,7 @@ create_indirect_load(struct ir3_compile *ctx, unsigned arrsz, int n,
|
|||
struct ir3_instruction *mov;
|
||||
struct ir3_register *src;
|
||||
|
||||
mov = ir3_instr_create(block, 1, 0);
|
||||
mov = ir3_instr_create(block, OPC_MOV);
|
||||
mov->cat1.src_type = TYPE_U32;
|
||||
mov->cat1.dst_type = TYPE_U32;
|
||||
ir3_reg_create(mov, 0, 0);
|
||||
|
@ -441,7 +441,7 @@ create_var_load(struct ir3_compile *ctx, struct ir3_array *arr, int n,
|
|||
struct ir3_instruction *mov;
|
||||
struct ir3_register *src;
|
||||
|
||||
mov = ir3_instr_create(block, 1, 0);
|
||||
mov = ir3_instr_create(block, OPC_MOV);
|
||||
mov->cat1.src_type = TYPE_U32;
|
||||
mov->cat1.dst_type = TYPE_U32;
|
||||
ir3_reg_create(mov, 0, 0);
|
||||
|
@ -469,7 +469,7 @@ create_var_store(struct ir3_compile *ctx, struct ir3_array *arr, int n,
|
|||
struct ir3_instruction *mov;
|
||||
struct ir3_register *dst;
|
||||
|
||||
mov = ir3_instr_create(block, 1, 0);
|
||||
mov = ir3_instr_create(block, OPC_MOV);
|
||||
mov->cat1.src_type = TYPE_U32;
|
||||
mov->cat1.dst_type = TYPE_U32;
|
||||
dst = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
|
||||
|
@ -492,7 +492,7 @@ create_input(struct ir3_block *block, unsigned n)
|
|||
{
|
||||
struct ir3_instruction *in;
|
||||
|
||||
in = ir3_instr_create(block, -1, OPC_META_INPUT);
|
||||
in = ir3_instr_create(block, OPC_META_INPUT);
|
||||
in->inout.block = block;
|
||||
ir3_reg_create(in, n, 0);
|
||||
|
||||
|
@ -617,8 +617,7 @@ split_dest(struct ir3_block *block, struct ir3_instruction **dst,
|
|||
{
|
||||
struct ir3_instruction *prev = NULL;
|
||||
for (int i = 0, j = 0; i < n; i++) {
|
||||
struct ir3_instruction *split =
|
||||
ir3_instr_create(block, -1, OPC_META_FO);
|
||||
struct ir3_instruction *split = ir3_instr_create(block, OPC_META_FO);
|
||||
ir3_reg_create(split, 0, IR3_REG_SSA);
|
||||
ir3_reg_create(split, 0, IR3_REG_SSA)->instr = src;
|
||||
split->fo.off = i;
|
||||
|
@ -1631,7 +1630,7 @@ emit_phi(struct ir3_compile *ctx, nir_phi_instr *nphi)
|
|||
|
||||
dst = get_dst(ctx, &nphi->dest, 1);
|
||||
|
||||
phi = ir3_instr_create2(ctx->block, -1, OPC_META_PHI,
|
||||
phi = ir3_instr_create2(ctx->block, OPC_META_PHI,
|
||||
1 + exec_list_length(&nphi->srcs));
|
||||
ir3_reg_create(phi, 0, 0); /* dst */
|
||||
phi->phi.nphi = nphi;
|
||||
|
@ -1651,7 +1650,7 @@ resolve_phis(struct ir3_compile *ctx, struct ir3_block *block)
|
|||
nir_phi_instr *nphi;
|
||||
|
||||
/* phi's only come at start of block: */
|
||||
if (!(is_meta(instr) && (instr->opc == OPC_META_PHI)))
|
||||
if (instr->opc != OPC_META_PHI)
|
||||
break;
|
||||
|
||||
if (!instr->phi.nphi)
|
||||
|
@ -1662,6 +1661,16 @@ resolve_phis(struct ir3_compile *ctx, struct ir3_block *block)
|
|||
|
||||
foreach_list_typed(nir_phi_src, nsrc, node, &nphi->srcs) {
|
||||
struct ir3_instruction *src = get_src(ctx, &nsrc->src)[0];
|
||||
|
||||
/* NOTE: src might not be in the same block as it comes from
|
||||
* according to the phi.. but in the end the backend assumes
|
||||
* it will be able to assign the same register to each (which
|
||||
* only works if it is assigned in the src block), so insert
|
||||
* an extra mov to make sure the phi src is assigned in the
|
||||
* block it comes from:
|
||||
*/
|
||||
src = ir3_MOV(get_block(ctx, nsrc->pred), src, TYPE_U32);
|
||||
|
||||
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
|
||||
}
|
||||
}
|
||||
|
@ -2144,7 +2153,7 @@ emit_instructions(struct ir3_compile *ctx)
|
|||
if (ctx->so->type == SHADER_FRAGMENT) {
|
||||
// TODO maybe a helper for fi since we need it a few places..
|
||||
struct ir3_instruction *instr;
|
||||
instr = ir3_instr_create(ctx->block, -1, OPC_META_FI);
|
||||
instr = ir3_instr_create(ctx->block, OPC_META_FI);
|
||||
ir3_reg_create(instr, 0, 0);
|
||||
ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.x */
|
||||
ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.y */
|
||||
|
@ -2323,12 +2332,12 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
|||
* in which case we need to propagate the half-reg flag
|
||||
* up to the definer so that RA sees it:
|
||||
*/
|
||||
if (is_meta(out) && (out->opc == OPC_META_FO)) {
|
||||
if (out->opc == OPC_META_FO) {
|
||||
out = out->regs[1]->instr;
|
||||
out->regs[0]->flags |= IR3_REG_HALF;
|
||||
}
|
||||
|
||||
if (out->category == 1) {
|
||||
if (out->opc == OPC_MOV) {
|
||||
out->cat1.dst_type = half_type(out->cat1.dst_type);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -58,14 +58,14 @@ static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags)
|
|||
return false;
|
||||
|
||||
/* TODO: remove this hack: */
|
||||
if (is_meta(src_instr) && (src_instr->opc == OPC_META_FO))
|
||||
if (src_instr->opc == OPC_META_FO)
|
||||
return false;
|
||||
/* TODO: we currently don't handle left/right neighbors
|
||||
* very well when inserting parallel-copies into phi..
|
||||
* to avoid problems don't eliminate a mov coming out
|
||||
* of phi..
|
||||
*/
|
||||
if (is_meta(src_instr) && (src_instr->opc == OPC_META_PHI))
|
||||
if (src_instr->opc == OPC_META_PHI)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
@ -96,7 +96,7 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n,
|
|||
return false;
|
||||
|
||||
/* clear flags that are 'ok' */
|
||||
switch (instr->category) {
|
||||
switch (opc_cat(instr->opc)) {
|
||||
case 1:
|
||||
valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV;
|
||||
if (flags & ~valid_flags)
|
||||
|
@ -111,6 +111,19 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n,
|
|||
valid_flags = IR3_REG_IMMED;
|
||||
if (flags & ~valid_flags)
|
||||
return false;
|
||||
|
||||
if (flags & IR3_REG_IMMED) {
|
||||
/* doesn't seem like we can have immediate src for store
|
||||
* instructions:
|
||||
*
|
||||
* TODO this restriction could also apply to load instructions,
|
||||
* but for load instructions this arg is the address (and not
|
||||
* really sure any good way to test a hard-coded immed addr src)
|
||||
*/
|
||||
if (is_store(instr) && (n == 1))
|
||||
return false;
|
||||
}
|
||||
|
||||
break;
|
||||
case 2:
|
||||
valid_flags = ir3_cat2_absneg(instr->opc) |
|
||||
|
@ -176,8 +189,10 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n,
|
|||
/* propagate register flags from src to dst.. negates need special
|
||||
* handling to cancel each other out.
|
||||
*/
|
||||
static void combine_flags(unsigned *dstflags, unsigned srcflags)
|
||||
static void combine_flags(unsigned *dstflags, struct ir3_instruction *src)
|
||||
{
|
||||
unsigned srcflags = src->regs[1]->flags;
|
||||
|
||||
/* if what we are combining into already has (abs) flags,
|
||||
* we can drop (neg) from src:
|
||||
*/
|
||||
|
@ -203,15 +218,15 @@ static void combine_flags(unsigned *dstflags, unsigned srcflags)
|
|||
*dstflags |= srcflags & IR3_REG_IMMED;
|
||||
*dstflags |= srcflags & IR3_REG_RELATIV;
|
||||
*dstflags |= srcflags & IR3_REG_ARRAY;
|
||||
}
|
||||
|
||||
/* the "plain" MAD's (ie. the ones that don't shift first src prior to
|
||||
* multiply) can swap their first two srcs if src[0] is !CONST and
|
||||
* src[1] is CONST:
|
||||
*/
|
||||
static bool is_valid_mad(struct ir3_instruction *instr)
|
||||
{
|
||||
return (instr->category == 3) && is_mad(instr->opc);
|
||||
/* if src of the src is boolean we can drop the (abs) since we know
|
||||
* the source value is already a positive integer. This cleans
|
||||
* up the absnegs that get inserted when converting between nir and
|
||||
* native boolean (see ir3_b2n/n2b)
|
||||
*/
|
||||
struct ir3_instruction *srcsrc = ssa(src->regs[1]);
|
||||
if (srcsrc && is_bool(srcsrc))
|
||||
*dstflags &= ~IR3_REG_SABS;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -226,12 +241,18 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
|
|||
{
|
||||
struct ir3_instruction *src = ssa(reg);
|
||||
|
||||
/* don't propagate copies into a PHI, since we don't know if the
|
||||
* src block executed:
|
||||
*/
|
||||
if (instr->opc == OPC_META_PHI)
|
||||
return;
|
||||
|
||||
if (is_eligible_mov(src, true)) {
|
||||
/* simple case, no immed/const/relativ, only mov's w/ ssa src: */
|
||||
struct ir3_register *src_reg = src->regs[1];
|
||||
unsigned new_flags = reg->flags;
|
||||
|
||||
combine_flags(&new_flags, src_reg->flags);
|
||||
combine_flags(&new_flags, src);
|
||||
|
||||
if (valid_flags(instr, n, new_flags)) {
|
||||
if (new_flags & IR3_REG_ARRAY) {
|
||||
|
@ -252,13 +273,17 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
|
|||
struct ir3_register *src_reg = src->regs[1];
|
||||
unsigned new_flags = reg->flags;
|
||||
|
||||
combine_flags(&new_flags, src_reg->flags);
|
||||
combine_flags(&new_flags, src);
|
||||
|
||||
if (!valid_flags(instr, n, new_flags)) {
|
||||
/* special case for "normal" mad instructions, we can
|
||||
* try swapping the first two args if that fits better.
|
||||
*
|
||||
* the "plain" MAD's (ie. the ones that don't shift first
|
||||
* src prior to multiply) can swap their first two srcs if
|
||||
* src[0] is !CONST and src[1] is CONST:
|
||||
*/
|
||||
if ((n == 1) && is_valid_mad(instr) &&
|
||||
if ((n == 1) && is_mad(instr->opc) &&
|
||||
!(instr->regs[0 + 1]->flags & (IR3_REG_CONST | IR3_REG_RELATIV)) &&
|
||||
valid_flags(instr, 0, new_flags)) {
|
||||
/* swap src[0] and src[1]: */
|
||||
|
@ -292,7 +317,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
|
|||
* just somehow don't work out. This restriction may only
|
||||
* apply if the first src is also CONST.
|
||||
*/
|
||||
if ((instr->category == 3) && (n == 2) &&
|
||||
if ((opc_cat(instr->opc) == 3) && (n == 2) &&
|
||||
(src_reg->flags & IR3_REG_RELATIV) &&
|
||||
(src_reg->array.offset == 0))
|
||||
return;
|
||||
|
@ -328,10 +353,9 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
|
|||
if (src_reg->flags & IR3_REG_IMMED) {
|
||||
int32_t iim_val = src_reg->iim_val;
|
||||
|
||||
debug_assert((instr->category == 1) ||
|
||||
(instr->category == 6) ||
|
||||
((instr->category == 2) &&
|
||||
ir3_cat2_int(instr->opc)));
|
||||
debug_assert((opc_cat(instr->opc) == 1) ||
|
||||
(opc_cat(instr->opc) == 6) ||
|
||||
ir3_cat2_int(instr->opc));
|
||||
|
||||
if (new_flags & IR3_REG_SABS)
|
||||
iim_val = abs(iim_val);
|
||||
|
@ -343,7 +367,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
|
|||
iim_val = ~iim_val;
|
||||
|
||||
/* other than category 1 (mov) we can only encode up to 10 bits: */
|
||||
if ((instr->category == 1) || !(iim_val & ~0x3ff)) {
|
||||
if ((instr->opc == OPC_MOV) || !(iim_val & ~0x3ff)) {
|
||||
new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
|
||||
src_reg = ir3_reg_clone(instr->block->shader, src_reg);
|
||||
src_reg->flags = new_flags;
|
||||
|
|
|
@ -74,8 +74,7 @@ int ir3_delayslots(struct ir3_instruction *assigner,
|
|||
if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) ||
|
||||
is_mem(consumer)) {
|
||||
return 6;
|
||||
} else if ((consumer->category == 3) &&
|
||||
(is_mad(consumer->opc) || is_madsh(consumer->opc)) &&
|
||||
} else if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) &&
|
||||
(n == 3)) {
|
||||
/* special case, 3rd src to cat3 not required on first cycle */
|
||||
return 1;
|
||||
|
|
|
@ -63,14 +63,13 @@ static void arr_insert_mov_in(void *arr, int idx, struct ir3_instruction *instr)
|
|||
|
||||
debug_assert(instr->regs_count == 1);
|
||||
|
||||
in = ir3_instr_create(instr->block, -1, OPC_META_INPUT);
|
||||
in = ir3_instr_create(instr->block, OPC_META_INPUT);
|
||||
in->inout.block = instr->block;
|
||||
ir3_reg_create(in, instr->regs[0]->num, 0);
|
||||
|
||||
/* create src reg for meta:in and fixup to now be a mov: */
|
||||
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = in;
|
||||
instr->category = 1;
|
||||
instr->opc = 0;
|
||||
instr->opc = OPC_MOV;
|
||||
instr->cat1.src_type = TYPE_F32;
|
||||
instr->cat1.dst_type = TYPE_F32;
|
||||
|
||||
|
@ -117,7 +116,7 @@ restart:
|
|||
conflicts(instr->cp.right, right);
|
||||
|
||||
/* RA can't yet deal very well w/ group'd phi's: */
|
||||
if (is_meta(instr) && (instr->opc == OPC_META_PHI))
|
||||
if (instr->opc == OPC_META_PHI)
|
||||
conflict = true;
|
||||
|
||||
/* we also can't have an instr twice in the group: */
|
||||
|
@ -168,7 +167,7 @@ instr_find_neighbors(struct ir3_instruction *instr)
|
|||
if (ir3_instr_check_mark(instr))
|
||||
return;
|
||||
|
||||
if (is_meta(instr) && (instr->opc == OPC_META_FI))
|
||||
if (instr->opc == OPC_META_FI)
|
||||
group_n(&instr_ops, instr, instr->regs_count - 1);
|
||||
|
||||
foreach_ssa_src(src, instr)
|
||||
|
|
|
@ -146,7 +146,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
|
|||
* clever if we were aware of this during scheduling, but
|
||||
* this should be a pretty rare case:
|
||||
*/
|
||||
if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) {
|
||||
if ((n->flags & IR3_INSTR_SS) && (opc_cat(n->opc) >= 5)) {
|
||||
struct ir3_instruction *nop;
|
||||
nop = ir3_NOP(block);
|
||||
nop->flags |= IR3_INSTR_SS;
|
||||
|
@ -154,7 +154,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
|
|||
}
|
||||
|
||||
/* need to be able to set (ss) on first instruction: */
|
||||
if (list_empty(&block->instr_list) && (n->category >= 5))
|
||||
if (list_empty(&block->instr_list) && (opc_cat(n->opc) >= 5))
|
||||
ir3_NOP(block);
|
||||
|
||||
if (is_nop(n) && !list_empty(&block->instr_list)) {
|
||||
|
@ -209,7 +209,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
|
|||
struct ir3_instruction *baryf;
|
||||
|
||||
/* (ss)bary.f (ei)r63.x, 0, r0.x */
|
||||
baryf = ir3_instr_create(block, 2, OPC_BARY_F);
|
||||
baryf = ir3_instr_create(block, OPC_BARY_F);
|
||||
baryf->flags |= IR3_INSTR_SS;
|
||||
ir3_reg_create(baryf, regid(63, 0), 0);
|
||||
ir3_reg_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0;
|
||||
|
|
|
@ -35,9 +35,12 @@
|
|||
|
||||
static void print_instr_name(struct ir3_instruction *instr)
|
||||
{
|
||||
if (!instr)
|
||||
return;
|
||||
#ifdef DEBUG
|
||||
printf("%04u:", instr->serialno);
|
||||
#endif
|
||||
printf("%04u:", instr->name);
|
||||
printf("%03u: ", instr->depth);
|
||||
|
||||
if (instr->flags & IR3_INSTR_SY)
|
||||
|
@ -61,7 +64,7 @@ static void print_instr_name(struct ir3_instruction *instr)
|
|||
}
|
||||
break;
|
||||
}
|
||||
} else if (instr->category == 1) {
|
||||
} else if (instr->opc == OPC_MOV) {
|
||||
static const char *type[] = {
|
||||
[TYPE_F16] = "f16",
|
||||
[TYPE_F32] = "f32",
|
||||
|
@ -146,16 +149,6 @@ tab(int lvl)
|
|||
printf("\t");
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
block_id(struct ir3_block *block)
|
||||
{
|
||||
#ifdef DEBUG
|
||||
return block->serialno;
|
||||
#else
|
||||
return (uint32_t)(unsigned long)block;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
print_instr(struct ir3_instruction *instr, int lvl)
|
||||
{
|
||||
|
@ -191,10 +184,8 @@ print_instr(struct ir3_instruction *instr, int lvl)
|
|||
printf("]");
|
||||
}
|
||||
|
||||
if (is_meta(instr)) {
|
||||
if (instr->opc == OPC_META_FO) {
|
||||
printf(", off=%d", instr->fo.off);
|
||||
}
|
||||
if (instr->opc == OPC_META_FO) {
|
||||
printf(", off=%d", instr->fo.off);
|
||||
}
|
||||
|
||||
if (is_flow(instr) && instr->cat0.target) {
|
||||
|
|
|
@ -31,6 +31,8 @@
|
|||
#include "util/ralloc.h"
|
||||
#include "util/bitset.h"
|
||||
|
||||
#include "freedreno_util.h"
|
||||
|
||||
#include "ir3.h"
|
||||
#include "ir3_compiler.h"
|
||||
|
||||
|
@ -342,7 +344,7 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
|
|||
return id->defn;
|
||||
}
|
||||
|
||||
if (is_meta(instr) && (instr->opc == OPC_META_FI)) {
|
||||
if (instr->opc == OPC_META_FI) {
|
||||
/* What about the case where collect is subset of array, we
|
||||
* need to find the distance between where actual array starts
|
||||
* and fanin.. that probably doesn't happen currently.
|
||||
|
@ -436,7 +438,7 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
|
|||
}
|
||||
}
|
||||
|
||||
if (is_meta(d) && (d->opc == OPC_META_PHI)) {
|
||||
if (d->opc == OPC_META_PHI) {
|
||||
/* we have already inserted parallel-copies into
|
||||
* the phi, so we don't need to chase definers
|
||||
*/
|
||||
|
@ -456,7 +458,7 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
|
|||
d = dd;
|
||||
}
|
||||
|
||||
if (is_meta(d) && (d->opc == OPC_META_FO)) {
|
||||
if (d->opc == OPC_META_FO) {
|
||||
struct ir3_instruction *dd;
|
||||
int dsz, doff;
|
||||
|
||||
|
@ -809,6 +811,22 @@ ra_compute_livein_liveout(struct ir3_ra_ctx *ctx)
|
|||
return progress;
|
||||
}
|
||||
|
||||
static void
|
||||
print_bitset(const char *name, BITSET_WORD *bs, unsigned cnt)
|
||||
{
|
||||
bool first = true;
|
||||
debug_printf(" %s:", name);
|
||||
for (unsigned i = 0; i < cnt; i++) {
|
||||
if (BITSET_TEST(bs, i)) {
|
||||
if (!first)
|
||||
debug_printf(",");
|
||||
debug_printf(" %04u", i);
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
debug_printf("\n");
|
||||
}
|
||||
|
||||
static void
|
||||
ra_add_interference(struct ir3_ra_ctx *ctx)
|
||||
{
|
||||
|
@ -831,12 +849,24 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
|
|||
/* update per-block livein/liveout: */
|
||||
while (ra_compute_livein_liveout(ctx)) {}
|
||||
|
||||
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
|
||||
debug_printf("AFTER LIVEIN/OUT:\n");
|
||||
ir3_print(ir);
|
||||
list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
|
||||
struct ir3_ra_block_data *bd = block->data;
|
||||
debug_printf("block%u:\n", block_id(block));
|
||||
print_bitset("def", bd->def, ctx->alloc_count);
|
||||
print_bitset("use", bd->use, ctx->alloc_count);
|
||||
print_bitset("l/i", bd->livein, ctx->alloc_count);
|
||||
print_bitset("l/o", bd->liveout, ctx->alloc_count);
|
||||
}
|
||||
}
|
||||
|
||||
/* extend start/end ranges based on livein/liveout info from cfg: */
|
||||
unsigned bitset_words = BITSET_WORDS(ctx->alloc_count);
|
||||
list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
|
||||
struct ir3_ra_block_data *bd = block->data;
|
||||
|
||||
for (unsigned i = 0; i < bitset_words; i++) {
|
||||
for (unsigned i = 0; i < ctx->alloc_count; i++) {
|
||||
if (BITSET_TEST(bd->livein, i)) {
|
||||
ctx->def[i] = MIN2(ctx->def[i], block->start_ip);
|
||||
ctx->use[i] = MAX2(ctx->use[i], block->start_ip);
|
||||
|
@ -869,7 +899,7 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
|
|||
/* some instructions need fix-up if dst register is half precision: */
|
||||
static void fixup_half_instr_dst(struct ir3_instruction *instr)
|
||||
{
|
||||
switch (instr->category) {
|
||||
switch (opc_cat(instr->opc)) {
|
||||
case 1: /* move instructions */
|
||||
instr->cat1.dst_type = half_type(instr->cat1.dst_type);
|
||||
break;
|
||||
|
@ -910,10 +940,12 @@ static void fixup_half_instr_dst(struct ir3_instruction *instr)
|
|||
/* some instructions need fix-up if src register is half precision: */
|
||||
static void fixup_half_instr_src(struct ir3_instruction *instr)
|
||||
{
|
||||
switch (instr->category) {
|
||||
case 1: /* move instructions */
|
||||
switch (instr->opc) {
|
||||
case OPC_MOV:
|
||||
instr->cat1.src_type = half_type(instr->cat1.src_type);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -511,8 +511,7 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
|
|||
* occupied), and move remaining to depth sorted list:
|
||||
*/
|
||||
list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) {
|
||||
if (is_meta(instr) && ((instr->opc == OPC_META_INPUT) ||
|
||||
(instr->opc == OPC_META_PHI))) {
|
||||
if ((instr->opc == OPC_META_INPUT) || (instr->opc == OPC_META_PHI)) {
|
||||
schedule(ctx, instr);
|
||||
} else {
|
||||
ir3_insert_by_depth(instr, &ctx->depth_list);
|
||||
|
@ -627,14 +626,29 @@ static void
|
|||
sched_insert_parallel_copies(struct ir3_block *block)
|
||||
{
|
||||
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
|
||||
if (is_meta(instr) && (instr->opc == OPC_META_PHI)) {
|
||||
struct ir3_register *reg;
|
||||
if (instr->opc == OPC_META_PHI) {
|
||||
struct ir3_register *reg, *reg2;
|
||||
foreach_src(reg, instr) {
|
||||
struct ir3_instruction *src = reg->instr;
|
||||
struct ir3_instruction *mov =
|
||||
ir3_MOV(src->block, src, TYPE_U32);
|
||||
mov->regs[0]->flags |= IR3_REG_PHI_SRC;
|
||||
mov->regs[0]->instr = instr;
|
||||
struct ir3_instruction *mov = NULL;
|
||||
|
||||
/* after CP we could end up w/ duplicate phi srcs: */
|
||||
foreach_src(reg2, instr) {
|
||||
if (reg == reg2)
|
||||
break;
|
||||
/* reg2 is before reg1 so already an inserted mov: */
|
||||
else if (reg2->instr->regs[1]->instr == src) {
|
||||
mov = reg2->instr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!mov) {
|
||||
mov = ir3_MOV(src->block, src, TYPE_U32);
|
||||
mov->regs[0]->flags |= IR3_REG_PHI_SRC;
|
||||
mov->regs[0]->instr = instr;
|
||||
}
|
||||
|
||||
reg->instr = mov;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -269,6 +269,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
|
|||
case PIPE_CAP_PCI_BUS:
|
||||
case PIPE_CAP_PCI_DEVICE:
|
||||
case PIPE_CAP_PCI_FUNCTION:
|
||||
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
|
||||
|
|
|
@ -498,6 +498,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_PCI_BUS:
|
||||
case PIPE_CAP_PCI_DEVICE:
|
||||
case PIPE_CAP_PCI_FUNCTION:
|
||||
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
|
|
@ -319,6 +319,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_PCI_BUS:
|
||||
case PIPE_CAP_PCI_DEVICE:
|
||||
case PIPE_CAP_PCI_FUNCTION:
|
||||
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
|
||||
return 0;
|
||||
}
|
||||
/* should only get here on unhandled cases */
|
||||
|
|
|
@ -2824,7 +2824,7 @@ FlatteningPass::visit(BasicBlock *bb)
|
|||
!isSurfaceOp(insn->op) && // not confirmed
|
||||
insn->op != OP_LINTERP && // probably just nve4
|
||||
insn->op != OP_PINTERP && // probably just nve4
|
||||
((insn->op != OP_LOAD && insn->op != OP_STORE) ||
|
||||
((insn->op != OP_LOAD && insn->op != OP_STORE && insn->op != OP_ATOM) ||
|
||||
(typeSizeof(insn->dType) <= 4 && !insn->src(0).isIndirect(0))) &&
|
||||
!insn->isNop()) {
|
||||
insn->join = 1;
|
||||
|
|
|
@ -192,6 +192,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_PCI_BUS:
|
||||
case PIPE_CAP_PCI_DEVICE:
|
||||
case PIPE_CAP_PCI_FUNCTION:
|
||||
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
|
|
@ -245,6 +245,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_PCI_BUS:
|
||||
case PIPE_CAP_PCI_DEVICE:
|
||||
case PIPE_CAP_PCI_FUNCTION:
|
||||
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
|
|
@ -251,6 +251,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_PCI_BUS:
|
||||
case PIPE_CAP_PCI_DEVICE:
|
||||
case PIPE_CAP_PCI_FUNCTION:
|
||||
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
|
|
@ -214,6 +214,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
|
||||
case PIPE_CAP_QUERY_BUFFER_OBJECT:
|
||||
case PIPE_CAP_QUERY_MEMORY_INFO:
|
||||
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
|
||||
return 0;
|
||||
|
||||
/* SWTCL-only features. */
|
||||
|
|
|
@ -83,29 +83,26 @@ writable images will consume TEX slots, VTX slots too because of linear indexing
|
|||
|
||||
*/
|
||||
|
||||
struct r600_resource* r600_compute_buffer_alloc_vram(
|
||||
struct r600_screen *screen,
|
||||
unsigned size)
|
||||
struct r600_resource *r600_compute_buffer_alloc_vram(struct r600_screen *screen,
|
||||
unsigned size)
|
||||
{
|
||||
struct pipe_resource * buffer = NULL;
|
||||
struct pipe_resource *buffer = NULL;
|
||||
assert(size);
|
||||
|
||||
buffer = pipe_buffer_create(
|
||||
(struct pipe_screen*) screen,
|
||||
PIPE_BIND_CUSTOM,
|
||||
PIPE_USAGE_IMMUTABLE,
|
||||
size);
|
||||
buffer = pipe_buffer_create((struct pipe_screen*) screen,
|
||||
PIPE_BIND_CUSTOM,
|
||||
PIPE_USAGE_IMMUTABLE,
|
||||
size);
|
||||
|
||||
return (struct r600_resource *)buffer;
|
||||
}
|
||||
|
||||
|
||||
static void evergreen_set_rat(
|
||||
struct r600_pipe_compute *pipe,
|
||||
unsigned id,
|
||||
struct r600_resource* bo,
|
||||
int start,
|
||||
int size)
|
||||
static void evergreen_set_rat(struct r600_pipe_compute *pipe,
|
||||
unsigned id,
|
||||
struct r600_resource *bo,
|
||||
int start,
|
||||
int size)
|
||||
{
|
||||
struct pipe_surface rat_templ;
|
||||
struct r600_surface *surf = NULL;
|
||||
|
@ -145,11 +142,10 @@ static void evergreen_set_rat(
|
|||
evergreen_init_color_surface_rat(rctx, surf);
|
||||
}
|
||||
|
||||
static void evergreen_cs_set_vertex_buffer(
|
||||
struct r600_context * rctx,
|
||||
unsigned vb_index,
|
||||
unsigned offset,
|
||||
struct pipe_resource * buffer)
|
||||
static void evergreen_cs_set_vertex_buffer(struct r600_context *rctx,
|
||||
unsigned vb_index,
|
||||
unsigned offset,
|
||||
struct pipe_resource *buffer)
|
||||
{
|
||||
struct r600_vertexbuf_state *state = &rctx->cs_vertex_buffer_state;
|
||||
struct pipe_vertex_buffer *vb = &state->vb[vb_index];
|
||||
|
@ -166,12 +162,11 @@ static void evergreen_cs_set_vertex_buffer(
|
|||
r600_mark_atom_dirty(rctx, &state->atom);
|
||||
}
|
||||
|
||||
static void evergreen_cs_set_constant_buffer(
|
||||
struct r600_context * rctx,
|
||||
unsigned cb_index,
|
||||
unsigned offset,
|
||||
unsigned size,
|
||||
struct pipe_resource * buffer)
|
||||
static void evergreen_cs_set_constant_buffer(struct r600_context *rctx,
|
||||
unsigned cb_index,
|
||||
unsigned offset,
|
||||
unsigned size,
|
||||
struct pipe_resource *buffer)
|
||||
{
|
||||
struct pipe_constant_buffer cb;
|
||||
cb.buffer_size = size;
|
||||
|
@ -182,16 +177,6 @@ static void evergreen_cs_set_constant_buffer(
|
|||
rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_COMPUTE, cb_index, &cb);
|
||||
}
|
||||
|
||||
static const struct u_resource_vtbl r600_global_buffer_vtbl =
|
||||
{
|
||||
u_default_resource_get_handle, /* get_handle */
|
||||
r600_compute_global_buffer_destroy, /* resource_destroy */
|
||||
r600_compute_global_transfer_map, /* transfer_map */
|
||||
r600_compute_global_transfer_flush_region,/* transfer_flush_region */
|
||||
r600_compute_global_transfer_unmap, /* transfer_unmap */
|
||||
r600_compute_global_transfer_inline_write /* transfer_inline_write */
|
||||
};
|
||||
|
||||
/* We need to define these R600 registers here, because we can't include
|
||||
* evergreend.h and r600d.h.
|
||||
*/
|
||||
|
@ -256,33 +241,32 @@ static void r600_destroy_shader(struct r600_bytecode *bc)
|
|||
FREE(bc->bytecode);
|
||||
}
|
||||
|
||||
void *evergreen_create_compute_state(
|
||||
struct pipe_context *ctx_,
|
||||
const const struct pipe_compute_state *cso)
|
||||
static void *evergreen_create_compute_state(struct pipe_context *ctx,
|
||||
const const struct pipe_compute_state *cso)
|
||||
{
|
||||
struct r600_context *ctx = (struct r600_context *)ctx_;
|
||||
struct r600_context *rctx = (struct r600_context *)ctx;
|
||||
struct r600_pipe_compute *shader = CALLOC_STRUCT(r600_pipe_compute);
|
||||
#ifdef HAVE_OPENCL
|
||||
const struct pipe_llvm_program_header * header;
|
||||
const struct pipe_llvm_program_header *header;
|
||||
const char *code;
|
||||
void *p;
|
||||
boolean use_kill;
|
||||
|
||||
COMPUTE_DBG(ctx->screen, "*** evergreen_create_compute_state\n");
|
||||
COMPUTE_DBG(rctx->screen, "*** evergreen_create_compute_state\n");
|
||||
header = cso->prog;
|
||||
code = cso->prog + sizeof(struct pipe_llvm_program_header);
|
||||
radeon_shader_binary_init(&shader->binary);
|
||||
radeon_elf_read(code, header->num_bytes, &shader->binary);
|
||||
r600_create_shader(&shader->bc, &shader->binary, &use_kill);
|
||||
|
||||
shader->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
|
||||
shader->code_bo = r600_compute_buffer_alloc_vram(rctx->screen,
|
||||
shader->bc.ndw * 4);
|
||||
p = r600_buffer_map_sync_with_rings(&ctx->b, shader->code_bo, PIPE_TRANSFER_WRITE);
|
||||
p = r600_buffer_map_sync_with_rings(&rctx->b, shader->code_bo, PIPE_TRANSFER_WRITE);
|
||||
memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4);
|
||||
ctx->b.ws->buffer_unmap(shader->code_bo->buf);
|
||||
rctx->b.ws->buffer_unmap(shader->code_bo->buf);
|
||||
#endif
|
||||
|
||||
shader->ctx = ctx;
|
||||
shader->ctx = rctx;
|
||||
shader->local_size = cso->req_local_mem;
|
||||
shader->private_size = cso->req_private_mem;
|
||||
shader->input_size = cso->req_input_mem;
|
||||
|
@ -290,12 +274,13 @@ void *evergreen_create_compute_state(
|
|||
return shader;
|
||||
}
|
||||
|
||||
void evergreen_delete_compute_state(struct pipe_context *ctx_, void* state)
|
||||
static void evergreen_delete_compute_state(struct pipe_context *ctx, void *state)
|
||||
{
|
||||
struct r600_context *ctx = (struct r600_context *)ctx_;
|
||||
COMPUTE_DBG(ctx->screen, "*** evergreen_delete_compute_state\n");
|
||||
struct r600_context *rctx = (struct r600_context *)ctx;
|
||||
struct r600_pipe_compute *shader = state;
|
||||
|
||||
COMPUTE_DBG(rctx->screen, "*** evergreen_delete_compute_state\n");
|
||||
|
||||
if (!shader)
|
||||
return;
|
||||
|
||||
|
@ -307,13 +292,13 @@ void evergreen_delete_compute_state(struct pipe_context *ctx_, void* state)
|
|||
FREE(shader);
|
||||
}
|
||||
|
||||
static void evergreen_bind_compute_state(struct pipe_context *ctx_, void *state)
|
||||
static void evergreen_bind_compute_state(struct pipe_context *ctx, void *state)
|
||||
{
|
||||
struct r600_context *ctx = (struct r600_context *)ctx_;
|
||||
struct r600_context *rctx = (struct r600_context *)ctx;
|
||||
|
||||
COMPUTE_DBG(ctx->screen, "*** evergreen_bind_compute_state\n");
|
||||
COMPUTE_DBG(rctx->screen, "*** evergreen_bind_compute_state\n");
|
||||
|
||||
ctx->cs_shader_state.shader = (struct r600_pipe_compute *)state;
|
||||
rctx->cs_shader_state.shader = (struct r600_pipe_compute *)state;
|
||||
}
|
||||
|
||||
/* The kernel parameters are stored a vtx buffer (ID=0), besides the explicit
|
||||
|
@ -327,23 +312,20 @@ static void evergreen_bind_compute_state(struct pipe_context *ctx_, void *state)
|
|||
* (x,y,z)
|
||||
* DWORDS 9+ : Kernel parameters
|
||||
*/
|
||||
void evergreen_compute_upload_input(
|
||||
struct pipe_context *ctx_,
|
||||
const uint *block_layout,
|
||||
const uint *grid_layout,
|
||||
const void *input)
|
||||
static void evergreen_compute_upload_input(struct pipe_context *ctx,
|
||||
const struct pipe_grid_info *info)
|
||||
{
|
||||
struct r600_context *ctx = (struct r600_context *)ctx_;
|
||||
struct r600_pipe_compute *shader = ctx->cs_shader_state.shader;
|
||||
struct r600_context *rctx = (struct r600_context *)ctx;
|
||||
struct r600_pipe_compute *shader = rctx->cs_shader_state.shader;
|
||||
unsigned i;
|
||||
/* We need to reserve 9 dwords (36 bytes) for implicit kernel
|
||||
* parameters.
|
||||
*/
|
||||
unsigned input_size = shader->input_size + 36;
|
||||
uint32_t * num_work_groups_start;
|
||||
uint32_t * global_size_start;
|
||||
uint32_t * local_size_start;
|
||||
uint32_t * kernel_parameters_start;
|
||||
uint32_t *num_work_groups_start;
|
||||
uint32_t *global_size_start;
|
||||
uint32_t *local_size_start;
|
||||
uint32_t *kernel_parameters_start;
|
||||
struct pipe_box box;
|
||||
struct pipe_transfer *transfer = NULL;
|
||||
|
||||
|
@ -354,12 +336,12 @@ void evergreen_compute_upload_input(
|
|||
if (!shader->kernel_param) {
|
||||
/* Add space for the grid dimensions */
|
||||
shader->kernel_param = (struct r600_resource *)
|
||||
pipe_buffer_create(ctx_->screen, PIPE_BIND_CUSTOM,
|
||||
pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM,
|
||||
PIPE_USAGE_IMMUTABLE, input_size);
|
||||
}
|
||||
|
||||
u_box_1d(0, input_size, &box);
|
||||
num_work_groups_start = ctx_->transfer_map(ctx_,
|
||||
num_work_groups_start = ctx->transfer_map(ctx,
|
||||
(struct pipe_resource*)shader->kernel_param,
|
||||
0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE,
|
||||
&box, &transfer);
|
||||
|
@ -368,34 +350,33 @@ void evergreen_compute_upload_input(
|
|||
kernel_parameters_start = local_size_start + (3 * (sizeof(uint)) / 4);
|
||||
|
||||
/* Copy the work group size */
|
||||
memcpy(num_work_groups_start, grid_layout, 3 * sizeof(uint));
|
||||
memcpy(num_work_groups_start, info->grid, 3 * sizeof(uint));
|
||||
|
||||
/* Copy the global size */
|
||||
for (i = 0; i < 3; i++) {
|
||||
global_size_start[i] = grid_layout[i] * block_layout[i];
|
||||
global_size_start[i] = info->grid[i] * info->block[i];
|
||||
}
|
||||
|
||||
/* Copy the local dimensions */
|
||||
memcpy(local_size_start, block_layout, 3 * sizeof(uint));
|
||||
memcpy(local_size_start, info->block, 3 * sizeof(uint));
|
||||
|
||||
/* Copy the kernel inputs */
|
||||
memcpy(kernel_parameters_start, input, shader->input_size);
|
||||
memcpy(kernel_parameters_start, info->input, shader->input_size);
|
||||
|
||||
for (i = 0; i < (input_size / 4); i++) {
|
||||
COMPUTE_DBG(ctx->screen, "input %i : %u\n", i,
|
||||
COMPUTE_DBG(rctx->screen, "input %i : %u\n", i,
|
||||
((unsigned*)num_work_groups_start)[i]);
|
||||
}
|
||||
|
||||
ctx_->transfer_unmap(ctx_, transfer);
|
||||
ctx->transfer_unmap(ctx, transfer);
|
||||
|
||||
/* ID=0 is reserved for the parameters */
|
||||
evergreen_cs_set_constant_buffer(ctx, 0, 0, input_size,
|
||||
evergreen_cs_set_constant_buffer(rctx, 0, 0, input_size,
|
||||
(struct pipe_resource*)shader->kernel_param);
|
||||
}
|
||||
|
||||
static void evergreen_emit_direct_dispatch(
|
||||
struct r600_context *rctx,
|
||||
const uint *block_layout, const uint *grid_layout)
|
||||
static void evergreen_emit_dispatch(struct r600_context *rctx,
|
||||
const struct pipe_grid_info *info)
|
||||
{
|
||||
int i;
|
||||
struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
|
||||
|
@ -411,15 +392,15 @@ static void evergreen_emit_direct_dispatch(
|
|||
|
||||
/* Calculate group_size/grid_size */
|
||||
for (i = 0; i < 3; i++) {
|
||||
group_size *= block_layout[i];
|
||||
group_size *= info->block[i];
|
||||
}
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
grid_size *= grid_layout[i];
|
||||
grid_size *= info->grid[i];
|
||||
}
|
||||
|
||||
/* num_waves = ceil((tg_size.x * tg_size.y, tg_size.z) / (16 * num_pipes)) */
|
||||
num_waves = (block_layout[0] * block_layout[1] * block_layout[2] +
|
||||
num_waves = (info->block[0] * info->block[1] * info->block[2] +
|
||||
wave_divisor - 1) / wave_divisor;
|
||||
|
||||
COMPUTE_DBG(rctx->screen, "Using %u pipes, "
|
||||
|
@ -438,9 +419,9 @@ static void evergreen_emit_direct_dispatch(
|
|||
group_size);
|
||||
|
||||
radeon_compute_set_context_reg_seq(cs, R_0286EC_SPI_COMPUTE_NUM_THREAD_X, 3);
|
||||
radeon_emit(cs, block_layout[0]); /* R_0286EC_SPI_COMPUTE_NUM_THREAD_X */
|
||||
radeon_emit(cs, block_layout[1]); /* R_0286F0_SPI_COMPUTE_NUM_THREAD_Y */
|
||||
radeon_emit(cs, block_layout[2]); /* R_0286F4_SPI_COMPUTE_NUM_THREAD_Z */
|
||||
radeon_emit(cs, info->block[0]); /* R_0286EC_SPI_COMPUTE_NUM_THREAD_X */
|
||||
radeon_emit(cs, info->block[1]); /* R_0286F0_SPI_COMPUTE_NUM_THREAD_Y */
|
||||
radeon_emit(cs, info->block[2]); /* R_0286F4_SPI_COMPUTE_NUM_THREAD_Z */
|
||||
|
||||
if (rctx->b.chip_class < CAYMAN) {
|
||||
assert(lds_size <= 8192);
|
||||
|
@ -455,22 +436,22 @@ static void evergreen_emit_direct_dispatch(
|
|||
|
||||
/* Dispatch packet */
|
||||
radeon_emit(cs, PKT3C(PKT3_DISPATCH_DIRECT, 3, 0));
|
||||
radeon_emit(cs, grid_layout[0]);
|
||||
radeon_emit(cs, grid_layout[1]);
|
||||
radeon_emit(cs, grid_layout[2]);
|
||||
radeon_emit(cs, info->grid[0]);
|
||||
radeon_emit(cs, info->grid[1]);
|
||||
radeon_emit(cs, info->grid[2]);
|
||||
/* VGT_DISPATCH_INITIATOR = COMPUTE_SHADER_EN */
|
||||
radeon_emit(cs, 1);
|
||||
}
|
||||
|
||||
static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
|
||||
const uint *grid_layout)
|
||||
static void compute_emit_cs(struct r600_context *rctx,
|
||||
const struct pipe_grid_info *info)
|
||||
{
|
||||
struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
|
||||
struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
|
||||
unsigned i;
|
||||
|
||||
/* make sure that the gfx ring is only one active */
|
||||
if (ctx->b.dma.cs && ctx->b.dma.cs->cdw) {
|
||||
ctx->b.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
|
||||
if (rctx->b.dma.cs && rctx->b.dma.cs->cdw) {
|
||||
rctx->b.dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
|
||||
}
|
||||
|
||||
/* Initialize all the compute-related registers.
|
||||
|
@ -478,20 +459,20 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
|
|||
* See evergreen_init_atom_start_compute_cs() in this file for the list
|
||||
* of registers initialized by the start_compute_cs_cmd atom.
|
||||
*/
|
||||
r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd);
|
||||
r600_emit_command_buffer(cs, &rctx->start_compute_cs_cmd);
|
||||
|
||||
/* emit config state */
|
||||
if (ctx->b.chip_class == EVERGREEN)
|
||||
r600_emit_atom(ctx, &ctx->config_state.atom);
|
||||
if (rctx->b.chip_class == EVERGREEN)
|
||||
r600_emit_atom(rctx, &rctx->config_state.atom);
|
||||
|
||||
ctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
|
||||
r600_flush_emit(ctx);
|
||||
rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
|
||||
r600_flush_emit(rctx);
|
||||
|
||||
/* Emit colorbuffers. */
|
||||
/* XXX support more than 8 colorbuffers (the offsets are not a multiple of 0x3C for CB8-11) */
|
||||
for (i = 0; i < 8 && i < ctx->framebuffer.state.nr_cbufs; i++) {
|
||||
struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i];
|
||||
unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.gfx,
|
||||
for (i = 0; i < 8 && i < rctx->framebuffer.state.nr_cbufs; i++) {
|
||||
struct r600_surface *cb = (struct r600_surface*)rctx->framebuffer.state.cbufs[i];
|
||||
unsigned reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
|
||||
(struct r600_resource*)cb->base.texture,
|
||||
RADEON_USAGE_READWRITE,
|
||||
RADEON_PRIO_SHADER_RW_BUFFER);
|
||||
|
@ -520,51 +501,51 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
|
|||
|
||||
/* Set CB_TARGET_MASK XXX: Use cb_misc_state */
|
||||
radeon_compute_set_context_reg(cs, R_028238_CB_TARGET_MASK,
|
||||
ctx->compute_cb_target_mask);
|
||||
rctx->compute_cb_target_mask);
|
||||
|
||||
|
||||
/* Emit vertex buffer state */
|
||||
ctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask);
|
||||
r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom);
|
||||
rctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(rctx->cs_vertex_buffer_state.dirty_mask);
|
||||
r600_emit_atom(rctx, &rctx->cs_vertex_buffer_state.atom);
|
||||
|
||||
/* Emit constant buffer state */
|
||||
r600_emit_atom(ctx, &ctx->constbuf_state[PIPE_SHADER_COMPUTE].atom);
|
||||
r600_emit_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_COMPUTE].atom);
|
||||
|
||||
/* Emit sampler state */
|
||||
r600_emit_atom(ctx, &ctx->samplers[PIPE_SHADER_COMPUTE].states.atom);
|
||||
r600_emit_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].states.atom);
|
||||
|
||||
/* Emit sampler view (texture resource) state */
|
||||
r600_emit_atom(ctx, &ctx->samplers[PIPE_SHADER_COMPUTE].views.atom);
|
||||
r600_emit_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views.atom);
|
||||
|
||||
/* Emit compute shader state */
|
||||
r600_emit_atom(ctx, &ctx->cs_shader_state.atom);
|
||||
r600_emit_atom(rctx, &rctx->cs_shader_state.atom);
|
||||
|
||||
/* Emit dispatch state and dispatch packet */
|
||||
evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout);
|
||||
evergreen_emit_dispatch(rctx, info);
|
||||
|
||||
/* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff
|
||||
*/
|
||||
ctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
|
||||
rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
|
||||
R600_CONTEXT_INV_VERTEX_CACHE |
|
||||
R600_CONTEXT_INV_TEX_CACHE;
|
||||
r600_flush_emit(ctx);
|
||||
ctx->b.flags = 0;
|
||||
r600_flush_emit(rctx);
|
||||
rctx->b.flags = 0;
|
||||
|
||||
if (ctx->b.chip_class >= CAYMAN) {
|
||||
cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
|
||||
cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4);
|
||||
if (rctx->b.chip_class >= CAYMAN) {
|
||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
|
||||
/* DEALLOC_STATE prevents the GPU from hanging when a
|
||||
* SURFACE_SYNC packet is emitted some time after a DISPATCH_DIRECT
|
||||
* with any of the CB*_DEST_BASE_ENA or DB_DEST_BASE_ENA bits set.
|
||||
*/
|
||||
cs->buf[cs->cdw++] = PKT3C(PKT3_DEALLOC_STATE, 0, 0);
|
||||
cs->buf[cs->cdw++] = 0;
|
||||
radeon_emit(cs, PKT3C(PKT3_DEALLOC_STATE, 0, 0));
|
||||
radeon_emit(cs, 0);
|
||||
}
|
||||
|
||||
#if 0
|
||||
COMPUTE_DBG(ctx->screen, "cdw: %i\n", cs->cdw);
|
||||
COMPUTE_DBG(rctx->screen, "cdw: %i\n", cs->cdw);
|
||||
for (i = 0; i < cs->cdw; i++) {
|
||||
COMPUTE_DBG(ctx->screen, "%4i : 0x%08X\n", i, cs->buf[i]);
|
||||
COMPUTE_DBG(rctx->screen, "%4i : 0x%08X\n", i, cs->buf[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -574,9 +555,8 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
|
|||
/**
|
||||
* Emit function for r600_cs_shader_state atom
|
||||
*/
|
||||
void evergreen_emit_cs_shader(
|
||||
struct r600_context *rctx,
|
||||
struct r600_atom *atom)
|
||||
void evergreen_emit_cs_shader(struct r600_context *rctx,
|
||||
struct r600_atom *atom)
|
||||
{
|
||||
struct r600_cs_shader_state *state =
|
||||
(struct r600_cs_shader_state*)atom;
|
||||
|
@ -604,35 +584,35 @@ void evergreen_emit_cs_shader(
|
|||
RADEON_PRIO_USER_SHADER));
|
||||
}
|
||||
|
||||
static void evergreen_launch_grid(
|
||||
struct pipe_context *ctx_, const struct pipe_grid_info *info)
|
||||
static void evergreen_launch_grid(struct pipe_context *ctx,
|
||||
const struct pipe_grid_info *info)
|
||||
{
|
||||
struct r600_context *ctx = (struct r600_context *)ctx_;
|
||||
struct r600_context *rctx = (struct r600_context *)ctx;
|
||||
#ifdef HAVE_OPENCL
|
||||
struct r600_pipe_compute *shader = ctx->cs_shader_state.shader;
|
||||
struct r600_pipe_compute *shader = rctx->cs_shader_state.shader;
|
||||
boolean use_kill;
|
||||
|
||||
ctx->cs_shader_state.pc = info->pc;
|
||||
rctx->cs_shader_state.pc = info->pc;
|
||||
/* Get the config information for this kernel. */
|
||||
r600_shader_binary_read_config(&shader->binary, &shader->bc,
|
||||
info->pc, &use_kill);
|
||||
#endif
|
||||
|
||||
COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", info->pc);
|
||||
COMPUTE_DBG(rctx->screen, "*** evergreen_launch_grid: pc = %u\n", info->pc);
|
||||
|
||||
|
||||
evergreen_compute_upload_input(ctx_, info->block, info->grid, info->input);
|
||||
compute_emit_cs(ctx, info->block, info->grid);
|
||||
evergreen_compute_upload_input(ctx, info);
|
||||
compute_emit_cs(rctx, info);
|
||||
}
|
||||
|
||||
static void evergreen_set_compute_resources(struct pipe_context * ctx_,
|
||||
unsigned start, unsigned count,
|
||||
struct pipe_surface ** surfaces)
|
||||
static void evergreen_set_compute_resources(struct pipe_context *ctx,
|
||||
unsigned start, unsigned count,
|
||||
struct pipe_surface **surfaces)
|
||||
{
|
||||
struct r600_context *ctx = (struct r600_context *)ctx_;
|
||||
struct r600_context *rctx = (struct r600_context *)ctx;
|
||||
struct r600_surface **resources = (struct r600_surface **)surfaces;
|
||||
|
||||
COMPUTE_DBG(ctx->screen, "*** evergreen_set_compute_resources: start = %u count = %u\n",
|
||||
COMPUTE_DBG(rctx->screen, "*** evergreen_set_compute_resources: start = %u count = %u\n",
|
||||
start, count);
|
||||
|
||||
for (unsigned i = 0; i < count; i++) {
|
||||
|
@ -646,31 +626,31 @@ static void evergreen_set_compute_resources(struct pipe_context * ctx_,
|
|||
if (resources[i]->base.writable) {
|
||||
assert(i+1 < 12);
|
||||
|
||||
evergreen_set_rat(ctx->cs_shader_state.shader, i+1,
|
||||
evergreen_set_rat(rctx->cs_shader_state.shader, i+1,
|
||||
(struct r600_resource *)resources[i]->base.texture,
|
||||
buffer->chunk->start_in_dw*4,
|
||||
resources[i]->base.texture->width0);
|
||||
}
|
||||
|
||||
evergreen_cs_set_vertex_buffer(ctx, vtx_id,
|
||||
evergreen_cs_set_vertex_buffer(rctx, vtx_id,
|
||||
buffer->chunk->start_in_dw * 4,
|
||||
resources[i]->base.texture);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void evergreen_set_global_binding(
|
||||
struct pipe_context *ctx_, unsigned first, unsigned n,
|
||||
struct pipe_resource **resources,
|
||||
uint32_t **handles)
|
||||
static void evergreen_set_global_binding(struct pipe_context *ctx,
|
||||
unsigned first, unsigned n,
|
||||
struct pipe_resource **resources,
|
||||
uint32_t **handles)
|
||||
{
|
||||
struct r600_context *ctx = (struct r600_context *)ctx_;
|
||||
struct compute_memory_pool *pool = ctx->screen->global_pool;
|
||||
struct r600_context *rctx = (struct r600_context *)ctx;
|
||||
struct compute_memory_pool *pool = rctx->screen->global_pool;
|
||||
struct r600_resource_global **buffers =
|
||||
(struct r600_resource_global **)resources;
|
||||
unsigned i;
|
||||
|
||||
COMPUTE_DBG(ctx->screen, "*** evergreen_set_global_binding first = %u n = %u\n",
|
||||
COMPUTE_DBG(rctx->screen, "*** evergreen_set_global_binding first = %u n = %u\n",
|
||||
first, n);
|
||||
|
||||
if (!resources) {
|
||||
|
@ -687,7 +667,7 @@ static void evergreen_set_global_binding(
|
|||
buffers[i]->chunk->status |= ITEM_FOR_PROMOTING;
|
||||
}
|
||||
|
||||
if (compute_memory_finalize_pending(pool, ctx_) == -1) {
|
||||
if (compute_memory_finalize_pending(pool, ctx) == -1) {
|
||||
/* XXX: Unset */
|
||||
return;
|
||||
}
|
||||
|
@ -705,8 +685,8 @@ static void evergreen_set_global_binding(
|
|||
*(handles[i]) = util_cpu_to_le32(handle);
|
||||
}
|
||||
|
||||
evergreen_set_rat(ctx->cs_shader_state.shader, 0, pool->bo, 0, pool->size_in_dw * 4);
|
||||
evergreen_cs_set_vertex_buffer(ctx, 1, 0,
|
||||
evergreen_set_rat(rctx->cs_shader_state.shader, 0, pool->bo, 0, pool->size_in_dw * 4);
|
||||
evergreen_cs_set_vertex_buffer(rctx, 1, 0,
|
||||
(struct pipe_resource*)pool->bo);
|
||||
}
|
||||
|
||||
|
@ -721,9 +701,9 @@ static void evergreen_set_global_binding(
|
|||
* functions evergreen_init_atom_start_cs or cayman_init_atom_start_cs depending
|
||||
* on the GPU family.
|
||||
*/
|
||||
void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
|
||||
void evergreen_init_atom_start_compute_cs(struct r600_context *rctx)
|
||||
{
|
||||
struct r600_command_buffer *cb = &ctx->start_compute_cs_cmd;
|
||||
struct r600_command_buffer *cb = &rctx->start_compute_cs_cmd;
|
||||
int num_threads;
|
||||
int num_stack_entries;
|
||||
|
||||
|
@ -742,7 +722,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
|
|||
r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0));
|
||||
r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
|
||||
|
||||
switch (ctx->b.family) {
|
||||
switch (rctx->b.family) {
|
||||
case CHIP_CEDAR:
|
||||
default:
|
||||
num_threads = 128;
|
||||
|
@ -788,18 +768,18 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
|
|||
}
|
||||
|
||||
/* Config Registers */
|
||||
if (ctx->b.chip_class < CAYMAN)
|
||||
evergreen_init_common_regs(ctx, cb, ctx->b.chip_class, ctx->b.family,
|
||||
ctx->screen->b.info.drm_minor);
|
||||
if (rctx->b.chip_class < CAYMAN)
|
||||
evergreen_init_common_regs(rctx, cb, rctx->b.chip_class, rctx->b.family,
|
||||
rctx->screen->b.info.drm_minor);
|
||||
else
|
||||
cayman_init_common_regs(cb, ctx->b.chip_class, ctx->b.family,
|
||||
ctx->screen->b.info.drm_minor);
|
||||
cayman_init_common_regs(cb, rctx->b.chip_class, rctx->b.family,
|
||||
rctx->screen->b.info.drm_minor);
|
||||
|
||||
/* The primitive type always needs to be POINTLIST for compute. */
|
||||
r600_store_config_reg(cb, R_008958_VGT_PRIMITIVE_TYPE,
|
||||
V_008958_DI_PT_POINTLIST);
|
||||
|
||||
if (ctx->b.chip_class < CAYMAN) {
|
||||
if (rctx->b.chip_class < CAYMAN) {
|
||||
|
||||
/* These registers control which simds can be used by each stage.
|
||||
* The default for these registers is 0xffffffff, which means
|
||||
|
@ -849,7 +829,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
|
|||
* allocate the appropriate amount of LDS dwords using the
|
||||
* CM_R_0288E8_SQ_LDS_ALLOC register.
|
||||
*/
|
||||
if (ctx->b.chip_class < CAYMAN) {
|
||||
if (rctx->b.chip_class < CAYMAN) {
|
||||
r600_store_config_reg(cb, R_008E2C_SQ_LDS_RESOURCE_MGMT,
|
||||
S_008E2C_NUM_PS_LDS(0x0000) | S_008E2C_NUM_LS_LDS(8192));
|
||||
} else {
|
||||
|
@ -860,7 +840,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
|
|||
|
||||
/* Context Registers */
|
||||
|
||||
if (ctx->b.chip_class < CAYMAN) {
|
||||
if (rctx->b.chip_class < CAYMAN) {
|
||||
/* workaround for hw issues with dyn gpr - must set all limits
|
||||
* to 240 instead of 0, 0x1e == 240 / 8
|
||||
*/
|
||||
|
@ -902,21 +882,134 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx)
|
|||
eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (160 * 4), 0x1000FFF);
|
||||
}
|
||||
|
||||
void evergreen_init_compute_state_functions(struct r600_context *ctx)
|
||||
void evergreen_init_compute_state_functions(struct r600_context *rctx)
|
||||
{
|
||||
ctx->b.b.create_compute_state = evergreen_create_compute_state;
|
||||
ctx->b.b.delete_compute_state = evergreen_delete_compute_state;
|
||||
ctx->b.b.bind_compute_state = evergreen_bind_compute_state;
|
||||
// ctx->context.create_sampler_view = evergreen_compute_create_sampler_view;
|
||||
ctx->b.b.set_compute_resources = evergreen_set_compute_resources;
|
||||
ctx->b.b.set_global_binding = evergreen_set_global_binding;
|
||||
ctx->b.b.launch_grid = evergreen_launch_grid;
|
||||
rctx->b.b.create_compute_state = evergreen_create_compute_state;
|
||||
rctx->b.b.delete_compute_state = evergreen_delete_compute_state;
|
||||
rctx->b.b.bind_compute_state = evergreen_bind_compute_state;
|
||||
// rctx->context.create_sampler_view = evergreen_compute_create_sampler_view;
|
||||
rctx->b.b.set_compute_resources = evergreen_set_compute_resources;
|
||||
rctx->b.b.set_global_binding = evergreen_set_global_binding;
|
||||
rctx->b.b.launch_grid = evergreen_launch_grid;
|
||||
|
||||
}
|
||||
|
||||
struct pipe_resource *r600_compute_global_buffer_create(
|
||||
struct pipe_screen *screen,
|
||||
const struct pipe_resource *templ)
|
||||
static void *r600_compute_global_transfer_map(struct pipe_context *ctx,
|
||||
struct pipe_resource *resource,
|
||||
unsigned level,
|
||||
unsigned usage,
|
||||
const struct pipe_box *box,
|
||||
struct pipe_transfer **ptransfer)
|
||||
{
|
||||
struct r600_context *rctx = (struct r600_context*)ctx;
|
||||
struct compute_memory_pool *pool = rctx->screen->global_pool;
|
||||
struct r600_resource_global* buffer =
|
||||
(struct r600_resource_global*)resource;
|
||||
|
||||
struct compute_memory_item *item = buffer->chunk;
|
||||
struct pipe_resource *dst = NULL;
|
||||
unsigned offset = box->x;
|
||||
|
||||
if (is_item_in_pool(item)) {
|
||||
compute_memory_demote_item(pool, item, ctx);
|
||||
}
|
||||
else {
|
||||
if (item->real_buffer == NULL) {
|
||||
item->real_buffer =
|
||||
r600_compute_buffer_alloc_vram(pool->screen, item->size_in_dw * 4);
|
||||
}
|
||||
}
|
||||
|
||||
dst = (struct pipe_resource*)item->real_buffer;
|
||||
|
||||
if (usage & PIPE_TRANSFER_READ)
|
||||
buffer->chunk->status |= ITEM_MAPPED_FOR_READING;
|
||||
|
||||
COMPUTE_DBG(rctx->screen, "* r600_compute_global_transfer_map()\n"
|
||||
"level = %u, usage = %u, box(x = %u, y = %u, z = %u "
|
||||
"width = %u, height = %u, depth = %u)\n", level, usage,
|
||||
box->x, box->y, box->z, box->width, box->height,
|
||||
box->depth);
|
||||
COMPUTE_DBG(rctx->screen, "Buffer id = %"PRIi64" offset = "
|
||||
"%u (box.x)\n", item->id, box->x);
|
||||
|
||||
|
||||
assert(resource->target == PIPE_BUFFER);
|
||||
assert(resource->bind & PIPE_BIND_GLOBAL);
|
||||
assert(box->x >= 0);
|
||||
assert(box->y == 0);
|
||||
assert(box->z == 0);
|
||||
|
||||
///TODO: do it better, mapping is not possible if the pool is too big
|
||||
return pipe_buffer_map_range(ctx, dst,
|
||||
offset, box->width, usage, ptransfer);
|
||||
}
|
||||
|
||||
static void r600_compute_global_transfer_unmap(struct pipe_context *ctx,
|
||||
struct pipe_transfer *transfer)
|
||||
{
|
||||
/* struct r600_resource_global are not real resources, they just map
|
||||
* to an offset within the compute memory pool. The function
|
||||
* r600_compute_global_transfer_map() maps the memory pool
|
||||
* resource rather than the struct r600_resource_global passed to
|
||||
* it as an argument and then initializes ptransfer->resource with
|
||||
* the memory pool resource (via pipe_buffer_map_range).
|
||||
* When transfer_unmap is called it uses the memory pool's
|
||||
* vtable which calls r600_buffer_transfer_unmap() rather than
|
||||
* this function.
|
||||
*/
|
||||
assert (!"This function should not be called");
|
||||
}
|
||||
|
||||
static void r600_compute_global_transfer_flush_region(struct pipe_context *ctx,
|
||||
struct pipe_transfer *transfer,
|
||||
const struct pipe_box *box)
|
||||
{
|
||||
assert(0 && "TODO");
|
||||
}
|
||||
|
||||
static void r600_compute_global_transfer_inline_write(struct pipe_context *pipe,
|
||||
struct pipe_resource *resource,
|
||||
unsigned level,
|
||||
unsigned usage,
|
||||
const struct pipe_box *box,
|
||||
const void *data,
|
||||
unsigned stride,
|
||||
unsigned layer_stride)
|
||||
{
|
||||
assert(0 && "TODO");
|
||||
}
|
||||
|
||||
static void r600_compute_global_buffer_destroy(struct pipe_screen *screen,
|
||||
struct pipe_resource *res)
|
||||
{
|
||||
struct r600_resource_global* buffer = NULL;
|
||||
struct r600_screen* rscreen = NULL;
|
||||
|
||||
assert(res->target == PIPE_BUFFER);
|
||||
assert(res->bind & PIPE_BIND_GLOBAL);
|
||||
|
||||
buffer = (struct r600_resource_global*)res;
|
||||
rscreen = (struct r600_screen*)screen;
|
||||
|
||||
compute_memory_free(rscreen->global_pool, buffer->chunk->id);
|
||||
|
||||
buffer->chunk = NULL;
|
||||
free(res);
|
||||
}
|
||||
|
||||
static const struct u_resource_vtbl r600_global_buffer_vtbl =
|
||||
{
|
||||
u_default_resource_get_handle, /* get_handle */
|
||||
r600_compute_global_buffer_destroy, /* resource_destroy */
|
||||
r600_compute_global_transfer_map, /* transfer_map */
|
||||
r600_compute_global_transfer_flush_region,/* transfer_flush_region */
|
||||
r600_compute_global_transfer_unmap, /* transfer_unmap */
|
||||
r600_compute_global_transfer_inline_write /* transfer_inline_write */
|
||||
};
|
||||
|
||||
struct pipe_resource *r600_compute_global_buffer_create(struct pipe_screen *screen,
|
||||
const struct pipe_resource *templ)
|
||||
{
|
||||
struct r600_resource_global* result = NULL;
|
||||
struct r600_screen* rscreen = NULL;
|
||||
|
@ -953,112 +1046,3 @@ struct pipe_resource *r600_compute_global_buffer_create(
|
|||
|
||||
return &result->base.b.b;
|
||||
}
|
||||
|
||||
void r600_compute_global_buffer_destroy(
|
||||
struct pipe_screen *screen,
|
||||
struct pipe_resource *res)
|
||||
{
|
||||
struct r600_resource_global* buffer = NULL;
|
||||
struct r600_screen* rscreen = NULL;
|
||||
|
||||
assert(res->target == PIPE_BUFFER);
|
||||
assert(res->bind & PIPE_BIND_GLOBAL);
|
||||
|
||||
buffer = (struct r600_resource_global*)res;
|
||||
rscreen = (struct r600_screen*)screen;
|
||||
|
||||
compute_memory_free(rscreen->global_pool, buffer->chunk->id);
|
||||
|
||||
buffer->chunk = NULL;
|
||||
free(res);
|
||||
}
|
||||
|
||||
void *r600_compute_global_transfer_map(
|
||||
struct pipe_context *ctx_,
|
||||
struct pipe_resource *resource,
|
||||
unsigned level,
|
||||
unsigned usage,
|
||||
const struct pipe_box *box,
|
||||
struct pipe_transfer **ptransfer)
|
||||
{
|
||||
struct r600_context *rctx = (struct r600_context*)ctx_;
|
||||
struct compute_memory_pool *pool = rctx->screen->global_pool;
|
||||
struct r600_resource_global* buffer =
|
||||
(struct r600_resource_global*)resource;
|
||||
|
||||
struct compute_memory_item *item = buffer->chunk;
|
||||
struct pipe_resource *dst = NULL;
|
||||
unsigned offset = box->x;
|
||||
|
||||
if (is_item_in_pool(item)) {
|
||||
compute_memory_demote_item(pool, item, ctx_);
|
||||
}
|
||||
else {
|
||||
if (item->real_buffer == NULL) {
|
||||
item->real_buffer =
|
||||
r600_compute_buffer_alloc_vram(pool->screen, item->size_in_dw * 4);
|
||||
}
|
||||
}
|
||||
|
||||
dst = (struct pipe_resource*)item->real_buffer;
|
||||
|
||||
if (usage & PIPE_TRANSFER_READ)
|
||||
buffer->chunk->status |= ITEM_MAPPED_FOR_READING;
|
||||
|
||||
COMPUTE_DBG(rctx->screen, "* r600_compute_global_transfer_map()\n"
|
||||
"level = %u, usage = %u, box(x = %u, y = %u, z = %u "
|
||||
"width = %u, height = %u, depth = %u)\n", level, usage,
|
||||
box->x, box->y, box->z, box->width, box->height,
|
||||
box->depth);
|
||||
COMPUTE_DBG(rctx->screen, "Buffer id = %"PRIi64" offset = "
|
||||
"%u (box.x)\n", item->id, box->x);
|
||||
|
||||
|
||||
assert(resource->target == PIPE_BUFFER);
|
||||
assert(resource->bind & PIPE_BIND_GLOBAL);
|
||||
assert(box->x >= 0);
|
||||
assert(box->y == 0);
|
||||
assert(box->z == 0);
|
||||
|
||||
///TODO: do it better, mapping is not possible if the pool is too big
|
||||
return pipe_buffer_map_range(ctx_, dst,
|
||||
offset, box->width, usage, ptransfer);
|
||||
}
|
||||
|
||||
void r600_compute_global_transfer_unmap(
|
||||
struct pipe_context *ctx_,
|
||||
struct pipe_transfer* transfer)
|
||||
{
|
||||
/* struct r600_resource_global are not real resources, they just map
|
||||
* to an offset within the compute memory pool. The function
|
||||
* r600_compute_global_transfer_map() maps the memory pool
|
||||
* resource rather than the struct r600_resource_global passed to
|
||||
* it as an argument and then initializes ptransfer->resource with
|
||||
* the memory pool resource (via pipe_buffer_map_range).
|
||||
* When transfer_unmap is called it uses the memory pool's
|
||||
* vtable which calls r600_buffer_transfer_unmap() rather than
|
||||
* this function.
|
||||
*/
|
||||
assert (!"This function should not be called");
|
||||
}
|
||||
|
||||
void r600_compute_global_transfer_flush_region(
|
||||
struct pipe_context *ctx_,
|
||||
struct pipe_transfer *transfer,
|
||||
const struct pipe_box *box)
|
||||
{
|
||||
assert(0 && "TODO");
|
||||
}
|
||||
|
||||
void r600_compute_global_transfer_inline_write(
|
||||
struct pipe_context *pipe,
|
||||
struct pipe_resource *resource,
|
||||
unsigned level,
|
||||
unsigned usage,
|
||||
const struct pipe_box *box,
|
||||
const void *data,
|
||||
unsigned stride,
|
||||
unsigned layer_stride)
|
||||
{
|
||||
assert(0 && "TODO");
|
||||
}
|
||||
|
|
|
@ -38,26 +38,11 @@ struct r600_resource_global {
|
|||
struct compute_memory_item *chunk;
|
||||
};
|
||||
|
||||
void *evergreen_create_compute_state(struct pipe_context *ctx, const struct pipe_compute_state *cso);
|
||||
void evergreen_delete_compute_state(struct pipe_context *ctx, void *state);
|
||||
void evergreen_compute_upload_input(struct pipe_context *context, const uint *block_layout, const uint *grid_layout, const void *input);
|
||||
void evergreen_init_atom_start_compute_cs(struct r600_context *rctx);
|
||||
void evergreen_init_compute_state_functions(struct r600_context *rctx);
|
||||
void evergreen_emit_cs_shader(struct r600_context *rctx, struct r600_atom * atom);
|
||||
|
||||
struct r600_resource* r600_compute_buffer_alloc_vram(struct r600_screen *screen, unsigned size);
|
||||
struct pipe_resource *r600_compute_global_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ);
|
||||
void r600_compute_global_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *res);
|
||||
void *r600_compute_global_transfer_map(
|
||||
struct pipe_context *ctx_,
|
||||
struct pipe_resource *resource,
|
||||
unsigned level,
|
||||
unsigned usage,
|
||||
const struct pipe_box *box,
|
||||
struct pipe_transfer **ptransfer);
|
||||
void r600_compute_global_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer* transfer);
|
||||
void r600_compute_global_transfer_flush_region( struct pipe_context *, struct pipe_transfer *, const struct pipe_box *);
|
||||
void r600_compute_global_transfer_inline_write( struct pipe_context *, struct pipe_resource *, unsigned level,
|
||||
unsigned usage, const struct pipe_box *, const void *data, unsigned stride, unsigned layer_stride);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -281,6 +281,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_INVALIDATE_BUFFER:
|
||||
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
|
||||
case PIPE_CAP_QUERY_MEMORY_INFO:
|
||||
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
|
||||
return 1;
|
||||
|
||||
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
|
||||
|
|
|
@ -425,8 +425,9 @@ struct r600_common_context {
|
|||
unsigned flags; /* flush flags */
|
||||
|
||||
/* Queries. */
|
||||
/* The list of active queries. Only one query of each type can be active. */
|
||||
/* The list of active queries. */
|
||||
int num_occlusion_queries;
|
||||
int num_perfect_occlusion_queries;
|
||||
/* Keep track of non-timer queries, because they should be suspended
|
||||
* during context flushing.
|
||||
* The timer queries (TIME_ELAPSED) shouldn't be suspended for blits,
|
||||
|
|
|
@ -414,14 +414,22 @@ static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
|
|||
if (type == PIPE_QUERY_OCCLUSION_COUNTER ||
|
||||
type == PIPE_QUERY_OCCLUSION_PREDICATE) {
|
||||
bool old_enable = rctx->num_occlusion_queries != 0;
|
||||
bool enable;
|
||||
bool old_perfect_enable =
|
||||
rctx->num_perfect_occlusion_queries != 0;
|
||||
bool enable, perfect_enable;
|
||||
|
||||
rctx->num_occlusion_queries += diff;
|
||||
assert(rctx->num_occlusion_queries >= 0);
|
||||
|
||||
enable = rctx->num_occlusion_queries != 0;
|
||||
if (type == PIPE_QUERY_OCCLUSION_COUNTER) {
|
||||
rctx->num_perfect_occlusion_queries += diff;
|
||||
assert(rctx->num_perfect_occlusion_queries >= 0);
|
||||
}
|
||||
|
||||
if (enable != old_enable) {
|
||||
enable = rctx->num_occlusion_queries != 0;
|
||||
perfect_enable = rctx->num_perfect_occlusion_queries != 0;
|
||||
|
||||
if (enable != old_enable || perfect_enable != old_perfect_enable) {
|
||||
rctx->set_occlusion_query_state(&rctx->b, enable);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -329,6 +329,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
|
|||
struct r600_resource *res = (struct r600_resource*)resource;
|
||||
struct r600_texture *rtex = (struct r600_texture*)resource;
|
||||
struct radeon_bo_metadata metadata;
|
||||
bool update_metadata = false;
|
||||
|
||||
/* This is not supported now, but it might be required for OpenCL
|
||||
* interop in the future.
|
||||
|
@ -337,29 +338,30 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
|
|||
(resource->nr_samples > 1 || rtex->is_depth))
|
||||
return false;
|
||||
|
||||
if (!res->is_shared) {
|
||||
res->is_shared = true;
|
||||
res->external_usage = usage;
|
||||
if (resource->target != PIPE_BUFFER) {
|
||||
/* Since shader image stores don't support DCC on VI,
|
||||
* disable it for external clients that want write
|
||||
* access.
|
||||
*/
|
||||
if (usage & PIPE_HANDLE_USAGE_WRITE && rtex->dcc_offset) {
|
||||
r600_texture_disable_dcc(rscreen, rtex);
|
||||
update_metadata = true;
|
||||
}
|
||||
|
||||
if (resource->target != PIPE_BUFFER) {
|
||||
/* Since shader image stores don't support DCC on VI,
|
||||
* disable it for external clients that want write
|
||||
* access.
|
||||
if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
|
||||
rtex->cmask.size) {
|
||||
/* Eliminate fast clear (both CMASK and DCC) */
|
||||
r600_eliminate_fast_color_clear(rscreen, rtex);
|
||||
|
||||
/* Disable CMASK if flush_resource isn't going
|
||||
* to be called.
|
||||
*/
|
||||
if (usage & PIPE_HANDLE_USAGE_WRITE)
|
||||
r600_texture_disable_dcc(rscreen, rtex);
|
||||
r600_texture_disable_cmask(rscreen, rtex);
|
||||
update_metadata = true;
|
||||
}
|
||||
|
||||
if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) {
|
||||
/* Eliminate fast clear (both CMASK and DCC) */
|
||||
r600_eliminate_fast_color_clear(rscreen, rtex);
|
||||
|
||||
/* Disable CMASK if flush_resource isn't going
|
||||
* to be called.
|
||||
*/
|
||||
r600_texture_disable_cmask(rscreen, rtex);
|
||||
}
|
||||
|
||||
/* Set metadata. */
|
||||
/* Set metadata. */
|
||||
if (!res->is_shared || update_metadata) {
|
||||
r600_texture_init_metadata(rtex, &metadata);
|
||||
if (rscreen->query_opaque_metadata)
|
||||
rscreen->query_opaque_metadata(rscreen, rtex,
|
||||
|
@ -367,8 +369,18 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
|
|||
|
||||
rscreen->ws->buffer_set_metadata(res->buf, &metadata);
|
||||
}
|
||||
}
|
||||
|
||||
if (res->is_shared) {
|
||||
/* USAGE_EXPLICIT_FLUSH must be cleared if at least one user
|
||||
* doesn't set it.
|
||||
*/
|
||||
res->external_usage |= usage & ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
|
||||
if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
|
||||
res->external_usage &= ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
|
||||
} else {
|
||||
assert(res->external_usage == usage);
|
||||
res->is_shared = true;
|
||||
res->external_usage = usage;
|
||||
}
|
||||
|
||||
return rscreen->ws->buffer_get_handle(res->buf,
|
||||
|
|
|
@ -55,6 +55,13 @@ enum radeon_llvm_shader_type {
|
|||
RADEON_LLVM_SHADER_CS = 3,
|
||||
};
|
||||
|
||||
enum radeon_llvm_calling_convention {
|
||||
RADEON_LLVM_AMDGPU_VS = 87,
|
||||
RADEON_LLVM_AMDGPU_GS = 88,
|
||||
RADEON_LLVM_AMDGPU_PS = 89,
|
||||
RADEON_LLVM_AMDGPU_CS = 90,
|
||||
};
|
||||
|
||||
void radeon_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
|
||||
{
|
||||
char str[16];
|
||||
|
@ -71,27 +78,35 @@ void radeon_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
|
|||
void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
|
||||
{
|
||||
enum radeon_llvm_shader_type llvm_type;
|
||||
enum radeon_llvm_calling_convention calling_conv;
|
||||
|
||||
switch (type) {
|
||||
case TGSI_PROCESSOR_VERTEX:
|
||||
case TGSI_PROCESSOR_TESS_CTRL:
|
||||
case TGSI_PROCESSOR_TESS_EVAL:
|
||||
llvm_type = RADEON_LLVM_SHADER_VS;
|
||||
calling_conv = RADEON_LLVM_AMDGPU_VS;
|
||||
break;
|
||||
case TGSI_PROCESSOR_GEOMETRY:
|
||||
llvm_type = RADEON_LLVM_SHADER_GS;
|
||||
calling_conv = RADEON_LLVM_AMDGPU_GS;
|
||||
break;
|
||||
case TGSI_PROCESSOR_FRAGMENT:
|
||||
llvm_type = RADEON_LLVM_SHADER_PS;
|
||||
calling_conv = RADEON_LLVM_AMDGPU_PS;
|
||||
break;
|
||||
case TGSI_PROCESSOR_COMPUTE:
|
||||
llvm_type = RADEON_LLVM_SHADER_CS;
|
||||
calling_conv = RADEON_LLVM_AMDGPU_CS;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
radeon_llvm_add_attribute(F, "ShaderType", llvm_type);
|
||||
if (HAVE_LLVM >= 0x309)
|
||||
LLVMSetFunctionCallConv(F, calling_conv);
|
||||
else
|
||||
radeon_llvm_add_attribute(F, "ShaderType", llvm_type);
|
||||
}
|
||||
|
||||
static void init_r600_target()
|
||||
|
|
|
@ -246,14 +246,14 @@ si_flush_depth_textures(struct si_context *sctx,
|
|||
struct si_textures_info *textures)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned mask = textures->depth_texture_mask;
|
||||
uint64_t mask = textures->depth_texture_mask;
|
||||
|
||||
while (mask) {
|
||||
struct pipe_sampler_view *view;
|
||||
struct si_sampler_view *sview;
|
||||
struct r600_texture *tex;
|
||||
|
||||
i = u_bit_scan(&mask);
|
||||
i = u_bit_scan64(&mask);
|
||||
|
||||
view = textures->views.views[i];
|
||||
assert(view);
|
||||
|
@ -329,13 +329,13 @@ si_decompress_sampler_color_textures(struct si_context *sctx,
|
|||
struct si_textures_info *textures)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned mask = textures->compressed_colortex_mask;
|
||||
uint64_t mask = textures->compressed_colortex_mask;
|
||||
|
||||
while (mask) {
|
||||
struct pipe_sampler_view *view;
|
||||
struct r600_texture *tex;
|
||||
|
||||
i = u_bit_scan(&mask);
|
||||
i = u_bit_scan64(&mask);
|
||||
|
||||
view = textures->views.views[i];
|
||||
assert(view);
|
||||
|
@ -355,13 +355,13 @@ si_decompress_image_color_textures(struct si_context *sctx,
|
|||
struct si_images_info *images)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned mask = images->compressed_colortex_mask;
|
||||
uint64_t mask = images->compressed_colortex_mask;
|
||||
|
||||
while (mask) {
|
||||
const struct pipe_image_view *view;
|
||||
struct r600_texture *tex;
|
||||
|
||||
i = u_bit_scan(&mask);
|
||||
i = u_bit_scan64(&mask);
|
||||
|
||||
view = &images->views[i];
|
||||
assert(view->resource->target != PIPE_BUFFER);
|
||||
|
|
|
@ -264,8 +264,8 @@ static void si_set_sampler_views(struct pipe_context *ctx,
|
|||
unsigned slot = start + i;
|
||||
|
||||
if (!views || !views[i]) {
|
||||
samplers->depth_texture_mask &= ~(1 << slot);
|
||||
samplers->compressed_colortex_mask &= ~(1 << slot);
|
||||
samplers->depth_texture_mask &= ~(1llu << slot);
|
||||
samplers->compressed_colortex_mask &= ~(1llu << slot);
|
||||
si_set_sampler_view(sctx, &samplers->views, slot, NULL);
|
||||
continue;
|
||||
}
|
||||
|
@ -277,18 +277,18 @@ static void si_set_sampler_views(struct pipe_context *ctx,
|
|||
(struct r600_texture*)views[i]->texture;
|
||||
|
||||
if (rtex->is_depth && !rtex->is_flushing_texture) {
|
||||
samplers->depth_texture_mask |= 1 << slot;
|
||||
samplers->depth_texture_mask |= 1llu << slot;
|
||||
} else {
|
||||
samplers->depth_texture_mask &= ~(1 << slot);
|
||||
samplers->depth_texture_mask &= ~(1llu << slot);
|
||||
}
|
||||
if (is_compressed_colortex(rtex)) {
|
||||
samplers->compressed_colortex_mask |= 1 << slot;
|
||||
samplers->compressed_colortex_mask |= 1llu << slot;
|
||||
} else {
|
||||
samplers->compressed_colortex_mask &= ~(1 << slot);
|
||||
samplers->compressed_colortex_mask &= ~(1llu << slot);
|
||||
}
|
||||
} else {
|
||||
samplers->depth_texture_mask &= ~(1 << slot);
|
||||
samplers->compressed_colortex_mask &= ~(1 << slot);
|
||||
samplers->depth_texture_mask &= ~(1llu << slot);
|
||||
samplers->compressed_colortex_mask &= ~(1llu << slot);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -306,9 +306,9 @@ si_samplers_update_compressed_colortex_mask(struct si_textures_info *samplers)
|
|||
struct r600_texture *rtex = (struct r600_texture *)res;
|
||||
|
||||
if (is_compressed_colortex(rtex)) {
|
||||
samplers->compressed_colortex_mask |= 1 << i;
|
||||
samplers->compressed_colortex_mask |= 1llu << i;
|
||||
} else {
|
||||
samplers->compressed_colortex_mask &= ~(1 << i);
|
||||
samplers->compressed_colortex_mask &= ~(1llu << i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -307,6 +307,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
|
||||
case PIPE_CAP_QUERY_MEMORY_INFO:
|
||||
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
|
||||
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
|
||||
return 1;
|
||||
|
||||
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
|
||||
|
@ -522,7 +523,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
|
|||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
|
||||
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
|
||||
return 16;
|
||||
return SI_NUM_USER_SAMPLERS;
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
case PIPE_SHADER_CAP_SUPPORTED_IRS:
|
||||
|
|
|
@ -137,8 +137,8 @@ struct si_cs_shader_state {
|
|||
|
||||
struct si_textures_info {
|
||||
struct si_sampler_views views;
|
||||
uint32_t depth_texture_mask; /* which textures are depth */
|
||||
uint32_t compressed_colortex_mask;
|
||||
uint64_t depth_texture_mask; /* which textures are depth */
|
||||
uint64_t compressed_colortex_mask;
|
||||
};
|
||||
|
||||
struct si_images_info {
|
||||
|
|
|
@ -1328,8 +1328,9 @@ static LLVMValueRef fetch_constant(
|
|||
if (reg->Register.Dimension && reg->Dimension.Indirect) {
|
||||
LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS);
|
||||
LLVMValueRef index;
|
||||
index = get_indirect_index(ctx, &reg->DimIndirect,
|
||||
reg->Dimension.Index);
|
||||
index = get_bounded_indirect_index(ctx, &reg->DimIndirect,
|
||||
reg->Dimension.Index,
|
||||
SI_NUM_USER_CONST_BUFFERS);
|
||||
bufp = build_indexed_load_const(ctx, ptr, index);
|
||||
} else
|
||||
bufp = ctx->const_buffers[buf];
|
||||
|
@ -3356,7 +3357,10 @@ static void tex_fetch_ptrs(
|
|||
const struct tgsi_full_src_register *reg = &emit_data->inst->Src[sampler_src];
|
||||
LLVMValueRef ind_index;
|
||||
|
||||
ind_index = get_indirect_index(ctx, &reg->Indirect, reg->Register.Index);
|
||||
ind_index = get_bounded_indirect_index(ctx,
|
||||
&reg->Indirect,
|
||||
reg->Register.Index,
|
||||
SI_NUM_USER_SAMPLERS);
|
||||
|
||||
*res_ptr = get_sampler_desc(ctx, ind_index, DESC_IMAGE);
|
||||
|
||||
|
@ -4278,6 +4282,14 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
|
|||
struct si_shader_context *ctx = si_shader_context(bld_base);
|
||||
struct gallivm_state *gallivm = bld_base->base.gallivm;
|
||||
|
||||
/* The real barrier instruction isn’t needed, because an entire patch
|
||||
* always fits into a single wave.
|
||||
*/
|
||||
if (ctx->type == TGSI_PROCESSOR_TESS_CTRL) {
|
||||
emit_optimization_barrier(ctx);
|
||||
return;
|
||||
}
|
||||
|
||||
lp_build_intrinsic(gallivm->builder,
|
||||
HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.s.barrier"
|
||||
: "llvm.AMDGPU.barrier.local",
|
||||
|
|
|
@ -830,25 +830,93 @@ static void si_set_scissor_states(struct pipe_context *ctx,
|
|||
for (i = 0; i < num_scissors; i++)
|
||||
sctx->scissors.states[start_slot + i] = state[i];
|
||||
|
||||
if (!sctx->queued.named.rasterizer ||
|
||||
!sctx->queued.named.rasterizer->scissor_enable)
|
||||
return;
|
||||
|
||||
sctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
|
||||
si_mark_atom_dirty(sctx, &sctx->scissors.atom);
|
||||
}
|
||||
|
||||
static void si_get_scissor_from_viewport(struct pipe_viewport_state *vp,
|
||||
struct pipe_scissor_state *scissor)
|
||||
{
|
||||
/* These must be signed, unlike pipe_scissor_state. */
|
||||
int minx, miny, maxx, maxy, tmp;
|
||||
|
||||
/* Convert (-1, -1) and (1, 1) from clip space into window space. */
|
||||
minx = -vp->scale[0] + vp->translate[0];
|
||||
miny = -vp->scale[1] + vp->translate[1];
|
||||
maxx = vp->scale[0] + vp->translate[0];
|
||||
maxy = vp->scale[1] + vp->translate[1];
|
||||
|
||||
/* r600_draw_rectangle sets this. Disable the scissor. */
|
||||
if (minx == -1 && miny == -1 && maxx == 1 && maxy == 1) {
|
||||
minx = miny = 0;
|
||||
maxx = maxy = 16384;
|
||||
}
|
||||
|
||||
/* Handle inverted viewports. */
|
||||
if (minx > maxx) {
|
||||
tmp = minx;
|
||||
minx = maxx;
|
||||
maxx = tmp;
|
||||
}
|
||||
if (miny > maxy) {
|
||||
tmp = miny;
|
||||
miny = maxy;
|
||||
maxy = tmp;
|
||||
}
|
||||
|
||||
scissor->minx = CLAMP(minx, 0, 16384);
|
||||
scissor->miny = CLAMP(miny, 0, 16384);
|
||||
scissor->maxx = CLAMP(maxx, 0, 16384);
|
||||
scissor->maxy = CLAMP(maxy, 0, 16384);
|
||||
}
|
||||
|
||||
static void si_clip_scissor(struct pipe_scissor_state *out,
|
||||
struct pipe_scissor_state *clip)
|
||||
{
|
||||
out->minx = MAX2(out->minx, clip->minx);
|
||||
out->miny = MAX2(out->miny, clip->miny);
|
||||
out->maxx = MIN2(out->maxx, clip->maxx);
|
||||
out->maxy = MIN2(out->maxy, clip->maxy);
|
||||
}
|
||||
|
||||
static void si_emit_one_scissor(struct radeon_winsys_cs *cs,
|
||||
struct pipe_viewport_state *vp,
|
||||
struct pipe_scissor_state *scissor)
|
||||
{
|
||||
struct pipe_scissor_state final;
|
||||
|
||||
/* Since the guard band disables clipping, we have to clip per-pixel
|
||||
* using a scissor.
|
||||
*/
|
||||
si_get_scissor_from_viewport(vp, &final);
|
||||
|
||||
if (scissor)
|
||||
si_clip_scissor(&final, scissor);
|
||||
|
||||
radeon_emit(cs, S_028250_TL_X(final.minx) |
|
||||
S_028250_TL_Y(final.miny) |
|
||||
S_028250_WINDOW_OFFSET_DISABLE(1));
|
||||
radeon_emit(cs, S_028254_BR_X(final.maxx) |
|
||||
S_028254_BR_Y(final.maxy));
|
||||
}
|
||||
|
||||
static void si_emit_scissors(struct si_context *sctx, struct r600_atom *atom)
|
||||
{
|
||||
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
|
||||
struct pipe_scissor_state *states = sctx->scissors.states;
|
||||
unsigned mask = sctx->scissors.dirty_mask;
|
||||
bool scissor_enable = sctx->queued.named.rasterizer->scissor_enable;
|
||||
|
||||
/* The simple case: Only 1 viewport is active. */
|
||||
if (mask & 1 &&
|
||||
!si_get_vs_info(sctx)->writes_viewport_index) {
|
||||
radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
|
||||
radeon_emit(cs, S_028250_TL_X(states[0].minx) |
|
||||
S_028250_TL_Y(states[0].miny) |
|
||||
S_028250_WINDOW_OFFSET_DISABLE(1));
|
||||
radeon_emit(cs, S_028254_BR_X(states[0].maxx) |
|
||||
S_028254_BR_Y(states[0].maxy));
|
||||
si_emit_one_scissor(cs, &sctx->viewports.states[0],
|
||||
scissor_enable ? &states[0] : NULL);
|
||||
sctx->scissors.dirty_mask &= ~1; /* clear one bit */
|
||||
return;
|
||||
}
|
||||
|
@ -861,11 +929,8 @@ static void si_emit_scissors(struct si_context *sctx, struct r600_atom *atom)
|
|||
radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL +
|
||||
start * 4 * 2, count * 2);
|
||||
for (i = start; i < start+count; i++) {
|
||||
radeon_emit(cs, S_028250_TL_X(states[i].minx) |
|
||||
S_028250_TL_Y(states[i].miny) |
|
||||
S_028250_WINDOW_OFFSET_DISABLE(1));
|
||||
radeon_emit(cs, S_028254_BR_X(states[i].maxx) |
|
||||
S_028254_BR_Y(states[i].maxy));
|
||||
si_emit_one_scissor(cs, &sctx->viewports.states[i],
|
||||
scissor_enable ? &states[i] : NULL);
|
||||
}
|
||||
}
|
||||
sctx->scissors.dirty_mask = 0;
|
||||
|
@ -883,7 +948,9 @@ static void si_set_viewport_states(struct pipe_context *ctx,
|
|||
sctx->viewports.states[start_slot + i] = state[i];
|
||||
|
||||
sctx->viewports.dirty_mask |= ((1 << num_viewports) - 1) << start_slot;
|
||||
sctx->scissors.dirty_mask |= ((1 << num_viewports) - 1) << start_slot;
|
||||
si_mark_atom_dirty(sctx, &sctx->viewports.atom);
|
||||
si_mark_atom_dirty(sctx, &sctx->scissors.atom);
|
||||
}
|
||||
|
||||
static void si_emit_viewports(struct si_context *sctx, struct r600_atom *atom)
|
||||
|
@ -980,6 +1047,7 @@ static void *si_create_rs_state(struct pipe_context *ctx,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
rs->scissor_enable = state->scissor;
|
||||
rs->two_side = state->light_twoside;
|
||||
rs->multisample_enable = state->multisample;
|
||||
rs->force_persample_interp = state->force_persample_interp;
|
||||
|
@ -1038,7 +1106,7 @@ static void *si_create_rs_state(struct pipe_context *ctx,
|
|||
S_028A48_MSAA_ENABLE(state->multisample ||
|
||||
state->poly_smooth ||
|
||||
state->line_smooth) |
|
||||
S_028A48_VPORT_SCISSOR_ENABLE(state->scissor));
|
||||
S_028A48_VPORT_SCISSOR_ENABLE(1));
|
||||
|
||||
si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL,
|
||||
S_028BE4_PIX_CENTER(state->half_pixel_center) |
|
||||
|
@ -1105,6 +1173,11 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
|
|||
(!old_rs || old_rs->multisample_enable != rs->multisample_enable))
|
||||
si_mark_atom_dirty(sctx, &sctx->db_render_state);
|
||||
|
||||
if (!old_rs || old_rs->scissor_enable != rs->scissor_enable) {
|
||||
sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
|
||||
si_mark_atom_dirty(sctx, &sctx->scissors.atom);
|
||||
}
|
||||
|
||||
si_pm4_bind_state(sctx, rasterizer, rs);
|
||||
si_update_poly_offset_state(sctx);
|
||||
|
||||
|
@ -1310,16 +1383,18 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
|
|||
|
||||
/* DB_COUNT_CONTROL (occlusion queries) */
|
||||
if (sctx->b.num_occlusion_queries > 0) {
|
||||
bool perfect = sctx->b.num_perfect_occlusion_queries > 0;
|
||||
|
||||
if (sctx->b.chip_class >= CIK) {
|
||||
radeon_emit(cs,
|
||||
S_028004_PERFECT_ZPASS_COUNTS(1) |
|
||||
S_028004_PERFECT_ZPASS_COUNTS(perfect) |
|
||||
S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
|
||||
S_028004_ZPASS_ENABLE(1) |
|
||||
S_028004_SLICE_EVEN_ENABLE(1) |
|
||||
S_028004_SLICE_ODD_ENABLE(1));
|
||||
} else {
|
||||
radeon_emit(cs,
|
||||
S_028004_PERFECT_ZPASS_COUNTS(1) |
|
||||
S_028004_PERFECT_ZPASS_COUNTS(perfect) |
|
||||
S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples));
|
||||
}
|
||||
} else {
|
||||
|
@ -2000,6 +2075,11 @@ boolean si_is_format_supported(struct pipe_screen *screen,
|
|||
case 4:
|
||||
case 8:
|
||||
break;
|
||||
case 16:
|
||||
if (format == PIPE_FORMAT_NONE)
|
||||
return TRUE;
|
||||
else
|
||||
return FALSE;
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -2623,6 +2703,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
|
|||
constbuf.user_buffer = sctx->b.sample_locations_16x;
|
||||
break;
|
||||
default:
|
||||
R600_ERR("Requested an invalid number of samples %i.\n",
|
||||
sctx->framebuffer.nr_samples);
|
||||
assert(0);
|
||||
}
|
||||
constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4;
|
||||
|
|
|
@ -68,6 +68,7 @@ struct si_state_rasterizer {
|
|||
bool uses_poly_offset;
|
||||
bool clamp_fragment_color;
|
||||
bool rasterizer_discard;
|
||||
bool scissor_enable;
|
||||
};
|
||||
|
||||
struct si_dsa_stencil_ref_part {
|
||||
|
@ -144,10 +145,10 @@ struct si_shader_data {
|
|||
uint32_t sh_base[SI_NUM_SHADERS];
|
||||
};
|
||||
|
||||
/* User sampler views: 0..15
|
||||
* Polygon stipple tex: 16
|
||||
/* User sampler views: 0..31
|
||||
* Polygon stipple tex: 32
|
||||
*/
|
||||
#define SI_NUM_USER_SAMPLERS 16 /* AKA OpenGL textures units per shader */
|
||||
#define SI_NUM_USER_SAMPLERS 32 /* AKA OpenGL textures units per shader */
|
||||
#define SI_POLY_STIPPLE_SAMPLER SI_NUM_USER_SAMPLERS
|
||||
#define SI_NUM_SAMPLERS (SI_POLY_STIPPLE_SAMPLER + 1)
|
||||
|
||||
|
|
|
@ -270,6 +270,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_PCI_BUS:
|
||||
case PIPE_CAP_PCI_DEVICE:
|
||||
case PIPE_CAP_PCI_FUNCTION:
|
||||
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
|
||||
return 0;
|
||||
}
|
||||
/* should only get here on unhandled cases */
|
||||
|
|
|
@ -94,6 +94,8 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe,
|
|||
|
||||
sp->framebuffer.width = fb->width;
|
||||
sp->framebuffer.height = fb->height;
|
||||
sp->framebuffer.samples = fb->samples;
|
||||
sp->framebuffer.layers = fb->layers;
|
||||
|
||||
sp->dirty |= SP_NEW_FRAMEBUFFER;
|
||||
}
|
||||
|
|
|
@ -142,6 +142,9 @@ svga_create_blend_state(struct pipe_context *pipe,
|
|||
struct svga_blend_state *blend = CALLOC_STRUCT( svga_blend_state );
|
||||
unsigned i;
|
||||
|
||||
if (!blend)
|
||||
return NULL;
|
||||
|
||||
/* Fill in the per-rendertarget blend state. We currently only
|
||||
* support independent blend enable and colormask per render target.
|
||||
*/
|
||||
|
|
|
@ -134,6 +134,9 @@ svga_create_depth_stencil_state(struct pipe_context *pipe,
|
|||
struct svga_context *svga = svga_context(pipe);
|
||||
struct svga_depth_stencil_state *ds = CALLOC_STRUCT( svga_depth_stencil_state );
|
||||
|
||||
if (!ds)
|
||||
return NULL;
|
||||
|
||||
/* Don't try to figure out CW/CCW correspondence with
|
||||
* stencil[0]/[1] at this point. Presumably this can change as
|
||||
* back/front face are modified.
|
||||
|
|
|
@ -161,6 +161,9 @@ svga_create_rasterizer_state(struct pipe_context *pipe,
|
|||
struct svga_rasterizer_state *rast = CALLOC_STRUCT( svga_rasterizer_state );
|
||||
struct svga_screen *screen = svga_screen(pipe->screen);
|
||||
|
||||
if (!rast)
|
||||
return NULL;
|
||||
|
||||
/* need this for draw module. */
|
||||
rast->templ = *templ;
|
||||
|
||||
|
|
|
@ -404,6 +404,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
|
||||
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
|
||||
case PIPE_CAP_QUERY_BUFFER_OBJECT:
|
||||
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -999,8 +1000,10 @@ svga_screen_create(struct svga_winsys_screen *sws)
|
|||
svgascreen->max_color_buffers = SVGA3D_DX_MAX_RENDER_TARGETS;
|
||||
|
||||
/* Multisample samples per pixel */
|
||||
svgascreen->ms_samples =
|
||||
get_uint_cap(sws, SVGA3D_DEVCAP_MULTISAMPLE_MASKABLESAMPLES, 0);
|
||||
if (debug_get_bool_option("SVGA_MSAA", TRUE)) {
|
||||
svgascreen->ms_samples =
|
||||
get_uint_cap(sws, SVGA3D_DEVCAP_MULTISAMPLE_MASKABLESAMPLES, 0);
|
||||
}
|
||||
|
||||
/* Maximum number of constant buffers */
|
||||
svgascreen->max_const_buffers =
|
||||
|
|
|
@ -1686,6 +1686,44 @@ static void trace_context_set_shader_buffers(struct pipe_context *_context,
|
|||
FREE(_buffers);
|
||||
}
|
||||
|
||||
static void trace_context_set_shader_images(struct pipe_context *_context,
|
||||
unsigned shader,
|
||||
unsigned start, unsigned nr,
|
||||
struct pipe_image_view *images)
|
||||
{
|
||||
struct trace_context *tr_context = trace_context(_context);
|
||||
struct pipe_context *context = tr_context->pipe;
|
||||
struct pipe_image_view *_images = NULL;
|
||||
|
||||
trace_dump_call_begin("pipe_context", "set_shader_images");
|
||||
trace_dump_arg(ptr, context);
|
||||
trace_dump_arg(uint, shader);
|
||||
trace_dump_arg(uint, start);
|
||||
trace_dump_arg_begin("images");
|
||||
trace_dump_struct_array(image_view, images, nr);
|
||||
trace_dump_arg_end();
|
||||
trace_dump_call_end();
|
||||
|
||||
if (images) {
|
||||
int i;
|
||||
|
||||
_images = MALLOC(nr * sizeof(struct pipe_image_view));
|
||||
if (!_images)
|
||||
return;
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
_images[i] = images[i];
|
||||
_images[i].resource = trace_resource_unwrap(tr_context,
|
||||
_images[i].resource);
|
||||
}
|
||||
}
|
||||
|
||||
context->set_shader_images(context, shader, start, nr, _images);
|
||||
|
||||
if (_images)
|
||||
FREE(_images);
|
||||
}
|
||||
|
||||
static void trace_context_launch_grid(struct pipe_context *_pipe,
|
||||
const struct pipe_grid_info *info)
|
||||
{
|
||||
|
@ -1809,6 +1847,7 @@ trace_context_create(struct trace_screen *tr_scr,
|
|||
TR_CTX_INIT(set_tess_state);
|
||||
TR_CTX_INIT(set_shader_buffers);
|
||||
TR_CTX_INIT(launch_grid);
|
||||
TR_CTX_INIT(set_shader_images);
|
||||
|
||||
TR_CTX_INIT(transfer_map);
|
||||
TR_CTX_INIT(transfer_unmap);
|
||||
|
|
|
@ -481,6 +481,8 @@ void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state)
|
|||
|
||||
trace_dump_member(uint, state, width);
|
||||
trace_dump_member(uint, state, height);
|
||||
trace_dump_member(uint, state, samples);
|
||||
trace_dump_member(uint, state, layers);
|
||||
trace_dump_member(uint, state, nr_cbufs);
|
||||
trace_dump_member_array(ptr, state, cbufs);
|
||||
trace_dump_member(ptr, state, zsbuf);
|
||||
|
@ -738,6 +740,46 @@ void trace_dump_shader_buffer(const struct pipe_shader_buffer *state)
|
|||
}
|
||||
|
||||
|
||||
void trace_dump_image_view(const struct pipe_image_view *state)
|
||||
{
|
||||
if (!trace_dumping_enabled_locked())
|
||||
return;
|
||||
|
||||
if(!state) {
|
||||
trace_dump_null();
|
||||
return;
|
||||
}
|
||||
|
||||
trace_dump_struct_begin("pipe_image_view");
|
||||
trace_dump_member(resource_ptr, state, resource);
|
||||
trace_dump_member(uint, state, format);
|
||||
trace_dump_member(uint, state, access);
|
||||
|
||||
trace_dump_member_begin("u");
|
||||
trace_dump_struct_begin(""); /* anonymous */
|
||||
if (state->resource->target == PIPE_BUFFER) {
|
||||
trace_dump_member_begin("buf");
|
||||
trace_dump_struct_begin(""); /* anonymous */
|
||||
trace_dump_member(uint, &state->u.buf, first_element);
|
||||
trace_dump_member(uint, &state->u.buf, last_element);
|
||||
trace_dump_struct_end(); /* anonymous */
|
||||
trace_dump_member_end(); /* buf */
|
||||
} else {
|
||||
trace_dump_member_begin("tex");
|
||||
trace_dump_struct_begin(""); /* anonymous */
|
||||
trace_dump_member(uint, &state->u.tex, first_layer);
|
||||
trace_dump_member(uint, &state->u.tex, last_layer);
|
||||
trace_dump_member(uint, &state->u.tex, level);
|
||||
trace_dump_struct_end(); /* anonymous */
|
||||
trace_dump_member_end(); /* tex */
|
||||
}
|
||||
trace_dump_struct_end(); /* anonymous */
|
||||
trace_dump_member_end(); /* u */
|
||||
|
||||
trace_dump_struct_end();
|
||||
}
|
||||
|
||||
|
||||
void trace_dump_draw_info(const struct pipe_draw_info *state)
|
||||
{
|
||||
if (!trace_dumping_enabled_locked())
|
||||
|
|
|
@ -91,4 +91,6 @@ void trace_dump_query_result(unsigned query_type,
|
|||
|
||||
void trace_dump_grid_info(const struct pipe_grid_info *state);
|
||||
|
||||
void trace_dump_image_view(const struct pipe_image_view *view);
|
||||
|
||||
#endif /* TR_STATE_H */
|
||||
|
|
|
@ -207,6 +207,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_PCI_BUS:
|
||||
case PIPE_CAP_PCI_DEVICE:
|
||||
case PIPE_CAP_PCI_FUNCTION:
|
||||
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
|
||||
return 0;
|
||||
|
||||
/* Stream output. */
|
||||
|
|
|
@ -239,6 +239,7 @@ virgl_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_PCI_BUS:
|
||||
case PIPE_CAP_PCI_DEVICE:
|
||||
case PIPE_CAP_PCI_FUNCTION:
|
||||
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
|
||||
return 0;
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
return 0x1af4;
|
||||
|
|
|
@ -690,6 +690,7 @@ enum pipe_cap
|
|||
PIPE_CAP_PCI_BUS,
|
||||
PIPE_CAP_PCI_DEVICE,
|
||||
PIPE_CAP_PCI_FUNCTION,
|
||||
PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT,
|
||||
};
|
||||
|
||||
#define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)
|
||||
|
|
|
@ -57,7 +57,7 @@ extern "C" {
|
|||
#define PIPE_MAX_CLIP_PLANES 8
|
||||
#define PIPE_MAX_COLOR_BUFS 8
|
||||
#define PIPE_MAX_CONSTANT_BUFFERS 32
|
||||
#define PIPE_MAX_SAMPLERS 18 /* 16 public + 2 driver internal */
|
||||
#define PIPE_MAX_SAMPLERS 32
|
||||
#define PIPE_MAX_SHADER_INPUTS 80 /* 32 GENERIC + 32 PATCH + 16 others */
|
||||
#define PIPE_MAX_SHADER_OUTPUTS 80 /* 32 GENERIC + 32 PATCH + 16 others */
|
||||
#define PIPE_MAX_SHADER_SAMPLER_VIEWS 32
|
||||
|
@ -298,9 +298,17 @@ struct pipe_stencil_ref
|
|||
};
|
||||
|
||||
|
||||
/**
|
||||
* Note that pipe_surfaces are "texture views for rendering"
|
||||
* and so in the case of ARB_framebuffer_no_attachment there
|
||||
* is no pipe_surface state available such that we may
|
||||
* extract the number of samples and layers.
|
||||
*/
|
||||
struct pipe_framebuffer_state
|
||||
{
|
||||
unsigned width, height;
|
||||
unsigned samples; /**< Number of samples in a no-attachment framebuffer */
|
||||
unsigned layers; /**< Number of layers in a no-attachment framebuffer */
|
||||
|
||||
/** multiple color buffers for multiple render targets */
|
||||
unsigned nr_cbufs;
|
||||
|
|
|
@ -88,5 +88,13 @@ TODO: document the other workarounds.
|
|||
<application name="Second Life" executable="do-not-directly-run-secondlife-bin">
|
||||
<option name="allow_glsl_extension_directive_midshader" value="true" />
|
||||
</application>
|
||||
|
||||
<application name="Warsow (32-bit)" executable="warsow.i386">
|
||||
<option name="allow_glsl_extension_directive_midshader" value="true" />
|
||||
</application>
|
||||
|
||||
<application name="Warsow (64-bit)" executable="warsow.x86_64">
|
||||
<option name="allow_glsl_extension_directive_midshader" value="true" />
|
||||
</application>
|
||||
</device>
|
||||
</driconf>
|
||||
|
|
|
@ -121,24 +121,36 @@ bblock_end_const(const struct bblock_t *block)
|
|||
static inline struct bblock_t *
|
||||
bblock_next(struct bblock_t *block)
|
||||
{
|
||||
if (exec_node_is_tail_sentinel(block->link.next))
|
||||
return NULL;
|
||||
|
||||
return (struct bblock_t *)block->link.next;
|
||||
}
|
||||
|
||||
static inline const struct bblock_t *
|
||||
bblock_next_const(const struct bblock_t *block)
|
||||
{
|
||||
if (exec_node_is_tail_sentinel(block->link.next))
|
||||
return NULL;
|
||||
|
||||
return (const struct bblock_t *)block->link.next;
|
||||
}
|
||||
|
||||
static inline struct bblock_t *
|
||||
bblock_prev(struct bblock_t *block)
|
||||
{
|
||||
if (exec_node_is_head_sentinel(block->link.prev))
|
||||
return NULL;
|
||||
|
||||
return (struct bblock_t *)block->link.prev;
|
||||
}
|
||||
|
||||
static inline const struct bblock_t *
|
||||
bblock_prev_const(const struct bblock_t *block)
|
||||
{
|
||||
if (exec_node_is_head_sentinel(block->link.prev))
|
||||
return NULL;
|
||||
|
||||
return (const struct bblock_t *)block->link.prev;
|
||||
}
|
||||
|
||||
|
|
|
@ -402,6 +402,12 @@ struct brw_wm_prog_data {
|
|||
*/
|
||||
uint32_t barycentric_interp_modes;
|
||||
|
||||
/**
|
||||
* Mask of which FS inputs are marked flat by the shader source. This is
|
||||
* needed for setting up 3DSTATE_SF/SBE.
|
||||
*/
|
||||
uint32_t flat_inputs;
|
||||
|
||||
/**
|
||||
* Map from gl_varying_slot to the position within the FS setup data
|
||||
* payload where the varying's attribute vertex deltas should be delivered.
|
||||
|
|
|
@ -42,6 +42,10 @@ dead_control_flow_eliminate(backend_shader *s)
|
|||
|
||||
foreach_block_safe (block, s->cfg) {
|
||||
bblock_t *prev_block = block->prev();
|
||||
|
||||
if (!prev_block)
|
||||
continue;
|
||||
|
||||
backend_instruction *const inst = block->start();
|
||||
backend_instruction *const prev_inst = prev_block->end();
|
||||
|
||||
|
|
|
@ -2822,17 +2822,15 @@ fs_visitor::emit_repclear_shader()
|
|||
int color_mrf = base_mrf + 2;
|
||||
fs_inst *mov;
|
||||
|
||||
if (uniforms == 1) {
|
||||
if (uniforms > 0) {
|
||||
mov = bld.exec_all().group(4, 0)
|
||||
.MOV(brw_message_reg(color_mrf),
|
||||
fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
|
||||
} else {
|
||||
struct brw_reg reg =
|
||||
brw_reg(BRW_GENERAL_REGISTER_FILE,
|
||||
2, 3, 0, 0, BRW_REGISTER_TYPE_F,
|
||||
BRW_VERTICAL_STRIDE_8,
|
||||
BRW_WIDTH_2,
|
||||
BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
|
||||
brw_reg(BRW_GENERAL_REGISTER_FILE, 2, 3, 0, 0, BRW_REGISTER_TYPE_F,
|
||||
BRW_VERTICAL_STRIDE_8, BRW_WIDTH_2, BRW_HORIZONTAL_STRIDE_4,
|
||||
BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
|
||||
|
||||
mov = bld.exec_all().group(4, 0)
|
||||
.MOV(vec4(brw_message_reg(color_mrf)), fs_reg(reg));
|
||||
|
@ -2865,7 +2863,7 @@ fs_visitor::emit_repclear_shader()
|
|||
assign_curb_setup();
|
||||
|
||||
/* Now that we have the uniform assigned, go ahead and force it to a vec4. */
|
||||
if (uniforms == 1) {
|
||||
if (uniforms > 0) {
|
||||
assert(mov->src[0].file == FIXED_GRF);
|
||||
mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0);
|
||||
}
|
||||
|
@ -5614,6 +5612,31 @@ brw_compute_barycentric_interp_modes(const struct brw_device_info *devinfo,
|
|||
return barycentric_interp_modes;
|
||||
}
|
||||
|
||||
static void
|
||||
brw_compute_flat_inputs(struct brw_wm_prog_data *prog_data,
|
||||
bool shade_model_flat, const nir_shader *shader)
|
||||
{
|
||||
prog_data->flat_inputs = 0;
|
||||
|
||||
nir_foreach_variable(var, &shader->inputs) {
|
||||
enum glsl_interp_qualifier interp_qualifier =
|
||||
(enum glsl_interp_qualifier)var->data.interpolation;
|
||||
bool is_gl_Color = (var->data.location == VARYING_SLOT_COL0) ||
|
||||
(var->data.location == VARYING_SLOT_COL1);
|
||||
|
||||
int input_index = prog_data->urb_setup[var->data.location];
|
||||
|
||||
if (input_index < 0)
|
||||
continue;
|
||||
|
||||
/* flat shading */
|
||||
if (interp_qualifier == INTERP_QUALIFIER_FLAT ||
|
||||
(shade_model_flat && is_gl_Color &&
|
||||
interp_qualifier == INTERP_QUALIFIER_NONE))
|
||||
prog_data->flat_inputs |= (1 << input_index);
|
||||
}
|
||||
}
|
||||
|
||||
static uint8_t
|
||||
computed_depth_mode(const nir_shader *shader)
|
||||
{
|
||||
|
@ -5698,6 +5721,12 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
|
|||
}
|
||||
}
|
||||
|
||||
/* We have to compute the flat inputs after the visitor is finished running
|
||||
* because it relies on prog_data->urb_setup which is computed in
|
||||
* fs_visitor::calculate_urb_setup().
|
||||
*/
|
||||
brw_compute_flat_inputs(prog_data, key->flat_shade, shader);
|
||||
|
||||
cfg_t *simd8_cfg;
|
||||
int no_simd8 = (INTEL_DEBUG & DEBUG_NO8) || use_rep_send;
|
||||
if ((no_simd8 || compiler->devinfo->gen < 5) && simd16_cfg) {
|
||||
|
|
|
@ -3079,7 +3079,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
|
|||
|
||||
fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, tex_offset;
|
||||
|
||||
/* Our hardware requires a LOD for buffer textures */
|
||||
/* The hardware requires a LOD for buffer textures */
|
||||
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
|
||||
lod = brw_imm_d(0);
|
||||
|
||||
|
|
|
@ -938,7 +938,7 @@ static void
|
|||
adjust_later_block_ips(bblock_t *start_block, int ip_adjustment)
|
||||
{
|
||||
for (bblock_t *block_iter = start_block->next();
|
||||
!block_iter->link.is_tail_sentinel();
|
||||
block_iter;
|
||||
block_iter = block_iter->next()) {
|
||||
block_iter->start_ip += ip_adjustment;
|
||||
block_iter->end_ip += ip_adjustment;
|
||||
|
|
|
@ -368,7 +368,6 @@ void
|
|||
calculate_attr_overrides(const struct brw_context *brw,
|
||||
uint16_t *attr_overrides,
|
||||
uint32_t *point_sprite_enables,
|
||||
uint32_t *flat_enables,
|
||||
uint32_t *urb_entry_read_length,
|
||||
uint32_t *urb_entry_read_offset);
|
||||
|
||||
|
|
|
@ -1724,7 +1724,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
|
|||
nir_tex_instr_dest_size(instr));
|
||||
dst_reg dest = get_nir_dest(instr->dest, instr->dest_type);
|
||||
|
||||
/* Our hardware requires a LOD for buffer textures */
|
||||
/* The hardware requires a LOD for buffer textures */
|
||||
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
|
||||
lod = brw_imm_d(0);
|
||||
|
||||
|
|
|
@ -155,14 +155,12 @@ void
|
|||
calculate_attr_overrides(const struct brw_context *brw,
|
||||
uint16_t *attr_overrides,
|
||||
uint32_t *point_sprite_enables,
|
||||
uint32_t *flat_enables,
|
||||
uint32_t *urb_entry_read_length,
|
||||
uint32_t *urb_entry_read_offset)
|
||||
{
|
||||
uint32_t max_source_attr = 0;
|
||||
|
||||
*point_sprite_enables = 0;
|
||||
*flat_enables = 0;
|
||||
|
||||
*urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
|
||||
|
||||
|
@ -180,9 +178,6 @@ calculate_attr_overrides(const struct brw_context *brw,
|
|||
|
||||
*urb_entry_read_offset = fs_needs_vue_header ? 0 : 1;
|
||||
|
||||
/* _NEW_LIGHT */
|
||||
bool shade_model_flat = brw->ctx.Light.ShadeModel == GL_FLAT;
|
||||
|
||||
/* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE,
|
||||
* description of dw10 Point Sprite Texture Coordinate Enable:
|
||||
*
|
||||
|
@ -208,10 +203,6 @@ calculate_attr_overrides(const struct brw_context *brw,
|
|||
memset(attr_overrides, 0, 16*sizeof(*attr_overrides));
|
||||
|
||||
for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
|
||||
/* BRW_NEW_FRAGMENT_PROGRAM */
|
||||
enum glsl_interp_qualifier interp_qualifier =
|
||||
brw->fragment_program->InterpQualifier[attr];
|
||||
bool is_gl_Color = attr == VARYING_SLOT_COL0 || attr == VARYING_SLOT_COL1;
|
||||
/* BRW_NEW_FS_PROG_DATA */
|
||||
int input_index = brw->wm.prog_data->urb_setup[attr];
|
||||
|
||||
|
@ -234,12 +225,6 @@ calculate_attr_overrides(const struct brw_context *brw,
|
|||
*point_sprite_enables |= (1 << input_index);
|
||||
}
|
||||
|
||||
/* flat shading */
|
||||
if (interp_qualifier == INTERP_QUALIFIER_FLAT ||
|
||||
(shade_model_flat && is_gl_Color &&
|
||||
interp_qualifier == INTERP_QUALIFIER_NONE))
|
||||
*flat_enables |= (1 << input_index);
|
||||
|
||||
/* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
|
||||
uint16_t attr_override = point_sprite ? 0 :
|
||||
get_attr_override(&brw->vue_map_geom_out,
|
||||
|
@ -285,7 +270,6 @@ upload_sf_state(struct brw_context *brw)
|
|||
uint32_t num_outputs = brw->wm.prog_data->num_varying_inputs;
|
||||
uint32_t dw1, dw2, dw3, dw4;
|
||||
uint32_t point_sprite_enables;
|
||||
uint32_t flat_enables;
|
||||
int i;
|
||||
/* _NEW_BUFFER */
|
||||
bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
|
||||
|
@ -428,8 +412,7 @@ upload_sf_state(struct brw_context *brw)
|
|||
uint32_t urb_entry_read_length;
|
||||
uint32_t urb_entry_read_offset;
|
||||
calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables,
|
||||
&flat_enables, &urb_entry_read_length,
|
||||
&urb_entry_read_offset);
|
||||
&urb_entry_read_length, &urb_entry_read_offset);
|
||||
dw1 |= (urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
|
||||
urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT);
|
||||
|
||||
|
@ -446,7 +429,7 @@ upload_sf_state(struct brw_context *brw)
|
|||
OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16);
|
||||
}
|
||||
OUT_BATCH(point_sprite_enables); /* dw16 */
|
||||
OUT_BATCH(flat_enables);
|
||||
OUT_BATCH(brw->wm.prog_data->flat_inputs);
|
||||
OUT_BATCH(0); /* wrapshortest enables 0-7 */
|
||||
OUT_BATCH(0); /* wrapshortest enables 8-15 */
|
||||
ADVANCE_BATCH();
|
||||
|
|
|
@ -38,7 +38,6 @@ upload_sbe_state(struct brw_context *brw)
|
|||
uint32_t num_outputs = brw->wm.prog_data->num_varying_inputs;
|
||||
uint32_t dw1;
|
||||
uint32_t point_sprite_enables;
|
||||
uint32_t flat_enables;
|
||||
int i;
|
||||
uint16_t attr_overrides[16];
|
||||
/* _NEW_BUFFERS */
|
||||
|
@ -66,8 +65,7 @@ upload_sbe_state(struct brw_context *brw)
|
|||
uint32_t urb_entry_read_length;
|
||||
uint32_t urb_entry_read_offset;
|
||||
calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables,
|
||||
&flat_enables, &urb_entry_read_length,
|
||||
&urb_entry_read_offset);
|
||||
&urb_entry_read_length, &urb_entry_read_offset);
|
||||
dw1 |= urb_entry_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
|
||||
urb_entry_read_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
|
||||
|
||||
|
@ -81,7 +79,7 @@ upload_sbe_state(struct brw_context *brw)
|
|||
}
|
||||
|
||||
OUT_BATCH(point_sprite_enables); /* dw10 */
|
||||
OUT_BATCH(flat_enables);
|
||||
OUT_BATCH(brw->wm.prog_data->flat_inputs);
|
||||
OUT_BATCH(0); /* wrapshortest enables 0-7 */
|
||||
OUT_BATCH(0); /* wrapshortest enables 8-15 */
|
||||
ADVANCE_BATCH();
|
||||
|
|
|
@ -39,7 +39,6 @@ upload_sbe(struct brw_context *brw)
|
|||
uint32_t urb_entry_read_length;
|
||||
uint32_t urb_entry_read_offset;
|
||||
uint32_t point_sprite_enables;
|
||||
uint32_t flat_enables;
|
||||
int sbe_cmd_length;
|
||||
|
||||
uint32_t dw1 =
|
||||
|
@ -66,7 +65,6 @@ upload_sbe(struct brw_context *brw)
|
|||
*/
|
||||
calculate_attr_overrides(brw, attr_overrides,
|
||||
&point_sprite_enables,
|
||||
&flat_enables,
|
||||
&urb_entry_read_length,
|
||||
&urb_entry_read_offset);
|
||||
|
||||
|
@ -109,7 +107,7 @@ upload_sbe(struct brw_context *brw)
|
|||
OUT_BATCH(_3DSTATE_SBE << 16 | (sbe_cmd_length - 2));
|
||||
OUT_BATCH(dw1);
|
||||
OUT_BATCH(point_sprite_enables);
|
||||
OUT_BATCH(flat_enables);
|
||||
OUT_BATCH(brw->wm.prog_data->flat_inputs);
|
||||
if (sbe_cmd_length >= 6) {
|
||||
OUT_BATCH(dw4);
|
||||
OUT_BATCH(dw5);
|
||||
|
|
|
@ -1369,6 +1369,11 @@ _mesa_BindRenderbufferEXT(GLenum target, GLuint renderbuffer)
|
|||
bind_renderbuffer(target, renderbuffer, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* ARB_framebuffer_no_attachment - Application passes requested param's
|
||||
* here. NOTE: NumSamples requested need not be _NumSamples which is
|
||||
* what the hw supports.
|
||||
*/
|
||||
static void
|
||||
framebuffer_parameteri(struct gl_context *ctx, struct gl_framebuffer *fb,
|
||||
GLenum pname, GLint param, const char *func)
|
||||
|
|
|
@ -2295,30 +2295,6 @@ struct gl_shader
|
|||
*/
|
||||
unsigned num_combined_uniform_components;
|
||||
|
||||
/**
|
||||
* This shader's uniform/ssbo block information.
|
||||
*
|
||||
* These fields are only set post-linking.
|
||||
*
|
||||
* BufferInterfaceBlocks is a list containing both UBOs and SSBOs. This is
|
||||
* useful during the linking process so that we don't have to handle SSBOs
|
||||
* specifically.
|
||||
*
|
||||
* UniformBlocks is a list of UBOs. This is useful for backends that need
|
||||
* or prefer to see separate index spaces for UBOS and SSBOs like the GL
|
||||
* API specifies.
|
||||
*
|
||||
* ShaderStorageBlocks is a list of SSBOs. This is useful for backends that
|
||||
* need or prefer to see separate index spaces for UBOS and SSBOs like the
|
||||
* GL API specifies.
|
||||
*
|
||||
* UniformBlocks and ShaderStorageBlocks only have pointers into
|
||||
* BufferInterfaceBlocks so the actual resource information is not
|
||||
* duplicated.
|
||||
*/
|
||||
unsigned NumBufferInterfaceBlocks;
|
||||
struct gl_uniform_block **BufferInterfaceBlocks;
|
||||
|
||||
unsigned NumUniformBlocks;
|
||||
struct gl_uniform_block **UniformBlocks;
|
||||
|
||||
|
@ -2529,11 +2505,6 @@ struct gl_uniform_block
|
|||
*/
|
||||
GLuint UniformBufferSize;
|
||||
|
||||
/**
|
||||
* Is this actually an interface block for a shader storage buffer?
|
||||
*/
|
||||
bool IsShaderStorage;
|
||||
|
||||
/** Stages that reference this block */
|
||||
uint8_t stageref;
|
||||
|
||||
|
@ -2809,33 +2780,11 @@ struct gl_shader_program
|
|||
*/
|
||||
unsigned LastClipDistanceArraySize;
|
||||
|
||||
/**
|
||||
* This shader's uniform/ssbo block information.
|
||||
*
|
||||
* BufferInterfaceBlocks is a list containing both UBOs and SSBOs. This is
|
||||
* useful during the linking process so that we don't have to handle SSBOs
|
||||
* specifically.
|
||||
*
|
||||
* UniformBlocks is a list of UBOs. This is useful for backends that need
|
||||
* or prefer to see separate index spaces for UBOS and SSBOs like the GL
|
||||
* API specifies.
|
||||
*
|
||||
* ShaderStorageBlocks is a list of SSBOs. This is useful for backends that
|
||||
* need or prefer to see separate index spaces for UBOS and SSBOs like the
|
||||
* GL API specifies.
|
||||
*
|
||||
* UniformBlocks and ShaderStorageBlocks only have pointers into
|
||||
* BufferInterfaceBlocks so the actual resource information is not
|
||||
* duplicated and are only set after linking.
|
||||
*/
|
||||
unsigned NumBufferInterfaceBlocks;
|
||||
struct gl_uniform_block *BufferInterfaceBlocks;
|
||||
|
||||
unsigned NumUniformBlocks;
|
||||
struct gl_uniform_block **UniformBlocks;
|
||||
struct gl_uniform_block *UniformBlocks;
|
||||
|
||||
unsigned NumShaderStorageBlocks;
|
||||
struct gl_uniform_block **ShaderStorageBlocks;
|
||||
struct gl_uniform_block *ShaderStorageBlocks;
|
||||
|
||||
/**
|
||||
* Map of active uniform names to locations
|
||||
|
|
|
@ -925,8 +925,11 @@ is_resource_referenced(struct gl_shader_program *shProg,
|
|||
if (res->Type == GL_ATOMIC_COUNTER_BUFFER)
|
||||
return RESOURCE_ATC(res)->StageReferences[stage];
|
||||
|
||||
if (res->Type == GL_UNIFORM_BLOCK || res->Type == GL_SHADER_STORAGE_BLOCK)
|
||||
return shProg->BufferInterfaceBlocks[index].stageref & (1 << stage);
|
||||
if (res->Type == GL_UNIFORM_BLOCK)
|
||||
return shProg->UniformBlocks[index].stageref & (1 << stage);
|
||||
|
||||
if (res->Type == GL_SHADER_STORAGE_BLOCK)
|
||||
return shProg->ShaderStorageBlocks[index].stageref & (1 << stage);
|
||||
|
||||
return res->StageReferences & (1 << stage);
|
||||
}
|
||||
|
|
|
@ -727,7 +727,7 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname,
|
|||
for (i = 0; i < shProg->NumUniformBlocks; i++) {
|
||||
/* Add one for the terminating NUL character.
|
||||
*/
|
||||
const GLint len = strlen(shProg->UniformBlocks[i]->Name) + 1;
|
||||
const GLint len = strlen(shProg->UniformBlocks[i].Name) + 1;
|
||||
|
||||
if (len > max_len)
|
||||
max_len = len;
|
||||
|
|
|
@ -292,9 +292,13 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
|
|||
ralloc_free(shProg->InfoLog);
|
||||
shProg->InfoLog = ralloc_strdup(shProg, "");
|
||||
|
||||
ralloc_free(shProg->BufferInterfaceBlocks);
|
||||
shProg->BufferInterfaceBlocks = NULL;
|
||||
shProg->NumBufferInterfaceBlocks = 0;
|
||||
ralloc_free(shProg->UniformBlocks);
|
||||
shProg->UniformBlocks = NULL;
|
||||
shProg->NumUniformBlocks = 0;
|
||||
|
||||
ralloc_free(shProg->ShaderStorageBlocks);
|
||||
shProg->ShaderStorageBlocks = NULL;
|
||||
shProg->NumShaderStorageBlocks = 0;
|
||||
|
||||
ralloc_free(shProg->AtomicBuffers);
|
||||
shProg->AtomicBuffers = NULL;
|
||||
|
|
|
@ -1016,13 +1016,13 @@ _mesa_UniformBlockBinding(GLuint program,
|
|||
return;
|
||||
}
|
||||
|
||||
if (shProg->UniformBlocks[uniformBlockIndex]->Binding !=
|
||||
if (shProg->UniformBlocks[uniformBlockIndex].Binding !=
|
||||
uniformBlockBinding) {
|
||||
|
||||
FLUSH_VERTICES(ctx, 0);
|
||||
ctx->NewDriverState |= ctx->DriverFlags.NewUniformBuffer;
|
||||
|
||||
shProg->UniformBlocks[uniformBlockIndex]->Binding = uniformBlockBinding;
|
||||
shProg->UniformBlocks[uniformBlockIndex].Binding = uniformBlockBinding;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1059,13 +1059,13 @@ _mesa_ShaderStorageBlockBinding(GLuint program,
|
|||
return;
|
||||
}
|
||||
|
||||
if (shProg->ShaderStorageBlocks[shaderStorageBlockIndex]->Binding !=
|
||||
if (shProg->ShaderStorageBlocks[shaderStorageBlockIndex].Binding !=
|
||||
shaderStorageBlockBinding) {
|
||||
|
||||
FLUSH_VERTICES(ctx, 0);
|
||||
ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer;
|
||||
|
||||
shProg->ShaderStorageBlocks[shaderStorageBlockIndex]->Binding =
|
||||
shProg->ShaderStorageBlocks[shaderStorageBlockIndex].Binding =
|
||||
shaderStorageBlockBinding;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,6 +43,7 @@
|
|||
#include "util/u_math.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_format.h"
|
||||
#include "main/framebuffer.h"
|
||||
|
||||
|
||||
/**
|
||||
|
@ -64,6 +65,41 @@ update_framebuffer_size(struct pipe_framebuffer_state *framebuffer,
|
|||
framebuffer->height = MIN2(framebuffer->height, surface->height);
|
||||
}
|
||||
|
||||
/**
|
||||
* Round up the requested multisample count to the next supported sample size.
|
||||
*/
|
||||
static unsigned
|
||||
framebuffer_quantize_num_samples(struct st_context *st, unsigned num_samples)
|
||||
{
|
||||
struct pipe_screen *screen = st->pipe->screen;
|
||||
int quantized_samples = 0;
|
||||
unsigned msaa_mode;
|
||||
|
||||
if (!num_samples)
|
||||
return 0;
|
||||
|
||||
/* Assumes the highest supported MSAA is a power of 2 */
|
||||
msaa_mode = util_next_power_of_two(st->ctx->Const.MaxFramebufferSamples);
|
||||
assert(!(num_samples > msaa_mode)); /* be safe from infinite loops */
|
||||
|
||||
/**
|
||||
* Check if the MSAA mode that is higher than the requested
|
||||
* num_samples is supported, and if so returning it.
|
||||
*/
|
||||
for (; msaa_mode >= num_samples; msaa_mode = msaa_mode / 2) {
|
||||
/**
|
||||
* For ARB_framebuffer_no_attachment, A format of
|
||||
* PIPE_FORMAT_NONE implies what number of samples is
|
||||
* supported for a framebuffer with no attachment. Thus the
|
||||
* drivers callback must be adjusted for this.
|
||||
*/
|
||||
if (screen->is_format_supported(screen, PIPE_FORMAT_NONE,
|
||||
PIPE_TEXTURE_2D, msaa_mode,
|
||||
PIPE_BIND_RENDER_TARGET))
|
||||
quantized_samples = msaa_mode;
|
||||
}
|
||||
return quantized_samples;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update framebuffer state (color, depth, stencil, etc. buffers)
|
||||
|
@ -79,10 +115,22 @@ update_framebuffer_state( struct st_context *st )
|
|||
st_flush_bitmap_cache(st);
|
||||
|
||||
st->state.fb_orientation = st_fb_orientation(fb);
|
||||
framebuffer->width = UINT_MAX;
|
||||
framebuffer->height = UINT_MAX;
|
||||
|
||||
/*printf("------ fb size %d x %d\n", fb->Width, fb->Height);*/
|
||||
/**
|
||||
* Quantize the derived default number of samples:
|
||||
*
|
||||
* The driver is queried for the MSAA sample counts the
|
||||
* hardware supports in order to legalize the number
|
||||
* of application-requested samples, NumSamples.
|
||||
* See commit eb9cf3c for more information.
|
||||
*/
|
||||
fb->DefaultGeometry._NumSamples =
|
||||
framebuffer_quantize_num_samples(st, fb->DefaultGeometry.NumSamples);
|
||||
|
||||
framebuffer->width = _mesa_geometric_width(fb);
|
||||
framebuffer->height = _mesa_geometric_height(fb);
|
||||
framebuffer->samples = _mesa_geometric_samples(fb);
|
||||
framebuffer->layers = _mesa_geometric_layers(fb);
|
||||
|
||||
/* Examine Mesa's ctx->DrawBuffer->_ColorDrawBuffers state
|
||||
* to determine which surfaces to draw to
|
||||
|
|
|
@ -244,7 +244,7 @@ static void update_raster_state( struct st_context *st )
|
|||
_mesa_is_multisample_enabled(ctx) &&
|
||||
ctx->Multisample.SampleShading &&
|
||||
ctx->Multisample.MinSampleShadingValue *
|
||||
ctx->DrawBuffer->Visual.samples > 1;
|
||||
_mesa_geometric_samples(ctx->DrawBuffer) > 1;
|
||||
|
||||
/* _NEW_SCISSOR */
|
||||
raster->scissor = ctx->Scissor.EnableFlags;
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
|
||||
|
||||
#include "main/macros.h"
|
||||
#include "main/framebuffer.h"
|
||||
#include "st_context.h"
|
||||
#include "pipe/p_context.h"
|
||||
#include "st_atom.h"
|
||||
|
@ -46,14 +47,17 @@ update_scissor( struct st_context *st )
|
|||
struct pipe_scissor_state scissor[PIPE_MAX_VIEWPORTS];
|
||||
const struct gl_context *ctx = st->ctx;
|
||||
const struct gl_framebuffer *fb = ctx->DrawBuffer;
|
||||
const unsigned int fb_width = _mesa_geometric_width(fb);
|
||||
const unsigned int fb_height = _mesa_geometric_height(fb);
|
||||
GLint miny, maxy;
|
||||
unsigned i;
|
||||
bool changed = false;
|
||||
|
||||
for (i = 0 ; i < ctx->Const.MaxViewports; i++) {
|
||||
scissor[i].minx = 0;
|
||||
scissor[i].miny = 0;
|
||||
scissor[i].maxx = fb->Width;
|
||||
scissor[i].maxy = fb->Height;
|
||||
scissor[i].maxx = fb_width;
|
||||
scissor[i].maxy = fb_height;
|
||||
|
||||
if (ctx->Scissor.EnableFlags & (1 << i)) {
|
||||
/* need to be careful here with xmax or ymax < 0 */
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "main/image.h"
|
||||
#include "main/macros.h"
|
||||
#include "main/teximage.h"
|
||||
#include "main/framebuffer.h"
|
||||
#include "program/program.h"
|
||||
#include "program/prog_print.h"
|
||||
|
||||
|
@ -166,8 +167,8 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
|
|||
/* positions (in clip coords) */
|
||||
{
|
||||
const struct gl_framebuffer *fb = ctx->DrawBuffer;
|
||||
const GLfloat fb_width = (GLfloat)fb->Width;
|
||||
const GLfloat fb_height = (GLfloat)fb->Height;
|
||||
const GLfloat fb_width = (GLfloat)_mesa_geometric_width(fb);
|
||||
const GLfloat fb_height = (GLfloat)_mesa_geometric_height(fb);
|
||||
|
||||
const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0);
|
||||
const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0);
|
||||
|
@ -262,8 +263,8 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
|
|||
{
|
||||
const struct gl_framebuffer *fb = ctx->DrawBuffer;
|
||||
const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP);
|
||||
const GLfloat width = (GLfloat)fb->Width;
|
||||
const GLfloat height = (GLfloat)fb->Height;
|
||||
const GLfloat width = (GLfloat)_mesa_geometric_width(fb);
|
||||
const GLfloat height = (GLfloat)_mesa_geometric_height(fb);
|
||||
struct pipe_viewport_state vp;
|
||||
vp.scale[0] = 0.5f * width;
|
||||
vp.scale[1] = height * (invert ? -0.5f : 0.5f);
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
|
||||
#include "main/bufferobj.h"
|
||||
#include "main/imports.h"
|
||||
#include "main/framebuffer.h"
|
||||
|
||||
#include "state_tracker/st_cb_msaa.h"
|
||||
#include "state_tracker/st_context.h"
|
||||
|
@ -47,7 +48,8 @@ st_GetSamplePosition(struct gl_context *ctx,
|
|||
st_validate_state(st, ST_PIPELINE_RENDER);
|
||||
|
||||
if (st->pipe->get_sample_position)
|
||||
st->pipe->get_sample_position(st->pipe, (unsigned) fb->Visual.samples,
|
||||
st->pipe->get_sample_position(st->pipe,
|
||||
_mesa_geometric_samples(fb),
|
||||
index, outPos);
|
||||
else
|
||||
outPos[0] = outPos[1] = 0.5f;
|
||||
|
|
|
@ -445,6 +445,18 @@ void st_init_limits(struct pipe_screen *screen,
|
|||
extensions->ARB_shader_image_load_store = GL_TRUE;
|
||||
extensions->ARB_shader_image_size = GL_TRUE;
|
||||
}
|
||||
|
||||
/* ARB_framebuffer_no_attachments */
|
||||
c->MaxFramebufferWidth = c->MaxViewportWidth;
|
||||
c->MaxFramebufferHeight = c->MaxViewportHeight;
|
||||
/* NOTE: we cheat here a little by assuming that
|
||||
* PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS has the same
|
||||
* number of layers as we need; although we technically
|
||||
* could have more, that generality is not really useful
|
||||
* in practice.
|
||||
*/
|
||||
c->MaxFramebufferLayers =
|
||||
screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS);
|
||||
}
|
||||
|
||||
|
||||
|
@ -956,6 +968,9 @@ void st_init_extensions(struct pipe_screen *screen,
|
|||
enum pipe_format int_formats[] = {
|
||||
PIPE_FORMAT_R8G8B8A8_SINT
|
||||
};
|
||||
enum pipe_format void_formats[] = {
|
||||
PIPE_FORMAT_NONE
|
||||
};
|
||||
|
||||
consts->MaxSamples =
|
||||
get_max_samples_for_formats(screen, ARRAY_SIZE(color_formats),
|
||||
|
@ -976,6 +991,12 @@ void st_init_extensions(struct pipe_screen *screen,
|
|||
get_max_samples_for_formats(screen, ARRAY_SIZE(int_formats),
|
||||
int_formats, consts->MaxSamples,
|
||||
PIPE_BIND_SAMPLER_VIEW);
|
||||
|
||||
/* ARB_framebuffer_no_attachments, assume max no. of samples 32 */
|
||||
consts->MaxFramebufferSamples =
|
||||
get_max_samples_for_formats(screen, ARRAY_SIZE(void_formats),
|
||||
void_formats, 32,
|
||||
PIPE_BIND_RENDER_TARGET);
|
||||
}
|
||||
if (consts->MaxSamples == 1) {
|
||||
/* one sample doesn't really make sense */
|
||||
|
@ -1068,6 +1089,13 @@ void st_init_extensions(struct pipe_screen *screen,
|
|||
extensions->AMD_vertex_shader_viewport_index = GL_TRUE;
|
||||
}
|
||||
|
||||
/* ARB_framebuffer_no_attachments */
|
||||
if (screen->get_param(screen, PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT) &&
|
||||
((consts->MaxSamples >= 4 && consts->MaxFramebufferLayers >= 2048) ||
|
||||
(consts->MaxFramebufferSamples >= consts->MaxSamples &&
|
||||
consts->MaxFramebufferLayers >= consts->MaxArrayTextureLayers)))
|
||||
extensions->ARB_framebuffer_no_attachments = GL_TRUE;
|
||||
|
||||
/* GL_ARB_ES3_compatibility.
|
||||
*
|
||||
* Assume that ES3 is supported if GLSL 3.30 is supported.
|
||||
|
|
|
@ -389,7 +389,7 @@ public:
|
|||
unsigned num_output_arrays;
|
||||
|
||||
int num_address_regs;
|
||||
int samplers_used;
|
||||
uint32_t samplers_used;
|
||||
glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
|
||||
int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */
|
||||
int buffers_used;
|
||||
|
@ -4290,6 +4290,8 @@ glsl_to_tgsi_visitor::visit(ir_barrier *ir)
|
|||
|
||||
glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
|
||||
{
|
||||
STATIC_ASSERT(sizeof(samplers_used) * 8 >= PIPE_MAX_SAMPLERS);
|
||||
|
||||
result.file = PROGRAM_UNDEFINED;
|
||||
next_temp = 1;
|
||||
array_sizes = NULL;
|
||||
|
@ -4346,7 +4348,7 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
|
|||
if (inst->info->is_tex) {
|
||||
for (int i = 0; i < inst->sampler_array_size; i++) {
|
||||
unsigned idx = inst->sampler_base + i;
|
||||
v->samplers_used |= 1 << idx;
|
||||
v->samplers_used |= 1u << idx;
|
||||
|
||||
debug_assert(idx < (int)ARRAY_SIZE(v->sampler_types));
|
||||
v->sampler_types[idx] = inst->tex_type;
|
||||
|
@ -6325,7 +6327,7 @@ st_translate_program(
|
|||
|
||||
/* texture samplers */
|
||||
for (i = 0; i < frag_const->MaxTextureImageUnits; i++) {
|
||||
if (program->samplers_used & (1 << i)) {
|
||||
if (program->samplers_used & (1u << i)) {
|
||||
unsigned type;
|
||||
|
||||
t->samplers[i] = ureg_DECL_sampler(ureg, i);
|
||||
|
|
Loading…
Reference in New Issue