Merge remote-tracking branch 'public/master' into vulkan

This commit is contained in:
Jason Ekstrand 2016-04-14 17:14:28 -07:00
commit 5567ae0547
40 changed files with 501 additions and 537 deletions

View File

@ -84,6 +84,11 @@ env.Append(CPPPATH = [
#print env.Dump()
# Add a check target for running tests
check = env.Alias('check')
env.AlwaysBuild(check)
#######################################################################
# Invoke host SConscripts
#

View File

@ -65,6 +65,9 @@ install:
build_script:
- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=12.0 llvm=1
after_build:
- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=12.0 llvm=1 check
# It's possible to set up notifications here, as described in
# http://www.appveyor.com/docs/notifications#appveyor-yml-configuration , but

View File

@ -2551,8 +2551,6 @@ AC_CONFIG_FILES([Makefile
src/gallium/drivers/softpipe/Makefile
src/gallium/drivers/svga/Makefile
src/gallium/drivers/swr/Makefile
src/gallium/drivers/swr/avx/Makefile
src/gallium/drivers/swr/avx2/Makefile
src/gallium/drivers/trace/Makefile
src/gallium/drivers/vc4/Makefile
src/gallium/drivers/virgl/Makefile

View File

@ -1,35 +0,0 @@
default: full
all: full subset
%.tag: %.doxy
doxygen $<
FULL = \
main.doxy \
math.doxy \
vbo.doxy \
glapi.doxy \
glsl.doxy \
swrast.doxy \
swrast_setup.doxy \
tnl.doxy \
tnl_dd.doxy \
gbm.doxy \
i965.doxy
full: $(FULL:.doxy=.tag)
$(foreach FILE,$(FULL),doxygen $(FILE);)
SUBSET = \
main.doxy \
math.doxy
subset: $(SUBSET:.doxy=.tag)
$(foreach FILE,$(SUBSET),doxygen $(FILE);)
clean:
-rm -rf $(FULL:.doxy=) $(SUBSET:.doxy=)
-rm -rf *.tag
-rm -rf *.db

View File

@ -1,19 +0,0 @@
<html>
<head>
<title>Mesa Source Code Documentation</title>
<link href="doxygen.css" rel="stylesheet" type="text/css">
</head>
<body>
<div class="qindex">
<a class="qindex" href="../main/index.html">core</a> |
<a class="qindex" href="../glapi/index.html">glapi</a> |
<a class="qindex" href="../glsl/index.html">glsl</a> |
<a class="qindex" href="../vbo/index.html">vbo</a> |
<a class="qindex" href="../math/index.html">math</a> |
<a class="qindex" href="../swrast/index.html">swrast</a> |
<a class="qindex" href="../swrast_setup/index.html">swrast_setup</a> |
<a class="qindex" href="../tnl/index.html">tnl</a> |
<a class="qindex" href="../tnl_dd/index.html">tnl_dd</a> |
<a class="qindex" href="../gbm/index.html">gbm</a> |
<a class="qindex" href="../i965/index.html">i965</a>
</div>

View File

@ -12,9 +12,9 @@ all-local : .install-gallium-links
link_dir=$(top_builddir)/$(LIB_DIR)/egl; \
fi; \
$(MKDIR_P) $$link_dir; \
file_list=$(dri_LTLIBRARIES:%.la=.libs/%.so); \
file_list+=$(egl_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*); \
file_list+=$(lib_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*); \
file_list="$(dri_LTLIBRARIES:%.la=.libs/%.so)"; \
file_list+="$(egl_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)"; \
file_list+="$(lib_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)"; \
for f in $$file_list; do \
if test -h .libs/$$f; then \
cp -d $$f $$link_dir; \

View File

@ -82,11 +82,6 @@ def install_shared_library(env, sources, version = ()):
return targets
def createInstallMethods(env):
env.AddMethod(install_program, 'InstallProgram')
env.AddMethod(install_shared_library, 'InstallSharedLibrary')
def msvc2013_compat(env):
if env['gcc']:
env.Append(CCFLAGS = [
@ -94,8 +89,20 @@ def msvc2013_compat(env):
'-Werror=pointer-arith',
])
def createMSVCCompatMethods(env):
env.AddMethod(msvc2013_compat, 'MSVC2013Compat')
def unit_test(env, test_name, program_target, args=None):
    """Register a test program as a runnable SCons alias under 'check'.

    The program is installed via env.InstallProgram, then an alias called
    test_name is created whose action runs the program's absolute path.
    The alias is always rebuilt and is added as a dependency of 'check'.

    env            -- construction environment the method is attached to
    test_name      -- name of the alias that runs this test
    program_target -- target node list of the test program; element 0 is
                      the executable that gets run
    args           -- optional extra command-line arguments; they are
                      appended to the executable path and joined with
                      single spaces (no quoting is applied)
    """
    env.InstallProgram(program_target)

    argv = [program_target[0].abspath]
    if args is not None:
        argv.extend(args)
    command_line = ' '.join(argv)

    # http://www.scons.org/wiki/UnitTests
    run_action = SCons.Action.Action(command_line, " Running %s ..." % test_name)
    test_alias = env.Alias(test_name, program_target, run_action)
    env.AlwaysBuild(test_alias)
    env.Depends('check', test_alias)
def num_jobs():
@ -667,8 +674,10 @@ def generate(env):
# Custom builders and methods
env.Tool('custom')
createInstallMethods(env)
createMSVCCompatMethods(env)
env.AddMethod(install_program, 'InstallProgram')
env.AddMethod(install_shared_library, 'InstallSharedLibrary')
env.AddMethod(msvc2013_compat, 'MSVC2013Compat')
env.AddMethod(unit_test, 'UnitTest')
env.PkgCheckModules('X11', ['x11', 'xext', 'xdamage', 'xfixes', 'glproto >= 1.4.13'])
env.PkgCheckModules('XCB', ['x11-xcb', 'xcb-glx >= 1.8.1', 'xcb-dri2 >= 1.8'])

View File

@ -22,3 +22,4 @@ compiler = env.ConvenienceLibrary(
Export('compiler')
SConscript('SConscript.glsl')
SConscript('SConscript.nir')

View File

@ -0,0 +1,73 @@
# SCons build description for the 'nir' convenience library: sets up the
# include path, declares the generated sources, and builds the library from
# the lists in Makefile.sources.
import common

Import('*')

from sys import executable as python_cmd

env = env.Clone()

env.MSVC2013Compat()

env.Prepend(CPPPATH = [
    '#include',
    '#src',
    '#src/mapi',
    '#src/mesa',
    '#src/gallium/include',
    '#src/gallium/auxiliary',
    '#src/compiler/nir',
])

# Make generated headers reachable from the include path.
env.Prepend(CPPPATH = [Dir('.').abspath, Dir('nir').abspath])

# nir generated sources
#
# Every generator is run the same way: the script's standard output is
# redirected into the target file.
generate_cmd = python_cmd + ' $SCRIPT > $TARGET'

nir_builder_opcodes_h = env.CodeGenerate(
    target = 'nir/nir_builder_opcodes.h',
    script = 'nir/nir_builder_opcodes_h.py',
    source = [],
    command = generate_cmd
)

# Remaining generated files as (target, generator script) pairs, declared in
# the same order as before.
for generated_file, generator_script in [
    ('nir/nir_constant_expressions.c', 'nir/nir_constant_expressions.py'),
    ('nir/nir_opcodes.h', 'nir/nir_opcodes_h.py'),
    ('nir/nir_opcodes.c', 'nir/nir_opcodes_c.py'),
    ('nir/nir_opt_algebraic.c', 'nir/nir_opt_algebraic.py'),
]:
    env.CodeGenerate(
        target = generated_file,
        script = generator_script,
        source = [],
        command = generate_cmd
    )

# parse Makefile.sources
source_lists = env.ParseSourceList('Makefile.sources')

# Hand-written sources first, generated sources appended after them.
nir_sources = source_lists['NIR_FILES']
nir_sources += source_lists['NIR_GENERATED_FILES']

nir = env.ConvenienceLibrary(target = 'nir', source = nir_sources)

env.Alias('nir', nir)

Export('nir')

View File

@ -507,7 +507,14 @@ typedef struct nir_src {
bool is_ssa;
} nir_src;
#define NIR_SRC_INIT (nir_src) { { NULL } }
/**
 * Return a nir_src in its initial state.
 *
 * Only the innermost first member is given an explicit initializer (NULL);
 * every member without one, such as is_ssa, is implicitly zero-initialized
 * by the aggregate initializer.
 */
static inline nir_src
nir_src_init(void)
{
nir_src src = { { NULL } };
return src;
}
/* NIR_SRC_INIT expands to a function call, so it yields a nir_src value. */
#define NIR_SRC_INIT nir_src_init()
#define nir_foreach_use(reg_or_ssa_def, src) \
list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
@ -530,7 +537,14 @@ typedef struct {
bool is_ssa;
} nir_dest;
#define NIR_DEST_INIT (nir_dest) { { { NULL } } }
/**
 * Return a nir_dest in its initial state.
 *
 * Only the innermost first member is given an explicit initializer (NULL);
 * every member without one, such as is_ssa, is implicitly zero-initialized
 * by the aggregate initializer.
 */
static inline nir_dest
nir_dest_init(void)
{
nir_dest dest = { { { NULL } } };
return dest;
}
/* NIR_DEST_INIT expands to a function call, so it yields a nir_dest value. */
#define NIR_DEST_INIT nir_dest_init()
#define nir_foreach_def(reg, dest) \
list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link)
@ -957,7 +971,7 @@ typedef enum {
NIR_INTRINSIC_UCP_ID = 4,
/**
* The ammount of data, starting from BASE, that this instruction may
* The amount of data, starting from BASE, that this instruction may
* access. This is used to provide bounds if the offset is not constant.
*/
NIR_INTRINSIC_RANGE = 5,

View File

@ -42,9 +42,9 @@
#define ARR(...) { __VA_ARGS__ }
INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
INTRINSIC(load_var, 0, ARR(0), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, WRMASK, xx, xx, 0)
INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, xx, xx, xx, 0)
INTRINSIC(copy_var, 0, ARR(0), false, 0, 2, 0, xx, xx, xx, 0)
/*
* Interpolation of input. The interp_var_at* intrinsics are similar to the
@ -72,7 +72,7 @@ INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0, xx, xx, xx,
* a barrier is an intrinsic with no inputs/outputs but which can't be moved
* around/optimized in general
*/
#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, xx, xx, xx, 0)
#define BARRIER(name) INTRINSIC(name, 0, ARR(0), false, 0, 0, 0, xx, xx, xx, 0)
BARRIER(barrier)
BARRIER(discard)
@ -89,7 +89,7 @@ BARRIER(memory_barrier)
* The latter can be used as code motion barrier, which is currently not
* feasible with NIR.
*/
INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
INTRINSIC(shader_clock, 0, ARR(0), true, 1, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
/*
* Memory barrier with semantics analogous to the compute shader
@ -113,8 +113,8 @@ INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)
*
* end_primitive implements GLSL's EndPrimitive() built-in.
*/
INTRINSIC(emit_vertex, 0, ARR(), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
INTRINSIC(emit_vertex, 0, ARR(0), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
INTRINSIC(end_primitive, 0, ARR(0), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
/**
* Geometry Shader intrinsics with a vertex count.
@ -137,7 +137,7 @@ INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)
*/
#define ATOMIC(name, flags) \
INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, xx, xx, xx, flags) \
INTRINSIC(atomic_counter_##name##_var, 0, ARR(0), true, 1, 1, 0, xx, xx, xx, flags) \
INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, BASE, xx, xx, flags)
ATOMIC(inc, 0)
@ -170,9 +170,9 @@ INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0, xx, xx, xx,
INTRINSIC(image_size, 0, ARR(0), true, 4, 1, 0, xx, xx, xx,
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, xx, xx, xx,
INTRINSIC(image_samples, 0, ARR(0), true, 1, 1, 0, xx, xx, xx,
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
/*
@ -278,7 +278,7 @@ INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
#define SYSTEM_VALUE(name, components, num_indices, idx0, idx1, idx2) \
INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \
INTRINSIC(load_##name, 0, ARR(0), true, components, 0, num_indices, \
idx0, idx1, idx2, \
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
@ -313,8 +313,9 @@ SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
* of the start of the variable being loaded and the offset source is an
* offset into that variable.
*
* Uniform load operations have a second index that specifies the size of the
* variable being loaded. If const_index[1] == 0, then the size is unknown.
* Uniform load operations have a second "range" index that specifies the
* range (starting at base) of the data from which we are loading. If
* const_index[1] == 0, then the range is unknown.
*
* Some load operations such as UBO/SSBO load and per_vertex loads take an
* additional source to specify which UBO/SSBO/vertex to load from.
@ -328,9 +329,8 @@ SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
#define LOAD(name, srcs, num_indices, idx0, idx1, idx2, flags) \
INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, num_indices, idx0, idx1, idx2, flags)
/* src[] = { offset }. const_index[] = { base, range } */
LOAD(uniform, 1, 2, BASE, RANGE, xx,
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
/* src[] = { offset }. const_index[] = { base, range } */
LOAD(uniform, 1, 2, BASE, RANGE, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
/* src[] = { buffer_index, offset }. No const_index */
LOAD(ubo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
/* src[] = { offset }. const_index[] = { base } */

View File

@ -278,8 +278,8 @@ nir_lower_io_block(nir_block *block, void *void_state)
intrin->variables[0]->var->data.driver_location);
if (load->intrinsic == nir_intrinsic_load_uniform) {
load->const_index[1] =
state->type_size(intrin->variables[0]->var->type);
nir_intrinsic_set_range(load,
state->type_size(intrin->variables[0]->var->type));
}
if (per_vertex)

View File

@ -31,6 +31,10 @@
#include <stdlib.h>
#include <inttypes.h> /* for PRIx64 macro */
#if defined(_WIN32) && !defined(snprintf)
#define snprintf _snprintf
#endif
static void
print_tabs(unsigned num_tabs, FILE *fp)
{
@ -514,8 +518,6 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
[NIR_INTRINSIC_STREAM_ID] = "stream-id",
[NIR_INTRINSIC_UCP_ID] = "ucp-id",
[NIR_INTRINSIC_RANGE] = "range",
[NIR_INTRINSIC_DESC_SET] = "desc-set",
[NIR_INTRINSIC_BINDING] = "binding",
};
for (unsigned idx = 1; idx < NIR_INTRINSIC_NUM_INDEX_FLAGS; idx++) {
if (!info->index_map[idx])

View File

@ -27,7 +27,6 @@
#include "nir.h"
#include <stdlib.h>
#include <unistd.h>
/*
* Implements the classic to-SSA algorithm described by Cytron et al. in

View File

@ -80,8 +80,6 @@ endif
if HAVE_GALLIUM_SWR
SUBDIRS += drivers/swr
SUBDIRS += drivers/swr/avx
SUBDIRS += drivers/swr/avx2
endif
## vc4/rpi

View File

@ -38,10 +38,6 @@ if not env['embedded']:
target = testname,
source = [testname + '.c', 'lp_test_main.c'],
)
env.InstallProgram(target)
# http://www.scons.org/wiki/UnitTests
alias = env.Alias(testname, [target], target[0].abspath)
AlwaysBuild(alias)
env.UnitTest(testname, target)
Export('llvmpipe')

View File

@ -202,6 +202,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVE4_COMPUTE_CLASS 0x0000a0c0
#define NVF0_COMPUTE_CLASS 0x0000a1c0
#define GM107_COMPUTE_CLASS 0x0000b0c0
#define GM200_COMPUTE_CLASS 0x0000b1c0
#define NV84_CRYPT_CLASS 0x000074c1
#define BLOB_NVC0_PCOPY1_CLASS 0x000090b8
#define BLOB_NVC0_PCOPY0_CLASS 0x000090b5

View File

@ -644,9 +644,9 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
case 0xf0:
case 0x100:
case 0x110:
case 0x120:
if (debug_get_bool_option("NVF0_COMPUTE", false))
return nve4_screen_compute_setup(screen, screen->base.pushbuf);
case 0x120:
return 0;
default:
return -1;

View File

@ -54,6 +54,9 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
case 0x110:
obj_class = GM107_COMPUTE_CLASS;
break;
case 0x120:
obj_class = GM200_COMPUTE_CLASS;
break;
default:
NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
return -1;

View File

@ -376,6 +376,10 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
0, 0, resource, level, box);
data = r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ);
if (!data) {
pipe_resource_reference((struct pipe_resource **)&staging, NULL);
return NULL;
}
data += box->x % R600_MAP_BUFFER_ALIGNMENT;
return r600_buffer_get_transfer(ctx, resource, level, usage, box,

View File

@ -1192,7 +1192,9 @@ static void si_mark_shader_pointers_dirty(struct si_context *sctx,
{
sctx->const_buffers[shader].desc.pointer_dirty = true;
sctx->rw_buffers[shader].desc.pointer_dirty = true;
sctx->shader_buffers[shader].desc.pointer_dirty = true;
sctx->samplers[shader].views.desc.pointer_dirty = true;
sctx->images[shader].desc.pointer_dirty = true;
if (shader == PIPE_SHADER_VERTEX)
sctx->vertex_buffers.pointer_dirty = true;

View File

@ -5839,6 +5839,10 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
radeon_llvm_dispose(&ctx.radeon_bld);
/* Add the scratch offset to input SGPRs. */
if (shader->config.scratch_bytes_per_wave)
shader->info.num_input_sgprs += 1; /* scratch byte offset */
/* Calculate the number of fragment input VGPRs. */
if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
shader->info.num_input_vgprs = 0;
@ -6761,6 +6765,13 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
return true;
}
/**
 * Raise shader->config.num_sgprs, if necessary, so that it covers all input
 * SGPRs plus the two extra SGPRs accounted for VCC. A value that is already
 * large enough is left as it is.
 */
static void si_fix_num_sgprs(struct si_shader *shader)
{
	/* input SGPRs + 2 for VCC */
	unsigned required_sgprs = shader->info.num_input_sgprs + 2;

	if (shader->config.num_sgprs < required_sgprs)
		shader->config.num_sgprs = required_sgprs;
}
int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
struct si_shader *shader,
struct pipe_debug_callback *debug)
@ -6850,6 +6861,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
}
}
si_fix_num_sgprs(shader);
si_shader_dump(sscreen, shader, debug, shader->selector->info.processor,
stderr);

View File

@ -1487,7 +1487,7 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
}
if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
sscreen->b.family >= CHIP_STONEY) {
sscreen->b.family == CHIP_STONEY) {
switch (format) {
case PIPE_FORMAT_ETC1_RGB8:
case PIPE_FORMAT_ETC2_RGB8:

View File

@ -306,7 +306,6 @@ static void si_set_tesseval_regs(struct si_shader *shader,
static void si_shader_ls(struct si_shader *shader)
{
struct si_pm4_state *pm4;
unsigned num_sgprs, num_user_sgprs;
unsigned vgpr_comp_cnt;
uint64_t va;
@ -321,30 +320,21 @@ static void si_shader_ls(struct si_shader *shader)
* VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
num_user_sgprs = SI_LS_NUM_USER_SGPR;
num_sgprs = shader->config.num_sgprs;
if (num_user_sgprs > num_sgprs) {
/* Last 2 reserved SGPRs are used for VCC */
num_sgprs = num_user_sgprs + 2;
}
assert(num_sgprs <= 104);
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);
shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B528_SGPRS((num_sgprs - 1) / 8) |
S_00B528_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B528_DX10_CLAMP(1) |
S_00B528_FLOAT_MODE(shader->config.float_mode);
shader->config.rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_LS_NUM_USER_SGPR) |
S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
}
static void si_shader_hs(struct si_shader *shader)
{
struct si_pm4_state *pm4;
unsigned num_sgprs, num_user_sgprs;
uint64_t va;
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
@ -354,32 +344,22 @@ static void si_shader_hs(struct si_shader *shader)
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
num_user_sgprs = SI_TCS_NUM_USER_SGPR;
num_sgprs = shader->config.num_sgprs;
/* One SGPR after user SGPRs is pre-loaded with tessellation factor
* buffer offset. */
if ((num_user_sgprs + 1) > num_sgprs) {
/* Last 2 reserved SGPRs are used for VCC */
num_sgprs = num_user_sgprs + 1 + 2;
}
assert(num_sgprs <= 104);
si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B428_SGPRS((num_sgprs - 1) / 8) |
S_00B428_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B428_DX10_CLAMP(1) |
S_00B428_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
S_00B42C_USER_SGPR(num_user_sgprs) |
S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
}
static void si_shader_es(struct si_shader *shader)
{
struct si_pm4_state *pm4;
unsigned num_sgprs, num_user_sgprs;
unsigned num_user_sgprs;
unsigned vgpr_comp_cnt;
uint64_t va;
@ -400,21 +380,13 @@ static void si_shader_es(struct si_shader *shader)
} else
unreachable("invalid shader selector type");
num_sgprs = shader->config.num_sgprs;
/* One SGPR after user SGPRs is pre-loaded with es2gs_offset */
if ((num_user_sgprs + 1) > num_sgprs) {
/* Last 2 reserved SGPRs are used for VCC */
num_sgprs = num_user_sgprs + 1 + 2;
}
assert(num_sgprs <= 104);
si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
shader->selector->esgs_itemsize / 4);
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40);
si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B328_SGPRS((num_sgprs - 1) / 8) |
S_00B328_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B328_DX10_CLAMP(1) |
S_00B328_FLOAT_MODE(shader->config.float_mode));
@ -458,7 +430,6 @@ static void si_shader_gs(struct si_shader *shader)
unsigned gsvs_itemsize = shader->selector->max_gsvs_emit_size >> 2;
unsigned gs_num_invocations = shader->selector->gs_num_invocations;
struct si_pm4_state *pm4;
unsigned num_sgprs, num_user_sgprs;
uint64_t va;
unsigned max_stream = shader->selector->max_gs_stream;
@ -494,22 +465,13 @@ static void si_shader_gs(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40);
num_user_sgprs = SI_GS_NUM_USER_SGPR;
num_sgprs = shader->config.num_sgprs;
/* Two SGPRs after user SGPRs are pre-loaded with gs2vs_offset, gs_wave_id */
if ((num_user_sgprs + 2) > num_sgprs) {
/* Last 2 reserved SGPRs are used for VCC */
num_sgprs = num_user_sgprs + 2 + 2;
}
assert(num_sgprs <= 104);
si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B228_SGPRS((num_sgprs - 1) / 8) |
S_00B228_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B228_DX10_CLAMP(1) |
S_00B228_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
S_00B22C_USER_SGPR(num_user_sgprs) |
S_00B22C_USER_SGPR(SI_GS_NUM_USER_SGPR) |
S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
}
@ -523,7 +485,7 @@ static void si_shader_gs(struct si_shader *shader)
static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
{
struct si_pm4_state *pm4;
unsigned num_sgprs, num_user_sgprs;
unsigned num_user_sgprs;
unsigned nparams, vgpr_comp_cnt;
uint64_t va;
unsigned window_space =
@ -566,13 +528,6 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
} else
unreachable("invalid shader selector type");
num_sgprs = shader->config.num_sgprs;
if (num_user_sgprs > num_sgprs) {
/* Last 2 reserved SGPRs are used for VCC */
num_sgprs = num_user_sgprs + 2;
}
assert(num_sgprs <= 104);
/* VS is required to export at least one param. */
nparams = MAX2(shader->info.nr_param_exports, 1);
si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG,
@ -594,7 +549,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40);
si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS,
S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B128_SGPRS((num_sgprs - 1) / 8) |
S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B128_DX10_CLAMP(1) |
S_00B128_FLOAT_MODE(shader->config.float_mode));
@ -684,7 +639,6 @@ static void si_shader_ps(struct si_shader *shader)
struct tgsi_shader_info *info = &shader->selector->info;
struct si_pm4_state *pm4;
unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask;
unsigned num_sgprs, num_user_sgprs;
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
uint64_t va;
bool has_centroid;
@ -771,23 +725,14 @@ static void si_shader_ps(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40);
num_user_sgprs = SI_PS_NUM_USER_SGPR;
num_sgprs = shader->config.num_sgprs;
/* One SGPR after user SGPRs is pre-loaded with {prim_mask, lds_offset} */
if ((num_user_sgprs + 1) > num_sgprs) {
/* Last 2 reserved SGPRs are used for VCC */
num_sgprs = num_user_sgprs + 1 + 2;
}
assert(num_sgprs <= 104);
si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B028_SGPRS((num_sgprs - 1) / 8) |
S_00B028_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B028_DX10_CLAMP(1) |
S_00B028_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) |
S_00B02C_USER_SGPR(num_user_sgprs) |
S_00B02C_USER_SGPR(SI_PS_NUM_USER_SGPR) |
S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
/* Prefer RE_Z if the shader is complex enough. The requirement is either:

View File

@ -28,4 +28,96 @@ noinst_LTLIBRARIES = libmesaswr.la
libmesaswr_la_SOURCES = $(LOADER_SOURCES)
EXTRA_DIST = Makefile.sources-arch
COMMON_CXXFLAGS = \
$(GALLIUM_DRIVER_CFLAGS) \
$(LLVM_CFLAGS) \
-I$(builddir)/rasterizer/scripts \
-I$(builddir)/rasterizer/jitter \
-I$(srcdir)/rasterizer \
-I$(srcdir)/rasterizer/core \
-I$(srcdir)/rasterizer/jitter
COMMON_SOURCES = \
$(CXX_SOURCES) \
$(COMMON_CXX_SOURCES) \
$(CORE_CXX_SOURCES) \
$(JITTER_CXX_SOURCES) \
$(MEMORY_CXX_SOURCES) \
$(BUILT_SOURCES)
BUILT_SOURCES = \
rasterizer/scripts/gen_knobs.cpp \
rasterizer/scripts/gen_knobs.h \
rasterizer/jitter/state_llvm.h \
rasterizer/jitter/builder_gen.h \
rasterizer/jitter/builder_gen.cpp \
rasterizer/jitter/builder_x86.h \
rasterizer/jitter/builder_x86.cpp
rasterizer/scripts/gen_knobs.cpp rasterizer/scripts/gen_knobs.h: rasterizer/scripts/gen_knobs.py rasterizer/scripts/knob_defs.py rasterizer/scripts/templates/knobs.template
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/rasterizer/scripts/gen_knobs.py \
rasterizer/scripts
rasterizer/jitter/state_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py rasterizer/core/state.h
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_types.py \
--input $(srcdir)/rasterizer/core/state.h \
--output rasterizer/jitter/state_llvm.h
rasterizer/jitter/builder_gen.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
--output rasterizer/jitter/builder_gen.h \
--gen_h
rasterizer/jitter/builder_gen.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
--output rasterizer/jitter/builder_gen.cpp \
--gen_cpp
rasterizer/jitter/builder_x86.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--output rasterizer/jitter/builder_x86.h \
--gen_x86_h
rasterizer/jitter/builder_x86.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--output rasterizer/jitter/builder_x86.cpp \
--gen_x86_cpp
COMMON_LIBADD = \
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
$(top_builddir)/src/mesa/libmesagallium.la
lib_LTLIBRARIES = libswrAVX.la libswrAVX2.la
libswrAVX_la_CXXFLAGS = \
-march=core-avx-i \
-DKNOB_ARCH=KNOB_ARCH_AVX \
$(COMMON_CXXFLAGS)
libswrAVX_la_SOURCES = \
$(COMMON_SOURCES)
libswrAVX_la_LIBADD = \
$(COMMON_LIBADD)
libswrAVX2_la_CXXFLAGS = \
-march=core-avx2 \
-DKNOB_ARCH=KNOB_ARCH_AVX2 \
$(COMMON_CXXFLAGS)
libswrAVX2_la_SOURCES = \
$(COMMON_SOURCES)
libswrAVX2_la_LIBADD = \
$(COMMON_LIBADD)
include $(top_srcdir)/install-gallium-links.mk

View File

@ -21,3 +21,94 @@
LOADER_SOURCES := \
swr_loader.cpp
CXX_SOURCES := \
swr_clear.cpp \
swr_context.cpp \
swr_context.h \
swr_context_llvm.h \
swr_draw.cpp \
swr_public.h \
swr_resource.h \
swr_screen.cpp \
swr_screen.h \
swr_state.cpp \
swr_state.h \
swr_tex_sample.cpp \
swr_tex_sample.h \
swr_scratch.h \
swr_scratch.cpp \
swr_shader.cpp \
swr_memory.h \
swr_fence.h \
swr_fence.cpp \
swr_query.h \
swr_query.cpp
# Files under rasterizer/common. Each file is listed exactly once
# (rdtsc_buckets_shared.h used to appear twice).
COMMON_CXX_SOURCES := \
	rasterizer/common/containers.hpp \
	rasterizer/common/formats.cpp \
	rasterizer/common/formats.h \
	rasterizer/common/isa.hpp \
	rasterizer/common/os.h \
	rasterizer/common/rdtsc_buckets.cpp \
	rasterizer/common/rdtsc_buckets.h \
	rasterizer/common/rdtsc_buckets_shared.h \
	rasterizer/common/simdintrin.h \
	rasterizer/common/swr_assert.cpp \
	rasterizer/common/swr_assert.h
CORE_CXX_SOURCES := \
rasterizer/core/api.cpp \
rasterizer/core/api.h \
rasterizer/core/arena.h \
rasterizer/core/backend.cpp \
rasterizer/core/backend.h \
rasterizer/core/blend.h \
rasterizer/core/clip.cpp \
rasterizer/core/clip.h \
rasterizer/core/context.h \
rasterizer/core/depthstencil.h \
rasterizer/core/fifo.hpp \
rasterizer/core/format_traits.h \
rasterizer/core/format_types.h \
rasterizer/core/frontend.cpp \
rasterizer/core/frontend.h \
rasterizer/core/knobs.h \
rasterizer/core/knobs_init.h \
rasterizer/core/multisample.cpp \
rasterizer/core/multisample.h \
rasterizer/core/pa_avx.cpp \
rasterizer/core/pa.h \
rasterizer/core/rasterizer.cpp \
rasterizer/core/rasterizer.h \
rasterizer/core/rdtsc_core.cpp \
rasterizer/core/rdtsc_core.h \
rasterizer/core/ringbuffer.h \
rasterizer/core/state.h \
rasterizer/core/threads.cpp \
rasterizer/core/threads.h \
rasterizer/core/tilemgr.cpp \
rasterizer/core/tilemgr.h \
rasterizer/core/utils.cpp \
rasterizer/core/utils.h
JITTER_CXX_SOURCES := \
rasterizer/jitter/blend_jit.cpp \
rasterizer/jitter/blend_jit.h \
rasterizer/jitter/builder.cpp \
rasterizer/jitter/builder.h \
rasterizer/jitter/builder_misc.cpp \
rasterizer/jitter/builder_misc.h \
rasterizer/jitter/fetch_jit.cpp \
rasterizer/jitter/fetch_jit.h \
rasterizer/jitter/JitManager.cpp \
rasterizer/jitter/JitManager.h \
rasterizer/jitter/streamout_jit.cpp \
rasterizer/jitter/streamout_jit.h
MEMORY_CXX_SOURCES := \
rasterizer/memory/ClearTile.cpp \
rasterizer/memory/LoadTile.cpp \
rasterizer/memory/StoreTile.cpp

View File

@ -1,111 +0,0 @@
# Copyright (C) 2015 Intel Corporation. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
CXX_SOURCES := \
swr_clear.cpp \
swr_context.cpp \
swr_context.h \
swr_context_llvm.h \
swr_draw.cpp \
swr_public.h \
swr_resource.h \
swr_screen.cpp \
swr_screen.h \
swr_state.cpp \
swr_state.h \
swr_tex_sample.cpp \
swr_tex_sample.h \
swr_scratch.h \
swr_scratch.cpp \
swr_shader.cpp \
swr_memory.h \
swr_fence.h \
swr_fence.cpp \
swr_query.h \
swr_query.cpp
COMMON_CXX_SOURCES := \
rasterizer/common/containers.hpp \
rasterizer/common/formats.cpp \
rasterizer/common/formats.h \
rasterizer/common/isa.hpp \
rasterizer/common/os.h \
rasterizer/common/rdtsc_buckets.cpp \
rasterizer/common/rdtsc_buckets.h \
rasterizer/common/rdtsc_buckets_shared.h \
rasterizer/common/rdtsc_buckets_shared.h \
rasterizer/common/simdintrin.h \
rasterizer/common/swr_assert.cpp \
rasterizer/common/swr_assert.h
CORE_CXX_SOURCES := \
rasterizer/core/api.cpp \
rasterizer/core/api.h \
rasterizer/core/arena.h \
rasterizer/core/backend.cpp \
rasterizer/core/backend.h \
rasterizer/core/blend.h \
rasterizer/core/clip.cpp \
rasterizer/core/clip.h \
rasterizer/core/context.h \
rasterizer/core/depthstencil.h \
rasterizer/core/fifo.hpp \
rasterizer/core/format_traits.h \
rasterizer/core/format_types.h \
rasterizer/core/frontend.cpp \
rasterizer/core/frontend.h \
rasterizer/core/knobs.h \
rasterizer/core/knobs_init.h \
rasterizer/core/multisample.cpp \
rasterizer/core/multisample.h \
rasterizer/core/pa_avx.cpp \
rasterizer/core/pa.h \
rasterizer/core/rasterizer.cpp \
rasterizer/core/rasterizer.h \
rasterizer/core/rdtsc_core.cpp \
rasterizer/core/rdtsc_core.h \
rasterizer/core/ringbuffer.h \
rasterizer/core/state.h \
rasterizer/core/threads.cpp \
rasterizer/core/threads.h \
rasterizer/core/tilemgr.cpp \
rasterizer/core/tilemgr.h \
rasterizer/core/utils.cpp \
rasterizer/core/utils.h
JITTER_CXX_SOURCES := \
rasterizer/jitter/blend_jit.cpp \
rasterizer/jitter/blend_jit.h \
rasterizer/jitter/builder.cpp \
rasterizer/jitter/builder.h \
rasterizer/jitter/builder_misc.cpp \
rasterizer/jitter/builder_misc.h \
rasterizer/jitter/fetch_jit.cpp \
rasterizer/jitter/fetch_jit.h \
rasterizer/jitter/JitManager.cpp \
rasterizer/jitter/JitManager.h \
rasterizer/jitter/streamout_jit.cpp \
rasterizer/jitter/streamout_jit.h
MEMORY_CXX_SOURCES := \
rasterizer/memory/ClearTile.cpp \
rasterizer/memory/LoadTile.cpp \
rasterizer/memory/StoreTile.cpp

View File

@ -1,99 +0,0 @@
# Copyright (C) 2015 Intel Corporation. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
include ../Makefile.sources-arch
include $(top_srcdir)/src/gallium/Automake.inc
VPATH = $(srcdir) $(srcdir)/..
AM_CXXFLAGS = \
-march=core-avx-i \
-DKNOB_ARCH=KNOB_ARCH_AVX \
$(GALLIUM_DRIVER_CFLAGS) \
$(LLVM_CFLAGS) \
-I$(builddir)/rasterizer/scripts \
-I$(builddir)/rasterizer/jitter \
-I$(srcdir)/../rasterizer \
-I$(srcdir)/../rasterizer/core \
-I$(srcdir)/../rasterizer/jitter
lib_LTLIBRARIES = libswrAVX.la
BUILT_SOURCES = \
rasterizer/scripts/gen_knobs.cpp \
rasterizer/scripts/gen_knobs.h \
rasterizer/jitter/state_llvm.h \
rasterizer/jitter/builder_gen.h \
rasterizer/jitter/builder_gen.cpp \
rasterizer/jitter/builder_x86.h \
rasterizer/jitter/builder_x86.cpp
libswrAVX_la_SOURCES = \
$(CXX_SOURCES) \
$(COMMON_CXX_SOURCES) \
$(CORE_CXX_SOURCES) \
$(JITTER_CXX_SOURCES) \
$(MEMORY_CXX_SOURCES) \
$(BUILT_SOURCES)
rasterizer/scripts/gen_knobs.cpp rasterizer/scripts/gen_knobs.h: rasterizer/scripts/gen_knobs.py rasterizer/scripts/knob_defs.py rasterizer/scripts/templates/knobs.template
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/../rasterizer/scripts/gen_knobs.py \
rasterizer/scripts
rasterizer/jitter/state_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py rasterizer/core/state.h
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/../rasterizer/jitter/scripts/gen_llvm_types.py \
--input $(srcdir)/../rasterizer/core/state.h \
--output rasterizer/jitter/state_llvm.h
rasterizer/jitter/builder_gen.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
--output rasterizer/jitter/builder_gen.h \
--gen_h
rasterizer/jitter/builder_gen.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
--output rasterizer/jitter/builder_gen.cpp \
--gen_cpp
rasterizer/jitter/builder_x86.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--output rasterizer/jitter/builder_x86.h \
--gen_x86_h
rasterizer/jitter/builder_x86.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--output rasterizer/jitter/builder_x86.cpp \
--gen_x86_cpp
libswrAVX_la_LIBADD = \
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
$(top_builddir)/src/mesa/libmesagallium.la
include $(top_srcdir)/install-gallium-links.mk

View File

@ -1,99 +0,0 @@
# Copyright (C) 2015 Intel Corporation. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
include ../Makefile.sources-arch
include $(top_srcdir)/src/gallium/Automake.inc
VPATH = $(srcdir) $(srcdir)/..
AM_CXXFLAGS = \
-march=core-avx2 \
-DKNOB_ARCH=KNOB_ARCH_AVX2 \
$(GALLIUM_DRIVER_CFLAGS) \
$(LLVM_CFLAGS) \
-I$(builddir)/rasterizer/scripts \
-I$(builddir)/rasterizer/jitter \
-I$(srcdir)/../rasterizer \
-I$(srcdir)/../rasterizer/core \
-I$(srcdir)/../rasterizer/jitter
lib_LTLIBRARIES = libswrAVX2.la
BUILT_SOURCES = \
rasterizer/scripts/gen_knobs.cpp \
rasterizer/scripts/gen_knobs.h \
rasterizer/jitter/state_llvm.h \
rasterizer/jitter/builder_gen.h \
rasterizer/jitter/builder_gen.cpp \
rasterizer/jitter/builder_x86.h \
rasterizer/jitter/builder_x86.cpp
libswrAVX2_la_SOURCES = \
$(CXX_SOURCES) \
$(COMMON_CXX_SOURCES) \
$(CORE_CXX_SOURCES) \
$(JITTER_CXX_SOURCES) \
$(MEMORY_CXX_SOURCES) \
$(BUILT_SOURCES)
rasterizer/scripts/gen_knobs.cpp rasterizer/scripts/gen_knobs.h: rasterizer/scripts/gen_knobs.py rasterizer/scripts/knob_defs.py rasterizer/scripts/templates/knobs.template
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/../rasterizer/scripts/gen_knobs.py \
rasterizer/scripts
rasterizer/jitter/state_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py rasterizer/core/state.h
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/../rasterizer/jitter/scripts/gen_llvm_types.py \
--input $(srcdir)/../rasterizer/core/state.h \
--output rasterizer/jitter/state_llvm.h
rasterizer/jitter/builder_gen.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
--output rasterizer/jitter/builder_gen.h \
--gen_h
rasterizer/jitter/builder_gen.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
--output rasterizer/jitter/builder_gen.cpp \
--gen_cpp
rasterizer/jitter/builder_x86.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--output rasterizer/jitter/builder_x86.h \
--gen_x86_h
rasterizer/jitter/builder_x86.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
$(PYTHON2) $(PYTHON_FLAGS) \
$(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
--output rasterizer/jitter/builder_x86.cpp \
--gen_x86_cpp
libswrAVX2_la_LIBADD = \
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
$(top_builddir)/src/mesa/libmesagallium.la
include $(top_srcdir)/install-gallium-links.mk

View File

@ -24,10 +24,8 @@ for progname in progs:
target = progname,
source = progname + '.c',
)
env.Alias(progname, env.InstallProgram(prog))
# http://www.scons.org/wiki/UnitTests
test_alias = env.Alias('unit', [prog], prog[0].abspath)
AlwaysBuild(test_alias)
if progname not in [
'u_cache_test', # too long
'translate_test', # unreliable
]:
env.UnitTest(progname, prog)

View File

@ -35,37 +35,78 @@
#include <stdio.h>
#include <stdlib.h>
#include "os/os_thread.h"
#include "os/os_time.h"
#include "util/u_atomic.h"
#define NUM_THREADS 10
static int verbosity = 0;
static pipe_thread threads[NUM_THREADS];
static pipe_barrier barrier;
static int thread_ids[NUM_THREADS];
static volatile int waiting = 0;
static volatile int proceeded = 0;
/* Print a formatted progress message to stdout, but only when the file-scope
 * `verbosity` counter is greater than zero.
 *
 * The body is wrapped in do { } while (0) so that `LOG(...);` expands to
 * exactly one statement and stays correct inside unbraced if/else bodies.
 */
#define LOG(fmt, ...) \
   do { \
      if (verbosity > 0) { \
         fprintf(stdout, fmt, ##__VA_ARGS__); \
      } \
   } while (0)

/* Test assertion: when _cond evaluates to false, report the source location
 * and the failed expression on stderr, then terminate the process at once
 * with EXIT_FAILURE via _exit().
 *
 * Wrapped in do { } while (0) for the same single-statement guarantee as LOG.
 * __LINE__ is an int, hence the %d conversion.
 */
#define CHECK(_cond) \
   do { \
      if (!(_cond)) { \
         fprintf(stderr, "%s:%d: `%s` failed\n", __FILE__, __LINE__, #_cond); \
         _exit(EXIT_FAILURE); \
      } \
   } while (0)
static PIPE_THREAD_ROUTINE(thread_function, thread_data)
{
int thread_id = *((int *) thread_data);
printf("thread %d starting\n", thread_id);
os_time_sleep(thread_id * 1000 * 1000);
printf("thread %d before barrier\n", thread_id);
LOG("thread %d starting\n", thread_id);
os_time_sleep(thread_id * 100 * 1000);
LOG("thread %d before barrier\n", thread_id);
CHECK(p_atomic_read(&proceeded) == 0);
p_atomic_inc(&waiting);
pipe_barrier_wait(&barrier);
printf("thread %d exiting\n", thread_id);
CHECK(p_atomic_read(&waiting) == NUM_THREADS);
p_atomic_inc(&proceeded);
LOG("thread %d exiting\n", thread_id);
return 0;
}
int main()
int main(int argc, char *argv[])
{
int i;
printf("pipe_barrier_test starting\n");
for (i = 1; i < argc; ++i) {
const char *arg = argv[i];
if (strcmp(arg, "-v") == 0) {
++verbosity;
} else {
fprintf(stderr, "error: unrecognized option `%s`\n", arg);
exit(EXIT_FAILURE);
}
}
// Disable buffering
setbuf(stdout, NULL);
LOG("pipe_barrier_test starting\n");
pipe_barrier_init(&barrier, NUM_THREADS);
@ -78,9 +119,11 @@ int main()
pipe_thread_wait(threads[i]);
}
CHECK(p_atomic_read(&proceeded) == NUM_THREADS);
pipe_barrier_destroy(&barrier);
printf("pipe_barrier_test exiting\n");
LOG("pipe_barrier_test exiting\n");
return 0;
}

View File

@ -70,8 +70,9 @@ int main(int argc, char** argv)
util_cpu_detect();
if(argc <= 1)
{}
if (argc <= 1 ||
!strcmp(argv[1], "default") )
create_fn = translate_create;
else if (!strcmp(argv[1], "generic"))
create_fn = translate_generic_create;
else if (!strcmp(argv[1], "x86"))
@ -129,7 +130,7 @@ int main(int argc, char** argv)
if (!create_fn)
{
printf("Usage: ./translate_test [generic|x86|nosse|sse|sse2|sse3|sse4.1]\n");
printf("Usage: ./translate_test [default|generic|x86|nosse|sse|sse2|sse3|sse4.1]\n");
return 2;
}

View File

@ -1988,10 +1988,11 @@ fs_visitor::assign_constant_locations()
*/
const unsigned int max_push_components = 16 * 8;
/* For vulkan we don't limit the max_chunk_size. We set it to 32 float =
* 128 bytes, which is the maximum vulkan push constant size.
/* We push small arrays, but no bigger than 16 floats. This is big enough
* for a vec4 but hopefully not large enough to push out other stuff. We
* should probably use a better heuristic at some point.
*/
const unsigned int max_chunk_size = 32;
const unsigned int max_chunk_size = 16;
unsigned int num_push_constants = 0;
unsigned int num_pull_constants = 0;
@ -2018,8 +2019,14 @@ fs_visitor::assign_constant_locations()
if (!contiguous[u]) {
unsigned chunk_size = u - chunk_start + 1;
if (num_push_constants + chunk_size <= max_push_components &&
chunk_size <= max_chunk_size) {
/* Decide whether we should push or pull this parameter. In the
* Vulkan driver, push constants are explicitly exposed via the API
* so we push everything. In GL, we only push small arrays.
*/
if (stage_prog_data->pull_param == NULL ||
(num_push_constants + chunk_size <= max_push_components &&
chunk_size <= max_chunk_size)) {
assert(num_push_constants + chunk_size <= max_push_components);
for (unsigned j = chunk_start; j <= u; j++)
push_constant_loc[j] = num_push_constants++;
} else {
@ -4515,7 +4522,7 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
case SHADER_OPCODE_MOV_INDIRECT:
/* Prior to Broadwell, we only have 8 address subregisters */
return devinfo->gen < 8 ? 8 : inst->exec_size;
return devinfo->gen < 8 ? 8 : MIN2(inst->exec_size, 16);
default:
return inst->exec_size;

View File

@ -367,29 +367,53 @@ fs_generator::generate_mov_indirect(fs_inst *inst,
/* The destination stride of an instruction (in bytes) must be greater
* than or equal to the size of the rest of the instruction. Since the
* address register is of type UW, we can't use a D-type instruction.
* In order to get around this, re re-type to UW and use a stride.
* In order to get around this, we retype to UW and use a stride.
*/
indirect_byte_offset =
retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
struct brw_reg ind_src;
if (devinfo->gen < 8) {
/* Prior to broadwell, we have a restriction that the bottom 5 bits
* of the base offset and the bottom 5 bits of the indirect must add
* to less than 32. In other words, the hardware needs to be able to
* add the bottom five bits of the two to get the subnumber and add
* the next 7 bits of each to get the actual register number. Since
* the indirect may cause us to cross a register boundary, this makes
* it almost useless. We could try and do something clever where we
* use a actual base offset if base_offset % 32 == 0 but that would
* mean we were generating different code depending on the base
* offset. Instead, for the sake of consistency, we'll just do the
* add ourselves.
/* From the Haswell PRM section "Register Region Restrictions":
*
* "The lower bits of the AddressImmediate must not overflow to
* change the register address. The lower 5 bits of Address
* Immediate when added to lower 5 bits of address register gives
* the sub-register offset. The upper bits of Address Immediate
* when added to upper bits of address register gives the register
* address. Any overflow from sub-register offset is dropped."
*
* This restriction is only listed in the Haswell PRM but empirical
* testing indicates that it applies on all older generations and is
* lifted on Broadwell.
*
* Since the indirect may cause us to cross a register boundary, this
* makes the base offset almost useless. We could try and do
* something clever where we use an actual base offset if
* base_offset % 32 == 0 but that would mean we were generating
* different code depending on the base offset. Instead, for the
* sake of consistency, we'll just do the add ourselves.
*/
brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset));
brw_MOV(p, dst, retype(brw_VxH_indirect(0, 0), dst.type));
ind_src = brw_VxH_indirect(0, 0);
} else {
brw_MOV(p, addr, indirect_byte_offset);
brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
ind_src = brw_VxH_indirect(0, imm_byte_offset);
}
brw_inst *mov = brw_MOV(p, dst, retype(ind_src, dst.type));
if (devinfo->gen == 6 && dst.file == BRW_MESSAGE_REGISTER_FILE &&
!inst->get_next()->is_tail_sentinel() &&
((fs_inst *)inst->get_next())->mlen > 0) {
/* From the Sandybridge PRM:
*
* "[Errata: DevSNB(SNB)] If MRF register is updated by any
* instruction that indexed/indirect source AND is followed by a
* send, the instruction requires a Switch. This is to avoid
* race condition where send may dispatch before MRF is updated."
*/
brw_inst_set_thread_control(devinfo, mov, BRW_THREAD_SWITCH);
}
}
}

View File

@ -2743,7 +2743,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
if (const_offset == NULL) {
fs_reg base_offset = retype(get_nir_src(instr->src[1]),
BRW_REGISTER_TYPE_D);
BRW_REGISTER_TYPE_UD);
for (int i = 0; i < instr->num_components; i++)
VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index,

View File

@ -285,7 +285,7 @@ public:
void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
dst_reg dst,
src_reg orig_src,
int base_offset,
int base_offset,
src_reg indirect);
void emit_pull_constant_load_reg(dst_reg dst,
src_reg surf_index,

View File

@ -758,7 +758,7 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
pull->mlen = 2;
pull->header_size = 1;
} else if (devinfo->gen >= 7) {
dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
dst_reg grf_offset = dst_reg(this, glsl_type::uint_type);
grf_offset.type = offset_reg.type;
@ -1587,21 +1587,21 @@ vec4_visitor::move_grf_array_access_to_scratch()
void
vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
dst_reg temp, src_reg orig_src,
int base_offset, src_reg indirect)
int base_offset, src_reg indirect)
{
int reg_offset = base_offset + orig_src.reg_offset;
const unsigned index = prog_data->base.binding_table.pull_constants_start;
src_reg offset;
if (indirect.file != BAD_FILE) {
offset = src_reg(this, glsl_type::int_type);
offset = src_reg(this, glsl_type::uint_type);
emit_before(block, inst, ADD(dst_reg(offset), indirect,
brw_imm_d(reg_offset * 16)));
brw_imm_ud(reg_offset * 16)));
} else if (devinfo->gen >= 8) {
/* Store the offset in a GRF so we can send-from-GRF. */
offset = src_reg(this, glsl_type::int_type);
emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset * 16)));
offset = src_reg(this, glsl_type::uint_type);
emit_before(block, inst, MOV(dst_reg(offset), brw_imm_ud(reg_offset * 16)));
} else {
offset = brw_imm_d(reg_offset * 16);
}
@ -1629,6 +1629,12 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
void
vec4_visitor::move_uniform_array_access_to_pull_constants()
{
/* The Vulkan driver doesn't support pull constants other than UBOs so
* everything has to be pushed regardless.
*/
if (stage_prog_data->pull_param == NULL)
return;
int pull_constant_loc[this->uniforms];
memset(pull_constant_loc, -1, sizeof(pull_constant_loc));

View File

@ -932,7 +932,7 @@ static const __DRIextension *intelRobustScreenExtensions[] = {
NULL
};
static bool
static int
intel_get_param(__DRIscreen *psp, int param, int *value)
{
int ret;
@ -943,20 +943,17 @@ intel_get_param(__DRIscreen *psp, int param, int *value)
gp.value = value;
ret = drmCommandWriteRead(psp->fd, DRM_I915_GETPARAM, &gp, sizeof(gp));
if (ret) {
if (ret != -EINVAL)
if (ret < 0 && ret != -EINVAL)
_mesa_warning(NULL, "drm_i915_getparam: %d", ret);
return false;
}
return true;
return ret;
}
static bool
intel_get_boolean(__DRIscreen *psp, int param)
{
int value = 0;
return intel_get_param(psp, param, &value) && value;
return (intel_get_param(psp, param, &value) == 0) && value;
}
static void
@ -1093,12 +1090,12 @@ intel_detect_sseu(struct intel_screen *intelScreen)
ret = intel_get_param(intelScreen->driScrnPriv, I915_PARAM_SUBSLICE_TOTAL,
&intelScreen->subslice_total);
if (ret != -EINVAL)
if (ret < 0 && ret != -EINVAL)
goto err_out;
ret = intel_get_param(intelScreen->driScrnPriv,
I915_PARAM_EU_TOTAL, &intelScreen->eu_total);
if (ret != -EINVAL)
if (ret < 0 && ret != -EINVAL)
goto err_out;
/* Without this information, we cannot get the right Braswell brandstrings,
@ -1114,7 +1111,7 @@ intel_detect_sseu(struct intel_screen *intelScreen)
err_out:
intelScreen->subslice_total = -1;
intelScreen->eu_total = -1;
_mesa_warning(NULL, "Failed to query GPU properties.\n");
/* Every visible jump to err_out is taken with ret < 0 (a negated errno from
 * intel_get_param()), whereas strerror() expects the positive errno code.
 */
_mesa_warning(NULL, "Failed to query GPU properties (%s).\n", strerror(-ret));
}
static bool

View File

@ -704,6 +704,10 @@ st_DrawAtlasBitmaps(struct gl_context *ctx,
st_validate_state(st, ST_PIPELINE_RENDER);
sv = st_create_texture_sampler_view(pipe, stObj->pt);
if (!sv) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glCallLists(bitmap text)");
return;
}
setup_render_state(ctx, sv, color, true);
@ -793,6 +797,8 @@ st_DrawAtlasBitmaps(struct gl_context *ctx,
pipe_resource_reference(&vb.buffer, NULL);
pipe_sampler_view_reference(&sv, NULL);
/* We uploaded modified constants, need to invalidate them. */
st->dirty.mesa |= _NEW_PROGRAM_CONSTANTS;
}

View File

@ -47,17 +47,14 @@ env.Alias('mesautil', mesautil)
Export('mesautil')
# http://www.scons.org/wiki/UnitTests
u_atomic_test = env.Program(
target = 'u_atomic_test',
source = ['u_atomic_test.c'],
)
alias = env.Alias("u_atomic_test", u_atomic_test, u_atomic_test[0].abspath)
AlwaysBuild(alias)
env.UnitTest("u_atomic_test", u_atomic_test)
roundeven_test = env.Program(
target = 'roundeven_test',
source = ['roundeven_test.c'],
)
alias = env.Alias("roundeven_test", roundeven_test, roundeven_test[0].abspath)
AlwaysBuild(alias)
env.UnitTest("roundeven_test", roundeven_test)