Merge remote-tracking branch 'public/master' into vulkan
This commit is contained in:
commit
5567ae0547
|
@ -84,6 +84,11 @@ env.Append(CPPPATH = [
|
|||
#print env.Dump()
|
||||
|
||||
|
||||
# Add a check target for running tests
|
||||
check = env.Alias('check')
|
||||
env.AlwaysBuild(check)
|
||||
|
||||
|
||||
#######################################################################
|
||||
# Invoke host SConscripts
|
||||
#
|
||||
|
|
|
@ -65,6 +65,9 @@ install:
|
|||
build_script:
|
||||
- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=12.0 llvm=1
|
||||
|
||||
after_build:
|
||||
- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=12.0 llvm=1 check
|
||||
|
||||
|
||||
# It's possible to set up notification here, as described in
|
||||
# http://www.appveyor.com/docs/notifications#appveyor-yml-configuration , but
|
||||
|
|
|
@ -2551,8 +2551,6 @@ AC_CONFIG_FILES([Makefile
|
|||
src/gallium/drivers/softpipe/Makefile
|
||||
src/gallium/drivers/svga/Makefile
|
||||
src/gallium/drivers/swr/Makefile
|
||||
src/gallium/drivers/swr/avx/Makefile
|
||||
src/gallium/drivers/swr/avx2/Makefile
|
||||
src/gallium/drivers/trace/Makefile
|
||||
src/gallium/drivers/vc4/Makefile
|
||||
src/gallium/drivers/virgl/Makefile
|
||||
|
|
|
@ -1,35 +0,0 @@
|
|||
|
||||
default: full
|
||||
|
||||
all: full subset
|
||||
|
||||
%.tag: %.doxy
|
||||
doxygen $<
|
||||
|
||||
FULL = \
|
||||
main.doxy \
|
||||
math.doxy \
|
||||
vbo.doxy \
|
||||
glapi.doxy \
|
||||
glsl.doxy \
|
||||
swrast.doxy \
|
||||
swrast_setup.doxy \
|
||||
tnl.doxy \
|
||||
tnl_dd.doxy \
|
||||
gbm.doxy \
|
||||
i965.doxy
|
||||
|
||||
full: $(FULL:.doxy=.tag)
|
||||
$(foreach FILE,$(FULL),doxygen $(FILE);)
|
||||
|
||||
SUBSET = \
|
||||
main.doxy \
|
||||
math.doxy
|
||||
|
||||
subset: $(SUBSET:.doxy=.tag)
|
||||
$(foreach FILE,$(SUBSET),doxygen $(FILE);)
|
||||
|
||||
clean:
|
||||
-rm -rf $(FULL:.doxy=) $(SUBSET:.doxy=)
|
||||
-rm -rf *.tag
|
||||
-rm -rf *.db
|
|
@ -1,19 +0,0 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>Mesa Source Code Documentation</title>
|
||||
<link href="doxygen.css" rel="stylesheet" type="text/css">
|
||||
</head>
|
||||
<body>
|
||||
<div class="qindex">
|
||||
<a class="qindex" href="../main/index.html">core</a> |
|
||||
<a class="qindex" href="../glapi/index.html">glapi</a> |
|
||||
<a class="qindex" href="../glsl/index.html">glsl</a> |
|
||||
<a class="qindex" href="../vbo/index.html">vbo</a> |
|
||||
<a class="qindex" href="../math/index.html">math</a> |
|
||||
<a class="qindex" href="../swrast/index.html">swrast</a> |
|
||||
<a class="qindex" href="../swrast_setup/index.html">swrast_setup</a> |
|
||||
<a class="qindex" href="../tnl/index.html">tnl</a> |
|
||||
<a class="qindex" href="../tnl_dd/index.html">tnl_dd</a> |
|
||||
<a class="qindex" href="../gbm/index.html">gbm</a> |
|
||||
<a class="qindex" href="../i965/index.html">i965</a>
|
||||
</div>
|
|
@ -12,9 +12,9 @@ all-local : .install-gallium-links
|
|||
link_dir=$(top_builddir)/$(LIB_DIR)/egl; \
|
||||
fi; \
|
||||
$(MKDIR_P) $$link_dir; \
|
||||
file_list=$(dri_LTLIBRARIES:%.la=.libs/%.so); \
|
||||
file_list+=$(egl_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*); \
|
||||
file_list+=$(lib_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*); \
|
||||
file_list="$(dri_LTLIBRARIES:%.la=.libs/%.so)"; \
|
||||
file_list+="$(egl_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)"; \
|
||||
file_list+="$(lib_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)"; \
|
||||
for f in $$file_list; do \
|
||||
if test -h .libs/$$f; then \
|
||||
cp -d $$f $$link_dir; \
|
||||
|
|
|
@ -82,11 +82,6 @@ def install_shared_library(env, sources, version = ()):
|
|||
return targets
|
||||
|
||||
|
||||
def createInstallMethods(env):
    """Register the install helpers on *env*.

    After this call the environment exposes ``InstallProgram`` and
    ``InstallSharedLibrary``, backed by ``install_program`` and
    ``install_shared_library`` respectively.
    """
    # Registration order matches the original: program first, then library.
    for method_name, helper in (
        ('InstallProgram', install_program),
        ('InstallSharedLibrary', install_shared_library),
    ):
        env.AddMethod(helper, method_name)
|
||||
|
||||
|
||||
def msvc2013_compat(env):
|
||||
if env['gcc']:
|
||||
env.Append(CCFLAGS = [
|
||||
|
@ -94,8 +89,20 @@ def msvc2013_compat(env):
|
|||
'-Werror=pointer-arith',
|
||||
])
|
||||
|
||||
def createMSVCCompatMethods(env):
    """Register ``msvc2013_compat`` on *env* under the name ``MSVC2013Compat``."""
    method_name = 'MSVC2013Compat'
    env.AddMethod(msvc2013_compat, method_name)
|
||||
|
||||
def unit_test(env, test_name, program_target, args=None):
    """Install a test program and hook it into the ``check`` target.

    env            -- environment providing InstallProgram/Alias/AlwaysBuild/Depends
    test_name      -- alias name for the test; also shown in the progress message
    program_target -- target node list; element 0 is the executable that is run
    args           -- optional sequence of extra command-line strings
    """
    env.InstallProgram(program_target)

    # Build the command line: absolute path of the program, then any
    # extra arguments, joined with single spaces.
    argv = [program_target[0].abspath]
    if args is not None:
        argv.extend(args)
    command_line = ' '.join(argv)

    # http://www.scons.org/wiki/UnitTests
    progress_message = " Running %s ..." % test_name
    run_test = SCons.Action.Action(command_line, progress_message)
    test_alias = env.Alias(test_name, program_target, run_test)
    env.AlwaysBuild(test_alias)
    # Make the aggregate 'check' target depend on this test's alias.
    env.Depends('check', test_alias)
|
||||
|
||||
|
||||
def num_jobs():
|
||||
|
@ -667,8 +674,10 @@ def generate(env):
|
|||
|
||||
# Custom builders and methods
|
||||
env.Tool('custom')
|
||||
createInstallMethods(env)
|
||||
createMSVCCompatMethods(env)
|
||||
env.AddMethod(install_program, 'InstallProgram')
|
||||
env.AddMethod(install_shared_library, 'InstallSharedLibrary')
|
||||
env.AddMethod(msvc2013_compat, 'MSVC2013Compat')
|
||||
env.AddMethod(unit_test, 'UnitTest')
|
||||
|
||||
env.PkgCheckModules('X11', ['x11', 'xext', 'xdamage', 'xfixes', 'glproto >= 1.4.13'])
|
||||
env.PkgCheckModules('XCB', ['x11-xcb', 'xcb-glx >= 1.8.1', 'xcb-dri2 >= 1.8'])
|
||||
|
|
|
@ -22,3 +22,4 @@ compiler = env.ConvenienceLibrary(
|
|||
Export('compiler')
|
||||
|
||||
SConscript('SConscript.glsl')
|
||||
SConscript('SConscript.nir')
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
import common

Import('*')

from sys import executable as python_cmd

env = env.Clone()

env.MSVC2013Compat()

env.Prepend(CPPPATH = [
    '#include',
    '#src',
    '#src/mapi',
    '#src/mesa',
    '#src/gallium/include',
    '#src/gallium/auxiliary',
    '#src/compiler/nir',
])

# Make generated headers reachable from the include path.
env.Prepend(CPPPATH = [Dir('.').abspath, Dir('nir').abspath])

# nir generated sources
#
# Every generator is a Python script whose stdout is redirected into the
# target file, so all of them share one command string.
codegen_command = python_cmd + ' $SCRIPT > $TARGET'

nir_builder_opcodes_h = env.CodeGenerate(
    target = 'nir/nir_builder_opcodes.h',
    script = 'nir/nir_builder_opcodes_h.py',
    source = [],
    command = codegen_command
)

# (generated file, generator script) pairs, kept in their original order.
for generated_file, generator_script in [
    ('nir/nir_constant_expressions.c', 'nir/nir_constant_expressions.py'),
    ('nir/nir_opcodes.h', 'nir/nir_opcodes_h.py'),
    ('nir/nir_opcodes.c', 'nir/nir_opcodes_c.py'),
    ('nir/nir_opt_algebraic.c', 'nir/nir_opt_algebraic.py'),
]:
    env.CodeGenerate(
        target = generated_file,
        script = generator_script,
        source = [],
        command = codegen_command
    )

# parse Makefile.sources
source_lists = env.ParseSourceList('Makefile.sources')

# The in-place += extends the NIR_FILES entry with the generated files.
nir_sources = source_lists['NIR_FILES']
nir_sources += source_lists['NIR_GENERATED_FILES']

nir = env.ConvenienceLibrary(
    target = 'nir',
    source = nir_sources,
)

env.Alias('nir', nir)
Export('nir')
|
|
@ -507,7 +507,14 @@ typedef struct nir_src {
|
|||
bool is_ssa;
|
||||
} nir_src;
|
||||
|
||||
#define NIR_SRC_INIT (nir_src) { { NULL } }
|
||||
static inline nir_src
|
||||
nir_src_init(void)
|
||||
{
|
||||
nir_src src = { { NULL } };
|
||||
return src;
|
||||
}
|
||||
|
||||
#define NIR_SRC_INIT nir_src_init()
|
||||
|
||||
#define nir_foreach_use(reg_or_ssa_def, src) \
|
||||
list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
|
||||
|
@ -530,7 +537,14 @@ typedef struct {
|
|||
bool is_ssa;
|
||||
} nir_dest;
|
||||
|
||||
#define NIR_DEST_INIT (nir_dest) { { { NULL } } }
|
||||
static inline nir_dest
|
||||
nir_dest_init(void)
|
||||
{
|
||||
nir_dest dest = { { { NULL } } };
|
||||
return dest;
|
||||
}
|
||||
|
||||
#define NIR_DEST_INIT nir_dest_init()
|
||||
|
||||
#define nir_foreach_def(reg, dest) \
|
||||
list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link)
|
||||
|
@ -957,7 +971,7 @@ typedef enum {
|
|||
NIR_INTRINSIC_UCP_ID = 4,
|
||||
|
||||
/**
|
||||
* The ammount of data, starting from BASE, that this instruction may
|
||||
* The amount of data, starting from BASE, that this instruction may
|
||||
* access. This is used to provide bounds if the offset is not constant.
|
||||
*/
|
||||
NIR_INTRINSIC_RANGE = 5,
|
||||
|
|
|
@ -42,9 +42,9 @@
|
|||
#define ARR(...) { __VA_ARGS__ }
|
||||
|
||||
|
||||
INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||
INTRINSIC(load_var, 0, ARR(0), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||
INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, WRMASK, xx, xx, 0)
|
||||
INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(copy_var, 0, ARR(0), false, 0, 2, 0, xx, xx, xx, 0)
|
||||
|
||||
/*
|
||||
* Interpolation of input. The interp_var_at* intrinsics are similar to the
|
||||
|
@ -72,7 +72,7 @@ INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0, xx, xx, xx,
|
|||
* a barrier is an intrinsic with no inputs/outputs but which can't be moved
|
||||
* around/optimized in general
|
||||
*/
|
||||
#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, xx, xx, xx, 0)
|
||||
#define BARRIER(name) INTRINSIC(name, 0, ARR(0), false, 0, 0, 0, xx, xx, xx, 0)
|
||||
|
||||
BARRIER(barrier)
|
||||
BARRIER(discard)
|
||||
|
@ -89,7 +89,7 @@ BARRIER(memory_barrier)
|
|||
* The latter can be used as code motion barrier, which is currently not
|
||||
* feasible with NIR.
|
||||
*/
|
||||
INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||
INTRINSIC(shader_clock, 0, ARR(0), true, 1, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||
|
||||
/*
|
||||
* Memory barrier with semantics analogous to the compute shader
|
||||
|
@ -113,8 +113,8 @@ INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)
|
|||
*
|
||||
* end_primitive implements GLSL's EndPrimitive() built-in.
|
||||
*/
|
||||
INTRINSIC(emit_vertex, 0, ARR(), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
|
||||
INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
|
||||
INTRINSIC(emit_vertex, 0, ARR(0), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
|
||||
INTRINSIC(end_primitive, 0, ARR(0), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
|
||||
|
||||
/**
|
||||
* Geometry Shader intrinsics with a vertex count.
|
||||
|
@ -137,7 +137,7 @@ INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)
|
|||
*/
|
||||
|
||||
#define ATOMIC(name, flags) \
|
||||
INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, xx, xx, xx, flags) \
|
||||
INTRINSIC(atomic_counter_##name##_var, 0, ARR(0), true, 1, 1, 0, xx, xx, xx, flags) \
|
||||
INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, BASE, xx, xx, flags)
|
||||
|
||||
ATOMIC(inc, 0)
|
||||
|
@ -170,9 +170,9 @@ INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
|
|||
INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
|
||||
INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0, xx, xx, xx,
|
||||
INTRINSIC(image_size, 0, ARR(0), true, 4, 1, 0, xx, xx, xx,
|
||||
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, xx, xx, xx,
|
||||
INTRINSIC(image_samples, 0, ARR(0), true, 1, 1, 0, xx, xx, xx,
|
||||
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
|
||||
/*
|
||||
|
@ -278,7 +278,7 @@ INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
|||
INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
|
||||
|
||||
#define SYSTEM_VALUE(name, components, num_indices, idx0, idx1, idx2) \
|
||||
INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \
|
||||
INTRINSIC(load_##name, 0, ARR(0), true, components, 0, num_indices, \
|
||||
idx0, idx1, idx2, \
|
||||
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
|
||||
|
@ -313,8 +313,9 @@ SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
|
|||
* of the start of the variable being loaded and the offset source is an
|
||||
* offset into that variable.
|
||||
*
|
||||
* Uniform load operations have a second index that specifies the size of the
|
||||
* variable being loaded. If const_index[1] == 0, then the size is unknown.
|
||||
* Uniform load operations have a second "range" index that specifies the
|
||||
* range (starting at base) of the data from which we are loading. If
|
||||
* const_index[1] == 0, then the range is unknown.
|
||||
*
|
||||
* Some load operations such as UBO/SSBO load and per_vertex loads take an
|
||||
* additional source to specify which UBO/SSBO/vertex to load from.
|
||||
|
@ -328,9 +329,8 @@ SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
|
|||
#define LOAD(name, srcs, num_indices, idx0, idx1, idx2, flags) \
|
||||
INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, num_indices, idx0, idx1, idx2, flags)
|
||||
|
||||
/* src[] = { offset }. const_index[] = { base, range } */
|
||||
LOAD(uniform, 1, 2, BASE, RANGE, xx,
|
||||
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
/* src[] = { offset }. const_index[] = { base } */
|
||||
LOAD(uniform, 1, 2, BASE, RANGE, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
/* src[] = { buffer_index, offset }. No const_index */
|
||||
LOAD(ubo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
|
||||
/* src[] = { offset }. const_index[] = { base } */
|
||||
|
|
|
@ -278,8 +278,8 @@ nir_lower_io_block(nir_block *block, void *void_state)
|
|||
intrin->variables[0]->var->data.driver_location);
|
||||
|
||||
if (load->intrinsic == nir_intrinsic_load_uniform) {
|
||||
load->const_index[1] =
|
||||
state->type_size(intrin->variables[0]->var->type);
|
||||
nir_intrinsic_set_range(load,
|
||||
state->type_size(intrin->variables[0]->var->type));
|
||||
}
|
||||
|
||||
if (per_vertex)
|
||||
|
|
|
@ -31,6 +31,10 @@
|
|||
#include <stdlib.h>
|
||||
#include <inttypes.h> /* for PRIx64 macro */
|
||||
|
||||
#if defined(_WIN32) && !defined(snprintf)
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
|
||||
static void
|
||||
print_tabs(unsigned num_tabs, FILE *fp)
|
||||
{
|
||||
|
@ -514,8 +518,6 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
|
|||
[NIR_INTRINSIC_STREAM_ID] = "stream-id",
|
||||
[NIR_INTRINSIC_UCP_ID] = "ucp-id",
|
||||
[NIR_INTRINSIC_RANGE] = "range",
|
||||
[NIR_INTRINSIC_DESC_SET] = "desc-set",
|
||||
[NIR_INTRINSIC_BINDING] = "binding",
|
||||
};
|
||||
for (unsigned idx = 1; idx < NIR_INTRINSIC_NUM_INDEX_FLAGS; idx++) {
|
||||
if (!info->index_map[idx])
|
||||
|
|
|
@ -27,7 +27,6 @@
|
|||
|
||||
#include "nir.h"
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/*
|
||||
* Implements the classic to-SSA algorithm described by Cytron et al. in
|
||||
|
|
|
@ -80,8 +80,6 @@ endif
|
|||
|
||||
if HAVE_GALLIUM_SWR
|
||||
SUBDIRS += drivers/swr
|
||||
SUBDIRS += drivers/swr/avx
|
||||
SUBDIRS += drivers/swr/avx2
|
||||
endif
|
||||
|
||||
## vc4/rpi
|
||||
|
|
|
@ -38,10 +38,6 @@ if not env['embedded']:
|
|||
target = testname,
|
||||
source = [testname + '.c', 'lp_test_main.c'],
|
||||
)
|
||||
env.InstallProgram(target)
|
||||
|
||||
# http://www.scons.org/wiki/UnitTests
|
||||
alias = env.Alias(testname, [target], target[0].abspath)
|
||||
AlwaysBuild(alias)
|
||||
env.UnitTest(testname, target)
|
||||
|
||||
Export('llvmpipe')
|
||||
|
|
|
@ -202,6 +202,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
#define NVE4_COMPUTE_CLASS 0x0000a0c0
|
||||
#define NVF0_COMPUTE_CLASS 0x0000a1c0
|
||||
#define GM107_COMPUTE_CLASS 0x0000b0c0
|
||||
#define GM200_COMPUTE_CLASS 0x0000b1c0
|
||||
#define NV84_CRYPT_CLASS 0x000074c1
|
||||
#define BLOB_NVC0_PCOPY1_CLASS 0x000090b8
|
||||
#define BLOB_NVC0_PCOPY0_CLASS 0x000090b5
|
||||
|
|
|
@ -644,9 +644,9 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
|
|||
case 0xf0:
|
||||
case 0x100:
|
||||
case 0x110:
|
||||
case 0x120:
|
||||
if (debug_get_bool_option("NVF0_COMPUTE", false))
|
||||
return nve4_screen_compute_setup(screen, screen->base.pushbuf);
|
||||
case 0x120:
|
||||
return 0;
|
||||
default:
|
||||
return -1;
|
||||
|
|
|
@ -54,6 +54,9 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
|
|||
case 0x110:
|
||||
obj_class = GM107_COMPUTE_CLASS;
|
||||
break;
|
||||
case 0x120:
|
||||
obj_class = GM200_COMPUTE_CLASS;
|
||||
break;
|
||||
default:
|
||||
NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
|
||||
return -1;
|
||||
|
|
|
@ -376,6 +376,10 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
|
|||
0, 0, resource, level, box);
|
||||
|
||||
data = r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ);
|
||||
if (!data) {
|
||||
pipe_resource_reference((struct pipe_resource **)&staging, NULL);
|
||||
return NULL;
|
||||
}
|
||||
data += box->x % R600_MAP_BUFFER_ALIGNMENT;
|
||||
|
||||
return r600_buffer_get_transfer(ctx, resource, level, usage, box,
|
||||
|
|
|
@ -1192,7 +1192,9 @@ static void si_mark_shader_pointers_dirty(struct si_context *sctx,
|
|||
{
|
||||
sctx->const_buffers[shader].desc.pointer_dirty = true;
|
||||
sctx->rw_buffers[shader].desc.pointer_dirty = true;
|
||||
sctx->shader_buffers[shader].desc.pointer_dirty = true;
|
||||
sctx->samplers[shader].views.desc.pointer_dirty = true;
|
||||
sctx->images[shader].desc.pointer_dirty = true;
|
||||
|
||||
if (shader == PIPE_SHADER_VERTEX)
|
||||
sctx->vertex_buffers.pointer_dirty = true;
|
||||
|
|
|
@ -5839,6 +5839,10 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
|
|||
|
||||
radeon_llvm_dispose(&ctx.radeon_bld);
|
||||
|
||||
/* Add the scratch offset to input SGPRs. */
|
||||
if (shader->config.scratch_bytes_per_wave)
|
||||
shader->info.num_input_sgprs += 1; /* scratch byte offset */
|
||||
|
||||
/* Calculate the number of fragment input VGPRs. */
|
||||
if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
|
||||
shader->info.num_input_vgprs = 0;
|
||||
|
@ -6761,6 +6765,13 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
|
|||
return true;
|
||||
}
|
||||
|
||||
static void si_fix_num_sgprs(struct si_shader *shader)
|
||||
{
|
||||
unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* VCC */
|
||||
|
||||
shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs);
|
||||
}
|
||||
|
||||
int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
|
||||
struct si_shader *shader,
|
||||
struct pipe_debug_callback *debug)
|
||||
|
@ -6850,6 +6861,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
|
|||
}
|
||||
}
|
||||
|
||||
si_fix_num_sgprs(shader);
|
||||
si_shader_dump(sscreen, shader, debug, shader->selector->info.processor,
|
||||
stderr);
|
||||
|
||||
|
|
|
@ -1487,7 +1487,7 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
|
|||
}
|
||||
|
||||
if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
|
||||
sscreen->b.family >= CHIP_STONEY) {
|
||||
sscreen->b.family == CHIP_STONEY) {
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_ETC1_RGB8:
|
||||
case PIPE_FORMAT_ETC2_RGB8:
|
||||
|
|
|
@ -306,7 +306,6 @@ static void si_set_tesseval_regs(struct si_shader *shader,
|
|||
static void si_shader_ls(struct si_shader *shader)
|
||||
{
|
||||
struct si_pm4_state *pm4;
|
||||
unsigned num_sgprs, num_user_sgprs;
|
||||
unsigned vgpr_comp_cnt;
|
||||
uint64_t va;
|
||||
|
||||
|
@ -321,30 +320,21 @@ static void si_shader_ls(struct si_shader *shader)
|
|||
* VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
|
||||
vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
|
||||
|
||||
num_user_sgprs = SI_LS_NUM_USER_SGPR;
|
||||
num_sgprs = shader->config.num_sgprs;
|
||||
if (num_user_sgprs > num_sgprs) {
|
||||
/* Last 2 reserved SGPRs are used for VCC */
|
||||
num_sgprs = num_user_sgprs + 2;
|
||||
}
|
||||
assert(num_sgprs <= 104);
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
|
||||
si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);
|
||||
|
||||
shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
|
||||
S_00B528_SGPRS((num_sgprs - 1) / 8) |
|
||||
S_00B528_SGPRS((shader->config.num_sgprs - 1) / 8) |
|
||||
S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
|
||||
S_00B528_DX10_CLAMP(1) |
|
||||
S_00B528_FLOAT_MODE(shader->config.float_mode);
|
||||
shader->config.rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
|
||||
shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_LS_NUM_USER_SGPR) |
|
||||
S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
|
||||
}
|
||||
|
||||
static void si_shader_hs(struct si_shader *shader)
|
||||
{
|
||||
struct si_pm4_state *pm4;
|
||||
unsigned num_sgprs, num_user_sgprs;
|
||||
uint64_t va;
|
||||
|
||||
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
|
||||
|
@ -354,32 +344,22 @@ static void si_shader_hs(struct si_shader *shader)
|
|||
va = shader->bo->gpu_address;
|
||||
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
|
||||
|
||||
num_user_sgprs = SI_TCS_NUM_USER_SGPR;
|
||||
num_sgprs = shader->config.num_sgprs;
|
||||
/* One SGPR after user SGPRs is pre-loaded with tessellation factor
|
||||
* buffer offset. */
|
||||
if ((num_user_sgprs + 1) > num_sgprs) {
|
||||
/* Last 2 reserved SGPRs are used for VCC */
|
||||
num_sgprs = num_user_sgprs + 1 + 2;
|
||||
}
|
||||
assert(num_sgprs <= 104);
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
|
||||
si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
|
||||
si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
|
||||
S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
|
||||
S_00B428_SGPRS((num_sgprs - 1) / 8) |
|
||||
S_00B428_SGPRS((shader->config.num_sgprs - 1) / 8) |
|
||||
S_00B428_DX10_CLAMP(1) |
|
||||
S_00B428_FLOAT_MODE(shader->config.float_mode));
|
||||
si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
|
||||
S_00B42C_USER_SGPR(num_user_sgprs) |
|
||||
S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
|
||||
S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
|
||||
}
|
||||
|
||||
static void si_shader_es(struct si_shader *shader)
|
||||
{
|
||||
struct si_pm4_state *pm4;
|
||||
unsigned num_sgprs, num_user_sgprs;
|
||||
unsigned num_user_sgprs;
|
||||
unsigned vgpr_comp_cnt;
|
||||
uint64_t va;
|
||||
|
||||
|
@ -400,21 +380,13 @@ static void si_shader_es(struct si_shader *shader)
|
|||
} else
|
||||
unreachable("invalid shader selector type");
|
||||
|
||||
num_sgprs = shader->config.num_sgprs;
|
||||
/* One SGPR after user SGPRs is pre-loaded with es2gs_offset */
|
||||
if ((num_user_sgprs + 1) > num_sgprs) {
|
||||
/* Last 2 reserved SGPRs are used for VCC */
|
||||
num_sgprs = num_user_sgprs + 1 + 2;
|
||||
}
|
||||
assert(num_sgprs <= 104);
|
||||
|
||||
si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
|
||||
shader->selector->esgs_itemsize / 4);
|
||||
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||
si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40);
|
||||
si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
|
||||
S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) |
|
||||
S_00B328_SGPRS((num_sgprs - 1) / 8) |
|
||||
S_00B328_SGPRS((shader->config.num_sgprs - 1) / 8) |
|
||||
S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
|
||||
S_00B328_DX10_CLAMP(1) |
|
||||
S_00B328_FLOAT_MODE(shader->config.float_mode));
|
||||
|
@ -458,7 +430,6 @@ static void si_shader_gs(struct si_shader *shader)
|
|||
unsigned gsvs_itemsize = shader->selector->max_gsvs_emit_size >> 2;
|
||||
unsigned gs_num_invocations = shader->selector->gs_num_invocations;
|
||||
struct si_pm4_state *pm4;
|
||||
unsigned num_sgprs, num_user_sgprs;
|
||||
uint64_t va;
|
||||
unsigned max_stream = shader->selector->max_gs_stream;
|
||||
|
||||
|
@ -494,22 +465,13 @@ static void si_shader_gs(struct si_shader *shader)
|
|||
si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
|
||||
si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40);
|
||||
|
||||
num_user_sgprs = SI_GS_NUM_USER_SGPR;
|
||||
num_sgprs = shader->config.num_sgprs;
|
||||
/* Two SGPRs after user SGPRs are pre-loaded with gs2vs_offset, gs_wave_id */
|
||||
if ((num_user_sgprs + 2) > num_sgprs) {
|
||||
/* Last 2 reserved SGPRs are used for VCC */
|
||||
num_sgprs = num_user_sgprs + 2 + 2;
|
||||
}
|
||||
assert(num_sgprs <= 104);
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
|
||||
S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) |
|
||||
S_00B228_SGPRS((num_sgprs - 1) / 8) |
|
||||
S_00B228_SGPRS((shader->config.num_sgprs - 1) / 8) |
|
||||
S_00B228_DX10_CLAMP(1) |
|
||||
S_00B228_FLOAT_MODE(shader->config.float_mode));
|
||||
si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
|
||||
S_00B22C_USER_SGPR(num_user_sgprs) |
|
||||
S_00B22C_USER_SGPR(SI_GS_NUM_USER_SGPR) |
|
||||
S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
|
||||
}
|
||||
|
||||
|
@ -523,7 +485,7 @@ static void si_shader_gs(struct si_shader *shader)
|
|||
static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
|
||||
{
|
||||
struct si_pm4_state *pm4;
|
||||
unsigned num_sgprs, num_user_sgprs;
|
||||
unsigned num_user_sgprs;
|
||||
unsigned nparams, vgpr_comp_cnt;
|
||||
uint64_t va;
|
||||
unsigned window_space =
|
||||
|
@ -566,13 +528,6 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
|
|||
} else
|
||||
unreachable("invalid shader selector type");
|
||||
|
||||
num_sgprs = shader->config.num_sgprs;
|
||||
if (num_user_sgprs > num_sgprs) {
|
||||
/* Last 2 reserved SGPRs are used for VCC */
|
||||
num_sgprs = num_user_sgprs + 2;
|
||||
}
|
||||
assert(num_sgprs <= 104);
|
||||
|
||||
/* VS is required to export at least one param. */
|
||||
nparams = MAX2(shader->info.nr_param_exports, 1);
|
||||
si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG,
|
||||
|
@ -594,7 +549,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
|
|||
si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40);
|
||||
si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS,
|
||||
S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) |
|
||||
S_00B128_SGPRS((num_sgprs - 1) / 8) |
|
||||
S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8) |
|
||||
S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
|
||||
S_00B128_DX10_CLAMP(1) |
|
||||
S_00B128_FLOAT_MODE(shader->config.float_mode));
|
||||
|
@ -684,7 +639,6 @@ static void si_shader_ps(struct si_shader *shader)
|
|||
struct tgsi_shader_info *info = &shader->selector->info;
|
||||
struct si_pm4_state *pm4;
|
||||
unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask;
|
||||
unsigned num_sgprs, num_user_sgprs;
|
||||
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
|
||||
uint64_t va;
|
||||
bool has_centroid;
|
||||
|
@ -771,23 +725,14 @@ static void si_shader_ps(struct si_shader *shader)
|
|||
si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
|
||||
si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40);
|
||||
|
||||
num_user_sgprs = SI_PS_NUM_USER_SGPR;
|
||||
num_sgprs = shader->config.num_sgprs;
|
||||
/* One SGPR after user SGPRs is pre-loaded with {prim_mask, lds_offset} */
|
||||
if ((num_user_sgprs + 1) > num_sgprs) {
|
||||
/* Last 2 reserved SGPRs are used for VCC */
|
||||
num_sgprs = num_user_sgprs + 1 + 2;
|
||||
}
|
||||
assert(num_sgprs <= 104);
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
|
||||
S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) |
|
||||
S_00B028_SGPRS((num_sgprs - 1) / 8) |
|
||||
S_00B028_SGPRS((shader->config.num_sgprs - 1) / 8) |
|
||||
S_00B028_DX10_CLAMP(1) |
|
||||
S_00B028_FLOAT_MODE(shader->config.float_mode));
|
||||
si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
|
||||
S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) |
|
||||
S_00B02C_USER_SGPR(num_user_sgprs) |
|
||||
S_00B02C_USER_SGPR(SI_PS_NUM_USER_SGPR) |
|
||||
S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
|
||||
|
||||
/* Prefer RE_Z if the shader is complex enough. The requirement is either:
|
||||
|
|
|
@ -28,4 +28,96 @@ noinst_LTLIBRARIES = libmesaswr.la
|
|||
|
||||
libmesaswr_la_SOURCES = $(LOADER_SOURCES)
|
||||
|
||||
EXTRA_DIST = Makefile.sources-arch
|
||||
COMMON_CXXFLAGS = \
|
||||
$(GALLIUM_DRIVER_CFLAGS) \
|
||||
$(LLVM_CFLAGS) \
|
||||
-I$(builddir)/rasterizer/scripts \
|
||||
-I$(builddir)/rasterizer/jitter \
|
||||
-I$(srcdir)/rasterizer \
|
||||
-I$(srcdir)/rasterizer/core \
|
||||
-I$(srcdir)/rasterizer/jitter
|
||||
|
||||
COMMON_SOURCES = \
|
||||
$(CXX_SOURCES) \
|
||||
$(COMMON_CXX_SOURCES) \
|
||||
$(CORE_CXX_SOURCES) \
|
||||
$(JITTER_CXX_SOURCES) \
|
||||
$(MEMORY_CXX_SOURCES) \
|
||||
$(BUILT_SOURCES)
|
||||
|
||||
BUILT_SOURCES = \
|
||||
rasterizer/scripts/gen_knobs.cpp \
|
||||
rasterizer/scripts/gen_knobs.h \
|
||||
rasterizer/jitter/state_llvm.h \
|
||||
rasterizer/jitter/builder_gen.h \
|
||||
rasterizer/jitter/builder_gen.cpp \
|
||||
rasterizer/jitter/builder_x86.h \
|
||||
rasterizer/jitter/builder_x86.cpp
|
||||
|
||||
rasterizer/scripts/gen_knobs.cpp rasterizer/scripts/gen_knobs.h: rasterizer/scripts/gen_knobs.py rasterizer/scripts/knob_defs.py rasterizer/scripts/templates/knobs.template
|
||||
$(PYTHON2) $(PYTHON_FLAGS) \
|
||||
$(srcdir)/rasterizer/scripts/gen_knobs.py \
|
||||
rasterizer/scripts
|
||||
|
||||
rasterizer/jitter/state_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py rasterizer/core/state.h
|
||||
$(PYTHON2) $(PYTHON_FLAGS) \
|
||||
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_types.py \
|
||||
--input $(srcdir)/rasterizer/core/state.h \
|
||||
--output rasterizer/jitter/state_llvm.h
|
||||
|
||||
rasterizer/jitter/builder_gen.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
|
||||
$(PYTHON2) $(PYTHON_FLAGS) \
|
||||
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
|
||||
--input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
|
||||
--output rasterizer/jitter/builder_gen.h \
|
||||
--gen_h
|
||||
|
||||
rasterizer/jitter/builder_gen.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
|
||||
$(PYTHON2) $(PYTHON_FLAGS) \
|
||||
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
|
||||
--input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
|
||||
--output rasterizer/jitter/builder_gen.cpp \
|
||||
--gen_cpp
|
||||
|
||||
rasterizer/jitter/builder_x86.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
|
||||
$(PYTHON2) $(PYTHON_FLAGS) \
|
||||
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
|
||||
--output rasterizer/jitter/builder_x86.h \
|
||||
--gen_x86_h
|
||||
|
||||
rasterizer/jitter/builder_x86.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
|
||||
$(PYTHON2) $(PYTHON_FLAGS) \
|
||||
$(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
|
||||
--output rasterizer/jitter/builder_x86.cpp \
|
||||
--gen_x86_cpp
|
||||
|
||||
|
||||
COMMON_LIBADD = \
|
||||
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
|
||||
$(top_builddir)/src/mesa/libmesagallium.la
|
||||
|
||||
lib_LTLIBRARIES = libswrAVX.la libswrAVX2.la
|
||||
|
||||
libswrAVX_la_CXXFLAGS = \
|
||||
-march=core-avx-i \
|
||||
-DKNOB_ARCH=KNOB_ARCH_AVX \
|
||||
$(COMMON_CXXFLAGS)
|
||||
|
||||
libswrAVX_la_SOURCES = \
|
||||
$(COMMON_SOURCES)
|
||||
|
||||
libswrAVX_la_LIBADD = \
|
||||
$(COMMON_LIBADD)
|
||||
|
||||
libswrAVX2_la_CXXFLAGS = \
|
||||
-march=core-avx2 \
|
||||
-DKNOB_ARCH=KNOB_ARCH_AVX2 \
|
||||
$(COMMON_CXXFLAGS)
|
||||
|
||||
libswrAVX2_la_SOURCES = \
|
||||
$(COMMON_SOURCES)
|
||||
|
||||
libswrAVX2_la_LIBADD = \
|
||||
$(COMMON_LIBADD)
|
||||
|
||||
include $(top_srcdir)/install-gallium-links.mk
|
||||
|
|
|
@ -21,3 +21,94 @@
|
|||
|
||||
LOADER_SOURCES := \
|
||||
swr_loader.cpp
|
||||
|
||||
CXX_SOURCES := \
|
||||
swr_clear.cpp \
|
||||
swr_context.cpp \
|
||||
swr_context.h \
|
||||
swr_context_llvm.h \
|
||||
swr_draw.cpp \
|
||||
swr_public.h \
|
||||
swr_resource.h \
|
||||
swr_screen.cpp \
|
||||
swr_screen.h \
|
||||
swr_state.cpp \
|
||||
swr_state.h \
|
||||
swr_tex_sample.cpp \
|
||||
swr_tex_sample.h \
|
||||
swr_scratch.h \
|
||||
swr_scratch.cpp \
|
||||
swr_shader.cpp \
|
||||
swr_memory.h \
|
||||
swr_fence.h \
|
||||
swr_fence.cpp \
|
||||
swr_query.h \
|
||||
swr_query.cpp
|
||||
|
||||
COMMON_CXX_SOURCES := \
|
||||
rasterizer/common/containers.hpp \
|
||||
rasterizer/common/formats.cpp \
|
||||
rasterizer/common/formats.h \
|
||||
rasterizer/common/isa.hpp \
|
||||
rasterizer/common/os.h \
|
||||
rasterizer/common/rdtsc_buckets.cpp \
|
||||
rasterizer/common/rdtsc_buckets.h \
|
||||
rasterizer/common/rdtsc_buckets_shared.h \
|
||||
rasterizer/common/rdtsc_buckets_shared.h \
|
||||
rasterizer/common/simdintrin.h \
|
||||
rasterizer/common/swr_assert.cpp \
|
||||
rasterizer/common/swr_assert.h
|
||||
|
||||
CORE_CXX_SOURCES := \
|
||||
rasterizer/core/api.cpp \
|
||||
rasterizer/core/api.h \
|
||||
rasterizer/core/arena.h \
|
||||
rasterizer/core/backend.cpp \
|
||||
rasterizer/core/backend.h \
|
||||
rasterizer/core/blend.h \
|
||||
rasterizer/core/clip.cpp \
|
||||
rasterizer/core/clip.h \
|
||||
rasterizer/core/context.h \
|
||||
rasterizer/core/depthstencil.h \
|
||||
rasterizer/core/fifo.hpp \
|
||||
rasterizer/core/format_traits.h \
|
||||
rasterizer/core/format_types.h \
|
||||
rasterizer/core/frontend.cpp \
|
||||
rasterizer/core/frontend.h \
|
||||
rasterizer/core/knobs.h \
|
||||
rasterizer/core/knobs_init.h \
|
||||
rasterizer/core/multisample.cpp \
|
||||
rasterizer/core/multisample.h \
|
||||
rasterizer/core/pa_avx.cpp \
|
||||
rasterizer/core/pa.h \
|
||||
rasterizer/core/rasterizer.cpp \
|
||||
rasterizer/core/rasterizer.h \
|
||||
rasterizer/core/rdtsc_core.cpp \
|
||||
rasterizer/core/rdtsc_core.h \
|
||||
rasterizer/core/ringbuffer.h \
|
||||
rasterizer/core/state.h \
|
||||
rasterizer/core/threads.cpp \
|
||||
rasterizer/core/threads.h \
|
||||
rasterizer/core/tilemgr.cpp \
|
||||
rasterizer/core/tilemgr.h \
|
||||
rasterizer/core/utils.cpp \
|
||||
rasterizer/core/utils.h
|
||||
|
||||
JITTER_CXX_SOURCES := \
|
||||
rasterizer/jitter/blend_jit.cpp \
|
||||
rasterizer/jitter/blend_jit.h \
|
||||
rasterizer/jitter/builder.cpp \
|
||||
rasterizer/jitter/builder.h \
|
||||
rasterizer/jitter/builder_misc.cpp \
|
||||
rasterizer/jitter/builder_misc.h \
|
||||
rasterizer/jitter/fetch_jit.cpp \
|
||||
rasterizer/jitter/fetch_jit.h \
|
||||
rasterizer/jitter/JitManager.cpp \
|
||||
rasterizer/jitter/JitManager.h \
|
||||
rasterizer/jitter/streamout_jit.cpp \
|
||||
rasterizer/jitter/streamout_jit.h
|
||||
|
||||
MEMORY_CXX_SOURCES := \
|
||||
rasterizer/memory/ClearTile.cpp \
|
||||
rasterizer/memory/LoadTile.cpp \
|
||||
rasterizer/memory/StoreTile.cpp
|
||||
|
|
|
@ -1,111 +0,0 @@
|
|||
# Copyright (C) 2015 Intel Corporation. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice (including the next
|
||||
# paragraph) shall be included in all copies or substantial portions of the
|
||||
# Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
|
||||
CXX_SOURCES := \
|
||||
swr_clear.cpp \
|
||||
swr_context.cpp \
|
||||
swr_context.h \
|
||||
swr_context_llvm.h \
|
||||
swr_draw.cpp \
|
||||
swr_public.h \
|
||||
swr_resource.h \
|
||||
swr_screen.cpp \
|
||||
swr_screen.h \
|
||||
swr_state.cpp \
|
||||
swr_state.h \
|
||||
swr_tex_sample.cpp \
|
||||
swr_tex_sample.h \
|
||||
swr_scratch.h \
|
||||
swr_scratch.cpp \
|
||||
swr_shader.cpp \
|
||||
swr_memory.h \
|
||||
swr_fence.h \
|
||||
swr_fence.cpp \
|
||||
swr_query.h \
|
||||
swr_query.cpp
|
||||
|
||||
COMMON_CXX_SOURCES := \
|
||||
rasterizer/common/containers.hpp \
|
||||
rasterizer/common/formats.cpp \
|
||||
rasterizer/common/formats.h \
|
||||
rasterizer/common/isa.hpp \
|
||||
rasterizer/common/os.h \
|
||||
rasterizer/common/rdtsc_buckets.cpp \
|
||||
rasterizer/common/rdtsc_buckets.h \
|
||||
rasterizer/common/rdtsc_buckets_shared.h \
|
||||
rasterizer/common/rdtsc_buckets_shared.h \
|
||||
rasterizer/common/simdintrin.h \
|
||||
rasterizer/common/swr_assert.cpp \
|
||||
rasterizer/common/swr_assert.h
|
||||
|
||||
CORE_CXX_SOURCES := \
|
||||
rasterizer/core/api.cpp \
|
||||
rasterizer/core/api.h \
|
||||
rasterizer/core/arena.h \
|
||||
rasterizer/core/backend.cpp \
|
||||
rasterizer/core/backend.h \
|
||||
rasterizer/core/blend.h \
|
||||
rasterizer/core/clip.cpp \
|
||||
rasterizer/core/clip.h \
|
||||
rasterizer/core/context.h \
|
||||
rasterizer/core/depthstencil.h \
|
||||
rasterizer/core/fifo.hpp \
|
||||
rasterizer/core/format_traits.h \
|
||||
rasterizer/core/format_types.h \
|
||||
rasterizer/core/frontend.cpp \
|
||||
rasterizer/core/frontend.h \
|
||||
rasterizer/core/knobs.h \
|
||||
rasterizer/core/knobs_init.h \
|
||||
rasterizer/core/multisample.cpp \
|
||||
rasterizer/core/multisample.h \
|
||||
rasterizer/core/pa_avx.cpp \
|
||||
rasterizer/core/pa.h \
|
||||
rasterizer/core/rasterizer.cpp \
|
||||
rasterizer/core/rasterizer.h \
|
||||
rasterizer/core/rdtsc_core.cpp \
|
||||
rasterizer/core/rdtsc_core.h \
|
||||
rasterizer/core/ringbuffer.h \
|
||||
rasterizer/core/state.h \
|
||||
rasterizer/core/threads.cpp \
|
||||
rasterizer/core/threads.h \
|
||||
rasterizer/core/tilemgr.cpp \
|
||||
rasterizer/core/tilemgr.h \
|
||||
rasterizer/core/utils.cpp \
|
||||
rasterizer/core/utils.h
|
||||
|
||||
JITTER_CXX_SOURCES := \
|
||||
rasterizer/jitter/blend_jit.cpp \
|
||||
rasterizer/jitter/blend_jit.h \
|
||||
rasterizer/jitter/builder.cpp \
|
||||
rasterizer/jitter/builder.h \
|
||||
rasterizer/jitter/builder_misc.cpp \
|
||||
rasterizer/jitter/builder_misc.h \
|
||||
rasterizer/jitter/fetch_jit.cpp \
|
||||
rasterizer/jitter/fetch_jit.h \
|
||||
rasterizer/jitter/JitManager.cpp \
|
||||
rasterizer/jitter/JitManager.h \
|
||||
rasterizer/jitter/streamout_jit.cpp \
|
||||
rasterizer/jitter/streamout_jit.h
|
||||
|
||||
MEMORY_CXX_SOURCES := \
|
||||
rasterizer/memory/ClearTile.cpp \
|
||||
rasterizer/memory/LoadTile.cpp \
|
||||
rasterizer/memory/StoreTile.cpp
|
|
@ -1,99 +0,0 @@
|
|||
# Copyright (C) 2015 Intel Corporation. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice (including the next
|
||||
# paragraph) shall be included in all copies or substantial portions of the
|
||||
# Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
|
||||
include ../Makefile.sources-arch
|
||||
include $(top_srcdir)/src/gallium/Automake.inc
|
||||
|
||||
VPATH = $(srcdir) $(srcdir)/..
|
||||
|
||||
AM_CXXFLAGS = \
|
||||
-march=core-avx-i \
|
||||
-DKNOB_ARCH=KNOB_ARCH_AVX \
|
||||
$(GALLIUM_DRIVER_CFLAGS) \
|
||||
$(LLVM_CFLAGS) \
|
||||
-I$(builddir)/rasterizer/scripts \
|
||||
-I$(builddir)/rasterizer/jitter \
|
||||
-I$(srcdir)/../rasterizer \
|
||||
-I$(srcdir)/../rasterizer/core \
|
||||
-I$(srcdir)/../rasterizer/jitter
|
||||
|
||||
lib_LTLIBRARIES = libswrAVX.la
|
||||
|
||||
BUILT_SOURCES = \
|
||||
rasterizer/scripts/gen_knobs.cpp \
|
||||
rasterizer/scripts/gen_knobs.h \
|
||||
rasterizer/jitter/state_llvm.h \
|
||||
rasterizer/jitter/builder_gen.h \
|
||||
rasterizer/jitter/builder_gen.cpp \
|
||||
rasterizer/jitter/builder_x86.h \
|
||||
rasterizer/jitter/builder_x86.cpp
|
||||
|
||||
libswrAVX_la_SOURCES = \
|
||||
$(CXX_SOURCES) \
|
||||
$(COMMON_CXX_SOURCES) \
|
||||
$(CORE_CXX_SOURCES) \
|
||||
$(JITTER_CXX_SOURCES) \
|
||||
$(MEMORY_CXX_SOURCES) \
|
||||
$(BUILT_SOURCES)
|
||||
|
||||
rasterizer/scripts/gen_knobs.cpp rasterizer/scripts/gen_knobs.h: rasterizer/scripts/gen_knobs.py rasterizer/scripts/knob_defs.py rasterizer/scripts/templates/knobs.template
|
||||
$(PYTHON2) $(PYTHON_FLAGS) \
|
||||
$(srcdir)/../rasterizer/scripts/gen_knobs.py \
|
||||
rasterizer/scripts
|
||||
|
||||
rasterizer/jitter/state_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py rasterizer/core/state.h
|
||||
$(PYTHON2) $(PYTHON_FLAGS) \
|
||||
$(srcdir)/../rasterizer/jitter/scripts/gen_llvm_types.py \
|
||||
--input $(srcdir)/../rasterizer/core/state.h \
|
||||
--output rasterizer/jitter/state_llvm.h
|
||||
|
||||
rasterizer/jitter/builder_gen.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
|
||||
$(PYTHON2) $(PYTHON_FLAGS) \
|
||||
$(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
|
||||
--input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
|
||||
--output rasterizer/jitter/builder_gen.h \
|
||||
--gen_h
|
||||
|
||||
rasterizer/jitter/builder_gen.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
|
||||
$(PYTHON2) $(PYTHON_FLAGS) \
|
||||
$(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
|
||||
--input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
|
||||
--output rasterizer/jitter/builder_gen.cpp \
|
||||
--gen_cpp
|
||||
|
||||
rasterizer/jitter/builder_x86.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
|
||||
$(PYTHON2) $(PYTHON_FLAGS) \
|
||||
$(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
|
||||
--output rasterizer/jitter/builder_x86.h \
|
||||
--gen_x86_h
|
||||
|
||||
rasterizer/jitter/builder_x86.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
|
||||
$(PYTHON2) $(PYTHON_FLAGS) \
|
||||
$(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
|
||||
--output rasterizer/jitter/builder_x86.cpp \
|
||||
--gen_x86_cpp
|
||||
|
||||
|
||||
libswrAVX_la_LIBADD = \
|
||||
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
|
||||
$(top_builddir)/src/mesa/libmesagallium.la
|
||||
|
||||
include $(top_srcdir)/install-gallium-links.mk
|
|
@ -1,99 +0,0 @@
|
|||
# Copyright (C) 2015 Intel Corporation. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice (including the next
|
||||
# paragraph) shall be included in all copies or substantial portions of the
|
||||
# Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
|
||||
include ../Makefile.sources-arch
|
||||
include $(top_srcdir)/src/gallium/Automake.inc
|
||||
|
||||
VPATH = $(srcdir) $(srcdir)/..
|
||||
|
||||
AM_CXXFLAGS = \
|
||||
-march=core-avx2 \
|
||||
-DKNOB_ARCH=KNOB_ARCH_AVX2 \
|
||||
$(GALLIUM_DRIVER_CFLAGS) \
|
||||
$(LLVM_CFLAGS) \
|
||||
-I$(builddir)/rasterizer/scripts \
|
||||
-I$(builddir)/rasterizer/jitter \
|
||||
-I$(srcdir)/../rasterizer \
|
||||
-I$(srcdir)/../rasterizer/core \
|
||||
-I$(srcdir)/../rasterizer/jitter
|
||||
|
||||
lib_LTLIBRARIES = libswrAVX2.la
|
||||
|
||||
BUILT_SOURCES = \
|
||||
rasterizer/scripts/gen_knobs.cpp \
|
||||
rasterizer/scripts/gen_knobs.h \
|
||||
rasterizer/jitter/state_llvm.h \
|
||||
rasterizer/jitter/builder_gen.h \
|
||||
rasterizer/jitter/builder_gen.cpp \
|
||||
rasterizer/jitter/builder_x86.h \
|
||||
rasterizer/jitter/builder_x86.cpp
|
||||
|
||||
libswrAVX2_la_SOURCES = \
|
||||
$(CXX_SOURCES) \
|
||||
$(COMMON_CXX_SOURCES) \
|
||||
$(CORE_CXX_SOURCES) \
|
||||
$(JITTER_CXX_SOURCES) \
|
||||
$(MEMORY_CXX_SOURCES) \
|
||||
$(BUILT_SOURCES)
|
||||
|
||||
rasterizer/scripts/gen_knobs.cpp rasterizer/scripts/gen_knobs.h: rasterizer/scripts/gen_knobs.py rasterizer/scripts/knob_defs.py rasterizer/scripts/templates/knobs.template
|
||||
$(PYTHON2) $(PYTHON_FLAGS) \
|
||||
$(srcdir)/../rasterizer/scripts/gen_knobs.py \
|
||||
rasterizer/scripts
|
||||
|
||||
rasterizer/jitter/state_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py rasterizer/core/state.h
|
||||
$(PYTHON2) $(PYTHON_FLAGS) \
|
||||
$(srcdir)/../rasterizer/jitter/scripts/gen_llvm_types.py \
|
||||
--input $(srcdir)/../rasterizer/core/state.h \
|
||||
--output rasterizer/jitter/state_llvm.h
|
||||
|
||||
rasterizer/jitter/builder_gen.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
|
||||
$(PYTHON2) $(PYTHON_FLAGS) \
|
||||
$(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
|
||||
--input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
|
||||
--output rasterizer/jitter/builder_gen.h \
|
||||
--gen_h
|
||||
|
||||
rasterizer/jitter/builder_gen.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
|
||||
$(PYTHON2) $(PYTHON_FLAGS) \
|
||||
$(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
|
||||
--input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
|
||||
--output rasterizer/jitter/builder_gen.cpp \
|
||||
--gen_cpp
|
||||
|
||||
rasterizer/jitter/builder_x86.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
|
||||
$(PYTHON2) $(PYTHON_FLAGS) \
|
||||
$(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
|
||||
--output rasterizer/jitter/builder_x86.h \
|
||||
--gen_x86_h
|
||||
|
||||
rasterizer/jitter/builder_x86.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
|
||||
$(PYTHON2) $(PYTHON_FLAGS) \
|
||||
$(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
|
||||
--output rasterizer/jitter/builder_x86.cpp \
|
||||
--gen_x86_cpp
|
||||
|
||||
|
||||
libswrAVX2_la_LIBADD = \
|
||||
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
|
||||
$(top_builddir)/src/mesa/libmesagallium.la
|
||||
|
||||
include $(top_srcdir)/install-gallium-links.mk
|
|
@ -24,10 +24,8 @@ for progname in progs:
|
|||
target = progname,
|
||||
source = progname + '.c',
|
||||
)
|
||||
|
||||
env.Alias(progname, env.InstallProgram(prog))
|
||||
|
||||
# http://www.scons.org/wiki/UnitTests
|
||||
test_alias = env.Alias('unit', [prog], prog[0].abspath)
|
||||
AlwaysBuild(test_alias)
|
||||
|
||||
if progname not in [
|
||||
'u_cache_test', # too long
|
||||
'translate_test', # unreliable
|
||||
]:
|
||||
env.UnitTest(progname, prog)
|
||||
|
|
|
@ -35,37 +35,78 @@
|
|||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "os/os_thread.h"
|
||||
#include "os/os_time.h"
|
||||
#include "util/u_atomic.h"
|
||||
|
||||
|
||||
#define NUM_THREADS 10
|
||||
|
||||
static int verbosity = 0;
|
||||
|
||||
static pipe_thread threads[NUM_THREADS];
|
||||
static pipe_barrier barrier;
|
||||
static int thread_ids[NUM_THREADS];
|
||||
|
||||
static volatile int waiting = 0;
|
||||
static volatile int proceeded = 0;
|
||||
|
||||
|
||||
#define LOG(fmt, ...) \
|
||||
if (verbosity > 0) { \
|
||||
fprintf(stdout, fmt, ##__VA_ARGS__); \
|
||||
}
|
||||
|
||||
#define CHECK(_cond) \
|
||||
if (!(_cond)) { \
|
||||
fprintf(stderr, "%s:%u: `%s` failed\n", __FILE__, __LINE__, #_cond); \
|
||||
_exit(EXIT_FAILURE); \
|
||||
}
|
||||
|
||||
|
||||
static PIPE_THREAD_ROUTINE(thread_function, thread_data)
|
||||
{
|
||||
int thread_id = *((int *) thread_data);
|
||||
|
||||
printf("thread %d starting\n", thread_id);
|
||||
os_time_sleep(thread_id * 1000 * 1000);
|
||||
printf("thread %d before barrier\n", thread_id);
|
||||
LOG("thread %d starting\n", thread_id);
|
||||
os_time_sleep(thread_id * 100 * 1000);
|
||||
LOG("thread %d before barrier\n", thread_id);
|
||||
|
||||
CHECK(p_atomic_read(&proceeded) == 0);
|
||||
p_atomic_inc(&waiting);
|
||||
|
||||
pipe_barrier_wait(&barrier);
|
||||
printf("thread %d exiting\n", thread_id);
|
||||
|
||||
CHECK(p_atomic_read(&waiting) == NUM_THREADS);
|
||||
|
||||
p_atomic_inc(&proceeded);
|
||||
|
||||
LOG("thread %d exiting\n", thread_id);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int main()
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i;
|
||||
|
||||
printf("pipe_barrier_test starting\n");
|
||||
for (i = 1; i < argc; ++i) {
|
||||
const char *arg = argv[i];
|
||||
if (strcmp(arg, "-v") == 0) {
|
||||
++verbosity;
|
||||
} else {
|
||||
fprintf(stderr, "error: unrecognized option `%s`\n", arg);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
// Disable buffering
|
||||
setbuf(stdout, NULL);
|
||||
|
||||
LOG("pipe_barrier_test starting\n");
|
||||
|
||||
pipe_barrier_init(&barrier, NUM_THREADS);
|
||||
|
||||
|
@ -78,9 +119,11 @@ int main()
|
|||
pipe_thread_wait(threads[i]);
|
||||
}
|
||||
|
||||
CHECK(p_atomic_read(&proceeded) == NUM_THREADS);
|
||||
|
||||
pipe_barrier_destroy(&barrier);
|
||||
|
||||
printf("pipe_barrier_test exiting\n");
|
||||
LOG("pipe_barrier_test exiting\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -70,8 +70,9 @@ int main(int argc, char** argv)
|
|||
|
||||
util_cpu_detect();
|
||||
|
||||
if(argc <= 1)
|
||||
{}
|
||||
if (argc <= 1 ||
|
||||
!strcmp(argv[1], "default") )
|
||||
create_fn = translate_create;
|
||||
else if (!strcmp(argv[1], "generic"))
|
||||
create_fn = translate_generic_create;
|
||||
else if (!strcmp(argv[1], "x86"))
|
||||
|
@ -129,7 +130,7 @@ int main(int argc, char** argv)
|
|||
|
||||
if (!create_fn)
|
||||
{
|
||||
printf("Usage: ./translate_test [generic|x86|nosse|sse|sse2|sse3|sse4.1]\n");
|
||||
printf("Usage: ./translate_test [default|generic|x86|nosse|sse|sse2|sse3|sse4.1]\n");
|
||||
return 2;
|
||||
}
|
||||
|
||||
|
|
|
@ -1988,10 +1988,11 @@ fs_visitor::assign_constant_locations()
|
|||
*/
|
||||
const unsigned int max_push_components = 16 * 8;
|
||||
|
||||
/* For vulkan we don't limit the max_chunk_size. We set it to 32 float =
|
||||
* 128 bytes, which is the maximum vulkan push constant size.
|
||||
/* We push small arrays, but no bigger than 16 floats. This is big enough
|
||||
* for a vec4 but hopefully not large enough to push out other stuff. We
|
||||
* should probably use a better heuristic at some point.
|
||||
*/
|
||||
const unsigned int max_chunk_size = 32;
|
||||
const unsigned int max_chunk_size = 16;
|
||||
|
||||
unsigned int num_push_constants = 0;
|
||||
unsigned int num_pull_constants = 0;
|
||||
|
@ -2018,8 +2019,14 @@ fs_visitor::assign_constant_locations()
|
|||
if (!contiguous[u]) {
|
||||
unsigned chunk_size = u - chunk_start + 1;
|
||||
|
||||
if (num_push_constants + chunk_size <= max_push_components &&
|
||||
chunk_size <= max_chunk_size) {
|
||||
/* Decide whether we should push or pull this parameter. In the
|
||||
* Vulkan driver, push constants are explicitly exposed via the API
|
||||
* so we push everything. In GL, we only push small arrays.
|
||||
*/
|
||||
if (stage_prog_data->pull_param == NULL ||
|
||||
(num_push_constants + chunk_size <= max_push_components &&
|
||||
chunk_size <= max_chunk_size)) {
|
||||
assert(num_push_constants + chunk_size <= max_push_components);
|
||||
for (unsigned j = chunk_start; j <= u; j++)
|
||||
push_constant_loc[j] = num_push_constants++;
|
||||
} else {
|
||||
|
@ -4515,7 +4522,7 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
|
|||
|
||||
case SHADER_OPCODE_MOV_INDIRECT:
|
||||
/* Prior to Broadwell, we only have 8 address subregisters */
|
||||
return devinfo->gen < 8 ? 8 : inst->exec_size;
|
||||
return devinfo->gen < 8 ? 8 : MIN2(inst->exec_size, 16);
|
||||
|
||||
default:
|
||||
return inst->exec_size;
|
||||
|
|
|
@ -367,29 +367,53 @@ fs_generator::generate_mov_indirect(fs_inst *inst,
|
|||
/* The destination stride of an instruction (in bytes) must be greater
|
||||
* than or equal to the size of the rest of the instruction. Since the
|
||||
* address register is of type UW, we can't use a D-type instruction.
|
||||
* In order to get around this, re re-type to UW and use a stride.
|
||||
       * In order to get around this, we retype to UW and use a stride.
|
||||
*/
|
||||
indirect_byte_offset =
|
||||
retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
|
||||
|
||||
struct brw_reg ind_src;
|
||||
if (devinfo->gen < 8) {
|
||||
/* Prior to broadwell, we have a restriction that the bottom 5 bits
|
||||
* of the base offset and the bottom 5 bits of the indirect must add
|
||||
* to less than 32. In other words, the hardware needs to be able to
|
||||
* add the bottom five bits of the two to get the subnumber and add
|
||||
* the next 7 bits of each to get the actual register number. Since
|
||||
* the indirect may cause us to cross a register boundary, this makes
|
||||
* it almost useless. We could try and do something clever where we
|
||||
* use a actual base offset if base_offset % 32 == 0 but that would
|
||||
* mean we were generating different code depending on the base
|
||||
* offset. Instead, for the sake of consistency, we'll just do the
|
||||
* add ourselves.
|
||||
/* From the Haswell PRM section "Register Region Restrictions":
|
||||
*
|
||||
* "The lower bits of the AddressImmediate must not overflow to
|
||||
* change the register address. The lower 5 bits of Address
|
||||
* Immediate when added to lower 5 bits of address register gives
|
||||
* the sub-register offset. The upper bits of Address Immediate
|
||||
* when added to upper bits of address register gives the register
|
||||
* address. Any overflow from sub-register offset is dropped."
|
||||
*
|
||||
          * This restriction is only listed in the Haswell PRM but empirical
|
||||
* testing indicates that it applies on all older generations and is
|
||||
* lifted on Broadwell.
|
||||
*
|
||||
* Since the indirect may cause us to cross a register boundary, this
|
||||
* makes the base offset almost useless. We could try and do
|
||||
          * something clever where we use an actual base offset if
|
||||
* base_offset % 32 == 0 but that would mean we were generating
|
||||
* different code depending on the base offset. Instead, for the
|
||||
* sake of consistency, we'll just do the add ourselves.
|
||||
*/
|
||||
brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset));
|
||||
brw_MOV(p, dst, retype(brw_VxH_indirect(0, 0), dst.type));
|
||||
ind_src = brw_VxH_indirect(0, 0);
|
||||
} else {
|
||||
brw_MOV(p, addr, indirect_byte_offset);
|
||||
brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
|
||||
ind_src = brw_VxH_indirect(0, imm_byte_offset);
|
||||
}
|
||||
|
||||
brw_inst *mov = brw_MOV(p, dst, retype(ind_src, dst.type));
|
||||
|
||||
if (devinfo->gen == 6 && dst.file == BRW_MESSAGE_REGISTER_FILE &&
|
||||
!inst->get_next()->is_tail_sentinel() &&
|
||||
((fs_inst *)inst->get_next())->mlen > 0) {
|
||||
/* From the Sandybridge PRM:
|
||||
*
|
||||
* "[Errata: DevSNB(SNB)] If MRF register is updated by any
|
||||
* instruction that “indexed/indirect” source AND is followed by a
|
||||
* send, the instruction requires a “Switch”. This is to avoid
|
||||
* race condition where send may dispatch before MRF is updated."
|
||||
*/
|
||||
brw_inst_set_thread_control(devinfo, mov, BRW_THREAD_SWITCH);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2743,7 +2743,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
|
||||
if (const_offset == NULL) {
|
||||
fs_reg base_offset = retype(get_nir_src(instr->src[1]),
|
||||
BRW_REGISTER_TYPE_D);
|
||||
BRW_REGISTER_TYPE_UD);
|
||||
|
||||
for (int i = 0; i < instr->num_components; i++)
|
||||
VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index,
|
||||
|
|
|
@ -285,7 +285,7 @@ public:
|
|||
void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
|
||||
dst_reg dst,
|
||||
src_reg orig_src,
|
||||
int base_offset,
|
||||
int base_offset,
|
||||
src_reg indirect);
|
||||
void emit_pull_constant_load_reg(dst_reg dst,
|
||||
src_reg surf_index,
|
||||
|
|
|
@ -758,7 +758,7 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
|
|||
pull->mlen = 2;
|
||||
pull->header_size = 1;
|
||||
} else if (devinfo->gen >= 7) {
|
||||
dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
|
||||
dst_reg grf_offset = dst_reg(this, glsl_type::uint_type);
|
||||
|
||||
grf_offset.type = offset_reg.type;
|
||||
|
||||
|
@ -1587,21 +1587,21 @@ vec4_visitor::move_grf_array_access_to_scratch()
|
|||
void
|
||||
vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
|
||||
dst_reg temp, src_reg orig_src,
|
||||
int base_offset, src_reg indirect)
|
||||
int base_offset, src_reg indirect)
|
||||
{
|
||||
int reg_offset = base_offset + orig_src.reg_offset;
|
||||
const unsigned index = prog_data->base.binding_table.pull_constants_start;
|
||||
|
||||
src_reg offset;
|
||||
if (indirect.file != BAD_FILE) {
|
||||
offset = src_reg(this, glsl_type::int_type);
|
||||
offset = src_reg(this, glsl_type::uint_type);
|
||||
|
||||
emit_before(block, inst, ADD(dst_reg(offset), indirect,
|
||||
brw_imm_d(reg_offset * 16)));
|
||||
brw_imm_ud(reg_offset * 16)));
|
||||
} else if (devinfo->gen >= 8) {
|
||||
/* Store the offset in a GRF so we can send-from-GRF. */
|
||||
offset = src_reg(this, glsl_type::int_type);
|
||||
emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset * 16)));
|
||||
offset = src_reg(this, glsl_type::uint_type);
|
||||
emit_before(block, inst, MOV(dst_reg(offset), brw_imm_ud(reg_offset * 16)));
|
||||
} else {
|
||||
offset = brw_imm_d(reg_offset * 16);
|
||||
}
|
||||
|
@ -1629,6 +1629,12 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
|
|||
void
|
||||
vec4_visitor::move_uniform_array_access_to_pull_constants()
|
||||
{
|
||||
   /* The vulkan driver doesn't support pull constants other than UBOs so
|
||||
* everything has to be pushed regardless.
|
||||
*/
|
||||
if (stage_prog_data->pull_param == NULL)
|
||||
return;
|
||||
|
||||
int pull_constant_loc[this->uniforms];
|
||||
memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
|
||||
|
||||
|
|
|
@ -932,7 +932,7 @@ static const __DRIextension *intelRobustScreenExtensions[] = {
|
|||
NULL
|
||||
};
|
||||
|
||||
static bool
|
||||
static int
|
||||
intel_get_param(__DRIscreen *psp, int param, int *value)
|
||||
{
|
||||
int ret;
|
||||
|
@ -943,20 +943,17 @@ intel_get_param(__DRIscreen *psp, int param, int *value)
|
|||
gp.value = value;
|
||||
|
||||
ret = drmCommandWriteRead(psp->fd, DRM_I915_GETPARAM, &gp, sizeof(gp));
|
||||
if (ret) {
|
||||
if (ret != -EINVAL)
|
||||
if (ret < 0 && ret != -EINVAL)
|
||||
_mesa_warning(NULL, "drm_i915_getparam: %d", ret);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool
|
||||
intel_get_boolean(__DRIscreen *psp, int param)
|
||||
{
|
||||
int value = 0;
|
||||
return intel_get_param(psp, param, &value) && value;
|
||||
return (intel_get_param(psp, param, &value) == 0) && value;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -1093,12 +1090,12 @@ intel_detect_sseu(struct intel_screen *intelScreen)
|
|||
|
||||
ret = intel_get_param(intelScreen->driScrnPriv, I915_PARAM_SUBSLICE_TOTAL,
|
||||
&intelScreen->subslice_total);
|
||||
if (ret != -EINVAL)
|
||||
if (ret < 0 && ret != -EINVAL)
|
||||
goto err_out;
|
||||
|
||||
ret = intel_get_param(intelScreen->driScrnPriv,
|
||||
I915_PARAM_EU_TOTAL, &intelScreen->eu_total);
|
||||
if (ret != -EINVAL)
|
||||
if (ret < 0 && ret != -EINVAL)
|
||||
goto err_out;
|
||||
|
||||
/* Without this information, we cannot get the right Braswell brandstrings,
|
||||
|
@ -1114,7 +1111,7 @@ intel_detect_sseu(struct intel_screen *intelScreen)
|
|||
err_out:
|
||||
intelScreen->subslice_total = -1;
|
||||
intelScreen->eu_total = -1;
|
||||
_mesa_warning(NULL, "Failed to query GPU properties.\n");
|
||||
_mesa_warning(NULL, "Failed to query GPU properties (%s).\n", strerror(ret));
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
|
@ -704,6 +704,10 @@ st_DrawAtlasBitmaps(struct gl_context *ctx,
|
|||
st_validate_state(st, ST_PIPELINE_RENDER);
|
||||
|
||||
sv = st_create_texture_sampler_view(pipe, stObj->pt);
|
||||
if (!sv) {
|
||||
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glCallLists(bitmap text)");
|
||||
return;
|
||||
}
|
||||
|
||||
setup_render_state(ctx, sv, color, true);
|
||||
|
||||
|
@ -793,6 +797,8 @@ st_DrawAtlasBitmaps(struct gl_context *ctx,
|
|||
|
||||
pipe_resource_reference(&vb.buffer, NULL);
|
||||
|
||||
pipe_sampler_view_reference(&sv, NULL);
|
||||
|
||||
/* We uploaded modified constants, need to invalidate them. */
|
||||
st->dirty.mesa |= _NEW_PROGRAM_CONSTANTS;
|
||||
}
|
||||
|
|
|
@ -47,17 +47,14 @@ env.Alias('mesautil', mesautil)
|
|||
Export('mesautil')
|
||||
|
||||
|
||||
# http://www.scons.org/wiki/UnitTests
|
||||
u_atomic_test = env.Program(
|
||||
target = 'u_atomic_test',
|
||||
source = ['u_atomic_test.c'],
|
||||
)
|
||||
alias = env.Alias("u_atomic_test", u_atomic_test, u_atomic_test[0].abspath)
|
||||
AlwaysBuild(alias)
|
||||
env.UnitTest("u_atomic_test", u_atomic_test)
|
||||
|
||||
roundeven_test = env.Program(
|
||||
target = 'roundeven_test',
|
||||
source = ['roundeven_test.c'],
|
||||
)
|
||||
alias = env.Alias("roundeven_test", roundeven_test, roundeven_test[0].abspath)
|
||||
AlwaysBuild(alias)
|
||||
env.UnitTest("roundeven_test", roundeven_test)
|
||||
|
|
Loading…
Reference in New Issue