Merge remote-tracking branch 'mesa-public/master' into vulkan

This commit is contained in:
Jason Ekstrand 2015-11-03 15:45:04 -08:00
commit b00e3f221b
340 changed files with 16562 additions and 3672 deletions

View File

@ -81,7 +81,7 @@ PRESENTPROTO_REQUIRED=1.0
LIBUDEV_REQUIRED=151
GLPROTO_REQUIRED=1.4.14
LIBOMXIL_BELLAGIO_REQUIRED=0.0
LIBVA_REQUIRED=0.35.0
LIBVA_REQUIRED=0.38.0
VDPAU_REQUIRED=1.1
WAYLAND_REQUIRED=1.2.0
XCB_REQUIRED=1.9.3
@ -867,7 +867,7 @@ GALLIUM_DRIVERS_DEFAULT="r300,r600,svga,swrast"
AC_ARG_WITH([gallium-drivers],
[AS_HELP_STRING([--with-gallium-drivers@<:@=DIRS...@:>@],
[comma delimited Gallium drivers list, e.g.
"i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4"
"i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl"
@<:@default=r300,r600,svga,swrast@:>@])],
[with_gallium_drivers="$withval"],
[with_gallium_drivers="$GALLIUM_DRIVERS_DEFAULT"])
@ -2188,6 +2188,12 @@ if test -n "$with_gallium_drivers"; then
PKG_CHECK_MODULES([SIMPENROSE], [simpenrose],
[USE_VC4_SIMULATOR=yes], [USE_VC4_SIMULATOR=no])
;;
xvirgl)
HAVE_GALLIUM_VIRGL=yes
gallium_require_drm "virgl"
gallium_require_drm_loader
require_egl_drm "virgl"
;;
*)
AC_MSG_ERROR([Unknown Gallium driver: $driver])
;;
@ -2259,6 +2265,7 @@ AM_CONDITIONAL(HAVE_GALLIUM_FREEDRENO, test "x$HAVE_GALLIUM_FREEDRENO" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_SOFTPIPE, test "x$HAVE_GALLIUM_SOFTPIPE" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_LLVMPIPE, test "x$HAVE_GALLIUM_LLVMPIPE" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_VC4, test "x$HAVE_GALLIUM_VC4" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_VIRGL, test "x$HAVE_GALLIUM_VIRGL" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_STATIC_TARGETS, test "x$enable_shared_pipe_drivers" = xno)
@ -2386,6 +2393,7 @@ AC_CONFIG_FILES([Makefile
src/gallium/drivers/svga/Makefile
src/gallium/drivers/trace/Makefile
src/gallium/drivers/vc4/Makefile
src/gallium/drivers/virgl/Makefile
src/gallium/state_trackers/clover/Makefile
src/gallium/state_trackers/dri/Makefile
src/gallium/state_trackers/glx/xlib/Makefile
@ -2426,6 +2434,8 @@ AC_CONFIG_FILES([Makefile
src/gallium/winsys/sw/wrapper/Makefile
src/gallium/winsys/sw/xlib/Makefile
src/gallium/winsys/vc4/drm/Makefile
src/gallium/winsys/virgl/drm/Makefile
src/gallium/winsys/virgl/vtest/Makefile
src/gbm/Makefile
src/gbm/main/gbm.pc
src/glsl/Makefile

View File

@ -153,10 +153,10 @@ GL 4.3, GLSL 4.30:
GL_ARB_ES3_compatibility DONE (all drivers that support GLSL 3.30)
GL_ARB_clear_buffer_object DONE (all drivers)
GL_ARB_compute_shader in progress (jljusten)
GL_ARB_copy_image DONE (i965) (gallium - in progress, VMware)
GL_ARB_copy_image DONE (i965, nv50, nvc0, radeonsi)
GL_KHR_debug DONE (all drivers)
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
GL_ARB_fragment_layer_viewport DONE (nv50, nvc0, r600, radeonsi, llvmpipe)
GL_ARB_fragment_layer_viewport DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe)
GL_ARB_framebuffer_no_attachments DONE (i965)
GL_ARB_internalformat_query2 not started
GL_ARB_invalidate_subdata DONE (all drivers)
@ -243,7 +243,7 @@ GLES3.2, GLSL ES 3.2
GL_KHR_texture_compression_astc_ldr DONE (i965/gen9+)
GL_OES_copy_image not started (based on GL_ARB_copy_image, which is done for some drivers)
GL_OES_draw_buffers_indexed not started
GL_OES_draw_elements_base_vertex not started (based on GL_ARB_draw_elements_base_vertex, which is done for all drivers)
GL_OES_draw_elements_base_vertex DONE (all drivers)
GL_OES_geometry_shader not started (based on GL_ARB_geometry_shader4, which is done for all drivers)
GL_OES_gpu_shader5 not started (based on parts of GL_ARB_gpu_shader5, which is done for some drivers)
GL_OES_primitive_bounding_box not started

View File

@ -16,6 +16,12 @@
<h1>News</h1>
<h2>October 24, 2015</h2>
<p>
<a href="relnotes/11.0.4.html">Mesa 11.0.4</a> is released.
This is a bug-fix release.
</p>
<h2>October 10, 2015</h2>
<p>
<a href="relnotes/11.0.3.html">Mesa 11.0.3</a> is released.
@ -28,7 +34,7 @@ This is a bug-fix release.
This is a bug-fix release.
<br>
NOTE: It is anticipated that 10.6.9 will be the final release in the 10.6
series. Users of 10.5 are encouraged to migrate to the 11.0 series in order
series. Users of 10.6 are encouraged to migrate to the 11.0 series in order
to obtain future fixes.
</p>

View File

@ -21,6 +21,7 @@ The release notes summarize what's new or changed in each Mesa release.
</p>
<ul>
<li><a href="relnotes/11.0.4.html">11.0.4 release notes</a>
<li><a href="relnotes/11.0.3.html">11.0.3 release notes</a>
<li><a href="relnotes/10.6.9.html">10.6.9 release notes</a>
<li><a href="relnotes/11.0.2.html">11.0.2 release notes</a>

168
docs/relnotes/11.0.4.html Normal file
View File

@ -0,0 +1,168 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 11.0.4 Release Notes / October 24, 2015</h1>
<p>
Mesa 11.0.4 is a bug fix release which fixes bugs found since the 11.0.3 release.
</p>
<p>
Mesa 11.0.4 implements the OpenGL 4.1 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.1. OpenGL
4.1 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>
<h2>SHA256 checksums</h2>
<pre>
ed412ca6a46d1bd055120e5c12806c15419ae8c4dd6d3f6ea20a83091d5c78bf mesa-11.0.4.tar.gz
40201bf7fc6fa12a6d9edfe870b41eb4dd6669154e3c42c48a96f70805f5483d mesa-11.0.4.tar.xz
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<p>This list is likely incomplete.</p>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86281">Bug 86281</a> - brw_meta_fast_clear (brw=brw&#64;entry=0x7fffd4097a08, fb=fb&#64;entry=0x7fffd40fa900, buffers=buffers&#64;entry=2, partial_clear=partial_clear&#64;entry=false)</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86720">Bug 86720</a> - [radeon] Europa Universalis 4 freezing during game start (10.3.3+, still broken on 11.0.2)</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91788">Bug 91788</a> - [HSW Regression] Synmark2_v6 Multithread performance case FPS reduced by 36%</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92304">Bug 92304</a> - [cts] cts.shaders.negative conformance tests fail</li>
</ul>
<h2>Changes</h2>
<p>Alejandro Piñeiro (2):</p>
<ul>
<li>i965/vec4: check writemask when bailing out at register coalesce</li>
<li>i965/vec4: fill src_reg type using the constructor type parameter</li>
</ul>
<p>Brian Paul (2):</p>
<ul>
<li>vbo: fix incorrect switch statement in init_mat_currval()</li>
<li>mesa: fix incorrect opcode in save_BlendFunci()</li>
</ul>
<p>Chih-Wei Huang (3):</p>
<ul>
<li>mesa: android: Fix the incorrect path of sse_minmax.c</li>
<li>nv50/ir: use C++11 standard std::unordered_map if possible</li>
<li>nv30: include the header of ffs prototype</li>
</ul>
<p>Chris Wilson (1):</p>
<ul>
<li>i965: Remove early release of DRI2 miptree</li>
</ul>
<p>Dave Airlie (1):</p>
<ul>
<li>mesa/uniforms: fix get_uniform for doubles (v2)</li>
</ul>
<p>Emil Velikov (1):</p>
<ul>
<li>docs: add sha256 checksums for 11.0.3</li>
</ul>
<p>Francisco Jerez (5):</p>
<ul>
<li>i965: Don't tell the hardware about our UAV access.</li>
<li>mesa: Expose function to calculate whether a shader image unit is valid.</li>
<li>mesa: Skip redundant texture completeness checking during image validation.</li>
<li>i965: Use _mesa_is_image_unit_valid() instead of gl_image_unit::_Valid.</li>
<li>mesa: Get rid of texture-dependent image unit derived state.</li>
</ul>
<p>Ian Romanick (8):</p>
<ul>
<li>glsl: Allow built-in functions as constant expressions in OpenGL ES 1.00</li>
<li>ff_fragment_shader: Use binding to set the sampler unit</li>
<li>glsl/linker: Use constant_initializer instead of constant_value to initialize uniforms</li>
<li>glsl: Use constant_initializer instead of constant_value to determine whether to keep an unused uniform</li>
<li>glsl: Only set ir_variable::constant_value for const-decorated variables</li>
<li>glsl: Restrict initializers for global variables to constant expression in ES</li>
<li>glsl: Add method to determine whether an expression contains the sequence operator</li>
<li>glsl: In later GLSL versions, sequence operator cannot be a constant expression</li>
</ul>
<p>Ilia Mirkin (1):</p>
<ul>
<li>nouveau: make sure there's always room to emit a fence</li>
</ul>
<p>Indrajit Das (1):</p>
<ul>
<li>st/va: Used correct parameter to derive the value of the "h" variable in vlVaCreateImage</li>
</ul>
<p>Jonathan Gray (1):</p>
<ul>
<li>configure.ac: ensure RM is set</li>
</ul>
<p>Krzysztof Sobiecki (1):</p>
<ul>
<li>st/fbo: use pipe_surface_release instead of pipe_surface_reference</li>
</ul>
<p>Leo Liu (1):</p>
<ul>
<li>st/omx/dec/h264: fix field picture type 0 poc disorder</li>
</ul>
<p>Marek Olšák (3):</p>
<ul>
<li>st/mesa: fix clip state dependencies</li>
<li>radeonsi: fix a GS copy shader leak</li>
<li>gallium: add PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT</li>
</ul>
<p>Nicolai Hähnle (1):</p>
<ul>
<li>u_vbuf: fix vb slot assignment for translated buffers</li>
</ul>
<p>Rob Clark (1):</p>
<ul>
<li>freedreno/a3xx: cache-flush is needed after MEM_WRITE</li>
</ul>
<p>Tapani Pälli (3):</p>
<ul>
<li>mesa: add GL_UNSIGNED_INT_24_8 to _mesa_pack_depth_span</li>
<li>mesa: Set api prefix to version string when overriding version</li>
<li>mesa: fix ARRAY_SIZE query for GetProgramResourceiv</li>
</ul>
</div>
</body>
</html>

View File

@ -45,15 +45,21 @@ Note: some of the new features are only available with certain drivers.
<ul>
<li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>
<li>GL_ARB_copy_image on radeonsi</li>
<li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
<li>GL_ARB_gpu_shader5 on r600 for Evergreen and later chips</li>
<li>GL_ARB_shader_clock on i965 (gen7+)</li>
<li>GL_ARB_shader_stencil_export on i965 (gen9+)</li>
<li>GL_ARB_shader_storage_buffer_object on i965</li>
<li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi</li>
<li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li>
<li>GL_ARB_texture_query_lod on softpipe</li>
<li>GL_ARB_texture_view on radeonsi</li>
<li>GL_EXT_draw_elements_base_vertex on all drivers</li>
<li>GL_OES_draw_elements_base_vertex on all drivers</li>
<li>EGL_KHR_create_context on softpipe, llvmpipe</li>
<li>EGL_KHR_gl_colorspace on softpipe, llvmpipe</li>
<li>new virgl gallium driver for qemu virtio-gpu</li>
</ul>
<h2>Bug fixes</h2>

View File

@ -495,7 +495,7 @@ struct __DRIdamageExtensionRec {
* SWRast Loader extension.
*/
#define __DRI_SWRAST_LOADER "DRI_SWRastLoader"
#define __DRI_SWRAST_LOADER_VERSION 2
#define __DRI_SWRAST_LOADER_VERSION 3
struct __DRIswrastLoaderExtensionRec {
__DRIextension base;
@ -528,6 +528,15 @@ struct __DRIswrastLoaderExtensionRec {
void (*putImage2)(__DRIdrawable *drawable, int op,
int x, int y, int width, int height, int stride,
char *data, void *loaderPrivate);
/**
* Put image to drawable
*
* \since 3
*/
void (*getImage2)(__DRIdrawable *readable,
int x, int y, int width, int height, int stride,
char *data, void *loaderPrivate);
};
/**

View File

@ -109,21 +109,29 @@ CHIPSET(0x162A, bdw_gt3, "Intel(R) Iris Pro P6300 (Broadwell GT3e)")
CHIPSET(0x162B, bdw_gt3, "Intel(R) Iris 6100 (Broadwell GT3)")
CHIPSET(0x162D, bdw_gt3, "Intel(R) Broadwell GT3")
CHIPSET(0x162E, bdw_gt3, "Intel(R) Broadwell GT3")
CHIPSET(0x1902, skl_gt1, "Intel(R) Skylake DT GT1")
CHIPSET(0x1906, skl_gt1, "Intel(R) Skylake ULT GT1")
CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake SRV GT1")
CHIPSET(0x190B, skl_gt1, "Intel(R) Skylake Halo GT1")
CHIPSET(0x190E, skl_gt1, "Intel(R) Skylake ULX GT1")
CHIPSET(0x1912, skl_gt2, "Intel(R) Skylake DT GT2")
CHIPSET(0x1916, skl_gt2, "Intel(R) Skylake ULT GT2")
CHIPSET(0x191A, skl_gt2, "Intel(R) Skylake SRV GT2")
CHIPSET(0x191B, skl_gt2, "Intel(R) Skylake Halo GT2")
CHIPSET(0x191D, skl_gt2, "Intel(R) Skylake WKS GT2")
CHIPSET(0x191E, skl_gt2, "Intel(R) Skylake ULX GT2")
CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake ULT GT2F")
CHIPSET(0x1926, skl_gt3, "Intel(R) Skylake ULT GT3")
CHIPSET(0x192A, skl_gt3, "Intel(R) Skylake SRV GT3")
CHIPSET(0x192B, skl_gt3, "Intel(R) Skylake Halo GT3")
CHIPSET(0x1902, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
CHIPSET(0x1906, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake GT1")
CHIPSET(0x190E, skl_gt1, "Intel(R) Skylake GT1")
CHIPSET(0x1912, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)")
CHIPSET(0x1913, skl_gt2, "Intel(R) Skylake GT2f")
CHIPSET(0x1915, skl_gt2, "Intel(R) Skylake GT2f")
CHIPSET(0x1916, skl_gt2, "Intel(R) HD Graphics 520 (Skylake GT2)")
CHIPSET(0x1917, skl_gt2, "Intel(R) Skylake GT2f")
CHIPSET(0x191A, skl_gt2, "Intel(R) Skylake GT2")
CHIPSET(0x191B, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)")
CHIPSET(0x191D, skl_gt2, "Intel(R) HD Graphics P530 (Skylake GT2)")
CHIPSET(0x191E, skl_gt2, "Intel(R) HD Graphics 515 (Skylake GT2)")
CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake GT2")
CHIPSET(0x1923, skl_gt3, "Intel(R) Iris Graphics 540 (Skylake GT3e)")
CHIPSET(0x1926, skl_gt3, "Intel(R) HD Graphics 535 (Skylake GT3)")
CHIPSET(0x1927, skl_gt3, "Intel(R) Iris Graphics 550 (Skylake GT3e)")
CHIPSET(0x192A, skl_gt4, "Intel(R) Skylake GT4")
CHIPSET(0x192B, skl_gt3, "Intel(R) Iris Graphics (Skylake GT3fe)")
CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4")
CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4")
CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4")
CHIPSET(0x193D, skl_gt4, "Intel(R) Skylake GT4")
CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherryview)")
CHIPSET(0x22B1, chv, "Intel(R) HD Graphics (Cherryview)")
CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)")

View File

@ -181,3 +181,5 @@ CHIPSET(0x9876, CARRIZO_, CARRIZO)
CHIPSET(0x9877, CARRIZO_, CARRIZO)
CHIPSET(0x7300, FIJI_, FIJI)
CHIPSET(0x98E4, STONEY_, STONEY)

View File

@ -82,6 +82,11 @@ if HAVE_GALLIUM_VC4
SUBDIRS += drivers/vc4 winsys/vc4/drm
endif
## virgl
if HAVE_GALLIUM_VIRGL
SUBDIRS += drivers/virgl winsys/virgl/drm winsys/virgl/vtest
endif
## the sw winsys'
SUBDIRS += winsys/sw/null

View File

@ -427,6 +427,7 @@ lp_build_init(void)
*/
util_cpu_caps.has_avx = 0;
util_cpu_caps.has_avx2 = 0;
util_cpu_caps.has_f16c = 0;
}
#ifdef PIPE_ARCH_PPC_64
@ -458,7 +459,9 @@ lp_build_init(void)
util_cpu_caps.has_sse3 = 0;
util_cpu_caps.has_ssse3 = 0;
util_cpu_caps.has_sse4_1 = 0;
util_cpu_caps.has_sse4_2 = 0;
util_cpu_caps.has_avx = 0;
util_cpu_caps.has_avx2 = 0;
util_cpu_caps.has_f16c = 0;
#endif

View File

@ -497,20 +497,48 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
#endif
}
llvm::SmallVector<std::string, 1> MAttrs;
if (util_cpu_caps.has_avx) {
/*
* AVX feature is not automatically detected from CPUID by the X86 target
* yet, because the old (yet default) JIT engine is not capable of
* emitting the opcodes. On newer llvm versions it is and at least some
* versions (tested with 3.3) will emit avx opcodes without this anyway.
*/
MAttrs.push_back("+avx");
if (util_cpu_caps.has_f16c) {
MAttrs.push_back("+f16c");
}
builder.setMAttrs(MAttrs);
}
llvm::SmallVector<std::string, 16> MAttrs;
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
/*
* We need to unset attributes because sometimes LLVM mistakenly assumes
* certain features are present given the processor name.
*
* https://bugs.freedesktop.org/show_bug.cgi?id=92214
* http://llvm.org/PR25021
* http://llvm.org/PR19429
* http://llvm.org/PR16721
*/
MAttrs.push_back(util_cpu_caps.has_sse ? "+sse" : "-sse" );
MAttrs.push_back(util_cpu_caps.has_sse2 ? "+sse2" : "-sse2" );
MAttrs.push_back(util_cpu_caps.has_sse3 ? "+sse3" : "-sse3" );
MAttrs.push_back(util_cpu_caps.has_ssse3 ? "+ssse3" : "-ssse3" );
#if HAVE_LLVM >= 0x0304
MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse4.1" : "-sse4.1");
#else
MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse41" : "-sse41" );
#endif
#if HAVE_LLVM >= 0x0304
MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse4.2" : "-sse4.2");
#else
MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse42" : "-sse42" );
#endif
/*
* AVX feature is not automatically detected from CPUID by the X86 target
* yet, because the old (yet default) JIT engine is not capable of
* emitting the opcodes. On newer llvm versions it is and at least some
* versions (tested with 3.3) will emit avx opcodes without this anyway.
*/
MAttrs.push_back(util_cpu_caps.has_avx ? "+avx" : "-avx");
MAttrs.push_back(util_cpu_caps.has_f16c ? "+f16c" : "-f16c");
MAttrs.push_back(util_cpu_caps.has_avx2 ? "+avx2" : "-avx2");
#endif
#if defined(PIPE_ARCH_PPC)
MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec");
#endif
builder.setMAttrs(MAttrs);
#if HAVE_LLVM >= 0x0305
StringRef MCPU = llvm::sys::getHostCPUName();

View File

@ -405,16 +405,17 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
break;
case PIPE_TEX_WRAP_MIRROR_REPEAT:
if (offset) {
offset = lp_build_int_to_float(coord_bld, offset);
offset = lp_build_div(coord_bld, offset, length_f);
coord = lp_build_add(coord_bld, coord, offset);
}
/* compute mirror function */
coord = lp_build_coord_mirror(bld, coord);
/* scale coord to length */
coord = lp_build_mul(coord_bld, coord, length_f);
coord = lp_build_sub(coord_bld, coord, half);
if (offset) {
offset = lp_build_int_to_float(coord_bld, offset);
coord = lp_build_add(coord_bld, coord, offset);
}
/* convert to int, compute lerp weight */
lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
@ -567,12 +568,13 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
coord = lp_build_mul(coord_bld, coord, length_f);
}
if (offset) {
offset = lp_build_int_to_float(coord_bld, offset);
coord = lp_build_add(coord_bld, coord, offset);
}
/* floor */
/* use itrunc instead since we clamp to 0 anyway */
icoord = lp_build_itrunc(coord_bld, coord);
if (offset) {
icoord = lp_build_add(int_coord_bld, icoord, offset);
}
/* clamp to [0, length - 1]. */
icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
@ -2586,6 +2588,10 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
}
/*
* We could force CLAMP to CLAMP_TO_EDGE here if min/mag filter is nearest,
* so AoS path could be used. Not sure it's worth the trouble...
*/
min_img_filter = derived_sampler_state.min_img_filter;
mag_img_filter = derived_sampler_state.mag_img_filter;

View File

@ -59,6 +59,11 @@
#include "vc4/drm/vc4_drm_public.h"
#endif
#if GALLIUM_VIRGL
#include "virgl/drm/virgl_drm_public.h"
#include "virgl/virgl_public.h"
#endif
static char* driver_name = NULL;
/* XXX: We need to teardown the winsys if *screen_create() fails. */
@ -296,6 +301,33 @@ pipe_freedreno_create_screen(int fd)
}
#endif
#if defined(GALLIUM_VIRGL)
#if defined(DRI_TARGET)
/* Forward declaration so the PUBLIC visibility attribute can be applied
 * to the definition below. */
const __DRIextension **__driDriverGetExtensions_virtio_gpu(void);
/**
 * DRI loader entry point for the virtio-gpu (virgl) driver.
 *
 * Installs the shared gallium DRM driver API as the global driver API and
 * returns the common galliumdrm extension list.
 */
PUBLIC const __DRIextension **__driDriverGetExtensions_virtio_gpu(void)
{
globalDriverAPI = &galliumdrm_driver_api;
return galliumdrm_driver_extensions;
}
#endif
/**
 * Create a virgl pipe_screen on top of a DRM file descriptor.
 *
 * Creates the virgl DRM winsys for \p fd, then a virgl screen on top of it.
 * Returns NULL if the winsys cannot be created; on success the screen is
 * passed through debug_screen_wrap() before being returned.
 */
static struct pipe_screen *
pipe_virgl_create_screen(int fd)
{
struct virgl_winsys *vws;
struct pipe_screen *screen;
vws = virgl_drm_winsys_create(fd);
if (!vws)
return NULL;
screen = virgl_create_screen(vws);
/* NOTE(review): ownership of vws appears to transfer to the screen on
 * success — confirm virgl_create_screen's contract; vws is not freed here
 * if screen creation fails. */
return screen ? debug_screen_wrap(screen) : NULL;
}
#endif
#if defined(GALLIUM_VC4)
#if defined(DRI_TARGET)
@ -385,6 +417,11 @@ dd_create_screen(int fd)
return pipe_freedreno_create_screen(fd);
else
#endif
#if defined(GALLIUM_VIRGL)
if ((strcmp(driver_name, "virtio_gpu") == 0))
return pipe_virgl_create_screen(fd);
else
#endif
#if defined(GALLIUM_VC4)
if (strcmp(driver_name, "vc4") == 0)
return pipe_vc4_create_screen(fd);
@ -474,6 +511,11 @@ dd_configuration(enum drm_conf conf)
return configuration_query(conf);
else
#endif
#if defined(GALLIUM_VIRGL)
if ((strcmp(driver_name, "virtio_gpu") == 0))
return configuration_query(conf);
else
#endif
#if defined(GALLIUM_VC4)
if (strcmp(driver_name, "vc4") == 0)
return configuration_query(conf);

View File

@ -19,6 +19,10 @@
#include "llvmpipe/lp_public.h"
#endif
#ifdef GALLIUM_VIRGL
#include "virgl/virgl_public.h"
#include "virgl/vtest/virgl_vtest_public.h"
#endif
static inline struct pipe_screen *
sw_screen_create_named(struct sw_winsys *winsys, const char *driver)
@ -30,6 +34,14 @@ sw_screen_create_named(struct sw_winsys *winsys, const char *driver)
screen = llvmpipe_create_screen(winsys);
#endif
#if defined(GALLIUM_VIRGL)
if (screen == NULL && strcmp(driver, "virpipe") == 0) {
struct virgl_winsys *vws;
vws = virgl_vtest_winsys_wrap(winsys);
screen = virgl_create_screen(vws);
}
#endif
#if defined(GALLIUM_SOFTPIPE)
if (screen == NULL)
screen = softpipe_create_screen(winsys);

View File

@ -29,6 +29,7 @@
#include "util/u_string.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "tgsi_dump.h"
#include "tgsi_info.h"
#include "tgsi_iterate.h"
@ -43,6 +44,8 @@ struct dump_ctx
{
struct tgsi_iterate_context iter;
boolean dump_float_as_hex;
uint instno;
uint immno;
int indent;
@ -88,6 +91,7 @@ dump_enum(
#define SID(I) ctx->dump_printf( ctx, "%d", I )
#define FLT(F) ctx->dump_printf( ctx, "%10.4f", F )
#define DBL(D) ctx->dump_printf( ctx, "%10.8f", D )
#define HFLT(F) ctx->dump_printf( ctx, "0x%08x", fui((F)) )
#define ENM(E,ENUMS) dump_enum( ctx, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
const char *
@ -251,7 +255,10 @@ dump_imm_data(struct tgsi_iterate_context *iter,
break;
}
case TGSI_IMM_FLOAT32:
FLT( data[i].Float );
if (ctx->dump_float_as_hex)
HFLT( data[i].Float );
else
FLT( data[i].Float );
break;
case TGSI_IMM_UINT32:
UID(data[i].Uint);
@ -682,6 +689,11 @@ tgsi_dump_to_file(const struct tgsi_token *tokens, uint flags, FILE *file)
ctx.indentation = 0;
ctx.file = file;
if (flags & TGSI_DUMP_FLOAT_AS_HEX)
ctx.dump_float_as_hex = TRUE;
else
ctx.dump_float_as_hex = FALSE;
tgsi_iterate_shader( tokens, &ctx.iter );
}
@ -697,6 +709,7 @@ struct str_dump_ctx
char *str;
char *ptr;
int left;
bool nospace;
};
static void
@ -719,10 +732,11 @@ str_dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...)
sctx->ptr += written;
sctx->left -= written;
}
}
} else
sctx->nospace = true;
}
void
bool
tgsi_dump_str(
const struct tgsi_token *tokens,
uint flags,
@ -749,8 +763,16 @@ tgsi_dump_str(
ctx.str[0] = 0;
ctx.ptr = str;
ctx.left = (int)size;
ctx.nospace = false;
if (flags & TGSI_DUMP_FLOAT_AS_HEX)
ctx.base.dump_float_as_hex = TRUE;
else
ctx.base.dump_float_as_hex = FALSE;
tgsi_iterate_shader( tokens, &ctx.base.iter );
return !ctx.nospace;
}
void
@ -773,6 +795,7 @@ tgsi_dump_instruction_str(
ctx.str[0] = 0;
ctx.ptr = str;
ctx.left = (int)size;
ctx.nospace = false;
iter_instruction( &ctx.base.iter, (struct tgsi_full_instruction *)inst );
}

View File

@ -38,7 +38,9 @@
extern "C" {
#endif
void
#define TGSI_DUMP_FLOAT_AS_HEX (1 << 0)
bool
tgsi_dump_str(
const struct tgsi_token *tokens,
uint flags,

View File

@ -195,8 +195,15 @@ static boolean parse_float( const char **pcur, float *val )
boolean integral_part = FALSE;
boolean fractional_part = FALSE;
*val = (float) atof( cur );
if (*cur == '0' && *(cur + 1) == 'x') {
union fi fi;
fi.ui = strtoul(cur, NULL, 16);
*val = fi.f;
cur += 10;
goto out;
}
*val = (float) atof( cur );
if (*cur == '-' || *cur == '+')
cur++;
if (is_digit( cur )) {
@ -228,6 +235,8 @@ static boolean parse_float( const char **pcur, float *val )
else
return FALSE;
}
out:
*pcur = cur;
return TRUE;
}

View File

@ -169,6 +169,25 @@ util_format_is_snorm(enum pipe_format format)
desc->channel[i].normalized;
}
/**
 * Return TRUE if the format is an 8-bit signed-normalized (snorm8) format.
 *
 * Rejects mixed-type formats and formats with no non-void channel; otherwise
 * checks the first non-void channel for: signed type, not pure-integer,
 * normalized, and a channel size of exactly 8 bits.
 */
boolean
util_format_is_snorm8(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
int i;
if (desc->is_mixed)
return FALSE;
i = util_format_get_first_non_void_channel(format);
if (i == -1)
return FALSE;
return desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED &&
!desc->channel[i].pure_integer &&
desc->channel[i].normalized &&
desc->channel[i].size == 8;
}
boolean
util_format_is_luminance_alpha(enum pipe_format format)
{

View File

@ -686,6 +686,9 @@ util_format_is_pure_uint(enum pipe_format format);
boolean
util_format_is_snorm(enum pipe_format format);
boolean
util_format_is_snorm8(enum pipe_format format);
/**
* Check if the src format can be blitted to the destination format with
* a simple memcpy. For example, blitting from RGBA to RGBx is OK, but not

View File

@ -450,6 +450,43 @@ null_constant_buffer(struct pipe_context *ctx)
util_report_result(pass);
}
static void
null_fragment_shader(struct pipe_context *ctx)
{
/* Driver self-test: draw with rasterization discarded and no fragment
 * shader bound (none is ever set here), and verify via a
 * PIPE_QUERY_PRIMITIVES_GENERATED query that the two triangles of the
 * fullscreen quad still reach primitive generation. */
struct cso_context *cso;
struct pipe_resource *cb;
void *vs;
struct pipe_rasterizer_state rs = {0};
struct pipe_query *query;
union pipe_query_result qresult;
cso = cso_create_context(ctx);
/* 256x256 RGBA8 color buffer used as the render target. */
cb = util_create_texture2d(ctx->screen, 256, 256,
PIPE_FORMAT_R8G8B8A8_UNORM);
util_set_common_states_and_clear(cso, ctx, cb);
/* No rasterization. */
rs.rasterizer_discard = 1;
cso_set_rasterizer(cso, &rs);
vs = util_set_passthrough_vertex_shader(cso, ctx, false);
query = ctx->create_query(ctx, PIPE_QUERY_PRIMITIVES_GENERATED, 0);
ctx->begin_query(ctx, query);
util_draw_fullscreen_quad(cso);
ctx->end_query(ctx, query);
/* Blocking read (wait=true) of the query result. */
ctx->get_query_result(ctx, query, true, &qresult);
/* Cleanup. */
cso_destroy_context(cso);
ctx->delete_vs_state(ctx, vs);
ctx->destroy_query(ctx, query);
pipe_resource_reference(&cb, NULL);
/* Check PRIMITIVES_GENERATED. */
/* A fullscreen quad is two triangles, hence the expected count of 2. */
util_report_result(qresult.u64 == 2);
}
/**
* Run all tests. This should be run with a clean context after
* context_create.
@ -459,6 +496,7 @@ util_run_tests(struct pipe_screen *screen)
{
struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
null_fragment_shader(ctx);
tgsi_vs_window_space_position(ctx);
null_sampler_view(ctx, TGSI_TEXTURE_2D);
null_sampler_view(ctx, TGSI_TEXTURE_BUFFER);

View File

@ -278,7 +278,9 @@ The integer capabilities:
in the shader.
* ``PIPE_CAP_SHAREABLE_SHADERS``: Whether shader CSOs can be used by any
pipe_context.
* ``PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS``:
Whether copying between compressed and plain formats is supported where
a compressed block is copied to/from a plain pixel of the same size.
.. _pipe_capf:

View File

@ -81,7 +81,7 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff);
if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex &&
if (ctx->rasterizer->point_size_per_vertex &&
(info->mode == PIPE_PRIM_POINTS))
primtype = DI_PT_POINTLIST_PSIZE;
@ -137,7 +137,7 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
.key = {
/* do binning pass first: */
.binning_pass = true,
.color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
.color_two_side = ctx->rasterizer->light_twoside,
// TODO set .half_precision based on render target format,
// ie. float16 and smaller use half, float32 use full..
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
@ -149,9 +149,9 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
.fsaturate_t = fd3_ctx->fsaturate_t,
.fsaturate_r = fd3_ctx->fsaturate_r,
},
.rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
.sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : 0,
.sprite_coord_mode = ctx->rasterizer ? ctx->rasterizer->sprite_coord_mode : false,
.rasterflat = ctx->rasterizer->flatshade,
.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
};
unsigned dirty;

View File

@ -627,7 +627,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
ctx->prog.dirty = 0;
}
if ((dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) && ctx->blend) {
if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
struct fd3_blend_stateobj *blend = fd3_blend_stateobj(ctx->blend);
uint32_t i;

View File

@ -118,12 +118,12 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
.key = {
/* do binning pass first: */
.binning_pass = true,
.color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
.rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
.color_two_side = ctx->rasterizer->light_twoside,
.rasterflat = ctx->rasterizer->flatshade,
// TODO set .half_precision based on render target format,
// ie. float16 and smaller use half, float32 use full..
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
.ucp_enables = ctx->rasterizer ? ctx->rasterizer->clip_plane_enable : 0,
.ucp_enables = ctx->rasterizer->clip_plane_enable,
.has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate),
.vsaturate_s = fd4_ctx->vsaturate_s,
.vsaturate_t = fd4_ctx->vsaturate_t,
@ -132,9 +132,9 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
.fsaturate_t = fd4_ctx->fsaturate_t,
.fsaturate_r = fd4_ctx->fsaturate_r,
},
.rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
.sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : false,
.sprite_coord_mode = ctx->rasterizer ? ctx->rasterizer->sprite_coord_mode : false,
.rasterflat = ctx->rasterizer->flatshade,
.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
};
unsigned dirty;

View File

@ -594,7 +594,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
ctx->prog.dirty = 0;
}
if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
if ((dirty & FD_DIRTY_BLEND)) {
struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend);
uint32_t i;

View File

@ -238,6 +238,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:

View File

@ -252,6 +252,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:

View File

@ -202,14 +202,16 @@ static inline void
gen6_3DSTATE_WM(struct ilo_builder *builder,
const struct ilo_state_raster *rs,
const struct ilo_state_ps *ps,
uint32_t kernel_offset)
uint32_t kernel_offset,
struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 9;
uint32_t *dw;
unsigned pos;
ILO_DEV_ASSERT(builder->dev, 6, 6);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
dw[1] = kernel_offset;
@ -221,6 +223,11 @@ gen6_3DSTATE_WM(struct ilo_builder *builder,
dw[6] = rs->wm[2] | ps->ps[4];
dw[7] = 0; /* kernel 1 */
dw[8] = 0; /* kernel 2 */
if (ilo_state_ps_get_scratch_size(ps)) {
ilo_builder_batch_reloc(builder, pos + 2, scratch_bo,
ps->ps[0], 0);
}
}
static inline void
@ -329,14 +336,16 @@ gen8_3DSTATE_WM_CHROMAKEY(struct ilo_builder *builder)
static inline void
gen7_3DSTATE_PS(struct ilo_builder *builder,
const struct ilo_state_ps *ps,
uint32_t kernel_offset)
uint32_t kernel_offset,
struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 8;
uint32_t *dw;
unsigned pos;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
dw[1] = kernel_offset;
@ -347,19 +356,26 @@ gen7_3DSTATE_PS(struct ilo_builder *builder,
dw[5] = ps->ps[5];
dw[6] = 0; /* kernel 1 */
dw[7] = 0; /* kernel 2 */
if (ilo_state_ps_get_scratch_size(ps)) {
ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
ps->ps[3], 0);
}
}
static inline void
gen8_3DSTATE_PS(struct ilo_builder *builder,
const struct ilo_state_ps *ps,
uint32_t kernel_offset)
uint32_t kernel_offset,
struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 12;
uint32_t *dw;
unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
dw[1] = kernel_offset;
@ -374,6 +390,11 @@ gen8_3DSTATE_PS(struct ilo_builder *builder,
dw[9] = 0;
dw[10] = 0; /* kernel 2 */
dw[11] = 0;
if (ilo_state_ps_get_scratch_size(ps)) {
ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
ps->ps[1], 0);
}
}
static inline void

View File

@ -477,14 +477,16 @@ gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
static inline void
gen6_3DSTATE_VS(struct ilo_builder *builder,
const struct ilo_state_vs *vs,
uint32_t kernel_offset)
uint32_t kernel_offset,
struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 6;
uint32_t *dw;
unsigned pos;
ILO_DEV_ASSERT(builder->dev, 6, 7.5);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
dw[1] = kernel_offset;
@ -493,19 +495,26 @@ gen6_3DSTATE_VS(struct ilo_builder *builder,
dw[3] = vs->vs[1];
dw[4] = vs->vs[2];
dw[5] = vs->vs[3];
if (ilo_state_vs_get_scratch_size(vs)) {
ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
vs->vs[1], 0);
}
}
static inline void
gen8_3DSTATE_VS(struct ilo_builder *builder,
const struct ilo_state_vs *vs,
uint32_t kernel_offset)
uint32_t kernel_offset,
struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 9;
uint32_t *dw;
unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
dw[1] = kernel_offset;
@ -517,19 +526,26 @@ gen8_3DSTATE_VS(struct ilo_builder *builder,
dw[6] = vs->vs[2];
dw[7] = vs->vs[3];
dw[8] = vs->vs[4];
if (ilo_state_vs_get_scratch_size(vs)) {
ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
vs->vs[1], 0);
}
}
static inline void
gen7_3DSTATE_HS(struct ilo_builder *builder,
const struct ilo_state_hs *hs,
uint32_t kernel_offset)
uint32_t kernel_offset,
struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 7;
uint32_t *dw;
unsigned pos;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
/* see hs_set_gen7_3DSTATE_HS() */
@ -539,19 +555,26 @@ gen7_3DSTATE_HS(struct ilo_builder *builder,
dw[4] = hs->hs[2];
dw[5] = hs->hs[3];
dw[6] = 0;
if (ilo_state_hs_get_scratch_size(hs)) {
ilo_builder_batch_reloc(builder, pos + 4, scratch_bo,
hs->hs[2], 0);
}
}
static inline void
gen8_3DSTATE_HS(struct ilo_builder *builder,
const struct ilo_state_hs *hs,
uint32_t kernel_offset)
uint32_t kernel_offset,
struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 9;
uint32_t *dw;
unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
/* see hs_set_gen7_3DSTATE_HS() */
@ -563,6 +586,11 @@ gen8_3DSTATE_HS(struct ilo_builder *builder,
dw[6] = 0;
dw[7] = hs->hs[3];
dw[8] = 0;
if (ilo_state_hs_get_scratch_size(hs)) {
ilo_builder_batch_reloc64(builder, pos + 5, scratch_bo,
hs->hs[2], 0);
}
}
static inline void
@ -586,14 +614,16 @@ gen7_3DSTATE_TE(struct ilo_builder *builder,
static inline void
gen7_3DSTATE_DS(struct ilo_builder *builder,
const struct ilo_state_ds *ds,
uint32_t kernel_offset)
uint32_t kernel_offset,
struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 6;
uint32_t *dw;
unsigned pos;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
/* see ds_set_gen7_3DSTATE_DS() */
@ -602,19 +632,26 @@ gen7_3DSTATE_DS(struct ilo_builder *builder,
dw[3] = ds->ds[1];
dw[4] = ds->ds[2];
dw[5] = ds->ds[3];
if (ilo_state_ds_get_scratch_size(ds)) {
ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
ds->ds[1], 0);
}
}
static inline void
gen8_3DSTATE_DS(struct ilo_builder *builder,
const struct ilo_state_ds *ds,
uint32_t kernel_offset)
uint32_t kernel_offset,
struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 9;
uint32_t *dw;
unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
/* see ds_set_gen7_3DSTATE_DS() */
@ -626,19 +663,26 @@ gen8_3DSTATE_DS(struct ilo_builder *builder,
dw[6] = ds->ds[2];
dw[7] = ds->ds[3];
dw[8] = ds->ds[4];
if (ilo_state_ds_get_scratch_size(ds)) {
ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
ds->ds[1], 0);
}
}
static inline void
gen6_3DSTATE_GS(struct ilo_builder *builder,
const struct ilo_state_gs *gs,
uint32_t kernel_offset)
uint32_t kernel_offset,
struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 7;
uint32_t *dw;
unsigned pos;
ILO_DEV_ASSERT(builder->dev, 6, 6);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
dw[1] = kernel_offset;
@ -648,6 +692,11 @@ gen6_3DSTATE_GS(struct ilo_builder *builder,
dw[4] = gs->gs[2];
dw[5] = gs->gs[3];
dw[6] = gs->gs[4];
if (ilo_state_gs_get_scratch_size(gs)) {
ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
gs->gs[1], 0);
}
}
static inline void
@ -677,14 +726,16 @@ gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder,
static inline void
gen7_3DSTATE_GS(struct ilo_builder *builder,
const struct ilo_state_gs *gs,
uint32_t kernel_offset)
uint32_t kernel_offset,
struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 7;
uint32_t *dw;
unsigned pos;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
dw[1] = kernel_offset;
@ -694,19 +745,26 @@ gen7_3DSTATE_GS(struct ilo_builder *builder,
dw[4] = gs->gs[2];
dw[5] = gs->gs[3];
dw[6] = 0;
if (ilo_state_gs_get_scratch_size(gs)) {
ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
gs->gs[1], 0);
}
}
static inline void
gen8_3DSTATE_GS(struct ilo_builder *builder,
const struct ilo_state_gs *gs,
uint32_t kernel_offset)
uint32_t kernel_offset,
struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 10;
uint32_t *dw;
unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
ilo_builder_batch_pointer(builder, cmd_len, &dw);
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
dw[1] = kernel_offset;
@ -719,6 +777,11 @@ gen8_3DSTATE_GS(struct ilo_builder *builder,
dw[7] = gs->gs[3];
dw[8] = 0;
dw[9] = gs->gs[4];
if (ilo_state_gs_get_scratch_size(gs)) {
ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
gs->gs[1], 0);
}
}
static inline void

View File

@ -158,7 +158,8 @@ compute_interface_get_gen6_read_end(const struct ilo_dev *dev,
*/
assert(per_thread_read <= 63);
/* From the Haswell PRM, volume 2d, page 199:
/*
* From the Haswell PRM, volume 2d, page 199:
*
* "(Cross-Thread Constant Data Read Length) [0,127]"
*/
@ -210,38 +211,68 @@ compute_validate_gen6(const struct ilo_dev *dev,
return true;
}
static uint8_t
compute_get_gen6_scratch_space(const struct ilo_dev *dev,
const struct ilo_state_compute_info *info)
static uint32_t
compute_get_gen6_per_thread_scratch_size(const struct ilo_dev *dev,
const struct ilo_state_compute_info *info,
uint8_t *per_thread_space)
{
uint32_t scratch_size = 0;
uint8_t i;
ILO_DEV_ASSERT(dev, 6, 7);
ILO_DEV_ASSERT(dev, 6, 8);
/*
* From the Sandy Bridge PRM, volume 2 part 2, page 30:
*
* "(Per Thread Scratch Space)
* Range = [0,11] indicating [1k bytes, 12k bytes] [DevSNB]"
*/
assert(info->per_thread_scratch_size <= 12 * 1024);
for (i = 0; i < info->interface_count; i++) {
if (scratch_size < info->interfaces[i].scratch_size)
scratch_size = info->interfaces[i].scratch_size;
if (!info->per_thread_scratch_size) {
*per_thread_space = 0;
return 0;
}
if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
assert(scratch_size <= 2 * 1024 * 1024);
*per_thread_space = (info->per_thread_scratch_size > 1024) ?
(info->per_thread_scratch_size - 1) / 1024 : 0;
/* next power of two, starting from 1KB */
return (scratch_size > 1024) ?
(util_last_bit(scratch_size - 1) - 10): 0;
} else if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
assert(scratch_size <= 2 * 1024 * 1024);
return 1024 * (1 + *per_thread_space);
}
/* next power of two, starting from 2KB */
return (scratch_size > 2048) ?
(util_last_bit(scratch_size - 1) - 11): 0;
} else {
assert(scratch_size <= 12 * 1024);
static uint32_t
compute_get_gen75_per_thread_scratch_size(const struct ilo_dev *dev,
const struct ilo_state_compute_info *info,
uint8_t *per_thread_space)
{
ILO_DEV_ASSERT(dev, 7.5, 8);
return (scratch_size > 1024) ?
(scratch_size - 1) / 1024 : 0;
/*
* From the Haswell PRM, volume 2b, page 407:
*
* "(Per Thread Scratch Space)
* [0,10] Indicating [2k bytes, 2 Mbytes]"
*
* "Note: The scratch space should be declared as 2x the desired
* scratch space. The stack will start at the half-way point instead
* of the end. The upper half of scratch space will not be accessed
* and so does not have to be allocated in memory."
*
* From the Broadwell PRM, volume 2a, page 450:
*
* "(Per Thread Scratch Space)
* [0,11] indicating [1k bytes, 2 Mbytes]"
*/
assert(info->per_thread_scratch_size <=
((ilo_dev_gen(dev) >= ILO_GEN(8)) ? 2 : 1) * 1024 * 1024);
if (!info->per_thread_scratch_size) {
*per_thread_space = 0;
return 0;
}
/* next power of two, starting from 1KB */
*per_thread_space = (info->per_thread_scratch_size > 1024) ?
(util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0;
return 1 << (10 + *per_thread_space);
}
static bool
@ -250,7 +281,8 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
const struct ilo_state_compute_info *info)
{
struct compute_urb_configuration urb;
uint8_t scratch_space;
uint32_t per_thread_size;
uint8_t per_thread_space;
uint32_t dw1, dw2, dw4;
@ -260,9 +292,16 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
!compute_validate_gen6(dev, info, &urb))
return false;
scratch_space = compute_get_gen6_scratch_space(dev, info);
if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
per_thread_size = compute_get_gen75_per_thread_scratch_size(dev,
info, &per_thread_space);
} else {
per_thread_size = compute_get_gen6_per_thread_scratch_size(dev,
info, &per_thread_space);
}
dw1 = per_thread_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT;
dw1 = scratch_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT;
dw2 = (dev->thread_count - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT |
urb.urb_entry_count << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT |
GEN6_VFE_DW2_RESET_GATEWAY_TIMER |
@ -281,6 +320,8 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
compute->vfe[1] = dw2;
compute->vfe[2] = dw4;
compute->scratch_size = per_thread_size * dev->thread_count;
return true;
}

View File

@ -45,8 +45,6 @@ struct ilo_state_compute_interface_info {
/* usually 0 unless there are multiple interfaces */
uint32_t kernel_offset;
uint32_t scratch_size;
uint8_t sampler_count;
uint8_t surface_count;
@ -65,6 +63,8 @@ struct ilo_state_compute_info {
const struct ilo_state_compute_interface_info *interfaces;
uint8_t interface_count;
uint32_t per_thread_scratch_size;
uint32_t cv_urb_alloc_size;
uint32_t curbe_alloc_size;
};
@ -74,6 +74,8 @@ struct ilo_state_compute {
uint32_t (*idrt)[6];
uint8_t idrt_count;
uint32_t scratch_size;
};
static inline size_t
@ -89,4 +91,10 @@ ilo_state_compute_init(struct ilo_state_compute *compute,
const struct ilo_dev *dev,
const struct ilo_state_compute_info *info);
static inline uint32_t
ilo_state_compute_get_scratch_size(const struct ilo_state_compute *compute)
{
return compute->scratch_size;
}
#endif /* ILO_STATE_COMPUTE_H */

View File

@ -37,7 +37,9 @@ enum vertex_stage {
struct vertex_ff {
uint8_t grf_start;
uint8_t scratch_space;
uint8_t per_thread_scratch_space;
uint32_t per_thread_scratch_size;
uint8_t sampler_count;
uint8_t surface_count;
@ -59,13 +61,6 @@ vertex_validate_gen6_kernel(const struct ilo_dev *dev,
* others.
*/
const uint8_t max_grf_start = (stage == STAGE_GS) ? 16 : 32;
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 134:
*
* "(Per-Thread Scratch Space)
* Range [0,11] indicating [1K Bytes, 2M Bytes]"
*/
const uint32_t max_scratch_size = 2 * 1024 * 1024;
ILO_DEV_ASSERT(dev, 6, 8);
@ -73,7 +68,6 @@ vertex_validate_gen6_kernel(const struct ilo_dev *dev,
assert(!kernel->offset);
assert(kernel->grf_start < max_grf_start);
assert(kernel->scratch_size <= max_scratch_size);
return true;
}
@ -112,18 +106,33 @@ vertex_get_gen6_ff(const struct ilo_dev *dev,
const struct ilo_state_shader_kernel_info *kernel,
const struct ilo_state_shader_resource_info *resource,
const struct ilo_state_shader_urb_info *urb,
uint32_t per_thread_scratch_size,
struct vertex_ff *ff)
{
ILO_DEV_ASSERT(dev, 6, 8);
memset(ff, 0, sizeof(*ff));
if (!vertex_validate_gen6_kernel(dev, stage, kernel) ||
!vertex_validate_gen6_urb(dev, stage, urb))
return false;
ff->grf_start = kernel->grf_start;
/* next power of two, starting from 1KB */
ff->scratch_space = (kernel->scratch_size > 1024) ?
(util_last_bit(kernel->scratch_size - 1) - 10): 0;
if (per_thread_scratch_size) {
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 134:
*
* "(Per-Thread Scratch Space)
* Range [0,11] indicating [1K Bytes, 2M Bytes]"
*/
assert(per_thread_scratch_size <= 2 * 1024 * 1024);
/* next power of two, starting from 1KB */
ff->per_thread_scratch_space = (per_thread_scratch_size > 1024) ?
(util_last_bit(per_thread_scratch_size - 1) - 10) : 0;
ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space);
}
ff->sampler_count = (resource->sampler_count <= 12) ?
(resource->sampler_count + 3) / 4 : 4;
@ -192,8 +201,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
ILO_DEV_ASSERT(dev, 6, 8);
if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel,
&info->resource, &info->urb, &ff))
if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel, &info->resource,
&info->urb, info->per_thread_scratch_size, &ff))
return false;
thread_count = vs_get_gen6_thread_count(dev, info);
@ -207,7 +216,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
dw2 |= GEN75_THREADDISP_ACCESS_UAV;
dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw3 = ff.per_thread_scratch_space <<
GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff.grf_start << GEN6_VS_DW4_URB_GRF_START__SHIFT |
ff.vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
@ -234,6 +244,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
if (ilo_dev_gen(dev) >= ILO_GEN(8))
vs->vs[4] = ff.user_clip_enables << GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT;
vs->scratch_size = ff.per_thread_scratch_size * thread_count;
return true;
}
@ -273,8 +285,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
ILO_DEV_ASSERT(dev, 7, 8);
if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel,
&info->resource, &info->urb, &ff))
if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel, &info->resource,
&info->urb, info->per_thread_scratch_size, &ff))
return false;
thread_count = hs_get_gen7_thread_count(dev, info);
@ -282,19 +294,22 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
dw1 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
dw2 = 0 << GEN7_HS_DW2_INSTANCE_COUNT__SHIFT;
if (ilo_dev_gen(dev) >= ILO_GEN(8))
dw2 |= thread_count << GEN8_HS_DW2_MAX_THREADS__SHIFT;
else if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
dw1 |= thread_count << GEN75_HS_DW1_DISPATCH_MAX_THREADS__SHIFT;
else
dw1 |= thread_count << GEN7_HS_DW1_DISPATCH_MAX_THREADS__SHIFT;
dw2 = 0 << GEN7_HS_DW2_INSTANCE_COUNT__SHIFT;
if (info->dispatch_enable)
dw2 |= GEN7_HS_DW2_HS_ENABLE;
if (info->stats_enable)
dw2 |= GEN7_HS_DW2_STATISTICS;
dw4 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff.per_thread_scratch_space <<
GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw5 = GEN7_HS_DW5_INCLUDE_VERTEX_HANDLES |
ff.grf_start << GEN7_HS_DW5_URB_GRF_START__SHIFT |
@ -310,6 +325,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
hs->hs[2] = dw4;
hs->hs[3] = dw5;
hs->scratch_size = ff.per_thread_scratch_size * thread_count;
return true;
}
@ -373,8 +390,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
ILO_DEV_ASSERT(dev, 7, 8);
if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel,
&info->resource, &info->urb, &ff))
if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel, &info->resource,
&info->urb, info->per_thread_scratch_size, &ff))
return false;
thread_count = ds_get_gen7_thread_count(dev, info);
@ -385,7 +402,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
dw2 |= GEN75_THREADDISP_ACCESS_UAV;
dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw3 = ff.per_thread_scratch_space <<
GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff.grf_start << GEN7_DS_DW4_URB_GRF_START__SHIFT |
ff.vue_read_len << GEN7_DS_DW4_URB_READ_LEN__SHIFT |
@ -412,6 +430,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
if (ilo_dev_gen(dev) >= ILO_GEN(8))
ds->ds[4] = ff.user_clip_enables << GEN8_DS_DW8_UCP_CLIP_ENABLES__SHIFT;
ds->scratch_size = ff.per_thread_scratch_size * thread_count;
return true;
}
@ -425,8 +445,8 @@ gs_get_gen6_ff(const struct ilo_dev *dev,
ILO_DEV_ASSERT(dev, 6, 8);
if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel,
&info->resource, &info->urb, ff))
if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel, &info->resource,
&info->urb, info->per_thread_scratch_size, ff))
return false;
/*
@ -510,7 +530,8 @@ gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs,
ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw3 = ff.per_thread_scratch_space <<
GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff.vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
ff.vue_read_offset << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
@ -550,6 +571,8 @@ gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs,
gs->gs[3] = dw5;
gs->gs[4] = dw6;
gs->scratch_size = ff.per_thread_scratch_size * thread_count;
return true;
}
@ -588,7 +611,8 @@ gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs,
if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
dw2 |= GEN75_THREADDISP_ACCESS_UAV;
dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw3 = ff.per_thread_scratch_space <<
GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = vertex_size << GEN7_GS_DW4_OUTPUT_SIZE__SHIFT |
0 << GEN7_GS_DW4_OUTPUT_TOPO__SHIFT |
@ -618,6 +642,8 @@ gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs,
if (ilo_dev_gen(dev) >= ILO_GEN(8))
gs->gs[4] = ff.user_clip_enables << GEN8_GS_DW9_UCP_CLIP_ENABLES__SHIFT;
gs->scratch_size = ff.per_thread_scratch_size * thread_count;
return true;
}

View File

@ -42,8 +42,6 @@ struct ilo_state_shader_kernel_info {
uint8_t grf_start;
uint8_t pcb_attr_count;
uint32_t scratch_size;
};
/**
@ -77,6 +75,7 @@ struct ilo_state_vs_info {
struct ilo_state_shader_resource_info resource;
struct ilo_state_shader_urb_info urb;
uint32_t per_thread_scratch_size;
bool dispatch_enable;
bool stats_enable;
};
@ -86,6 +85,7 @@ struct ilo_state_hs_info {
struct ilo_state_shader_resource_info resource;
struct ilo_state_shader_urb_info urb;
uint32_t per_thread_scratch_size;
bool dispatch_enable;
bool stats_enable;
};
@ -95,6 +95,7 @@ struct ilo_state_ds_info {
struct ilo_state_shader_resource_info resource;
struct ilo_state_shader_urb_info urb;
uint32_t per_thread_scratch_size;
bool dispatch_enable;
bool stats_enable;
};
@ -119,6 +120,7 @@ struct ilo_state_gs_info {
struct ilo_state_gs_sol_info sol;
uint32_t per_thread_scratch_size;
bool dispatch_enable;
bool stats_enable;
};
@ -158,6 +160,8 @@ struct ilo_state_ps_info {
struct ilo_state_ps_io_info io;
struct ilo_state_ps_params_info params;
uint32_t per_thread_scratch_size;
/* bitmask of GEN6_PS_DISPATCH_x */
uint8_t valid_kernels;
bool per_sample_dispatch;
@ -173,23 +177,28 @@ struct ilo_state_ps_info {
struct ilo_state_vs {
uint32_t vs[5];
uint32_t scratch_size;
};
struct ilo_state_hs {
uint32_t hs[4];
uint32_t scratch_size;
};
struct ilo_state_ds {
uint32_t te[3];
uint32_t ds[5];
uint32_t scratch_size;
};
struct ilo_state_gs {
uint32_t gs[5];
uint32_t scratch_size;
};
struct ilo_state_ps {
uint32_t ps[8];
uint32_t scratch_size;
struct ilo_state_ps_dispatch_conds {
bool ps_valid;
@ -211,6 +220,12 @@ bool
ilo_state_vs_init_disabled(struct ilo_state_vs *vs,
const struct ilo_dev *dev);
static inline uint32_t
ilo_state_vs_get_scratch_size(const struct ilo_state_vs *vs)
{
return vs->scratch_size;
}
bool
ilo_state_hs_init(struct ilo_state_hs *hs,
const struct ilo_dev *dev,
@ -221,6 +236,12 @@ ilo_state_hs_init_disabled(struct ilo_state_hs *hs,
const struct ilo_dev *dev);
static inline uint32_t
ilo_state_hs_get_scratch_size(const struct ilo_state_hs *hs)
{
return hs->scratch_size;
}
bool
ilo_state_ds_init(struct ilo_state_ds *ds,
const struct ilo_dev *dev,
@ -230,6 +251,12 @@ bool
ilo_state_ds_init_disabled(struct ilo_state_ds *ds,
const struct ilo_dev *dev);
static inline uint32_t
ilo_state_ds_get_scratch_size(const struct ilo_state_ds *ds)
{
return ds->scratch_size;
}
bool
ilo_state_gs_init(struct ilo_state_gs *gs,
const struct ilo_dev *dev,
@ -239,6 +266,12 @@ bool
ilo_state_gs_init_disabled(struct ilo_state_gs *gs,
const struct ilo_dev *dev);
static inline uint32_t
ilo_state_gs_get_scratch_size(const struct ilo_state_gs *gs)
{
return gs->scratch_size;
}
bool
ilo_state_ps_init(struct ilo_state_ps *ps,
const struct ilo_dev *dev,
@ -253,4 +286,10 @@ ilo_state_ps_set_params(struct ilo_state_ps *ps,
const struct ilo_dev *dev,
const struct ilo_state_ps_params_info *params);
static inline uint32_t
ilo_state_ps_get_scratch_size(const struct ilo_state_ps *ps)
{
return ps->scratch_size;
}
#endif /* ILO_STATE_SHADER_H */

View File

@ -34,7 +34,8 @@ struct pixel_ff {
uint32_t kernel_offsets[3];
uint8_t grf_starts[3];
bool pcb_enable;
uint8_t scratch_space;
uint8_t per_thread_scratch_space;
uint32_t per_thread_scratch_size;
uint8_t sampler_count;
uint8_t surface_count;
@ -56,13 +57,6 @@ ps_kernel_validate_gen6(const struct ilo_dev *dev,
{
/* "Dispatch GRF Start Register for Constant/Setup Data" is U7 */
const uint8_t max_grf_start = 128;
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 271:
*
* "(Per-Thread Scratch Space)
* Range [0,11] indicating [1k bytes, 2M bytes] in powers of two"
*/
const uint32_t max_scratch_size = 2 * 1024 * 1024;
ILO_DEV_ASSERT(dev, 6, 8);
@ -70,7 +64,6 @@ ps_kernel_validate_gen6(const struct ilo_dev *dev,
assert(kernel->offset % 64 == 0);
assert(kernel->grf_start < max_grf_start);
assert(kernel->scratch_size <= max_scratch_size);
return true;
}
@ -325,7 +318,6 @@ ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
uint32_t scratch_size;
ILO_DEV_ASSERT(dev, 6, 8);
@ -363,21 +355,6 @@ ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
kernel_32->pcb_attr_count));
scratch_size = 0;
if ((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
scratch_size < kernel_8->scratch_size)
scratch_size = kernel_8->scratch_size;
if ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
scratch_size < kernel_16->scratch_size)
scratch_size = kernel_16->scratch_size;
if ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
scratch_size < kernel_32->scratch_size)
scratch_size = kernel_32->scratch_size;
/* next power of two, starting from 1KB */
ff->scratch_space = (scratch_size > 1024) ?
(util_last_bit(scratch_size - 1) - 10): 0;
/* GPU hangs on Haswell if none of the dispatch mode bits is set */
if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes)
ff->dispatch_modes |= GEN6_PS_DISPATCH_8;
@ -401,6 +378,21 @@ ps_get_gen6_ff(const struct ilo_dev *dev,
if (!ps_validate_gen6(dev, info) || !ps_get_gen6_ff_kernels(dev, info, ff))
return false;
if (info->per_thread_scratch_size) {
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 271:
*
* "(Per-Thread Scratch Space)
* Range [0,11] indicating [1k bytes, 2M bytes] in powers of two"
*/
assert(info->per_thread_scratch_size <= 2 * 1024 * 1024);
/* next power of two, starting from 1KB */
ff->per_thread_scratch_space = (info->per_thread_scratch_size > 1024) ?
(util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0;
ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space);
}
ff->sampler_count = (resource->sampler_count <= 12) ?
(resource->sampler_count + 3) / 4 : 4;
ff->surface_count = resource->surface_count;
@ -441,7 +433,8 @@ ps_set_gen6_3dstate_wm(struct ilo_state_ps *ps,
if (false)
dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw3 = ff->per_thread_scratch_space <<
GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff->grf_starts[0] << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
ff->grf_starts[1] << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
@ -539,7 +532,8 @@ ps_set_gen7_3DSTATE_PS(struct ilo_state_ps *ps,
if (false)
dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw3 = ff->per_thread_scratch_space <<
GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = io->posoffset << GEN7_PS_DW4_POSOFFSET__SHIFT |
ff->dispatch_modes << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
@ -603,7 +597,8 @@ ps_set_gen8_3DSTATE_PS(struct ilo_state_ps *ps,
if (false)
dw3 |= GEN6_THREADDISP_FP_MODE_ALT;
dw4 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff->per_thread_scratch_space <<
GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw6 = ff->thread_count << GEN8_PS_DW6_MAX_THREADS__SHIFT |
io->posoffset << GEN8_PS_DW6_POSOFFSET__SHIFT |
@ -705,6 +700,7 @@ ilo_state_ps_init(struct ilo_state_ps *ps,
ret &= ps_set_gen6_3dstate_wm(ps, dev, info, &ff);
}
ps->scratch_size = ff.per_thread_scratch_size * ff.thread_count;
/* save conditions */
ps->conds = ff.conds;

View File

@ -58,10 +58,12 @@ ilo_blit_resolve_slices(struct ilo_context *ilo,
* As it is only used to resolve HiZ right now, return early when there is
* no HiZ.
*/
if (!ilo_image_can_enable_aux(&tex->image, level))
if (tex->image.aux.type != ILO_IMAGE_AUX_HIZ ||
!ilo_image_can_enable_aux(&tex->image, level))
return;
if (ilo_image_can_enable_aux(&tex->image, level)) {
if (tex->image.aux.type == ILO_IMAGE_AUX_HIZ &&
ilo_image_can_enable_aux(&tex->image, level)) {
ilo_blit_resolve_slices_for_hiz(ilo, res, level,
first_slice, num_slices, resolve_flags);
}

View File

@ -547,6 +547,7 @@ static void
ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
struct ilo_context *ilo = ilo_context(pipe);
int vs_scratch_size, gs_scratch_size, fs_scratch_size;
if (ilo_debug & ILO_DEBUG_DRAW) {
if (info->indexed) {
@ -574,8 +575,15 @@ ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
ilo_finalize_3d_states(ilo, info);
/* upload kernels */
ilo_shader_cache_upload(ilo->shader_cache, &ilo->cp->builder);
/* prepare scratch spaces */
ilo_shader_cache_get_max_scratch_sizes(ilo->shader_cache,
&vs_scratch_size, &gs_scratch_size, &fs_scratch_size);
ilo_render_prepare_scratch_spaces(ilo->render,
vs_scratch_size, gs_scratch_size, fs_scratch_size);
ilo_blit_resolve_framebuffer(ilo);
/* If draw_vbo ever fails, return immediately. */

View File

@ -67,10 +67,49 @@ ilo_render_create(struct ilo_builder *builder)
void
ilo_render_destroy(struct ilo_render *render)
{
intel_bo_unref(render->vs_scratch.bo);
intel_bo_unref(render->gs_scratch.bo);
intel_bo_unref(render->fs_scratch.bo);
intel_bo_unref(render->workaround_bo);
FREE(render);
}
static bool
resize_scratch_space(struct ilo_render *render,
struct ilo_render_scratch_space *scratch,
const char *name, int new_size)
{
struct intel_bo *bo;
if (scratch->size >= new_size)
return true;
bo = intel_winsys_alloc_bo(render->builder->winsys, name, new_size, false);
if (!bo)
return false;
intel_bo_unref(scratch->bo);
scratch->bo = bo;
scratch->size = new_size;
return true;
}
bool
ilo_render_prepare_scratch_spaces(struct ilo_render *render,
int vs_scratch_size,
int gs_scratch_size,
int fs_scratch_size)
{
return (resize_scratch_space(render, &render->vs_scratch,
"vs scratch", vs_scratch_size) &&
resize_scratch_space(render, &render->gs_scratch,
"gs scratch", gs_scratch_size) &&
resize_scratch_space(render, &render->fs_scratch,
"fs scratch", fs_scratch_size));
}
void
ilo_render_get_sample_position(const struct ilo_render *render,
unsigned sample_count,

View File

@ -43,6 +43,12 @@ ilo_render_create(struct ilo_builder *builder);
void
ilo_render_destroy(struct ilo_render *render);
bool
ilo_render_prepare_scratch_spaces(struct ilo_render *render,
int vs_scratch_size,
int gs_scratch_size,
int fs_scratch_size);
void
ilo_render_get_sample_position(const struct ilo_render *render,
unsigned sample_count,

View File

@ -51,6 +51,11 @@ struct ilo_render {
struct intel_bo *workaround_bo;
struct ilo_render_scratch_space {
struct intel_bo *bo;
int size;
} vs_scratch, gs_scratch, fs_scratch;
struct ilo_state_sample_pattern sample_pattern;
bool hw_ctx_changed;

View File

@ -475,10 +475,13 @@ gen6_draw_vs(struct ilo_render *r,
gen6_wa_pre_3dstate_vs_toggle(r);
if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO))
gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs, kernel_offset);
else
gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) {
gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs,
kernel_offset, r->vs_scratch.bo);
} else {
gen6_3DSTATE_VS(r->builder, &cso->vs,
kernel_offset, r->vs_scratch.bo);
}
}
}
@ -501,7 +504,8 @@ gen6_draw_gs(struct ilo_render *r,
cso = ilo_shader_get_kernel_cso(vec->gs);
kernel_offset = ilo_shader_get_kernel_offset(vec->gs);
gen6_3DSTATE_GS(r->builder, &cso->gs, kernel_offset);
gen6_3DSTATE_GS(r->builder, &cso->gs,
kernel_offset, r->gs_scratch.bo);
} else if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) {
const int verts_per_prim =
@ -524,9 +528,10 @@ gen6_draw_gs(struct ilo_render *r,
kernel_offset = ilo_shader_get_kernel_offset(vec->vs) +
ilo_shader_get_kernel_param(vec->vs, param);
gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol, kernel_offset);
gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol,
kernel_offset, r->gs_scratch.bo);
} else {
gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0);
gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0, NULL);
}
}
}
@ -672,7 +677,7 @@ gen6_draw_wm(struct ilo_render *r,
gen6_wa_pre_3dstate_wm_max_threads(r);
gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs,
&cso->ps, kernel_offset);
&cso->ps, kernel_offset, r->fs_scratch.bo);
}
}
@ -817,10 +822,10 @@ gen6_rectlist_vs_to_sf(struct ilo_render *r,
gen6_wa_post_3dstate_constant_vs(r);
gen6_wa_pre_3dstate_vs_toggle(r);
gen6_3DSTATE_VS(r->builder, &blitter->vs, 0);
gen6_3DSTATE_VS(r->builder, &blitter->vs, 0, NULL);
gen6_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
gen6_3DSTATE_GS(r->builder, &blitter->gs, 0);
gen6_3DSTATE_GS(r->builder, &blitter->gs, 0, NULL);
gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs);
gen6_3DSTATE_SF(r->builder, &blitter->fb.rs, &blitter->sbe);
@ -833,7 +838,7 @@ gen6_rectlist_wm(struct ilo_render *r,
gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
gen6_wa_pre_3dstate_wm_max_threads(r);
gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0);
gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0, NULL);
}
static void

View File

@ -318,10 +318,13 @@ gen7_draw_vs(struct ilo_render *r,
const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->vs);
const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->vs);
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
gen8_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
else
gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) {
gen8_3DSTATE_VS(r->builder, &cso->vs,
kernel_offset, r->vs_scratch.bo);
} else {
gen6_3DSTATE_VS(r->builder, &cso->vs,
kernel_offset, r->vs_scratch.bo);
}
}
}
@ -338,9 +341,9 @@ gen7_draw_hs(struct ilo_render *r,
gen7_3DSTATE_CONSTANT_HS(r->builder, 0, 0, 0);
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
gen8_3DSTATE_HS(r->builder, hs, kernel_offset);
gen8_3DSTATE_HS(r->builder, hs, kernel_offset, NULL);
else
gen7_3DSTATE_HS(r->builder, hs, kernel_offset);
gen7_3DSTATE_HS(r->builder, hs, kernel_offset, NULL);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_HS */
@ -373,9 +376,9 @@ gen7_draw_ds(struct ilo_render *r,
gen7_3DSTATE_CONSTANT_DS(r->builder, 0, 0, 0);
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
gen8_3DSTATE_DS(r->builder, ds, kernel_offset);
gen8_3DSTATE_DS(r->builder, ds, kernel_offset, NULL);
else
gen7_3DSTATE_DS(r->builder, ds, kernel_offset);
gen7_3DSTATE_DS(r->builder, ds, kernel_offset, NULL);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_DS */
@ -397,9 +400,9 @@ gen7_draw_gs(struct ilo_render *r,
gen7_3DSTATE_CONSTANT_GS(r->builder, 0, 0, 0);
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
gen8_3DSTATE_GS(r->builder, gs, kernel_offset);
gen8_3DSTATE_GS(r->builder, gs, kernel_offset, NULL);
else
gen7_3DSTATE_GS(r->builder, gs, kernel_offset);
gen7_3DSTATE_GS(r->builder, gs, kernel_offset, NULL);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_GS */
@ -534,7 +537,7 @@ gen7_draw_wm(struct ilo_render *r,
if (r->hw_ctx_changed)
gen7_wa_pre_3dstate_ps_max_threads(r);
gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset);
gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, r->fs_scratch.bo);
}
/* 3DSTATE_SCISSOR_STATE_POINTERS */
@ -678,18 +681,18 @@ gen7_rectlist_vs_to_sf(struct ilo_render *r,
const struct ilo_blitter *blitter)
{
gen7_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0);
gen6_3DSTATE_VS(r->builder, &blitter->vs, 0);
gen6_3DSTATE_VS(r->builder, &blitter->vs, 0, NULL);
gen7_3DSTATE_CONSTANT_HS(r->builder, NULL, NULL, 0);
gen7_3DSTATE_HS(r->builder, &blitter->hs, 0);
gen7_3DSTATE_HS(r->builder, &blitter->hs, 0, NULL);
gen7_3DSTATE_TE(r->builder, &blitter->ds);
gen7_3DSTATE_CONSTANT_DS(r->builder, NULL, NULL, 0);
gen7_3DSTATE_DS(r->builder, &blitter->ds, 0);
gen7_3DSTATE_DS(r->builder, &blitter->ds, 0, NULL);
gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
gen7_3DSTATE_GS(r->builder, &blitter->gs, 0);
gen7_3DSTATE_GS(r->builder, &blitter->gs, 0, NULL);
gen7_3DSTATE_STREAMOUT(r->builder, &blitter->sol);
@ -711,7 +714,7 @@ gen7_rectlist_wm(struct ilo_render *r,
gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
gen7_wa_pre_3dstate_ps_max_threads(r);
gen7_3DSTATE_PS(r->builder, &blitter->ps, 0);
gen7_3DSTATE_PS(r->builder, &blitter->ps, 0, NULL);
}
static void

View File

@ -125,7 +125,7 @@ gen8_draw_wm(struct ilo_render *r,
/* 3DSTATE_PS */
if (DIRTY(FS) || r->instruction_bo_changed)
gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset);
gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, r->fs_scratch.bo);
/* 3DSTATE_PS_EXTRA */
if (DIRTY(FS))

View File

@ -474,6 +474,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
case PIPE_CAP_VENDOR_ID:

View File

@ -37,6 +37,10 @@
struct ilo_shader_cache {
struct list_head shaders;
struct list_head changed;
int max_vs_scratch_size;
int max_gs_scratch_size;
int max_fs_scratch_size;
};
/**
@ -121,6 +125,8 @@ ilo_shader_cache_upload(struct ilo_shader_cache *shc,
struct ilo_shader *sh;
LIST_FOR_EACH_ENTRY(sh, &shader->variants, list) {
int scratch_size, *cur_max;
if (sh->uploaded)
continue;
@ -128,6 +134,29 @@ ilo_shader_cache_upload(struct ilo_shader_cache *shc,
sh->kernel_size, sh->kernel);
sh->uploaded = true;
switch (shader->info.type) {
case PIPE_SHADER_VERTEX:
scratch_size = ilo_state_vs_get_scratch_size(&sh->cso.vs);
cur_max = &shc->max_vs_scratch_size;
break;
case PIPE_SHADER_GEOMETRY:
scratch_size = ilo_state_gs_get_scratch_size(&sh->cso.gs);
cur_max = &shc->max_gs_scratch_size;
break;
case PIPE_SHADER_FRAGMENT:
scratch_size = ilo_state_ps_get_scratch_size(&sh->cso.ps);
cur_max = &shc->max_fs_scratch_size;
break;
default:
assert(!"unknown shader type");
scratch_size = 0;
cur_max = &shc->max_vs_scratch_size;
break;
}
if (*cur_max < scratch_size)
*cur_max = scratch_size;
}
list_del(&shader->list);
@ -155,6 +184,21 @@ ilo_shader_cache_invalidate(struct ilo_shader_cache *shc)
LIST_FOR_EACH_ENTRY(sh, &shader->variants, list)
sh->uploaded = false;
}
shc->max_vs_scratch_size = 0;
shc->max_gs_scratch_size = 0;
shc->max_fs_scratch_size = 0;
}
void
ilo_shader_cache_get_max_scratch_sizes(const struct ilo_shader_cache *shc,
int *vs_scratch_size,
int *gs_scratch_size,
int *fs_scratch_size)
{
*vs_scratch_size = shc->max_vs_scratch_size;
*gs_scratch_size = shc->max_gs_scratch_size;
*fs_scratch_size = shc->max_fs_scratch_size;
}
/**
@ -578,7 +622,6 @@ init_shader_kernel(const struct ilo_shader *kernel,
kern->grf_start = kernel->in.start_grf;
kern->pcb_attr_count =
(kernel->pcb.cbuf0_size + kernel->pcb.clip_state_size + 15) / 16;
kern->scratch_size = 0;
}
static void
@ -602,6 +645,7 @@ init_vs(struct ilo_shader *kernel,
init_shader_urb(kernel, state, &info.urb);
init_shader_kernel(kernel, state, &info.kernel);
init_shader_resource(kernel, state, &info.resource);
info.per_thread_scratch_size = kernel->per_thread_scratch_size;
info.dispatch_enable = true;
info.stats_enable = true;
@ -640,6 +684,7 @@ init_gs(struct ilo_shader *kernel,
init_shader_urb(kernel, state, &info.urb);
init_shader_kernel(kernel, state, &info.kernel);
init_shader_resource(kernel, state, &info.resource);
info.per_thread_scratch_size = kernel->per_thread_scratch_size;
info.dispatch_enable = true;
info.stats_enable = true;
@ -664,6 +709,7 @@ init_ps(struct ilo_shader *kernel,
init_shader_kernel(kernel, state, &info.kernel_8);
init_shader_resource(kernel, state, &info.resource);
info.per_thread_scratch_size = kernel->per_thread_scratch_size;
info.io.has_rt_write = true;
info.io.posoffset = GEN6_POSOFFSET_NONE;
info.io.attr_count = kernel->in.count;

View File

@ -120,6 +120,12 @@ ilo_shader_cache_upload(struct ilo_shader_cache *shc,
void
ilo_shader_cache_invalidate(struct ilo_shader_cache *shc);
void
ilo_shader_cache_get_max_scratch_sizes(const struct ilo_shader_cache *shc,
int *vs_scratch_size,
int *gs_scratch_size,
int *fs_scratch_size);
struct ilo_shader_state *
ilo_shader_create_vs(const struct ilo_dev *dev,
const struct pipe_shader_state *state,

View File

@ -139,6 +139,7 @@ struct ilo_shader {
void *kernel;
int kernel_size;
int per_thread_scratch_size;
struct ilo_kernel_routing routing;
struct ilo_state_ps_params_info ps_params;

View File

@ -299,6 +299,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
}
/* should only get here on unhandled cases */

View File

@ -854,10 +854,10 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
jit_tex->img_stride[j] = lp_tex->img_stride[j];
}
if (view->target == PIPE_TEXTURE_1D_ARRAY ||
view->target == PIPE_TEXTURE_2D_ARRAY ||
view->target == PIPE_TEXTURE_CUBE ||
view->target == PIPE_TEXTURE_CUBE_ARRAY) {
if (res->target == PIPE_TEXTURE_1D_ARRAY ||
res->target == PIPE_TEXTURE_2D_ARRAY ||
res->target == PIPE_TEXTURE_CUBE ||
res->target == PIPE_TEXTURE_CUBE_ARRAY) {
/*
* For array textures, we don't have first_layer, instead
* adjust last_layer (stored as depth) plus the mip level offsets

View File

@ -275,10 +275,10 @@ prepare_shader_sampling(
row_stride[j] = lp_tex->row_stride[j];
img_stride[j] = lp_tex->img_stride[j];
}
if (view->target == PIPE_TEXTURE_1D_ARRAY ||
view->target == PIPE_TEXTURE_2D_ARRAY ||
view->target == PIPE_TEXTURE_CUBE ||
view->target == PIPE_TEXTURE_CUBE_ARRAY) {
if (tex->target == PIPE_TEXTURE_1D_ARRAY ||
tex->target == PIPE_TEXTURE_2D_ARRAY ||
tex->target == PIPE_TEXTURE_CUBE ||
tex->target == PIPE_TEXTURE_CUBE_ARRAY) {
num_layers = view->u.tex.last_layer - view->u.tex.first_layer + 1;
for (j = first_level; j <= last_level; j++) {
mip_offsets[j] += view->u.tex.first_layer *

View File

@ -200,7 +200,8 @@ llvmpipe_can_create_resource(struct pipe_screen *screen,
static boolean
llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen,
struct llvmpipe_resource *lpr)
struct llvmpipe_resource *lpr,
const void *map_front_private)
{
struct sw_winsys *winsys = screen->winsys;
@ -215,12 +216,13 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen,
lpr->base.format,
width, height,
64,
map_front_private,
&lpr->row_stride[0] );
if (lpr->dt == NULL)
return FALSE;
{
if (!map_front_private) {
void *map = winsys->displaytarget_map(winsys, lpr->dt,
PIPE_TRANSFER_WRITE);
@ -235,8 +237,9 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen,
static struct pipe_resource *
llvmpipe_resource_create(struct pipe_screen *_screen,
const struct pipe_resource *templat)
llvmpipe_resource_create_front(struct pipe_screen *_screen,
const struct pipe_resource *templat,
const void *map_front_private)
{
struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
struct llvmpipe_resource *lpr = CALLOC_STRUCT(llvmpipe_resource);
@ -254,7 +257,7 @@ llvmpipe_resource_create(struct pipe_screen *_screen,
PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED)) {
/* displayable surface */
if (!llvmpipe_displaytarget_layout(screen, lpr))
if (!llvmpipe_displaytarget_layout(screen, lpr, map_front_private))
goto fail;
}
else {
@ -300,7 +303,12 @@ llvmpipe_resource_create(struct pipe_screen *_screen,
FREE(lpr);
return NULL;
}
static struct pipe_resource *
llvmpipe_resource_create(struct pipe_screen *_screen,
const struct pipe_resource *templat)
{
return llvmpipe_resource_create_front(_screen, templat, NULL);
}
static void
llvmpipe_resource_destroy(struct pipe_screen *pscreen,
@ -797,6 +805,7 @@ llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen)
#endif
screen->resource_create = llvmpipe_resource_create;
screen->resource_create_front = llvmpipe_resource_create_front;
screen->resource_destroy = llvmpipe_resource_destroy;
screen->resource_from_handle = llvmpipe_resource_from_handle;
screen->resource_get_handle = llvmpipe_resource_get_handle;

View File

@ -73,6 +73,9 @@ NV50_C_SOURCES := \
nv50/nv50_program.h \
nv50/nv50_push.c \
nv50/nv50_query.c \
nv50/nv50_query.h \
nv50/nv50_query_hw.c \
nv50/nv50_query_hw.h \
nv50/nv50_resource.c \
nv50/nv50_resource.h \
nv50/nv50_screen.c \

View File

@ -1128,7 +1128,6 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
info->prop.gp.instanceCount = 1;
info->prop.gp.maxVertices = 1;
}
info->io.clipDistance = 0xff;
info->io.pointSize = 0xff;
info->io.instanceId = 0xff;
info->io.vertexId = 0xff;

View File

@ -73,8 +73,8 @@ public:
Instruction *mkCvt(operation, DataType, Value *, DataType, Value *);
CmpInstruction *mkCmp(operation, CondCode, DataType,
Value *,
DataType, Value *, Value *, Value * = NULL);
Value *,
DataType, Value *, Value *, Value * = NULL);
TexInstruction *mkTex(operation, TexTarget,
uint16_t tic, uint16_t tsc,
const std::vector<Value *> &def,

View File

@ -99,6 +99,7 @@ struct nv50_ir_prog_info
uint8_t sourceRep; /* NV50_PROGRAM_IR */
const void *source;
void *relocData;
void *interpData;
struct nv50_ir_prog_symbol *syms;
uint16_t numSyms;
} bin;
@ -143,6 +144,7 @@ struct nv50_ir_prog_info
bool earlyFragTests;
bool separateFragData;
bool usesDiscard;
bool sampleInterp; /* perform sample interp on all fp inputs */
} fp;
struct {
uint32_t inputOffset; /* base address for user args */
@ -154,9 +156,8 @@ struct nv50_ir_prog_info
uint8_t numBarriers;
struct {
uint8_t clipDistance; /* index of first clip distance output */
uint8_t clipDistanceMask; /* mask of clip distances defined */
uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */
uint8_t clipDistances; /* number of clip distance outputs */
uint8_t cullDistances; /* number of cull distance outputs */
int8_t genUserClip; /* request user clip planes for ClipVertex */
uint16_t ucpBase; /* base address for UCPs */
uint8_t ucpCBSlot; /* constant buffer index of UCP data */
@ -168,7 +169,6 @@ struct nv50_ir_prog_info
int8_t viewportId; /* output index of ViewportIndex */
uint8_t fragDepth; /* output index of FragDepth */
uint8_t sampleMask; /* output index of SampleMask */
bool sampleInterp; /* perform sample interp on all fp inputs */
uint8_t backFaceColor[2]; /* input/output indices of back face colour */
uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */
bool fp64; /* program uses fp64 math */
@ -198,6 +198,10 @@ extern void nv50_ir_relocate_code(void *relocData, uint32_t *code,
uint32_t libPos,
uint32_t dataPos);
extern void
nv50_ir_change_interp(void *interpData, uint32_t *code,
bool force_per_sample, bool flatshade);
/* obtain code that will be shared among programs */
extern void nv50_ir_get_target_library(uint32_t chipset,
const uint32_t **code, uint32_t *size);

View File

@ -1437,6 +1437,30 @@ CodeEmitterGK110::emitInterpMode(const Instruction *i)
code[1] |= (i->ipa & 0xc) << (19 - 2);
}
/* Patch an already-emitted GK110 interpolation instruction in place.
 *
 * Invoked through nv50_ir_change_interp(): "entry" records the
 * interpolation mode bits (ipa), the perspective-divide source register
 * (reg), and the instruction's 32-bit-word offset (loc) inside "code".
 *
 * flatshade: an input interpolated with the SC (color) mode is rewritten
 *            to flat interpolation with no source register (0xff).
 * force_persample_interp: inputs using the default sample mode and a
 *            non-flat mode get the centroid bit set instead.
 */
static void
interpApply(const InterpEntry *entry, uint32_t *code,
            bool force_persample_interp, bool flatshade)
{
   int ipa = entry->ipa;
   int reg = entry->reg;
   int loc = entry->loc;

   if (flatshade &&
       (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
      /* flat-shaded color: flat mode, no perspective source register */
      ipa = NV50_IR_INTERP_FLAT;
      reg = 0xff;
   } else if (force_persample_interp &&
              (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
              (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
      /* NOTE(review): the centroid bit appears to double as the
       * per-sample request in this encoding -- confirm against the
       * GK110 ISA documentation. */
      ipa |= NV50_IR_INTERP_CENTROID;
   }

   /* re-encode the mode bits in word 1 (same fields as emitInterpMode) */
   code[loc + 1] &= ~(0xf << 19);
   code[loc + 1] |= (ipa & 0x3) << 21;
   code[loc + 1] |= (ipa & 0xc) << (19 - 2);
   /* re-encode the source register field in word 0 */
   code[loc + 0] &= ~(0xff << 23);
   code[loc + 0] |= reg << 23;
}
void
CodeEmitterGK110::emitINTERP(const Instruction *i)
{
@ -1448,10 +1472,13 @@ CodeEmitterGK110::emitINTERP(const Instruction *i)
if (i->saturate)
code[1] |= 1 << 18;
if (i->op == OP_PINTERP)
if (i->op == OP_PINTERP) {
srcId(i->src(1), 23);
else
addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
} else {
code[0] |= 0xff << 23;
addInterp(i->ipa, 0xff, interpApply);
}
srcId(i->src(0).getIndirect(0), 10);
emitInterpMode(i);

View File

@ -2217,6 +2217,30 @@ CodeEmitterGM107::emitAL2P()
emitGPR (0x00, insn->def(0));
}
/* Patch an already-emitted GM107 IPA instruction in place.
 *
 * Invoked through nv50_ir_change_interp(): "entry" records the
 * interpolation mode bits (ipa), the perspective-divide source register
 * (reg), and the instruction's 32-bit-word offset (loc) inside "code".
 *
 * flatshade: an input interpolated with the SC (color) mode is rewritten
 *            to flat interpolation with no source register (0xff).
 * force_persample_interp: inputs using the default sample mode and a
 *            non-flat mode get the centroid bit set instead.
 */
static void
interpApply(const InterpEntry *entry, uint32_t *code,
            bool force_persample_interp, bool flatshade)
{
   int ipa = entry->ipa;
   int reg = entry->reg;
   int loc = entry->loc;

   if (flatshade &&
       (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
      /* flat-shaded color: flat mode, no perspective source register */
      ipa = NV50_IR_INTERP_FLAT;
      reg = 0xff;
   } else if (force_persample_interp &&
              (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
              (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
      /* NOTE(review): centroid bit appears to request per-sample interp
       * in this encoding -- confirm against the GM107 ISA. */
      ipa |= NV50_IR_INTERP_CENTROID;
   }

   /* re-encode the mode bits in word 1 (fields as used by emitIPA) */
   code[loc + 1] &= ~(0xf << 0x14);
   code[loc + 1] |= (ipa & 0x3) << 0x16;
   code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
   /* re-encode the source register field in word 0 */
   code[loc + 0] &= ~(0xff << 0x14);
   code[loc + 0] |= reg << 0x14;
}
void
CodeEmitterGM107::emitIPA()
{
@ -2255,10 +2279,12 @@ CodeEmitterGM107::emitIPA()
emitGPR(0x14, insn->src(1));
if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
emitGPR(0x27, insn->src(2));
addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply);
} else {
if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
emitGPR(0x27, insn->src(1));
emitGPR(0x14);
addInterp(insn->ipa, 0xff, interpApply);
}
if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)

View File

@ -372,7 +372,7 @@ CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
mode |= 3 << (s * 2);
break;
default:
ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
assert(0);
break;
}
@ -876,6 +876,30 @@ CodeEmitterNV50::emitPFETCH(const Instruction *i)
emitFlagsRd(i);
}
/* Patch an already-emitted NV50 INTERP instruction in place.
 *
 * Invoked through nv50_ir_change_interp().  On nv50 the second addInterp()
 * argument is the instruction's encoding size (see emitINTERP, which passes
 * i->encSize), so entry->reg holds 4 or 8 here, not a register index.
 *
 * Only instructions in the default sample mode with a non-flat
 * interpolation mode are touched: the per-sample bit is set or cleared
 * according to force_persample_interp.  The bit position depends on
 * whether the instruction uses the long (8-byte) or short (4-byte)
 * encoding.
 *
 * NOTE(review): flatshade is unused on this generation -- presumably
 * flat shading is handled elsewhere for nv50; verify against callers.
 */
static void
interpApply(const InterpEntry *entry, uint32_t *code,
            bool force_persample_interp, bool flatshade)
{
   int ipa = entry->ipa;
   int encSize = entry->reg;
   int loc = entry->loc;

   if ((ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
       (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
      if (force_persample_interp) {
         /* set the per-sample bit */
         if (encSize == 8)
            code[loc + 1] |= 1 << 16;
         else
            code[loc + 0] |= 1 << 24;
      } else {
         /* clear the per-sample bit */
         if (encSize == 8)
            code[loc + 1] &= ~(1 << 16);
         else
            code[loc + 0] &= ~(1 << 24);
      }
   }
}
void
CodeEmitterNV50::emitINTERP(const Instruction *i)
{
@ -904,6 +928,8 @@ CodeEmitterNV50::emitINTERP(const Instruction *i)
code[0] |= 1;
emitFlagsRd(i);
}
addInterp(i->ipa, i->encSize, interpApply);
}
void

View File

@ -1618,6 +1618,29 @@ CodeEmitterNVC0::emitInterpMode(const Instruction *i)
}
}
/* Patch an already-emitted NVC0 interpolation instruction in place.
 *
 * Invoked through nv50_ir_change_interp(): "entry" records the
 * interpolation mode bits (ipa), the perspective-divide source register
 * (reg), and the instruction's 32-bit-word offset (loc) inside "code".
 *
 * flatshade: an input interpolated with the SC (color) mode is rewritten
 *            to flat interpolation with no source register (0x3f on this
 *            generation, vs. 0xff on GK110/GM107).
 * force_persample_interp: inputs using the default sample mode and a
 *            non-flat mode get the centroid bit set instead.
 */
static void
interpApply(const InterpEntry *entry, uint32_t *code,
            bool force_persample_interp, bool flatshade)
{
   int ipa = entry->ipa;
   int reg = entry->reg;
   int loc = entry->loc;

   if (flatshade &&
       (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
      /* flat-shaded color: flat mode, no perspective source register */
      ipa = NV50_IR_INTERP_FLAT;
      reg = 0x3f;
   } else if (force_persample_interp &&
              (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
              (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
      /* NOTE(review): centroid bit appears to request per-sample interp
       * in this encoding -- confirm against the NVC0 ISA. */
      ipa |= NV50_IR_INTERP_CENTROID;
   }

   /* single-word encoding: mode bits at 6, register field at 26 */
   code[loc + 0] &= ~(0xf << 6);
   code[loc + 0] |= ipa << 6;
   code[loc + 0] &= ~(0x3f << 26);
   code[loc + 0] |= reg << 26;
}
void
CodeEmitterNVC0::emitINTERP(const Instruction *i)
{
@ -1630,10 +1653,13 @@ CodeEmitterNVC0::emitINTERP(const Instruction *i)
if (i->saturate)
code[0] |= 1 << 5;
if (i->op == OP_PINTERP)
if (i->op == OP_PINTERP) {
srcId(i->src(1), 26);
else
addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
} else {
code[0] |= 0x3f << 26;
addInterp(i->ipa, 0x3f, interpApply);
}
srcId(i->src(0).getIndirect(0), 20);
} else {

View File

@ -910,7 +910,7 @@ bool Source::scanSource()
info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16;
if (info->io.genUserClip > 0) {
info->io.clipDistanceMask = (1 << info->io.genUserClip) - 1;
info->io.clipDistances = info->io.genUserClip;
const unsigned int nOut = (info->io.genUserClip + 3) / 4;
@ -919,7 +919,7 @@ bool Source::scanSource()
info->out[i].id = i;
info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
info->out[i].si = n;
info->out[i].mask = info->io.clipDistanceMask >> (n * 4);
info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
}
}
@ -969,6 +969,12 @@ void Source::scanProperty(const struct tgsi_full_property *prop)
else
info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */
break;
case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
info->io.clipDistances = prop->u[0].Data;
break;
case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
info->io.cullDistances = prop->u[0].Data;
break;
default:
INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
break;
@ -1054,7 +1060,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
default:
break;
}
if (decl->Interp.Location || info->io.sampleInterp)
if (decl->Interp.Location)
info->in[i].centroid = 1;
}
@ -1086,8 +1092,6 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
clipVertexOutput = i;
break;
case TGSI_SEMANTIC_CLIPDIST:
info->io.clipDistanceMask |=
decl->Declaration.UsageMask << (si * 4);
info->io.genUserClip = -1;
break;
case TGSI_SEMANTIC_SAMPLEMASK:
@ -1119,6 +1123,10 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
case TGSI_SEMANTIC_VERTEXID:
info->io.vertexId = first;
break;
case TGSI_SEMANTIC_SAMPLEID:
case TGSI_SEMANTIC_SAMPLEPOS:
info->prop.fp.sampleInterp = 1;
break;
default:
break;
}
@ -1338,6 +1346,8 @@ private:
void handleINTERP(Value *dst0[4]);
uint8_t translateInterpMode(const struct nv50_ir_varying *var,
operation& op);
Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);
void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
@ -1451,8 +1461,8 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
return sym;
}
static inline uint8_t
translateInterpMode(const struct nv50_ir_varying *var, operation& op)
uint8_t
Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op)
{
uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
@ -1468,7 +1478,7 @@ translateInterpMode(const struct nv50_ir_varying *var, operation& op)
op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
? OP_PINTERP : OP_LINTERP;
if (var->centroid)
if (var->centroid || info->prop.fp.sampleInterp)
mode |= NV50_IR_INTERP_CENTROID;
return mode;
@ -1628,7 +1638,7 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
// don't load masked inputs, won't be assigned a slot
if (!ptr && !(info->in[idx].mask & (1 << swz)))
return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE)
if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE)
return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0));
return interpolate(src, c, shiftAddress(ptr));
} else

View File

@ -166,7 +166,7 @@ void Target::destroy(Target *targ)
delete targ;
}
CodeEmitter::CodeEmitter(const Target *target) : targ(target)
CodeEmitter::CodeEmitter(const Target *target) : targ(target), interpInfo(NULL)
{
}
@ -388,6 +388,7 @@ Program::emitBinary(struct nv50_ir_prog_info *info)
}
}
info->bin.relocData = emit->getRelocInfo();
info->bin.interpData = emit->getInterpInfo();
emitSymbolTable(info);
@ -428,6 +429,29 @@ CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
return true;
}
/* Record an interpolation instruction so its mode can be patched after
 * upload by nv50_ir_change_interp().
 *
 * ipa:   interpolation mode bits of the instruction just emitted
 * reg:   emitter-specific datum (source register on nvc0/gk110/gm107,
 *        encoding size on nv50)
 * apply: the emitter's patch callback; stored once for the whole table
 *
 * The entry array grows lazily in RELOC_ALLOC_INCREMENT-entry steps.
 * The recorded location is the current code size in 32-bit words
 * (codeSize >> 2).  Returns false on allocation failure.
 */
bool
CodeEmitter::addInterp(int ipa, int reg, InterpApply apply)
{
   unsigned int n = interpInfo ? interpInfo->count : 0;

   /* grow the table whenever the current chunk is full (or on first use) */
   if (!(n % RELOC_ALLOC_INCREMENT)) {
      size_t size = sizeof(InterpInfo) + n * sizeof(InterpEntry);
      interpInfo = reinterpret_cast<InterpInfo *>(
         REALLOC(interpInfo, n ? size : 0,
                 size + RELOC_ALLOC_INCREMENT * sizeof(InterpEntry)));
      if (!interpInfo)
         return false;
      if (n == 0)
         /* first allocation: zero the header (count, apply) */
         memset(interpInfo, 0, sizeof(InterpInfo));
   }
   ++interpInfo->count;
   /* loc is the offset of the *next* instruction to be emitted, in words */
   interpInfo->entry[n] = InterpEntry(ipa, reg, codeSize >> 2);
   interpInfo->apply = apply;

   return true;
}
void
RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
{
@ -471,6 +495,19 @@ nv50_ir_relocate_code(void *relocData, uint32_t *code,
info->entry[i].apply(code, info);
}
/* Rewrite the interpolation modes of an uploaded shader binary.
 *
 * interpData is the InterpInfo table built by CodeEmitter::addInterp()
 * and exposed through nv50_ir_prog_info::bin.interpData; each recorded
 * instruction in "code" is patched via the emitter-specific callback.
 */
void
nv50_ir_change_interp(void *interpData, uint32_t *code,
                      bool force_persample_interp, bool flatshade)
{
   nv50_ir::InterpInfo *info = reinterpret_cast<nv50_ir::InterpInfo *>(
      interpData);

   // force_persample_interp: all non-flat -> per-sample
   // flatshade: all color -> flat
   for (unsigned i = 0; i < info->count; ++i)
      info->apply(&info->entry[i], code, force_persample_interp, flatshade);
}
void
nv50_ir_get_target_library(uint32_t chipset,
const uint32_t **code, uint32_t *size)

View File

@ -58,6 +58,23 @@ struct RelocInfo
RelocEntry entry[0];
};
/* One interpolation instruction recorded at emission time so that its
 * mode can be patched in the uploaded binary (see addInterp() and
 * nv50_ir_change_interp()). */
struct InterpEntry
{
   InterpEntry(int ipa, int reg, int loc) : ipa(ipa), reg(reg), loc(loc) {}
   uint32_t ipa:4; // SC mode used to identify colors
   uint32_t reg:8; // The reg used for perspective division
   uint32_t loc:20; // Let's hope we don't have more than 1M-sized shaders
};

/* Emitter-specific patch callback:
 * (entry, code, force_persample_interp, flatshade). */
typedef void (*InterpApply)(const InterpEntry*, uint32_t*, bool, bool);

/* Growable table of recorded interpolation instructions, handed to
 * nv50_ir_change_interp() via nv50_ir_prog_info::bin.interpData. */
struct InterpInfo
{
   uint32_t count;      // number of valid entries
   InterpApply apply;   // patch callback shared by all entries
   InterpEntry entry[0]; // trailing entries (GNU zero-length array idiom)
};
class CodeEmitter
{
public:
@ -78,6 +95,9 @@ public:
inline void *getRelocInfo() const { return relocInfo; }
bool addInterp(int ipa, int reg, InterpApply apply);
inline void *getInterpInfo() const { return interpInfo; }
virtual void prepareEmission(Program *);
virtual void prepareEmission(Function *);
virtual void prepareEmission(BasicBlock *);
@ -92,6 +112,7 @@ protected:
uint32_t codeSizeLimit;
RelocInfo *relocInfo;
InterpInfo *interpInfo;
};

View File

@ -29,95 +29,95 @@ int
nouveau_heap_init(struct nouveau_heap **heap,
unsigned start, unsigned size)
{
struct nouveau_heap *r;
struct nouveau_heap *r;
r = calloc(1, sizeof(struct nouveau_heap));
if (!r)
return 1;
r = calloc(1, sizeof(struct nouveau_heap));
if (!r)
return 1;
r->start = start;
r->size = size;
*heap = r;
return 0;
r->start = start;
r->size = size;
*heap = r;
return 0;
}
void
nouveau_heap_destroy(struct nouveau_heap **heap)
{
if (!*heap)
return;
free(*heap);
*heap = NULL;
if (!*heap)
return;
free(*heap);
*heap = NULL;
}
int
nouveau_heap_alloc(struct nouveau_heap *heap, unsigned size, void *priv,
struct nouveau_heap **res)
{
struct nouveau_heap *r;
struct nouveau_heap *r;
if (!heap || !size || !res || *res)
return 1;
if (!heap || !size || !res || *res)
return 1;
while (heap) {
if (!heap->in_use && heap->size >= size) {
r = calloc(1, sizeof(struct nouveau_heap));
if (!r)
return 1;
while (heap) {
if (!heap->in_use && heap->size >= size) {
r = calloc(1, sizeof(struct nouveau_heap));
if (!r)
return 1;
r->start = (heap->start + heap->size) - size;
r->size = size;
r->in_use = 1;
r->priv = priv;
r->start = (heap->start + heap->size) - size;
r->size = size;
r->in_use = 1;
r->priv = priv;
heap->size -= size;
heap->size -= size;
r->next = heap->next;
if (heap->next)
heap->next->prev = r;
r->prev = heap;
heap->next = r;
r->next = heap->next;
if (heap->next)
heap->next->prev = r;
r->prev = heap;
heap->next = r;
*res = r;
return 0;
}
*res = r;
return 0;
}
heap = heap->next;
}
heap = heap->next;
}
return 1;
return 1;
}
void
nouveau_heap_free(struct nouveau_heap **res)
{
struct nouveau_heap *r;
struct nouveau_heap *r;
if (!res || !*res)
return;
r = *res;
*res = NULL;
if (!res || !*res)
return;
r = *res;
*res = NULL;
r->in_use = 0;
r->in_use = 0;
if (r->next && !r->next->in_use) {
struct nouveau_heap *new = r->next;
if (r->next && !r->next->in_use) {
struct nouveau_heap *new = r->next;
new->prev = r->prev;
if (r->prev)
r->prev->next = new;
new->size += r->size;
new->start = r->start;
new->prev = r->prev;
if (r->prev)
r->prev->next = new;
new->size += r->size;
new->start = r->start;
free(r);
r = new;
}
free(r);
r = new;
}
if (r->prev && !r->prev->in_use) {
r->prev->next = r->next;
if (r->next)
r->next->prev = r->prev;
r->prev->size += r->size;
free(r);
}
if (r->prev && !r->prev->in_use) {
r->prev->next = r->next;
if (r->next)
r->next->prev = r->prev;
r->prev->size += r->size;
free(r);
}
}

View File

@ -44,15 +44,15 @@
* full size of the heap.
*/
struct nouveau_heap {
struct nouveau_heap *prev;
struct nouveau_heap *next;
struct nouveau_heap *prev;
struct nouveau_heap *next;
void *priv;
void *priv;
unsigned start;
unsigned size;
unsigned start;
unsigned size;
int in_use;
int in_use;
};
int

View File

@ -30,211 +30,211 @@ int nouveau_mesa_debug = 0;
static const char *
nouveau_screen_get_name(struct pipe_screen *pscreen)
{
struct nouveau_device *dev = nouveau_screen(pscreen)->device;
static char buffer[128];
struct nouveau_device *dev = nouveau_screen(pscreen)->device;
static char buffer[128];
util_snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
return buffer;
util_snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
return buffer;
}
static const char *
nouveau_screen_get_vendor(struct pipe_screen *pscreen)
{
return "nouveau";
return "nouveau";
}
static const char *
nouveau_screen_get_device_vendor(struct pipe_screen *pscreen)
{
return "NVIDIA";
return "NVIDIA";
}
static uint64_t
nouveau_screen_get_timestamp(struct pipe_screen *pscreen)
{
int64_t cpu_time = os_time_get() * 1000;
int64_t cpu_time = os_time_get() * 1000;
/* getparam of PTIMER_TIME takes about x10 as long (several usecs) */
/* getparam of PTIMER_TIME takes about x10 as long (several usecs) */
return cpu_time + nouveau_screen(pscreen)->cpu_gpu_time_delta;
return cpu_time + nouveau_screen(pscreen)->cpu_gpu_time_delta;
}
static void
nouveau_screen_fence_ref(struct pipe_screen *pscreen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *pfence)
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *pfence)
{
nouveau_fence_ref(nouveau_fence(pfence), (struct nouveau_fence **)ptr);
nouveau_fence_ref(nouveau_fence(pfence), (struct nouveau_fence **)ptr);
}
static boolean
nouveau_screen_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *pfence,
struct pipe_fence_handle *pfence,
uint64_t timeout)
{
if (!timeout)
return nouveau_fence_signalled(nouveau_fence(pfence));
if (!timeout)
return nouveau_fence_signalled(nouveau_fence(pfence));
return nouveau_fence_wait(nouveau_fence(pfence));
return nouveau_fence_wait(nouveau_fence(pfence));
}
struct nouveau_bo *
nouveau_screen_bo_from_handle(struct pipe_screen *pscreen,
struct winsys_handle *whandle,
unsigned *out_stride)
struct winsys_handle *whandle,
unsigned *out_stride)
{
struct nouveau_device *dev = nouveau_screen(pscreen)->device;
struct nouveau_bo *bo = 0;
int ret;
struct nouveau_device *dev = nouveau_screen(pscreen)->device;
struct nouveau_bo *bo = 0;
int ret;
if (whandle->type != DRM_API_HANDLE_TYPE_SHARED &&
whandle->type != DRM_API_HANDLE_TYPE_FD) {
debug_printf("%s: attempt to import unsupported handle type %d\n",
__FUNCTION__, whandle->type);
return NULL;
}
if (whandle->type != DRM_API_HANDLE_TYPE_SHARED &&
whandle->type != DRM_API_HANDLE_TYPE_FD) {
debug_printf("%s: attempt to import unsupported handle type %d\n",
__FUNCTION__, whandle->type);
return NULL;
}
if (whandle->type == DRM_API_HANDLE_TYPE_SHARED)
ret = nouveau_bo_name_ref(dev, whandle->handle, &bo);
else
ret = nouveau_bo_prime_handle_ref(dev, whandle->handle, &bo);
if (whandle->type == DRM_API_HANDLE_TYPE_SHARED)
ret = nouveau_bo_name_ref(dev, whandle->handle, &bo);
else
ret = nouveau_bo_prime_handle_ref(dev, whandle->handle, &bo);
if (ret) {
debug_printf("%s: ref name 0x%08x failed with %d\n",
__FUNCTION__, whandle->handle, ret);
return NULL;
}
if (ret) {
debug_printf("%s: ref name 0x%08x failed with %d\n",
__FUNCTION__, whandle->handle, ret);
return NULL;
}
*out_stride = whandle->stride;
return bo;
*out_stride = whandle->stride;
return bo;
}
bool
nouveau_screen_bo_get_handle(struct pipe_screen *pscreen,
struct nouveau_bo *bo,
unsigned stride,
struct winsys_handle *whandle)
struct nouveau_bo *bo,
unsigned stride,
struct winsys_handle *whandle)
{
whandle->stride = stride;
whandle->stride = stride;
if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
return nouveau_bo_name_get(bo, &whandle->handle) == 0;
} else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
whandle->handle = bo->handle;
return true;
} else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
return nouveau_bo_set_prime(bo, (int *)&whandle->handle) == 0;
} else {
return false;
}
if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
return nouveau_bo_name_get(bo, &whandle->handle) == 0;
} else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
whandle->handle = bo->handle;
return true;
} else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
return nouveau_bo_set_prime(bo, (int *)&whandle->handle) == 0;
} else {
return false;
}
}
int
nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
{
struct pipe_screen *pscreen = &screen->base;
struct nv04_fifo nv04_data = { .vram = 0xbeef0201, .gart = 0xbeef0202 };
struct nvc0_fifo nvc0_data = { };
uint64_t time;
int size, ret;
void *data;
union nouveau_bo_config mm_config;
struct pipe_screen *pscreen = &screen->base;
struct nv04_fifo nv04_data = { .vram = 0xbeef0201, .gart = 0xbeef0202 };
struct nvc0_fifo nvc0_data = { };
uint64_t time;
int size, ret;
void *data;
union nouveau_bo_config mm_config;
char *nv_dbg = getenv("NOUVEAU_MESA_DEBUG");
if (nv_dbg)
nouveau_mesa_debug = atoi(nv_dbg);
char *nv_dbg = getenv("NOUVEAU_MESA_DEBUG");
if (nv_dbg)
nouveau_mesa_debug = atoi(nv_dbg);
/*
* this is initialized to 1 in nouveau_drm_screen_create after screen
* is fully constructed and added to the global screen list.
*/
screen->refcount = -1;
/*
* this is initialized to 1 in nouveau_drm_screen_create after screen
* is fully constructed and added to the global screen list.
*/
screen->refcount = -1;
if (dev->chipset < 0xc0) {
data = &nv04_data;
size = sizeof(nv04_data);
} else {
data = &nvc0_data;
size = sizeof(nvc0_data);
}
if (dev->chipset < 0xc0) {
data = &nv04_data;
size = sizeof(nv04_data);
} else {
data = &nvc0_data;
size = sizeof(nvc0_data);
}
/*
* Set default VRAM domain if not overridden
*/
if (!screen->vram_domain) {
if (dev->vram_size > 0)
screen->vram_domain = NOUVEAU_BO_VRAM;
else
screen->vram_domain = NOUVEAU_BO_GART;
}
/*
* Set default VRAM domain if not overridden
*/
if (!screen->vram_domain) {
if (dev->vram_size > 0)
screen->vram_domain = NOUVEAU_BO_VRAM;
else
screen->vram_domain = NOUVEAU_BO_GART;
}
ret = nouveau_object_new(&dev->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS,
data, size, &screen->channel);
if (ret)
return ret;
screen->device = dev;
ret = nouveau_object_new(&dev->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS,
data, size, &screen->channel);
if (ret)
return ret;
screen->device = dev;
ret = nouveau_client_new(screen->device, &screen->client);
if (ret)
return ret;
ret = nouveau_pushbuf_new(screen->client, screen->channel,
4, 512 * 1024, 1,
&screen->pushbuf);
if (ret)
return ret;
ret = nouveau_client_new(screen->device, &screen->client);
if (ret)
return ret;
ret = nouveau_pushbuf_new(screen->client, screen->channel,
4, 512 * 1024, 1,
&screen->pushbuf);
if (ret)
return ret;
/* getting CPU time first appears to be more accurate */
screen->cpu_gpu_time_delta = os_time_get();
/* getting CPU time first appears to be more accurate */
screen->cpu_gpu_time_delta = os_time_get();
ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_PTIMER_TIME, &time);
if (!ret)
screen->cpu_gpu_time_delta = time - screen->cpu_gpu_time_delta * 1000;
ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_PTIMER_TIME, &time);
if (!ret)
screen->cpu_gpu_time_delta = time - screen->cpu_gpu_time_delta * 1000;
pscreen->get_name = nouveau_screen_get_name;
pscreen->get_vendor = nouveau_screen_get_vendor;
pscreen->get_device_vendor = nouveau_screen_get_device_vendor;
pscreen->get_name = nouveau_screen_get_name;
pscreen->get_vendor = nouveau_screen_get_vendor;
pscreen->get_device_vendor = nouveau_screen_get_device_vendor;
pscreen->get_timestamp = nouveau_screen_get_timestamp;
pscreen->get_timestamp = nouveau_screen_get_timestamp;
pscreen->fence_reference = nouveau_screen_fence_ref;
pscreen->fence_finish = nouveau_screen_fence_finish;
pscreen->fence_reference = nouveau_screen_fence_ref;
pscreen->fence_finish = nouveau_screen_fence_finish;
util_format_s3tc_init();
util_format_s3tc_init();
screen->lowmem_bindings = PIPE_BIND_GLOBAL; /* gallium limit */
screen->vidmem_bindings =
PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL |
PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
PIPE_BIND_CURSOR |
PIPE_BIND_SAMPLER_VIEW |
PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE |
PIPE_BIND_COMPUTE_RESOURCE |
PIPE_BIND_GLOBAL;
screen->sysmem_bindings =
PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT |
PIPE_BIND_COMMAND_ARGS_BUFFER;
screen->lowmem_bindings = PIPE_BIND_GLOBAL; /* gallium limit */
screen->vidmem_bindings =
PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL |
PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
PIPE_BIND_CURSOR |
PIPE_BIND_SAMPLER_VIEW |
PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE |
PIPE_BIND_COMPUTE_RESOURCE |
PIPE_BIND_GLOBAL;
screen->sysmem_bindings =
PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT |
PIPE_BIND_COMMAND_ARGS_BUFFER;
memset(&mm_config, 0, sizeof(mm_config));
memset(&mm_config, 0, sizeof(mm_config));
screen->mm_GART = nouveau_mm_create(dev,
NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
&mm_config);
screen->mm_VRAM = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, &mm_config);
return 0;
screen->mm_GART = nouveau_mm_create(dev,
NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
&mm_config);
screen->mm_VRAM = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, &mm_config);
return 0;
}
void
nouveau_screen_fini(struct nouveau_screen *screen)
{
nouveau_mm_destroy(screen->mm_GART);
nouveau_mm_destroy(screen->mm_VRAM);
nouveau_mm_destroy(screen->mm_GART);
nouveau_mm_destroy(screen->mm_VRAM);
nouveau_pushbuf_del(&screen->pushbuf);
nouveau_pushbuf_del(&screen->pushbuf);
nouveau_client_del(&screen->client);
nouveau_object_del(&screen->channel);
nouveau_client_del(&screen->client);
nouveau_object_del(&screen->channel);
nouveau_device_del(&screen->device);
nouveau_device_del(&screen->device);
}

View File

@ -16,47 +16,47 @@ extern int nouveau_mesa_debug;
struct nouveau_bo;
struct nouveau_screen {
struct pipe_screen base;
struct nouveau_device *device;
struct nouveau_object *channel;
struct nouveau_client *client;
struct nouveau_pushbuf *pushbuf;
struct pipe_screen base;
struct nouveau_device *device;
struct nouveau_object *channel;
struct nouveau_client *client;
struct nouveau_pushbuf *pushbuf;
int refcount;
int refcount;
unsigned vidmem_bindings; /* PIPE_BIND_* where VRAM placement is desired */
unsigned sysmem_bindings; /* PIPE_BIND_* where GART placement is desired */
unsigned lowmem_bindings; /* PIPE_BIND_* that require an address < 4 GiB */
/*
* For bindings with (vidmem & sysmem) bits set, PIPE_USAGE_* decides
* placement.
*/
unsigned vidmem_bindings; /* PIPE_BIND_* where VRAM placement is desired */
unsigned sysmem_bindings; /* PIPE_BIND_* where GART placement is desired */
unsigned lowmem_bindings; /* PIPE_BIND_* that require an address < 4 GiB */
/*
* For bindings with (vidmem & sysmem) bits set, PIPE_USAGE_* decides
* placement.
*/
uint16_t class_3d;
uint16_t class_3d;
struct {
struct nouveau_fence *head;
struct nouveau_fence *tail;
struct nouveau_fence *current;
u32 sequence;
u32 sequence_ack;
void (*emit)(struct pipe_screen *, u32 *sequence);
u32 (*update)(struct pipe_screen *);
} fence;
struct {
struct nouveau_fence *head;
struct nouveau_fence *tail;
struct nouveau_fence *current;
u32 sequence;
u32 sequence_ack;
void (*emit)(struct pipe_screen *, u32 *sequence);
u32 (*update)(struct pipe_screen *);
} fence;
struct nouveau_mman *mm_VRAM;
struct nouveau_mman *mm_GART;
struct nouveau_mman *mm_VRAM;
struct nouveau_mman *mm_GART;
int64_t cpu_gpu_time_delta;
int64_t cpu_gpu_time_delta;
bool hint_buf_keep_sysmem_copy;
bool hint_buf_keep_sysmem_copy;
unsigned vram_domain;
unsigned vram_domain;
struct {
unsigned profiles_checked;
unsigned profiles_present;
} firmware_info;
struct {
unsigned profiles_checked;
unsigned profiles_present;
} firmware_info;
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
union {
@ -100,10 +100,10 @@ struct nouveau_screen {
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
# define NOUVEAU_DRV_STAT(s, n, v) do { \
(s)->stats.named.n += (v); \
(s)->stats.named.n += (v); \
} while(0)
# define NOUVEAU_DRV_STAT_RES(r, n, v) do { \
nouveau_screen((r)->base.screen)->stats.named.n += (v); \
# define NOUVEAU_DRV_STAT_RES(r, n, v) do { \
nouveau_screen((r)->base.screen)->stats.named.n += (v); \
} while(0)
# define NOUVEAU_DRV_STAT_IFD(x) x
#else
@ -115,20 +115,20 @@ struct nouveau_screen {
static inline struct nouveau_screen *
nouveau_screen(struct pipe_screen *pscreen)
{
return (struct nouveau_screen *)pscreen;
return (struct nouveau_screen *)pscreen;
}
bool nouveau_drm_screen_unref(struct nouveau_screen *screen);
bool
nouveau_screen_bo_get_handle(struct pipe_screen *pscreen,
struct nouveau_bo *bo,
unsigned stride,
struct winsys_handle *whandle);
struct nouveau_bo *bo,
unsigned stride,
struct winsys_handle *whandle);
struct nouveau_bo *
nouveau_screen_bo_from_handle(struct pipe_screen *pscreen,
struct winsys_handle *whandle,
unsigned *out_stride);
struct winsys_handle *whandle,
unsigned *out_stride);
int nouveau_screen_init(struct nouveau_screen *, struct nouveau_device *);

View File

@ -6,9 +6,9 @@
struct nouveau_statebuf_builder
{
uint32_t* p;
uint32_t* p;
#ifdef DEBUG
uint32_t* pend;
uint32_t* pend;
#endif
};
@ -22,7 +22,7 @@ struct nouveau_statebuf_builder
static inline uint32_t sb_header(unsigned subc, unsigned mthd, unsigned size)
{
return (size << 18) | (subc << 13) | mthd;
return (size << 18) | (subc << 13) | mthd;
}
#define sb_method(sb, v, n) sb_data(sb, sb_header(SUBC_3D(v), n));

View File

@ -831,7 +831,7 @@ error:
static int
nouveau_screen_get_video_param(struct pipe_screen *pscreen,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint,
enum pipe_video_entrypoint entrypoint,
enum pipe_video_cap param)
{
switch (param) {

View File

@ -83,7 +83,7 @@ BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
static inline void
PUSH_MTHDl(struct nouveau_pushbuf *push, int subc, int mthd,
struct nouveau_bo *bo, uint32_t offset,
struct nouveau_bufctx *ctx, int bin, uint32_t rw)
struct nouveau_bufctx *ctx, int bin, uint32_t rw)
{
nouveau_bufctx_mthd(ctx, bin, NV04_FIFO_PKHDR(subc, mthd, 1),
bo, offset,

View File

@ -117,22 +117,22 @@ struct nouveau_vp3_decoder {
};
struct comm {
uint32_t bsp_cur_index; // 000
uint32_t byte_ofs; // 004
uint32_t status[0x10]; // 008
uint32_t pos[0x10]; // 048
uint8_t pad[0x100 - 0x88]; // 0a0 bool comm_encrypted
uint32_t bsp_cur_index; // 000
uint32_t byte_ofs; // 004
uint32_t status[0x10]; // 008
uint32_t pos[0x10]; // 048
uint8_t pad[0x100 - 0x88]; // 0a0 bool comm_encrypted
uint32_t pvp_cur_index; // 100
uint32_t acked_byte_ofs; // 104
uint32_t status_vp[0x10]; // 108
uint16_t mb_y[0x10]; //148
uint32_t pvp_stage; // 168 0xeeXX
uint16_t parse_endpos_index; // 16c
uint16_t irq_index; // 16e
uint8_t irq_470[0x10]; // 170
uint32_t irq_pos[0x10]; // 180
uint32_t parse_endpos[0x10]; // 1c0
uint32_t pvp_cur_index; // 100
uint32_t acked_byte_ofs; // 104
uint32_t status_vp[0x10]; // 108
uint16_t mb_y[0x10]; //148
uint32_t pvp_stage; // 168 0xeeXX
uint16_t parse_endpos_index; // 16c
uint16_t irq_index; // 16e
uint8_t irq_470[0x10]; // 170
uint32_t irq_pos[0x10]; // 180
uint32_t parse_endpos[0x10]; // 1c0
};
static inline uint32_t nouveau_vp3_video_align(uint32_t h)

View File

@ -23,90 +23,90 @@
#include "nouveau_vp3_video.h"
struct strparm_bsp {
uint32_t w0[4]; // bits 0-23 length, bits 24-31 addr_hi
uint32_t w1[4]; // bit 8-24 addr_lo
uint32_t unk20; // should be idx * 0x8000000, bitstream offset
uint32_t do_crypto_crap; // set to 0
uint32_t w0[4]; // bits 0-23 length, bits 24-31 addr_hi
uint32_t w1[4]; // bit 8-24 addr_lo
uint32_t unk20; // should be idx * 0x8000000, bitstream offset
uint32_t do_crypto_crap; // set to 0
};
struct mpeg12_picparm_bsp {
uint16_t width;
uint16_t height;
uint8_t picture_structure;
uint8_t picture_coding_type;
uint8_t intra_dc_precision;
uint8_t frame_pred_frame_dct;
uint8_t concealment_motion_vectors;
uint8_t intra_vlc_format;
uint16_t pad;
uint8_t f_code[2][2];
uint16_t width;
uint16_t height;
uint8_t picture_structure;
uint8_t picture_coding_type;
uint8_t intra_dc_precision;
uint8_t frame_pred_frame_dct;
uint8_t concealment_motion_vectors;
uint8_t intra_vlc_format;
uint16_t pad;
uint8_t f_code[2][2];
};
struct mpeg4_picparm_bsp {
uint16_t width;
uint16_t height;
uint8_t vop_time_increment_size;
uint8_t interlaced;
uint8_t resync_marker_disable;
uint16_t width;
uint16_t height;
uint8_t vop_time_increment_size;
uint8_t interlaced;
uint8_t resync_marker_disable;
};
struct vc1_picparm_bsp {
uint16_t width;
uint16_t height;
uint8_t profile; // 04 0 simple, 1 main, 2 advanced
uint8_t postprocflag; // 05
uint8_t pulldown; // 06
uint8_t interlaced; // 07
uint8_t tfcntrflag; // 08
uint8_t finterpflag; // 09
uint8_t psf; // 0a
uint8_t pad; // 0b
uint8_t multires; // 0c
uint8_t syncmarker; // 0d
uint8_t rangered; // 0e
uint8_t maxbframes; // 0f
uint8_t dquant; // 10
uint8_t panscan_flag; // 11
uint8_t refdist_flag; // 12
uint8_t quantizer; // 13
uint8_t extended_mv; // 14
uint8_t extended_dmv; // 15
uint8_t overlap; // 16
uint8_t vstransform; // 17
uint16_t width;
uint16_t height;
uint8_t profile; // 04 0 simple, 1 main, 2 advanced
uint8_t postprocflag; // 05
uint8_t pulldown; // 06
uint8_t interlaced; // 07
uint8_t tfcntrflag; // 08
uint8_t finterpflag; // 09
uint8_t psf; // 0a
uint8_t pad; // 0b
uint8_t multires; // 0c
uint8_t syncmarker; // 0d
uint8_t rangered; // 0e
uint8_t maxbframes; // 0f
uint8_t dquant; // 10
uint8_t panscan_flag; // 11
uint8_t refdist_flag; // 12
uint8_t quantizer; // 13
uint8_t extended_mv; // 14
uint8_t extended_dmv; // 15
uint8_t overlap; // 16
uint8_t vstransform; // 17
};
struct h264_picparm_bsp {
// 00
uint32_t unk00;
// 04
uint32_t log2_max_frame_num_minus4; // 04 checked
uint32_t pic_order_cnt_type; // 08 checked
uint32_t log2_max_pic_order_cnt_lsb_minus4; // 0c checked
uint32_t delta_pic_order_always_zero_flag; // 10, or unknown
// 00
uint32_t unk00;
// 04
uint32_t log2_max_frame_num_minus4; // 04 checked
uint32_t pic_order_cnt_type; // 08 checked
uint32_t log2_max_pic_order_cnt_lsb_minus4; // 0c checked
uint32_t delta_pic_order_always_zero_flag; // 10, or unknown
uint32_t frame_mbs_only_flag; // 14, always 1?
uint32_t direct_8x8_inference_flag; // 18, always 1?
uint32_t width_mb; // 1c checked
uint32_t height_mb; // 20 checked
// 24
//struct picparm2
uint32_t entropy_coding_mode_flag; // 00, checked
uint32_t pic_order_present_flag; // 04 checked
uint32_t unk; // 08 seems to be 0?
uint32_t pad1; // 0c seems to be 0?
uint32_t pad2; // 10 always 0 ?
uint32_t num_ref_idx_l0_active_minus1; // 14 always 0?
uint32_t num_ref_idx_l1_active_minus1; // 18 always 0?
uint32_t weighted_pred_flag; // 1c checked
uint32_t weighted_bipred_idc; // 20 checked
uint32_t pic_init_qp_minus26; // 24 checked
uint32_t deblocking_filter_control_present_flag; // 28 always 1?
uint32_t redundant_pic_cnt_present_flag; // 2c always 0?
uint32_t transform_8x8_mode_flag; // 30 checked
uint32_t mb_adaptive_frame_field_flag; // 34 checked-ish
uint8_t field_pic_flag; // 38 checked
uint8_t bottom_field_flag; // 39 checked
uint8_t real_pad[0x1b]; // XX why?
uint32_t frame_mbs_only_flag; // 14, always 1?
uint32_t direct_8x8_inference_flag; // 18, always 1?
uint32_t width_mb; // 1c checked
uint32_t height_mb; // 20 checked
// 24
//struct picparm2
uint32_t entropy_coding_mode_flag; // 00, checked
uint32_t pic_order_present_flag; // 04 checked
uint32_t unk; // 08 seems to be 0?
uint32_t pad1; // 0c seems to be 0?
uint32_t pad2; // 10 always 0 ?
uint32_t num_ref_idx_l0_active_minus1; // 14 always 0?
uint32_t num_ref_idx_l1_active_minus1; // 18 always 0?
uint32_t weighted_pred_flag; // 1c checked
uint32_t weighted_bipred_idc; // 20 checked
uint32_t pic_init_qp_minus26; // 24 checked
uint32_t deblocking_filter_control_present_flag; // 28 always 1?
uint32_t redundant_pic_cnt_present_flag; // 2c always 0?
uint32_t transform_8x8_mode_flag; // 30 checked
uint32_t mb_adaptive_frame_field_flag; // 34 checked-ish
uint8_t field_pic_flag; // 38 checked
uint8_t bottom_field_flag; // 39 checked
uint8_t real_pad[0x1b]; // XX why?
};
static uint32_t

View File

@ -23,147 +23,147 @@
#include "nouveau_vp3_video.h"
struct mpeg12_picparm_vp {
uint16_t width; // 00 in mb units
uint16_t height; // 02 in mb units
uint16_t width; // 00 in mb units
uint16_t height; // 02 in mb units
uint32_t unk04; // 04 stride for Y?
uint32_t unk08; // 08 stride for CbCr?
uint32_t unk04; // 04 stride for Y?
uint32_t unk08; // 08 stride for CbCr?
uint32_t ofs[6]; // 1c..20 ofs
uint32_t bucket_size; // 24
uint32_t inter_ring_data_size; // 28
uint16_t unk2c; // 2c
uint16_t alternate_scan; // 2e
uint16_t unk30; // 30 not seen set yet
uint16_t picture_structure; // 32
uint16_t pad2[3];
uint16_t unk3a; // 3a set on I frame?
uint32_t ofs[6]; // 1c..20 ofs
uint32_t bucket_size; // 24
uint32_t inter_ring_data_size; // 28
uint16_t unk2c; // 2c
uint16_t alternate_scan; // 2e
uint16_t unk30; // 30 not seen set yet
uint16_t picture_structure; // 32
uint16_t pad2[3];
uint16_t unk3a; // 3a set on I frame?
uint32_t f_code[4]; // 3c
uint32_t picture_coding_type; // 4c
uint32_t intra_dc_precision; // 50
uint32_t q_scale_type; // 54
uint32_t top_field_first; // 58
uint32_t full_pel_forward_vector; // 5c
uint32_t full_pel_backward_vector; // 60
uint8_t intra_quantizer_matrix[0x40]; // 64
uint8_t non_intra_quantizer_matrix[0x40]; // a4
uint32_t f_code[4]; // 3c
uint32_t picture_coding_type; // 4c
uint32_t intra_dc_precision; // 50
uint32_t q_scale_type; // 54
uint32_t top_field_first; // 58
uint32_t full_pel_forward_vector; // 5c
uint32_t full_pel_backward_vector; // 60
uint8_t intra_quantizer_matrix[0x40]; // 64
uint8_t non_intra_quantizer_matrix[0x40]; // a4
};
struct mpeg4_picparm_vp {
uint32_t width; // 00 in normal units
uint32_t height; // 04 in normal units
uint32_t unk08; // stride 1
uint32_t unk0c; // stride 2
uint32_t ofs[6]; // 10..24 ofs
uint32_t bucket_size; // 28
uint32_t pad1; // 2c, pad
uint32_t pad2; // 30
uint32_t inter_ring_data_size; // 34
uint32_t width; // 00 in normal units
uint32_t height; // 04 in normal units
uint32_t unk08; // stride 1
uint32_t unk0c; // stride 2
uint32_t ofs[6]; // 10..24 ofs
uint32_t bucket_size; // 28
uint32_t pad1; // 2c, pad
uint32_t pad2; // 30
uint32_t inter_ring_data_size; // 34
uint32_t trd[2]; // 38, 3c
uint32_t trb[2]; // 40, 44
uint32_t u48; // XXX codec selection? Should test with different values of VdpDecoderProfile
uint16_t f_code_fw; // 4c
uint16_t f_code_bw; // 4e
uint8_t interlaced; // 50
uint32_t trd[2]; // 38, 3c
uint32_t trb[2]; // 40, 44
uint32_t u48; // XXX codec selection? Should test with different values of VdpDecoderProfile
uint16_t f_code_fw; // 4c
uint16_t f_code_bw; // 4e
uint8_t interlaced; // 50
uint8_t quant_type; // bool, written to 528
uint8_t quarter_sample; // bool, written to 548
uint8_t short_video_header; // bool, negated written to 528 shifted by 1
uint8_t u54; // bool, written to 0x740
uint8_t vop_coding_type; // 55
uint8_t rounding_control; // 56
uint8_t alternate_vertical_scan_flag; // 57 bool
uint8_t top_field_first; // bool, written to vuc
uint8_t quant_type; // bool, written to 528
uint8_t quarter_sample; // bool, written to 548
uint8_t short_video_header; // bool, negated written to 528 shifted by 1
uint8_t u54; // bool, written to 0x740
uint8_t vop_coding_type; // 55
uint8_t rounding_control; // 56
uint8_t alternate_vertical_scan_flag; // 57 bool
uint8_t top_field_first; // bool, written to vuc
uint8_t pad4[3]; // 59, 5a, 5b, contains garbage on blob
uint8_t pad4[3]; // 59, 5a, 5b, contains garbage on blob
uint32_t intra[0x10]; // 5c
uint32_t non_intra[0x10]; // 9c
uint32_t pad5[0x10]; // bc what does this do?
// udc..uff pad?
uint32_t intra[0x10]; // 5c
uint32_t non_intra[0x10]; // 9c
uint32_t pad5[0x10]; // bc what does this do?
// udc..uff pad?
};
// Full version, with data pumped from BSP
struct vc1_picparm_vp {
uint32_t bucket_size; // 00
uint32_t pad; // 04
uint32_t bucket_size; // 00
uint32_t pad; // 04
uint32_t inter_ring_data_size; // 08
uint32_t unk0c; // stride 1
uint32_t unk10; // stride 2
uint32_t ofs[6]; // 14..28 ofs
uint32_t inter_ring_data_size; // 08
uint32_t unk0c; // stride 1
uint32_t unk10; // stride 2
uint32_t ofs[6]; // 14..28 ofs
uint16_t width; // 2c
uint16_t height; // 2e
uint16_t width; // 2c
uint16_t height; // 2e
uint8_t profile; // 30 0 = simple, 1 = main, 2 = advanced
uint8_t loopfilter; // 31 written into vuc
uint8_t fastuvmc; // 32, written into vuc
uint8_t dquant; // 33
uint8_t profile; // 30 0 = simple, 1 = main, 2 = advanced
uint8_t loopfilter; // 31 written into vuc
uint8_t fastuvmc; // 32, written into vuc
uint8_t dquant; // 33
uint8_t overlap; // 34
uint8_t quantizer; // 35
uint8_t u36; // 36, bool
uint8_t pad2; // 37, to align to 0x38
uint8_t overlap; // 34
uint8_t quantizer; // 35
uint8_t u36; // 36, bool
uint8_t pad2; // 37, to align to 0x38
};
struct h264_picparm_vp { // 700..a00
uint16_t width, height;
uint32_t stride1, stride2; // 04 08
uint32_t ofs[6]; // 0c..24 in-image offset
uint16_t width, height;
uint32_t stride1, stride2; // 04 08
uint32_t ofs[6]; // 0c..24 in-image offset
uint32_t tmp_stride;
uint32_t bucket_size; // 28 bucket size
uint32_t inter_ring_data_size; // 2c
uint32_t tmp_stride;
uint32_t bucket_size; // 28 bucket size
uint32_t inter_ring_data_size; // 2c
unsigned mb_adaptive_frame_field_flag : 1; // 0
unsigned direct_8x8_inference_flag : 1; // 1 0x02: into vuc ofs 56
unsigned weighted_pred_flag : 1; // 2 0x04
unsigned constrained_intra_pred_flag : 1; // 3 0x08: into vuc ofs 68
unsigned is_reference : 1; // 4
unsigned interlace : 1; // 5 field_pic_flag
unsigned bottom_field_flag : 1; // 6
unsigned second_field : 1; // 7 0x80: nfi yet
unsigned mb_adaptive_frame_field_flag : 1; // 0
unsigned direct_8x8_inference_flag : 1; // 1 0x02: into vuc ofs 56
unsigned weighted_pred_flag : 1; // 2 0x04
unsigned constrained_intra_pred_flag : 1; // 3 0x08: into vuc ofs 68
unsigned is_reference : 1; // 4
unsigned interlace : 1; // 5 field_pic_flag
unsigned bottom_field_flag : 1; // 6
unsigned second_field : 1; // 7 0x80: nfi yet
signed log2_max_frame_num_minus4 : 4; // 31 0..3
unsigned chroma_format_idc : 2; // 31 4..5
unsigned pic_order_cnt_type : 2; // 31 6..7
signed pic_init_qp_minus26 : 6; // 32 0..5
signed chroma_qp_index_offset : 5; // 32 6..10
signed second_chroma_qp_index_offset : 5; // 32 11..15
signed log2_max_frame_num_minus4 : 4; // 31 0..3
unsigned chroma_format_idc : 2; // 31 4..5
unsigned pic_order_cnt_type : 2; // 31 6..7
signed pic_init_qp_minus26 : 6; // 32 0..5
signed chroma_qp_index_offset : 5; // 32 6..10
signed second_chroma_qp_index_offset : 5; // 32 11..15
unsigned weighted_bipred_idc : 2; // 34 0..1
unsigned fifo_dec_index : 7; // 34 2..8
unsigned tmp_idx : 5; // 34 9..13
unsigned frame_number : 16; // 34 14..29
unsigned u34_3030 : 1; // 34 30..30 pp.u34[30:30]
unsigned u34_3131 : 1; // 34 31..31 pad?
unsigned weighted_bipred_idc : 2; // 34 0..1
unsigned fifo_dec_index : 7; // 34 2..8
unsigned tmp_idx : 5; // 34 9..13
unsigned frame_number : 16; // 34 14..29
unsigned u34_3030 : 1; // 34 30..30 pp.u34[30:30]
unsigned u34_3131 : 1; // 34 31..31 pad?
uint32_t field_order_cnt[2]; // 38, 3c
uint32_t field_order_cnt[2]; // 38, 3c
struct { // 40
unsigned fifo_idx : 7; // 00 0..6
unsigned tmp_idx : 5; // 00 7..11
unsigned top_is_reference : 1; // 00 12
unsigned bottom_is_reference : 1; // 00 13
unsigned is_long_term : 1; // 00 14
unsigned notseenyet : 1; // 00 15 pad?
unsigned field_pic_flag : 1; // 00 16
unsigned top_field_marking : 4; // 00 17..20
unsigned bottom_field_marking : 4; // 00 21..24
unsigned pad : 7; // 00 d25..31
struct { // 40
unsigned fifo_idx : 7; // 00 0..6
unsigned tmp_idx : 5; // 00 7..11
unsigned top_is_reference : 1; // 00 12
unsigned bottom_is_reference : 1; // 00 13
unsigned is_long_term : 1; // 00 14
unsigned notseenyet : 1; // 00 15 pad?
unsigned field_pic_flag : 1; // 00 16
unsigned top_field_marking : 4; // 00 17..20
unsigned bottom_field_marking : 4; // 00 21..24
unsigned pad : 7; // 00 d25..31
uint32_t field_order_cnt[2]; // 04,08
uint32_t frame_idx; // 0c
} refs[0x10];
uint32_t field_order_cnt[2]; // 04,08
uint32_t frame_idx; // 0c
} refs[0x10];
uint8_t m4x4[6][16]; // 140
uint8_t m8x8[2][64]; // 1a0
uint32_t u220; // 220 number of extra reorder_list to append?
uint8_t u224[0x20]; // 224..244 reorder_list append ?
uint8_t nfi244[0xb0]; // add some pad to make sure nulls are read
uint8_t m4x4[6][16]; // 140
uint8_t m8x8[2][64]; // 1a0
uint32_t u220; // 220 number of extra reorder_list to append?
uint8_t u224[0x20]; // 224..244 reorder_list append ?
uint8_t nfi244[0xb0]; // add some pad to make sure nulls are read
};
static void

View File

@ -65,18 +65,18 @@ PUSH_KICK(struct nouveau_pushbuf *push)
static inline uint32_t
nouveau_screen_transfer_flags(unsigned pipe)
{
uint32_t flags = 0;
uint32_t flags = 0;
if (!(pipe & PIPE_TRANSFER_UNSYNCHRONIZED)) {
if (pipe & PIPE_TRANSFER_READ)
flags |= NOUVEAU_BO_RD;
if (pipe & PIPE_TRANSFER_WRITE)
flags |= NOUVEAU_BO_WR;
if (pipe & PIPE_TRANSFER_DONTBLOCK)
flags |= NOUVEAU_BO_NOBLOCK;
}
if (!(pipe & PIPE_TRANSFER_UNSYNCHRONIZED)) {
if (pipe & PIPE_TRANSFER_READ)
flags |= NOUVEAU_BO_RD;
if (pipe & PIPE_TRANSFER_WRITE)
flags |= NOUVEAU_BO_WR;
if (pipe & PIPE_TRANSFER_DONTBLOCK)
flags |= NOUVEAU_BO_NOBLOCK;
}
return flags;
return flags;
}
extern struct pipe_screen *

View File

@ -172,6 +172,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
return 0;
case PIPE_CAP_VENDOR_ID:

View File

@ -16,6 +16,7 @@
#include "nv50/nv50_program.h"
#include "nv50/nv50_resource.h"
#include "nv50/nv50_transfer.h"
#include "nv50/nv50_query.h"
#include "nouveau_context.h"
#include "nouveau_debug.h"
@ -195,17 +196,6 @@ void nv50_default_kick_notify(struct nouveau_pushbuf *);
/* nv50_draw.c */
extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
/* nv50_query.c */
void nv50_init_query_functions(struct nv50_context *);
void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t method,
struct pipe_query *, unsigned result_offset);
void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
void nva0_so_target_save_offset(struct pipe_context *,
struct pipe_stream_output_target *,
unsigned index, bool seralize);
#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
/* nv50_shader_state.c */
void nv50_vertprog_validate(struct nv50_context *);
void nv50_gmtyprog_validate(struct nv50_context *);

View File

@ -336,7 +336,6 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
info->io.ucpCBSlot = 15;
info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
info->io.genUserClip = prog->vp.clpd_nr;
info->io.sampleInterp = prog->fp.sample_interp;
info->io.resInfoCBSlot = 15;
info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
@ -374,6 +373,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
prog->code = info->bin.code;
prog->code_size = info->bin.codeSize;
prog->fixups = info->bin.relocData;
prog->interps = info->bin.interpData;
prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
prog->tls_space = info->bin.tlsSpace;
@ -420,8 +420,8 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
switch (prog->type) {
case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break;
case PIPE_SHADER_GEOMETRY: heap = nv50->screen->fp_code_heap; break;
case PIPE_SHADER_FRAGMENT: heap = nv50->screen->gp_code_heap; break;
case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break;
case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break;
default:
assert(!"invalid program type");
return false;
@ -456,6 +456,10 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
if (prog->fixups)
nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0);
if (prog->interps)
nv50_ir_change_interp(prog->interps, prog->code,
prog->fp.force_persample_interp,
false /* flatshade */);
nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
(prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,

View File

@ -86,7 +86,7 @@ struct nv50_program {
uint32_t interp; /* 0x1988 */
uint32_t colors; /* 0x1904 */
uint8_t has_samplemask;
uint8_t sample_interp;
uint8_t force_persample_interp;
} fp;
struct {
@ -99,6 +99,7 @@ struct nv50_program {
} gp;
void *fixups; /* relocation records */
void *interps; /* interpolation records */
struct nouveau_heap *mem;

View File

@ -25,356 +25,46 @@
#define NV50_PUSH_EXPLICIT_SPACE_CHECKING
#include "nv50/nv50_context.h"
#include "nv_object.xml.h"
#define NV50_QUERY_STATE_READY 0
#define NV50_QUERY_STATE_ACTIVE 1
#define NV50_QUERY_STATE_ENDED 2
#define NV50_QUERY_STATE_FLUSHED 3
/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
* (since we use only a single GPU channel per screen) will not work properly.
*
* The first is not that big of an issue because OpenGL does not allow nested
* queries anyway.
*/
struct nv50_query {
uint32_t *data;
uint16_t type;
uint16_t index;
uint32_t sequence;
struct nouveau_bo *bo;
uint32_t base;
uint32_t offset; /* base + i * 32 */
uint8_t state;
bool is64bit;
int nesting; /* only used for occlusion queries */
struct nouveau_mm_allocation *mm;
struct nouveau_fence *fence;
};
#define NV50_QUERY_ALLOC_SPACE 256
static inline struct nv50_query *
nv50_query(struct pipe_query *pipe)
{
return (struct nv50_query *)pipe;
}
static bool
nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size)
{
struct nv50_screen *screen = nv50->screen;
int ret;
if (q->bo) {
nouveau_bo_ref(NULL, &q->bo);
if (q->mm) {
if (q->state == NV50_QUERY_STATE_READY)
nouveau_mm_free(q->mm);
else
nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work,
q->mm);
}
}
if (size) {
q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
if (!q->bo)
return false;
q->offset = q->base;
ret = nouveau_bo_map(q->bo, 0, screen->base.client);
if (ret) {
nv50_query_allocate(nv50, q, 0);
return false;
}
q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
}
return true;
}
static void
nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
{
nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0);
nouveau_fence_ref(NULL, &nv50_query(pq)->fence);
FREE(nv50_query(pq));
}
#include "nv50/nv50_query.h"
#include "nv50/nv50_query_hw.h"
static struct pipe_query *
nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
nv50_create_query(struct pipe_context *pipe, unsigned type, unsigned index)
{
struct nv50_context *nv50 = nv50_context(pipe);
struct nv50_query *q;
q = CALLOC_STRUCT(nv50_query);
if (!q)
return NULL;
if (!nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE)) {
FREE(q);
return NULL;
}
q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
type == PIPE_QUERY_PRIMITIVES_EMITTED ||
type == PIPE_QUERY_SO_STATISTICS ||
type == PIPE_QUERY_PIPELINE_STATISTICS);
q->type = type;
if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
q->offset -= 32;
q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! */
}
q = nv50_hw_create_query(nv50, type, index);
return (struct pipe_query *)q;
}
static void
nv50_query_get(struct nouveau_pushbuf *push, struct nv50_query *q,
unsigned offset, uint32_t get)
nv50_destroy_query(struct pipe_context *pipe, struct pipe_query *pq)
{
offset += q->offset;
PUSH_SPACE(push, 5);
PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4);
PUSH_DATAh(push, q->bo->offset + offset);
PUSH_DATA (push, q->bo->offset + offset);
PUSH_DATA (push, q->sequence);
PUSH_DATA (push, get);
struct nv50_query *q = nv50_query(pq);
q->funcs->destroy_query(nv50_context(pipe), q);
}
static boolean
nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
nv50_begin_query(struct pipe_context *pipe, struct pipe_query *pq)
{
struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_query *q = nv50_query(pq);
/* For occlusion queries we have to change the storage, because a previous
* query might set the initial render conition to false even *after* we re-
* initialized it to true.
*/
if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
q->offset += 32;
q->data += 32 / sizeof(*q->data);
if (q->offset - q->base == NV50_QUERY_ALLOC_SPACE)
nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE);
/* XXX: can we do this with the GPU, and sync with respect to a previous
* query ?
*/
q->data[0] = q->sequence; /* initialize sequence */
q->data[1] = 1; /* initial render condition = true */
q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
q->data[5] = 0;
}
if (!q->is64bit)
q->data[0] = q->sequence++; /* the previously used one */
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
q->nesting = nv50->screen->num_occlusion_queries_active++;
if (q->nesting) {
nv50_query_get(push, q, 0x10, 0x0100f002);
} else {
PUSH_SPACE(push, 4);
BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
PUSH_DATA (push, 1);
}
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
nv50_query_get(push, q, 0x10, 0x06805002);
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
nv50_query_get(push, q, 0x10, 0x05805002);
break;
case PIPE_QUERY_SO_STATISTICS:
nv50_query_get(push, q, 0x20, 0x05805002);
nv50_query_get(push, q, 0x30, 0x06805002);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
nv50_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */
nv50_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */
nv50_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */
nv50_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */
nv50_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */
nv50_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */
nv50_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */
nv50_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */
break;
case PIPE_QUERY_TIME_ELAPSED:
nv50_query_get(push, q, 0x10, 0x00005002);
break;
default:
break;
}
q->state = NV50_QUERY_STATE_ACTIVE;
return true;
return q->funcs->begin_query(nv50_context(pipe), q);
}
static void
nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
nv50_end_query(struct pipe_context *pipe, struct pipe_query *pq)
{
struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_query *q = nv50_query(pq);
q->state = NV50_QUERY_STATE_ENDED;
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
nv50_query_get(push, q, 0, 0x0100f002);
if (--nv50->screen->num_occlusion_queries_active == 0) {
PUSH_SPACE(push, 2);
BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
PUSH_DATA (push, 0);
}
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
nv50_query_get(push, q, 0, 0x06805002);
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
nv50_query_get(push, q, 0, 0x05805002);
break;
case PIPE_QUERY_SO_STATISTICS:
nv50_query_get(push, q, 0x00, 0x05805002);
nv50_query_get(push, q, 0x10, 0x06805002);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
nv50_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
nv50_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
nv50_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
nv50_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
nv50_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
nv50_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
nv50_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
nv50_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
break;
case PIPE_QUERY_TIMESTAMP:
q->sequence++;
/* fall through */
case PIPE_QUERY_TIME_ELAPSED:
nv50_query_get(push, q, 0, 0x00005002);
break;
case PIPE_QUERY_GPU_FINISHED:
q->sequence++;
nv50_query_get(push, q, 0, 0x1000f010);
break;
case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
q->sequence++;
nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
/* This query is not issued on GPU because disjoint is forced to false */
q->state = NV50_QUERY_STATE_READY;
break;
default:
assert(0);
break;
}
if (q->is64bit)
nouveau_fence_ref(nv50->screen->base.fence.current, &q->fence);
}
static inline void
nv50_query_update(struct nv50_query *q)
{
if (q->is64bit) {
if (nouveau_fence_signalled(q->fence))
q->state = NV50_QUERY_STATE_READY;
} else {
if (q->data[0] == q->sequence)
q->state = NV50_QUERY_STATE_READY;
}
q->funcs->end_query(nv50_context(pipe), q);
}
static boolean
nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
boolean wait, union pipe_query_result *result)
{
struct nv50_context *nv50 = nv50_context(pipe);
struct nv50_query *q = nv50_query(pq);
uint64_t *res64 = (uint64_t *)result;
uint32_t *res32 = (uint32_t *)result;
uint8_t *res8 = (uint8_t *)result;
uint64_t *data64 = (uint64_t *)q->data;
int i;
if (q->state != NV50_QUERY_STATE_READY)
nv50_query_update(q);
if (q->state != NV50_QUERY_STATE_READY) {
if (!wait) {
/* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
if (q->state != NV50_QUERY_STATE_FLUSHED) {
q->state = NV50_QUERY_STATE_FLUSHED;
PUSH_KICK(nv50->base.pushbuf);
}
return false;
}
if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
return false;
}
q->state = NV50_QUERY_STATE_READY;
switch (q->type) {
case PIPE_QUERY_GPU_FINISHED:
res8[0] = true;
break;
case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
res64[0] = q->data[1] - q->data[5];
break;
case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
res64[0] = data64[0] - data64[2];
break;
case PIPE_QUERY_SO_STATISTICS:
res64[0] = data64[0] - data64[4];
res64[1] = data64[2] - data64[6];
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
for (i = 0; i < 8; ++i)
res64[i] = data64[i * 2] - data64[16 + i * 2];
break;
case PIPE_QUERY_TIMESTAMP:
res64[0] = data64[1];
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
res64[0] = 1000000000;
res8[8] = false;
break;
case PIPE_QUERY_TIME_ELAPSED:
res64[0] = data64[1] - data64[3];
break;
case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
res32[0] = q->data[1];
break;
default:
return false;
}
return true;
}
void
nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
nv50_get_query_result(struct pipe_context *pipe, struct pipe_query *pq,
boolean wait, union pipe_query_result *result)
{
struct nv50_query *q = nv50_query(pq);
unsigned offset = q->offset;
PUSH_SPACE(push, 5);
PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
PUSH_DATAh(push, q->bo->offset + offset);
PUSH_DATA (push, q->bo->offset + offset);
PUSH_DATA (push, q->sequence);
PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
return q->funcs->get_query_result(nv50_context(pipe), q, wait, result);
}
static void
@ -384,7 +74,8 @@ nv50_render_condition(struct pipe_context *pipe,
{
struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_query *q;
struct nv50_query *q = nv50_query(pq);
struct nv50_hw_query *hq = nv50_hw_query(q);
uint32_t cond;
bool wait =
mode != PIPE_RENDER_COND_NO_WAIT &&
@ -394,7 +85,6 @@ nv50_render_condition(struct pipe_context *pipe,
cond = NV50_3D_COND_MODE_ALWAYS;
}
else {
q = nv50_query(pq);
/* NOTE: comparison of 2 queries only works if both have completed */
switch (q->type) {
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
@ -405,7 +95,7 @@ nv50_render_condition(struct pipe_context *pipe,
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
if (likely(!condition)) {
if (unlikely(q->nesting))
if (unlikely(hq->nesting))
cond = wait ? NV50_3D_COND_MODE_NOT_EQUAL :
NV50_3D_COND_MODE_ALWAYS;
else
@ -440,48 +130,15 @@ nv50_render_condition(struct pipe_context *pipe,
PUSH_DATA (push, 0);
}
PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
BEGIN_NV04(push, NV50_3D(COND_ADDRESS_HIGH), 3);
PUSH_DATAh(push, q->bo->offset + q->offset);
PUSH_DATA (push, q->bo->offset + q->offset);
PUSH_DATAh(push, hq->bo->offset + hq->offset);
PUSH_DATA (push, hq->bo->offset + hq->offset);
PUSH_DATA (push, cond);
BEGIN_NV04(push, NV50_2D(COND_ADDRESS_HIGH), 2);
PUSH_DATAh(push, q->bo->offset + q->offset);
PUSH_DATA (push, q->bo->offset + q->offset);
}
void
nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
struct pipe_query *pq, unsigned result_offset)
{
struct nv50_query *q = nv50_query(pq);
nv50_query_update(q);
if (q->state != NV50_QUERY_STATE_READY)
nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, push->client);
q->state = NV50_QUERY_STATE_READY;
BEGIN_NV04(push, SUBC_3D(method), 1);
PUSH_DATA (push, q->data[result_offset / 4]);
}
void
nva0_so_target_save_offset(struct pipe_context *pipe,
struct pipe_stream_output_target *ptarg,
unsigned index, bool serialize)
{
struct nv50_so_target *targ = nv50_so_target(ptarg);
if (serialize) {
struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
PUSH_SPACE(push, 2);
BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
PUSH_DATA (push, 0);
}
nv50_query(targ->pq)->index = index;
nv50_query_end(pipe, targ->pq);
PUSH_DATAh(push, hq->bo->offset + hq->offset);
PUSH_DATA (push, hq->bo->offset + hq->offset);
}
void
@ -489,10 +146,10 @@ nv50_init_query_functions(struct nv50_context *nv50)
{
struct pipe_context *pipe = &nv50->base.pipe;
pipe->create_query = nv50_query_create;
pipe->destroy_query = nv50_query_destroy;
pipe->begin_query = nv50_query_begin;
pipe->end_query = nv50_query_end;
pipe->get_query_result = nv50_query_result;
pipe->create_query = nv50_create_query;
pipe->destroy_query = nv50_destroy_query;
pipe->begin_query = nv50_begin_query;
pipe->end_query = nv50_end_query;
pipe->get_query_result = nv50_get_query_result;
pipe->render_condition = nv50_render_condition;
}

View File

@ -0,0 +1,33 @@
#ifndef __NV50_QUERY_H__
#define __NV50_QUERY_H__

#include "pipe/p_context.h"

#include "nouveau_context.h"

struct nv50_context;
struct nv50_query;

/* Per-implementation vtable for query objects.  Each backend (see the
 * HW-backed implementation in nv50_query_hw.c) fills this in so the
 * generic pipe_context query hooks can dispatch through nv50_query::funcs. */
struct nv50_query_funcs {
   void (*destroy_query)(struct nv50_context *, struct nv50_query *);
   boolean (*begin_query)(struct nv50_context *, struct nv50_query *);
   void (*end_query)(struct nv50_context *, struct nv50_query *);
   boolean (*get_query_result)(struct nv50_context *, struct nv50_query *,
                               boolean, union pipe_query_result *);
};

/* Base query object.  Implementations embed this as their first member
 * (e.g. struct nv50_hw_query), so the downcast in nv50_hw_query() is valid. */
struct nv50_query {
   const struct nv50_query_funcs *funcs; /* implementation dispatch table */
   uint16_t type;  /* PIPE_QUERY_* or a driver-specific type */
   uint16_t index; /* e.g. stream-output buffer index */
};

/* Downcast from the opaque gallium handle; pipe_query is never defined,
 * it only ever wraps a struct nv50_query. */
static inline struct nv50_query *
nv50_query(struct pipe_query *pipe)
{
   return (struct nv50_query *)pipe;
}

void nv50_init_query_functions(struct nv50_context *);

#endif

View File

@ -0,0 +1,406 @@
/*
* Copyright 2011 Christoph Bumiller
* Copyright 2015 Samuel Pitoiset
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#define NV50_PUSH_EXPLICIT_SPACE_CHECKING
#include "nv50/nv50_context.h"
#include "nv50/nv50_query_hw.h"
#include "nv_object.xml.h"
#define NV50_HW_QUERY_STATE_READY 0
#define NV50_HW_QUERY_STATE_ACTIVE 1
#define NV50_HW_QUERY_STATE_ENDED 2
#define NV50_HW_QUERY_STATE_FLUSHED 3
/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
* (since we use only a single GPU channel per screen) will not work properly.
*
* The first is not that big of an issue because OpenGL does not allow nested
* queries anyway.
*/
#define NV50_HW_QUERY_ALLOC_SPACE 256
/* (Re)allocate the GART-backed result buffer of a query.
 *
 * Any previous buffer is released first: if the query is READY the
 * suballocation is freed immediately, otherwise the free is deferred
 * through the current fence so the GPU cannot still be writing to it.
 * Calling with size == 0 therefore acts as a pure free (used by
 * nv50_hw_destroy_query and by the error path below).
 *
 * Returns false if suballocation or CPU mapping fails.
 */
static bool
nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q,
                       int size)
{
   struct nv50_screen *screen = nv50->screen;
   struct nv50_hw_query *hq = nv50_hw_query(q);
   int ret;

   if (hq->bo) {
      nouveau_bo_ref(NULL, &hq->bo);
      if (hq->mm) {
         if (hq->state == NV50_HW_QUERY_STATE_READY)
            nouveau_mm_free(hq->mm);
         else
            /* still in flight: free only once the fence signals */
            nouveau_fence_work(screen->base.fence.current,
                               nouveau_mm_free_work, hq->mm);
      }
   }
   if (size) {
      hq->mm = nouveau_mm_allocate(screen->base.mm_GART, size,
                                   &hq->bo, &hq->base_offset);
      if (!hq->bo)
         return false;
      hq->offset = hq->base_offset;

      ret = nouveau_bo_map(hq->bo, 0, screen->base.client);
      if (ret) {
         /* map failed: release what we just allocated */
         nv50_hw_query_allocate(nv50, q, 0);
         return false;
      }
      hq->data = (uint32_t *)((uint8_t *)hq->bo->map + hq->base_offset);
   }
   return true;
}
/* Emit a QUERY get: ask the 3D engine to write the report selected by
 * 'get' (together with hq->sequence) at 'offset' bytes into the query's
 * buffer.  'offset' is relative to the query's current storage slot. */
static void
nv50_hw_query_get(struct nouveau_pushbuf *push, struct nv50_query *q,
                  unsigned offset, uint32_t get)
{
   struct nv50_hw_query *hq = nv50_hw_query(q);

   offset += hq->offset;

   PUSH_SPACE(push, 5);
   PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
   BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4);
   PUSH_DATAh(push, hq->bo->offset + offset);
   PUSH_DATA (push, hq->bo->offset + offset);
   PUSH_DATA (push, hq->sequence);
   PUSH_DATA (push, get);
}
static inline void
nv50_hw_query_update(struct nv50_query *q)
{
struct nv50_hw_query *hq = nv50_hw_query(q);
if (hq->is64bit) {
if (nouveau_fence_signalled(hq->fence))
hq->state = NV50_HW_QUERY_STATE_READY;
} else {
if (hq->data[0] == hq->sequence)
hq->state = NV50_HW_QUERY_STATE_READY;
}
}
/* Destroy a HW query: release its result buffer (size 0 == free only),
 * drop the fence reference, then free the object itself. */
static void
nv50_hw_destroy_query(struct nv50_context *nv50, struct nv50_query *q)
{
   struct nv50_hw_query *hq = nv50_hw_query(q);

   nv50_hw_query_allocate(nv50, q, 0);
   nouveau_fence_ref(NULL, &hq->fence);
   FREE(hq);
}
/* Start a query: emit the "begin" reports at the upper offsets
 * (0x10 and above) of the query's storage so that nv50_hw_end_query can
 * later write the "end" reports at the lower offsets and
 * nv50_hw_get_query_result can compute the delta. */
static boolean
nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_hw_query *hq = nv50_hw_query(q);

   /* For occlusion queries we have to change the storage, because a previous
    * query might set the initial render condition to false even *after* we re-
    * initialized it to true.
    */
   if (hq->rotate) {
      /* advance to the next slot; wrap by reallocating once we hit the end */
      hq->offset += hq->rotate;
      hq->data += hq->rotate / sizeof(*hq->data);
      if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE)
         nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE);

      /* XXX: can we do this with the GPU, and sync with respect to a previous
       * query ?
       */
      hq->data[0] = hq->sequence; /* initialize sequence */
      hq->data[1] = 1; /* initial render condition = true */
      hq->data[4] = hq->sequence + 1; /* for comparison COND_MODE */
      hq->data[5] = 0;
   }
   if (!hq->is64bit)
      hq->data[0] = hq->sequence++; /* the previously used one */

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      /* only reset/enable the sample counter for the outermost query */
      hq->nesting = nv50->screen->num_occlusion_queries_active++;
      if (hq->nesting) {
         nv50_hw_query_get(push, q, 0x10, 0x0100f002);
      } else {
         PUSH_SPACE(push, 4);
         BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
         PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
         BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
         PUSH_DATA (push, 1);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nv50_hw_query_get(push, q, 0x10, 0x06805002);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nv50_hw_query_get(push, q, 0x10, 0x05805002);
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nv50_hw_query_get(push, q, 0x20, 0x05805002);
      nv50_hw_query_get(push, q, 0x30, 0x06805002);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nv50_hw_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */
      nv50_hw_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */
      nv50_hw_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */
      nv50_hw_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */
      nv50_hw_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */
      nv50_hw_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */
      nv50_hw_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */
      nv50_hw_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      nv50_hw_query_get(push, q, 0x10, 0x00005002);
      break;
   default:
      assert(0);
      return false;
   }
   hq->state = NV50_HW_QUERY_STATE_ACTIVE;
   return true;
}
/* End a query: emit the "end" reports at the base of the query's storage
 * (the "begin" counterparts went to the higher offsets), mark it ENDED,
 * and for 64-bit queries remember the current fence so completion can be
 * detected in nv50_hw_query_update. */
static void
nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_hw_query *hq = nv50_hw_query(q);

   hq->state = NV50_HW_QUERY_STATE_ENDED;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      nv50_hw_query_get(push, q, 0, 0x0100f002);
      /* disable the sample counter once the last active query ends */
      if (--nv50->screen->num_occlusion_queries_active == 0) {
         PUSH_SPACE(push, 2);
         BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
         PUSH_DATA (push, 0);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nv50_hw_query_get(push, q, 0, 0x06805002);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nv50_hw_query_get(push, q, 0, 0x05805002);
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nv50_hw_query_get(push, q, 0x00, 0x05805002);
      nv50_hw_query_get(push, q, 0x10, 0x06805002);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nv50_hw_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
      nv50_hw_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
      nv50_hw_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
      nv50_hw_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
      nv50_hw_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nv50_hw_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nv50_hw_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nv50_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
      break;
   case PIPE_QUERY_TIMESTAMP:
      hq->sequence++;
      /* fall through */
   case PIPE_QUERY_TIME_ELAPSED:
      nv50_hw_query_get(push, q, 0, 0x00005002);
      break;
   case PIPE_QUERY_GPU_FINISHED:
      hq->sequence++;
      nv50_hw_query_get(push, q, 0, 0x1000f010);
      break;
   case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
      hq->sequence++;
      /* q->index selects which stream-output buffer's offset to report */
      nv50_hw_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* This query is not issued on GPU because disjoint is forced to false */
      hq->state = NV50_HW_QUERY_STATE_READY;
      break;
   default:
      assert(0);
      break;
   }
   if (hq->is64bit)
      nouveau_fence_ref(nv50->screen->base.fence.current, &hq->fence);
}
/* Fetch a query's result, decoding the raw begin/end reports into the
 * union pipe_query_result layout for its type.
 *
 * If the result is not ready and wait is false, the pushbuf is flushed
 * once (state FLUSHED guards against repeated kicks for apps spinning
 * on availability) and false is returned.  With wait == true we block
 * on the buffer until the GPU is done. */
static boolean
nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
                         boolean wait, union pipe_query_result *result)
{
   struct nv50_hw_query *hq = nv50_hw_query(q);
   uint64_t *res64 = (uint64_t *)result;
   uint32_t *res32 = (uint32_t *)result;
   uint8_t *res8 = (uint8_t *)result;
   uint64_t *data64 = (uint64_t *)hq->data;
   int i;

   if (hq->state != NV50_HW_QUERY_STATE_READY)
      nv50_hw_query_update(q);

   if (hq->state != NV50_HW_QUERY_STATE_READY) {
      if (!wait) {
         /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
         if (hq->state != NV50_HW_QUERY_STATE_FLUSHED) {
            hq->state = NV50_HW_QUERY_STATE_FLUSHED;
            PUSH_KICK(nv50->base.pushbuf);
         }
         return false;
      }
      if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
         return false;
   }
   hq->state = NV50_HW_QUERY_STATE_READY;

   /* results are end-report minus begin-report (begin was written at the
    * higher offsets by nv50_hw_begin_query) */
   switch (q->type) {
   case PIPE_QUERY_GPU_FINISHED:
      res8[0] = true;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
      res64[0] = hq->data[1] - hq->data[5];
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
   case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
      res64[0] = data64[0] - data64[2];
      break;
   case PIPE_QUERY_SO_STATISTICS:
      res64[0] = data64[0] - data64[4];
      res64[1] = data64[2] - data64[6];
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      for (i = 0; i < 8; ++i)
         res64[i] = data64[i * 2] - data64[16 + i * 2];
      break;
   case PIPE_QUERY_TIMESTAMP:
      res64[0] = data64[1];
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* frequency is fixed; disjoint is always reported as false */
      res64[0] = 1000000000;
      res8[8] = false;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      res64[0] = data64[1] - data64[3];
      break;
   case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
      res32[0] = hq->data[1];
      break;
   default:
      assert(0);
      return false;
   }

   return true;
}
/* Dispatch table installed into nv50_query::funcs for HW-backed queries. */
static const struct nv50_query_funcs hw_query_funcs = {
   .destroy_query = nv50_hw_destroy_query,
   .begin_query = nv50_hw_begin_query,
   .end_query = nv50_hw_end_query,
   .get_query_result = nv50_hw_get_query_result,
};
/* Create a hardware-backed query of the given type (PIPE_QUERY_* or the
 * driver-specific NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET).
 *
 * Occlusion counters rotate through their buffer (hq->rotate) because
 * the render-condition storage must change between begins; the offset
 * is pre-decremented here since begin_query advances before use.
 *
 * Returns NULL on unknown type or allocation failure.
 */
struct nv50_query *
nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
{
   struct nv50_hw_query *hq;
   struct nv50_query *q;

   hq = CALLOC_STRUCT(nv50_hw_query);
   if (!hq)
      return NULL;

   q = &hq->base;
   q->funcs = &hw_query_funcs;
   q->type = type;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      hq->rotate = 32;
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   case PIPE_QUERY_SO_STATISTICS:
   case PIPE_QUERY_PIPELINE_STATISTICS:
      hq->is64bit = true;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_GPU_FINISHED:
   case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
      break;
   default:
      debug_printf("invalid query type: %u\n", type);
      /* free via the pointer we allocated (was FREE(q), which only worked
       * because base is the first member of nv50_hw_query) */
      FREE(hq);
      return NULL;
   }
   if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) {
      FREE(hq);
      return NULL;
   }
   if (hq->rotate) {
      /* we advance before query_begin ! */
      hq->offset -= hq->rotate;
      hq->data -= hq->rotate / sizeof(*hq->data);
   }

   return q;
}
/* Push one 32-bit word of a query's result as the data of the given 3D
 * method.  Blocks on the result buffer if the result is not ready yet.
 * result_offset is in bytes into the query's data. */
void
nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
                             struct nv50_query *q, unsigned result_offset)
{
   struct nv50_hw_query *hq = nv50_hw_query(q);

   nv50_hw_query_update(q);
   if (hq->state != NV50_HW_QUERY_STATE_READY)
      nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, push->client);
   hq->state = NV50_HW_QUERY_STATE_READY;

   BEGIN_NV04(push, SUBC_3D(method), 1);
   PUSH_DATA (push, hq->data[result_offset / 4]);
}
/* Make the FIFO stall until the query's sequence number has been written,
 * using the nv84+ semaphore ACQUIRE_EQUAL mechanism (GPU-side wait, no
 * CPU involvement). */
void
nv84_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nv50_query *q)
{
   struct nv50_hw_query *hq = nv50_hw_query(q);
   unsigned offset = hq->offset;

   PUSH_SPACE(push, 5);
   PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
   PUSH_DATAh(push, hq->bo->offset + offset);
   PUSH_DATA (push, hq->bo->offset + offset);
   PUSH_DATA (push, hq->sequence);
   PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
}

View File

@ -0,0 +1,40 @@
#ifndef __NV50_QUERY_HW_H__
#define __NV50_QUERY_HW_H__

#include "nouveau_fence.h"
#include "nouveau_mm.h"

#include "nv50_query.h"

/* Driver-specific query type (numbered past the gallium PIPE_QUERY_TYPES):
 * reports the current write offset of a stream-output buffer. */
#define NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)

/* Hardware-backed query; embeds nv50_query as its first member so the
 * nv50_hw_query() downcast below is valid. */
struct nv50_hw_query {
   struct nv50_query base;
   uint32_t *data;          /* CPU mapping of the result buffer */
   uint32_t sequence;       /* sequence number written with each report */
   struct nouveau_bo *bo;   /* GART buffer holding the GPU-written reports */
   uint32_t base_offset;    /* start of this query's suballocation in bo */
   uint32_t offset; /* base + i * rotate */
   uint8_t state;           /* NV50_HW_QUERY_STATE_* (see nv50_query_hw.c) */
   bool is64bit;            /* result completion tracked via fence */
   uint8_t rotate;          /* storage advance per begin (occlusion only) */
   int nesting; /* only used for occlusion queries */
   struct nouveau_mm_allocation *mm;
   struct nouveau_fence *fence;
};

static inline struct nv50_hw_query *
nv50_hw_query(struct nv50_query *q)
{
   return (struct nv50_hw_query *)q;
}

struct nv50_query *
nv50_hw_create_query(struct nv50_context *, unsigned, unsigned);
void
nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t,
                             struct nv50_query *, unsigned);
void
nv84_hw_query_fifo_wait(struct nouveau_pushbuf *, struct nv50_query *);
#endif

View File

@ -32,8 +32,8 @@ nv50_resource_from_handle(struct pipe_screen * screen,
struct pipe_surface *
nv50_surface_from_buffer(struct pipe_context *pipe,
struct pipe_resource *pbuf,
const struct pipe_surface *templ)
struct pipe_resource *pbuf,
const struct pipe_surface *templ)
{
struct nv50_surface *sf = CALLOC_STRUCT(nv50_surface);
if (!sf)
@ -65,8 +65,8 @@ nv50_surface_from_buffer(struct pipe_context *pipe,
static struct pipe_surface *
nv50_surface_create(struct pipe_context *pipe,
struct pipe_resource *pres,
const struct pipe_surface *templ)
struct pipe_resource *pres,
const struct pipe_surface *templ)
{
/* surfaces are assumed to be miptrees all over the place. */
assert(pres->target != PIPE_BUFFER);

View File

@ -180,6 +180,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_SHAREABLE_SHADERS:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return 1; /* class_3d >= NVA0_3D_CLASS; */
@ -191,6 +193,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_INDEP_BLEND_FUNC:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
return class_3d >= NVA3_3D_CLASS;
/* unsupported caps */
@ -215,8 +218,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
case PIPE_CAP_VENDOR_ID:

View File

@ -27,6 +27,7 @@
#include "util/u_inlines.h"
#include "nv50/nv50_context.h"
#include "nv50/nv50_query_hw.h"
void
nv50_constbufs_validate(struct nv50_context *nv50)
@ -168,11 +169,23 @@ nv50_fragprog_validate(struct nv50_context *nv50)
{
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_program *fp = nv50->fragprog;
struct pipe_rasterizer_state *rast = &nv50->rast->pipe;
fp->fp.sample_interp = nv50->min_samples > 1;
if (fp->fp.force_persample_interp != rast->force_persample_interp) {
/* Force the program to be reuploaded, which will trigger interp fixups
* to get applied
*/
if (fp->mem)
nouveau_heap_free(&fp->mem);
fp->fp.force_persample_interp = rast->force_persample_interp;
}
if (fp->mem && !(nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_MIN_SAMPLES)))
return;
if (!nv50_program_validate(nv50, fp))
return;
return;
nv50_program_update_context_state(nv50, fp, 1);
BEGIN_NV04(push, NV50_3D(FP_REG_ALLOC_TEMP), 1);
@ -629,7 +642,7 @@ nv50_stream_output_validate(struct nv50_context *nv50)
const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : 3;
if (n == 4 && !targ->clean)
nv84_query_fifo_wait(push, targ->pq);
nv84_hw_query_fifo_wait(push, nv50_query(targ->pq));
BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n);
PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
@ -638,8 +651,8 @@ nv50_stream_output_validate(struct nv50_context *nv50)
PUSH_DATA(push, targ->pipe.buffer_size);
if (!targ->clean) {
assert(targ->pq);
nv50_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i),
targ->pq, 0x4);
nv50_hw_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i),
nv50_query(targ->pq), 0x4);
} else {
BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
PUSH_DATA(push, 0);

View File

@ -30,6 +30,7 @@
#include "nv50/nv50_stateobj.h"
#include "nv50/nv50_context.h"
#include "nv50/nv50_query_hw.h"
#include "nv50/nv50_3d.xml.h"
#include "nv50/nv50_texture.xml.h"
@ -725,6 +726,9 @@ nv50_sp_state_create(struct pipe_context *pipe,
if (cso->stream_output.num_outputs)
prog->pipe.stream_output = cso->stream_output;
prog->translated = nv50_program_translate(
prog, nv50_context(pipe)->screen->base.device->chipset);
return (void *)prog;
}
@ -1033,7 +1037,7 @@ nv50_so_target_create(struct pipe_context *pipe,
if (nouveau_context(pipe)->screen->class_3d >= NVA0_3D_CLASS) {
targ->pq = pipe->create_query(pipe,
NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET, 0);
NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET, 0);
if (!targ->pq) {
FREE(targ);
return NULL;
@ -1056,6 +1060,24 @@ nv50_so_target_create(struct pipe_context *pipe,
return &targ->pipe;
}
static void
nva0_so_target_save_offset(struct pipe_context *pipe,
struct pipe_stream_output_target *ptarg,
unsigned index, bool serialize)
{
struct nv50_so_target *targ = nv50_so_target(ptarg);
if (serialize) {
struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
PUSH_SPACE(push, 2);
BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
PUSH_DATA (push, 0);
}
nv50_query(targ->pq)->index = index;
pipe->end_query(pipe, targ->pq);
}
static void
nv50_so_target_destroy(struct pipe_context *pipe,
struct pipe_stream_output_target *ptarg)

View File

@ -487,7 +487,7 @@ static struct state_validate {
{ nv50_validate_viewport, NV50_NEW_VIEWPORT },
{ nv50_vertprog_validate, NV50_NEW_VERTPROG },
{ nv50_gmtyprog_validate, NV50_NEW_GMTYPROG },
{ nv50_fragprog_validate, NV50_NEW_FRAGPROG |
{ nv50_fragprog_validate, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
NV50_NEW_MIN_SAMPLES },
{ nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
NV50_NEW_GMTYPROG | NV50_NEW_RASTERIZER },

View File

@ -220,10 +220,14 @@ nv50_resource_copy_region(struct pipe_context *pipe,
nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
if (m2mf) {
struct nv50_miptree *src_mt = nv50_miptree(src);
struct nv50_miptree *dst_mt = nv50_miptree(dst);
struct nv50_m2mf_rect drect, srect;
unsigned i;
unsigned nx = util_format_get_nblocksx(src->format, src_box->width);
unsigned ny = util_format_get_nblocksy(src->format, src_box->height);
unsigned nx = util_format_get_nblocksx(src->format, src_box->width)
<< src_mt->ms_x;
unsigned ny = util_format_get_nblocksy(src->format, src_box->height)
<< src_mt->ms_y;
nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, dsty, dstz);
nv50_m2mf_rect_setup(&srect, src, src_level,
@ -232,15 +236,15 @@ nv50_resource_copy_region(struct pipe_context *pipe,
for (i = 0; i < src_box->depth; ++i) {
nv50_m2mf_transfer_rect(nv50, &drect, &srect, nx, ny);
if (nv50_miptree(dst)->layout_3d)
if (dst_mt->layout_3d)
drect.z++;
else
drect.base += nv50_miptree(dst)->layer_stride;
drect.base += dst_mt->layer_stride;
if (nv50_miptree(src)->layout_3d)
if (src_mt->layout_3d)
srect.z++;
else
srect.base += nv50_miptree(src)->layer_stride;
srect.base += src_mt->layer_stride;
}
return;
}
@ -270,7 +274,7 @@ nv50_resource_copy_region(struct pipe_context *pipe,
static void
nv50_clear_render_target(struct pipe_context *pipe,
struct pipe_surface *dst,
const union pipe_color_union *color,
const union pipe_color_union *color,
unsigned dstx, unsigned dsty,
unsigned width, unsigned height)
{

View File

@ -27,6 +27,7 @@
#include "translate/translate.h"
#include "nv50/nv50_context.h"
#include "nv50/nv50_query_hw.h"
#include "nv50/nv50_resource.h"
#include "nv50/nv50_3d.xml.h"
@ -745,7 +746,8 @@ nva0_draw_stream_output(struct nv50_context *nv50,
PUSH_DATA (push, 0);
BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1);
PUSH_DATA (push, so->stride);
nv50_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES, so->pq, 0x4);
nv50_hw_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES,
nv50_query(so->pq), 0x4);
BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
PUSH_DATA (push, 0);

View File

@ -27,33 +27,33 @@
static void dump_comm_vp(struct nouveau_vp3_decoder *dec, struct comm *comm, u32 comm_seq,
struct nouveau_bo *inter_bo, unsigned slice_size)
{
unsigned i, idx = comm->pvp_cur_index & 0xf;
debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage);
unsigned i, idx = comm->pvp_cur_index & 0xf;
debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage);
#if 0
debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs);
debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index);
debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs);
debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index);
for (i = 0; i != comm->irq_index; ++i)
debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]);
for (i = 0; i != comm->parse_endpos_index; ++i)
debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]);
for (i = 0; i != comm->irq_index; ++i)
debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]);
for (i = 0; i != comm->parse_endpos_index; ++i)
debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]);
#endif
debug_printf("mb_y = %u\n", comm->mb_y[idx]);
if (comm->status_vp[idx] <= 1)
return;
debug_printf("mb_y = %u\n", comm->mb_y[idx]);
if (comm->status_vp[idx] <= 1)
return;
if ((comm->pvp_stage & 0xff) != 0xff) {
unsigned *map;
int ret = nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client);
assert(ret >= 0);
map = inter_bo->map;
for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) {
debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]);
}
munmap(inter_bo->map, inter_bo->size);
inter_bo->map = NULL;
}
assert((comm->pvp_stage & 0xff) == 0xff);
if ((comm->pvp_stage & 0xff) != 0xff) {
unsigned *map;
int ret = nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client);
assert(ret >= 0);
map = inter_bo->map;
for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) {
debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]);
}
munmap(inter_bo->map, inter_bo->size);
inter_bo->map = NULL;
}
assert((comm->pvp_stage & 0xff) == 0xff);
}
#endif

View File

@ -252,10 +252,10 @@ nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
}
}
vp->vp.clip_enable = info->io.clipDistanceMask;
for (i = 0; i < 8; ++i)
if (info->io.cullDistanceMask & (1 << i))
vp->vp.clip_mode |= 1 << (i * 4);
vp->vp.clip_enable =
(1 << (info->io.clipDistances + info->io.cullDistances)) - 1;
for (i = 0; i < info->io.cullDistances; ++i)
vp->vp.clip_mode |= 1 << ((info->io.clipDistances + i) * 4);
if (info->io.genUserClip < 0)
vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES + 1; /* prevent rebuilding */
@ -269,8 +269,6 @@ nvc0_vp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
vp->hdr[0] = 0x20061 | (1 << 10);
vp->hdr[4] = 0xff000;
vp->hdr[18] = info->io.clipDistanceMask;
return nvc0_vtgp_gen_header(vp, info);
}
@ -424,6 +422,11 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
for (i = 0; i < info->numInputs; ++i) {
m = nvc0_hdr_interp_mode(&info->in[i]);
if (info->in[i].sn == TGSI_SEMANTIC_COLOR) {
fp->fp.colors |= 1 << info->in[i].si;
if (info->in[i].sc)
fp->fp.color_interp[info->in[i].si] = m | (info->in[i].mask << 4);
}
for (c = 0; c < 4; ++c) {
if (!(info->in[i].mask & (1 << c)))
continue;
@ -531,7 +534,6 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
info->io.genUserClip = prog->vp.num_ucps;
info->io.ucpBase = 256;
info->io.ucpCBSlot = 15;
info->io.sampleInterp = prog->fp.sample_interp;
if (prog->type == PIPE_SHADER_COMPUTE) {
if (chipset >= NVISA_GK104_CHIPSET) {
@ -575,6 +577,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
prog->immd_data = info->immd.buf;
prog->immd_size = info->immd.bufSize;
prog->relocs = info->bin.relocData;
prog->interps = info->bin.interpData;
prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
prog->num_barriers = info->numBarriers;
@ -713,6 +716,23 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
if (prog->relocs)
nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0);
if (prog->interps) {
nv50_ir_change_interp(prog->interps, prog->code,
prog->fp.force_persample_interp,
prog->fp.flatshade);
for (int i = 0; i < 2; i++) {
unsigned mask = prog->fp.color_interp[i] >> 4;
unsigned interp = prog->fp.color_interp[i] & 3;
if (!mask)
continue;
prog->hdr[14] &= ~(0xff << (8 * i));
if (prog->fp.flatshade)
interp = NVC0_INTERP_FLAT;
for (int c = 0; c < 4; c++)
if (mask & (1 << c))
prog->hdr[14] |= interp << (2 * (4 * i + c));
}
}
#ifdef DEBUG
if (debug_get_bool_option("NV50_PROG_DEBUG", false))
@ -773,6 +793,7 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
FREE(prog->code); /* may be 0 for hardcoded shaders */
FREE(prog->immd_data);
FREE(prog->relocs);
FREE(prog->interps);
if (prog->type == PIPE_SHADER_COMPUTE && prog->cp.syms)
FREE(prog->cp.syms);
if (prog->tfb) {

View File

@ -45,8 +45,10 @@ struct nvc0_program {
} vp;
struct {
uint8_t early_z;
uint8_t in_pos[PIPE_MAX_SHADER_INPUTS];
uint8_t sample_interp;
uint8_t colors;
uint8_t color_interp[2];
bool force_persample_interp;
bool flatshade;
} fp;
struct {
uint32_t tess_mode; /* ~0 if defined by the other stage */
@ -61,6 +63,7 @@ struct nvc0_program {
uint8_t num_barriers;
void *relocs;
void *interps;
struct nvc0_transform_feedback_state *tfb;

View File

@ -28,6 +28,7 @@
#include "nvc0/nvc0_query.h"
#include "nvc0/nvc0_query_sw.h"
#include "nvc0/nvc0_query_hw.h"
#include "nvc0/nvc0_query_hw_metric.h"
#include "nvc0/nvc0_query_hw_sm.h"
static struct pipe_query *
@ -188,7 +189,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
count++;
} else
if (screen->base.class_3d < NVE4_3D_CLASS) {
count++;
count += 2;
}
}
}
@ -218,6 +219,17 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
return 1;
}
}
} else
if (id == NVC0_HW_METRIC_QUERY_GROUP) {
if (screen->compute) {
if (screen->base.class_3d < NVE4_3D_CLASS) {
info->name = "Performance metrics";
info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
info->max_active_queries = 1;
info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
return 1;
}
}
}
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {

View File

@ -32,7 +32,8 @@ nvc0_query(struct pipe_query *pipe)
* Driver queries groups:
*/
#define NVC0_HW_SM_QUERY_GROUP 0
#define NVC0_SW_QUERY_DRV_STAT_GROUP 1
#define NVC0_HW_METRIC_QUERY_GROUP 1
#define NVC0_SW_QUERY_DRV_STAT_GROUP 2
void nvc0_init_query_functions(struct nvc0_context *);

View File

@ -431,7 +431,7 @@ nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
id = nvc0_hw_metric_get_next_query_id(queries, id);
info->name = nvc0_hw_metric_names[id];
info->query_type = NVC0_HW_METRIC_QUERY(id);
info->group_id = -1;
info->group_id = NVC0_HW_METRIC_QUERY_GROUP;
return 1;
}
}

View File

@ -26,7 +26,8 @@ nvc0_resource_from_handle(struct pipe_screen * screen,
} else {
struct pipe_resource *res = nv50_miptree_from_handle(screen,
templ, whandle);
nv04_resource(res)->vtbl = &nvc0_miptree_vtbl;
if (res)
nv04_resource(res)->vtbl = &nvc0_miptree_vtbl;
return res;
}
}

View File

@ -179,6 +179,9 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@ -201,8 +204,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
return 0;
case PIPE_CAP_VENDOR_ID:
@ -352,45 +353,51 @@ static int
nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
enum pipe_compute_cap param, void *data)
{
uint64_t *data64 = (uint64_t *)data;
uint32_t *data32 = (uint32_t *)data;
const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass;
struct nvc0_screen *screen = nvc0_screen(pscreen);
const uint16_t obj_class = screen->compute->oclass;
#define RET(x) do { \
if (data) \
memcpy(data, x, sizeof(x)); \
return sizeof(x); \
} while (0)
switch (param) {
case PIPE_COMPUTE_CAP_GRID_DIMENSION:
data64[0] = 3;
return 8;
RET((uint64_t []) { 3 });
case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
data64[0] = (obj_class >= NVE4_COMPUTE_CLASS) ? 0x7fffffff : 65535;
data64[1] = 65535;
data64[2] = 65535;
return 24;
if (obj_class >= NVE4_COMPUTE_CLASS) {
RET(((uint64_t []) { 0x7fffffff, 65535, 65535 }));
} else {
RET(((uint64_t []) { 65535, 65535, 65535 }));
}
case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
data64[0] = 1024;
data64[1] = 1024;
data64[2] = 64;
return 24;
RET(((uint64_t []) { 1024, 1024, 64 }));
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
data64[0] = 1024;
return 8;
RET((uint64_t []) { 1024 });
case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */
data64[0] = (uint64_t)1 << 40;
return 8;
RET((uint64_t []) { 1ULL << 40 });
case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
data64[0] = 48 << 10;
return 8;
RET((uint64_t []) { 48 << 10 });
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
data64[0] = 512 << 10;
return 8;
RET((uint64_t []) { 512 << 10 });
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
data64[0] = 4096;
return 8;
RET((uint64_t []) { 4096 });
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
data32[0] = 32;
return 4;
RET((uint32_t []) { 32 });
case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
RET((uint64_t []) { 1ULL << 40 });
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
RET((uint32_t []) { 0 });
case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
RET((uint32_t []) { screen->mp_count_compute });
case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */
default:
return 0;
}
#undef RET
}
static void
@ -827,6 +834,8 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, 1);
BEGIN_NVC0(push, NVC0_3D(BLEND_ENABLE_COMMON), 1);
PUSH_DATA (push, 0);
BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1);
PUSH_DATA (push, NVC0_3D_SHADE_MODEL_SMOOTH);
if (screen->eng3d->oclass < NVE4_3D_CLASS) {
BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1);
PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);

View File

@ -38,6 +38,7 @@ struct nvc0_graph_state {
uint32_t constant_elts;
int32_t index_bias;
uint16_t scissor;
bool flatshade;
uint8_t patch_vertices;
uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
uint8_t num_vtxbufs;

View File

@ -107,8 +107,54 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_program *fp = nvc0->fragprog;
struct pipe_rasterizer_state *rast = &nvc0->rast->pipe;
fp->fp.sample_interp = nvc0->min_samples > 1;
if (fp->fp.force_persample_interp != rast->force_persample_interp) {
/* Force the program to be reuploaded, which will trigger interp fixups
* to get applied
*/
if (fp->mem)
nouveau_heap_free(&fp->mem);
fp->fp.force_persample_interp = rast->force_persample_interp;
}
/* Shade model works well enough when both colors follow it. However if one
* (or both) is explicitly set, then we have to go the patching route.
*/
bool has_explicit_color = fp->fp.colors &&
(((fp->fp.colors & 1) && !fp->fp.color_interp[0]) ||
((fp->fp.colors & 2) && !fp->fp.color_interp[1]));
bool hwflatshade = false;
if (has_explicit_color && fp->fp.flatshade != rast->flatshade) {
/* Force re-upload */
if (fp->mem)
nouveau_heap_free(&fp->mem);
fp->fp.flatshade = rast->flatshade;
/* Always smooth-shade in this mode, the shader will decide on its own
* when to flat-shade.
*/
} else if (!has_explicit_color) {
hwflatshade = rast->flatshade;
/* No need to binary-patch the shader each time, make sure that it's set
* up for the default behaviour.
*/
fp->fp.flatshade = 0;
}
if (hwflatshade != nvc0->state.flatshade) {
nvc0->state.flatshade = hwflatshade;
BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1);
PUSH_DATA (push, hwflatshade ? NVC0_3D_SHADE_MODEL_FLAT :
NVC0_3D_SHADE_MODEL_SMOOTH);
}
if (fp->mem && !(nvc0->dirty & NVC0_NEW_FRAGPROG)) {
return;
}
if (!nvc0_program_validate(nvc0, fp))
return;

View File

@ -212,9 +212,6 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe,
* always emit 16 commands, one for each scissor rectangle, here.
*/
SB_BEGIN_3D(so, SHADE_MODEL, 1);
SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT :
NVC0_3D_SHADE_MODEL_SMOOTH);
SB_IMMED_3D(so, PROVOKING_VERTEX_LAST, !cso->flatshade_first);
SB_IMMED_3D(so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside);
@ -683,6 +680,9 @@ nvc0_sp_state_create(struct pipe_context *pipe,
if (cso->stream_output.num_outputs)
prog->pipe.stream_output = cso->stream_output;
prog->translated = nvc0_program_translate(
prog, nvc0_context(pipe)->screen->base.device->chipset);
return (void *)prog;
}

View File

@ -606,6 +606,9 @@ nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
ctx_to->constbuf_dirty[s] = (1 << NVC0_MAX_PIPE_CONSTBUFS) - 1;
}
/* Reset tfb as the shader that owns it may have been deleted. */
ctx_to->state.tfb = NULL;
if (!ctx_to->vertex)
ctx_to->dirty &= ~(NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS);
if (!ctx_to->idxbuf.buffer)
@ -645,7 +648,7 @@ static struct state_validate {
{ nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG },
{ nvc0_validate_tess_state, NVC0_NEW_TESSFACTOR },
{ nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG },
{ nvc0_fragprog_validate, NVC0_NEW_FRAGPROG },
{ nvc0_fragprog_validate, NVC0_NEW_FRAGPROG | NVC0_NEW_RASTERIZER },
{ nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA |
NVC0_NEW_RASTERIZER },
{ nvc0_validate_derived_2, NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER },

Some files were not shown because too many files have changed in this diff Show More