Merge remote-tracking branch 'mesa-public/master' into vulkan
This commit is contained in:
commit
b00e3f221b
14
configure.ac
14
configure.ac
|
@ -81,7 +81,7 @@ PRESENTPROTO_REQUIRED=1.0
|
|||
LIBUDEV_REQUIRED=151
|
||||
GLPROTO_REQUIRED=1.4.14
|
||||
LIBOMXIL_BELLAGIO_REQUIRED=0.0
|
||||
LIBVA_REQUIRED=0.35.0
|
||||
LIBVA_REQUIRED=0.38.0
|
||||
VDPAU_REQUIRED=1.1
|
||||
WAYLAND_REQUIRED=1.2.0
|
||||
XCB_REQUIRED=1.9.3
|
||||
|
@ -867,7 +867,7 @@ GALLIUM_DRIVERS_DEFAULT="r300,r600,svga,swrast"
|
|||
AC_ARG_WITH([gallium-drivers],
|
||||
[AS_HELP_STRING([--with-gallium-drivers@<:@=DIRS...@:>@],
|
||||
[comma delimited Gallium drivers list, e.g.
|
||||
"i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4"
|
||||
"i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl"
|
||||
@<:@default=r300,r600,svga,swrast@:>@])],
|
||||
[with_gallium_drivers="$withval"],
|
||||
[with_gallium_drivers="$GALLIUM_DRIVERS_DEFAULT"])
|
||||
|
@ -2188,6 +2188,12 @@ if test -n "$with_gallium_drivers"; then
|
|||
PKG_CHECK_MODULES([SIMPENROSE], [simpenrose],
|
||||
[USE_VC4_SIMULATOR=yes], [USE_VC4_SIMULATOR=no])
|
||||
;;
|
||||
xvirgl)
|
||||
HAVE_GALLIUM_VIRGL=yes
|
||||
gallium_require_drm "virgl"
|
||||
gallium_require_drm_loader
|
||||
require_egl_drm "virgl"
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([Unknown Gallium driver: $driver])
|
||||
;;
|
||||
|
@ -2259,6 +2265,7 @@ AM_CONDITIONAL(HAVE_GALLIUM_FREEDRENO, test "x$HAVE_GALLIUM_FREEDRENO" = xyes)
|
|||
AM_CONDITIONAL(HAVE_GALLIUM_SOFTPIPE, test "x$HAVE_GALLIUM_SOFTPIPE" = xyes)
|
||||
AM_CONDITIONAL(HAVE_GALLIUM_LLVMPIPE, test "x$HAVE_GALLIUM_LLVMPIPE" = xyes)
|
||||
AM_CONDITIONAL(HAVE_GALLIUM_VC4, test "x$HAVE_GALLIUM_VC4" = xyes)
|
||||
AM_CONDITIONAL(HAVE_GALLIUM_VIRGL, test "x$HAVE_GALLIUM_VIRGL" = xyes)
|
||||
|
||||
AM_CONDITIONAL(HAVE_GALLIUM_STATIC_TARGETS, test "x$enable_shared_pipe_drivers" = xno)
|
||||
|
||||
|
@ -2386,6 +2393,7 @@ AC_CONFIG_FILES([Makefile
|
|||
src/gallium/drivers/svga/Makefile
|
||||
src/gallium/drivers/trace/Makefile
|
||||
src/gallium/drivers/vc4/Makefile
|
||||
src/gallium/drivers/virgl/Makefile
|
||||
src/gallium/state_trackers/clover/Makefile
|
||||
src/gallium/state_trackers/dri/Makefile
|
||||
src/gallium/state_trackers/glx/xlib/Makefile
|
||||
|
@ -2426,6 +2434,8 @@ AC_CONFIG_FILES([Makefile
|
|||
src/gallium/winsys/sw/wrapper/Makefile
|
||||
src/gallium/winsys/sw/xlib/Makefile
|
||||
src/gallium/winsys/vc4/drm/Makefile
|
||||
src/gallium/winsys/virgl/drm/Makefile
|
||||
src/gallium/winsys/virgl/vtest/Makefile
|
||||
src/gbm/Makefile
|
||||
src/gbm/main/gbm.pc
|
||||
src/glsl/Makefile
|
||||
|
|
|
@ -153,10 +153,10 @@ GL 4.3, GLSL 4.30:
|
|||
GL_ARB_ES3_compatibility DONE (all drivers that support GLSL 3.30)
|
||||
GL_ARB_clear_buffer_object DONE (all drivers)
|
||||
GL_ARB_compute_shader in progress (jljusten)
|
||||
GL_ARB_copy_image DONE (i965) (gallium - in progress, VMware)
|
||||
GL_ARB_copy_image DONE (i965, nv50, nvc0, radeonsi)
|
||||
GL_KHR_debug DONE (all drivers)
|
||||
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
|
||||
GL_ARB_fragment_layer_viewport DONE (nv50, nvc0, r600, radeonsi, llvmpipe)
|
||||
GL_ARB_fragment_layer_viewport DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe)
|
||||
GL_ARB_framebuffer_no_attachments DONE (i965)
|
||||
GL_ARB_internalformat_query2 not started
|
||||
GL_ARB_invalidate_subdata DONE (all drivers)
|
||||
|
@ -243,7 +243,7 @@ GLES3.2, GLSL ES 3.2
|
|||
GL_KHR_texture_compression_astc_ldr DONE (i965/gen9+)
|
||||
GL_OES_copy_image not started (based on GL_ARB_copy_image, which is done for some drivers)
|
||||
GL_OES_draw_buffers_indexed not started
|
||||
GL_OES_draw_elements_base_vertex not started (based on GL_ARB_draw_elements_base_vertex, which is done for all drivers)
|
||||
GL_OES_draw_elements_base_vertex DONE (all drivers)
|
||||
GL_OES_geometry_shader not started (based on GL_ARB_geometry_shader4, which is done for all drivers)
|
||||
GL_OES_gpu_shader5 not started (based on parts of GL_ARB_gpu_shader5, which is done for some drivers)
|
||||
GL_OES_primitive_bounding_box not started
|
||||
|
|
|
@ -16,6 +16,12 @@
|
|||
|
||||
<h1>News</h1>
|
||||
|
||||
<h2>October 24, 2015</h2>
|
||||
<p>
|
||||
<a href="relnotes/11.0.4.html">Mesa 11.0.4</a> is released.
|
||||
This is a bug-fix release.
|
||||
</p>
|
||||
|
||||
<h2>October 10, 2015</h2>
|
||||
<p>
|
||||
<a href="relnotes/11.0.3.html">Mesa 11.0.3</a> is released.
|
||||
|
@ -28,7 +34,7 @@ This is a bug-fix release.
|
|||
This is a bug-fix release.
|
||||
<br>
|
||||
NOTE: It is anticipated that 10.6.9 will be the final release in the 10.6
|
||||
series. Users of 10.5 are encouraged to migrate to the 11.0 series in order
|
||||
series. Users of 10.6 are encouraged to migrate to the 11.0 series in order
|
||||
to obtain future fixes.
|
||||
</p>
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@ The release notes summarize what's new or changed in each Mesa release.
|
|||
</p>
|
||||
|
||||
<ul>
|
||||
<li><a href="relnotes/11.0.4.html">11.0.4 release notes</a>
|
||||
<li><a href="relnotes/11.0.3.html">11.0.3 release notes</a>
|
||||
<li><a href="relnotes/10.6.9.html">10.6.9 release notes</a>
|
||||
<li><a href="relnotes/11.0.2.html">11.0.2 release notes</a>
|
||||
|
|
|
@ -0,0 +1,168 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 11.0.4 Release Notes / October 24, 2015</h1>
|
||||
|
||||
<p>
|
||||
Mesa 11.0.4 is a bug fix release which fixes bugs found since the 11.0.3 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 11.0.4 implements the OpenGL 4.1 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.1. OpenGL
|
||||
4.1 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
ed412ca6a46d1bd055120e5c12806c15419ae8c4dd6d3f6ea20a83091d5c78bf mesa-11.0.4.tar.gz
|
||||
40201bf7fc6fa12a6d9edfe870b41eb4dd6669154e3c42c48a96f70805f5483d mesa-11.0.4.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<p>This list is likely incomplete.</p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86281">Bug 86281</a> - brw_meta_fast_clear (brw=brw@entry=0x7fffd4097a08, fb=fb@entry=0x7fffd40fa900, buffers=buffers@entry=2, partial_clear=partial_clear@entry=false)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86720">Bug 86720</a> - [radeon] Europa Universalis 4 freezing during game start (10.3.3+, still broken on 11.0.2)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91788">Bug 91788</a> - [HSW Regression] Synmark2_v6 Multithread performance case FPS reduced by 36%</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92304">Bug 92304</a> - [cts] cts.shaders.negative conformance tests fail</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Alejandro Piñeiro (2):</p>
|
||||
<ul>
|
||||
<li>i965/vec4: check writemask when bailing out at register coalesce</li>
|
||||
<li>i965/vec4: fill src_reg type using the constructor type parameter</li>
|
||||
</ul>
|
||||
|
||||
<p>Brian Paul (2):</p>
|
||||
<ul>
|
||||
<li>vbo: fix incorrect switch statement in init_mat_currval()</li>
|
||||
<li>mesa: fix incorrect opcode in save_BlendFunci()</li>
|
||||
</ul>
|
||||
|
||||
<p>Chih-Wei Huang (3):</p>
|
||||
<ul>
|
||||
<li>mesa: android: Fix the incorrect path of sse_minmax.c</li>
|
||||
<li>nv50/ir: use C++11 standard std::unordered_map if possible</li>
|
||||
<li>nv30: include the header of ffs prototype</li>
|
||||
</ul>
|
||||
|
||||
<p>Chris Wilson (1):</p>
|
||||
<ul>
|
||||
<li>i965: Remove early release of DRI2 miptree</li>
|
||||
</ul>
|
||||
|
||||
<p>Dave Airlie (1):</p>
|
||||
<ul>
|
||||
<li>mesa/uniforms: fix get_uniform for doubles (v2)</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (1):</p>
|
||||
<ul>
|
||||
<li>docs: add sha256 checksums for 11.0.3</li>
|
||||
</ul>
|
||||
|
||||
<p>Francisco Jerez (5):</p>
|
||||
<ul>
|
||||
<li>i965: Don't tell the hardware about our UAV access.</li>
|
||||
<li>mesa: Expose function to calculate whether a shader image unit is valid.</li>
|
||||
<li>mesa: Skip redundant texture completeness checking during image validation.</li>
|
||||
<li>i965: Use _mesa_is_image_unit_valid() instead of gl_image_unit::_Valid.</li>
|
||||
<li>mesa: Get rid of texture-dependent image unit derived state.</li>
|
||||
</ul>
|
||||
|
||||
<p>Ian Romanick (8):</p>
|
||||
<ul>
|
||||
<li>glsl: Allow built-in functions as constant expressions in OpenGL ES 1.00</li>
|
||||
<li>ff_fragment_shader: Use binding to set the sampler unit</li>
|
||||
<li>glsl/linker: Use constant_initializer instead of constant_value to initialize uniforms</li>
|
||||
<li>glsl: Use constant_initializer instead of constant_value to determine whether to keep an unused uniform</li>
|
||||
<li>glsl: Only set ir_variable::constant_value for const-decorated variables</li>
|
||||
<li>glsl: Restrict initializers for global variables to constant expression in ES</li>
|
||||
<li>glsl: Add method to determine whether an expression contains the sequence operator</li>
|
||||
<li>glsl: In later GLSL versions, sequence operator is cannot be a constant expression</li>
|
||||
</ul>
|
||||
|
||||
<p>Ilia Mirkin (1):</p>
|
||||
<ul>
|
||||
<li>nouveau: make sure there's always room to emit a fence</li>
|
||||
</ul>
|
||||
|
||||
<p>Indrajit Das (1):</p>
|
||||
<ul>
|
||||
<li>st/va: Used correct parameter to derive the value of the "h" variable in vlVaCreateImage</li>
|
||||
</ul>
|
||||
|
||||
<p>Jonathan Gray (1):</p>
|
||||
<ul>
|
||||
<li>configure.ac: ensure RM is set</li>
|
||||
</ul>
|
||||
|
||||
<p>Krzysztof Sobiecki (1):</p>
|
||||
<ul>
|
||||
<li>st/fbo: use pipe_surface_release instead of pipe_surface_reference</li>
|
||||
</ul>
|
||||
|
||||
<p>Leo Liu (1):</p>
|
||||
<ul>
|
||||
<li>st/omx/dec/h264: fix field picture type 0 poc disorder</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (3):</p>
|
||||
<ul>
|
||||
<li>st/mesa: fix clip state dependencies</li>
|
||||
<li>radeonsi: fix a GS copy shader leak</li>
|
||||
<li>gallium: add PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT</li>
|
||||
</ul>
|
||||
|
||||
<p>Nicolai Hähnle (1):</p>
|
||||
<ul>
|
||||
<li>u_vbuf: fix vb slot assignment for translated buffers</li>
|
||||
</ul>
|
||||
|
||||
<p>Rob Clark (1):</p>
|
||||
<ul>
|
||||
<li>freedreno/a3xx: cache-flush is needed after MEM_WRITE</li>
|
||||
</ul>
|
||||
|
||||
<p>Tapani Pälli (3):</p>
|
||||
<ul>
|
||||
<li>mesa: add GL_UNSIGNED_INT_24_8 to _mesa_pack_depth_span</li>
|
||||
<li>mesa: Set api prefix to version string when overriding version</li>
|
||||
<li>mesa: fix ARRAY_SIZE query for GetProgramResourceiv</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
|
@ -45,15 +45,21 @@ Note: some of the new features are only available with certain drivers.
|
|||
|
||||
<ul>
|
||||
<li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>
|
||||
<li>GL_ARB_copy_image on radeonsi</li>
|
||||
<li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
|
||||
<li>GL_ARB_gpu_shader5 on r600 for Evergreen and later chips</li>
|
||||
<li>GL_ARB_shader_clock on i965 (gen7+)</li>
|
||||
<li>GL_ARB_shader_stencil_export on i965 (gen9+)</li>
|
||||
<li>GL_ARB_shader_storage_buffer_object on i965</li>
|
||||
<li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi</li>
|
||||
<li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li>
|
||||
<li>GL_ARB_texture_query_lod on softpipe</li>
|
||||
<li>GL_ARB_texture_view on radeonsi</li>
|
||||
<li>GL_EXT_draw_elements_base_vertex on all drivers</li>
|
||||
<li>GL_OES_draw_elements_base_vertex on all drivers</li>
|
||||
<li>EGL_KHR_create_context on softpipe, llvmpipe</li>
|
||||
<li>EGL_KHR_gl_colorspace on softpipe, llvmpipe</li>
|
||||
<li>new virgl gallium driver for qemu virtio-gpu</li>
|
||||
</ul>
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
|
|
@ -495,7 +495,7 @@ struct __DRIdamageExtensionRec {
|
|||
* SWRast Loader extension.
|
||||
*/
|
||||
#define __DRI_SWRAST_LOADER "DRI_SWRastLoader"
|
||||
#define __DRI_SWRAST_LOADER_VERSION 2
|
||||
#define __DRI_SWRAST_LOADER_VERSION 3
|
||||
struct __DRIswrastLoaderExtensionRec {
|
||||
__DRIextension base;
|
||||
|
||||
|
@ -528,6 +528,15 @@ struct __DRIswrastLoaderExtensionRec {
|
|||
void (*putImage2)(__DRIdrawable *drawable, int op,
|
||||
int x, int y, int width, int height, int stride,
|
||||
char *data, void *loaderPrivate);
|
||||
|
||||
/**
|
||||
* Get image from drawable
|
||||
*
|
||||
* \since 3
|
||||
*/
|
||||
void (*getImage2)(__DRIdrawable *readable,
|
||||
int x, int y, int width, int height, int stride,
|
||||
char *data, void *loaderPrivate);
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
@ -109,21 +109,29 @@ CHIPSET(0x162A, bdw_gt3, "Intel(R) Iris Pro P6300 (Broadwell GT3e)")
|
|||
CHIPSET(0x162B, bdw_gt3, "Intel(R) Iris 6100 (Broadwell GT3)")
|
||||
CHIPSET(0x162D, bdw_gt3, "Intel(R) Broadwell GT3")
|
||||
CHIPSET(0x162E, bdw_gt3, "Intel(R) Broadwell GT3")
|
||||
CHIPSET(0x1902, skl_gt1, "Intel(R) Skylake DT GT1")
|
||||
CHIPSET(0x1906, skl_gt1, "Intel(R) Skylake ULT GT1")
|
||||
CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake SRV GT1")
|
||||
CHIPSET(0x190B, skl_gt1, "Intel(R) Skylake Halo GT1")
|
||||
CHIPSET(0x190E, skl_gt1, "Intel(R) Skylake ULX GT1")
|
||||
CHIPSET(0x1912, skl_gt2, "Intel(R) Skylake DT GT2")
|
||||
CHIPSET(0x1916, skl_gt2, "Intel(R) Skylake ULT GT2")
|
||||
CHIPSET(0x191A, skl_gt2, "Intel(R) Skylake SRV GT2")
|
||||
CHIPSET(0x191B, skl_gt2, "Intel(R) Skylake Halo GT2")
|
||||
CHIPSET(0x191D, skl_gt2, "Intel(R) Skylake WKS GT2")
|
||||
CHIPSET(0x191E, skl_gt2, "Intel(R) Skylake ULX GT2")
|
||||
CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake ULT GT2F")
|
||||
CHIPSET(0x1926, skl_gt3, "Intel(R) Skylake ULT GT3")
|
||||
CHIPSET(0x192A, skl_gt3, "Intel(R) Skylake SRV GT3")
|
||||
CHIPSET(0x192B, skl_gt3, "Intel(R) Skylake Halo GT3")
|
||||
CHIPSET(0x1902, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
|
||||
CHIPSET(0x1906, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
|
||||
CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake GT1")
|
||||
CHIPSET(0x190E, skl_gt1, "Intel(R) Skylake GT1")
|
||||
CHIPSET(0x1912, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)")
|
||||
CHIPSET(0x1913, skl_gt2, "Intel(R) Skylake GT2f")
|
||||
CHIPSET(0x1915, skl_gt2, "Intel(R) Skylake GT2f")
|
||||
CHIPSET(0x1916, skl_gt2, "Intel(R) HD Graphics 520 (Skylake GT2)")
|
||||
CHIPSET(0x1917, skl_gt2, "Intel(R) Skylake GT2f")
|
||||
CHIPSET(0x191A, skl_gt2, "Intel(R) Skylake GT2")
|
||||
CHIPSET(0x191B, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)")
|
||||
CHIPSET(0x191D, skl_gt2, "Intel(R) HD Graphics P530 (Skylake GT2)")
|
||||
CHIPSET(0x191E, skl_gt2, "Intel(R) HD Graphics 515 (Skylake GT2)")
|
||||
CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake GT2")
|
||||
CHIPSET(0x1923, skl_gt3, "Intel(R) Iris Graphics 540 (Skylake GT3e)")
|
||||
CHIPSET(0x1926, skl_gt3, "Intel(R) HD Graphics 535 (Skylake GT3)")
|
||||
CHIPSET(0x1927, skl_gt3, "Intel(R) Iris Graphics 550 (Skylake GT3e)")
|
||||
CHIPSET(0x192A, skl_gt4, "Intel(R) Skylake GT4")
|
||||
CHIPSET(0x192B, skl_gt3, "Intel(R) Iris Graphics (Skylake GT3fe)")
|
||||
CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4")
|
||||
CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4")
|
||||
CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4")
|
||||
CHIPSET(0x193D, skl_gt4, "Intel(R) Skylake GT4")
|
||||
CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherryview)")
|
||||
CHIPSET(0x22B1, chv, "Intel(R) HD Graphics (Cherryview)")
|
||||
CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)")
|
||||
|
|
|
@ -181,3 +181,5 @@ CHIPSET(0x9876, CARRIZO_, CARRIZO)
|
|||
CHIPSET(0x9877, CARRIZO_, CARRIZO)
|
||||
|
||||
CHIPSET(0x7300, FIJI_, FIJI)
|
||||
|
||||
CHIPSET(0x98E4, STONEY_, STONEY)
|
||||
|
|
|
@ -82,6 +82,11 @@ if HAVE_GALLIUM_VC4
|
|||
SUBDIRS += drivers/vc4 winsys/vc4/drm
|
||||
endif
|
||||
|
||||
## virgl
|
||||
if HAVE_GALLIUM_VIRGL
|
||||
SUBDIRS += drivers/virgl winsys/virgl/drm winsys/virgl/vtest
|
||||
endif
|
||||
|
||||
## the sw winsys'
|
||||
SUBDIRS += winsys/sw/null
|
||||
|
||||
|
|
|
@ -427,6 +427,7 @@ lp_build_init(void)
|
|||
*/
|
||||
util_cpu_caps.has_avx = 0;
|
||||
util_cpu_caps.has_avx2 = 0;
|
||||
util_cpu_caps.has_f16c = 0;
|
||||
}
|
||||
|
||||
#ifdef PIPE_ARCH_PPC_64
|
||||
|
@ -458,7 +459,9 @@ lp_build_init(void)
|
|||
util_cpu_caps.has_sse3 = 0;
|
||||
util_cpu_caps.has_ssse3 = 0;
|
||||
util_cpu_caps.has_sse4_1 = 0;
|
||||
util_cpu_caps.has_sse4_2 = 0;
|
||||
util_cpu_caps.has_avx = 0;
|
||||
util_cpu_caps.has_avx2 = 0;
|
||||
util_cpu_caps.has_f16c = 0;
|
||||
#endif
|
||||
|
||||
|
|
|
@ -497,20 +497,48 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
|
|||
#endif
|
||||
}
|
||||
|
||||
llvm::SmallVector<std::string, 1> MAttrs;
|
||||
if (util_cpu_caps.has_avx) {
|
||||
/*
|
||||
* AVX feature is not automatically detected from CPUID by the X86 target
|
||||
* yet, because the old (yet default) JIT engine is not capable of
|
||||
* emitting the opcodes. On newer llvm versions it is and at least some
|
||||
* versions (tested with 3.3) will emit avx opcodes without this anyway.
|
||||
*/
|
||||
MAttrs.push_back("+avx");
|
||||
if (util_cpu_caps.has_f16c) {
|
||||
MAttrs.push_back("+f16c");
|
||||
}
|
||||
builder.setMAttrs(MAttrs);
|
||||
}
|
||||
llvm::SmallVector<std::string, 16> MAttrs;
|
||||
|
||||
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
|
||||
/*
|
||||
* We need to unset attributes because sometimes LLVM mistakenly assumes
|
||||
* certain features are present given the processor name.
|
||||
*
|
||||
* https://bugs.freedesktop.org/show_bug.cgi?id=92214
|
||||
* http://llvm.org/PR25021
|
||||
* http://llvm.org/PR19429
|
||||
* http://llvm.org/PR16721
|
||||
*/
|
||||
MAttrs.push_back(util_cpu_caps.has_sse ? "+sse" : "-sse" );
|
||||
MAttrs.push_back(util_cpu_caps.has_sse2 ? "+sse2" : "-sse2" );
|
||||
MAttrs.push_back(util_cpu_caps.has_sse3 ? "+sse3" : "-sse3" );
|
||||
MAttrs.push_back(util_cpu_caps.has_ssse3 ? "+ssse3" : "-ssse3" );
|
||||
#if HAVE_LLVM >= 0x0304
|
||||
MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse4.1" : "-sse4.1");
|
||||
#else
|
||||
MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse41" : "-sse41" );
|
||||
#endif
|
||||
#if HAVE_LLVM >= 0x0304
|
||||
MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse4.2" : "-sse4.2");
|
||||
#else
|
||||
MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse42" : "-sse42" );
|
||||
#endif
|
||||
/*
|
||||
* AVX feature is not automatically detected from CPUID by the X86 target
|
||||
* yet, because the old (yet default) JIT engine is not capable of
|
||||
* emitting the opcodes. On newer llvm versions it is and at least some
|
||||
* versions (tested with 3.3) will emit avx opcodes without this anyway.
|
||||
*/
|
||||
MAttrs.push_back(util_cpu_caps.has_avx ? "+avx" : "-avx");
|
||||
MAttrs.push_back(util_cpu_caps.has_f16c ? "+f16c" : "-f16c");
|
||||
MAttrs.push_back(util_cpu_caps.has_avx2 ? "+avx2" : "-avx2");
|
||||
#endif
|
||||
|
||||
#if defined(PIPE_ARCH_PPC)
|
||||
MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec");
|
||||
#endif
|
||||
|
||||
builder.setMAttrs(MAttrs);
|
||||
|
||||
#if HAVE_LLVM >= 0x0305
|
||||
StringRef MCPU = llvm::sys::getHostCPUName();
|
||||
|
|
|
@ -405,16 +405,17 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
|
|||
break;
|
||||
|
||||
case PIPE_TEX_WRAP_MIRROR_REPEAT:
|
||||
if (offset) {
|
||||
offset = lp_build_int_to_float(coord_bld, offset);
|
||||
offset = lp_build_div(coord_bld, offset, length_f);
|
||||
coord = lp_build_add(coord_bld, coord, offset);
|
||||
}
|
||||
/* compute mirror function */
|
||||
coord = lp_build_coord_mirror(bld, coord);
|
||||
|
||||
/* scale coord to length */
|
||||
coord = lp_build_mul(coord_bld, coord, length_f);
|
||||
coord = lp_build_sub(coord_bld, coord, half);
|
||||
if (offset) {
|
||||
offset = lp_build_int_to_float(coord_bld, offset);
|
||||
coord = lp_build_add(coord_bld, coord, offset);
|
||||
}
|
||||
|
||||
/* convert to int, compute lerp weight */
|
||||
lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
|
||||
|
@ -567,12 +568,13 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
|
|||
coord = lp_build_mul(coord_bld, coord, length_f);
|
||||
}
|
||||
|
||||
if (offset) {
|
||||
offset = lp_build_int_to_float(coord_bld, offset);
|
||||
coord = lp_build_add(coord_bld, coord, offset);
|
||||
}
|
||||
/* floor */
|
||||
/* use itrunc instead since we clamp to 0 anyway */
|
||||
icoord = lp_build_itrunc(coord_bld, coord);
|
||||
if (offset) {
|
||||
icoord = lp_build_add(int_coord_bld, icoord, offset);
|
||||
}
|
||||
|
||||
/* clamp to [0, length - 1]. */
|
||||
icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
|
||||
|
@ -2586,6 +2588,10 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
|
|||
derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
|
||||
derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
|
||||
}
|
||||
/*
|
||||
* We could force CLAMP to CLAMP_TO_EDGE here if min/mag filter is nearest,
|
||||
* so AoS path could be used. Not sure it's worth the trouble...
|
||||
*/
|
||||
|
||||
min_img_filter = derived_sampler_state.min_img_filter;
|
||||
mag_img_filter = derived_sampler_state.mag_img_filter;
|
||||
|
|
|
@ -59,6 +59,11 @@
|
|||
#include "vc4/drm/vc4_drm_public.h"
|
||||
#endif
|
||||
|
||||
#if GALLIUM_VIRGL
|
||||
#include "virgl/drm/virgl_drm_public.h"
|
||||
#include "virgl/virgl_public.h"
|
||||
#endif
|
||||
|
||||
static char* driver_name = NULL;
|
||||
|
||||
/* XXX: We need to tear down the winsys if *screen_create() fails. */
|
||||
|
@ -296,6 +301,33 @@ pipe_freedreno_create_screen(int fd)
|
|||
}
|
||||
#endif
|
||||
|
||||
#if defined(GALLIUM_VIRGL)
#if defined(DRI_TARGET)

const __DRIextension **__driDriverGetExtensions_virtio_gpu(void);

/**
 * DRI entry point exported under the "virtio_gpu" driver name.
 *
 * Installs the gallium DRM driver API as the global driver API and hands
 * back the gallium DRM extension table.
 */
PUBLIC const __DRIextension **__driDriverGetExtensions_virtio_gpu(void)
{
   globalDriverAPI = &galliumdrm_driver_api;
   return galliumdrm_driver_extensions;
}
#endif

/**
 * Build a virgl pipe_screen for the DRM file descriptor \p fd.
 *
 * A virgl DRM winsys is created first; the screen is then created on top of
 * it and passed through debug_screen_wrap().
 *
 * \return the (wrapped) screen, or NULL if either the winsys or the screen
 *         could not be created.
 */
static struct pipe_screen *
pipe_virgl_create_screen(int fd)
{
   struct virgl_winsys *winsys = virgl_drm_winsys_create(fd);
   struct pipe_screen *pscreen;

   if (winsys == NULL)
      return NULL;

   pscreen = virgl_create_screen(winsys);
   if (pscreen == NULL) {
      /* NOTE(review): the winsys is not released on this path; whether
       * virgl_create_screen() cleans it up on failure is not visible
       * here - confirm.
       */
      return NULL;
   }

   return debug_screen_wrap(pscreen);
}
#endif
|
||||
|
||||
#if defined(GALLIUM_VC4)
|
||||
#if defined(DRI_TARGET)
|
||||
|
||||
|
@ -385,6 +417,11 @@ dd_create_screen(int fd)
|
|||
return pipe_freedreno_create_screen(fd);
|
||||
else
|
||||
#endif
|
||||
#if defined(GALLIUM_VIRGL)
|
||||
if ((strcmp(driver_name, "virtio_gpu") == 0))
|
||||
return pipe_virgl_create_screen(fd);
|
||||
else
|
||||
#endif
|
||||
#if defined(GALLIUM_VC4)
|
||||
if (strcmp(driver_name, "vc4") == 0)
|
||||
return pipe_vc4_create_screen(fd);
|
||||
|
@ -474,6 +511,11 @@ dd_configuration(enum drm_conf conf)
|
|||
return configuration_query(conf);
|
||||
else
|
||||
#endif
|
||||
#if defined(GALLIUM_VIRGL)
|
||||
if ((strcmp(driver_name, "virtio_gpu") == 0))
|
||||
return configuration_query(conf);
|
||||
else
|
||||
#endif
|
||||
#if defined(GALLIUM_VC4)
|
||||
if (strcmp(driver_name, "vc4") == 0)
|
||||
return configuration_query(conf);
|
||||
|
|
|
@ -19,6 +19,10 @@
|
|||
#include "llvmpipe/lp_public.h"
|
||||
#endif
|
||||
|
||||
#ifdef GALLIUM_VIRGL
|
||||
#include "virgl/virgl_public.h"
|
||||
#include "virgl/vtest/virgl_vtest_public.h"
|
||||
#endif
|
||||
|
||||
static inline struct pipe_screen *
|
||||
sw_screen_create_named(struct sw_winsys *winsys, const char *driver)
|
||||
|
@ -30,6 +34,14 @@ sw_screen_create_named(struct sw_winsys *winsys, const char *driver)
|
|||
screen = llvmpipe_create_screen(winsys);
|
||||
#endif
|
||||
|
||||
#if defined(GALLIUM_VIRGL)
|
||||
if (screen == NULL && strcmp(driver, "virpipe") == 0) {
|
||||
struct virgl_winsys *vws;
|
||||
vws = virgl_vtest_winsys_wrap(winsys);
|
||||
screen = virgl_create_screen(vws);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(GALLIUM_SOFTPIPE)
|
||||
if (screen == NULL)
|
||||
screen = softpipe_create_screen(winsys);
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include "util/u_string.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_math.h"
|
||||
#include "tgsi_dump.h"
|
||||
#include "tgsi_info.h"
|
||||
#include "tgsi_iterate.h"
|
||||
|
@ -43,6 +44,8 @@ struct dump_ctx
|
|||
{
|
||||
struct tgsi_iterate_context iter;
|
||||
|
||||
boolean dump_float_as_hex;
|
||||
|
||||
uint instno;
|
||||
uint immno;
|
||||
int indent;
|
||||
|
@ -88,6 +91,7 @@ dump_enum(
|
|||
#define SID(I) ctx->dump_printf( ctx, "%d", I )
|
||||
#define FLT(F) ctx->dump_printf( ctx, "%10.4f", F )
|
||||
#define DBL(D) ctx->dump_printf( ctx, "%10.8f", D )
|
||||
#define HFLT(F) ctx->dump_printf( ctx, "0x%08x", fui((F)) )
|
||||
#define ENM(E,ENUMS) dump_enum( ctx, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
|
||||
|
||||
const char *
|
||||
|
@ -251,7 +255,10 @@ dump_imm_data(struct tgsi_iterate_context *iter,
|
|||
break;
|
||||
}
|
||||
case TGSI_IMM_FLOAT32:
|
||||
FLT( data[i].Float );
|
||||
if (ctx->dump_float_as_hex)
|
||||
HFLT( data[i].Float );
|
||||
else
|
||||
FLT( data[i].Float );
|
||||
break;
|
||||
case TGSI_IMM_UINT32:
|
||||
UID(data[i].Uint);
|
||||
|
@ -682,6 +689,11 @@ tgsi_dump_to_file(const struct tgsi_token *tokens, uint flags, FILE *file)
|
|||
ctx.indentation = 0;
|
||||
ctx.file = file;
|
||||
|
||||
if (flags & TGSI_DUMP_FLOAT_AS_HEX)
|
||||
ctx.dump_float_as_hex = TRUE;
|
||||
else
|
||||
ctx.dump_float_as_hex = FALSE;
|
||||
|
||||
tgsi_iterate_shader( tokens, &ctx.iter );
|
||||
}
|
||||
|
||||
|
@ -697,6 +709,7 @@ struct str_dump_ctx
|
|||
char *str;
|
||||
char *ptr;
|
||||
int left;
|
||||
bool nospace;
|
||||
};
|
||||
|
||||
static void
|
||||
|
@ -719,10 +732,11 @@ str_dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...)
|
|||
sctx->ptr += written;
|
||||
sctx->left -= written;
|
||||
}
|
||||
}
|
||||
} else
|
||||
sctx->nospace = true;
|
||||
}
|
||||
|
||||
void
|
||||
bool
|
||||
tgsi_dump_str(
|
||||
const struct tgsi_token *tokens,
|
||||
uint flags,
|
||||
|
@ -749,8 +763,16 @@ tgsi_dump_str(
|
|||
ctx.str[0] = 0;
|
||||
ctx.ptr = str;
|
||||
ctx.left = (int)size;
|
||||
ctx.nospace = false;
|
||||
|
||||
if (flags & TGSI_DUMP_FLOAT_AS_HEX)
|
||||
ctx.base.dump_float_as_hex = TRUE;
|
||||
else
|
||||
ctx.base.dump_float_as_hex = FALSE;
|
||||
|
||||
tgsi_iterate_shader( tokens, &ctx.base.iter );
|
||||
|
||||
return !ctx.nospace;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -773,6 +795,7 @@ tgsi_dump_instruction_str(
|
|||
ctx.str[0] = 0;
|
||||
ctx.ptr = str;
|
||||
ctx.left = (int)size;
|
||||
ctx.nospace = false;
|
||||
|
||||
iter_instruction( &ctx.base.iter, (struct tgsi_full_instruction *)inst );
|
||||
}
|
||||
|
|
|
@ -38,7 +38,9 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
void
|
||||
#define TGSI_DUMP_FLOAT_AS_HEX (1 << 0)
|
||||
|
||||
bool
|
||||
tgsi_dump_str(
|
||||
const struct tgsi_token *tokens,
|
||||
uint flags,
|
||||
|
|
|
@ -195,8 +195,15 @@ static boolean parse_float( const char **pcur, float *val )
|
|||
boolean integral_part = FALSE;
|
||||
boolean fractional_part = FALSE;
|
||||
|
||||
*val = (float) atof( cur );
|
||||
if (*cur == '0' && *(cur + 1) == 'x') {
|
||||
union fi fi;
|
||||
fi.ui = strtoul(cur, NULL, 16);
|
||||
*val = fi.f;
|
||||
cur += 10;
|
||||
goto out;
|
||||
}
|
||||
|
||||
*val = (float) atof( cur );
|
||||
if (*cur == '-' || *cur == '+')
|
||||
cur++;
|
||||
if (is_digit( cur )) {
|
||||
|
@ -228,6 +235,8 @@ static boolean parse_float( const char **pcur, float *val )
|
|||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
out:
|
||||
*pcur = cur;
|
||||
return TRUE;
|
||||
}
|
||||
|
|
|
@ -169,6 +169,25 @@ util_format_is_snorm(enum pipe_format format)
|
|||
desc->channel[i].normalized;
|
||||
}
|
||||
|
||||
boolean
|
||||
util_format_is_snorm8(enum pipe_format format)
|
||||
{
|
||||
const struct util_format_description *desc = util_format_description(format);
|
||||
int i;
|
||||
|
||||
if (desc->is_mixed)
|
||||
return FALSE;
|
||||
|
||||
i = util_format_get_first_non_void_channel(format);
|
||||
if (i == -1)
|
||||
return FALSE;
|
||||
|
||||
return desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED &&
|
||||
!desc->channel[i].pure_integer &&
|
||||
desc->channel[i].normalized &&
|
||||
desc->channel[i].size == 8;
|
||||
}
|
||||
|
||||
boolean
|
||||
util_format_is_luminance_alpha(enum pipe_format format)
|
||||
{
|
||||
|
|
|
@ -686,6 +686,9 @@ util_format_is_pure_uint(enum pipe_format format);
|
|||
boolean
|
||||
util_format_is_snorm(enum pipe_format format);
|
||||
|
||||
boolean
|
||||
util_format_is_snorm8(enum pipe_format format);
|
||||
|
||||
/**
|
||||
* Check if the src format can be blitted to the destination format with
|
||||
* a simple memcpy. For example, blitting from RGBA to RGBx is OK, but not
|
||||
|
|
|
@ -450,6 +450,43 @@ null_constant_buffer(struct pipe_context *ctx)
|
|||
util_report_result(pass);
|
||||
}
|
||||
|
||||
static void
|
||||
null_fragment_shader(struct pipe_context *ctx)
|
||||
{
|
||||
struct cso_context *cso;
|
||||
struct pipe_resource *cb;
|
||||
void *vs;
|
||||
struct pipe_rasterizer_state rs = {0};
|
||||
struct pipe_query *query;
|
||||
union pipe_query_result qresult;
|
||||
|
||||
cso = cso_create_context(ctx);
|
||||
cb = util_create_texture2d(ctx->screen, 256, 256,
|
||||
PIPE_FORMAT_R8G8B8A8_UNORM);
|
||||
util_set_common_states_and_clear(cso, ctx, cb);
|
||||
|
||||
/* No rasterization. */
|
||||
rs.rasterizer_discard = 1;
|
||||
cso_set_rasterizer(cso, &rs);
|
||||
|
||||
vs = util_set_passthrough_vertex_shader(cso, ctx, false);
|
||||
|
||||
query = ctx->create_query(ctx, PIPE_QUERY_PRIMITIVES_GENERATED, 0);
|
||||
ctx->begin_query(ctx, query);
|
||||
util_draw_fullscreen_quad(cso);
|
||||
ctx->end_query(ctx, query);
|
||||
ctx->get_query_result(ctx, query, true, &qresult);
|
||||
|
||||
/* Cleanup. */
|
||||
cso_destroy_context(cso);
|
||||
ctx->delete_vs_state(ctx, vs);
|
||||
ctx->destroy_query(ctx, query);
|
||||
pipe_resource_reference(&cb, NULL);
|
||||
|
||||
/* Check PRIMITIVES_GENERATED. */
|
||||
util_report_result(qresult.u64 == 2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Run all tests. This should be run with a clean context after
|
||||
* context_create.
|
||||
|
@ -459,6 +496,7 @@ util_run_tests(struct pipe_screen *screen)
|
|||
{
|
||||
struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
|
||||
|
||||
null_fragment_shader(ctx);
|
||||
tgsi_vs_window_space_position(ctx);
|
||||
null_sampler_view(ctx, TGSI_TEXTURE_2D);
|
||||
null_sampler_view(ctx, TGSI_TEXTURE_BUFFER);
|
||||
|
|
|
@ -278,7 +278,9 @@ The integer capabilities:
|
|||
in the shader.
|
||||
* ``PIPE_CAP_SHAREABLE_SHADERS``: Whether shader CSOs can be used by any
|
||||
pipe_context.
|
||||
|
||||
* ``PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS``:
|
||||
Whether copying between compressed and plain formats is supported where
|
||||
a compressed block is copied to/from a plain pixel of the same size.
|
||||
|
||||
|
||||
.. _pipe_capf:
|
||||
|
|
|
@ -81,7 +81,7 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
|
||||
info->restart_index : 0xffffffff);
|
||||
|
||||
if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex &&
|
||||
if (ctx->rasterizer->point_size_per_vertex &&
|
||||
(info->mode == PIPE_PRIM_POINTS))
|
||||
primtype = DI_PT_POINTLIST_PSIZE;
|
||||
|
||||
|
@ -137,7 +137,7 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
|
|||
.key = {
|
||||
/* do binning pass first: */
|
||||
.binning_pass = true,
|
||||
.color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
|
||||
.color_two_side = ctx->rasterizer->light_twoside,
|
||||
// TODO set .half_precision based on render target format,
|
||||
// ie. float16 and smaller use half, float32 use full..
|
||||
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
|
||||
|
@ -149,9 +149,9 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
|
|||
.fsaturate_t = fd3_ctx->fsaturate_t,
|
||||
.fsaturate_r = fd3_ctx->fsaturate_r,
|
||||
},
|
||||
.rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
|
||||
.sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : 0,
|
||||
.sprite_coord_mode = ctx->rasterizer ? ctx->rasterizer->sprite_coord_mode : false,
|
||||
.rasterflat = ctx->rasterizer->flatshade,
|
||||
.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
|
||||
.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
|
||||
};
|
||||
unsigned dirty;
|
||||
|
||||
|
|
|
@ -627,7 +627,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
ctx->prog.dirty = 0;
|
||||
}
|
||||
|
||||
if ((dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) && ctx->blend) {
|
||||
if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
|
||||
struct fd3_blend_stateobj *blend = fd3_blend_stateobj(ctx->blend);
|
||||
uint32_t i;
|
||||
|
||||
|
|
|
@ -118,12 +118,12 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
|
|||
.key = {
|
||||
/* do binning pass first: */
|
||||
.binning_pass = true,
|
||||
.color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
|
||||
.rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
|
||||
.color_two_side = ctx->rasterizer->light_twoside,
|
||||
.rasterflat = ctx->rasterizer->flatshade,
|
||||
// TODO set .half_precision based on render target format,
|
||||
// ie. float16 and smaller use half, float32 use full..
|
||||
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
|
||||
.ucp_enables = ctx->rasterizer ? ctx->rasterizer->clip_plane_enable : 0,
|
||||
.ucp_enables = ctx->rasterizer->clip_plane_enable,
|
||||
.has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate),
|
||||
.vsaturate_s = fd4_ctx->vsaturate_s,
|
||||
.vsaturate_t = fd4_ctx->vsaturate_t,
|
||||
|
@ -132,9 +132,9 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
|
|||
.fsaturate_t = fd4_ctx->fsaturate_t,
|
||||
.fsaturate_r = fd4_ctx->fsaturate_r,
|
||||
},
|
||||
.rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
|
||||
.sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : false,
|
||||
.sprite_coord_mode = ctx->rasterizer ? ctx->rasterizer->sprite_coord_mode : false,
|
||||
.rasterflat = ctx->rasterizer->flatshade,
|
||||
.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
|
||||
.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
|
||||
};
|
||||
unsigned dirty;
|
||||
|
||||
|
|
|
@ -594,7 +594,7 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
ctx->prog.dirty = 0;
|
||||
}
|
||||
|
||||
if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
|
||||
if ((dirty & FD_DIRTY_BLEND)) {
|
||||
struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend);
|
||||
uint32_t i;
|
||||
|
||||
|
|
|
@ -238,6 +238,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_MAX_VIEWPORTS:
|
||||
|
|
|
@ -252,6 +252,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
|
|||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
|
||||
|
|
|
@ -202,14 +202,16 @@ static inline void
|
|||
gen6_3DSTATE_WM(struct ilo_builder *builder,
|
||||
const struct ilo_state_raster *rs,
|
||||
const struct ilo_state_ps *ps,
|
||||
uint32_t kernel_offset)
|
||||
uint32_t kernel_offset,
|
||||
struct intel_bo *scratch_bo)
|
||||
{
|
||||
const uint8_t cmd_len = 9;
|
||||
uint32_t *dw;
|
||||
unsigned pos;
|
||||
|
||||
ILO_DEV_ASSERT(builder->dev, 6, 6);
|
||||
|
||||
ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
|
||||
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
|
||||
dw[1] = kernel_offset;
|
||||
|
@ -221,6 +223,11 @@ gen6_3DSTATE_WM(struct ilo_builder *builder,
|
|||
dw[6] = rs->wm[2] | ps->ps[4];
|
||||
dw[7] = 0; /* kernel 1 */
|
||||
dw[8] = 0; /* kernel 2 */
|
||||
|
||||
if (ilo_state_ps_get_scratch_size(ps)) {
|
||||
ilo_builder_batch_reloc(builder, pos + 2, scratch_bo,
|
||||
ps->ps[0], 0);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
@ -329,14 +336,16 @@ gen8_3DSTATE_WM_CHROMAKEY(struct ilo_builder *builder)
|
|||
static inline void
|
||||
gen7_3DSTATE_PS(struct ilo_builder *builder,
|
||||
const struct ilo_state_ps *ps,
|
||||
uint32_t kernel_offset)
|
||||
uint32_t kernel_offset,
|
||||
struct intel_bo *scratch_bo)
|
||||
{
|
||||
const uint8_t cmd_len = 8;
|
||||
uint32_t *dw;
|
||||
unsigned pos;
|
||||
|
||||
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
|
||||
|
||||
ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
|
||||
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
|
||||
dw[1] = kernel_offset;
|
||||
|
@ -347,19 +356,26 @@ gen7_3DSTATE_PS(struct ilo_builder *builder,
|
|||
dw[5] = ps->ps[5];
|
||||
dw[6] = 0; /* kernel 1 */
|
||||
dw[7] = 0; /* kernel 2 */
|
||||
|
||||
if (ilo_state_ps_get_scratch_size(ps)) {
|
||||
ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
|
||||
ps->ps[3], 0);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
gen8_3DSTATE_PS(struct ilo_builder *builder,
|
||||
const struct ilo_state_ps *ps,
|
||||
uint32_t kernel_offset)
|
||||
uint32_t kernel_offset,
|
||||
struct intel_bo *scratch_bo)
|
||||
{
|
||||
const uint8_t cmd_len = 12;
|
||||
uint32_t *dw;
|
||||
unsigned pos;
|
||||
|
||||
ILO_DEV_ASSERT(builder->dev, 8, 8);
|
||||
|
||||
ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
|
||||
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
|
||||
dw[1] = kernel_offset;
|
||||
|
@ -374,6 +390,11 @@ gen8_3DSTATE_PS(struct ilo_builder *builder,
|
|||
dw[9] = 0;
|
||||
dw[10] = 0; /* kernel 2 */
|
||||
dw[11] = 0;
|
||||
|
||||
if (ilo_state_ps_get_scratch_size(ps)) {
|
||||
ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
|
||||
ps->ps[1], 0);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
|
|
@ -477,14 +477,16 @@ gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
|
|||
static inline void
|
||||
gen6_3DSTATE_VS(struct ilo_builder *builder,
|
||||
const struct ilo_state_vs *vs,
|
||||
uint32_t kernel_offset)
|
||||
uint32_t kernel_offset,
|
||||
struct intel_bo *scratch_bo)
|
||||
{
|
||||
const uint8_t cmd_len = 6;
|
||||
uint32_t *dw;
|
||||
unsigned pos;
|
||||
|
||||
ILO_DEV_ASSERT(builder->dev, 6, 7.5);
|
||||
|
||||
ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
|
||||
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
|
||||
dw[1] = kernel_offset;
|
||||
|
@ -493,19 +495,26 @@ gen6_3DSTATE_VS(struct ilo_builder *builder,
|
|||
dw[3] = vs->vs[1];
|
||||
dw[4] = vs->vs[2];
|
||||
dw[5] = vs->vs[3];
|
||||
|
||||
if (ilo_state_vs_get_scratch_size(vs)) {
|
||||
ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
|
||||
vs->vs[1], 0);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
gen8_3DSTATE_VS(struct ilo_builder *builder,
|
||||
const struct ilo_state_vs *vs,
|
||||
uint32_t kernel_offset)
|
||||
uint32_t kernel_offset,
|
||||
struct intel_bo *scratch_bo)
|
||||
{
|
||||
const uint8_t cmd_len = 9;
|
||||
uint32_t *dw;
|
||||
unsigned pos;
|
||||
|
||||
ILO_DEV_ASSERT(builder->dev, 8, 8);
|
||||
|
||||
ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
|
||||
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
|
||||
dw[1] = kernel_offset;
|
||||
|
@ -517,19 +526,26 @@ gen8_3DSTATE_VS(struct ilo_builder *builder,
|
|||
dw[6] = vs->vs[2];
|
||||
dw[7] = vs->vs[3];
|
||||
dw[8] = vs->vs[4];
|
||||
|
||||
if (ilo_state_vs_get_scratch_size(vs)) {
|
||||
ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
|
||||
vs->vs[1], 0);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
gen7_3DSTATE_HS(struct ilo_builder *builder,
|
||||
const struct ilo_state_hs *hs,
|
||||
uint32_t kernel_offset)
|
||||
uint32_t kernel_offset,
|
||||
struct intel_bo *scratch_bo)
|
||||
{
|
||||
const uint8_t cmd_len = 7;
|
||||
uint32_t *dw;
|
||||
unsigned pos;
|
||||
|
||||
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
|
||||
|
||||
ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
|
||||
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
|
||||
/* see hs_set_gen7_3DSTATE_HS() */
|
||||
|
@ -539,19 +555,26 @@ gen7_3DSTATE_HS(struct ilo_builder *builder,
|
|||
dw[4] = hs->hs[2];
|
||||
dw[5] = hs->hs[3];
|
||||
dw[6] = 0;
|
||||
|
||||
if (ilo_state_hs_get_scratch_size(hs)) {
|
||||
ilo_builder_batch_reloc(builder, pos + 4, scratch_bo,
|
||||
hs->hs[2], 0);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
gen8_3DSTATE_HS(struct ilo_builder *builder,
|
||||
const struct ilo_state_hs *hs,
|
||||
uint32_t kernel_offset)
|
||||
uint32_t kernel_offset,
|
||||
struct intel_bo *scratch_bo)
|
||||
{
|
||||
const uint8_t cmd_len = 9;
|
||||
uint32_t *dw;
|
||||
unsigned pos;
|
||||
|
||||
ILO_DEV_ASSERT(builder->dev, 8, 8);
|
||||
|
||||
ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
|
||||
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
|
||||
/* see hs_set_gen7_3DSTATE_HS() */
|
||||
|
@ -563,6 +586,11 @@ gen8_3DSTATE_HS(struct ilo_builder *builder,
|
|||
dw[6] = 0;
|
||||
dw[7] = hs->hs[3];
|
||||
dw[8] = 0;
|
||||
|
||||
if (ilo_state_hs_get_scratch_size(hs)) {
|
||||
ilo_builder_batch_reloc64(builder, pos + 5, scratch_bo,
|
||||
hs->hs[2], 0);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
@ -586,14 +614,16 @@ gen7_3DSTATE_TE(struct ilo_builder *builder,
|
|||
static inline void
|
||||
gen7_3DSTATE_DS(struct ilo_builder *builder,
|
||||
const struct ilo_state_ds *ds,
|
||||
uint32_t kernel_offset)
|
||||
uint32_t kernel_offset,
|
||||
struct intel_bo *scratch_bo)
|
||||
{
|
||||
const uint8_t cmd_len = 6;
|
||||
uint32_t *dw;
|
||||
unsigned pos;
|
||||
|
||||
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
|
||||
|
||||
ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
|
||||
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
|
||||
/* see ds_set_gen7_3DSTATE_DS() */
|
||||
|
@ -602,19 +632,26 @@ gen7_3DSTATE_DS(struct ilo_builder *builder,
|
|||
dw[3] = ds->ds[1];
|
||||
dw[4] = ds->ds[2];
|
||||
dw[5] = ds->ds[3];
|
||||
|
||||
if (ilo_state_ds_get_scratch_size(ds)) {
|
||||
ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
|
||||
ds->ds[1], 0);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
gen8_3DSTATE_DS(struct ilo_builder *builder,
|
||||
const struct ilo_state_ds *ds,
|
||||
uint32_t kernel_offset)
|
||||
uint32_t kernel_offset,
|
||||
struct intel_bo *scratch_bo)
|
||||
{
|
||||
const uint8_t cmd_len = 9;
|
||||
uint32_t *dw;
|
||||
unsigned pos;
|
||||
|
||||
ILO_DEV_ASSERT(builder->dev, 8, 8);
|
||||
|
||||
ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
|
||||
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
|
||||
/* see ds_set_gen7_3DSTATE_DS() */
|
||||
|
@ -626,19 +663,26 @@ gen8_3DSTATE_DS(struct ilo_builder *builder,
|
|||
dw[6] = ds->ds[2];
|
||||
dw[7] = ds->ds[3];
|
||||
dw[8] = ds->ds[4];
|
||||
|
||||
if (ilo_state_ds_get_scratch_size(ds)) {
|
||||
ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
|
||||
ds->ds[1], 0);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
gen6_3DSTATE_GS(struct ilo_builder *builder,
|
||||
const struct ilo_state_gs *gs,
|
||||
uint32_t kernel_offset)
|
||||
uint32_t kernel_offset,
|
||||
struct intel_bo *scratch_bo)
|
||||
{
|
||||
const uint8_t cmd_len = 7;
|
||||
uint32_t *dw;
|
||||
unsigned pos;
|
||||
|
||||
ILO_DEV_ASSERT(builder->dev, 6, 6);
|
||||
|
||||
ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
|
||||
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
|
||||
dw[1] = kernel_offset;
|
||||
|
@ -648,6 +692,11 @@ gen6_3DSTATE_GS(struct ilo_builder *builder,
|
|||
dw[4] = gs->gs[2];
|
||||
dw[5] = gs->gs[3];
|
||||
dw[6] = gs->gs[4];
|
||||
|
||||
if (ilo_state_gs_get_scratch_size(gs)) {
|
||||
ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
|
||||
gs->gs[1], 0);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
@ -677,14 +726,16 @@ gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder,
|
|||
static inline void
|
||||
gen7_3DSTATE_GS(struct ilo_builder *builder,
|
||||
const struct ilo_state_gs *gs,
|
||||
uint32_t kernel_offset)
|
||||
uint32_t kernel_offset,
|
||||
struct intel_bo *scratch_bo)
|
||||
{
|
||||
const uint8_t cmd_len = 7;
|
||||
uint32_t *dw;
|
||||
unsigned pos;
|
||||
|
||||
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
|
||||
|
||||
ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
|
||||
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
|
||||
dw[1] = kernel_offset;
|
||||
|
@ -694,19 +745,26 @@ gen7_3DSTATE_GS(struct ilo_builder *builder,
|
|||
dw[4] = gs->gs[2];
|
||||
dw[5] = gs->gs[3];
|
||||
dw[6] = 0;
|
||||
|
||||
if (ilo_state_gs_get_scratch_size(gs)) {
|
||||
ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
|
||||
gs->gs[1], 0);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
gen8_3DSTATE_GS(struct ilo_builder *builder,
|
||||
const struct ilo_state_gs *gs,
|
||||
uint32_t kernel_offset)
|
||||
uint32_t kernel_offset,
|
||||
struct intel_bo *scratch_bo)
|
||||
{
|
||||
const uint8_t cmd_len = 10;
|
||||
uint32_t *dw;
|
||||
unsigned pos;
|
||||
|
||||
ILO_DEV_ASSERT(builder->dev, 8, 8);
|
||||
|
||||
ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
|
||||
|
||||
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
|
||||
dw[1] = kernel_offset;
|
||||
|
@ -719,6 +777,11 @@ gen8_3DSTATE_GS(struct ilo_builder *builder,
|
|||
dw[7] = gs->gs[3];
|
||||
dw[8] = 0;
|
||||
dw[9] = gs->gs[4];
|
||||
|
||||
if (ilo_state_gs_get_scratch_size(gs)) {
|
||||
ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
|
||||
gs->gs[1], 0);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
|
|
@ -158,7 +158,8 @@ compute_interface_get_gen6_read_end(const struct ilo_dev *dev,
|
|||
*/
|
||||
assert(per_thread_read <= 63);
|
||||
|
||||
/* From the Haswell PRM, volume 2d, page 199:
|
||||
/*
|
||||
* From the Haswell PRM, volume 2d, page 199:
|
||||
*
|
||||
* "(Cross-Thread Constant Data Read Length) [0,127]"
|
||||
*/
|
||||
|
@ -210,38 +211,68 @@ compute_validate_gen6(const struct ilo_dev *dev,
|
|||
return true;
|
||||
}
|
||||
|
||||
static uint8_t
|
||||
compute_get_gen6_scratch_space(const struct ilo_dev *dev,
|
||||
const struct ilo_state_compute_info *info)
|
||||
static uint32_t
|
||||
compute_get_gen6_per_thread_scratch_size(const struct ilo_dev *dev,
|
||||
const struct ilo_state_compute_info *info,
|
||||
uint8_t *per_thread_space)
|
||||
{
|
||||
uint32_t scratch_size = 0;
|
||||
uint8_t i;
|
||||
ILO_DEV_ASSERT(dev, 6, 7);
|
||||
|
||||
ILO_DEV_ASSERT(dev, 6, 8);
|
||||
/*
|
||||
* From the Sandy Bridge PRM, volume 2 part 2, page 30:
|
||||
*
|
||||
* "(Per Thread Scratch Space)
|
||||
* Range = [0,11] indicating [1k bytes, 12k bytes] [DevSNB]"
|
||||
*/
|
||||
assert(info->per_thread_scratch_size <= 12 * 1024);
|
||||
|
||||
for (i = 0; i < info->interface_count; i++) {
|
||||
if (scratch_size < info->interfaces[i].scratch_size)
|
||||
scratch_size = info->interfaces[i].scratch_size;
|
||||
if (!info->per_thread_scratch_size) {
|
||||
*per_thread_space = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
|
||||
assert(scratch_size <= 2 * 1024 * 1024);
|
||||
*per_thread_space = (info->per_thread_scratch_size > 1024) ?
|
||||
(info->per_thread_scratch_size - 1) / 1024 : 0;
|
||||
|
||||
/* next power of two, starting from 1KB */
|
||||
return (scratch_size > 1024) ?
|
||||
(util_last_bit(scratch_size - 1) - 10): 0;
|
||||
} else if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
|
||||
assert(scratch_size <= 2 * 1024 * 1024);
|
||||
return 1024 * (1 + *per_thread_space);
|
||||
}
|
||||
|
||||
/* next power of two, starting from 2KB */
|
||||
return (scratch_size > 2048) ?
|
||||
(util_last_bit(scratch_size - 1) - 11): 0;
|
||||
} else {
|
||||
assert(scratch_size <= 12 * 1024);
|
||||
static uint32_t
|
||||
compute_get_gen75_per_thread_scratch_size(const struct ilo_dev *dev,
|
||||
const struct ilo_state_compute_info *info,
|
||||
uint8_t *per_thread_space)
|
||||
{
|
||||
ILO_DEV_ASSERT(dev, 7.5, 8);
|
||||
|
||||
return (scratch_size > 1024) ?
|
||||
(scratch_size - 1) / 1024 : 0;
|
||||
/*
|
||||
* From the Haswell PRM, volume 2b, page 407:
|
||||
*
|
||||
* "(Per Thread Scratch Space)
|
||||
* [0,10] Indicating [2k bytes, 2 Mbytes]"
|
||||
*
|
||||
* "Note: The scratch space should be declared as 2x the desired
|
||||
* scratch space. The stack will start at the half-way point instead
|
||||
* of the end. The upper half of scratch space will not be accessed
|
||||
* and so does not have to be allocated in memory."
|
||||
*
|
||||
* From the Broadwell PRM, volume 2a, page 450:
|
||||
*
|
||||
* "(Per Thread Scratch Space)
|
||||
* [0,11] indicating [1k bytes, 2 Mbytes]"
|
||||
*/
|
||||
assert(info->per_thread_scratch_size <=
|
||||
((ilo_dev_gen(dev) >= ILO_GEN(8)) ? 2 : 1) * 1024 * 1024);
|
||||
|
||||
if (!info->per_thread_scratch_size) {
|
||||
*per_thread_space = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* next power of two, starting from 1KB */
|
||||
*per_thread_space = (info->per_thread_scratch_size > 1024) ?
|
||||
(util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0;
|
||||
|
||||
return 1 << (10 + *per_thread_space);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
@ -250,7 +281,8 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
|
|||
const struct ilo_state_compute_info *info)
|
||||
{
|
||||
struct compute_urb_configuration urb;
|
||||
uint8_t scratch_space;
|
||||
uint32_t per_thread_size;
|
||||
uint8_t per_thread_space;
|
||||
|
||||
uint32_t dw1, dw2, dw4;
|
||||
|
||||
|
@ -260,9 +292,16 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
|
|||
!compute_validate_gen6(dev, info, &urb))
|
||||
return false;
|
||||
|
||||
scratch_space = compute_get_gen6_scratch_space(dev, info);
|
||||
if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
|
||||
per_thread_size = compute_get_gen75_per_thread_scratch_size(dev,
|
||||
info, &per_thread_space);
|
||||
} else {
|
||||
per_thread_size = compute_get_gen6_per_thread_scratch_size(dev,
|
||||
info, &per_thread_space);
|
||||
}
|
||||
|
||||
dw1 = per_thread_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT;
|
||||
|
||||
dw1 = scratch_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT;
|
||||
dw2 = (dev->thread_count - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT |
|
||||
urb.urb_entry_count << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT |
|
||||
GEN6_VFE_DW2_RESET_GATEWAY_TIMER |
|
||||
|
@ -281,6 +320,8 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
|
|||
compute->vfe[1] = dw2;
|
||||
compute->vfe[2] = dw4;
|
||||
|
||||
compute->scratch_size = per_thread_size * dev->thread_count;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -45,8 +45,6 @@ struct ilo_state_compute_interface_info {
|
|||
/* usually 0 unless there are multiple interfaces */
|
||||
uint32_t kernel_offset;
|
||||
|
||||
uint32_t scratch_size;
|
||||
|
||||
uint8_t sampler_count;
|
||||
uint8_t surface_count;
|
||||
|
||||
|
@ -65,6 +63,8 @@ struct ilo_state_compute_info {
|
|||
const struct ilo_state_compute_interface_info *interfaces;
|
||||
uint8_t interface_count;
|
||||
|
||||
uint32_t per_thread_scratch_size;
|
||||
|
||||
uint32_t cv_urb_alloc_size;
|
||||
uint32_t curbe_alloc_size;
|
||||
};
|
||||
|
@ -74,6 +74,8 @@ struct ilo_state_compute {
|
|||
|
||||
uint32_t (*idrt)[6];
|
||||
uint8_t idrt_count;
|
||||
|
||||
uint32_t scratch_size;
|
||||
};
|
||||
|
||||
static inline size_t
|
||||
|
@ -89,4 +91,10 @@ ilo_state_compute_init(struct ilo_state_compute *compute,
|
|||
const struct ilo_dev *dev,
|
||||
const struct ilo_state_compute_info *info);
|
||||
|
||||
/**
 * Return the scratch_size member of \p compute.
 *
 * compute_set_gen6_MEDIA_VFE_STATE() stores the per-thread scratch size
 * multiplied by dev->thread_count in that member.
 */
static inline uint32_t
|
||||
ilo_state_compute_get_scratch_size(const struct ilo_state_compute *compute)
|
||||
{
|
||||
return compute->scratch_size;
|
||||
}
|
||||
|
||||
#endif /* ILO_STATE_COMPUTE_H */
|
||||
|
|
|
@ -37,7 +37,9 @@ enum vertex_stage {
|
|||
|
||||
struct vertex_ff {
|
||||
uint8_t grf_start;
|
||||
uint8_t scratch_space;
|
||||
|
||||
uint8_t per_thread_scratch_space;
|
||||
uint32_t per_thread_scratch_size;
|
||||
|
||||
uint8_t sampler_count;
|
||||
uint8_t surface_count;
|
||||
|
@ -59,13 +61,6 @@ vertex_validate_gen6_kernel(const struct ilo_dev *dev,
|
|||
* others.
|
||||
*/
|
||||
const uint8_t max_grf_start = (stage == STAGE_GS) ? 16 : 32;
|
||||
/*
|
||||
* From the Sandy Bridge PRM, volume 2 part 1, page 134:
|
||||
*
|
||||
* "(Per-Thread Scratch Space)
|
||||
* Range [0,11] indicating [1K Bytes, 2M Bytes]"
|
||||
*/
|
||||
const uint32_t max_scratch_size = 2 * 1024 * 1024;
|
||||
|
||||
ILO_DEV_ASSERT(dev, 6, 8);
|
||||
|
||||
|
@ -73,7 +68,6 @@ vertex_validate_gen6_kernel(const struct ilo_dev *dev,
|
|||
assert(!kernel->offset);
|
||||
|
||||
assert(kernel->grf_start < max_grf_start);
|
||||
assert(kernel->scratch_size <= max_scratch_size);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -112,18 +106,33 @@ vertex_get_gen6_ff(const struct ilo_dev *dev,
|
|||
const struct ilo_state_shader_kernel_info *kernel,
|
||||
const struct ilo_state_shader_resource_info *resource,
|
||||
const struct ilo_state_shader_urb_info *urb,
|
||||
uint32_t per_thread_scratch_size,
|
||||
struct vertex_ff *ff)
|
||||
{
|
||||
ILO_DEV_ASSERT(dev, 6, 8);
|
||||
|
||||
memset(ff, 0, sizeof(*ff));
|
||||
|
||||
if (!vertex_validate_gen6_kernel(dev, stage, kernel) ||
|
||||
!vertex_validate_gen6_urb(dev, stage, urb))
|
||||
return false;
|
||||
|
||||
ff->grf_start = kernel->grf_start;
|
||||
/* next power of two, starting from 1KB */
|
||||
ff->scratch_space = (kernel->scratch_size > 1024) ?
|
||||
(util_last_bit(kernel->scratch_size - 1) - 10): 0;
|
||||
|
||||
if (per_thread_scratch_size) {
|
||||
/*
|
||||
* From the Sandy Bridge PRM, volume 2 part 1, page 134:
|
||||
*
|
||||
* "(Per-Thread Scratch Space)
|
||||
* Range [0,11] indicating [1K Bytes, 2M Bytes]"
|
||||
*/
|
||||
assert(per_thread_scratch_size <= 2 * 1024 * 1024);
|
||||
|
||||
/* next power of two, starting from 1KB */
|
||||
ff->per_thread_scratch_space = (per_thread_scratch_size > 1024) ?
|
||||
(util_last_bit(per_thread_scratch_size - 1) - 10) : 0;
|
||||
ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space);
|
||||
}
|
||||
|
||||
ff->sampler_count = (resource->sampler_count <= 12) ?
|
||||
(resource->sampler_count + 3) / 4 : 4;
|
||||
|
@ -192,8 +201,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
|
|||
|
||||
ILO_DEV_ASSERT(dev, 6, 8);
|
||||
|
||||
if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel,
|
||||
&info->resource, &info->urb, &ff))
|
||||
if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel, &info->resource,
|
||||
&info->urb, info->per_thread_scratch_size, &ff))
|
||||
return false;
|
||||
|
||||
thread_count = vs_get_gen6_thread_count(dev, info);
|
||||
|
@ -207,7 +216,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
|
|||
if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
|
||||
dw2 |= GEN75_THREADDISP_ACCESS_UAV;
|
||||
|
||||
dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
|
||||
dw3 = ff.per_thread_scratch_space <<
|
||||
GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
|
||||
|
||||
dw4 = ff.grf_start << GEN6_VS_DW4_URB_GRF_START__SHIFT |
|
||||
ff.vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
|
||||
|
@ -234,6 +244,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
|
|||
if (ilo_dev_gen(dev) >= ILO_GEN(8))
|
||||
vs->vs[4] = ff.user_clip_enables << GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT;
|
||||
|
||||
vs->scratch_size = ff.per_thread_scratch_size * thread_count;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -273,8 +285,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
|
|||
|
||||
ILO_DEV_ASSERT(dev, 7, 8);
|
||||
|
||||
if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel,
|
||||
&info->resource, &info->urb, &ff))
|
||||
if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel, &info->resource,
|
||||
&info->urb, info->per_thread_scratch_size, &ff))
|
||||
return false;
|
||||
|
||||
thread_count = hs_get_gen7_thread_count(dev, info);
|
||||
|
@ -282,19 +294,22 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
|
|||
dw1 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
|
||||
ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
|
||||
|
||||
if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
|
||||
dw2 = 0 << GEN7_HS_DW2_INSTANCE_COUNT__SHIFT;
|
||||
|
||||
if (ilo_dev_gen(dev) >= ILO_GEN(8))
|
||||
dw2 |= thread_count << GEN8_HS_DW2_MAX_THREADS__SHIFT;
|
||||
else if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
|
||||
dw1 |= thread_count << GEN75_HS_DW1_DISPATCH_MAX_THREADS__SHIFT;
|
||||
else
|
||||
dw1 |= thread_count << GEN7_HS_DW1_DISPATCH_MAX_THREADS__SHIFT;
|
||||
|
||||
dw2 = 0 << GEN7_HS_DW2_INSTANCE_COUNT__SHIFT;
|
||||
|
||||
if (info->dispatch_enable)
|
||||
dw2 |= GEN7_HS_DW2_HS_ENABLE;
|
||||
if (info->stats_enable)
|
||||
dw2 |= GEN7_HS_DW2_STATISTICS;
|
||||
|
||||
dw4 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
|
||||
dw4 = ff.per_thread_scratch_space <<
|
||||
GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
|
||||
|
||||
dw5 = GEN7_HS_DW5_INCLUDE_VERTEX_HANDLES |
|
||||
ff.grf_start << GEN7_HS_DW5_URB_GRF_START__SHIFT |
|
||||
|
@ -310,6 +325,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
|
|||
hs->hs[2] = dw4;
|
||||
hs->hs[3] = dw5;
|
||||
|
||||
hs->scratch_size = ff.per_thread_scratch_size * thread_count;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -373,8 +390,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
|
|||
|
||||
ILO_DEV_ASSERT(dev, 7, 8);
|
||||
|
||||
if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel,
|
||||
&info->resource, &info->urb, &ff))
|
||||
if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel, &info->resource,
|
||||
&info->urb, info->per_thread_scratch_size, &ff))
|
||||
return false;
|
||||
|
||||
thread_count = ds_get_gen7_thread_count(dev, info);
|
||||
|
@ -385,7 +402,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
|
|||
if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
|
||||
dw2 |= GEN75_THREADDISP_ACCESS_UAV;
|
||||
|
||||
dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
|
||||
dw3 = ff.per_thread_scratch_space <<
|
||||
GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
|
||||
|
||||
dw4 = ff.grf_start << GEN7_DS_DW4_URB_GRF_START__SHIFT |
|
||||
ff.vue_read_len << GEN7_DS_DW4_URB_READ_LEN__SHIFT |
|
||||
|
@ -412,6 +430,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
|
|||
if (ilo_dev_gen(dev) >= ILO_GEN(8))
|
||||
ds->ds[4] = ff.user_clip_enables << GEN8_DS_DW8_UCP_CLIP_ENABLES__SHIFT;
|
||||
|
||||
ds->scratch_size = ff.per_thread_scratch_size * thread_count;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -425,8 +445,8 @@ gs_get_gen6_ff(const struct ilo_dev *dev,
|
|||
|
||||
ILO_DEV_ASSERT(dev, 6, 8);
|
||||
|
||||
if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel,
|
||||
&info->resource, &info->urb, ff))
|
||||
if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel, &info->resource,
|
||||
&info->urb, info->per_thread_scratch_size, ff))
|
||||
return false;
|
||||
|
||||
/*
|
||||
|
@ -510,7 +530,8 @@ gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs,
|
|||
ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
|
||||
ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
|
||||
|
||||
dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
|
||||
dw3 = ff.per_thread_scratch_space <<
|
||||
GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
|
||||
|
||||
dw4 = ff.vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
|
||||
ff.vue_read_offset << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
|
||||
|
@ -550,6 +571,8 @@ gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs,
|
|||
gs->gs[3] = dw5;
|
||||
gs->gs[4] = dw6;
|
||||
|
||||
gs->scratch_size = ff.per_thread_scratch_size * thread_count;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -588,7 +611,8 @@ gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs,
|
|||
if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
|
||||
dw2 |= GEN75_THREADDISP_ACCESS_UAV;
|
||||
|
||||
dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
|
||||
dw3 = ff.per_thread_scratch_space <<
|
||||
GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
|
||||
|
||||
dw4 = vertex_size << GEN7_GS_DW4_OUTPUT_SIZE__SHIFT |
|
||||
0 << GEN7_GS_DW4_OUTPUT_TOPO__SHIFT |
|
||||
|
@ -618,6 +642,8 @@ gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs,
|
|||
if (ilo_dev_gen(dev) >= ILO_GEN(8))
|
||||
gs->gs[4] = ff.user_clip_enables << GEN8_GS_DW9_UCP_CLIP_ENABLES__SHIFT;
|
||||
|
||||
gs->scratch_size = ff.per_thread_scratch_size * thread_count;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -42,8 +42,6 @@ struct ilo_state_shader_kernel_info {
|
|||
|
||||
uint8_t grf_start;
|
||||
uint8_t pcb_attr_count;
|
||||
|
||||
uint32_t scratch_size;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -77,6 +75,7 @@ struct ilo_state_vs_info {
|
|||
struct ilo_state_shader_resource_info resource;
|
||||
struct ilo_state_shader_urb_info urb;
|
||||
|
||||
uint32_t per_thread_scratch_size;
|
||||
bool dispatch_enable;
|
||||
bool stats_enable;
|
||||
};
|
||||
|
@ -86,6 +85,7 @@ struct ilo_state_hs_info {
|
|||
struct ilo_state_shader_resource_info resource;
|
||||
struct ilo_state_shader_urb_info urb;
|
||||
|
||||
uint32_t per_thread_scratch_size;
|
||||
bool dispatch_enable;
|
||||
bool stats_enable;
|
||||
};
|
||||
|
@ -95,6 +95,7 @@ struct ilo_state_ds_info {
|
|||
struct ilo_state_shader_resource_info resource;
|
||||
struct ilo_state_shader_urb_info urb;
|
||||
|
||||
uint32_t per_thread_scratch_size;
|
||||
bool dispatch_enable;
|
||||
bool stats_enable;
|
||||
};
|
||||
|
@ -119,6 +120,7 @@ struct ilo_state_gs_info {
|
|||
|
||||
struct ilo_state_gs_sol_info sol;
|
||||
|
||||
uint32_t per_thread_scratch_size;
|
||||
bool dispatch_enable;
|
||||
bool stats_enable;
|
||||
};
|
||||
|
@ -158,6 +160,8 @@ struct ilo_state_ps_info {
|
|||
struct ilo_state_ps_io_info io;
|
||||
struct ilo_state_ps_params_info params;
|
||||
|
||||
uint32_t per_thread_scratch_size;
|
||||
|
||||
/* bitmask of GEN6_PS_DISPATCH_x */
|
||||
uint8_t valid_kernels;
|
||||
bool per_sample_dispatch;
|
||||
|
@ -173,23 +177,28 @@ struct ilo_state_ps_info {
|
|||
|
||||
/*
 * Vertex shader state object; it is what gen6_3DSTATE_VS() and
 * gen8_3DSTATE_VS() are handed when the command is emitted.
 */
struct ilo_state_vs {
|
||||
/* NOTE(review): presumably the pre-packed 3DSTATE_VS dwords -- confirm */
uint32_t vs[5];
|
||||
/*
 * Value returned by ilo_state_vs_get_scratch_size().
 * NOTE(review): presumably per-thread scratch size times thread count, as
 * the GS and PS paths compute it -- confirm against the VS init code.
 */
uint32_t scratch_size;
|
||||
};
|
||||
|
||||
/* Hull shader state object. */
struct ilo_state_hs {
|
||||
/* NOTE(review): presumably the pre-packed 3DSTATE_HS dwords -- confirm */
uint32_t hs[4];
|
||||
/* Value returned by ilo_state_hs_get_scratch_size(). */
uint32_t scratch_size;
|
||||
};
|
||||
|
||||
/* Domain shader state object. */
struct ilo_state_ds {
|
||||
/* NOTE(review): presumably the pre-packed 3DSTATE_TE dwords -- confirm */
uint32_t te[3];
|
||||
/* NOTE(review): presumably the pre-packed 3DSTATE_DS dwords -- confirm */
uint32_t ds[5];
|
||||
/* Value returned by ilo_state_ds_get_scratch_size(). */
uint32_t scratch_size;
|
||||
};
|
||||
|
||||
/* Geometry shader state object. */
struct ilo_state_gs {
|
||||
/* command dwords filled in by the gs_set_gen*_3DSTATE_GS() helpers */
uint32_t gs[5];
|
||||
/*
 * Total scratch size: the helpers store the per-thread scratch size
 * multiplied by the thread count here.  Read back through
 * ilo_state_gs_get_scratch_size().
 */
uint32_t scratch_size;
|
||||
};
|
||||
|
||||
struct ilo_state_ps {
|
||||
uint32_t ps[8];
|
||||
uint32_t scratch_size;
|
||||
|
||||
struct ilo_state_ps_dispatch_conds {
|
||||
bool ps_valid;
|
||||
|
@ -211,6 +220,12 @@ bool
|
|||
ilo_state_vs_init_disabled(struct ilo_state_vs *vs,
|
||||
const struct ilo_dev *dev);
|
||||
|
||||
static inline uint32_t
|
||||
ilo_state_vs_get_scratch_size(const struct ilo_state_vs *vs)
|
||||
{
|
||||
return vs->scratch_size;
|
||||
}
|
||||
|
||||
bool
|
||||
ilo_state_hs_init(struct ilo_state_hs *hs,
|
||||
const struct ilo_dev *dev,
|
||||
|
@ -221,6 +236,12 @@ ilo_state_hs_init_disabled(struct ilo_state_hs *hs,
|
|||
const struct ilo_dev *dev);
|
||||
|
||||
|
||||
static inline uint32_t
|
||||
ilo_state_hs_get_scratch_size(const struct ilo_state_hs *hs)
|
||||
{
|
||||
return hs->scratch_size;
|
||||
}
|
||||
|
||||
bool
|
||||
ilo_state_ds_init(struct ilo_state_ds *ds,
|
||||
const struct ilo_dev *dev,
|
||||
|
@ -230,6 +251,12 @@ bool
|
|||
ilo_state_ds_init_disabled(struct ilo_state_ds *ds,
|
||||
const struct ilo_dev *dev);
|
||||
|
||||
static inline uint32_t
|
||||
ilo_state_ds_get_scratch_size(const struct ilo_state_ds *ds)
|
||||
{
|
||||
return ds->scratch_size;
|
||||
}
|
||||
|
||||
bool
|
||||
ilo_state_gs_init(struct ilo_state_gs *gs,
|
||||
const struct ilo_dev *dev,
|
||||
|
@ -239,6 +266,12 @@ bool
|
|||
ilo_state_gs_init_disabled(struct ilo_state_gs *gs,
|
||||
const struct ilo_dev *dev);
|
||||
|
||||
static inline uint32_t
|
||||
ilo_state_gs_get_scratch_size(const struct ilo_state_gs *gs)
|
||||
{
|
||||
return gs->scratch_size;
|
||||
}
|
||||
|
||||
bool
|
||||
ilo_state_ps_init(struct ilo_state_ps *ps,
|
||||
const struct ilo_dev *dev,
|
||||
|
@ -253,4 +286,10 @@ ilo_state_ps_set_params(struct ilo_state_ps *ps,
|
|||
const struct ilo_dev *dev,
|
||||
const struct ilo_state_ps_params_info *params);
|
||||
|
||||
static inline uint32_t
|
||||
ilo_state_ps_get_scratch_size(const struct ilo_state_ps *ps)
|
||||
{
|
||||
return ps->scratch_size;
|
||||
}
|
||||
|
||||
#endif /* ILO_STATE_SHADER_H */
|
||||
|
|
|
@ -34,7 +34,8 @@ struct pixel_ff {
|
|||
uint32_t kernel_offsets[3];
|
||||
uint8_t grf_starts[3];
|
||||
bool pcb_enable;
|
||||
uint8_t scratch_space;
|
||||
uint8_t per_thread_scratch_space;
|
||||
uint32_t per_thread_scratch_size;
|
||||
|
||||
uint8_t sampler_count;
|
||||
uint8_t surface_count;
|
||||
|
@ -56,13 +57,6 @@ ps_kernel_validate_gen6(const struct ilo_dev *dev,
|
|||
{
|
||||
/* "Dispatch GRF Start Register for Constant/Setup Data" is U7 */
|
||||
const uint8_t max_grf_start = 128;
|
||||
/*
|
||||
* From the Sandy Bridge PRM, volume 2 part 1, page 271:
|
||||
*
|
||||
* "(Per-Thread Scratch Space)
|
||||
* Range [0,11] indicating [1k bytes, 2M bytes] in powers of two"
|
||||
*/
|
||||
const uint32_t max_scratch_size = 2 * 1024 * 1024;
|
||||
|
||||
ILO_DEV_ASSERT(dev, 6, 8);
|
||||
|
||||
|
@ -70,7 +64,6 @@ ps_kernel_validate_gen6(const struct ilo_dev *dev,
|
|||
assert(kernel->offset % 64 == 0);
|
||||
|
||||
assert(kernel->grf_start < max_grf_start);
|
||||
assert(kernel->scratch_size <= max_scratch_size);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -325,7 +318,6 @@ ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
|
|||
const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
|
||||
const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
|
||||
const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
|
||||
uint32_t scratch_size;
|
||||
|
||||
ILO_DEV_ASSERT(dev, 6, 8);
|
||||
|
||||
|
@ -363,21 +355,6 @@ ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
|
|||
((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
|
||||
kernel_32->pcb_attr_count));
|
||||
|
||||
scratch_size = 0;
|
||||
if ((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
|
||||
scratch_size < kernel_8->scratch_size)
|
||||
scratch_size = kernel_8->scratch_size;
|
||||
if ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
|
||||
scratch_size < kernel_16->scratch_size)
|
||||
scratch_size = kernel_16->scratch_size;
|
||||
if ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
|
||||
scratch_size < kernel_32->scratch_size)
|
||||
scratch_size = kernel_32->scratch_size;
|
||||
|
||||
/* next power of two, starting from 1KB */
|
||||
ff->scratch_space = (scratch_size > 1024) ?
|
||||
(util_last_bit(scratch_size - 1) - 10): 0;
|
||||
|
||||
/* GPU hangs on Haswell if none of the dispatch mode bits is set */
|
||||
if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes)
|
||||
ff->dispatch_modes |= GEN6_PS_DISPATCH_8;
|
||||
|
@ -401,6 +378,21 @@ ps_get_gen6_ff(const struct ilo_dev *dev,
|
|||
if (!ps_validate_gen6(dev, info) || !ps_get_gen6_ff_kernels(dev, info, ff))
|
||||
return false;
|
||||
|
||||
if (info->per_thread_scratch_size) {
|
||||
/*
|
||||
* From the Sandy Bridge PRM, volume 2 part 1, page 271:
|
||||
*
|
||||
* "(Per-Thread Scratch Space)
|
||||
* Range [0,11] indicating [1k bytes, 2M bytes] in powers of two"
|
||||
*/
|
||||
assert(info->per_thread_scratch_size <= 2 * 1024 * 1024);
|
||||
|
||||
/* next power of two, starting from 1KB */
|
||||
ff->per_thread_scratch_space = (info->per_thread_scratch_size > 1024) ?
|
||||
(util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0;
|
||||
ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space);
|
||||
}
|
||||
|
||||
ff->sampler_count = (resource->sampler_count <= 12) ?
|
||||
(resource->sampler_count + 3) / 4 : 4;
|
||||
ff->surface_count = resource->surface_count;
|
||||
|
@ -441,7 +433,8 @@ ps_set_gen6_3dstate_wm(struct ilo_state_ps *ps,
|
|||
if (false)
|
||||
dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
|
||||
|
||||
dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
|
||||
dw3 = ff->per_thread_scratch_space <<
|
||||
GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
|
||||
|
||||
dw4 = ff->grf_starts[0] << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
|
||||
ff->grf_starts[1] << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
|
||||
|
@ -539,7 +532,8 @@ ps_set_gen7_3DSTATE_PS(struct ilo_state_ps *ps,
|
|||
if (false)
|
||||
dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
|
||||
|
||||
dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
|
||||
dw3 = ff->per_thread_scratch_space <<
|
||||
GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
|
||||
|
||||
dw4 = io->posoffset << GEN7_PS_DW4_POSOFFSET__SHIFT |
|
||||
ff->dispatch_modes << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
|
||||
|
@ -603,7 +597,8 @@ ps_set_gen8_3DSTATE_PS(struct ilo_state_ps *ps,
|
|||
if (false)
|
||||
dw3 |= GEN6_THREADDISP_FP_MODE_ALT;
|
||||
|
||||
dw4 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
|
||||
dw4 = ff->per_thread_scratch_space <<
|
||||
GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
|
||||
|
||||
dw6 = ff->thread_count << GEN8_PS_DW6_MAX_THREADS__SHIFT |
|
||||
io->posoffset << GEN8_PS_DW6_POSOFFSET__SHIFT |
|
||||
|
@ -705,6 +700,7 @@ ilo_state_ps_init(struct ilo_state_ps *ps,
|
|||
ret &= ps_set_gen6_3dstate_wm(ps, dev, info, &ff);
|
||||
}
|
||||
|
||||
ps->scratch_size = ff.per_thread_scratch_size * ff.thread_count;
|
||||
/* save conditions */
|
||||
ps->conds = ff.conds;
|
||||
|
||||
|
|
|
@ -58,10 +58,12 @@ ilo_blit_resolve_slices(struct ilo_context *ilo,
|
|||
* As it is only used to resolve HiZ right now, return early when there is
|
||||
* no HiZ.
|
||||
*/
|
||||
if (!ilo_image_can_enable_aux(&tex->image, level))
|
||||
if (tex->image.aux.type != ILO_IMAGE_AUX_HIZ ||
|
||||
!ilo_image_can_enable_aux(&tex->image, level))
|
||||
return;
|
||||
|
||||
if (ilo_image_can_enable_aux(&tex->image, level)) {
|
||||
if (tex->image.aux.type == ILO_IMAGE_AUX_HIZ &&
|
||||
ilo_image_can_enable_aux(&tex->image, level)) {
|
||||
ilo_blit_resolve_slices_for_hiz(ilo, res, level,
|
||||
first_slice, num_slices, resolve_flags);
|
||||
}
|
||||
|
|
|
@ -547,6 +547,7 @@ static void
|
|||
ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
||||
{
|
||||
struct ilo_context *ilo = ilo_context(pipe);
|
||||
int vs_scratch_size, gs_scratch_size, fs_scratch_size;
|
||||
|
||||
if (ilo_debug & ILO_DEBUG_DRAW) {
|
||||
if (info->indexed) {
|
||||
|
@ -574,8 +575,15 @@ ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
|||
|
||||
ilo_finalize_3d_states(ilo, info);
|
||||
|
||||
/* upload kernels */
|
||||
ilo_shader_cache_upload(ilo->shader_cache, &ilo->cp->builder);
|
||||
|
||||
/* prepare scratch spaces */
|
||||
ilo_shader_cache_get_max_scratch_sizes(ilo->shader_cache,
|
||||
&vs_scratch_size, &gs_scratch_size, &fs_scratch_size);
|
||||
ilo_render_prepare_scratch_spaces(ilo->render,
|
||||
vs_scratch_size, gs_scratch_size, fs_scratch_size);
|
||||
|
||||
ilo_blit_resolve_framebuffer(ilo);
|
||||
|
||||
/* If draw_vbo ever fails, return immediately. */
|
||||
|
|
|
@ -67,10 +67,49 @@ ilo_render_create(struct ilo_builder *builder)
|
|||
void
|
||||
ilo_render_destroy(struct ilo_render *render)
|
||||
{
|
||||
intel_bo_unref(render->vs_scratch.bo);
|
||||
intel_bo_unref(render->gs_scratch.bo);
|
||||
intel_bo_unref(render->fs_scratch.bo);
|
||||
|
||||
intel_bo_unref(render->workaround_bo);
|
||||
FREE(render);
|
||||
}
|
||||
|
||||
static bool
|
||||
resize_scratch_space(struct ilo_render *render,
|
||||
struct ilo_render_scratch_space *scratch,
|
||||
const char *name, int new_size)
|
||||
{
|
||||
struct intel_bo *bo;
|
||||
|
||||
if (scratch->size >= new_size)
|
||||
return true;
|
||||
|
||||
bo = intel_winsys_alloc_bo(render->builder->winsys, name, new_size, false);
|
||||
if (!bo)
|
||||
return false;
|
||||
|
||||
intel_bo_unref(scratch->bo);
|
||||
scratch->bo = bo;
|
||||
scratch->size = new_size;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ilo_render_prepare_scratch_spaces(struct ilo_render *render,
|
||||
int vs_scratch_size,
|
||||
int gs_scratch_size,
|
||||
int fs_scratch_size)
|
||||
{
|
||||
return (resize_scratch_space(render, &render->vs_scratch,
|
||||
"vs scratch", vs_scratch_size) &&
|
||||
resize_scratch_space(render, &render->gs_scratch,
|
||||
"gs scratch", gs_scratch_size) &&
|
||||
resize_scratch_space(render, &render->fs_scratch,
|
||||
"fs scratch", fs_scratch_size));
|
||||
}
|
||||
|
||||
void
|
||||
ilo_render_get_sample_position(const struct ilo_render *render,
|
||||
unsigned sample_count,
|
||||
|
|
|
@ -43,6 +43,12 @@ ilo_render_create(struct ilo_builder *builder);
|
|||
void
|
||||
ilo_render_destroy(struct ilo_render *render);
|
||||
|
||||
bool
|
||||
ilo_render_prepare_scratch_spaces(struct ilo_render *render,
|
||||
int vs_scratch_size,
|
||||
int gs_scratch_size,
|
||||
int fs_scratch_size);
|
||||
|
||||
void
|
||||
ilo_render_get_sample_position(const struct ilo_render *render,
|
||||
unsigned sample_count,
|
||||
|
|
|
@ -51,6 +51,11 @@ struct ilo_render {
|
|||
|
||||
struct intel_bo *workaround_bo;
|
||||
|
||||
struct ilo_render_scratch_space {
|
||||
struct intel_bo *bo;
|
||||
int size;
|
||||
} vs_scratch, gs_scratch, fs_scratch;
|
||||
|
||||
struct ilo_state_sample_pattern sample_pattern;
|
||||
|
||||
bool hw_ctx_changed;
|
||||
|
|
|
@ -475,10 +475,13 @@ gen6_draw_vs(struct ilo_render *r,
|
|||
gen6_wa_pre_3dstate_vs_toggle(r);
|
||||
|
||||
if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
|
||||
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO))
|
||||
gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs, kernel_offset);
|
||||
else
|
||||
gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
|
||||
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) {
|
||||
gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs,
|
||||
kernel_offset, r->vs_scratch.bo);
|
||||
} else {
|
||||
gen6_3DSTATE_VS(r->builder, &cso->vs,
|
||||
kernel_offset, r->vs_scratch.bo);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -501,7 +504,8 @@ gen6_draw_gs(struct ilo_render *r,
|
|||
cso = ilo_shader_get_kernel_cso(vec->gs);
|
||||
kernel_offset = ilo_shader_get_kernel_offset(vec->gs);
|
||||
|
||||
gen6_3DSTATE_GS(r->builder, &cso->gs, kernel_offset);
|
||||
gen6_3DSTATE_GS(r->builder, &cso->gs,
|
||||
kernel_offset, r->gs_scratch.bo);
|
||||
} else if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
|
||||
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) {
|
||||
const int verts_per_prim =
|
||||
|
@ -524,9 +528,10 @@ gen6_draw_gs(struct ilo_render *r,
|
|||
kernel_offset = ilo_shader_get_kernel_offset(vec->vs) +
|
||||
ilo_shader_get_kernel_param(vec->vs, param);
|
||||
|
||||
gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol, kernel_offset);
|
||||
gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol,
|
||||
kernel_offset, r->gs_scratch.bo);
|
||||
} else {
|
||||
gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0);
|
||||
gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -672,7 +677,7 @@ gen6_draw_wm(struct ilo_render *r,
|
|||
gen6_wa_pre_3dstate_wm_max_threads(r);
|
||||
|
||||
gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs,
|
||||
&cso->ps, kernel_offset);
|
||||
&cso->ps, kernel_offset, r->fs_scratch.bo);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -817,10 +822,10 @@ gen6_rectlist_vs_to_sf(struct ilo_render *r,
|
|||
gen6_wa_post_3dstate_constant_vs(r);
|
||||
|
||||
gen6_wa_pre_3dstate_vs_toggle(r);
|
||||
gen6_3DSTATE_VS(r->builder, &blitter->vs, 0);
|
||||
gen6_3DSTATE_VS(r->builder, &blitter->vs, 0, NULL);
|
||||
|
||||
gen6_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
|
||||
gen6_3DSTATE_GS(r->builder, &blitter->gs, 0);
|
||||
gen6_3DSTATE_GS(r->builder, &blitter->gs, 0, NULL);
|
||||
|
||||
gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs);
|
||||
gen6_3DSTATE_SF(r->builder, &blitter->fb.rs, &blitter->sbe);
|
||||
|
@ -833,7 +838,7 @@ gen6_rectlist_wm(struct ilo_render *r,
|
|||
gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
|
||||
|
||||
gen6_wa_pre_3dstate_wm_max_threads(r);
|
||||
gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0);
|
||||
gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0, NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
@ -318,10 +318,13 @@ gen7_draw_vs(struct ilo_render *r,
|
|||
const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->vs);
|
||||
const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->vs);
|
||||
|
||||
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
|
||||
gen8_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
|
||||
else
|
||||
gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
|
||||
if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) {
|
||||
gen8_3DSTATE_VS(r->builder, &cso->vs,
|
||||
kernel_offset, r->vs_scratch.bo);
|
||||
} else {
|
||||
gen6_3DSTATE_VS(r->builder, &cso->vs,
|
||||
kernel_offset, r->vs_scratch.bo);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -338,9 +341,9 @@ gen7_draw_hs(struct ilo_render *r,
|
|||
gen7_3DSTATE_CONSTANT_HS(r->builder, 0, 0, 0);
|
||||
|
||||
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
|
||||
gen8_3DSTATE_HS(r->builder, hs, kernel_offset);
|
||||
gen8_3DSTATE_HS(r->builder, hs, kernel_offset, NULL);
|
||||
else
|
||||
gen7_3DSTATE_HS(r->builder, hs, kernel_offset);
|
||||
gen7_3DSTATE_HS(r->builder, hs, kernel_offset, NULL);
|
||||
}
|
||||
|
||||
/* 3DSTATE_BINDING_TABLE_POINTERS_HS */
|
||||
|
@ -373,9 +376,9 @@ gen7_draw_ds(struct ilo_render *r,
|
|||
gen7_3DSTATE_CONSTANT_DS(r->builder, 0, 0, 0);
|
||||
|
||||
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
|
||||
gen8_3DSTATE_DS(r->builder, ds, kernel_offset);
|
||||
gen8_3DSTATE_DS(r->builder, ds, kernel_offset, NULL);
|
||||
else
|
||||
gen7_3DSTATE_DS(r->builder, ds, kernel_offset);
|
||||
gen7_3DSTATE_DS(r->builder, ds, kernel_offset, NULL);
|
||||
}
|
||||
|
||||
/* 3DSTATE_BINDING_TABLE_POINTERS_DS */
|
||||
|
@ -397,9 +400,9 @@ gen7_draw_gs(struct ilo_render *r,
|
|||
gen7_3DSTATE_CONSTANT_GS(r->builder, 0, 0, 0);
|
||||
|
||||
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
|
||||
gen8_3DSTATE_GS(r->builder, gs, kernel_offset);
|
||||
gen8_3DSTATE_GS(r->builder, gs, kernel_offset, NULL);
|
||||
else
|
||||
gen7_3DSTATE_GS(r->builder, gs, kernel_offset);
|
||||
gen7_3DSTATE_GS(r->builder, gs, kernel_offset, NULL);
|
||||
}
|
||||
|
||||
/* 3DSTATE_BINDING_TABLE_POINTERS_GS */
|
||||
|
@ -534,7 +537,7 @@ gen7_draw_wm(struct ilo_render *r,
|
|||
if (r->hw_ctx_changed)
|
||||
gen7_wa_pre_3dstate_ps_max_threads(r);
|
||||
|
||||
gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset);
|
||||
gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, r->fs_scratch.bo);
|
||||
}
|
||||
|
||||
/* 3DSTATE_SCISSOR_STATE_POINTERS */
|
||||
|
@ -678,18 +681,18 @@ gen7_rectlist_vs_to_sf(struct ilo_render *r,
|
|||
const struct ilo_blitter *blitter)
|
||||
{
|
||||
gen7_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0);
|
||||
gen6_3DSTATE_VS(r->builder, &blitter->vs, 0);
|
||||
gen6_3DSTATE_VS(r->builder, &blitter->vs, 0, NULL);
|
||||
|
||||
gen7_3DSTATE_CONSTANT_HS(r->builder, NULL, NULL, 0);
|
||||
gen7_3DSTATE_HS(r->builder, &blitter->hs, 0);
|
||||
gen7_3DSTATE_HS(r->builder, &blitter->hs, 0, NULL);
|
||||
|
||||
gen7_3DSTATE_TE(r->builder, &blitter->ds);
|
||||
|
||||
gen7_3DSTATE_CONSTANT_DS(r->builder, NULL, NULL, 0);
|
||||
gen7_3DSTATE_DS(r->builder, &blitter->ds, 0);
|
||||
gen7_3DSTATE_DS(r->builder, &blitter->ds, 0, NULL);
|
||||
|
||||
gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
|
||||
gen7_3DSTATE_GS(r->builder, &blitter->gs, 0);
|
||||
gen7_3DSTATE_GS(r->builder, &blitter->gs, 0, NULL);
|
||||
|
||||
gen7_3DSTATE_STREAMOUT(r->builder, &blitter->sol);
|
||||
|
||||
|
@ -711,7 +714,7 @@ gen7_rectlist_wm(struct ilo_render *r,
|
|||
gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
|
||||
|
||||
gen7_wa_pre_3dstate_ps_max_threads(r);
|
||||
gen7_3DSTATE_PS(r->builder, &blitter->ps, 0);
|
||||
gen7_3DSTATE_PS(r->builder, &blitter->ps, 0, NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
@ -125,7 +125,7 @@ gen8_draw_wm(struct ilo_render *r,
|
|||
|
||||
/* 3DSTATE_PS */
|
||||
if (DIRTY(FS) || r->instruction_bo_changed)
|
||||
gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset);
|
||||
gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, r->fs_scratch.bo);
|
||||
|
||||
/* 3DSTATE_PS_EXTRA */
|
||||
if (DIRTY(FS))
|
||||
|
|
|
@ -474,6 +474,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
|
|
@ -37,6 +37,10 @@
|
|||
/*
 * Shader cache bookkeeping.
 *
 * The max_*_scratch_size fields track the largest scratch size among the
 * shader variants uploaded so far, per shader type:
 * ilo_shader_cache_upload() raises them, ilo_shader_cache_invalidate()
 * resets them to zero, and ilo_shader_cache_get_max_scratch_sizes() reads
 * them out.
 */
struct ilo_shader_cache {
|
||||
struct list_head shaders;
|
||||
struct list_head changed;
|
||||
|
||||
/* largest scratch size of any uploaded vertex shader variant */
int max_vs_scratch_size;
|
||||
/* largest scratch size of any uploaded geometry shader variant */
int max_gs_scratch_size;
|
||||
/* largest scratch size of any uploaded fragment shader variant */
int max_fs_scratch_size;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -121,6 +125,8 @@ ilo_shader_cache_upload(struct ilo_shader_cache *shc,
|
|||
struct ilo_shader *sh;
|
||||
|
||||
LIST_FOR_EACH_ENTRY(sh, &shader->variants, list) {
|
||||
int scratch_size, *cur_max;
|
||||
|
||||
if (sh->uploaded)
|
||||
continue;
|
||||
|
||||
|
@ -128,6 +134,29 @@ ilo_shader_cache_upload(struct ilo_shader_cache *shc,
|
|||
sh->kernel_size, sh->kernel);
|
||||
|
||||
sh->uploaded = true;
|
||||
|
||||
switch (shader->info.type) {
|
||||
case PIPE_SHADER_VERTEX:
|
||||
scratch_size = ilo_state_vs_get_scratch_size(&sh->cso.vs);
|
||||
cur_max = &shc->max_vs_scratch_size;
|
||||
break;
|
||||
case PIPE_SHADER_GEOMETRY:
|
||||
scratch_size = ilo_state_gs_get_scratch_size(&sh->cso.gs);
|
||||
cur_max = &shc->max_gs_scratch_size;
|
||||
break;
|
||||
case PIPE_SHADER_FRAGMENT:
|
||||
scratch_size = ilo_state_ps_get_scratch_size(&sh->cso.ps);
|
||||
cur_max = &shc->max_fs_scratch_size;
|
||||
break;
|
||||
default:
|
||||
assert(!"unknown shader type");
|
||||
scratch_size = 0;
|
||||
cur_max = &shc->max_vs_scratch_size;
|
||||
break;
|
||||
}
|
||||
|
||||
if (*cur_max < scratch_size)
|
||||
*cur_max = scratch_size;
|
||||
}
|
||||
|
||||
list_del(&shader->list);
|
||||
|
@ -155,6 +184,21 @@ ilo_shader_cache_invalidate(struct ilo_shader_cache *shc)
|
|||
LIST_FOR_EACH_ENTRY(sh, &shader->variants, list)
|
||||
sh->uploaded = false;
|
||||
}
|
||||
|
||||
shc->max_vs_scratch_size = 0;
|
||||
shc->max_gs_scratch_size = 0;
|
||||
shc->max_fs_scratch_size = 0;
|
||||
}
|
||||
|
||||
void
|
||||
ilo_shader_cache_get_max_scratch_sizes(const struct ilo_shader_cache *shc,
|
||||
int *vs_scratch_size,
|
||||
int *gs_scratch_size,
|
||||
int *fs_scratch_size)
|
||||
{
|
||||
*vs_scratch_size = shc->max_vs_scratch_size;
|
||||
*gs_scratch_size = shc->max_gs_scratch_size;
|
||||
*fs_scratch_size = shc->max_fs_scratch_size;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -578,7 +622,6 @@ init_shader_kernel(const struct ilo_shader *kernel,
|
|||
kern->grf_start = kernel->in.start_grf;
|
||||
kern->pcb_attr_count =
|
||||
(kernel->pcb.cbuf0_size + kernel->pcb.clip_state_size + 15) / 16;
|
||||
kern->scratch_size = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -602,6 +645,7 @@ init_vs(struct ilo_shader *kernel,
|
|||
init_shader_urb(kernel, state, &info.urb);
|
||||
init_shader_kernel(kernel, state, &info.kernel);
|
||||
init_shader_resource(kernel, state, &info.resource);
|
||||
info.per_thread_scratch_size = kernel->per_thread_scratch_size;
|
||||
info.dispatch_enable = true;
|
||||
info.stats_enable = true;
|
||||
|
||||
|
@ -640,6 +684,7 @@ init_gs(struct ilo_shader *kernel,
|
|||
init_shader_urb(kernel, state, &info.urb);
|
||||
init_shader_kernel(kernel, state, &info.kernel);
|
||||
init_shader_resource(kernel, state, &info.resource);
|
||||
info.per_thread_scratch_size = kernel->per_thread_scratch_size;
|
||||
info.dispatch_enable = true;
|
||||
info.stats_enable = true;
|
||||
|
||||
|
@ -664,6 +709,7 @@ init_ps(struct ilo_shader *kernel,
|
|||
init_shader_kernel(kernel, state, &info.kernel_8);
|
||||
init_shader_resource(kernel, state, &info.resource);
|
||||
|
||||
info.per_thread_scratch_size = kernel->per_thread_scratch_size;
|
||||
info.io.has_rt_write = true;
|
||||
info.io.posoffset = GEN6_POSOFFSET_NONE;
|
||||
info.io.attr_count = kernel->in.count;
|
||||
|
|
|
@ -120,6 +120,12 @@ ilo_shader_cache_upload(struct ilo_shader_cache *shc,
|
|||
void
|
||||
ilo_shader_cache_invalidate(struct ilo_shader_cache *shc);
|
||||
|
||||
void
|
||||
ilo_shader_cache_get_max_scratch_sizes(const struct ilo_shader_cache *shc,
|
||||
int *vs_scratch_size,
|
||||
int *gs_scratch_size,
|
||||
int *fs_scratch_size);
|
||||
|
||||
struct ilo_shader_state *
|
||||
ilo_shader_create_vs(const struct ilo_dev *dev,
|
||||
const struct pipe_shader_state *state,
|
||||
|
|
|
@ -139,6 +139,7 @@ struct ilo_shader {
|
|||
|
||||
void *kernel;
|
||||
int kernel_size;
|
||||
int per_thread_scratch_size;
|
||||
|
||||
struct ilo_kernel_routing routing;
|
||||
struct ilo_state_ps_params_info ps_params;
|
||||
|
|
|
@ -299,6 +299,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
|
||||
return 0;
|
||||
}
|
||||
/* should only get here on unhandled cases */
|
||||
|
|
|
@ -854,10 +854,10 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
|
|||
jit_tex->img_stride[j] = lp_tex->img_stride[j];
|
||||
}
|
||||
|
||||
if (view->target == PIPE_TEXTURE_1D_ARRAY ||
|
||||
view->target == PIPE_TEXTURE_2D_ARRAY ||
|
||||
view->target == PIPE_TEXTURE_CUBE ||
|
||||
view->target == PIPE_TEXTURE_CUBE_ARRAY) {
|
||||
if (res->target == PIPE_TEXTURE_1D_ARRAY ||
|
||||
res->target == PIPE_TEXTURE_2D_ARRAY ||
|
||||
res->target == PIPE_TEXTURE_CUBE ||
|
||||
res->target == PIPE_TEXTURE_CUBE_ARRAY) {
|
||||
/*
|
||||
* For array textures, we don't have first_layer, instead
|
||||
* adjust last_layer (stored as depth) plus the mip level offsets
|
||||
|
|
|
@ -275,10 +275,10 @@ prepare_shader_sampling(
|
|||
row_stride[j] = lp_tex->row_stride[j];
|
||||
img_stride[j] = lp_tex->img_stride[j];
|
||||
}
|
||||
if (view->target == PIPE_TEXTURE_1D_ARRAY ||
|
||||
view->target == PIPE_TEXTURE_2D_ARRAY ||
|
||||
view->target == PIPE_TEXTURE_CUBE ||
|
||||
view->target == PIPE_TEXTURE_CUBE_ARRAY) {
|
||||
if (tex->target == PIPE_TEXTURE_1D_ARRAY ||
|
||||
tex->target == PIPE_TEXTURE_2D_ARRAY ||
|
||||
tex->target == PIPE_TEXTURE_CUBE ||
|
||||
tex->target == PIPE_TEXTURE_CUBE_ARRAY) {
|
||||
num_layers = view->u.tex.last_layer - view->u.tex.first_layer + 1;
|
||||
for (j = first_level; j <= last_level; j++) {
|
||||
mip_offsets[j] += view->u.tex.first_layer *
|
||||
|
|
|
@ -200,7 +200,8 @@ llvmpipe_can_create_resource(struct pipe_screen *screen,
|
|||
|
||||
static boolean
|
||||
llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen,
|
||||
struct llvmpipe_resource *lpr)
|
||||
struct llvmpipe_resource *lpr,
|
||||
const void *map_front_private)
|
||||
{
|
||||
struct sw_winsys *winsys = screen->winsys;
|
||||
|
||||
|
@ -215,12 +216,13 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen,
|
|||
lpr->base.format,
|
||||
width, height,
|
||||
64,
|
||||
map_front_private,
|
||||
&lpr->row_stride[0] );
|
||||
|
||||
if (lpr->dt == NULL)
|
||||
return FALSE;
|
||||
|
||||
{
|
||||
if (!map_front_private) {
|
||||
void *map = winsys->displaytarget_map(winsys, lpr->dt,
|
||||
PIPE_TRANSFER_WRITE);
|
||||
|
||||
|
@ -235,8 +237,9 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen,
|
|||
|
||||
|
||||
static struct pipe_resource *
|
||||
llvmpipe_resource_create(struct pipe_screen *_screen,
|
||||
const struct pipe_resource *templat)
|
||||
llvmpipe_resource_create_front(struct pipe_screen *_screen,
|
||||
const struct pipe_resource *templat,
|
||||
const void *map_front_private)
|
||||
{
|
||||
struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
|
||||
struct llvmpipe_resource *lpr = CALLOC_STRUCT(llvmpipe_resource);
|
||||
|
@ -254,7 +257,7 @@ llvmpipe_resource_create(struct pipe_screen *_screen,
|
|||
PIPE_BIND_SCANOUT |
|
||||
PIPE_BIND_SHARED)) {
|
||||
/* displayable surface */
|
||||
if (!llvmpipe_displaytarget_layout(screen, lpr))
|
||||
if (!llvmpipe_displaytarget_layout(screen, lpr, map_front_private))
|
||||
goto fail;
|
||||
}
|
||||
else {
|
||||
|
@ -300,7 +303,12 @@ llvmpipe_resource_create(struct pipe_screen *_screen,
|
|||
FREE(lpr);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
 * pipe_screen::resource_create hook.
 *
 * Forwards to llvmpipe_resource_create_front() with no front-buffer private
 * data (NULL).
 */
static struct pipe_resource *
llvmpipe_resource_create(struct pipe_screen *_screen,
                         const struct pipe_resource *templat)
{
   struct pipe_resource *const resource =
      llvmpipe_resource_create_front(_screen, templat, NULL);

   return resource;
}
|
||||
|
||||
static void
|
||||
llvmpipe_resource_destroy(struct pipe_screen *pscreen,
|
||||
|
@ -797,6 +805,7 @@ llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen)
|
|||
#endif
|
||||
|
||||
screen->resource_create = llvmpipe_resource_create;
|
||||
screen->resource_create_front = llvmpipe_resource_create_front;
|
||||
screen->resource_destroy = llvmpipe_resource_destroy;
|
||||
screen->resource_from_handle = llvmpipe_resource_from_handle;
|
||||
screen->resource_get_handle = llvmpipe_resource_get_handle;
|
||||
|
|
|
@ -73,6 +73,9 @@ NV50_C_SOURCES := \
|
|||
nv50/nv50_program.h \
|
||||
nv50/nv50_push.c \
|
||||
nv50/nv50_query.c \
|
||||
nv50/nv50_query.h \
|
||||
nv50/nv50_query_hw.c \
|
||||
nv50/nv50_query_hw.h \
|
||||
nv50/nv50_resource.c \
|
||||
nv50/nv50_resource.h \
|
||||
nv50/nv50_screen.c \
|
||||
|
|
|
@ -1128,7 +1128,6 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
|
|||
info->prop.gp.instanceCount = 1;
|
||||
info->prop.gp.maxVertices = 1;
|
||||
}
|
||||
info->io.clipDistance = 0xff;
|
||||
info->io.pointSize = 0xff;
|
||||
info->io.instanceId = 0xff;
|
||||
info->io.vertexId = 0xff;
|
||||
|
|
|
@ -73,8 +73,8 @@ public:
|
|||
|
||||
Instruction *mkCvt(operation, DataType, Value *, DataType, Value *);
|
||||
CmpInstruction *mkCmp(operation, CondCode, DataType,
|
||||
Value *,
|
||||
DataType, Value *, Value *, Value * = NULL);
|
||||
Value *,
|
||||
DataType, Value *, Value *, Value * = NULL);
|
||||
TexInstruction *mkTex(operation, TexTarget,
|
||||
uint16_t tic, uint16_t tsc,
|
||||
const std::vector<Value *> &def,
|
||||
|
|
|
@ -99,6 +99,7 @@ struct nv50_ir_prog_info
|
|||
uint8_t sourceRep; /* NV50_PROGRAM_IR */
|
||||
const void *source;
|
||||
void *relocData;
|
||||
void *interpData;
|
||||
struct nv50_ir_prog_symbol *syms;
|
||||
uint16_t numSyms;
|
||||
} bin;
|
||||
|
@ -143,6 +144,7 @@ struct nv50_ir_prog_info
|
|||
bool earlyFragTests;
|
||||
bool separateFragData;
|
||||
bool usesDiscard;
|
||||
bool sampleInterp; /* perform sample interp on all fp inputs */
|
||||
} fp;
|
||||
struct {
|
||||
uint32_t inputOffset; /* base address for user args */
|
||||
|
@ -154,9 +156,8 @@ struct nv50_ir_prog_info
|
|||
uint8_t numBarriers;
|
||||
|
||||
struct {
|
||||
uint8_t clipDistance; /* index of first clip distance output */
|
||||
uint8_t clipDistanceMask; /* mask of clip distances defined */
|
||||
uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */
|
||||
uint8_t clipDistances; /* number of clip distance outputs */
|
||||
uint8_t cullDistances; /* number of cull distance outputs */
|
||||
int8_t genUserClip; /* request user clip planes for ClipVertex */
|
||||
uint16_t ucpBase; /* base address for UCPs */
|
||||
uint8_t ucpCBSlot; /* constant buffer index of UCP data */
|
||||
|
@ -168,7 +169,6 @@ struct nv50_ir_prog_info
|
|||
int8_t viewportId; /* output index of ViewportIndex */
|
||||
uint8_t fragDepth; /* output index of FragDepth */
|
||||
uint8_t sampleMask; /* output index of SampleMask */
|
||||
bool sampleInterp; /* perform sample interp on all fp inputs */
|
||||
uint8_t backFaceColor[2]; /* input/output indices of back face colour */
|
||||
uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */
|
||||
bool fp64; /* program uses fp64 math */
|
||||
|
@ -198,6 +198,10 @@ extern void nv50_ir_relocate_code(void *relocData, uint32_t *code,
|
|||
uint32_t libPos,
|
||||
uint32_t dataPos);
|
||||
|
||||
extern void
|
||||
nv50_ir_change_interp(void *interpData, uint32_t *code,
|
||||
bool force_per_sample, bool flatshade);
|
||||
|
||||
/* obtain code that will be shared among programs */
|
||||
extern void nv50_ir_get_target_library(uint32_t chipset,
|
||||
const uint32_t **code, uint32_t *size);
|
||||
|
|
|
@ -1437,6 +1437,30 @@ CodeEmitterGK110::emitInterpMode(const Instruction *i)
|
|||
code[1] |= (i->ipa & 0xc) << (19 - 2);
|
||||
}
|
||||
|
||||
static void
|
||||
interpApply(const InterpEntry *entry, uint32_t *code,
|
||||
bool force_persample_interp, bool flatshade)
|
||||
{
|
||||
int ipa = entry->ipa;
|
||||
int reg = entry->reg;
|
||||
int loc = entry->loc;
|
||||
|
||||
if (flatshade &&
|
||||
(ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
|
||||
ipa = NV50_IR_INTERP_FLAT;
|
||||
reg = 0xff;
|
||||
} else if (force_persample_interp &&
|
||||
(ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
|
||||
(ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
|
||||
ipa |= NV50_IR_INTERP_CENTROID;
|
||||
}
|
||||
code[loc + 1] &= ~(0xf << 19);
|
||||
code[loc + 1] |= (ipa & 0x3) << 21;
|
||||
code[loc + 1] |= (ipa & 0xc) << (19 - 2);
|
||||
code[loc + 0] &= ~(0xff << 23);
|
||||
code[loc + 0] |= reg << 23;
|
||||
}
|
||||
|
||||
void
|
||||
CodeEmitterGK110::emitINTERP(const Instruction *i)
|
||||
{
|
||||
|
@ -1448,10 +1472,13 @@ CodeEmitterGK110::emitINTERP(const Instruction *i)
|
|||
if (i->saturate)
|
||||
code[1] |= 1 << 18;
|
||||
|
||||
if (i->op == OP_PINTERP)
|
||||
if (i->op == OP_PINTERP) {
|
||||
srcId(i->src(1), 23);
|
||||
else
|
||||
addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
|
||||
} else {
|
||||
code[0] |= 0xff << 23;
|
||||
addInterp(i->ipa, 0xff, interpApply);
|
||||
}
|
||||
|
||||
srcId(i->src(0).getIndirect(0), 10);
|
||||
emitInterpMode(i);
|
||||
|
|
|
@ -2217,6 +2217,30 @@ CodeEmitterGM107::emitAL2P()
|
|||
emitGPR (0x00, insn->def(0));
|
||||
}
|
||||
|
||||
static void
|
||||
interpApply(const InterpEntry *entry, uint32_t *code,
|
||||
bool force_persample_interp, bool flatshade)
|
||||
{
|
||||
int ipa = entry->ipa;
|
||||
int reg = entry->reg;
|
||||
int loc = entry->loc;
|
||||
|
||||
if (flatshade &&
|
||||
(ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
|
||||
ipa = NV50_IR_INTERP_FLAT;
|
||||
reg = 0xff;
|
||||
} else if (force_persample_interp &&
|
||||
(ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
|
||||
(ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
|
||||
ipa |= NV50_IR_INTERP_CENTROID;
|
||||
}
|
||||
code[loc + 1] &= ~(0xf << 0x14);
|
||||
code[loc + 1] |= (ipa & 0x3) << 0x16;
|
||||
code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
|
||||
code[loc + 0] &= ~(0xff << 0x14);
|
||||
code[loc + 0] |= reg << 0x14;
|
||||
}
|
||||
|
||||
void
|
||||
CodeEmitterGM107::emitIPA()
|
||||
{
|
||||
|
@ -2255,10 +2279,12 @@ CodeEmitterGM107::emitIPA()
|
|||
emitGPR(0x14, insn->src(1));
|
||||
if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
|
||||
emitGPR(0x27, insn->src(2));
|
||||
addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply);
|
||||
} else {
|
||||
if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
|
||||
emitGPR(0x27, insn->src(1));
|
||||
emitGPR(0x14);
|
||||
addInterp(insn->ipa, 0xff, interpApply);
|
||||
}
|
||||
|
||||
if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
|
||||
|
|
|
@ -372,7 +372,7 @@ CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
|
|||
mode |= 3 << (s * 2);
|
||||
break;
|
||||
default:
|
||||
ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
|
||||
ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
|
@ -876,6 +876,30 @@ CodeEmitterNV50::emitPFETCH(const Instruction *i)
|
|||
emitFlagsRd(i);
|
||||
}
|
||||
|
||||
static void
|
||||
interpApply(const InterpEntry *entry, uint32_t *code,
|
||||
bool force_persample_interp, bool flatshade)
|
||||
{
|
||||
int ipa = entry->ipa;
|
||||
int encSize = entry->reg;
|
||||
int loc = entry->loc;
|
||||
|
||||
if ((ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
|
||||
(ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
|
||||
if (force_persample_interp) {
|
||||
if (encSize == 8)
|
||||
code[loc + 1] |= 1 << 16;
|
||||
else
|
||||
code[loc + 0] |= 1 << 24;
|
||||
} else {
|
||||
if (encSize == 8)
|
||||
code[loc + 1] &= ~(1 << 16);
|
||||
else
|
||||
code[loc + 0] &= ~(1 << 24);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
CodeEmitterNV50::emitINTERP(const Instruction *i)
|
||||
{
|
||||
|
@ -904,6 +928,8 @@ CodeEmitterNV50::emitINTERP(const Instruction *i)
|
|||
code[0] |= 1;
|
||||
emitFlagsRd(i);
|
||||
}
|
||||
|
||||
addInterp(i->ipa, i->encSize, interpApply);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -1618,6 +1618,29 @@ CodeEmitterNVC0::emitInterpMode(const Instruction *i)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
interpApply(const InterpEntry *entry, uint32_t *code,
|
||||
bool force_persample_interp, bool flatshade)
|
||||
{
|
||||
int ipa = entry->ipa;
|
||||
int reg = entry->reg;
|
||||
int loc = entry->loc;
|
||||
|
||||
if (flatshade &&
|
||||
(ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
|
||||
ipa = NV50_IR_INTERP_FLAT;
|
||||
reg = 0x3f;
|
||||
} else if (force_persample_interp &&
|
||||
(ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
|
||||
(ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
|
||||
ipa |= NV50_IR_INTERP_CENTROID;
|
||||
}
|
||||
code[loc + 0] &= ~(0xf << 6);
|
||||
code[loc + 0] |= ipa << 6;
|
||||
code[loc + 0] &= ~(0x3f << 26);
|
||||
code[loc + 0] |= reg << 26;
|
||||
}
|
||||
|
||||
void
|
||||
CodeEmitterNVC0::emitINTERP(const Instruction *i)
|
||||
{
|
||||
|
@ -1630,10 +1653,13 @@ CodeEmitterNVC0::emitINTERP(const Instruction *i)
|
|||
if (i->saturate)
|
||||
code[0] |= 1 << 5;
|
||||
|
||||
if (i->op == OP_PINTERP)
|
||||
if (i->op == OP_PINTERP) {
|
||||
srcId(i->src(1), 26);
|
||||
else
|
||||
addInterp(i->ipa, SDATA(i->src(1)).id, interpApply);
|
||||
} else {
|
||||
code[0] |= 0x3f << 26;
|
||||
addInterp(i->ipa, 0x3f, interpApply);
|
||||
}
|
||||
|
||||
srcId(i->src(0).getIndirect(0), 20);
|
||||
} else {
|
||||
|
|
|
@ -910,7 +910,7 @@ bool Source::scanSource()
|
|||
info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16;
|
||||
|
||||
if (info->io.genUserClip > 0) {
|
||||
info->io.clipDistanceMask = (1 << info->io.genUserClip) - 1;
|
||||
info->io.clipDistances = info->io.genUserClip;
|
||||
|
||||
const unsigned int nOut = (info->io.genUserClip + 3) / 4;
|
||||
|
||||
|
@ -919,7 +919,7 @@ bool Source::scanSource()
|
|||
info->out[i].id = i;
|
||||
info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
|
||||
info->out[i].si = n;
|
||||
info->out[i].mask = info->io.clipDistanceMask >> (n * 4);
|
||||
info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -969,6 +969,12 @@ void Source::scanProperty(const struct tgsi_full_property *prop)
|
|||
else
|
||||
info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */
|
||||
break;
|
||||
case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
|
||||
info->io.clipDistances = prop->u[0].Data;
|
||||
break;
|
||||
case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
|
||||
info->io.cullDistances = prop->u[0].Data;
|
||||
break;
|
||||
default:
|
||||
INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
|
||||
break;
|
||||
|
@ -1054,7 +1060,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
|
|||
default:
|
||||
break;
|
||||
}
|
||||
if (decl->Interp.Location || info->io.sampleInterp)
|
||||
if (decl->Interp.Location)
|
||||
info->in[i].centroid = 1;
|
||||
}
|
||||
|
||||
|
@ -1086,8 +1092,6 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
|
|||
clipVertexOutput = i;
|
||||
break;
|
||||
case TGSI_SEMANTIC_CLIPDIST:
|
||||
info->io.clipDistanceMask |=
|
||||
decl->Declaration.UsageMask << (si * 4);
|
||||
info->io.genUserClip = -1;
|
||||
break;
|
||||
case TGSI_SEMANTIC_SAMPLEMASK:
|
||||
|
@ -1119,6 +1123,10 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
|
|||
case TGSI_SEMANTIC_VERTEXID:
|
||||
info->io.vertexId = first;
|
||||
break;
|
||||
case TGSI_SEMANTIC_SAMPLEID:
|
||||
case TGSI_SEMANTIC_SAMPLEPOS:
|
||||
info->prop.fp.sampleInterp = 1;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -1338,6 +1346,8 @@ private:
|
|||
|
||||
void handleINTERP(Value *dst0[4]);
|
||||
|
||||
uint8_t translateInterpMode(const struct nv50_ir_varying *var,
|
||||
operation& op);
|
||||
Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);
|
||||
|
||||
void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
|
||||
|
@ -1451,8 +1461,8 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
|
|||
return sym;
|
||||
}
|
||||
|
||||
static inline uint8_t
|
||||
translateInterpMode(const struct nv50_ir_varying *var, operation& op)
|
||||
uint8_t
|
||||
Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op)
|
||||
{
|
||||
uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
|
||||
|
||||
|
@ -1468,7 +1478,7 @@ translateInterpMode(const struct nv50_ir_varying *var, operation& op)
|
|||
op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
|
||||
? OP_PINTERP : OP_LINTERP;
|
||||
|
||||
if (var->centroid)
|
||||
if (var->centroid || info->prop.fp.sampleInterp)
|
||||
mode |= NV50_IR_INTERP_CENTROID;
|
||||
|
||||
return mode;
|
||||
|
@ -1628,7 +1638,7 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
|
|||
// don't load masked inputs, won't be assigned a slot
|
||||
if (!ptr && !(info->in[idx].mask & (1 << swz)))
|
||||
return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
|
||||
if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE)
|
||||
if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE)
|
||||
return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0));
|
||||
return interpolate(src, c, shiftAddress(ptr));
|
||||
} else
|
||||
|
|
|
@ -166,7 +166,7 @@ void Target::destroy(Target *targ)
|
|||
delete targ;
|
||||
}
|
||||
|
||||
CodeEmitter::CodeEmitter(const Target *target) : targ(target)
|
||||
CodeEmitter::CodeEmitter(const Target *target) : targ(target), interpInfo(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -388,6 +388,7 @@ Program::emitBinary(struct nv50_ir_prog_info *info)
|
|||
}
|
||||
}
|
||||
info->bin.relocData = emit->getRelocInfo();
|
||||
info->bin.interpData = emit->getInterpInfo();
|
||||
|
||||
emitSymbolTable(info);
|
||||
|
||||
|
@ -428,6 +429,29 @@ CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
|
|||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
CodeEmitter::addInterp(int ipa, int reg, InterpApply apply)
|
||||
{
|
||||
unsigned int n = interpInfo ? interpInfo->count : 0;
|
||||
|
||||
if (!(n % RELOC_ALLOC_INCREMENT)) {
|
||||
size_t size = sizeof(InterpInfo) + n * sizeof(InterpEntry);
|
||||
interpInfo = reinterpret_cast<InterpInfo *>(
|
||||
REALLOC(interpInfo, n ? size : 0,
|
||||
size + RELOC_ALLOC_INCREMENT * sizeof(InterpEntry)));
|
||||
if (!interpInfo)
|
||||
return false;
|
||||
if (n == 0)
|
||||
memset(interpInfo, 0, sizeof(InterpInfo));
|
||||
}
|
||||
++interpInfo->count;
|
||||
|
||||
interpInfo->entry[n] = InterpEntry(ipa, reg, codeSize >> 2);
|
||||
interpInfo->apply = apply;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
|
||||
{
|
||||
|
@ -471,6 +495,19 @@ nv50_ir_relocate_code(void *relocData, uint32_t *code,
|
|||
info->entry[i].apply(code, info);
|
||||
}
|
||||
|
||||
void
|
||||
nv50_ir_change_interp(void *interpData, uint32_t *code,
|
||||
bool force_persample_interp, bool flatshade)
|
||||
{
|
||||
nv50_ir::InterpInfo *info = reinterpret_cast<nv50_ir::InterpInfo *>(
|
||||
interpData);
|
||||
|
||||
// force_persample_interp: all non-flat -> per-sample
|
||||
// flatshade: all color -> flat
|
||||
for (unsigned i = 0; i < info->count; ++i)
|
||||
info->apply(&info->entry[i], code, force_persample_interp, flatshade);
|
||||
}
|
||||
|
||||
void
|
||||
nv50_ir_get_target_library(uint32_t chipset,
|
||||
const uint32_t **code, uint32_t *size)
|
||||
|
|
|
@ -58,6 +58,23 @@ struct RelocInfo
|
|||
RelocEntry entry[0];
|
||||
};
|
||||
|
||||
// Describes one emitted interpolation instruction that may have to be
// re-encoded after compilation. The three bit-fields add up to 32 bits.
struct InterpEntry
{
   InterpEntry(int ipa, int reg, int loc)
      : ipa(ipa),
        reg(reg),
        loc(loc)
   {
   }

   // Interpolation mode bits; the SC mode is how colors are recognised.
   uint32_t ipa:4;
   // Register used for the perspective division.
   uint32_t reg:8;
   // Instruction position as recorded by the emitter; 20 bits, so shaders
   // beyond 1M units cannot be represented.
   uint32_t loc:20;
};
|
||||
|
||||
// Callback that patches a single interpolation instruction in a binary.
// Arguments: the recorded entry, the start of the code, whether per-sample
// interpolation is forced, and whether flat shading is enabled.
typedef void (*InterpApply)(const InterpEntry*, uint32_t*, bool, bool);

// Variable-length table of recorded interpolation instructions: a small
// header followed directly by "count" entries.
struct InterpInfo
{
   // Number of valid elements in entry[].
   uint32_t count;
   // Function used to re-encode each entry.
   InterpApply apply;
   // Trailing storage; the real length is decided at allocation time.
   InterpEntry entry[0];
};
|
||||
|
||||
class CodeEmitter
|
||||
{
|
||||
public:
|
||||
|
@ -78,6 +95,9 @@ public:
|
|||
|
||||
inline void *getRelocInfo() const { return relocInfo; }
|
||||
|
||||
bool addInterp(int ipa, int reg, InterpApply apply);
|
||||
inline void *getInterpInfo() const { return interpInfo; }
|
||||
|
||||
virtual void prepareEmission(Program *);
|
||||
virtual void prepareEmission(Function *);
|
||||
virtual void prepareEmission(BasicBlock *);
|
||||
|
@ -92,6 +112,7 @@ protected:
|
|||
uint32_t codeSizeLimit;
|
||||
|
||||
RelocInfo *relocInfo;
|
||||
InterpInfo *interpInfo;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -29,95 +29,95 @@ int
|
|||
nouveau_heap_init(struct nouveau_heap **heap,
|
||||
unsigned start, unsigned size)
|
||||
{
|
||||
struct nouveau_heap *r;
|
||||
struct nouveau_heap *r;
|
||||
|
||||
r = calloc(1, sizeof(struct nouveau_heap));
|
||||
if (!r)
|
||||
return 1;
|
||||
r = calloc(1, sizeof(struct nouveau_heap));
|
||||
if (!r)
|
||||
return 1;
|
||||
|
||||
r->start = start;
|
||||
r->size = size;
|
||||
*heap = r;
|
||||
return 0;
|
||||
r->start = start;
|
||||
r->size = size;
|
||||
*heap = r;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
nouveau_heap_destroy(struct nouveau_heap **heap)
|
||||
{
|
||||
if (!*heap)
|
||||
return;
|
||||
free(*heap);
|
||||
*heap = NULL;
|
||||
if (!*heap)
|
||||
return;
|
||||
free(*heap);
|
||||
*heap = NULL;
|
||||
}
|
||||
|
||||
int
|
||||
nouveau_heap_alloc(struct nouveau_heap *heap, unsigned size, void *priv,
|
||||
struct nouveau_heap **res)
|
||||
{
|
||||
struct nouveau_heap *r;
|
||||
struct nouveau_heap *r;
|
||||
|
||||
if (!heap || !size || !res || *res)
|
||||
return 1;
|
||||
if (!heap || !size || !res || *res)
|
||||
return 1;
|
||||
|
||||
while (heap) {
|
||||
if (!heap->in_use && heap->size >= size) {
|
||||
r = calloc(1, sizeof(struct nouveau_heap));
|
||||
if (!r)
|
||||
return 1;
|
||||
while (heap) {
|
||||
if (!heap->in_use && heap->size >= size) {
|
||||
r = calloc(1, sizeof(struct nouveau_heap));
|
||||
if (!r)
|
||||
return 1;
|
||||
|
||||
r->start = (heap->start + heap->size) - size;
|
||||
r->size = size;
|
||||
r->in_use = 1;
|
||||
r->priv = priv;
|
||||
r->start = (heap->start + heap->size) - size;
|
||||
r->size = size;
|
||||
r->in_use = 1;
|
||||
r->priv = priv;
|
||||
|
||||
heap->size -= size;
|
||||
heap->size -= size;
|
||||
|
||||
r->next = heap->next;
|
||||
if (heap->next)
|
||||
heap->next->prev = r;
|
||||
r->prev = heap;
|
||||
heap->next = r;
|
||||
r->next = heap->next;
|
||||
if (heap->next)
|
||||
heap->next->prev = r;
|
||||
r->prev = heap;
|
||||
heap->next = r;
|
||||
|
||||
*res = r;
|
||||
return 0;
|
||||
}
|
||||
*res = r;
|
||||
return 0;
|
||||
}
|
||||
|
||||
heap = heap->next;
|
||||
}
|
||||
heap = heap->next;
|
||||
}
|
||||
|
||||
return 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
void
|
||||
nouveau_heap_free(struct nouveau_heap **res)
|
||||
{
|
||||
struct nouveau_heap *r;
|
||||
struct nouveau_heap *r;
|
||||
|
||||
if (!res || !*res)
|
||||
return;
|
||||
r = *res;
|
||||
*res = NULL;
|
||||
if (!res || !*res)
|
||||
return;
|
||||
r = *res;
|
||||
*res = NULL;
|
||||
|
||||
r->in_use = 0;
|
||||
r->in_use = 0;
|
||||
|
||||
if (r->next && !r->next->in_use) {
|
||||
struct nouveau_heap *new = r->next;
|
||||
if (r->next && !r->next->in_use) {
|
||||
struct nouveau_heap *new = r->next;
|
||||
|
||||
new->prev = r->prev;
|
||||
if (r->prev)
|
||||
r->prev->next = new;
|
||||
new->size += r->size;
|
||||
new->start = r->start;
|
||||
new->prev = r->prev;
|
||||
if (r->prev)
|
||||
r->prev->next = new;
|
||||
new->size += r->size;
|
||||
new->start = r->start;
|
||||
|
||||
free(r);
|
||||
r = new;
|
||||
}
|
||||
free(r);
|
||||
r = new;
|
||||
}
|
||||
|
||||
if (r->prev && !r->prev->in_use) {
|
||||
r->prev->next = r->next;
|
||||
if (r->next)
|
||||
r->next->prev = r->prev;
|
||||
r->prev->size += r->size;
|
||||
free(r);
|
||||
}
|
||||
if (r->prev && !r->prev->in_use) {
|
||||
r->prev->next = r->next;
|
||||
if (r->next)
|
||||
r->next->prev = r->prev;
|
||||
r->prev->size += r->size;
|
||||
free(r);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -44,15 +44,15 @@
|
|||
* full size of the heap.
|
||||
*/
|
||||
struct nouveau_heap {
|
||||
struct nouveau_heap *prev;
|
||||
struct nouveau_heap *next;
|
||||
struct nouveau_heap *prev;
|
||||
struct nouveau_heap *next;
|
||||
|
||||
void *priv;
|
||||
void *priv;
|
||||
|
||||
unsigned start;
|
||||
unsigned size;
|
||||
unsigned start;
|
||||
unsigned size;
|
||||
|
||||
int in_use;
|
||||
int in_use;
|
||||
};
|
||||
|
||||
int
|
||||
|
|
|
@ -30,211 +30,211 @@ int nouveau_mesa_debug = 0;
|
|||
static const char *
|
||||
nouveau_screen_get_name(struct pipe_screen *pscreen)
|
||||
{
|
||||
struct nouveau_device *dev = nouveau_screen(pscreen)->device;
|
||||
static char buffer[128];
|
||||
struct nouveau_device *dev = nouveau_screen(pscreen)->device;
|
||||
static char buffer[128];
|
||||
|
||||
util_snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
|
||||
return buffer;
|
||||
util_snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
|
||||
return buffer;
|
||||
}
|
||||
|
||||
static const char *
|
||||
nouveau_screen_get_vendor(struct pipe_screen *pscreen)
|
||||
{
|
||||
return "nouveau";
|
||||
return "nouveau";
|
||||
}
|
||||
|
||||
static const char *
|
||||
nouveau_screen_get_device_vendor(struct pipe_screen *pscreen)
|
||||
{
|
||||
return "NVIDIA";
|
||||
return "NVIDIA";
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
nouveau_screen_get_timestamp(struct pipe_screen *pscreen)
|
||||
{
|
||||
int64_t cpu_time = os_time_get() * 1000;
|
||||
int64_t cpu_time = os_time_get() * 1000;
|
||||
|
||||
/* getparam of PTIMER_TIME takes about x10 as long (several usecs) */
|
||||
/* getparam of PTIMER_TIME takes about x10 as long (several usecs) */
|
||||
|
||||
return cpu_time + nouveau_screen(pscreen)->cpu_gpu_time_delta;
|
||||
return cpu_time + nouveau_screen(pscreen)->cpu_gpu_time_delta;
|
||||
}
|
||||
|
||||
static void
|
||||
nouveau_screen_fence_ref(struct pipe_screen *pscreen,
|
||||
struct pipe_fence_handle **ptr,
|
||||
struct pipe_fence_handle *pfence)
|
||||
struct pipe_fence_handle **ptr,
|
||||
struct pipe_fence_handle *pfence)
|
||||
{
|
||||
nouveau_fence_ref(nouveau_fence(pfence), (struct nouveau_fence **)ptr);
|
||||
nouveau_fence_ref(nouveau_fence(pfence), (struct nouveau_fence **)ptr);
|
||||
}
|
||||
|
||||
static boolean
|
||||
nouveau_screen_fence_finish(struct pipe_screen *screen,
|
||||
struct pipe_fence_handle *pfence,
|
||||
struct pipe_fence_handle *pfence,
|
||||
uint64_t timeout)
|
||||
{
|
||||
if (!timeout)
|
||||
return nouveau_fence_signalled(nouveau_fence(pfence));
|
||||
if (!timeout)
|
||||
return nouveau_fence_signalled(nouveau_fence(pfence));
|
||||
|
||||
return nouveau_fence_wait(nouveau_fence(pfence));
|
||||
return nouveau_fence_wait(nouveau_fence(pfence));
|
||||
}
|
||||
|
||||
|
||||
struct nouveau_bo *
|
||||
nouveau_screen_bo_from_handle(struct pipe_screen *pscreen,
|
||||
struct winsys_handle *whandle,
|
||||
unsigned *out_stride)
|
||||
struct winsys_handle *whandle,
|
||||
unsigned *out_stride)
|
||||
{
|
||||
struct nouveau_device *dev = nouveau_screen(pscreen)->device;
|
||||
struct nouveau_bo *bo = 0;
|
||||
int ret;
|
||||
struct nouveau_device *dev = nouveau_screen(pscreen)->device;
|
||||
struct nouveau_bo *bo = 0;
|
||||
int ret;
|
||||
|
||||
if (whandle->type != DRM_API_HANDLE_TYPE_SHARED &&
|
||||
whandle->type != DRM_API_HANDLE_TYPE_FD) {
|
||||
debug_printf("%s: attempt to import unsupported handle type %d\n",
|
||||
__FUNCTION__, whandle->type);
|
||||
return NULL;
|
||||
}
|
||||
if (whandle->type != DRM_API_HANDLE_TYPE_SHARED &&
|
||||
whandle->type != DRM_API_HANDLE_TYPE_FD) {
|
||||
debug_printf("%s: attempt to import unsupported handle type %d\n",
|
||||
__FUNCTION__, whandle->type);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (whandle->type == DRM_API_HANDLE_TYPE_SHARED)
|
||||
ret = nouveau_bo_name_ref(dev, whandle->handle, &bo);
|
||||
else
|
||||
ret = nouveau_bo_prime_handle_ref(dev, whandle->handle, &bo);
|
||||
if (whandle->type == DRM_API_HANDLE_TYPE_SHARED)
|
||||
ret = nouveau_bo_name_ref(dev, whandle->handle, &bo);
|
||||
else
|
||||
ret = nouveau_bo_prime_handle_ref(dev, whandle->handle, &bo);
|
||||
|
||||
if (ret) {
|
||||
debug_printf("%s: ref name 0x%08x failed with %d\n",
|
||||
__FUNCTION__, whandle->handle, ret);
|
||||
return NULL;
|
||||
}
|
||||
if (ret) {
|
||||
debug_printf("%s: ref name 0x%08x failed with %d\n",
|
||||
__FUNCTION__, whandle->handle, ret);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
*out_stride = whandle->stride;
|
||||
return bo;
|
||||
*out_stride = whandle->stride;
|
||||
return bo;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
nouveau_screen_bo_get_handle(struct pipe_screen *pscreen,
|
||||
struct nouveau_bo *bo,
|
||||
unsigned stride,
|
||||
struct winsys_handle *whandle)
|
||||
struct nouveau_bo *bo,
|
||||
unsigned stride,
|
||||
struct winsys_handle *whandle)
|
||||
{
|
||||
whandle->stride = stride;
|
||||
whandle->stride = stride;
|
||||
|
||||
if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
|
||||
return nouveau_bo_name_get(bo, &whandle->handle) == 0;
|
||||
} else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
|
||||
whandle->handle = bo->handle;
|
||||
return true;
|
||||
} else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
|
||||
return nouveau_bo_set_prime(bo, (int *)&whandle->handle) == 0;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
|
||||
return nouveau_bo_name_get(bo, &whandle->handle) == 0;
|
||||
} else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
|
||||
whandle->handle = bo->handle;
|
||||
return true;
|
||||
} else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
|
||||
return nouveau_bo_set_prime(bo, (int *)&whandle->handle) == 0;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
|
||||
{
|
||||
struct pipe_screen *pscreen = &screen->base;
|
||||
struct nv04_fifo nv04_data = { .vram = 0xbeef0201, .gart = 0xbeef0202 };
|
||||
struct nvc0_fifo nvc0_data = { };
|
||||
uint64_t time;
|
||||
int size, ret;
|
||||
void *data;
|
||||
union nouveau_bo_config mm_config;
|
||||
struct pipe_screen *pscreen = &screen->base;
|
||||
struct nv04_fifo nv04_data = { .vram = 0xbeef0201, .gart = 0xbeef0202 };
|
||||
struct nvc0_fifo nvc0_data = { };
|
||||
uint64_t time;
|
||||
int size, ret;
|
||||
void *data;
|
||||
union nouveau_bo_config mm_config;
|
||||
|
||||
char *nv_dbg = getenv("NOUVEAU_MESA_DEBUG");
|
||||
if (nv_dbg)
|
||||
nouveau_mesa_debug = atoi(nv_dbg);
|
||||
char *nv_dbg = getenv("NOUVEAU_MESA_DEBUG");
|
||||
if (nv_dbg)
|
||||
nouveau_mesa_debug = atoi(nv_dbg);
|
||||
|
||||
/*
|
||||
* this is initialized to 1 in nouveau_drm_screen_create after screen
|
||||
* is fully constructed and added to the global screen list.
|
||||
*/
|
||||
screen->refcount = -1;
|
||||
/*
|
||||
* this is initialized to 1 in nouveau_drm_screen_create after screen
|
||||
* is fully constructed and added to the global screen list.
|
||||
*/
|
||||
screen->refcount = -1;
|
||||
|
||||
if (dev->chipset < 0xc0) {
|
||||
data = &nv04_data;
|
||||
size = sizeof(nv04_data);
|
||||
} else {
|
||||
data = &nvc0_data;
|
||||
size = sizeof(nvc0_data);
|
||||
}
|
||||
if (dev->chipset < 0xc0) {
|
||||
data = &nv04_data;
|
||||
size = sizeof(nv04_data);
|
||||
} else {
|
||||
data = &nvc0_data;
|
||||
size = sizeof(nvc0_data);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set default VRAM domain if not overridden
|
||||
*/
|
||||
if (!screen->vram_domain) {
|
||||
if (dev->vram_size > 0)
|
||||
screen->vram_domain = NOUVEAU_BO_VRAM;
|
||||
else
|
||||
screen->vram_domain = NOUVEAU_BO_GART;
|
||||
}
|
||||
/*
|
||||
* Set default VRAM domain if not overridden
|
||||
*/
|
||||
if (!screen->vram_domain) {
|
||||
if (dev->vram_size > 0)
|
||||
screen->vram_domain = NOUVEAU_BO_VRAM;
|
||||
else
|
||||
screen->vram_domain = NOUVEAU_BO_GART;
|
||||
}
|
||||
|
||||
ret = nouveau_object_new(&dev->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS,
|
||||
data, size, &screen->channel);
|
||||
if (ret)
|
||||
return ret;
|
||||
screen->device = dev;
|
||||
ret = nouveau_object_new(&dev->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS,
|
||||
data, size, &screen->channel);
|
||||
if (ret)
|
||||
return ret;
|
||||
screen->device = dev;
|
||||
|
||||
ret = nouveau_client_new(screen->device, &screen->client);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = nouveau_pushbuf_new(screen->client, screen->channel,
|
||||
4, 512 * 1024, 1,
|
||||
&screen->pushbuf);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = nouveau_client_new(screen->device, &screen->client);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = nouveau_pushbuf_new(screen->client, screen->channel,
|
||||
4, 512 * 1024, 1,
|
||||
&screen->pushbuf);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* getting CPU time first appears to be more accurate */
|
||||
screen->cpu_gpu_time_delta = os_time_get();
|
||||
/* getting CPU time first appears to be more accurate */
|
||||
screen->cpu_gpu_time_delta = os_time_get();
|
||||
|
||||
ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_PTIMER_TIME, &time);
|
||||
if (!ret)
|
||||
screen->cpu_gpu_time_delta = time - screen->cpu_gpu_time_delta * 1000;
|
||||
ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_PTIMER_TIME, &time);
|
||||
if (!ret)
|
||||
screen->cpu_gpu_time_delta = time - screen->cpu_gpu_time_delta * 1000;
|
||||
|
||||
pscreen->get_name = nouveau_screen_get_name;
|
||||
pscreen->get_vendor = nouveau_screen_get_vendor;
|
||||
pscreen->get_device_vendor = nouveau_screen_get_device_vendor;
|
||||
pscreen->get_name = nouveau_screen_get_name;
|
||||
pscreen->get_vendor = nouveau_screen_get_vendor;
|
||||
pscreen->get_device_vendor = nouveau_screen_get_device_vendor;
|
||||
|
||||
pscreen->get_timestamp = nouveau_screen_get_timestamp;
|
||||
pscreen->get_timestamp = nouveau_screen_get_timestamp;
|
||||
|
||||
pscreen->fence_reference = nouveau_screen_fence_ref;
|
||||
pscreen->fence_finish = nouveau_screen_fence_finish;
|
||||
pscreen->fence_reference = nouveau_screen_fence_ref;
|
||||
pscreen->fence_finish = nouveau_screen_fence_finish;
|
||||
|
||||
util_format_s3tc_init();
|
||||
util_format_s3tc_init();
|
||||
|
||||
screen->lowmem_bindings = PIPE_BIND_GLOBAL; /* gallium limit */
|
||||
screen->vidmem_bindings =
|
||||
PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL |
|
||||
PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
|
||||
PIPE_BIND_CURSOR |
|
||||
PIPE_BIND_SAMPLER_VIEW |
|
||||
PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE |
|
||||
PIPE_BIND_COMPUTE_RESOURCE |
|
||||
PIPE_BIND_GLOBAL;
|
||||
screen->sysmem_bindings =
|
||||
PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT |
|
||||
PIPE_BIND_COMMAND_ARGS_BUFFER;
|
||||
screen->lowmem_bindings = PIPE_BIND_GLOBAL; /* gallium limit */
|
||||
screen->vidmem_bindings =
|
||||
PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL |
|
||||
PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
|
||||
PIPE_BIND_CURSOR |
|
||||
PIPE_BIND_SAMPLER_VIEW |
|
||||
PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE |
|
||||
PIPE_BIND_COMPUTE_RESOURCE |
|
||||
PIPE_BIND_GLOBAL;
|
||||
screen->sysmem_bindings =
|
||||
PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT |
|
||||
PIPE_BIND_COMMAND_ARGS_BUFFER;
|
||||
|
||||
memset(&mm_config, 0, sizeof(mm_config));
|
||||
memset(&mm_config, 0, sizeof(mm_config));
|
||||
|
||||
screen->mm_GART = nouveau_mm_create(dev,
|
||||
NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
|
||||
&mm_config);
|
||||
screen->mm_VRAM = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, &mm_config);
|
||||
return 0;
|
||||
screen->mm_GART = nouveau_mm_create(dev,
|
||||
NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
|
||||
&mm_config);
|
||||
screen->mm_VRAM = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, &mm_config);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
nouveau_screen_fini(struct nouveau_screen *screen)
|
||||
{
|
||||
nouveau_mm_destroy(screen->mm_GART);
|
||||
nouveau_mm_destroy(screen->mm_VRAM);
|
||||
nouveau_mm_destroy(screen->mm_GART);
|
||||
nouveau_mm_destroy(screen->mm_VRAM);
|
||||
|
||||
nouveau_pushbuf_del(&screen->pushbuf);
|
||||
nouveau_pushbuf_del(&screen->pushbuf);
|
||||
|
||||
nouveau_client_del(&screen->client);
|
||||
nouveau_object_del(&screen->channel);
|
||||
nouveau_client_del(&screen->client);
|
||||
nouveau_object_del(&screen->channel);
|
||||
|
||||
nouveau_device_del(&screen->device);
|
||||
nouveau_device_del(&screen->device);
|
||||
}
|
||||
|
|
|
@ -16,47 +16,47 @@ extern int nouveau_mesa_debug;
|
|||
struct nouveau_bo;
|
||||
|
||||
struct nouveau_screen {
|
||||
struct pipe_screen base;
|
||||
struct nouveau_device *device;
|
||||
struct nouveau_object *channel;
|
||||
struct nouveau_client *client;
|
||||
struct nouveau_pushbuf *pushbuf;
|
||||
struct pipe_screen base;
|
||||
struct nouveau_device *device;
|
||||
struct nouveau_object *channel;
|
||||
struct nouveau_client *client;
|
||||
struct nouveau_pushbuf *pushbuf;
|
||||
|
||||
int refcount;
|
||||
int refcount;
|
||||
|
||||
unsigned vidmem_bindings; /* PIPE_BIND_* where VRAM placement is desired */
|
||||
unsigned sysmem_bindings; /* PIPE_BIND_* where GART placement is desired */
|
||||
unsigned lowmem_bindings; /* PIPE_BIND_* that require an address < 4 GiB */
|
||||
/*
|
||||
* For bindings with (vidmem & sysmem) bits set, PIPE_USAGE_* decides
|
||||
* placement.
|
||||
*/
|
||||
unsigned vidmem_bindings; /* PIPE_BIND_* where VRAM placement is desired */
|
||||
unsigned sysmem_bindings; /* PIPE_BIND_* where GART placement is desired */
|
||||
unsigned lowmem_bindings; /* PIPE_BIND_* that require an address < 4 GiB */
|
||||
/*
|
||||
* For bindings with (vidmem & sysmem) bits set, PIPE_USAGE_* decides
|
||||
* placement.
|
||||
*/
|
||||
|
||||
uint16_t class_3d;
|
||||
uint16_t class_3d;
|
||||
|
||||
struct {
|
||||
struct nouveau_fence *head;
|
||||
struct nouveau_fence *tail;
|
||||
struct nouveau_fence *current;
|
||||
u32 sequence;
|
||||
u32 sequence_ack;
|
||||
void (*emit)(struct pipe_screen *, u32 *sequence);
|
||||
u32 (*update)(struct pipe_screen *);
|
||||
} fence;
|
||||
struct {
|
||||
struct nouveau_fence *head;
|
||||
struct nouveau_fence *tail;
|
||||
struct nouveau_fence *current;
|
||||
u32 sequence;
|
||||
u32 sequence_ack;
|
||||
void (*emit)(struct pipe_screen *, u32 *sequence);
|
||||
u32 (*update)(struct pipe_screen *);
|
||||
} fence;
|
||||
|
||||
struct nouveau_mman *mm_VRAM;
|
||||
struct nouveau_mman *mm_GART;
|
||||
struct nouveau_mman *mm_VRAM;
|
||||
struct nouveau_mman *mm_GART;
|
||||
|
||||
int64_t cpu_gpu_time_delta;
|
||||
int64_t cpu_gpu_time_delta;
|
||||
|
||||
bool hint_buf_keep_sysmem_copy;
|
||||
bool hint_buf_keep_sysmem_copy;
|
||||
|
||||
unsigned vram_domain;
|
||||
unsigned vram_domain;
|
||||
|
||||
struct {
|
||||
unsigned profiles_checked;
|
||||
unsigned profiles_present;
|
||||
} firmware_info;
|
||||
struct {
|
||||
unsigned profiles_checked;
|
||||
unsigned profiles_present;
|
||||
} firmware_info;
|
||||
|
||||
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
|
||||
union {
|
||||
|
@ -100,10 +100,10 @@ struct nouveau_screen {
|
|||
|
||||
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
|
||||
# define NOUVEAU_DRV_STAT(s, n, v) do { \
|
||||
(s)->stats.named.n += (v); \
|
||||
(s)->stats.named.n += (v); \
|
||||
} while(0)
|
||||
# define NOUVEAU_DRV_STAT_RES(r, n, v) do { \
|
||||
nouveau_screen((r)->base.screen)->stats.named.n += (v); \
|
||||
# define NOUVEAU_DRV_STAT_RES(r, n, v) do { \
|
||||
nouveau_screen((r)->base.screen)->stats.named.n += (v); \
|
||||
} while(0)
|
||||
# define NOUVEAU_DRV_STAT_IFD(x) x
|
||||
#else
|
||||
|
@ -115,20 +115,20 @@ struct nouveau_screen {
|
|||
static inline struct nouveau_screen *
|
||||
nouveau_screen(struct pipe_screen *pscreen)
|
||||
{
|
||||
return (struct nouveau_screen *)pscreen;
|
||||
return (struct nouveau_screen *)pscreen;
|
||||
}
|
||||
|
||||
bool nouveau_drm_screen_unref(struct nouveau_screen *screen);
|
||||
|
||||
bool
|
||||
nouveau_screen_bo_get_handle(struct pipe_screen *pscreen,
|
||||
struct nouveau_bo *bo,
|
||||
unsigned stride,
|
||||
struct winsys_handle *whandle);
|
||||
struct nouveau_bo *bo,
|
||||
unsigned stride,
|
||||
struct winsys_handle *whandle);
|
||||
struct nouveau_bo *
|
||||
nouveau_screen_bo_from_handle(struct pipe_screen *pscreen,
|
||||
struct winsys_handle *whandle,
|
||||
unsigned *out_stride);
|
||||
struct winsys_handle *whandle,
|
||||
unsigned *out_stride);
|
||||
|
||||
|
||||
int nouveau_screen_init(struct nouveau_screen *, struct nouveau_device *);
|
||||
|
|
|
@ -6,9 +6,9 @@
|
|||
|
||||
struct nouveau_statebuf_builder
|
||||
{
|
||||
uint32_t* p;
|
||||
uint32_t* p;
|
||||
#ifdef DEBUG
|
||||
uint32_t* pend;
|
||||
uint32_t* pend;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
@ -22,7 +22,7 @@ struct nouveau_statebuf_builder
|
|||
|
||||
static inline uint32_t sb_header(unsigned subc, unsigned mthd, unsigned size)
|
||||
{
|
||||
return (size << 18) | (subc << 13) | mthd;
|
||||
return (size << 18) | (subc << 13) | mthd;
|
||||
}
|
||||
|
||||
#define sb_method(sb, v, n) sb_data(sb, sb_header(SUBC_3D(v), n));
|
||||
|
|
|
@ -831,7 +831,7 @@ error:
|
|||
static int
|
||||
nouveau_screen_get_video_param(struct pipe_screen *pscreen,
|
||||
enum pipe_video_profile profile,
|
||||
enum pipe_video_entrypoint entrypoint,
|
||||
enum pipe_video_entrypoint entrypoint,
|
||||
enum pipe_video_cap param)
|
||||
{
|
||||
switch (param) {
|
||||
|
|
|
@ -83,7 +83,7 @@ BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
|
|||
static inline void
|
||||
PUSH_MTHDl(struct nouveau_pushbuf *push, int subc, int mthd,
|
||||
struct nouveau_bo *bo, uint32_t offset,
|
||||
struct nouveau_bufctx *ctx, int bin, uint32_t rw)
|
||||
struct nouveau_bufctx *ctx, int bin, uint32_t rw)
|
||||
{
|
||||
nouveau_bufctx_mthd(ctx, bin, NV04_FIFO_PKHDR(subc, mthd, 1),
|
||||
bo, offset,
|
||||
|
|
|
@ -117,22 +117,22 @@ struct nouveau_vp3_decoder {
|
|||
};
|
||||
|
||||
struct comm {
|
||||
uint32_t bsp_cur_index; // 000
|
||||
uint32_t byte_ofs; // 004
|
||||
uint32_t status[0x10]; // 008
|
||||
uint32_t pos[0x10]; // 048
|
||||
uint8_t pad[0x100 - 0x88]; // 0a0 bool comm_encrypted
|
||||
uint32_t bsp_cur_index; // 000
|
||||
uint32_t byte_ofs; // 004
|
||||
uint32_t status[0x10]; // 008
|
||||
uint32_t pos[0x10]; // 048
|
||||
uint8_t pad[0x100 - 0x88]; // 0a0 bool comm_encrypted
|
||||
|
||||
uint32_t pvp_cur_index; // 100
|
||||
uint32_t acked_byte_ofs; // 104
|
||||
uint32_t status_vp[0x10]; // 108
|
||||
uint16_t mb_y[0x10]; //148
|
||||
uint32_t pvp_stage; // 168 0xeeXX
|
||||
uint16_t parse_endpos_index; // 16c
|
||||
uint16_t irq_index; // 16e
|
||||
uint8_t irq_470[0x10]; // 170
|
||||
uint32_t irq_pos[0x10]; // 180
|
||||
uint32_t parse_endpos[0x10]; // 1c0
|
||||
uint32_t pvp_cur_index; // 100
|
||||
uint32_t acked_byte_ofs; // 104
|
||||
uint32_t status_vp[0x10]; // 108
|
||||
uint16_t mb_y[0x10]; //148
|
||||
uint32_t pvp_stage; // 168 0xeeXX
|
||||
uint16_t parse_endpos_index; // 16c
|
||||
uint16_t irq_index; // 16e
|
||||
uint8_t irq_470[0x10]; // 170
|
||||
uint32_t irq_pos[0x10]; // 180
|
||||
uint32_t parse_endpos[0x10]; // 1c0
|
||||
};
|
||||
|
||||
static inline uint32_t nouveau_vp3_video_align(uint32_t h)
|
||||
|
|
|
@ -23,90 +23,90 @@
|
|||
#include "nouveau_vp3_video.h"
|
||||
|
||||
struct strparm_bsp {
|
||||
uint32_t w0[4]; // bits 0-23 length, bits 24-31 addr_hi
|
||||
uint32_t w1[4]; // bit 8-24 addr_lo
|
||||
uint32_t unk20; // should be idx * 0x8000000, bitstream offset
|
||||
uint32_t do_crypto_crap; // set to 0
|
||||
uint32_t w0[4]; // bits 0-23 length, bits 24-31 addr_hi
|
||||
uint32_t w1[4]; // bit 8-24 addr_lo
|
||||
uint32_t unk20; // should be idx * 0x8000000, bitstream offset
|
||||
uint32_t do_crypto_crap; // set to 0
|
||||
};
|
||||
|
||||
struct mpeg12_picparm_bsp {
|
||||
uint16_t width;
|
||||
uint16_t height;
|
||||
uint8_t picture_structure;
|
||||
uint8_t picture_coding_type;
|
||||
uint8_t intra_dc_precision;
|
||||
uint8_t frame_pred_frame_dct;
|
||||
uint8_t concealment_motion_vectors;
|
||||
uint8_t intra_vlc_format;
|
||||
uint16_t pad;
|
||||
uint8_t f_code[2][2];
|
||||
uint16_t width;
|
||||
uint16_t height;
|
||||
uint8_t picture_structure;
|
||||
uint8_t picture_coding_type;
|
||||
uint8_t intra_dc_precision;
|
||||
uint8_t frame_pred_frame_dct;
|
||||
uint8_t concealment_motion_vectors;
|
||||
uint8_t intra_vlc_format;
|
||||
uint16_t pad;
|
||||
uint8_t f_code[2][2];
|
||||
};
|
||||
|
||||
struct mpeg4_picparm_bsp {
|
||||
uint16_t width;
|
||||
uint16_t height;
|
||||
uint8_t vop_time_increment_size;
|
||||
uint8_t interlaced;
|
||||
uint8_t resync_marker_disable;
|
||||
uint16_t width;
|
||||
uint16_t height;
|
||||
uint8_t vop_time_increment_size;
|
||||
uint8_t interlaced;
|
||||
uint8_t resync_marker_disable;
|
||||
};
|
||||
|
||||
struct vc1_picparm_bsp {
|
||||
uint16_t width;
|
||||
uint16_t height;
|
||||
uint8_t profile; // 04 0 simple, 1 main, 2 advanced
|
||||
uint8_t postprocflag; // 05
|
||||
uint8_t pulldown; // 06
|
||||
uint8_t interlaced; // 07
|
||||
uint8_t tfcntrflag; // 08
|
||||
uint8_t finterpflag; // 09
|
||||
uint8_t psf; // 0a
|
||||
uint8_t pad; // 0b
|
||||
uint8_t multires; // 0c
|
||||
uint8_t syncmarker; // 0d
|
||||
uint8_t rangered; // 0e
|
||||
uint8_t maxbframes; // 0f
|
||||
uint8_t dquant; // 10
|
||||
uint8_t panscan_flag; // 11
|
||||
uint8_t refdist_flag; // 12
|
||||
uint8_t quantizer; // 13
|
||||
uint8_t extended_mv; // 14
|
||||
uint8_t extended_dmv; // 15
|
||||
uint8_t overlap; // 16
|
||||
uint8_t vstransform; // 17
|
||||
uint16_t width;
|
||||
uint16_t height;
|
||||
uint8_t profile; // 04 0 simple, 1 main, 2 advanced
|
||||
uint8_t postprocflag; // 05
|
||||
uint8_t pulldown; // 06
|
||||
uint8_t interlaced; // 07
|
||||
uint8_t tfcntrflag; // 08
|
||||
uint8_t finterpflag; // 09
|
||||
uint8_t psf; // 0a
|
||||
uint8_t pad; // 0b
|
||||
uint8_t multires; // 0c
|
||||
uint8_t syncmarker; // 0d
|
||||
uint8_t rangered; // 0e
|
||||
uint8_t maxbframes; // 0f
|
||||
uint8_t dquant; // 10
|
||||
uint8_t panscan_flag; // 11
|
||||
uint8_t refdist_flag; // 12
|
||||
uint8_t quantizer; // 13
|
||||
uint8_t extended_mv; // 14
|
||||
uint8_t extended_dmv; // 15
|
||||
uint8_t overlap; // 16
|
||||
uint8_t vstransform; // 17
|
||||
};
|
||||
|
||||
struct h264_picparm_bsp {
|
||||
// 00
|
||||
uint32_t unk00;
|
||||
// 04
|
||||
uint32_t log2_max_frame_num_minus4; // 04 checked
|
||||
uint32_t pic_order_cnt_type; // 08 checked
|
||||
uint32_t log2_max_pic_order_cnt_lsb_minus4; // 0c checked
|
||||
uint32_t delta_pic_order_always_zero_flag; // 10, or unknown
|
||||
// 00
|
||||
uint32_t unk00;
|
||||
// 04
|
||||
uint32_t log2_max_frame_num_minus4; // 04 checked
|
||||
uint32_t pic_order_cnt_type; // 08 checked
|
||||
uint32_t log2_max_pic_order_cnt_lsb_minus4; // 0c checked
|
||||
uint32_t delta_pic_order_always_zero_flag; // 10, or unknown
|
||||
|
||||
uint32_t frame_mbs_only_flag; // 14, always 1?
|
||||
uint32_t direct_8x8_inference_flag; // 18, always 1?
|
||||
uint32_t width_mb; // 1c checked
|
||||
uint32_t height_mb; // 20 checked
|
||||
// 24
|
||||
//struct picparm2
|
||||
uint32_t entropy_coding_mode_flag; // 00, checked
|
||||
uint32_t pic_order_present_flag; // 04 checked
|
||||
uint32_t unk; // 08 seems to be 0?
|
||||
uint32_t pad1; // 0c seems to be 0?
|
||||
uint32_t pad2; // 10 always 0 ?
|
||||
uint32_t num_ref_idx_l0_active_minus1; // 14 always 0?
|
||||
uint32_t num_ref_idx_l1_active_minus1; // 18 always 0?
|
||||
uint32_t weighted_pred_flag; // 1c checked
|
||||
uint32_t weighted_bipred_idc; // 20 checked
|
||||
uint32_t pic_init_qp_minus26; // 24 checked
|
||||
uint32_t deblocking_filter_control_present_flag; // 28 always 1?
|
||||
uint32_t redundant_pic_cnt_present_flag; // 2c always 0?
|
||||
uint32_t transform_8x8_mode_flag; // 30 checked
|
||||
uint32_t mb_adaptive_frame_field_flag; // 34 checked-ish
|
||||
uint8_t field_pic_flag; // 38 checked
|
||||
uint8_t bottom_field_flag; // 39 checked
|
||||
uint8_t real_pad[0x1b]; // XX why?
|
||||
uint32_t frame_mbs_only_flag; // 14, always 1?
|
||||
uint32_t direct_8x8_inference_flag; // 18, always 1?
|
||||
uint32_t width_mb; // 1c checked
|
||||
uint32_t height_mb; // 20 checked
|
||||
// 24
|
||||
//struct picparm2
|
||||
uint32_t entropy_coding_mode_flag; // 00, checked
|
||||
uint32_t pic_order_present_flag; // 04 checked
|
||||
uint32_t unk; // 08 seems to be 0?
|
||||
uint32_t pad1; // 0c seems to be 0?
|
||||
uint32_t pad2; // 10 always 0 ?
|
||||
uint32_t num_ref_idx_l0_active_minus1; // 14 always 0?
|
||||
uint32_t num_ref_idx_l1_active_minus1; // 18 always 0?
|
||||
uint32_t weighted_pred_flag; // 1c checked
|
||||
uint32_t weighted_bipred_idc; // 20 checked
|
||||
uint32_t pic_init_qp_minus26; // 24 checked
|
||||
uint32_t deblocking_filter_control_present_flag; // 28 always 1?
|
||||
uint32_t redundant_pic_cnt_present_flag; // 2c always 0?
|
||||
uint32_t transform_8x8_mode_flag; // 30 checked
|
||||
uint32_t mb_adaptive_frame_field_flag; // 34 checked-ish
|
||||
uint8_t field_pic_flag; // 38 checked
|
||||
uint8_t bottom_field_flag; // 39 checked
|
||||
uint8_t real_pad[0x1b]; // XX why?
|
||||
};
|
||||
|
||||
static uint32_t
|
||||
|
|
|
@ -23,147 +23,147 @@
|
|||
#include "nouveau_vp3_video.h"
|
||||
|
||||
struct mpeg12_picparm_vp {
|
||||
uint16_t width; // 00 in mb units
|
||||
uint16_t height; // 02 in mb units
|
||||
uint16_t width; // 00 in mb units
|
||||
uint16_t height; // 02 in mb units
|
||||
|
||||
uint32_t unk04; // 04 stride for Y?
|
||||
uint32_t unk08; // 08 stride for CbCr?
|
||||
uint32_t unk04; // 04 stride for Y?
|
||||
uint32_t unk08; // 08 stride for CbCr?
|
||||
|
||||
uint32_t ofs[6]; // 1c..20 ofs
|
||||
uint32_t bucket_size; // 24
|
||||
uint32_t inter_ring_data_size; // 28
|
||||
uint16_t unk2c; // 2c
|
||||
uint16_t alternate_scan; // 2e
|
||||
uint16_t unk30; // 30 not seen set yet
|
||||
uint16_t picture_structure; // 32
|
||||
uint16_t pad2[3];
|
||||
uint16_t unk3a; // 3a set on I frame?
|
||||
uint32_t ofs[6]; // 1c..20 ofs
|
||||
uint32_t bucket_size; // 24
|
||||
uint32_t inter_ring_data_size; // 28
|
||||
uint16_t unk2c; // 2c
|
||||
uint16_t alternate_scan; // 2e
|
||||
uint16_t unk30; // 30 not seen set yet
|
||||
uint16_t picture_structure; // 32
|
||||
uint16_t pad2[3];
|
||||
uint16_t unk3a; // 3a set on I frame?
|
||||
|
||||
uint32_t f_code[4]; // 3c
|
||||
uint32_t picture_coding_type; // 4c
|
||||
uint32_t intra_dc_precision; // 50
|
||||
uint32_t q_scale_type; // 54
|
||||
uint32_t top_field_first; // 58
|
||||
uint32_t full_pel_forward_vector; // 5c
|
||||
uint32_t full_pel_backward_vector; // 60
|
||||
uint8_t intra_quantizer_matrix[0x40]; // 64
|
||||
uint8_t non_intra_quantizer_matrix[0x40]; // a4
|
||||
uint32_t f_code[4]; // 3c
|
||||
uint32_t picture_coding_type; // 4c
|
||||
uint32_t intra_dc_precision; // 50
|
||||
uint32_t q_scale_type; // 54
|
||||
uint32_t top_field_first; // 58
|
||||
uint32_t full_pel_forward_vector; // 5c
|
||||
uint32_t full_pel_backward_vector; // 60
|
||||
uint8_t intra_quantizer_matrix[0x40]; // 64
|
||||
uint8_t non_intra_quantizer_matrix[0x40]; // a4
|
||||
};
|
||||
|
||||
struct mpeg4_picparm_vp {
|
||||
uint32_t width; // 00 in normal units
|
||||
uint32_t height; // 04 in normal units
|
||||
uint32_t unk08; // stride 1
|
||||
uint32_t unk0c; // stride 2
|
||||
uint32_t ofs[6]; // 10..24 ofs
|
||||
uint32_t bucket_size; // 28
|
||||
uint32_t pad1; // 2c, pad
|
||||
uint32_t pad2; // 30
|
||||
uint32_t inter_ring_data_size; // 34
|
||||
uint32_t width; // 00 in normal units
|
||||
uint32_t height; // 04 in normal units
|
||||
uint32_t unk08; // stride 1
|
||||
uint32_t unk0c; // stride 2
|
||||
uint32_t ofs[6]; // 10..24 ofs
|
||||
uint32_t bucket_size; // 28
|
||||
uint32_t pad1; // 2c, pad
|
||||
uint32_t pad2; // 30
|
||||
uint32_t inter_ring_data_size; // 34
|
||||
|
||||
uint32_t trd[2]; // 38, 3c
|
||||
uint32_t trb[2]; // 40, 44
|
||||
uint32_t u48; // XXX codec selection? Should test with different values of VdpDecoderProfile
|
||||
uint16_t f_code_fw; // 4c
|
||||
uint16_t f_code_bw; // 4e
|
||||
uint8_t interlaced; // 50
|
||||
uint32_t trd[2]; // 38, 3c
|
||||
uint32_t trb[2]; // 40, 44
|
||||
uint32_t u48; // XXX codec selection? Should test with different values of VdpDecoderProfile
|
||||
uint16_t f_code_fw; // 4c
|
||||
uint16_t f_code_bw; // 4e
|
||||
uint8_t interlaced; // 50
|
||||
|
||||
uint8_t quant_type; // bool, written to 528
|
||||
uint8_t quarter_sample; // bool, written to 548
|
||||
uint8_t short_video_header; // bool, negated written to 528 shifted by 1
|
||||
uint8_t u54; // bool, written to 0x740
|
||||
uint8_t vop_coding_type; // 55
|
||||
uint8_t rounding_control; // 56
|
||||
uint8_t alternate_vertical_scan_flag; // 57 bool
|
||||
uint8_t top_field_first; // bool, written to vuc
|
||||
uint8_t quant_type; // bool, written to 528
|
||||
uint8_t quarter_sample; // bool, written to 548
|
||||
uint8_t short_video_header; // bool, negated written to 528 shifted by 1
|
||||
uint8_t u54; // bool, written to 0x740
|
||||
uint8_t vop_coding_type; // 55
|
||||
uint8_t rounding_control; // 56
|
||||
uint8_t alternate_vertical_scan_flag; // 57 bool
|
||||
uint8_t top_field_first; // bool, written to vuc
|
||||
|
||||
uint8_t pad4[3]; // 59, 5a, 5b, contains garbage on blob
|
||||
uint8_t pad4[3]; // 59, 5a, 5b, contains garbage on blob
|
||||
|
||||
uint32_t intra[0x10]; // 5c
|
||||
uint32_t non_intra[0x10]; // 9c
|
||||
uint32_t pad5[0x10]; // bc what does this do?
|
||||
// udc..uff pad?
|
||||
uint32_t intra[0x10]; // 5c
|
||||
uint32_t non_intra[0x10]; // 9c
|
||||
uint32_t pad5[0x10]; // bc what does this do?
|
||||
// udc..uff pad?
|
||||
};
|
||||
|
||||
// Full version, with data pumped from BSP
|
||||
struct vc1_picparm_vp {
|
||||
uint32_t bucket_size; // 00
|
||||
uint32_t pad; // 04
|
||||
uint32_t bucket_size; // 00
|
||||
uint32_t pad; // 04
|
||||
|
||||
uint32_t inter_ring_data_size; // 08
|
||||
uint32_t unk0c; // stride 1
|
||||
uint32_t unk10; // stride 2
|
||||
uint32_t ofs[6]; // 14..28 ofs
|
||||
uint32_t inter_ring_data_size; // 08
|
||||
uint32_t unk0c; // stride 1
|
||||
uint32_t unk10; // stride 2
|
||||
uint32_t ofs[6]; // 14..28 ofs
|
||||
|
||||
uint16_t width; // 2c
|
||||
uint16_t height; // 2e
|
||||
uint16_t width; // 2c
|
||||
uint16_t height; // 2e
|
||||
|
||||
uint8_t profile; // 30 0 = simple, 1 = main, 2 = advanced
|
||||
uint8_t loopfilter; // 31 written into vuc
|
||||
uint8_t fastuvmc; // 32, written into vuc
|
||||
uint8_t dquant; // 33
|
||||
uint8_t profile; // 30 0 = simple, 1 = main, 2 = advanced
|
||||
uint8_t loopfilter; // 31 written into vuc
|
||||
uint8_t fastuvmc; // 32, written into vuc
|
||||
uint8_t dquant; // 33
|
||||
|
||||
uint8_t overlap; // 34
|
||||
uint8_t quantizer; // 35
|
||||
uint8_t u36; // 36, bool
|
||||
uint8_t pad2; // 37, to align to 0x38
|
||||
uint8_t overlap; // 34
|
||||
uint8_t quantizer; // 35
|
||||
uint8_t u36; // 36, bool
|
||||
uint8_t pad2; // 37, to align to 0x38
|
||||
};
|
||||
|
||||
struct h264_picparm_vp { // 700..a00
|
||||
uint16_t width, height;
|
||||
uint32_t stride1, stride2; // 04 08
|
||||
uint32_t ofs[6]; // 0c..24 in-image offset
|
||||
uint16_t width, height;
|
||||
uint32_t stride1, stride2; // 04 08
|
||||
uint32_t ofs[6]; // 0c..24 in-image offset
|
||||
|
||||
uint32_t tmp_stride;
|
||||
uint32_t bucket_size; // 28 bucket size
|
||||
uint32_t inter_ring_data_size; // 2c
|
||||
uint32_t tmp_stride;
|
||||
uint32_t bucket_size; // 28 bucket size
|
||||
uint32_t inter_ring_data_size; // 2c
|
||||
|
||||
unsigned mb_adaptive_frame_field_flag : 1; // 0
|
||||
unsigned direct_8x8_inference_flag : 1; // 1 0x02: into vuc ofs 56
|
||||
unsigned weighted_pred_flag : 1; // 2 0x04
|
||||
unsigned constrained_intra_pred_flag : 1; // 3 0x08: into vuc ofs 68
|
||||
unsigned is_reference : 1; // 4
|
||||
unsigned interlace : 1; // 5 field_pic_flag
|
||||
unsigned bottom_field_flag : 1; // 6
|
||||
unsigned second_field : 1; // 7 0x80: nfi yet
|
||||
unsigned mb_adaptive_frame_field_flag : 1; // 0
|
||||
unsigned direct_8x8_inference_flag : 1; // 1 0x02: into vuc ofs 56
|
||||
unsigned weighted_pred_flag : 1; // 2 0x04
|
||||
unsigned constrained_intra_pred_flag : 1; // 3 0x08: into vuc ofs 68
|
||||
unsigned is_reference : 1; // 4
|
||||
unsigned interlace : 1; // 5 field_pic_flag
|
||||
unsigned bottom_field_flag : 1; // 6
|
||||
unsigned second_field : 1; // 7 0x80: nfi yet
|
||||
|
||||
signed log2_max_frame_num_minus4 : 4; // 31 0..3
|
||||
unsigned chroma_format_idc : 2; // 31 4..5
|
||||
unsigned pic_order_cnt_type : 2; // 31 6..7
|
||||
signed pic_init_qp_minus26 : 6; // 32 0..5
|
||||
signed chroma_qp_index_offset : 5; // 32 6..10
|
||||
signed second_chroma_qp_index_offset : 5; // 32 11..15
|
||||
signed log2_max_frame_num_minus4 : 4; // 31 0..3
|
||||
unsigned chroma_format_idc : 2; // 31 4..5
|
||||
unsigned pic_order_cnt_type : 2; // 31 6..7
|
||||
signed pic_init_qp_minus26 : 6; // 32 0..5
|
||||
signed chroma_qp_index_offset : 5; // 32 6..10
|
||||
signed second_chroma_qp_index_offset : 5; // 32 11..15
|
||||
|
||||
unsigned weighted_bipred_idc : 2; // 34 0..1
|
||||
unsigned fifo_dec_index : 7; // 34 2..8
|
||||
unsigned tmp_idx : 5; // 34 9..13
|
||||
unsigned frame_number : 16; // 34 14..29
|
||||
unsigned u34_3030 : 1; // 34 30..30 pp.u34[30:30]
|
||||
unsigned u34_3131 : 1; // 34 31..31 pad?
|
||||
unsigned weighted_bipred_idc : 2; // 34 0..1
|
||||
unsigned fifo_dec_index : 7; // 34 2..8
|
||||
unsigned tmp_idx : 5; // 34 9..13
|
||||
unsigned frame_number : 16; // 34 14..29
|
||||
unsigned u34_3030 : 1; // 34 30..30 pp.u34[30:30]
|
||||
unsigned u34_3131 : 1; // 34 31..31 pad?
|
||||
|
||||
uint32_t field_order_cnt[2]; // 38, 3c
|
||||
uint32_t field_order_cnt[2]; // 38, 3c
|
||||
|
||||
struct { // 40
|
||||
unsigned fifo_idx : 7; // 00 0..6
|
||||
unsigned tmp_idx : 5; // 00 7..11
|
||||
unsigned top_is_reference : 1; // 00 12
|
||||
unsigned bottom_is_reference : 1; // 00 13
|
||||
unsigned is_long_term : 1; // 00 14
|
||||
unsigned notseenyet : 1; // 00 15 pad?
|
||||
unsigned field_pic_flag : 1; // 00 16
|
||||
unsigned top_field_marking : 4; // 00 17..20
|
||||
unsigned bottom_field_marking : 4; // 00 21..24
|
||||
unsigned pad : 7; // 00 d25..31
|
||||
struct { // 40
|
||||
unsigned fifo_idx : 7; // 00 0..6
|
||||
unsigned tmp_idx : 5; // 00 7..11
|
||||
unsigned top_is_reference : 1; // 00 12
|
||||
unsigned bottom_is_reference : 1; // 00 13
|
||||
unsigned is_long_term : 1; // 00 14
|
||||
unsigned notseenyet : 1; // 00 15 pad?
|
||||
unsigned field_pic_flag : 1; // 00 16
|
||||
unsigned top_field_marking : 4; // 00 17..20
|
||||
unsigned bottom_field_marking : 4; // 00 21..24
|
||||
unsigned pad : 7; // 00 d25..31
|
||||
|
||||
uint32_t field_order_cnt[2]; // 04,08
|
||||
uint32_t frame_idx; // 0c
|
||||
} refs[0x10];
|
||||
uint32_t field_order_cnt[2]; // 04,08
|
||||
uint32_t frame_idx; // 0c
|
||||
} refs[0x10];
|
||||
|
||||
uint8_t m4x4[6][16]; // 140
|
||||
uint8_t m8x8[2][64]; // 1a0
|
||||
uint32_t u220; // 220 number of extra reorder_list to append?
|
||||
uint8_t u224[0x20]; // 224..244 reorder_list append ?
|
||||
uint8_t nfi244[0xb0]; // add some pad to make sure nulls are read
|
||||
uint8_t m4x4[6][16]; // 140
|
||||
uint8_t m8x8[2][64]; // 1a0
|
||||
uint32_t u220; // 220 number of extra reorder_list to append?
|
||||
uint8_t u224[0x20]; // 224..244 reorder_list append ?
|
||||
uint8_t nfi244[0xb0]; // add some pad to make sure nulls are read
|
||||
};
|
||||
|
||||
static void
|
||||
|
|
|
@ -65,18 +65,18 @@ PUSH_KICK(struct nouveau_pushbuf *push)
|
|||
static inline uint32_t
|
||||
nouveau_screen_transfer_flags(unsigned pipe)
|
||||
{
|
||||
uint32_t flags = 0;
|
||||
uint32_t flags = 0;
|
||||
|
||||
if (!(pipe & PIPE_TRANSFER_UNSYNCHRONIZED)) {
|
||||
if (pipe & PIPE_TRANSFER_READ)
|
||||
flags |= NOUVEAU_BO_RD;
|
||||
if (pipe & PIPE_TRANSFER_WRITE)
|
||||
flags |= NOUVEAU_BO_WR;
|
||||
if (pipe & PIPE_TRANSFER_DONTBLOCK)
|
||||
flags |= NOUVEAU_BO_NOBLOCK;
|
||||
}
|
||||
if (!(pipe & PIPE_TRANSFER_UNSYNCHRONIZED)) {
|
||||
if (pipe & PIPE_TRANSFER_READ)
|
||||
flags |= NOUVEAU_BO_RD;
|
||||
if (pipe & PIPE_TRANSFER_WRITE)
|
||||
flags |= NOUVEAU_BO_WR;
|
||||
if (pipe & PIPE_TRANSFER_DONTBLOCK)
|
||||
flags |= NOUVEAU_BO_NOBLOCK;
|
||||
}
|
||||
|
||||
return flags;
|
||||
return flags;
|
||||
}
|
||||
|
||||
extern struct pipe_screen *
|
||||
|
|
|
@ -172,6 +172,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "nv50/nv50_program.h"
|
||||
#include "nv50/nv50_resource.h"
|
||||
#include "nv50/nv50_transfer.h"
|
||||
#include "nv50/nv50_query.h"
|
||||
|
||||
#include "nouveau_context.h"
|
||||
#include "nouveau_debug.h"
|
||||
|
@ -195,17 +196,6 @@ void nv50_default_kick_notify(struct nouveau_pushbuf *);
|
|||
/* nv50_draw.c */
|
||||
extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
|
||||
|
||||
/* nv50_query.c */
|
||||
void nv50_init_query_functions(struct nv50_context *);
|
||||
void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t method,
|
||||
struct pipe_query *, unsigned result_offset);
|
||||
void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
|
||||
void nva0_so_target_save_offset(struct pipe_context *,
|
||||
struct pipe_stream_output_target *,
|
||||
unsigned index, bool seralize);
|
||||
|
||||
#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
|
||||
|
||||
/* nv50_shader_state.c */
|
||||
void nv50_vertprog_validate(struct nv50_context *);
|
||||
void nv50_gmtyprog_validate(struct nv50_context *);
|
||||
|
|
|
@ -336,7 +336,6 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
|
|||
info->io.ucpCBSlot = 15;
|
||||
info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
|
||||
info->io.genUserClip = prog->vp.clpd_nr;
|
||||
info->io.sampleInterp = prog->fp.sample_interp;
|
||||
|
||||
info->io.resInfoCBSlot = 15;
|
||||
info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
|
||||
|
@ -374,6 +373,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
|
|||
prog->code = info->bin.code;
|
||||
prog->code_size = info->bin.codeSize;
|
||||
prog->fixups = info->bin.relocData;
|
||||
prog->interps = info->bin.interpData;
|
||||
prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
|
||||
prog->tls_space = info->bin.tlsSpace;
|
||||
|
||||
|
@ -420,8 +420,8 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
|
|||
|
||||
switch (prog->type) {
|
||||
case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break;
|
||||
case PIPE_SHADER_GEOMETRY: heap = nv50->screen->fp_code_heap; break;
|
||||
case PIPE_SHADER_FRAGMENT: heap = nv50->screen->gp_code_heap; break;
|
||||
case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break;
|
||||
case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break;
|
||||
default:
|
||||
assert(!"invalid program type");
|
||||
return false;
|
||||
|
@ -456,6 +456,10 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
|
|||
|
||||
if (prog->fixups)
|
||||
nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0);
|
||||
if (prog->interps)
|
||||
nv50_ir_change_interp(prog->interps, prog->code,
|
||||
prog->fp.force_persample_interp,
|
||||
false /* flatshade */);
|
||||
|
||||
nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
|
||||
(prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
|
||||
|
|
|
@ -86,7 +86,7 @@ struct nv50_program {
|
|||
uint32_t interp; /* 0x1988 */
|
||||
uint32_t colors; /* 0x1904 */
|
||||
uint8_t has_samplemask;
|
||||
uint8_t sample_interp;
|
||||
uint8_t force_persample_interp;
|
||||
} fp;
|
||||
|
||||
struct {
|
||||
|
@ -99,6 +99,7 @@ struct nv50_program {
|
|||
} gp;
|
||||
|
||||
void *fixups; /* relocation records */
|
||||
void *interps; /* interpolation records */
|
||||
|
||||
struct nouveau_heap *mem;
|
||||
|
||||
|
|
|
@ -25,356 +25,46 @@
|
|||
#define NV50_PUSH_EXPLICIT_SPACE_CHECKING
|
||||
|
||||
#include "nv50/nv50_context.h"
|
||||
#include "nv_object.xml.h"
|
||||
|
||||
#define NV50_QUERY_STATE_READY 0
|
||||
#define NV50_QUERY_STATE_ACTIVE 1
|
||||
#define NV50_QUERY_STATE_ENDED 2
|
||||
#define NV50_QUERY_STATE_FLUSHED 3
|
||||
|
||||
/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
|
||||
* (since we use only a single GPU channel per screen) will not work properly.
|
||||
*
|
||||
* The first is not that big of an issue because OpenGL does not allow nested
|
||||
* queries anyway.
|
||||
*/
|
||||
|
||||
struct nv50_query {
|
||||
uint32_t *data;
|
||||
uint16_t type;
|
||||
uint16_t index;
|
||||
uint32_t sequence;
|
||||
struct nouveau_bo *bo;
|
||||
uint32_t base;
|
||||
uint32_t offset; /* base + i * 32 */
|
||||
uint8_t state;
|
||||
bool is64bit;
|
||||
int nesting; /* only used for occlusion queries */
|
||||
struct nouveau_mm_allocation *mm;
|
||||
struct nouveau_fence *fence;
|
||||
};
|
||||
|
||||
#define NV50_QUERY_ALLOC_SPACE 256
|
||||
|
||||
static inline struct nv50_query *
|
||||
nv50_query(struct pipe_query *pipe)
|
||||
{
|
||||
return (struct nv50_query *)pipe;
|
||||
}
|
||||
|
||||
static bool
|
||||
nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size)
|
||||
{
|
||||
struct nv50_screen *screen = nv50->screen;
|
||||
int ret;
|
||||
|
||||
if (q->bo) {
|
||||
nouveau_bo_ref(NULL, &q->bo);
|
||||
if (q->mm) {
|
||||
if (q->state == NV50_QUERY_STATE_READY)
|
||||
nouveau_mm_free(q->mm);
|
||||
else
|
||||
nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work,
|
||||
q->mm);
|
||||
}
|
||||
}
|
||||
if (size) {
|
||||
q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
|
||||
if (!q->bo)
|
||||
return false;
|
||||
q->offset = q->base;
|
||||
|
||||
ret = nouveau_bo_map(q->bo, 0, screen->base.client);
|
||||
if (ret) {
|
||||
nv50_query_allocate(nv50, q, 0);
|
||||
return false;
|
||||
}
|
||||
q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
|
||||
{
|
||||
nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0);
|
||||
nouveau_fence_ref(NULL, &nv50_query(pq)->fence);
|
||||
FREE(nv50_query(pq));
|
||||
}
|
||||
#include "nv50/nv50_query.h"
|
||||
#include "nv50/nv50_query_hw.h"
|
||||
|
||||
static struct pipe_query *
|
||||
nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
|
||||
nv50_create_query(struct pipe_context *pipe, unsigned type, unsigned index)
|
||||
{
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
struct nv50_query *q;
|
||||
|
||||
q = CALLOC_STRUCT(nv50_query);
|
||||
if (!q)
|
||||
return NULL;
|
||||
|
||||
if (!nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE)) {
|
||||
FREE(q);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
|
||||
type == PIPE_QUERY_PRIMITIVES_EMITTED ||
|
||||
type == PIPE_QUERY_SO_STATISTICS ||
|
||||
type == PIPE_QUERY_PIPELINE_STATISTICS);
|
||||
q->type = type;
|
||||
|
||||
if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
|
||||
q->offset -= 32;
|
||||
q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! */
|
||||
}
|
||||
|
||||
q = nv50_hw_create_query(nv50, type, index);
|
||||
return (struct pipe_query *)q;
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_query_get(struct nouveau_pushbuf *push, struct nv50_query *q,
|
||||
unsigned offset, uint32_t get)
|
||||
nv50_destroy_query(struct pipe_context *pipe, struct pipe_query *pq)
|
||||
{
|
||||
offset += q->offset;
|
||||
|
||||
PUSH_SPACE(push, 5);
|
||||
PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
|
||||
BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4);
|
||||
PUSH_DATAh(push, q->bo->offset + offset);
|
||||
PUSH_DATA (push, q->bo->offset + offset);
|
||||
PUSH_DATA (push, q->sequence);
|
||||
PUSH_DATA (push, get);
|
||||
struct nv50_query *q = nv50_query(pq);
|
||||
q->funcs->destroy_query(nv50_context(pipe), q);
|
||||
}
|
||||
|
||||
static boolean
|
||||
nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
|
||||
nv50_begin_query(struct pipe_context *pipe, struct pipe_query *pq)
|
||||
{
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
struct nouveau_pushbuf *push = nv50->base.pushbuf;
|
||||
struct nv50_query *q = nv50_query(pq);
|
||||
|
||||
/* For occlusion queries we have to change the storage, because a previous
|
||||
* query might set the initial render conition to false even *after* we re-
|
||||
* initialized it to true.
|
||||
*/
|
||||
if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
|
||||
q->offset += 32;
|
||||
q->data += 32 / sizeof(*q->data);
|
||||
if (q->offset - q->base == NV50_QUERY_ALLOC_SPACE)
|
||||
nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE);
|
||||
|
||||
/* XXX: can we do this with the GPU, and sync with respect to a previous
|
||||
* query ?
|
||||
*/
|
||||
q->data[0] = q->sequence; /* initialize sequence */
|
||||
q->data[1] = 1; /* initial render condition = true */
|
||||
q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
|
||||
q->data[5] = 0;
|
||||
}
|
||||
if (!q->is64bit)
|
||||
q->data[0] = q->sequence++; /* the previously used one */
|
||||
|
||||
switch (q->type) {
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
q->nesting = nv50->screen->num_occlusion_queries_active++;
|
||||
if (q->nesting) {
|
||||
nv50_query_get(push, q, 0x10, 0x0100f002);
|
||||
} else {
|
||||
PUSH_SPACE(push, 4);
|
||||
BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
|
||||
PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
|
||||
BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
|
||||
PUSH_DATA (push, 1);
|
||||
}
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
nv50_query_get(push, q, 0x10, 0x06805002);
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
nv50_query_get(push, q, 0x10, 0x05805002);
|
||||
break;
|
||||
case PIPE_QUERY_SO_STATISTICS:
|
||||
nv50_query_get(push, q, 0x20, 0x05805002);
|
||||
nv50_query_get(push, q, 0x30, 0x06805002);
|
||||
break;
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS:
|
||||
nv50_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */
|
||||
nv50_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */
|
||||
nv50_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */
|
||||
nv50_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */
|
||||
nv50_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */
|
||||
nv50_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */
|
||||
nv50_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */
|
||||
nv50_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */
|
||||
break;
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
nv50_query_get(push, q, 0x10, 0x00005002);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
q->state = NV50_QUERY_STATE_ACTIVE;
|
||||
return true;
|
||||
return q->funcs->begin_query(nv50_context(pipe), q);
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
|
||||
nv50_end_query(struct pipe_context *pipe, struct pipe_query *pq)
|
||||
{
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
struct nouveau_pushbuf *push = nv50->base.pushbuf;
|
||||
struct nv50_query *q = nv50_query(pq);
|
||||
|
||||
q->state = NV50_QUERY_STATE_ENDED;
|
||||
|
||||
switch (q->type) {
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
nv50_query_get(push, q, 0, 0x0100f002);
|
||||
if (--nv50->screen->num_occlusion_queries_active == 0) {
|
||||
PUSH_SPACE(push, 2);
|
||||
BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
}
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
nv50_query_get(push, q, 0, 0x06805002);
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
nv50_query_get(push, q, 0, 0x05805002);
|
||||
break;
|
||||
case PIPE_QUERY_SO_STATISTICS:
|
||||
nv50_query_get(push, q, 0x00, 0x05805002);
|
||||
nv50_query_get(push, q, 0x10, 0x06805002);
|
||||
break;
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS:
|
||||
nv50_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
|
||||
nv50_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
|
||||
nv50_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
|
||||
nv50_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
|
||||
nv50_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
|
||||
nv50_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
|
||||
nv50_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
|
||||
nv50_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
|
||||
break;
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
q->sequence++;
|
||||
/* fall through */
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
nv50_query_get(push, q, 0, 0x00005002);
|
||||
break;
|
||||
case PIPE_QUERY_GPU_FINISHED:
|
||||
q->sequence++;
|
||||
nv50_query_get(push, q, 0, 0x1000f010);
|
||||
break;
|
||||
case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
|
||||
q->sequence++;
|
||||
nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
|
||||
break;
|
||||
case PIPE_QUERY_TIMESTAMP_DISJOINT:
|
||||
/* This query is not issued on GPU because disjoint is forced to false */
|
||||
q->state = NV50_QUERY_STATE_READY;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
|
||||
if (q->is64bit)
|
||||
nouveau_fence_ref(nv50->screen->base.fence.current, &q->fence);
|
||||
}
|
||||
|
||||
static inline void
|
||||
nv50_query_update(struct nv50_query *q)
|
||||
{
|
||||
if (q->is64bit) {
|
||||
if (nouveau_fence_signalled(q->fence))
|
||||
q->state = NV50_QUERY_STATE_READY;
|
||||
} else {
|
||||
if (q->data[0] == q->sequence)
|
||||
q->state = NV50_QUERY_STATE_READY;
|
||||
}
|
||||
q->funcs->end_query(nv50_context(pipe), q);
|
||||
}
|
||||
|
||||
static boolean
|
||||
nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
|
||||
boolean wait, union pipe_query_result *result)
|
||||
{
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
struct nv50_query *q = nv50_query(pq);
|
||||
uint64_t *res64 = (uint64_t *)result;
|
||||
uint32_t *res32 = (uint32_t *)result;
|
||||
uint8_t *res8 = (uint8_t *)result;
|
||||
uint64_t *data64 = (uint64_t *)q->data;
|
||||
int i;
|
||||
|
||||
if (q->state != NV50_QUERY_STATE_READY)
|
||||
nv50_query_update(q);
|
||||
|
||||
if (q->state != NV50_QUERY_STATE_READY) {
|
||||
if (!wait) {
|
||||
/* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
|
||||
if (q->state != NV50_QUERY_STATE_FLUSHED) {
|
||||
q->state = NV50_QUERY_STATE_FLUSHED;
|
||||
PUSH_KICK(nv50->base.pushbuf);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
|
||||
return false;
|
||||
}
|
||||
q->state = NV50_QUERY_STATE_READY;
|
||||
|
||||
switch (q->type) {
|
||||
case PIPE_QUERY_GPU_FINISHED:
|
||||
res8[0] = true;
|
||||
break;
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
|
||||
res64[0] = q->data[1] - q->data[5];
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
|
||||
res64[0] = data64[0] - data64[2];
|
||||
break;
|
||||
case PIPE_QUERY_SO_STATISTICS:
|
||||
res64[0] = data64[0] - data64[4];
|
||||
res64[1] = data64[2] - data64[6];
|
||||
break;
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS:
|
||||
for (i = 0; i < 8; ++i)
|
||||
res64[i] = data64[i * 2] - data64[16 + i * 2];
|
||||
break;
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
res64[0] = data64[1];
|
||||
break;
|
||||
case PIPE_QUERY_TIMESTAMP_DISJOINT:
|
||||
res64[0] = 1000000000;
|
||||
res8[8] = false;
|
||||
break;
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
res64[0] = data64[1] - data64[3];
|
||||
break;
|
||||
case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
|
||||
res32[0] = q->data[1];
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
|
||||
nv50_get_query_result(struct pipe_context *pipe, struct pipe_query *pq,
|
||||
boolean wait, union pipe_query_result *result)
|
||||
{
|
||||
struct nv50_query *q = nv50_query(pq);
|
||||
unsigned offset = q->offset;
|
||||
|
||||
PUSH_SPACE(push, 5);
|
||||
PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
|
||||
BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
|
||||
PUSH_DATAh(push, q->bo->offset + offset);
|
||||
PUSH_DATA (push, q->bo->offset + offset);
|
||||
PUSH_DATA (push, q->sequence);
|
||||
PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
|
||||
return q->funcs->get_query_result(nv50_context(pipe), q, wait, result);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -384,7 +74,8 @@ nv50_render_condition(struct pipe_context *pipe,
|
|||
{
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
struct nouveau_pushbuf *push = nv50->base.pushbuf;
|
||||
struct nv50_query *q;
|
||||
struct nv50_query *q = nv50_query(pq);
|
||||
struct nv50_hw_query *hq = nv50_hw_query(q);
|
||||
uint32_t cond;
|
||||
bool wait =
|
||||
mode != PIPE_RENDER_COND_NO_WAIT &&
|
||||
|
@ -394,7 +85,6 @@ nv50_render_condition(struct pipe_context *pipe,
|
|||
cond = NV50_3D_COND_MODE_ALWAYS;
|
||||
}
|
||||
else {
|
||||
q = nv50_query(pq);
|
||||
/* NOTE: comparison of 2 queries only works if both have completed */
|
||||
switch (q->type) {
|
||||
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
|
||||
|
@ -405,7 +95,7 @@ nv50_render_condition(struct pipe_context *pipe,
|
|||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
if (likely(!condition)) {
|
||||
if (unlikely(q->nesting))
|
||||
if (unlikely(hq->nesting))
|
||||
cond = wait ? NV50_3D_COND_MODE_NOT_EQUAL :
|
||||
NV50_3D_COND_MODE_ALWAYS;
|
||||
else
|
||||
|
@ -440,48 +130,15 @@ nv50_render_condition(struct pipe_context *pipe,
|
|||
PUSH_DATA (push, 0);
|
||||
}
|
||||
|
||||
PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
|
||||
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
|
||||
BEGIN_NV04(push, NV50_3D(COND_ADDRESS_HIGH), 3);
|
||||
PUSH_DATAh(push, q->bo->offset + q->offset);
|
||||
PUSH_DATA (push, q->bo->offset + q->offset);
|
||||
PUSH_DATAh(push, hq->bo->offset + hq->offset);
|
||||
PUSH_DATA (push, hq->bo->offset + hq->offset);
|
||||
PUSH_DATA (push, cond);
|
||||
|
||||
BEGIN_NV04(push, NV50_2D(COND_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, q->bo->offset + q->offset);
|
||||
PUSH_DATA (push, q->bo->offset + q->offset);
|
||||
}
|
||||
|
||||
void
|
||||
nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
|
||||
struct pipe_query *pq, unsigned result_offset)
|
||||
{
|
||||
struct nv50_query *q = nv50_query(pq);
|
||||
|
||||
nv50_query_update(q);
|
||||
if (q->state != NV50_QUERY_STATE_READY)
|
||||
nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, push->client);
|
||||
q->state = NV50_QUERY_STATE_READY;
|
||||
|
||||
BEGIN_NV04(push, SUBC_3D(method), 1);
|
||||
PUSH_DATA (push, q->data[result_offset / 4]);
|
||||
}
|
||||
|
||||
void
|
||||
nva0_so_target_save_offset(struct pipe_context *pipe,
|
||||
struct pipe_stream_output_target *ptarg,
|
||||
unsigned index, bool serialize)
|
||||
{
|
||||
struct nv50_so_target *targ = nv50_so_target(ptarg);
|
||||
|
||||
if (serialize) {
|
||||
struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
|
||||
PUSH_SPACE(push, 2);
|
||||
BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
}
|
||||
|
||||
nv50_query(targ->pq)->index = index;
|
||||
nv50_query_end(pipe, targ->pq);
|
||||
PUSH_DATAh(push, hq->bo->offset + hq->offset);
|
||||
PUSH_DATA (push, hq->bo->offset + hq->offset);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -489,10 +146,10 @@ nv50_init_query_functions(struct nv50_context *nv50)
|
|||
{
|
||||
struct pipe_context *pipe = &nv50->base.pipe;
|
||||
|
||||
pipe->create_query = nv50_query_create;
|
||||
pipe->destroy_query = nv50_query_destroy;
|
||||
pipe->begin_query = nv50_query_begin;
|
||||
pipe->end_query = nv50_query_end;
|
||||
pipe->get_query_result = nv50_query_result;
|
||||
pipe->create_query = nv50_create_query;
|
||||
pipe->destroy_query = nv50_destroy_query;
|
||||
pipe->begin_query = nv50_begin_query;
|
||||
pipe->end_query = nv50_end_query;
|
||||
pipe->get_query_result = nv50_get_query_result;
|
||||
pipe->render_condition = nv50_render_condition;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
#ifndef __NV50_QUERY_H__
|
||||
#define __NV50_QUERY_H__
|
||||
|
||||
#include "pipe/p_context.h"
|
||||
|
||||
#include "nouveau_context.h"
|
||||
|
||||
struct nv50_context;
|
||||
struct nv50_query;
|
||||
|
||||
struct nv50_query_funcs {
|
||||
void (*destroy_query)(struct nv50_context *, struct nv50_query *);
|
||||
boolean (*begin_query)(struct nv50_context *, struct nv50_query *);
|
||||
void (*end_query)(struct nv50_context *, struct nv50_query *);
|
||||
boolean (*get_query_result)(struct nv50_context *, struct nv50_query *,
|
||||
boolean, union pipe_query_result *);
|
||||
};
|
||||
|
||||
/*
 * Common part of every nv50 query object. A pipe_query handle is cast
 * directly to this structure by nv50_query().
 */
struct nv50_query {
   /* Implementation hooks used to begin/end/read/destroy this query. */
   const struct nv50_query_funcs *funcs;

   /* Query type (PIPE_QUERY_* or a driver-private value). */
   uint16_t type;

   /* Per-query index. */
   uint16_t index;
};
|
||||
|
||||
/* Reinterpret an opaque gallium query handle as the driver's query object. */
static inline struct nv50_query *
nv50_query(struct pipe_query *pipe)
{
   struct nv50_query *q = (struct nv50_query *)pipe;

   return q;
}
|
||||
|
||||
void nv50_init_query_functions(struct nv50_context *);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,406 @@
|
|||
/*
|
||||
* Copyright 2011 Christoph Bumiller
|
||||
* Copyright 2015 Samuel Pitoiset
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#define NV50_PUSH_EXPLICIT_SPACE_CHECKING
|
||||
|
||||
#include "nv50/nv50_context.h"
|
||||
#include "nv50/nv50_query_hw.h"
|
||||
#include "nv_object.xml.h"
|
||||
|
||||
#define NV50_HW_QUERY_STATE_READY 0
|
||||
#define NV50_HW_QUERY_STATE_ACTIVE 1
|
||||
#define NV50_HW_QUERY_STATE_ENDED 2
|
||||
#define NV50_HW_QUERY_STATE_FLUSHED 3
|
||||
|
||||
/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
|
||||
* (since we use only a single GPU channel per screen) will not work properly.
|
||||
*
|
||||
* The first is not that big of an issue because OpenGL does not allow nested
|
||||
* queries anyway.
|
||||
*/
|
||||
|
||||
#define NV50_HW_QUERY_ALLOC_SPACE 256
|
||||
|
||||
static bool
|
||||
nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q,
|
||||
int size)
|
||||
{
|
||||
struct nv50_screen *screen = nv50->screen;
|
||||
struct nv50_hw_query *hq = nv50_hw_query(q);
|
||||
int ret;
|
||||
|
||||
if (hq->bo) {
|
||||
nouveau_bo_ref(NULL, &hq->bo);
|
||||
if (hq->mm) {
|
||||
if (hq->state == NV50_HW_QUERY_STATE_READY)
|
||||
nouveau_mm_free(hq->mm);
|
||||
else
|
||||
nouveau_fence_work(screen->base.fence.current,
|
||||
nouveau_mm_free_work, hq->mm);
|
||||
}
|
||||
}
|
||||
if (size) {
|
||||
hq->mm = nouveau_mm_allocate(screen->base.mm_GART, size,
|
||||
&hq->bo, &hq->base_offset);
|
||||
if (!hq->bo)
|
||||
return false;
|
||||
hq->offset = hq->base_offset;
|
||||
|
||||
ret = nouveau_bo_map(hq->bo, 0, screen->base.client);
|
||||
if (ret) {
|
||||
nv50_hw_query_allocate(nv50, q, 0);
|
||||
return false;
|
||||
}
|
||||
hq->data = (uint32_t *)((uint8_t *)hq->bo->map + hq->base_offset);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_hw_query_get(struct nouveau_pushbuf *push, struct nv50_query *q,
|
||||
unsigned offset, uint32_t get)
|
||||
{
|
||||
struct nv50_hw_query *hq = nv50_hw_query(q);
|
||||
|
||||
offset += hq->offset;
|
||||
|
||||
PUSH_SPACE(push, 5);
|
||||
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
|
||||
BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4);
|
||||
PUSH_DATAh(push, hq->bo->offset + offset);
|
||||
PUSH_DATA (push, hq->bo->offset + offset);
|
||||
PUSH_DATA (push, hq->sequence);
|
||||
PUSH_DATA (push, get);
|
||||
}
|
||||
|
||||
static inline void
|
||||
nv50_hw_query_update(struct nv50_query *q)
|
||||
{
|
||||
struct nv50_hw_query *hq = nv50_hw_query(q);
|
||||
|
||||
if (hq->is64bit) {
|
||||
if (nouveau_fence_signalled(hq->fence))
|
||||
hq->state = NV50_HW_QUERY_STATE_READY;
|
||||
} else {
|
||||
if (hq->data[0] == hq->sequence)
|
||||
hq->state = NV50_HW_QUERY_STATE_READY;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_hw_destroy_query(struct nv50_context *nv50, struct nv50_query *q)
|
||||
{
|
||||
struct nv50_hw_query *hq = nv50_hw_query(q);
|
||||
nv50_hw_query_allocate(nv50, q, 0);
|
||||
nouveau_fence_ref(NULL, &hq->fence);
|
||||
FREE(hq);
|
||||
}
|
||||
|
||||
static boolean
|
||||
nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
|
||||
{
|
||||
struct nouveau_pushbuf *push = nv50->base.pushbuf;
|
||||
struct nv50_hw_query *hq = nv50_hw_query(q);
|
||||
|
||||
/* For occlusion queries we have to change the storage, because a previous
|
||||
* query might set the initial render condition to false even *after* we re-
|
||||
* initialized it to true.
|
||||
*/
|
||||
if (hq->rotate) {
|
||||
hq->offset += hq->rotate;
|
||||
hq->data += hq->rotate / sizeof(*hq->data);
|
||||
if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE)
|
||||
nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE);
|
||||
|
||||
/* XXX: can we do this with the GPU, and sync with respect to a previous
|
||||
* query ?
|
||||
*/
|
||||
hq->data[0] = hq->sequence; /* initialize sequence */
|
||||
hq->data[1] = 1; /* initial render condition = true */
|
||||
hq->data[4] = hq->sequence + 1; /* for comparison COND_MODE */
|
||||
hq->data[5] = 0;
|
||||
}
|
||||
if (!hq->is64bit)
|
||||
hq->data[0] = hq->sequence++; /* the previously used one */
|
||||
|
||||
switch (q->type) {
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
hq->nesting = nv50->screen->num_occlusion_queries_active++;
|
||||
if (hq->nesting) {
|
||||
nv50_hw_query_get(push, q, 0x10, 0x0100f002);
|
||||
} else {
|
||||
PUSH_SPACE(push, 4);
|
||||
BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
|
||||
PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
|
||||
BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
|
||||
PUSH_DATA (push, 1);
|
||||
}
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
nv50_hw_query_get(push, q, 0x10, 0x06805002);
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
nv50_hw_query_get(push, q, 0x10, 0x05805002);
|
||||
break;
|
||||
case PIPE_QUERY_SO_STATISTICS:
|
||||
nv50_hw_query_get(push, q, 0x20, 0x05805002);
|
||||
nv50_hw_query_get(push, q, 0x30, 0x06805002);
|
||||
break;
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS:
|
||||
nv50_hw_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */
|
||||
nv50_hw_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */
|
||||
nv50_hw_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */
|
||||
nv50_hw_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */
|
||||
nv50_hw_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */
|
||||
nv50_hw_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */
|
||||
nv50_hw_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */
|
||||
nv50_hw_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */
|
||||
break;
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
nv50_hw_query_get(push, q, 0x10, 0x00005002);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
return false;
|
||||
}
|
||||
hq->state = NV50_HW_QUERY_STATE_ACTIVE;
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
|
||||
{
|
||||
struct nouveau_pushbuf *push = nv50->base.pushbuf;
|
||||
struct nv50_hw_query *hq = nv50_hw_query(q);
|
||||
|
||||
hq->state = NV50_HW_QUERY_STATE_ENDED;
|
||||
|
||||
switch (q->type) {
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
nv50_hw_query_get(push, q, 0, 0x0100f002);
|
||||
if (--nv50->screen->num_occlusion_queries_active == 0) {
|
||||
PUSH_SPACE(push, 2);
|
||||
BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
}
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
nv50_hw_query_get(push, q, 0, 0x06805002);
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
nv50_hw_query_get(push, q, 0, 0x05805002);
|
||||
break;
|
||||
case PIPE_QUERY_SO_STATISTICS:
|
||||
nv50_hw_query_get(push, q, 0x00, 0x05805002);
|
||||
nv50_hw_query_get(push, q, 0x10, 0x06805002);
|
||||
break;
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS:
|
||||
nv50_hw_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
|
||||
nv50_hw_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
|
||||
nv50_hw_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
|
||||
nv50_hw_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
|
||||
nv50_hw_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
|
||||
nv50_hw_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
|
||||
nv50_hw_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
|
||||
nv50_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
|
||||
break;
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
hq->sequence++;
|
||||
/* fall through */
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
nv50_hw_query_get(push, q, 0, 0x00005002);
|
||||
break;
|
||||
case PIPE_QUERY_GPU_FINISHED:
|
||||
hq->sequence++;
|
||||
nv50_hw_query_get(push, q, 0, 0x1000f010);
|
||||
break;
|
||||
case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
|
||||
hq->sequence++;
|
||||
nv50_hw_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
|
||||
break;
|
||||
case PIPE_QUERY_TIMESTAMP_DISJOINT:
|
||||
/* This query is not issued on GPU because disjoint is forced to false */
|
||||
hq->state = NV50_HW_QUERY_STATE_READY;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
if (hq->is64bit)
|
||||
nouveau_fence_ref(nv50->screen->base.fence.current, &hq->fence);
|
||||
}
|
||||
|
||||
static boolean
|
||||
nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
|
||||
boolean wait, union pipe_query_result *result)
|
||||
{
|
||||
struct nv50_hw_query *hq = nv50_hw_query(q);
|
||||
uint64_t *res64 = (uint64_t *)result;
|
||||
uint32_t *res32 = (uint32_t *)result;
|
||||
uint8_t *res8 = (uint8_t *)result;
|
||||
uint64_t *data64 = (uint64_t *)hq->data;
|
||||
int i;
|
||||
|
||||
if (hq->state != NV50_HW_QUERY_STATE_READY)
|
||||
nv50_hw_query_update(q);
|
||||
|
||||
if (hq->state != NV50_HW_QUERY_STATE_READY) {
|
||||
if (!wait) {
|
||||
/* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
|
||||
if (hq->state != NV50_HW_QUERY_STATE_FLUSHED) {
|
||||
hq->state = NV50_HW_QUERY_STATE_FLUSHED;
|
||||
PUSH_KICK(nv50->base.pushbuf);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
|
||||
return false;
|
||||
}
|
||||
hq->state = NV50_HW_QUERY_STATE_READY;
|
||||
|
||||
switch (q->type) {
|
||||
case PIPE_QUERY_GPU_FINISHED:
|
||||
res8[0] = true;
|
||||
break;
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
|
||||
res64[0] = hq->data[1] - hq->data[5];
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
|
||||
res64[0] = data64[0] - data64[2];
|
||||
break;
|
||||
case PIPE_QUERY_SO_STATISTICS:
|
||||
res64[0] = data64[0] - data64[4];
|
||||
res64[1] = data64[2] - data64[6];
|
||||
break;
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS:
|
||||
for (i = 0; i < 8; ++i)
|
||||
res64[i] = data64[i * 2] - data64[16 + i * 2];
|
||||
break;
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
res64[0] = data64[1];
|
||||
break;
|
||||
case PIPE_QUERY_TIMESTAMP_DISJOINT:
|
||||
res64[0] = 1000000000;
|
||||
res8[8] = false;
|
||||
break;
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
res64[0] = data64[1] - data64[3];
|
||||
break;
|
||||
case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
|
||||
res32[0] = hq->data[1];
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static const struct nv50_query_funcs hw_query_funcs = {
|
||||
.destroy_query = nv50_hw_destroy_query,
|
||||
.begin_query = nv50_hw_begin_query,
|
||||
.end_query = nv50_hw_end_query,
|
||||
.get_query_result = nv50_hw_get_query_result,
|
||||
};
|
||||
|
||||
struct nv50_query *
|
||||
nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
|
||||
{
|
||||
struct nv50_hw_query *hq;
|
||||
struct nv50_query *q;
|
||||
|
||||
hq = CALLOC_STRUCT(nv50_hw_query);
|
||||
if (!hq)
|
||||
return NULL;
|
||||
|
||||
q = &hq->base;
|
||||
q->funcs = &hw_query_funcs;
|
||||
q->type = type;
|
||||
|
||||
switch (q->type) {
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
hq->rotate = 32;
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
case PIPE_QUERY_SO_STATISTICS:
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS:
|
||||
hq->is64bit = true;
|
||||
break;
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
case PIPE_QUERY_TIMESTAMP:
|
||||
case PIPE_QUERY_TIMESTAMP_DISJOINT:
|
||||
case PIPE_QUERY_GPU_FINISHED:
|
||||
case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
|
||||
break;
|
||||
default:
|
||||
debug_printf("invalid query type: %u\n", type);
|
||||
FREE(q);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) {
|
||||
FREE(hq);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (hq->rotate) {
|
||||
/* we advance before query_begin ! */
|
||||
hq->offset -= hq->rotate;
|
||||
hq->data -= hq->rotate / sizeof(*hq->data);
|
||||
}
|
||||
|
||||
return q;
|
||||
}
|
||||
|
||||
void
|
||||
nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
|
||||
struct nv50_query *q, unsigned result_offset)
|
||||
{
|
||||
struct nv50_hw_query *hq = nv50_hw_query(q);
|
||||
|
||||
nv50_hw_query_update(q);
|
||||
if (hq->state != NV50_HW_QUERY_STATE_READY)
|
||||
nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, push->client);
|
||||
hq->state = NV50_HW_QUERY_STATE_READY;
|
||||
|
||||
BEGIN_NV04(push, SUBC_3D(method), 1);
|
||||
PUSH_DATA (push, hq->data[result_offset / 4]);
|
||||
}
|
||||
|
||||
void
|
||||
nv84_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nv50_query *q)
|
||||
{
|
||||
struct nv50_hw_query *hq = nv50_hw_query(q);
|
||||
unsigned offset = hq->offset;
|
||||
|
||||
PUSH_SPACE(push, 5);
|
||||
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
|
||||
BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
|
||||
PUSH_DATAh(push, hq->bo->offset + offset);
|
||||
PUSH_DATA (push, hq->bo->offset + offset);
|
||||
PUSH_DATA (push, hq->sequence);
|
||||
PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
#ifndef __NV50_QUERY_HW_H__
|
||||
#define __NV50_QUERY_HW_H__
|
||||
|
||||
#include "nouveau_fence.h"
|
||||
#include "nouveau_mm.h"
|
||||
|
||||
#include "nv50_query.h"
|
||||
|
||||
#define NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
|
||||
|
||||
struct nv50_hw_query {
|
||||
struct nv50_query base;
|
||||
uint32_t *data;
|
||||
uint32_t sequence;
|
||||
struct nouveau_bo *bo;
|
||||
uint32_t base_offset;
|
||||
uint32_t offset; /* base + i * rotate */
|
||||
uint8_t state;
|
||||
bool is64bit;
|
||||
uint8_t rotate;
|
||||
int nesting; /* only used for occlusion queries */
|
||||
struct nouveau_mm_allocation *mm;
|
||||
struct nouveau_fence *fence;
|
||||
};
|
||||
|
||||
/* Downcast a generic nv50 query to its hardware-query representation. */
static inline struct nv50_hw_query *
nv50_hw_query(struct nv50_query *q)
{
   struct nv50_hw_query *hq = (struct nv50_hw_query *)q;

   return hq;
}
|
||||
|
||||
struct nv50_query *
|
||||
nv50_hw_create_query(struct nv50_context *, unsigned, unsigned);
|
||||
void
|
||||
nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t,
|
||||
struct nv50_query *, unsigned);
|
||||
void
|
||||
nv84_hw_query_fifo_wait(struct nouveau_pushbuf *, struct nv50_query *);
|
||||
|
||||
#endif
|
|
@ -32,8 +32,8 @@ nv50_resource_from_handle(struct pipe_screen * screen,
|
|||
|
||||
struct pipe_surface *
|
||||
nv50_surface_from_buffer(struct pipe_context *pipe,
|
||||
struct pipe_resource *pbuf,
|
||||
const struct pipe_surface *templ)
|
||||
struct pipe_resource *pbuf,
|
||||
const struct pipe_surface *templ)
|
||||
{
|
||||
struct nv50_surface *sf = CALLOC_STRUCT(nv50_surface);
|
||||
if (!sf)
|
||||
|
@ -65,8 +65,8 @@ nv50_surface_from_buffer(struct pipe_context *pipe,
|
|||
|
||||
static struct pipe_surface *
|
||||
nv50_surface_create(struct pipe_context *pipe,
|
||||
struct pipe_resource *pres,
|
||||
const struct pipe_surface *templ)
|
||||
struct pipe_resource *pres,
|
||||
const struct pipe_surface *templ)
|
||||
{
|
||||
/* surfaces are assumed to be miptrees all over the place. */
|
||||
assert(pres->target != PIPE_BUFFER);
|
||||
|
|
|
@ -180,6 +180,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
|
||||
case PIPE_CAP_DEPTH_BOUNDS_TEST:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
return 1;
|
||||
case PIPE_CAP_SEAMLESS_CUBE_MAP:
|
||||
return 1; /* class_3d >= NVA0_3D_CLASS; */
|
||||
|
@ -191,6 +193,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_INDEP_BLEND_FUNC:
|
||||
case PIPE_CAP_TEXTURE_QUERY_LOD:
|
||||
case PIPE_CAP_SAMPLE_SHADING:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
return class_3d >= NVA3_3D_CLASS;
|
||||
|
||||
/* unsupported caps */
|
||||
|
@ -215,8 +218,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
|
||||
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
|
||||
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include "util/u_inlines.h"
|
||||
|
||||
#include "nv50/nv50_context.h"
|
||||
#include "nv50/nv50_query_hw.h"
|
||||
|
||||
void
|
||||
nv50_constbufs_validate(struct nv50_context *nv50)
|
||||
|
@ -168,11 +169,23 @@ nv50_fragprog_validate(struct nv50_context *nv50)
|
|||
{
|
||||
struct nouveau_pushbuf *push = nv50->base.pushbuf;
|
||||
struct nv50_program *fp = nv50->fragprog;
|
||||
struct pipe_rasterizer_state *rast = &nv50->rast->pipe;
|
||||
|
||||
fp->fp.sample_interp = nv50->min_samples > 1;
|
||||
if (fp->fp.force_persample_interp != rast->force_persample_interp) {
|
||||
/* Force the program to be reuploaded, which will trigger interp fixups
|
||||
* to get applied
|
||||
*/
|
||||
if (fp->mem)
|
||||
nouveau_heap_free(&fp->mem);
|
||||
|
||||
fp->fp.force_persample_interp = rast->force_persample_interp;
|
||||
}
|
||||
|
||||
if (fp->mem && !(nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_MIN_SAMPLES)))
|
||||
return;
|
||||
|
||||
if (!nv50_program_validate(nv50, fp))
|
||||
return;
|
||||
return;
|
||||
nv50_program_update_context_state(nv50, fp, 1);
|
||||
|
||||
BEGIN_NV04(push, NV50_3D(FP_REG_ALLOC_TEMP), 1);
|
||||
|
@ -629,7 +642,7 @@ nv50_stream_output_validate(struct nv50_context *nv50)
|
|||
const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : 3;
|
||||
|
||||
if (n == 4 && !targ->clean)
|
||||
nv84_query_fifo_wait(push, targ->pq);
|
||||
nv84_hw_query_fifo_wait(push, nv50_query(targ->pq));
|
||||
BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n);
|
||||
PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
|
||||
PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
|
||||
|
@ -638,8 +651,8 @@ nv50_stream_output_validate(struct nv50_context *nv50)
|
|||
PUSH_DATA(push, targ->pipe.buffer_size);
|
||||
if (!targ->clean) {
|
||||
assert(targ->pq);
|
||||
nv50_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i),
|
||||
targ->pq, 0x4);
|
||||
nv50_hw_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i),
|
||||
nv50_query(targ->pq), 0x4);
|
||||
} else {
|
||||
BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
|
||||
PUSH_DATA(push, 0);
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
|
||||
#include "nv50/nv50_stateobj.h"
|
||||
#include "nv50/nv50_context.h"
|
||||
#include "nv50/nv50_query_hw.h"
|
||||
|
||||
#include "nv50/nv50_3d.xml.h"
|
||||
#include "nv50/nv50_texture.xml.h"
|
||||
|
@ -725,6 +726,9 @@ nv50_sp_state_create(struct pipe_context *pipe,
|
|||
if (cso->stream_output.num_outputs)
|
||||
prog->pipe.stream_output = cso->stream_output;
|
||||
|
||||
prog->translated = nv50_program_translate(
|
||||
prog, nv50_context(pipe)->screen->base.device->chipset);
|
||||
|
||||
return (void *)prog;
|
||||
}
|
||||
|
||||
|
@ -1033,7 +1037,7 @@ nv50_so_target_create(struct pipe_context *pipe,
|
|||
|
||||
if (nouveau_context(pipe)->screen->class_3d >= NVA0_3D_CLASS) {
|
||||
targ->pq = pipe->create_query(pipe,
|
||||
NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET, 0);
|
||||
NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET, 0);
|
||||
if (!targ->pq) {
|
||||
FREE(targ);
|
||||
return NULL;
|
||||
|
@ -1056,6 +1060,24 @@ nv50_so_target_create(struct pipe_context *pipe,
|
|||
return &targ->pipe;
|
||||
}
|
||||
|
||||
static void
|
||||
nva0_so_target_save_offset(struct pipe_context *pipe,
|
||||
struct pipe_stream_output_target *ptarg,
|
||||
unsigned index, bool serialize)
|
||||
{
|
||||
struct nv50_so_target *targ = nv50_so_target(ptarg);
|
||||
|
||||
if (serialize) {
|
||||
struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
|
||||
PUSH_SPACE(push, 2);
|
||||
BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
}
|
||||
|
||||
nv50_query(targ->pq)->index = index;
|
||||
pipe->end_query(pipe, targ->pq);
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_so_target_destroy(struct pipe_context *pipe,
|
||||
struct pipe_stream_output_target *ptarg)
|
||||
|
|
|
@ -487,7 +487,7 @@ static struct state_validate {
|
|||
{ nv50_validate_viewport, NV50_NEW_VIEWPORT },
|
||||
{ nv50_vertprog_validate, NV50_NEW_VERTPROG },
|
||||
{ nv50_gmtyprog_validate, NV50_NEW_GMTYPROG },
|
||||
{ nv50_fragprog_validate, NV50_NEW_FRAGPROG |
|
||||
{ nv50_fragprog_validate, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
|
||||
NV50_NEW_MIN_SAMPLES },
|
||||
{ nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
|
||||
NV50_NEW_GMTYPROG | NV50_NEW_RASTERIZER },
|
||||
|
|
|
@ -220,10 +220,14 @@ nv50_resource_copy_region(struct pipe_context *pipe,
|
|||
nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
|
||||
|
||||
if (m2mf) {
|
||||
struct nv50_miptree *src_mt = nv50_miptree(src);
|
||||
struct nv50_miptree *dst_mt = nv50_miptree(dst);
|
||||
struct nv50_m2mf_rect drect, srect;
|
||||
unsigned i;
|
||||
unsigned nx = util_format_get_nblocksx(src->format, src_box->width);
|
||||
unsigned ny = util_format_get_nblocksy(src->format, src_box->height);
|
||||
unsigned nx = util_format_get_nblocksx(src->format, src_box->width)
|
||||
<< src_mt->ms_x;
|
||||
unsigned ny = util_format_get_nblocksy(src->format, src_box->height)
|
||||
<< src_mt->ms_y;
|
||||
|
||||
nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, dsty, dstz);
|
||||
nv50_m2mf_rect_setup(&srect, src, src_level,
|
||||
|
@ -232,15 +236,15 @@ nv50_resource_copy_region(struct pipe_context *pipe,
|
|||
for (i = 0; i < src_box->depth; ++i) {
|
||||
nv50_m2mf_transfer_rect(nv50, &drect, &srect, nx, ny);
|
||||
|
||||
if (nv50_miptree(dst)->layout_3d)
|
||||
if (dst_mt->layout_3d)
|
||||
drect.z++;
|
||||
else
|
||||
drect.base += nv50_miptree(dst)->layer_stride;
|
||||
drect.base += dst_mt->layer_stride;
|
||||
|
||||
if (nv50_miptree(src)->layout_3d)
|
||||
if (src_mt->layout_3d)
|
||||
srect.z++;
|
||||
else
|
||||
srect.base += nv50_miptree(src)->layer_stride;
|
||||
srect.base += src_mt->layer_stride;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -270,7 +274,7 @@ nv50_resource_copy_region(struct pipe_context *pipe,
|
|||
static void
|
||||
nv50_clear_render_target(struct pipe_context *pipe,
|
||||
struct pipe_surface *dst,
|
||||
const union pipe_color_union *color,
|
||||
const union pipe_color_union *color,
|
||||
unsigned dstx, unsigned dsty,
|
||||
unsigned width, unsigned height)
|
||||
{
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include "translate/translate.h"
|
||||
|
||||
#include "nv50/nv50_context.h"
|
||||
#include "nv50/nv50_query_hw.h"
|
||||
#include "nv50/nv50_resource.h"
|
||||
|
||||
#include "nv50/nv50_3d.xml.h"
|
||||
|
@ -745,7 +746,8 @@ nva0_draw_stream_output(struct nv50_context *nv50,
|
|||
PUSH_DATA (push, 0);
|
||||
BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1);
|
||||
PUSH_DATA (push, so->stride);
|
||||
nv50_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES, so->pq, 0x4);
|
||||
nv50_hw_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES,
|
||||
nv50_query(so->pq), 0x4);
|
||||
BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
|
||||
|
|
|
@ -27,33 +27,33 @@
|
|||
static void dump_comm_vp(struct nouveau_vp3_decoder *dec, struct comm *comm, u32 comm_seq,
|
||||
struct nouveau_bo *inter_bo, unsigned slice_size)
|
||||
{
|
||||
unsigned i, idx = comm->pvp_cur_index & 0xf;
|
||||
debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage);
|
||||
unsigned i, idx = comm->pvp_cur_index & 0xf;
|
||||
debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage);
|
||||
#if 0
|
||||
debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs);
|
||||
debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index);
|
||||
debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs);
|
||||
debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index);
|
||||
|
||||
for (i = 0; i != comm->irq_index; ++i)
|
||||
debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]);
|
||||
for (i = 0; i != comm->parse_endpos_index; ++i)
|
||||
debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]);
|
||||
for (i = 0; i != comm->irq_index; ++i)
|
||||
debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]);
|
||||
for (i = 0; i != comm->parse_endpos_index; ++i)
|
||||
debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]);
|
||||
#endif
|
||||
debug_printf("mb_y = %u\n", comm->mb_y[idx]);
|
||||
if (comm->status_vp[idx] <= 1)
|
||||
return;
|
||||
debug_printf("mb_y = %u\n", comm->mb_y[idx]);
|
||||
if (comm->status_vp[idx] <= 1)
|
||||
return;
|
||||
|
||||
if ((comm->pvp_stage & 0xff) != 0xff) {
|
||||
unsigned *map;
|
||||
int ret = nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client);
|
||||
assert(ret >= 0);
|
||||
map = inter_bo->map;
|
||||
for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) {
|
||||
debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]);
|
||||
}
|
||||
munmap(inter_bo->map, inter_bo->size);
|
||||
inter_bo->map = NULL;
|
||||
}
|
||||
assert((comm->pvp_stage & 0xff) == 0xff);
|
||||
if ((comm->pvp_stage & 0xff) != 0xff) {
|
||||
unsigned *map;
|
||||
int ret = nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client);
|
||||
assert(ret >= 0);
|
||||
map = inter_bo->map;
|
||||
for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) {
|
||||
debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]);
|
||||
}
|
||||
munmap(inter_bo->map, inter_bo->size);
|
||||
inter_bo->map = NULL;
|
||||
}
|
||||
assert((comm->pvp_stage & 0xff) == 0xff);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -252,10 +252,10 @@ nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
|
|||
}
|
||||
}
|
||||
|
||||
vp->vp.clip_enable = info->io.clipDistanceMask;
|
||||
for (i = 0; i < 8; ++i)
|
||||
if (info->io.cullDistanceMask & (1 << i))
|
||||
vp->vp.clip_mode |= 1 << (i * 4);
|
||||
vp->vp.clip_enable =
|
||||
(1 << (info->io.clipDistances + info->io.cullDistances)) - 1;
|
||||
for (i = 0; i < info->io.cullDistances; ++i)
|
||||
vp->vp.clip_mode |= 1 << ((info->io.clipDistances + i) * 4);
|
||||
|
||||
if (info->io.genUserClip < 0)
|
||||
vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES + 1; /* prevent rebuilding */
|
||||
|
@ -269,8 +269,6 @@ nvc0_vp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
|
|||
vp->hdr[0] = 0x20061 | (1 << 10);
|
||||
vp->hdr[4] = 0xff000;
|
||||
|
||||
vp->hdr[18] = info->io.clipDistanceMask;
|
||||
|
||||
return nvc0_vtgp_gen_header(vp, info);
|
||||
}
|
||||
|
||||
|
@ -424,6 +422,11 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
|
|||
|
||||
for (i = 0; i < info->numInputs; ++i) {
|
||||
m = nvc0_hdr_interp_mode(&info->in[i]);
|
||||
if (info->in[i].sn == TGSI_SEMANTIC_COLOR) {
|
||||
fp->fp.colors |= 1 << info->in[i].si;
|
||||
if (info->in[i].sc)
|
||||
fp->fp.color_interp[info->in[i].si] = m | (info->in[i].mask << 4);
|
||||
}
|
||||
for (c = 0; c < 4; ++c) {
|
||||
if (!(info->in[i].mask & (1 << c)))
|
||||
continue;
|
||||
|
@ -531,7 +534,6 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
|
|||
info->io.genUserClip = prog->vp.num_ucps;
|
||||
info->io.ucpBase = 256;
|
||||
info->io.ucpCBSlot = 15;
|
||||
info->io.sampleInterp = prog->fp.sample_interp;
|
||||
|
||||
if (prog->type == PIPE_SHADER_COMPUTE) {
|
||||
if (chipset >= NVISA_GK104_CHIPSET) {
|
||||
|
@ -575,6 +577,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
|
|||
prog->immd_data = info->immd.buf;
|
||||
prog->immd_size = info->immd.bufSize;
|
||||
prog->relocs = info->bin.relocData;
|
||||
prog->interps = info->bin.interpData;
|
||||
prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
|
||||
prog->num_barriers = info->numBarriers;
|
||||
|
||||
|
@ -713,6 +716,23 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
|
|||
|
||||
if (prog->relocs)
|
||||
nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0);
|
||||
if (prog->interps) {
|
||||
nv50_ir_change_interp(prog->interps, prog->code,
|
||||
prog->fp.force_persample_interp,
|
||||
prog->fp.flatshade);
|
||||
for (int i = 0; i < 2; i++) {
|
||||
unsigned mask = prog->fp.color_interp[i] >> 4;
|
||||
unsigned interp = prog->fp.color_interp[i] & 3;
|
||||
if (!mask)
|
||||
continue;
|
||||
prog->hdr[14] &= ~(0xff << (8 * i));
|
||||
if (prog->fp.flatshade)
|
||||
interp = NVC0_INTERP_FLAT;
|
||||
for (int c = 0; c < 4; c++)
|
||||
if (mask & (1 << c))
|
||||
prog->hdr[14] |= interp << (2 * (4 * i + c));
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
if (debug_get_bool_option("NV50_PROG_DEBUG", false))
|
||||
|
@ -773,6 +793,7 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
|
|||
FREE(prog->code); /* may be 0 for hardcoded shaders */
|
||||
FREE(prog->immd_data);
|
||||
FREE(prog->relocs);
|
||||
FREE(prog->interps);
|
||||
if (prog->type == PIPE_SHADER_COMPUTE && prog->cp.syms)
|
||||
FREE(prog->cp.syms);
|
||||
if (prog->tfb) {
|
||||
|
|
|
@ -45,8 +45,10 @@ struct nvc0_program {
|
|||
} vp;
|
||||
struct {
|
||||
uint8_t early_z;
|
||||
uint8_t in_pos[PIPE_MAX_SHADER_INPUTS];
|
||||
uint8_t sample_interp;
|
||||
uint8_t colors;
|
||||
uint8_t color_interp[2];
|
||||
bool force_persample_interp;
|
||||
bool flatshade;
|
||||
} fp;
|
||||
struct {
|
||||
uint32_t tess_mode; /* ~0 if defined by the other stage */
|
||||
|
@ -61,6 +63,7 @@ struct nvc0_program {
|
|||
uint8_t num_barriers;
|
||||
|
||||
void *relocs;
|
||||
void *interps;
|
||||
|
||||
struct nvc0_transform_feedback_state *tfb;
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include "nvc0/nvc0_query.h"
|
||||
#include "nvc0/nvc0_query_sw.h"
|
||||
#include "nvc0/nvc0_query_hw.h"
|
||||
#include "nvc0/nvc0_query_hw_metric.h"
|
||||
#include "nvc0/nvc0_query_hw_sm.h"
|
||||
|
||||
static struct pipe_query *
|
||||
|
@ -188,7 +189,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
|
|||
count++;
|
||||
} else
|
||||
if (screen->base.class_3d < NVE4_3D_CLASS) {
|
||||
count++;
|
||||
count += 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -218,6 +219,17 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
|
|||
return 1;
|
||||
}
|
||||
}
|
||||
} else
|
||||
if (id == NVC0_HW_METRIC_QUERY_GROUP) {
|
||||
if (screen->compute) {
|
||||
if (screen->base.class_3d < NVE4_3D_CLASS) {
|
||||
info->name = "Performance metrics";
|
||||
info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
|
||||
info->max_active_queries = 1;
|
||||
info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
|
||||
else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
|
||||
|
|
|
@ -32,7 +32,8 @@ nvc0_query(struct pipe_query *pipe)
|
|||
* Driver queries groups:
|
||||
*/
|
||||
#define NVC0_HW_SM_QUERY_GROUP 0
|
||||
#define NVC0_SW_QUERY_DRV_STAT_GROUP 1
|
||||
#define NVC0_HW_METRIC_QUERY_GROUP 1
|
||||
#define NVC0_SW_QUERY_DRV_STAT_GROUP 2
|
||||
|
||||
void nvc0_init_query_functions(struct nvc0_context *);
|
||||
|
||||
|
|
|
@ -431,7 +431,7 @@ nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
|
|||
id = nvc0_hw_metric_get_next_query_id(queries, id);
|
||||
info->name = nvc0_hw_metric_names[id];
|
||||
info->query_type = NVC0_HW_METRIC_QUERY(id);
|
||||
info->group_id = -1;
|
||||
info->group_id = NVC0_HW_METRIC_QUERY_GROUP;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,7 +26,8 @@ nvc0_resource_from_handle(struct pipe_screen * screen,
|
|||
} else {
|
||||
struct pipe_resource *res = nv50_miptree_from_handle(screen,
|
||||
templ, whandle);
|
||||
nv04_resource(res)->vtbl = &nvc0_miptree_vtbl;
|
||||
if (res)
|
||||
nv04_resource(res)->vtbl = &nvc0_miptree_vtbl;
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -179,6 +179,9 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
|
||||
case PIPE_CAP_DEPTH_BOUNDS_TEST:
|
||||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
return 1;
|
||||
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
|
||||
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
|
||||
|
@ -201,8 +204,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_VERTEXID_NOBASE:
|
||||
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
|
||||
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
@ -352,45 +353,51 @@ static int
|
|||
nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
|
||||
enum pipe_compute_cap param, void *data)
|
||||
{
|
||||
uint64_t *data64 = (uint64_t *)data;
|
||||
uint32_t *data32 = (uint32_t *)data;
|
||||
const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass;
|
||||
struct nvc0_screen *screen = nvc0_screen(pscreen);
|
||||
const uint16_t obj_class = screen->compute->oclass;
|
||||
|
||||
#define RET(x) do { \
|
||||
if (data) \
|
||||
memcpy(data, x, sizeof(x)); \
|
||||
return sizeof(x); \
|
||||
} while (0)
|
||||
|
||||
switch (param) {
|
||||
case PIPE_COMPUTE_CAP_GRID_DIMENSION:
|
||||
data64[0] = 3;
|
||||
return 8;
|
||||
RET((uint64_t []) { 3 });
|
||||
case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
|
||||
data64[0] = (obj_class >= NVE4_COMPUTE_CLASS) ? 0x7fffffff : 65535;
|
||||
data64[1] = 65535;
|
||||
data64[2] = 65535;
|
||||
return 24;
|
||||
if (obj_class >= NVE4_COMPUTE_CLASS) {
|
||||
RET(((uint64_t []) { 0x7fffffff, 65535, 65535 }));
|
||||
} else {
|
||||
RET(((uint64_t []) { 65535, 65535, 65535 }));
|
||||
}
|
||||
case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
|
||||
data64[0] = 1024;
|
||||
data64[1] = 1024;
|
||||
data64[2] = 64;
|
||||
return 24;
|
||||
RET(((uint64_t []) { 1024, 1024, 64 }));
|
||||
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
|
||||
data64[0] = 1024;
|
||||
return 8;
|
||||
RET((uint64_t []) { 1024 });
|
||||
case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */
|
||||
data64[0] = (uint64_t)1 << 40;
|
||||
return 8;
|
||||
RET((uint64_t []) { 1ULL << 40 });
|
||||
case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
|
||||
data64[0] = 48 << 10;
|
||||
return 8;
|
||||
RET((uint64_t []) { 48 << 10 });
|
||||
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
|
||||
data64[0] = 512 << 10;
|
||||
return 8;
|
||||
RET((uint64_t []) { 512 << 10 });
|
||||
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
|
||||
data64[0] = 4096;
|
||||
return 8;
|
||||
RET((uint64_t []) { 4096 });
|
||||
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
|
||||
data32[0] = 32;
|
||||
return 4;
|
||||
RET((uint32_t []) { 32 });
|
||||
case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
|
||||
RET((uint64_t []) { 1ULL << 40 });
|
||||
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
|
||||
RET((uint32_t []) { 0 });
|
||||
case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
|
||||
RET((uint32_t []) { screen->mp_count_compute });
|
||||
case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
|
||||
RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
#undef RET
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -827,6 +834,8 @@ nvc0_screen_create(struct nouveau_device *dev)
|
|||
PUSH_DATA (push, 1);
|
||||
BEGIN_NVC0(push, NVC0_3D(BLEND_ENABLE_COMMON), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1);
|
||||
PUSH_DATA (push, NVC0_3D_SHADE_MODEL_SMOOTH);
|
||||
if (screen->eng3d->oclass < NVE4_3D_CLASS) {
|
||||
BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1);
|
||||
PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);
|
||||
|
|
|
@ -38,6 +38,7 @@ struct nvc0_graph_state {
|
|||
uint32_t constant_elts;
|
||||
int32_t index_bias;
|
||||
uint16_t scissor;
|
||||
bool flatshade;
|
||||
uint8_t patch_vertices;
|
||||
uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
|
||||
uint8_t num_vtxbufs;
|
||||
|
|
|
@ -107,8 +107,54 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0)
|
|||
{
|
||||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
struct nvc0_program *fp = nvc0->fragprog;
|
||||
struct pipe_rasterizer_state *rast = &nvc0->rast->pipe;
|
||||
|
||||
fp->fp.sample_interp = nvc0->min_samples > 1;
|
||||
if (fp->fp.force_persample_interp != rast->force_persample_interp) {
|
||||
/* Force the program to be reuploaded, which will trigger interp fixups
|
||||
* to get applied
|
||||
*/
|
||||
if (fp->mem)
|
||||
nouveau_heap_free(&fp->mem);
|
||||
|
||||
fp->fp.force_persample_interp = rast->force_persample_interp;
|
||||
}
|
||||
|
||||
/* Shade model works well enough when both colors follow it. However if one
|
||||
* (or both) is explicitly set, then we have to go the patching route.
|
||||
*/
|
||||
bool has_explicit_color = fp->fp.colors &&
|
||||
(((fp->fp.colors & 1) && !fp->fp.color_interp[0]) ||
|
||||
((fp->fp.colors & 2) && !fp->fp.color_interp[1]));
|
||||
bool hwflatshade = false;
|
||||
if (has_explicit_color && fp->fp.flatshade != rast->flatshade) {
|
||||
/* Force re-upload */
|
||||
if (fp->mem)
|
||||
nouveau_heap_free(&fp->mem);
|
||||
|
||||
fp->fp.flatshade = rast->flatshade;
|
||||
|
||||
/* Always smooth-shade in this mode, the shader will decide on its own
|
||||
* when to flat-shade.
|
||||
*/
|
||||
} else if (!has_explicit_color) {
|
||||
hwflatshade = rast->flatshade;
|
||||
|
||||
/* No need to binary-patch the shader each time, make sure that it's set
|
||||
* up for the default behaviour.
|
||||
*/
|
||||
fp->fp.flatshade = 0;
|
||||
}
|
||||
|
||||
if (hwflatshade != nvc0->state.flatshade) {
|
||||
nvc0->state.flatshade = hwflatshade;
|
||||
BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1);
|
||||
PUSH_DATA (push, hwflatshade ? NVC0_3D_SHADE_MODEL_FLAT :
|
||||
NVC0_3D_SHADE_MODEL_SMOOTH);
|
||||
}
|
||||
|
||||
if (fp->mem && !(nvc0->dirty & NVC0_NEW_FRAGPROG)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!nvc0_program_validate(nvc0, fp))
|
||||
return;
|
||||
|
|
|
@ -212,9 +212,6 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe,
|
|||
* always emit 16 commands, one for each scissor rectangle, here.
|
||||
*/
|
||||
|
||||
SB_BEGIN_3D(so, SHADE_MODEL, 1);
|
||||
SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT :
|
||||
NVC0_3D_SHADE_MODEL_SMOOTH);
|
||||
SB_IMMED_3D(so, PROVOKING_VERTEX_LAST, !cso->flatshade_first);
|
||||
SB_IMMED_3D(so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside);
|
||||
|
||||
|
@ -683,6 +680,9 @@ nvc0_sp_state_create(struct pipe_context *pipe,
|
|||
if (cso->stream_output.num_outputs)
|
||||
prog->pipe.stream_output = cso->stream_output;
|
||||
|
||||
prog->translated = nvc0_program_translate(
|
||||
prog, nvc0_context(pipe)->screen->base.device->chipset);
|
||||
|
||||
return (void *)prog;
|
||||
}
|
||||
|
||||
|
|
|
@ -606,6 +606,9 @@ nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
|
|||
ctx_to->constbuf_dirty[s] = (1 << NVC0_MAX_PIPE_CONSTBUFS) - 1;
|
||||
}
|
||||
|
||||
/* Reset tfb as the shader that owns it may have been deleted. */
|
||||
ctx_to->state.tfb = NULL;
|
||||
|
||||
if (!ctx_to->vertex)
|
||||
ctx_to->dirty &= ~(NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS);
|
||||
if (!ctx_to->idxbuf.buffer)
|
||||
|
@ -645,7 +648,7 @@ static struct state_validate {
|
|||
{ nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG },
|
||||
{ nvc0_validate_tess_state, NVC0_NEW_TESSFACTOR },
|
||||
{ nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG },
|
||||
{ nvc0_fragprog_validate, NVC0_NEW_FRAGPROG },
|
||||
{ nvc0_fragprog_validate, NVC0_NEW_FRAGPROG | NVC0_NEW_RASTERIZER },
|
||||
{ nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA |
|
||||
NVC0_NEW_RASTERIZER },
|
||||
{ nvc0_validate_derived_2, NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER },
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue