Merge remote-tracking branch 'mesa-public/master' into vulkan
This pulls in Matt's big compiler refactor.
This commit is contained in:
commit
1469ccb746
|
@ -149,7 +149,7 @@ GL 4.2, GLSL 4.20:
|
|||
|
||||
GL 4.3, GLSL 4.30:
|
||||
|
||||
GL_ARB_arrays_of_arrays started (Timothy)
|
||||
GL_ARB_arrays_of_arrays DONE (i965)
|
||||
GL_ARB_ES3_compatibility DONE (all drivers that support GLSL 3.30)
|
||||
GL_ARB_clear_buffer_object DONE (all drivers)
|
||||
GL_ARB_compute_shader in progress (jljusten)
|
||||
|
@ -169,7 +169,7 @@ GL 4.3, GLSL 4.30:
|
|||
GL_ARB_texture_buffer_range DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe)
|
||||
GL_ARB_texture_query_levels DONE (all drivers that support GLSL 1.30)
|
||||
GL_ARB_texture_storage_multisample DONE (all drivers that support GL_ARB_texture_multisample)
|
||||
GL_ARB_texture_view DONE (i965, nv50, nvc0, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_texture_view DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_vertex_attrib_binding DONE (all drivers)
|
||||
|
||||
|
||||
|
@ -177,7 +177,7 @@ GL 4.4, GLSL 4.40:
|
|||
|
||||
GL_MAX_VERTEX_ATTRIB_STRIDE DONE (all drivers)
|
||||
GL_ARB_buffer_storage DONE (i965, nv50, nvc0, r600, radeonsi)
|
||||
GL_ARB_clear_texture DONE (i965) (gallium - in progress, VMware)
|
||||
GL_ARB_clear_texture DONE (i965, nv50, nvc0)
|
||||
GL_ARB_enhanced_layouts in progress (Timothy)
|
||||
- compile-time constant expressions in progress
|
||||
- explicit byte offsets for blocks in progress
|
||||
|
@ -209,7 +209,7 @@ GL 4.5, GLSL 4.50:
|
|||
|
||||
These are the extensions cherry-picked to make GLES 3.1
|
||||
GLES3.1, GLSL ES 3.1
|
||||
GL_ARB_arrays_of_arrays started (Timothy)
|
||||
GL_ARB_arrays_of_arrays DONE (i965)
|
||||
GL_ARB_compute_shader in progress (jljusten)
|
||||
GL_ARB_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
|
||||
|
|
|
@ -2,8 +2,8 @@ The software may implement third party technologies (e.g. third party
|
|||
libraries) that are not licensed to you by AMD and for which you may need
|
||||
to obtain licenses from other parties. Unless explicitly stated otherwise,
|
||||
these third party technologies are not licensed hereunder. Such third
|
||||
party technologies include, but are not limited, to H.264, MPEG-2, MPEG-4,
|
||||
AVC, and VC-1.
|
||||
party technologies include, but are not limited, to H.264, H.265, HEVC, MPEG-2,
|
||||
MPEG-4, AVC, and VC-1.
|
||||
|
||||
For MPEG-2 Encoding Products ANY USE OF THIS PRODUCT IN ANY MANNER OTHER
|
||||
THAN PERSONAL USE THAT COMPLIES WITH THE MPEG-2 STANDARD FOR ENCODING VIDEO
|
||||
|
|
|
@ -16,6 +16,12 @@
|
|||
|
||||
<h1>News</h1>
|
||||
|
||||
<h2>November 11, 2015</h2>
|
||||
<p>
|
||||
<a href="relnotes/11.0.5.html">Mesa 11.0.5</a> is released.
|
||||
This is a bug-fix release.
|
||||
</p>
|
||||
|
||||
<h2>October 24, 2015</h2>
|
||||
<p>
|
||||
<a href="relnotes/11.0.4.html">Mesa 11.0.4</a> is released.
|
||||
|
|
|
@ -21,6 +21,7 @@ The release notes summarize what's new or changed in each Mesa release.
|
|||
</p>
|
||||
|
||||
<ul>
|
||||
<li><a href="relnotes/11.0.5.html">11.0.5 release notes</a>
|
||||
<li><a href="relnotes/11.0.4.html">11.0.4 release notes</a>
|
||||
<li><a href="relnotes/11.0.3.html">11.0.3 release notes</a>
|
||||
<li><a href="relnotes/10.6.9.html">10.6.9 release notes</a>
|
||||
|
|
|
@ -0,0 +1,174 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 11.0.5 Release Notes / November 11, 2015</h1>
|
||||
|
||||
<p>
|
||||
Mesa 11.0.5 is a bug fix release which fixes bugs found since the 11.0.4 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 11.0.5 implements the OpenGL 4.1 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.1. OpenGL
|
||||
4.1 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
8495ef5c06f7f726452462b7d408a5b40048373ff908f2283a3b4d1f49b45ee6 mesa-11.0.5.tar.gz
|
||||
9c255a2a6695fcc6ef4a279e1df0aeaf417dc142f39ee59dfb533d80494bb67a mesa-11.0.5.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<p>This list is likely incomplete.</p>
|
||||
|
||||
<ul>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91993">Bug 91993</a> - Graphical glitch in Astromenace (open-source game).</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92214">Bug 92214</a> - Flightgear crashes during splashboot with R600 driver, LLVM 3.7.0 and mesa 11.0.2</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92437">Bug 92437</a> - osmesa: Expose GL entry points for Windows build, via .def file</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92476">Bug 92476</a> - [cts] ES2-CTS.gtf.GL2ExtensionTests.egl_image.egl_image fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92623">Bug 92623</a> - Differences in prog_data ignored when caching fragment programs (causes hangs)</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Alex Deucher (1):</p>
|
||||
<ul>
|
||||
<li>radeon/uvd: don't expose HEVC on old UVD hw (v3)</li>
|
||||
</ul>
|
||||
|
||||
<p>Ben Widawsky (1):</p>
|
||||
<ul>
|
||||
<li>i965/skl: Add GT4 PCI IDs</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (4):</p>
|
||||
<ul>
|
||||
<li>docs: add sha256 checksums for 11.0.4</li>
|
||||
<li>cherry-ignore: ignore a possible wrong nomination</li>
|
||||
<li>Revert "mesa/glformats: Undo code changes from _mesa_base_tex_format() move"</li>
|
||||
<li>Update version to 11.0.5</li>
|
||||
</ul>
|
||||
|
||||
<p>Emmanuel Gil Peyrot (1):</p>
|
||||
<ul>
|
||||
<li>gbm.h: Add a missing stddef.h include for size_t.</li>
|
||||
</ul>
|
||||
|
||||
<p>Eric Anholt (1):</p>
|
||||
<ul>
|
||||
<li>vc4: When the create ioctl fails, free our cache and try again.</li>
|
||||
</ul>
|
||||
|
||||
<p>Ian Romanick (1):</p>
|
||||
<ul>
|
||||
<li>i965: Fix is-renderable check in intel_image_target_renderbuffer_storage</li>
|
||||
</ul>
|
||||
|
||||
<p>Ilia Mirkin (3):</p>
|
||||
<ul>
|
||||
<li>nvc0: respect edgeflag attribute width</li>
|
||||
<li>nouveau: set MaxDrawBuffers to the same value as MaxColorAttachments</li>
|
||||
<li>nouveau: relax fence emit space assert</li>
|
||||
</ul>
|
||||
|
||||
<p>Ivan Kalvachev (1):</p>
|
||||
<ul>
|
||||
<li>r600g: Fix special negative immediate constants when using ABS modifier.</li>
|
||||
</ul>
|
||||
|
||||
<p>Jason Ekstrand (2):</p>
|
||||
<ul>
|
||||
<li>nir/lower_vec_to_movs: Pass the shader around directly</li>
|
||||
<li>nir: Report progress from lower_vec_to_movs().</li>
|
||||
</ul>
|
||||
|
||||
<p>Jose Fonseca (2):</p>
|
||||
<ul>
|
||||
<li>gallivm: Translate all util_cpu_caps bits to LLVM attributes.</li>
|
||||
<li>gallivm: Explicitly disable unsupported CPU features.</li>
|
||||
</ul>
|
||||
|
||||
<p>Julien Isorce (4):</p>
|
||||
<ul>
|
||||
<li>st/va: pass picture desc to begin and decode</li>
|
||||
<li>nvc0: fix crash when nv50_miptree_from_handle fails</li>
|
||||
<li>st/va: do not destroy old buffer when new one failed</li>
|
||||
<li>st/va: add more errors checks in vlVaBufferSetNumElements and vlVaMapBuffer</li>
|
||||
</ul>
|
||||
|
||||
<p>Kenneth Graunke (6):</p>
|
||||
<ul>
|
||||
<li>i965: Fix missing BRW_NEW_*_PROG_DATA flagging caused by cache reuse.</li>
|
||||
<li>nir: Report progress from nir_split_var_copies().</li>
|
||||
<li>nir: Properly invalidate metadata in nir_split_var_copies().</li>
|
||||
<li>nir: Properly invalidate metadata in nir_opt_copy_prop().</li>
|
||||
<li>nir: Properly invalidate metadata in nir_lower_vec_to_movs().</li>
|
||||
<li>nir: Properly invalidate metadata in nir_opt_remove_phis().</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (1):</p>
|
||||
<ul>
|
||||
<li>radeonsi: add register definitions for Stoney</li>
|
||||
</ul>
|
||||
|
||||
<p>Nanley Chery (1):</p>
|
||||
<ul>
|
||||
<li>mesa/glformats: Undo code changes from _mesa_base_tex_format() move</li>
|
||||
</ul>
|
||||
|
||||
<p>Nicolai Hähnle (1):</p>
|
||||
<ul>
|
||||
<li>st/mesa: fix mipmap generation for immutable textures with incomplete pyramids</li>
|
||||
</ul>
|
||||
|
||||
<p>Nigel Stewart (1):</p>
|
||||
<ul>
|
||||
<li>osmesa: Expose GL entry points for Windows build via DEF file.</li>
|
||||
</ul>
|
||||
|
||||
<p>Roland Scheidegger (1):</p>
|
||||
<ul>
|
||||
<li>gallivm: disable f16c when not using AVX</li>
|
||||
</ul>
|
||||
|
||||
<p>Samuel Li (2):</p>
|
||||
<ul>
|
||||
<li>radeonsi: add support for Stoney asics (v3)</li>
|
||||
<li>radeonsi: add Stoney pci ids</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
|
@ -44,8 +44,10 @@ Note: some of the new features are only available with certain drivers.
|
|||
</p>
|
||||
|
||||
<ul>
|
||||
<li>GL_ARB_arrays_of_arrays on i965</li>
|
||||
<li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>
|
||||
<li>GL_ARB_copy_image on radeonsi</li>
|
||||
<li>GL_ARB_clear_texture on nv50, nvc0</li>
|
||||
<li>GL_ARB_copy_image on nv50, nvc0, radeonsi</li>
|
||||
<li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
|
||||
<li>GL_ARB_gpu_shader5 on r600 for Evergreen and later chips</li>
|
||||
<li>GL_ARB_shader_clock on i965 (gen7+)</li>
|
||||
|
@ -54,7 +56,8 @@ Note: some of the new features are only available with certain drivers.
|
|||
<li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi</li>
|
||||
<li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li>
|
||||
<li>GL_ARB_texture_query_lod on softpipe</li>
|
||||
<li>GL_ARB_texture_view on radeonsi</li>
|
||||
<li>GL_ARB_texture_view on radeonsi and r600 (for evergreen and newer)</li>
|
||||
<li>GL_EXT_buffer_storage implemented for when ES 3.1 support is gained</li>
|
||||
<li>GL_EXT_draw_elements_base_vertex on all drivers</li>
|
||||
<li>GL_OES_draw_elements_base_vertex on all drivers</li>
|
||||
<li>EGL_KHR_create_context on softpipe, llvmpipe</li>
|
||||
|
|
|
@ -30,6 +30,10 @@
|
|||
<dt><a href="http://www.valgrind.org">Valgrind</a></dt>
|
||||
<dd>is a very useful tool for tracking down
|
||||
memory-related problems in your code.</dd>
|
||||
|
||||
<dt><a href="http://scan.coverity.com/projects/mesa">Coverity</a></dt>
|
||||
<dd>provides static code analysis of Mesa. If you create an account
|
||||
you can see the results and try to fix outstanding issues.</dd>
|
||||
</dl>
|
||||
|
||||
</div>
|
||||
|
|
|
@ -148,10 +148,33 @@ To get the latest code from git:
|
|||
<h2>Building the Code</h2>
|
||||
|
||||
<ul>
|
||||
<li>Build libdrm: If you're on a 32-bit system, you should skip the --libdir configure option. Note also the comment about toolchain libdrm above.
|
||||
<li>
|
||||
Determine where the GL-related libraries reside on your system and set
|
||||
the LIBDIR environment variable accordingly.
|
||||
<br><br>
|
||||
For 32-bit Ubuntu systems:
|
||||
<pre>
|
||||
export LIBDIR=/usr/lib/i386-linux-gnu
|
||||
</pre>
|
||||
For 64-bit Ubuntu systems:
|
||||
<pre>
|
||||
export LIBDIR=/usr/lib/x86_64-linux-gnu
|
||||
</pre>
|
||||
For 32-bit Fedora systems:
|
||||
<pre>
|
||||
export LIBDIR=/usr/lib
|
||||
</pre>
|
||||
For 64-bit Fedora systems:
|
||||
<pre>
|
||||
export LIBDIR=/usr/lib64
|
||||
</pre>
|
||||
|
||||
</li>
|
||||
|
||||
<li>Build libdrm:
|
||||
<pre>
|
||||
cd $TOP/drm
|
||||
./autogen.sh --prefix=/usr --libdir=/usr/lib64
|
||||
./autogen.sh --prefix=/usr --libdir=${LIBDIR}
|
||||
make
|
||||
sudo make install
|
||||
</pre>
|
||||
|
@ -162,12 +185,9 @@ The libxatracker library is used exclusively by the X server to do render,
|
|||
copy and video acceleration:
|
||||
<br>
|
||||
The following configure options don't build the EGL system.
|
||||
<br>
|
||||
As before, if you're on a 32-bit system, you should skip the --libdir
|
||||
configure option.
|
||||
<pre>
|
||||
cd $TOP/mesa
|
||||
./autogen.sh --prefix=/usr --libdir=/usr/lib64 --with-gallium-drivers=svga --with-dri-drivers= --enable-xa --disable-dri3
|
||||
./autogen.sh --prefix=/usr --libdir=${LIBDIR} --with-gallium-drivers=svga --with-dri-drivers=swrast --enable-xa --disable-dri3 --enable-glx-tls
|
||||
make
|
||||
sudo make install
|
||||
</pre>
|
||||
|
@ -177,25 +197,39 @@ if they're not installed in your system. You should be told what's missing.
|
|||
<br>
|
||||
<br>
|
||||
|
||||
<li>xf86-video-vmware: Now, once libxatracker is installed, we proceed with building and replacing the current Xorg driver. First check if your system is 32- or 64-bit. If you're building for a 32-bit system, you will not be needing the --libdir=/usr/lib64 option to autogen.
|
||||
<li>xf86-video-vmware: Now, once libxatracker is installed, we proceed with
|
||||
building and replacing the current Xorg driver.
|
||||
First check if your system is 32- or 64-bit.
|
||||
<pre>
|
||||
cd $TOP/xf86-video-vmware
|
||||
./autogen.sh --prefix=/usr --libdir=/usr/lib64
|
||||
./autogen.sh --prefix=/usr --libdir=${LIBDIR}
|
||||
make
|
||||
sudo make install
|
||||
</pre>
|
||||
|
||||
<li>vmwgfx kernel module. First make sure that any old version of this kernel module is removed from the system by issuing
|
||||
<pre>
|
||||
<pre>
|
||||
sudo rm /lib/modules/`uname -r`/kernel/drivers/gpu/drm/vmwgfx.ko*
|
||||
</pre>
|
||||
Then
|
||||
<pre>
|
||||
</pre>
|
||||
Build and install:
|
||||
<pre>
|
||||
cd $TOP/vmwgfx
|
||||
make
|
||||
sudo make install
|
||||
sudo cp 00-vmwgfx.rules /etc/udev/rules.d
|
||||
sudo depmod -ae
|
||||
</pre>
|
||||
sudo depmod -a
|
||||
</pre>
|
||||
If you're using an Ubuntu OS:
|
||||
<pre>
|
||||
sudo update-initramfs -u
|
||||
</pre>
|
||||
If you're using a Fedora OS:
|
||||
<pre>
|
||||
sudo dracut --force
|
||||
</pre>
|
||||
Add 'vmwgfx' to the /etc/modules file:
|
||||
<pre>
|
||||
echo vmwgfx | sudo tee -a /etc/modules
|
||||
</pre>
|
||||
|
||||
Note: some distros put DRM kernel drivers in different directories.
|
||||
For example, sometimes vmwgfx.ko might be found in
|
||||
|
|
|
@ -703,18 +703,10 @@ dri2_wl_swap_buffers_with_damage(_EGLDriver *drv,
|
|||
dri2_surf->dx = 0;
|
||||
dri2_surf->dy = 0;
|
||||
|
||||
if (n_rects == 0) {
|
||||
wl_surface_damage(dri2_surf->wl_win->surface,
|
||||
0, 0, INT32_MAX, INT32_MAX);
|
||||
} else {
|
||||
for (i = 0; i < n_rects; i++) {
|
||||
const int *rect = &rects[i * 4];
|
||||
wl_surface_damage(dri2_surf->wl_win->surface,
|
||||
rect[0],
|
||||
dri2_surf->base.Height - rect[1] - rect[3],
|
||||
rect[2], rect[3]);
|
||||
}
|
||||
}
|
||||
/* We deliberately ignore the damage region and post maximum damage, due to
|
||||
* https://bugs.freedesktop.org/78190 */
|
||||
wl_surface_damage(dri2_surf->wl_win->surface,
|
||||
0, 0, INT32_MAX, INT32_MAX);
|
||||
|
||||
if (dri2_dpy->is_different_gpu) {
|
||||
_EGLContext *ctx = _eglGetCurrentContext();
|
||||
|
|
|
@ -349,7 +349,8 @@ VL_SOURCES := \
|
|||
|
||||
# XXX: Nuke this as our dri targets no longer depend on VL.
|
||||
VL_WINSYS_SOURCES := \
|
||||
vl/vl_winsys_dri.c
|
||||
vl/vl_winsys_dri.c \
|
||||
vl/vl_winsys_drm.c
|
||||
|
||||
VL_STUB_SOURCES := \
|
||||
vl/vl_stubs.c
|
||||
|
@ -378,7 +379,9 @@ GALLIVM_SOURCES := \
|
|||
gallivm/lp_bld_flow.h \
|
||||
gallivm/lp_bld_format_aos_array.c \
|
||||
gallivm/lp_bld_format_aos.c \
|
||||
gallivm/lp_bld_format_cached.c \
|
||||
gallivm/lp_bld_format_float.c \
|
||||
gallivm/lp_bld_format.c \
|
||||
gallivm/lp_bld_format.h \
|
||||
gallivm/lp_bld_format_soa.c \
|
||||
gallivm/lp_bld_format_srgb.c \
|
||||
|
|
|
@ -625,6 +625,7 @@ generate_vs(struct draw_llvm_variant *variant,
|
|||
inputs,
|
||||
outputs,
|
||||
context_ptr,
|
||||
NULL,
|
||||
draw_sampler,
|
||||
&llvm->draw->vs.vertex_shader->info,
|
||||
NULL);
|
||||
|
@ -749,7 +750,8 @@ generate_fetch(struct gallivm_state *gallivm,
|
|||
lp_float32_vec4_type(),
|
||||
FALSE,
|
||||
map_ptr,
|
||||
zero, zero, zero);
|
||||
zero, zero, zero,
|
||||
NULL);
|
||||
LLVMBuildStore(builder, val, temp_ptr);
|
||||
}
|
||||
lp_build_endif(&if_ctx);
|
||||
|
@ -2193,6 +2195,7 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
|
|||
NULL,
|
||||
outputs,
|
||||
context_ptr,
|
||||
NULL,
|
||||
sampler,
|
||||
&llvm->draw->gs.geometry_shader->info,
|
||||
(const struct lp_build_tgsi_gs_iface *)&gs_iface);
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2010 VMware, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
|
||||
#include "lp_bld_format.h"
|
||||
|
||||
|
||||
|
||||
LLVMTypeRef
|
||||
lp_build_format_cache_type(struct gallivm_state *gallivm)
|
||||
{
|
||||
LLVMTypeRef elem_types[LP_BUILD_FORMAT_CACHE_MEMBER_COUNT];
|
||||
LLVMTypeRef s;
|
||||
|
||||
elem_types[LP_BUILD_FORMAT_CACHE_MEMBER_DATA] =
|
||||
LLVMArrayType(LLVMInt32TypeInContext(gallivm->context),
|
||||
LP_BUILD_FORMAT_CACHE_SIZE * 16);
|
||||
elem_types[LP_BUILD_FORMAT_CACHE_MEMBER_TAGS] =
|
||||
LLVMArrayType(LLVMInt64TypeInContext(gallivm->context),
|
||||
LP_BUILD_FORMAT_CACHE_SIZE);
|
||||
#if LP_BUILD_FORMAT_CACHE_DEBUG
|
||||
elem_types[LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL] =
|
||||
LLVMInt64TypeInContext(gallivm->context);
|
||||
elem_types[LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS] =
|
||||
LLVMInt64TypeInContext(gallivm->context);
|
||||
#endif
|
||||
|
||||
s = LLVMStructTypeInContext(gallivm->context, elem_types,
|
||||
LP_BUILD_FORMAT_CACHE_MEMBER_COUNT, 0);
|
||||
|
||||
return s;
|
||||
}
|
|
@ -44,6 +44,45 @@ struct lp_type;
|
|||
struct lp_build_context;
|
||||
|
||||
|
||||
#define LP_BUILD_FORMAT_CACHE_DEBUG 0
|
||||
/*
|
||||
* Block cache
|
||||
*
|
||||
* Optional block cache to be used when unpacking big pixel blocks.
|
||||
* The cache size (LP_BUILD_FORMAT_CACHE_SIZE) must be a power of 2.
|
||||
*/
|
||||
|
||||
#define LP_BUILD_FORMAT_CACHE_SIZE 128
|
||||
|
||||
/*
|
||||
* Note: cache_data needs 16 byte alignment.
|
||||
*/
|
||||
/* Storage for the decoded-block cache; lp_build_format_cache_type() builds the LLVM-side struct type for it. */
struct lp_build_format_cache
|
||||
{
|
||||
PIPE_ALIGN_VAR(16) uint32_t cache_data[LP_BUILD_FORMAT_CACHE_SIZE][4][4]; /* per entry: one 4x4 block of 32-bit texels */
|
||||
uint64_t cache_tags[LP_BUILD_FORMAT_CACHE_SIZE]; /* per entry: 64-bit tag identifying the block held in cache_data */
|
||||
#if LP_BUILD_FORMAT_CACHE_DEBUG
|
||||
uint64_t cache_access_total; /* debug-only statistics counter */
|
||||
||||
uint64_t cache_access_miss; /* debug-only statistics counter */
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
/* Element indices of struct lp_build_format_cache, used to index the LLVM struct type and GEPs into it. */
/* NOTE(review): presumably must stay in the same order as the struct's fields - confirm when adding members. */
enum {
|
||||
LP_BUILD_FORMAT_CACHE_MEMBER_DATA = 0, /* cache_data */
|
||||
LP_BUILD_FORMAT_CACHE_MEMBER_TAGS, /* cache_tags */
|
||||
#if LP_BUILD_FORMAT_CACHE_DEBUG
|
||||
LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL, /* cache_access_total */
|
||||
LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS, /* cache_access_miss */
|
||||
#endif
|
||||
LP_BUILD_FORMAT_CACHE_MEMBER_COUNT /* number of members, not a member itself */
|
||||
};
|
||||
|
||||
|
||||
LLVMTypeRef
|
||||
lp_build_format_cache_type(struct gallivm_state *gallivm);
|
||||
|
||||
|
||||
/*
|
||||
* AoS
|
||||
*/
|
||||
|
@ -66,7 +105,8 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
|
|||
LLVMValueRef base_ptr,
|
||||
LLVMValueRef offset,
|
||||
LLVMValueRef i,
|
||||
LLVMValueRef j);
|
||||
LLVMValueRef j,
|
||||
LLVMValueRef cache);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm,
|
||||
|
@ -107,13 +147,13 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
|
|||
LLVMValueRef offsets,
|
||||
LLVMValueRef i,
|
||||
LLVMValueRef j,
|
||||
LLVMValueRef cache,
|
||||
LLVMValueRef rgba_out[4]);
|
||||
|
||||
/*
|
||||
* YUV
|
||||
*/
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
|
||||
const struct util_format_description *format_desc,
|
||||
|
@ -123,6 +163,18 @@ lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
|
|||
LLVMValueRef i,
|
||||
LLVMValueRef j);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_fetch_cached_texels(struct gallivm_state *gallivm,
|
||||
const struct util_format_description *format_desc,
|
||||
unsigned n,
|
||||
LLVMValueRef base_ptr,
|
||||
LLVMValueRef offset,
|
||||
LLVMValueRef i,
|
||||
LLVMValueRef j,
|
||||
LLVMValueRef cache);
|
||||
|
||||
|
||||
/*
|
||||
* special float formats
|
||||
*/
|
||||
|
|
|
@ -370,7 +370,8 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
|
|||
LLVMValueRef base_ptr,
|
||||
LLVMValueRef offset,
|
||||
LLVMValueRef i,
|
||||
LLVMValueRef j)
|
||||
LLVMValueRef j,
|
||||
LLVMValueRef cache)
|
||||
{
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
unsigned num_pixels = type.length / 4;
|
||||
|
@ -502,6 +503,34 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
|
|||
return tmp;
|
||||
}
|
||||
|
||||
/*
|
||||
* s3tc rgb formats
|
||||
*/
|
||||
|
||||
if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC && cache) {
|
||||
struct lp_type tmp_type;
|
||||
LLVMValueRef tmp;
|
||||
|
||||
memset(&tmp_type, 0, sizeof tmp_type);
|
||||
tmp_type.width = 8;
|
||||
tmp_type.length = num_pixels * 4;
|
||||
tmp_type.norm = TRUE;
|
||||
|
||||
tmp = lp_build_fetch_cached_texels(gallivm,
|
||||
format_desc,
|
||||
num_pixels,
|
||||
base_ptr,
|
||||
offset,
|
||||
i, j,
|
||||
cache);
|
||||
|
||||
lp_build_conv(gallivm,
|
||||
tmp_type, type,
|
||||
&tmp, 1, &tmp, 1);
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fallback to util_format_description::fetch_rgba_8unorm().
|
||||
*/
|
||||
|
|
|
@ -0,0 +1,374 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2015 VMware, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
#include "lp_bld_format.h"
|
||||
#include "lp_bld_type.h"
|
||||
#include "lp_bld_struct.h"
|
||||
#include "lp_bld_const.h"
|
||||
#include "lp_bld_flow.h"
|
||||
#include "lp_bld_swizzle.h"
|
||||
|
||||
#include "util/u_math.h"
|
||||
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Complex block-compression based formats are handled here by using a cache,
|
||||
* so re-decoding of every pixel is not required.
|
||||
* Especially for bilinear filtering, texel reuse is very high hence even
|
||||
* a small cache helps.
|
||||
* The elements in the cache are the decoded blocks - currently things
|
||||
* are restricted to formats which are 4x4 block based, and the decoded
|
||||
* texels must fit into 4x8 bits.
|
||||
* The cache is direct mapped so hitrates aren't all that great and cache
|
||||
* thrashing could happen.
|
||||
*
|
||||
* @author Roland Scheidegger <sroland@vmware.com>
|
||||
*/
|
||||
|
||||
|
||||
#if LP_BUILD_FORMAT_CACHE_DEBUG
/**
 * Emit code that adds a constant to one of the cache's 64-bit statistics
 * counters (debug builds only).
 *
 * @param gallivm  code generation state
 * @param ptr      pointer to the cache struct
 * @param count    compile-time amount to add to the counter
 * @param index    struct member to update; must be
 *                 LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL or
 *                 LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS
 */
static void
update_cache_access(struct gallivm_state *gallivm,
                    LLVMValueRef ptr,
                    unsigned count,
                    unsigned index)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef counter_ptr, old_value, increment, new_value;

   assert(index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL ||
          index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);

   /* Load-add-store on the selected counter member. */
   counter_ptr = lp_build_struct_get_ptr(gallivm, ptr, index, "");
   old_value = LLVMBuildLoad(builder, counter_ptr, "cache_access");
   increment = LLVMConstInt(LLVMInt64TypeInContext(gallivm->context),
                            count, 0);
   new_value = LLVMBuildAdd(builder, old_value, increment, "");
   LLVMBuildStore(builder, new_value, counter_ptr);
}
#endif
|
||||
|
||||
|
||||
static void
|
||||
store_cached_block(struct gallivm_state *gallivm,
|
||||
LLVMValueRef *col,
|
||||
LLVMValueRef tag_value,
|
||||
LLVMValueRef hash_index,
|
||||
LLVMValueRef cache)
|
||||
{
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
LLVMValueRef ptr, indices[3];
|
||||
LLVMTypeRef type_ptr4x32;
|
||||
unsigned count;
|
||||
|
||||
type_ptr4x32 = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0);
|
||||
indices[0] = lp_build_const_int32(gallivm, 0);
|
||||
indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS);
|
||||
indices[2] = hash_index;
|
||||
ptr = LLVMBuildGEP(builder, cache, indices, Elements(indices), "");
|
||||
LLVMBuildStore(builder, tag_value, ptr);
|
||||
|
||||
indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA);
|
||||
hash_index = LLVMBuildMul(builder, hash_index,
|
||||
lp_build_const_int32(gallivm, 16), "");
|
||||
for (count = 0; count < 4; count++) {
|
||||
indices[2] = hash_index;
|
||||
ptr = LLVMBuildGEP(builder, cache, indices, Elements(indices), "");
|
||||
ptr = LLVMBuildBitCast(builder, ptr, type_ptr4x32, "");
|
||||
LLVMBuildStore(builder, col[count], ptr);
|
||||
hash_index = LLVMBuildAdd(builder, hash_index,
|
||||
lp_build_const_int32(gallivm, 4), "");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static LLVMValueRef
|
||||
lookup_cached_pixel(struct gallivm_state *gallivm,
|
||||
LLVMValueRef ptr,
|
||||
LLVMValueRef index)
|
||||
{
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
LLVMValueRef member_ptr, indices[3];
|
||||
|
||||
indices[0] = lp_build_const_int32(gallivm, 0);
|
||||
indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA);
|
||||
indices[2] = index;
|
||||
member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), "");
|
||||
return LLVMBuildLoad(builder, member_ptr, "cache_data");
|
||||
}
|
||||
|
||||
|
||||
static LLVMValueRef
|
||||
lookup_tag_data(struct gallivm_state *gallivm,
|
||||
LLVMValueRef ptr,
|
||||
LLVMValueRef index)
|
||||
{
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
LLVMValueRef member_ptr, indices[3];
|
||||
|
||||
indices[0] = lp_build_const_int32(gallivm, 0);
|
||||
indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS);
|
||||
indices[2] = index;
|
||||
member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), "");
|
||||
return LLVMBuildLoad(builder, member_ptr, "tag_data");
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
update_cached_block(struct gallivm_state *gallivm,
|
||||
const struct util_format_description *format_desc,
|
||||
LLVMValueRef ptr_addr,
|
||||
LLVMValueRef hash_index,
|
||||
LLVMValueRef cache)
|
||||
|
||||
{
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
|
||||
LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
|
||||
LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
|
||||
LLVMTypeRef i32x4 = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4);
|
||||
LLVMValueRef function;
|
||||
LLVMValueRef tag_value, tmp_ptr;
|
||||
LLVMValueRef col[4];
|
||||
unsigned i, j;
|
||||
|
||||
/*
|
||||
* Use format_desc->fetch_rgba_8unorm() for each pixel in the block.
|
||||
* This doesn't actually make any sense whatsoever, someone would need
|
||||
* to write a function doing this for all pixels in a block (either as
|
||||
* an external c function or with generated code). Don't ask.
|
||||
*/
|
||||
|
||||
{
|
||||
/*
|
||||
* Function to call looks like:
|
||||
* fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
|
||||
*/
|
||||
LLVMTypeRef ret_type;
|
||||
LLVMTypeRef arg_types[4];
|
||||
LLVMTypeRef function_type;
|
||||
|
||||
assert(format_desc->fetch_rgba_8unorm);
|
||||
|
||||
ret_type = LLVMVoidTypeInContext(gallivm->context);
|
||||
arg_types[0] = pi8t;
|
||||
arg_types[1] = pi8t;
|
||||
arg_types[2] = i32t;
|
||||
arg_types[3] = i32t;
|
||||
function_type = LLVMFunctionType(ret_type, arg_types,
|
||||
Elements(arg_types), 0);
|
||||
|
||||
/* make const pointer for the C fetch_rgba_8unorm function */
|
||||
function = lp_build_const_int_pointer(gallivm,
|
||||
func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));
|
||||
|
||||
/* cast the callee pointer to the function's type */
|
||||
function = LLVMBuildBitCast(builder, function,
|
||||
LLVMPointerType(function_type, 0),
|
||||
"cast callee");
|
||||
}
|
||||
|
||||
tmp_ptr = lp_build_array_alloca(gallivm, i32x4,
|
||||
lp_build_const_int32(gallivm, 16),
|
||||
"tmp_decode_store");
|
||||
tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
|
||||
|
||||
/*
|
||||
* Invoke format_desc->fetch_rgba_8unorm() for each pixel.
|
||||
* This is going to be really really slow.
|
||||
* Note: the block store format is actually
|
||||
* x0y0x0y1x0y2x0y3 x1y0x1y1x1y2x1y3 ...
|
||||
*/
|
||||
for (i = 0; i < 4; ++i) {
|
||||
for (j = 0; j < 4; ++j) {
|
||||
LLVMValueRef args[4];
|
||||
LLVMValueRef dst_offset = lp_build_const_int32(gallivm, (i * 4 + j) * 4);
|
||||
|
||||
/*
|
||||
* Note we actually supply a pointer to the start of the block,
|
||||
* not the start of the texture.
|
||||
*/
|
||||
args[0] = LLVMBuildGEP(gallivm->builder, tmp_ptr, &dst_offset, 1, "");
|
||||
args[1] = ptr_addr;
|
||||
args[2] = LLVMConstInt(i32t, i, 0);
|
||||
args[3] = LLVMConstInt(i32t, j, 0);
|
||||
LLVMBuildCall(builder, function, args, Elements(args), "");
|
||||
}
|
||||
}
|
||||
|
||||
/* Finally store the block - pointless mem copy + update tag. */
|
||||
tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, LLVMPointerType(i32x4, 0), "");
|
||||
for (i = 0; i < 4; ++i) {
|
||||
LLVMValueRef tmp_offset = lp_build_const_int32(gallivm, i);
|
||||
LLVMValueRef ptr = LLVMBuildGEP(gallivm->builder, tmp_ptr, &tmp_offset, 1, "");
|
||||
col[i] = LLVMBuildLoad(builder, ptr, "");
|
||||
}
|
||||
|
||||
tag_value = LLVMBuildPtrToInt(gallivm->builder, ptr_addr,
|
||||
LLVMInt64TypeInContext(gallivm->context), "");
|
||||
store_cached_block(gallivm, col, tag_value, hash_index, cache);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Do a cached lookup.
|
||||
*
|
||||
* Returns (vectors of) 4x8 rgba aos value
|
||||
*/
|
||||
LLVMValueRef
|
||||
lp_build_fetch_cached_texels(struct gallivm_state *gallivm,
|
||||
const struct util_format_description *format_desc,
|
||||
unsigned n,
|
||||
LLVMValueRef base_ptr,
|
||||
LLVMValueRef offset,
|
||||
LLVMValueRef i,
|
||||
LLVMValueRef j,
|
||||
LLVMValueRef cache)
|
||||
|
||||
{
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
unsigned count, low_bit, log2size;
|
||||
LLVMValueRef color, offset_stored, addr, ptr_addrtrunc, tmp;
|
||||
LLVMValueRef ij_index, hash_index, hash_mask, block_index;
|
||||
LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
|
||||
LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
|
||||
LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context);
|
||||
struct lp_type type;
|
||||
struct lp_build_context bld32;
|
||||
memset(&type, 0, sizeof type);
|
||||
type.width = 32;
|
||||
type.length = n;
|
||||
|
||||
assert(format_desc->block.width == 4);
|
||||
assert(format_desc->block.height == 4);
|
||||
|
||||
lp_build_context_init(&bld32, gallivm, type);
|
||||
|
||||
/*
|
||||
* compute hash - we use direct mapped cache, the hash function could
|
||||
* be better but it needs to be simple
|
||||
* per-element:
|
||||
* compare offset with offset stored at tag (hash)
|
||||
* if not equal decode/store block, update tag
|
||||
* extract color from cache
|
||||
* assemble result vector
|
||||
*/
|
||||
|
||||
/* TODO: not ideal with 32bit pointers... */
|
||||
|
||||
low_bit = util_logbase2(format_desc->block.bits / 8);
|
||||
log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE);
|
||||
addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, "");
|
||||
ptr_addrtrunc = LLVMBuildPtrToInt(builder, base_ptr, i32t, "");
|
||||
ptr_addrtrunc = lp_build_broadcast_scalar(&bld32, ptr_addrtrunc);
|
||||
/* For the hash function, first mask off the unused lowest bits. Then just
|
||||
do some xor with address bits - only use lower 32bits */
|
||||
ptr_addrtrunc = LLVMBuildAdd(builder, offset, ptr_addrtrunc, "");
|
||||
ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
|
||||
lp_build_const_int_vec(gallivm, type, low_bit), "");
|
||||
/* This only really makes sense for size 64,128,256 */
|
||||
hash_index = ptr_addrtrunc;
|
||||
ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
|
||||
lp_build_const_int_vec(gallivm, type, 2*log2size), "");
|
||||
hash_index = LLVMBuildXor(builder, ptr_addrtrunc, hash_index, "");
|
||||
tmp = LLVMBuildLShr(builder, hash_index,
|
||||
lp_build_const_int_vec(gallivm, type, log2size), "");
|
||||
hash_index = LLVMBuildXor(builder, hash_index, tmp, "");
|
||||
|
||||
hash_mask = lp_build_const_int_vec(gallivm, type, LP_BUILD_FORMAT_CACHE_SIZE - 1);
|
||||
hash_index = LLVMBuildAnd(builder, hash_index, hash_mask, "");
|
||||
ij_index = LLVMBuildShl(builder, i, lp_build_const_int_vec(gallivm, type, 2), "");
|
||||
ij_index = LLVMBuildAdd(builder, ij_index, j, "");
|
||||
block_index = LLVMBuildShl(builder, hash_index,
|
||||
lp_build_const_int_vec(gallivm, type, 4), "");
|
||||
block_index = LLVMBuildAdd(builder, ij_index, block_index, "");
|
||||
|
||||
if (n > 1) {
|
||||
color = LLVMGetUndef(LLVMVectorType(i32t, n));
|
||||
for (count = 0; count < n; count++) {
|
||||
LLVMValueRef index, cond, colorx;
|
||||
LLVMValueRef block_indexx, hash_indexx, addrx, offsetx, ptr_addrx;
|
||||
struct lp_build_if_state if_ctx;
|
||||
|
||||
index = lp_build_const_int32(gallivm, count);
|
||||
offsetx = LLVMBuildExtractElement(builder, offset, index, "");
|
||||
addrx = LLVMBuildZExt(builder, offsetx, i64t, "");
|
||||
addrx = LLVMBuildAdd(builder, addrx, addr, "");
|
||||
block_indexx = LLVMBuildExtractElement(builder, block_index, index, "");
|
||||
hash_indexx = LLVMBuildLShr(builder, block_indexx,
|
||||
lp_build_const_int32(gallivm, 4), "");
|
||||
offset_stored = lookup_tag_data(gallivm, cache, hash_indexx);
|
||||
cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addrx, "");
|
||||
|
||||
lp_build_if(&if_ctx, gallivm, cond);
|
||||
{
|
||||
ptr_addrx = LLVMBuildIntToPtr(builder, addrx,
|
||||
LLVMPointerType(i8t, 0), "");
|
||||
update_cached_block(gallivm, format_desc, ptr_addrx, hash_indexx, cache);
|
||||
#if LP_BUILD_FORMAT_CACHE_DEBUG
|
||||
update_cache_access(gallivm, cache, 1,
|
||||
LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
|
||||
#endif
|
||||
}
|
||||
lp_build_endif(&if_ctx);
|
||||
|
||||
colorx = lookup_cached_pixel(gallivm, cache, block_indexx);
|
||||
|
||||
color = LLVMBuildInsertElement(builder, color, colorx,
|
||||
lp_build_const_int32(gallivm, count), "");
|
||||
}
|
||||
}
|
||||
else {
|
||||
LLVMValueRef cond;
|
||||
struct lp_build_if_state if_ctx;
|
||||
|
||||
tmp = LLVMBuildZExt(builder, offset, i64t, "");
|
||||
addr = LLVMBuildAdd(builder, tmp, addr, "");
|
||||
offset_stored = lookup_tag_data(gallivm, cache, hash_index);
|
||||
cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addr, "");
|
||||
|
||||
lp_build_if(&if_ctx, gallivm, cond);
|
||||
{
|
||||
tmp = LLVMBuildIntToPtr(builder, addr, LLVMPointerType(i8t, 0), "");
|
||||
update_cached_block(gallivm, format_desc, tmp, hash_index, cache);
|
||||
#if LP_BUILD_FORMAT_CACHE_DEBUG
|
||||
update_cache_access(gallivm, cache, 1,
|
||||
LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
|
||||
#endif
|
||||
}
|
||||
lp_build_endif(&if_ctx);
|
||||
|
||||
color = lookup_cached_pixel(gallivm, cache, block_index);
|
||||
}
|
||||
#if LP_BUILD_FORMAT_CACHE_DEBUG
|
||||
update_cache_access(gallivm, cache, n,
|
||||
LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL);
|
||||
#endif
|
||||
return LLVMBuildBitCast(builder, color, LLVMVectorType(i8t, n * 4), "");
|
||||
}
|
||||
|
|
@ -346,6 +346,7 @@ lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
|
|||
* \param i, j the sub-block pixel coordinates. For non-compressed formats
|
||||
* these will always be (0,0). For compressed formats, i will
|
||||
* be in [0, block_width-1] and j will be in [0, block_height-1].
|
||||
* \param cache optional value pointing to a lp_build_format_cache structure
|
||||
*/
|
||||
void
|
||||
lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
|
||||
|
@ -355,6 +356,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
|
|||
LLVMValueRef offset,
|
||||
LLVMValueRef i,
|
||||
LLVMValueRef j,
|
||||
LLVMValueRef cache,
|
||||
LLVMValueRef rgba_out[4])
|
||||
{
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
|
@ -473,7 +475,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
|
|||
tmp_type.norm = TRUE;
|
||||
|
||||
tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
|
||||
TRUE, base_ptr, offset, i, j);
|
||||
TRUE, base_ptr, offset, i, j, cache);
|
||||
|
||||
lp_build_rgba8_to_fi32_soa(gallivm,
|
||||
type,
|
||||
|
@ -483,6 +485,39 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
|
|||
return;
|
||||
}
|
||||
|
||||
if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC &&
|
||||
/* non-srgb case is already handled above */
|
||||
format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB &&
|
||||
type.floating && type.width == 32 &&
|
||||
(type.length == 1 || (type.length % 4 == 0)) &&
|
||||
cache) {
|
||||
const struct util_format_description *format_decompressed;
|
||||
const struct util_format_description *flinear_desc;
|
||||
LLVMValueRef packed;
|
||||
flinear_desc = util_format_description(util_format_linear(format_desc->format));
|
||||
packed = lp_build_fetch_cached_texels(gallivm,
|
||||
flinear_desc,
|
||||
type.length,
|
||||
base_ptr,
|
||||
offset,
|
||||
i, j,
|
||||
cache);
|
||||
packed = LLVMBuildBitCast(builder, packed,
|
||||
lp_build_int_vec_type(gallivm, type), "");
|
||||
/*
|
||||
* The values are now packed so they match ordinary srgb RGBA8 format,
|
||||
* hence need to use matching format for unpack.
|
||||
*/
|
||||
format_decompressed = util_format_description(PIPE_FORMAT_R8G8B8A8_SRGB);
|
||||
|
||||
lp_build_unpack_rgba_soa(gallivm,
|
||||
format_decompressed,
|
||||
type,
|
||||
packed, rgba_out);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fallback to calling lp_build_fetch_rgba_aos for each pixel.
|
||||
*
|
||||
|
@ -524,7 +559,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
|
|||
/* Get a single float[4]={R,G,B,A} pixel */
|
||||
tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
|
||||
TRUE, base_ptr, offset_elem,
|
||||
i_elem, j_elem);
|
||||
i_elem, j_elem, cache);
|
||||
|
||||
/*
|
||||
* Insert the AoS tmp value channels into the SoA result vectors at
|
||||
|
|
|
@ -99,6 +99,7 @@ struct lp_sampler_params
|
|||
unsigned sampler_index;
|
||||
unsigned sample_key;
|
||||
LLVMValueRef context_ptr;
|
||||
LLVMValueRef thread_data_ptr;
|
||||
const LLVMValueRef *coords;
|
||||
const LLVMValueRef *offsets;
|
||||
LLVMValueRef lod;
|
||||
|
@ -267,6 +268,17 @@ struct lp_sampler_dynamic_state
|
|||
struct gallivm_state *gallivm,
|
||||
LLVMValueRef context_ptr,
|
||||
unsigned sampler_unit);
|
||||
|
||||
/**
|
||||
* Obtain texture cache (returns ptr to lp_build_format_cache).
|
||||
*
|
||||
* It's optional: no caching will be done if it's NULL.
|
||||
*/
|
||||
LLVMValueRef
|
||||
(*cache_ptr)(const struct lp_sampler_dynamic_state *state,
|
||||
struct gallivm_state *gallivm,
|
||||
LLVMValueRef thread_data_ptr,
|
||||
unsigned unit);
|
||||
};
|
||||
|
||||
|
||||
|
@ -356,6 +368,7 @@ struct lp_build_sample_context
|
|||
LLVMValueRef img_stride_array;
|
||||
LLVMValueRef base_ptr;
|
||||
LLVMValueRef mip_offsets;
|
||||
LLVMValueRef cache;
|
||||
|
||||
/** Integer vector with texture width, height, depth */
|
||||
LLVMValueRef int_size;
|
||||
|
|
|
@ -593,7 +593,8 @@ lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
|
|||
TRUE,
|
||||
data_ptr, offset,
|
||||
x_subcoord,
|
||||
y_subcoord);
|
||||
y_subcoord,
|
||||
bld->cache);
|
||||
}
|
||||
|
||||
*colors = rgba8;
|
||||
|
@ -933,7 +934,8 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
|
|||
TRUE,
|
||||
data_ptr, offset[k][j][i],
|
||||
x_subcoord[i],
|
||||
y_subcoord[j]);
|
||||
y_subcoord[j],
|
||||
bld->cache);
|
||||
}
|
||||
|
||||
neighbors[k][j][i] = rgba8;
|
||||
|
|
|
@ -161,6 +161,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
|
|||
bld->texel_type,
|
||||
data_ptr, offset,
|
||||
i, j,
|
||||
bld->cache,
|
||||
texel_out);
|
||||
|
||||
/*
|
||||
|
@ -2389,6 +2390,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
|
|||
bld->texel_type,
|
||||
bld->base_ptr, offset,
|
||||
i, j,
|
||||
bld->cache,
|
||||
colors_out);
|
||||
|
||||
if (out_of_bound_ret_zero) {
|
||||
|
@ -2442,6 +2444,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
|
|||
unsigned texture_index,
|
||||
unsigned sampler_index,
|
||||
LLVMValueRef context_ptr,
|
||||
LLVMValueRef thread_data_ptr,
|
||||
const LLVMValueRef *coords,
|
||||
const LLVMValueRef *offsets,
|
||||
const struct lp_derivatives *derivs, /* optional */
|
||||
|
@ -2707,6 +2710,11 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
|
|||
context_ptr, texture_index);
|
||||
/* Note that mip_offsets is an array[level] of offsets to texture images */
|
||||
|
||||
if (dynamic_state->cache_ptr && thread_data_ptr) {
|
||||
bld.cache = dynamic_state->cache_ptr(dynamic_state, gallivm,
|
||||
thread_data_ptr, texture_index);
|
||||
}
|
||||
|
||||
/* width, height, depth as single int vector */
|
||||
if (dims <= 1) {
|
||||
bld.int_size = tex_width;
|
||||
|
@ -2883,6 +2891,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
|
|||
bld4.base_ptr = bld.base_ptr;
|
||||
bld4.mip_offsets = bld.mip_offsets;
|
||||
bld4.int_size = bld.int_size;
|
||||
bld4.cache = bld.cache;
|
||||
|
||||
bld4.vector_width = lp_type_width(type4);
|
||||
|
||||
|
@ -3081,12 +3090,14 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm,
|
|||
LLVMValueRef offsets[3] = { NULL };
|
||||
LLVMValueRef lod = NULL;
|
||||
LLVMValueRef context_ptr;
|
||||
LLVMValueRef thread_data_ptr = NULL;
|
||||
LLVMValueRef texel_out[4];
|
||||
struct lp_derivatives derivs;
|
||||
struct lp_derivatives *deriv_ptr = NULL;
|
||||
unsigned num_param = 0;
|
||||
unsigned i, num_coords, num_derivs, num_offsets, layer;
|
||||
enum lp_sampler_lod_control lod_control;
|
||||
boolean need_cache = FALSE;
|
||||
|
||||
lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
|
||||
LP_SAMPLER_LOD_CONTROL_SHIFT;
|
||||
|
@ -3094,8 +3105,19 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm,
|
|||
get_target_info(static_texture_state->target,
|
||||
&num_coords, &num_derivs, &num_offsets, &layer);
|
||||
|
||||
if (dynamic_state->cache_ptr) {
|
||||
const struct util_format_description *format_desc;
|
||||
format_desc = util_format_description(static_texture_state->format);
|
||||
if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
|
||||
need_cache = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
/* "unpack" arguments */
|
||||
context_ptr = LLVMGetParam(function, num_param++);
|
||||
if (need_cache) {
|
||||
thread_data_ptr = LLVMGetParam(function, num_param++);
|
||||
}
|
||||
for (i = 0; i < num_coords; i++) {
|
||||
coords[i] = LLVMGetParam(function, num_param++);
|
||||
}
|
||||
|
@ -3146,6 +3168,7 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm,
|
|||
texture_index,
|
||||
sampler_index,
|
||||
context_ptr,
|
||||
thread_data_ptr,
|
||||
coords,
|
||||
offsets,
|
||||
deriv_ptr,
|
||||
|
@ -3189,6 +3212,7 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,
|
|||
const LLVMValueRef *offsets = params->offsets;
|
||||
const struct lp_derivatives *derivs = params->derivs;
|
||||
enum lp_sampler_lod_control lod_control;
|
||||
boolean need_cache = FALSE;
|
||||
|
||||
lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
|
||||
LP_SAMPLER_LOD_CONTROL_SHIFT;
|
||||
|
@ -3196,6 +3220,17 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,
|
|||
get_target_info(static_texture_state->target,
|
||||
&num_coords, &num_derivs, &num_offsets, &layer);
|
||||
|
||||
if (dynamic_state->cache_ptr) {
|
||||
const struct util_format_description *format_desc;
|
||||
format_desc = util_format_description(static_texture_state->format);
|
||||
if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
|
||||
/*
|
||||
* This is not 100% correct, if we have cache but the
|
||||
* util_format_s3tc_prefer is true the cache won't get used
|
||||
* regardless (could hook up the block decode there...) */
|
||||
need_cache = TRUE;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* texture function matches are found by name.
|
||||
* Thus the name has to include both the texture and sampler unit
|
||||
|
@ -3221,6 +3256,9 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,
|
|||
*/
|
||||
|
||||
arg_types[num_param++] = LLVMTypeOf(params->context_ptr);
|
||||
if (need_cache) {
|
||||
arg_types[num_param++] = LLVMTypeOf(params->thread_data_ptr);
|
||||
}
|
||||
for (i = 0; i < num_coords; i++) {
|
||||
arg_types[num_param++] = LLVMTypeOf(coords[0]);
|
||||
assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i]));
|
||||
|
@ -3280,6 +3318,9 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,
|
|||
|
||||
num_args = 0;
|
||||
args[num_args++] = params->context_ptr;
|
||||
if (need_cache) {
|
||||
args[num_args++] = params->thread_data_ptr;
|
||||
}
|
||||
for (i = 0; i < num_coords; i++) {
|
||||
args[num_args++] = coords[i];
|
||||
}
|
||||
|
@ -3384,6 +3425,7 @@ lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state,
|
|||
params->texture_index,
|
||||
params->sampler_index,
|
||||
params->context_ptr,
|
||||
params->thread_data_ptr,
|
||||
params->coords,
|
||||
params->offsets,
|
||||
params->derivs,
|
||||
|
|
|
@ -230,6 +230,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
|
|||
const LLVMValueRef (*inputs)[4],
|
||||
LLVMValueRef (*outputs)[4],
|
||||
LLVMValueRef context_ptr,
|
||||
LLVMValueRef thread_data_ptr,
|
||||
struct lp_build_sampler_soa *sampler,
|
||||
const struct tgsi_shader_info *info,
|
||||
const struct lp_build_tgsi_gs_iface *gs_iface);
|
||||
|
@ -447,6 +448,7 @@ struct lp_build_tgsi_soa_context
|
|||
const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS];
|
||||
LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS];
|
||||
LLVMValueRef context_ptr;
|
||||
LLVMValueRef thread_data_ptr;
|
||||
|
||||
const struct lp_build_sampler_soa *sampler;
|
||||
|
||||
|
|
|
@ -2321,6 +2321,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
|
|||
params.texture_index = unit;
|
||||
params.sampler_index = unit;
|
||||
params.context_ptr = bld->context_ptr;
|
||||
params.thread_data_ptr = bld->thread_data_ptr;
|
||||
params.coords = coords;
|
||||
params.offsets = offsets;
|
||||
params.lod = lod;
|
||||
|
@ -2488,6 +2489,7 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
|
|||
params.texture_index = texture_unit;
|
||||
params.sampler_index = sampler_unit;
|
||||
params.context_ptr = bld->context_ptr;
|
||||
params.thread_data_ptr = bld->thread_data_ptr;
|
||||
params.coords = coords;
|
||||
params.offsets = offsets;
|
||||
params.lod = lod;
|
||||
|
@ -2608,6 +2610,7 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
|
|||
params.texture_index = unit;
|
||||
params.sampler_index = unit;
|
||||
params.context_ptr = bld->context_ptr;
|
||||
params.thread_data_ptr = bld->thread_data_ptr;
|
||||
params.coords = coords;
|
||||
params.offsets = offsets;
|
||||
params.derivs = NULL;
|
||||
|
@ -3858,6 +3861,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
|
|||
const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
|
||||
LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
|
||||
LLVMValueRef context_ptr,
|
||||
LLVMValueRef thread_data_ptr,
|
||||
struct lp_build_sampler_soa *sampler,
|
||||
const struct tgsi_shader_info *info,
|
||||
const struct lp_build_tgsi_gs_iface *gs_iface)
|
||||
|
@ -3893,6 +3897,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
|
|||
bld.bld_base.info = info;
|
||||
bld.indirect_files = info->indirect_files;
|
||||
bld.context_ptr = context_ptr;
|
||||
bld.thread_data_ptr = thread_data_ptr;
|
||||
|
||||
/*
|
||||
* If the number of temporaries is rather large then we just
|
||||
|
|
|
@ -33,6 +33,58 @@
|
|||
#include "util/u_memory.h"
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
#ifdef PIPE_OS_WINDOWS
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef PIPE_OS_WINDOWS
|
||||
|
||||
static inline uint64_t
|
||||
filetime_to_scalar(FILETIME ft)
|
||||
{
|
||||
ULARGE_INTEGER uli;
|
||||
uli.LowPart = ft.dwLowDateTime;
|
||||
uli.HighPart = ft.dwHighDateTime;
|
||||
return uli.QuadPart;
|
||||
}
|
||||
|
||||
static boolean
|
||||
get_cpu_stats(unsigned cpu_index, uint64_t *busy_time, uint64_t *total_time)
|
||||
{
|
||||
SYSTEM_INFO sysInfo;
|
||||
FILETIME ftNow, ftCreation, ftExit, ftKernel, ftUser;
|
||||
|
||||
GetSystemInfo(&sysInfo);
|
||||
assert(sysInfo.dwNumberOfProcessors >= 1);
|
||||
if (cpu_index != ALL_CPUS && cpu_index >= sysInfo.dwNumberOfProcessors) {
|
||||
/* Tell hud_get_num_cpus there are only this many CPUs. */
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Get accumulated user and sys time for all threads */
|
||||
if (!GetProcessTimes(GetCurrentProcess(), &ftCreation, &ftExit,
|
||||
&ftKernel, &ftUser))
|
||||
return FALSE;
|
||||
|
||||
GetSystemTimeAsFileTime(&ftNow);
|
||||
|
||||
*busy_time = filetime_to_scalar(ftUser) + filetime_to_scalar(ftKernel);
|
||||
*total_time = filetime_to_scalar(ftNow) - filetime_to_scalar(ftCreation);
|
||||
|
||||
/* busy_time already has the time accross all cpus.
|
||||
* XXX: if we want 100% to mean one CPU, 200% two cpus, eliminate the
|
||||
* following line.
|
||||
*/
|
||||
*total_time *= sysInfo.dwNumberOfProcessors;
|
||||
|
||||
/* XXX: we ignore cpu_index, i.e, we assume that the individual CPU usage
|
||||
* and the system usage are one and the same.
|
||||
*/
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static boolean
|
||||
get_cpu_stats(unsigned cpu_index, uint64_t *busy_time, uint64_t *total_time)
|
||||
|
@ -81,6 +133,8 @@ get_cpu_stats(unsigned cpu_index, uint64_t *busy_time, uint64_t *total_time)
|
|||
fclose(f);
|
||||
return FALSE;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
struct cpu_info {
|
||||
unsigned cpu_index;
|
||||
|
|
|
@ -68,17 +68,18 @@ static void translate_memcpy_uint( const void *in,
|
|||
* \param out_nr returns number of new vertices
|
||||
* \param out_translate returns the translation function to use by the caller
|
||||
*/
|
||||
int u_index_translator( unsigned hw_mask,
|
||||
unsigned prim,
|
||||
unsigned in_index_size,
|
||||
unsigned nr,
|
||||
unsigned in_pv,
|
||||
unsigned out_pv,
|
||||
unsigned prim_restart,
|
||||
unsigned *out_prim,
|
||||
unsigned *out_index_size,
|
||||
unsigned *out_nr,
|
||||
u_translate_func *out_translate )
|
||||
enum indices_mode
|
||||
u_index_translator(unsigned hw_mask,
|
||||
unsigned prim,
|
||||
unsigned in_index_size,
|
||||
unsigned nr,
|
||||
unsigned in_pv,
|
||||
unsigned out_pv,
|
||||
unsigned prim_restart,
|
||||
unsigned *out_prim,
|
||||
unsigned *out_index_size,
|
||||
unsigned *out_nr,
|
||||
u_translate_func *out_translate)
|
||||
{
|
||||
unsigned in_idx;
|
||||
unsigned out_idx;
|
||||
|
@ -204,17 +205,17 @@ int u_index_translator( unsigned hw_mask,
|
|||
* \param out_nr returns new number of vertices to draw
|
||||
* \param out_generate returns pointer to the generator function
|
||||
*/
|
||||
int u_index_generator( unsigned hw_mask,
|
||||
unsigned prim,
|
||||
unsigned start,
|
||||
unsigned nr,
|
||||
unsigned in_pv,
|
||||
unsigned out_pv,
|
||||
unsigned *out_prim,
|
||||
unsigned *out_index_size,
|
||||
unsigned *out_nr,
|
||||
u_generate_func *out_generate )
|
||||
|
||||
enum indices_mode
|
||||
u_index_generator(unsigned hw_mask,
|
||||
unsigned prim,
|
||||
unsigned start,
|
||||
unsigned nr,
|
||||
unsigned in_pv,
|
||||
unsigned out_pv,
|
||||
unsigned *out_prim,
|
||||
unsigned *out_index_size,
|
||||
unsigned *out_nr,
|
||||
u_generate_func *out_generate)
|
||||
{
|
||||
unsigned out_idx;
|
||||
|
||||
|
|
|
@ -67,66 +67,68 @@ typedef void (*u_generate_func)( unsigned start,
|
|||
/* Return codes describe the translate/generate operation. Caller may
|
||||
* be able to reuse translated indices under some circumstances.
|
||||
*/
|
||||
#define U_TRANSLATE_ERROR -1
|
||||
#define U_TRANSLATE_NORMAL 1
|
||||
#define U_TRANSLATE_MEMCPY 2
|
||||
#define U_GENERATE_LINEAR 3
|
||||
#define U_GENERATE_REUSABLE 4
|
||||
#define U_GENERATE_ONE_OFF 5
|
||||
|
||||
/* Result codes of the u_index_* and u_unfilled_* translator/generator
 * entry points.  The numeric values are spelled out explicitly and must
 * not change.
 */
enum indices_mode {
   U_TRANSLATE_ERROR   = -1,
   U_TRANSLATE_NORMAL  =  1,
   U_TRANSLATE_MEMCPY  =  2,
   U_GENERATE_LINEAR   =  3,
   U_GENERATE_REUSABLE =  4,
   U_GENERATE_ONE_OFF  =  5,
};
|
||||
|
||||
void u_index_init( void );
|
||||
|
||||
int u_index_translator( unsigned hw_mask,
|
||||
unsigned prim,
|
||||
unsigned in_index_size,
|
||||
unsigned nr,
|
||||
unsigned in_pv, /* API */
|
||||
unsigned out_pv, /* hardware */
|
||||
unsigned prim_restart,
|
||||
unsigned *out_prim,
|
||||
unsigned *out_index_size,
|
||||
unsigned *out_nr,
|
||||
u_translate_func *out_translate );
|
||||
enum indices_mode
|
||||
u_index_translator(unsigned hw_mask,
|
||||
unsigned prim,
|
||||
unsigned in_index_size,
|
||||
unsigned nr,
|
||||
unsigned in_pv, /* API */
|
||||
unsigned out_pv, /* hardware */
|
||||
unsigned prim_restart,
|
||||
unsigned *out_prim,
|
||||
unsigned *out_index_size,
|
||||
unsigned *out_nr,
|
||||
u_translate_func *out_translate);
|
||||
|
||||
/* Note that even when generating it is necessary to know what the
|
||||
* API's PV is, as the indices generated will depend on whether it is
|
||||
* the same as hardware or not, and in the case of triangle strips,
|
||||
* whether it is first or last.
|
||||
*/
|
||||
int u_index_generator( unsigned hw_mask,
|
||||
unsigned prim,
|
||||
unsigned start,
|
||||
unsigned nr,
|
||||
unsigned in_pv, /* API */
|
||||
unsigned out_pv, /* hardware */
|
||||
unsigned *out_prim,
|
||||
unsigned *out_index_size,
|
||||
unsigned *out_nr,
|
||||
u_generate_func *out_generate );
|
||||
enum indices_mode
|
||||
u_index_generator(unsigned hw_mask,
|
||||
unsigned prim,
|
||||
unsigned start,
|
||||
unsigned nr,
|
||||
unsigned in_pv, /* API */
|
||||
unsigned out_pv, /* hardware */
|
||||
unsigned *out_prim,
|
||||
unsigned *out_index_size,
|
||||
unsigned *out_nr,
|
||||
u_generate_func *out_generate);
|
||||
|
||||
|
||||
void u_unfilled_init( void );
|
||||
|
||||
int u_unfilled_translator( unsigned prim,
|
||||
unsigned in_index_size,
|
||||
unsigned nr,
|
||||
unsigned unfilled_mode,
|
||||
unsigned *out_prim,
|
||||
unsigned *out_index_size,
|
||||
unsigned *out_nr,
|
||||
u_translate_func *out_translate );
|
||||
|
||||
int u_unfilled_generator( unsigned prim,
|
||||
unsigned start,
|
||||
unsigned nr,
|
||||
unsigned unfilled_mode,
|
||||
unsigned *out_prim,
|
||||
unsigned *out_index_size,
|
||||
unsigned *out_nr,
|
||||
u_generate_func *out_generate );
|
||||
|
||||
|
||||
enum indices_mode
|
||||
u_unfilled_translator(unsigned prim,
|
||||
unsigned in_index_size,
|
||||
unsigned nr,
|
||||
unsigned unfilled_mode,
|
||||
unsigned *out_prim,
|
||||
unsigned *out_index_size,
|
||||
unsigned *out_nr,
|
||||
u_translate_func *out_translate);
|
||||
|
||||
enum indices_mode
|
||||
u_unfilled_generator(unsigned prim,
|
||||
unsigned start,
|
||||
unsigned nr,
|
||||
unsigned unfilled_mode,
|
||||
unsigned *out_prim,
|
||||
unsigned *out_index_size,
|
||||
unsigned *out_nr,
|
||||
u_generate_func *out_generate);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -111,14 +111,15 @@ static unsigned nr_lines( unsigned prim,
|
|||
|
||||
|
||||
|
||||
int u_unfilled_translator( unsigned prim,
|
||||
unsigned in_index_size,
|
||||
unsigned nr,
|
||||
unsigned unfilled_mode,
|
||||
unsigned *out_prim,
|
||||
unsigned *out_index_size,
|
||||
unsigned *out_nr,
|
||||
u_translate_func *out_translate )
|
||||
enum indices_mode
|
||||
u_unfilled_translator(unsigned prim,
|
||||
unsigned in_index_size,
|
||||
unsigned nr,
|
||||
unsigned unfilled_mode,
|
||||
unsigned *out_prim,
|
||||
unsigned *out_index_size,
|
||||
unsigned *out_nr,
|
||||
u_translate_func *out_translate)
|
||||
{
|
||||
unsigned in_idx;
|
||||
unsigned out_idx;
|
||||
|
@ -170,14 +171,15 @@ int u_unfilled_translator( unsigned prim,
|
|||
* different front/back fill modes, that can be handled with the
|
||||
* 'draw' module.
|
||||
*/
|
||||
int u_unfilled_generator( unsigned prim,
|
||||
unsigned start,
|
||||
unsigned nr,
|
||||
unsigned unfilled_mode,
|
||||
unsigned *out_prim,
|
||||
unsigned *out_index_size,
|
||||
unsigned *out_nr,
|
||||
u_generate_func *out_generate )
|
||||
enum indices_mode
|
||||
u_unfilled_generator(unsigned prim,
|
||||
unsigned start,
|
||||
unsigned nr,
|
||||
unsigned unfilled_mode,
|
||||
unsigned *out_prim,
|
||||
unsigned *out_index_size,
|
||||
unsigned *out_nr,
|
||||
u_generate_func *out_generate)
|
||||
{
|
||||
unsigned out_idx;
|
||||
|
||||
|
|
|
@ -95,6 +95,7 @@ const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
|
|||
"TESSOUTER",
|
||||
"TESSINNER",
|
||||
"VERTICESIN",
|
||||
"HELPER_INVOCATION",
|
||||
};
|
||||
|
||||
const char *tgsi_texture_names[TGSI_TEXTURE_COUNT] =
|
||||
|
|
|
@ -70,7 +70,7 @@ struct blitter_context_priv
|
|||
/* Constant state objects. */
|
||||
/* Vertex shaders. */
|
||||
void *vs; /**< Vertex shader which passes {pos, generic} to the output.*/
|
||||
void *vs_pos_only; /**< Vertex shader which passes pos to the output.*/
|
||||
void *vs_pos_only[4]; /**< Vertex shader which passes pos to the output.*/
|
||||
void *vs_layered; /**< Vertex shader which sets LAYER = INSTANCEID. */
|
||||
|
||||
/* Fragment shaders. */
|
||||
|
@ -325,27 +325,29 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
|
|||
return &ctx->base;
|
||||
}
|
||||
|
||||
static void bind_vs_pos_only(struct blitter_context_priv *ctx)
|
||||
static void bind_vs_pos_only(struct blitter_context_priv *ctx,
|
||||
unsigned num_so_channels)
|
||||
{
|
||||
struct pipe_context *pipe = ctx->base.pipe;
|
||||
int index = num_so_channels ? num_so_channels - 1 : 0;
|
||||
|
||||
if (!ctx->vs_pos_only) {
|
||||
if (!ctx->vs_pos_only[index]) {
|
||||
struct pipe_stream_output_info so;
|
||||
const uint semantic_names[] = { TGSI_SEMANTIC_POSITION };
|
||||
const uint semantic_indices[] = { 0 };
|
||||
|
||||
memset(&so, 0, sizeof(so));
|
||||
so.num_outputs = 1;
|
||||
so.output[0].num_components = 1;
|
||||
so.stride[0] = 1;
|
||||
so.output[0].num_components = num_so_channels;
|
||||
so.stride[0] = num_so_channels;
|
||||
|
||||
ctx->vs_pos_only =
|
||||
ctx->vs_pos_only[index] =
|
||||
util_make_vertex_passthrough_shader_with_so(pipe, 1, semantic_names,
|
||||
semantic_indices, FALSE,
|
||||
&so);
|
||||
}
|
||||
|
||||
pipe->bind_vs_state(pipe, ctx->vs_pos_only);
|
||||
pipe->bind_vs_state(pipe, ctx->vs_pos_only[index]);
|
||||
}
|
||||
|
||||
static void bind_vs_passthrough(struct blitter_context_priv *ctx)
|
||||
|
@ -441,8 +443,9 @@ void util_blitter_destroy(struct blitter_context *blitter)
|
|||
pipe->delete_rasterizer_state(pipe, ctx->rs_discard_state);
|
||||
if (ctx->vs)
|
||||
pipe->delete_vs_state(pipe, ctx->vs);
|
||||
if (ctx->vs_pos_only)
|
||||
pipe->delete_vs_state(pipe, ctx->vs_pos_only);
|
||||
for (i = 0; i < 4; i++)
|
||||
if (ctx->vs_pos_only[i])
|
||||
pipe->delete_vs_state(pipe, ctx->vs_pos_only[i]);
|
||||
if (ctx->vs_layered)
|
||||
pipe->delete_vs_state(pipe, ctx->vs_layered);
|
||||
pipe->delete_vertex_elements_state(pipe, ctx->velem_state);
|
||||
|
@ -2036,7 +2039,7 @@ void util_blitter_copy_buffer(struct blitter_context *blitter,
|
|||
|
||||
pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
|
||||
pipe->bind_vertex_elements_state(pipe, ctx->velem_state_readbuf[0]);
|
||||
bind_vs_pos_only(ctx);
|
||||
bind_vs_pos_only(ctx, 1);
|
||||
if (ctx->has_geometry_shader)
|
||||
pipe->bind_gs_state(pipe, NULL);
|
||||
if (ctx->has_tessellation) {
|
||||
|
@ -2103,7 +2106,7 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,
|
|||
pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
|
||||
pipe->bind_vertex_elements_state(pipe,
|
||||
ctx->velem_state_readbuf[num_channels-1]);
|
||||
bind_vs_pos_only(ctx);
|
||||
bind_vs_pos_only(ctx, num_channels);
|
||||
if (ctx->has_geometry_shader)
|
||||
pipe->bind_gs_state(pipe, NULL);
|
||||
if (ctx->has_tessellation) {
|
||||
|
|
|
@ -70,6 +70,20 @@ void _debug_vprintf(const char *format, va_list ap)
|
|||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
_pipe_debug_message(
|
||||
struct pipe_debug_callback *cb,
|
||||
unsigned *id,
|
||||
enum pipe_debug_type type,
|
||||
const char *fmt, ...)
|
||||
{
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
if (cb && cb->debug_message)
|
||||
cb->debug_message(cb->data, id, type, fmt, args);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
debug_disable_error_message_boxes(void)
|
||||
|
|
|
@ -42,6 +42,7 @@
|
|||
#include "os/os_misc.h"
|
||||
|
||||
#include "pipe/p_format.h"
|
||||
#include "pipe/p_defines.h"
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -262,6 +263,25 @@ void _debug_assert_fail(const char *expr,
|
|||
_debug_printf("error: %s\n", __msg)
|
||||
#endif
|
||||
|
||||
/**
 * Output a debug log message to the debug info callback.
 *
 * \param cb    callback object, passed through to _pipe_debug_message()
 * \param type  suffix of a PIPE_DEBUG_TYPE_* enumerator (e.g. pass FOO to
 *              select PIPE_DEBUG_TYPE_FOO)
 * \param fmt   printf-style format string, optionally followed by its
 *              arguments
 *
 * Every expansion owns a static "id" variable (initially 0) whose address
 * is handed to _pipe_debug_message().
 *
 * "##__VA_ARGS__" drops the preceding comma when no variadic arguments are
 * given, so that messages consisting of a plain string also compile.
 */
#define pipe_debug_message(cb, type, fmt, ...) do { \
   static unsigned id = 0; \
   _pipe_debug_message(cb, &id, \
                       PIPE_DEBUG_TYPE_ ## type, \
                       fmt, ##__VA_ARGS__); \
} while (0)
|
||||
|
||||
struct pipe_debug_callback;
|
||||
|
||||
void
|
||||
_pipe_debug_message(
|
||||
struct pipe_debug_callback *cb,
|
||||
unsigned *id,
|
||||
enum pipe_debug_type type,
|
||||
const char *fmt, ...) _util_printf_format(4, 5);
|
||||
|
||||
|
||||
/**
|
||||
* Used by debug_dump_enum and debug_dump_flags to describe symbols.
|
||||
|
|
|
@ -998,26 +998,30 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr,
|
|||
return PIPE_OK;
|
||||
}
|
||||
|
||||
static boolean u_vbuf_need_minmax_index(struct u_vbuf *mgr)
|
||||
static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr)
|
||||
{
|
||||
/* See if there are any per-vertex attribs which will be uploaded or
|
||||
* translated. Use bitmasks to get the info instead of looping over vertex
|
||||
* elements. */
|
||||
return (mgr->ve->used_vb_mask &
|
||||
((mgr->user_vb_mask | mgr->incompatible_vb_mask |
|
||||
((mgr->user_vb_mask |
|
||||
mgr->incompatible_vb_mask |
|
||||
mgr->ve->incompatible_vb_mask_any) &
|
||||
mgr->ve->noninstance_vb_mask_any & mgr->nonzero_stride_vb_mask)) != 0;
|
||||
mgr->ve->noninstance_vb_mask_any &
|
||||
mgr->nonzero_stride_vb_mask)) != 0;
|
||||
}
|
||||
|
||||
static boolean u_vbuf_mapping_vertex_buffer_blocks(struct u_vbuf *mgr)
|
||||
static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr)
|
||||
{
|
||||
/* Return true if there are hw buffers which don't need to be translated.
|
||||
*
|
||||
* We could query whether each buffer is busy, but that would
|
||||
* be way more costly than this. */
|
||||
return (mgr->ve->used_vb_mask &
|
||||
(~mgr->user_vb_mask & ~mgr->incompatible_vb_mask &
|
||||
mgr->ve->compatible_vb_mask_all & mgr->ve->noninstance_vb_mask_any &
|
||||
(~mgr->user_vb_mask &
|
||||
~mgr->incompatible_vb_mask &
|
||||
mgr->ve->compatible_vb_mask_all &
|
||||
mgr->ve->noninstance_vb_mask_any &
|
||||
mgr->nonzero_stride_vb_mask)) != 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -62,6 +62,18 @@ const enum pipe_format const_resource_formats_VUYA[3] = {
|
|||
PIPE_FORMAT_NONE
|
||||
};
|
||||
|
||||
const enum pipe_format const_resource_formats_YUVX[3] = {
|
||||
PIPE_FORMAT_R8G8B8X8_UNORM,
|
||||
PIPE_FORMAT_NONE,
|
||||
PIPE_FORMAT_NONE
|
||||
};
|
||||
|
||||
const enum pipe_format const_resource_formats_VUYX[3] = {
|
||||
PIPE_FORMAT_B8G8R8X8_UNORM,
|
||||
PIPE_FORMAT_NONE,
|
||||
PIPE_FORMAT_NONE
|
||||
};
|
||||
|
||||
const enum pipe_format const_resource_formats_YUYV[3] = {
|
||||
PIPE_FORMAT_R8G8_R8B8_UNORM,
|
||||
PIPE_FORMAT_NONE,
|
||||
|
@ -102,6 +114,12 @@ vl_video_buffer_formats(struct pipe_screen *screen, enum pipe_format format)
|
|||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
return const_resource_formats_VUYA;
|
||||
|
||||
case PIPE_FORMAT_R8G8B8X8_UNORM:
|
||||
/* R8G8B8X8 is described by the YUVX table; VUYX is the B8G8R8X8 table. */
return const_resource_formats_YUVX;
|
||||
|
||||
case PIPE_FORMAT_B8G8R8X8_UNORM:
|
||||
return const_resource_formats_VUYX;
|
||||
|
||||
case PIPE_FORMAT_YUYV:
|
||||
return const_resource_formats_YUYV;
|
||||
|
||||
|
|
|
@ -66,4 +66,10 @@ vl_screen_set_next_timestamp(struct vl_screen *vscreen, uint64_t stamp);
|
|||
void*
|
||||
vl_screen_get_private(struct vl_screen *vscreen);
|
||||
|
||||
struct vl_screen*
|
||||
vl_drm_screen_create(int fd);
|
||||
|
||||
void
|
||||
vl_drm_screen_destroy(struct vl_screen *vscreen);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,77 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2015 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "pipe/p_screen.h"
|
||||
#include "pipe-loader/pipe_loader.h"
|
||||
#include "state_tracker/drm_driver.h"
|
||||
|
||||
#include "util/u_memory.h"
|
||||
#include "vl/vl_winsys.h"
|
||||
|
||||
/**
 * Create a vl_screen wrapping a pipe screen for the given DRM file descriptor.
 *
 * With GALLIUM_STATIC_TARGETS the pipe screen comes from dd_create_screen(fd).
 * Otherwise a duplicate of \p fd is handed to pipe_loader_drm_probe_fd() and,
 * if the probe succeeds, the screen is created through the pipe loader.
 *
 * \param fd  DRM file descriptor
 * \return the new vl_screen, or NULL if the allocation or the pipe screen
 *         creation failed
 */
struct vl_screen*
|
||||
vl_drm_screen_create(int fd)
|
||||
{
|
||||
struct vl_screen *vscreen;
|
||||
|
||||
/* NOTE(review): the pscreen check further down presumably relies on
 * CALLOC_STRUCT returning zeroed memory when the probe fails - confirm. */
vscreen = CALLOC_STRUCT(vl_screen);
|
||||
if (!vscreen)
|
||||
return NULL;
|
||||
|
||||
#if GALLIUM_STATIC_TARGETS
|
||||
vscreen->pscreen = dd_create_screen(fd);
|
||||
#else
|
||||
if (pipe_loader_drm_probe_fd(&vscreen->dev, dup(fd))) {
|
||||
vscreen->pscreen =
|
||||
pipe_loader_create_screen(vscreen->dev, PIPE_SEARCH_DIR);
|
||||
/* Screen creation failed: drop the loader device obtained by the probe. */
if (!vscreen->pscreen)
|
||||
pipe_loader_release(&vscreen->dev, 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* No pipe screen on either path: free the wrapper and report failure. */
if (!vscreen->pscreen) {
|
||||
FREE(vscreen);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return vscreen;
|
||||
}
|
||||
|
||||
/**
 * Tear down a vl_screen.
 *
 * Destroys the wrapped pipe screen, releases the pipe loader device when the
 * build does not use GALLIUM_STATIC_TARGETS, and frees the vl_screen itself.
 *
 * \param vscreen  screen to destroy; must not be NULL (asserted)
 */
void
|
||||
vl_drm_screen_destroy(struct vl_screen *vscreen)
|
||||
{
|
||||
assert(vscreen);
|
||||
|
||||
vscreen->pscreen->destroy(vscreen->pscreen);
|
||||
|
||||
#if !GALLIUM_STATIC_TARGETS
|
||||
pipe_loader_release(&vscreen->dev, 1);
|
||||
#endif
|
||||
|
||||
FREE(vscreen);
|
||||
}
|
|
@ -84,6 +84,9 @@ objects. They all follow simple, one-method binding calls, e.g.
|
|||
levels. This corresponds to GL's ``PATCH_DEFAULT_OUTER_LEVEL``.
|
||||
* ``default_inner_level`` is the default value for the inner tessellation
|
||||
levels. This corresponds to GL's ``PATCH_DEFAULT_INNER_LEVEL``.
|
||||
* ``set_debug_callback`` sets the callback to be used for reporting
|
||||
various debug messages, eventually reported via KHR_debug and
|
||||
similar mechanisms.
|
||||
|
||||
|
||||
Sampler Views
|
||||
|
@ -224,6 +227,10 @@ is is also possible to only clear one or the other part). While it is only
|
|||
possible to clear one surface at a time (which can include several layers),
|
||||
this surface need not be bound to the framebuffer.
|
||||
|
||||
``clear_texture`` clears a non-PIPE_BUFFER resource's specified level
|
||||
and bounding box with a clear value provided in that resource's native
|
||||
format.
|
||||
|
||||
``clear_buffer`` clears a PIPE_BUFFER resource with the specified clear value
|
||||
(which may be multiple bytes in length). Logically this is a memset with a
|
||||
multi-byte element value starting at offset bytes from resource start, going
|
||||
|
|
|
@ -281,6 +281,8 @@ The integer capabilities:
|
|||
* ``PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS``:
|
||||
Whether copying between compressed and plain formats is supported where
|
||||
a compressed block is copied to/from a plain pixel of the same size.
|
||||
* ``PIPE_CAP_CLEAR_TEXTURE``: Whether ``clear_texture`` will be
|
||||
available in contexts.
|
||||
|
||||
|
||||
.. _pipe_capf:
|
||||
|
|
|
@ -2941,6 +2941,14 @@ TGSI_SEMANTIC_VERTICESIN
|
|||
For tessellation evaluation/control shaders, this semantic label indicates the
|
||||
number of vertices provided in the input patch. Only the X value is defined.
|
||||
|
||||
TGSI_SEMANTIC_HELPER_INVOCATION
|
||||
"""""""""""""""""""""""""""""""
|
||||
|
||||
For fragment shaders, this semantic indicates whether the current
|
||||
invocation is covered or not. Helper invocations are created in order
|
||||
to properly compute derivatives, however it may be desirable to skip
|
||||
some of the logic in those cases. See ``gl_HelperInvocation`` documentation.
|
||||
|
||||
|
||||
Declaration Interpolate
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
|
|
@ -8,13 +8,14 @@ http://github.com/freedreno/envytools/
|
|||
git clone https://github.com/freedreno/envytools.git
|
||||
|
||||
The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63970 bytes, from 2015-09-14 20:50:12)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
|
||||
|
||||
Copyright (C) 2013-2015 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
|
|
|
@ -8,13 +8,14 @@ http://github.com/freedreno/envytools/
|
|||
git clone https://github.com/freedreno/envytools.git
|
||||
|
||||
The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63970 bytes, from 2015-09-14 20:50:12)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
|
||||
|
||||
Copyright (C) 2013-2015 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
|
|
|
@ -8,13 +8,14 @@ http://github.com/freedreno/envytools/
|
|||
git clone https://github.com/freedreno/envytools.git
|
||||
|
||||
The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63970 bytes, from 2015-09-14 20:50:12)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
|
||||
|
||||
Copyright (C) 2013-2015 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
|
@ -489,8 +490,8 @@ static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_r
|
|||
return ((val) << A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK;
|
||||
}
|
||||
|
||||
#define REG_A4XX_RB_BLEND_RED 0x000020f3
|
||||
#define A4XX_RB_BLEND_RED_UINT__MASK 0x00007fff
|
||||
#define REG_A4XX_RB_BLEND_RED 0x000020f0
|
||||
#define A4XX_RB_BLEND_RED_UINT__MASK 0x0000ffff
|
||||
#define A4XX_RB_BLEND_RED_UINT__SHIFT 0
|
||||
static inline uint32_t A4XX_RB_BLEND_RED_UINT(uint32_t val)
|
||||
{
|
||||
|
@ -503,8 +504,16 @@ static inline uint32_t A4XX_RB_BLEND_RED_FLOAT(float val)
|
|||
return ((util_float_to_half(val)) << A4XX_RB_BLEND_RED_FLOAT__SHIFT) & A4XX_RB_BLEND_RED_FLOAT__MASK;
|
||||
}
|
||||
|
||||
#define REG_A4XX_RB_BLEND_GREEN 0x000020f4
|
||||
#define A4XX_RB_BLEND_GREEN_UINT__MASK 0x00007fff
|
||||
#define REG_A4XX_RB_BLEND_RED_F32 0x000020f1
|
||||
#define A4XX_RB_BLEND_RED_F32__MASK 0xffffffff
|
||||
#define A4XX_RB_BLEND_RED_F32__SHIFT 0
|
||||
static inline uint32_t A4XX_RB_BLEND_RED_F32(float val)
|
||||
{
|
||||
return ((fui(val)) << A4XX_RB_BLEND_RED_F32__SHIFT) & A4XX_RB_BLEND_RED_F32__MASK;
|
||||
}
|
||||
|
||||
#define REG_A4XX_RB_BLEND_GREEN 0x000020f2
|
||||
#define A4XX_RB_BLEND_GREEN_UINT__MASK 0x0000ffff
|
||||
#define A4XX_RB_BLEND_GREEN_UINT__SHIFT 0
|
||||
static inline uint32_t A4XX_RB_BLEND_GREEN_UINT(uint32_t val)
|
||||
{
|
||||
|
@ -517,8 +526,16 @@ static inline uint32_t A4XX_RB_BLEND_GREEN_FLOAT(float val)
|
|||
return ((util_float_to_half(val)) << A4XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A4XX_RB_BLEND_GREEN_FLOAT__MASK;
|
||||
}
|
||||
|
||||
#define REG_A4XX_RB_BLEND_BLUE 0x000020f5
|
||||
#define A4XX_RB_BLEND_BLUE_UINT__MASK 0x00007fff
|
||||
#define REG_A4XX_RB_BLEND_GREEN_F32 0x000020f3
|
||||
#define A4XX_RB_BLEND_GREEN_F32__MASK 0xffffffff
|
||||
#define A4XX_RB_BLEND_GREEN_F32__SHIFT 0
|
||||
static inline uint32_t A4XX_RB_BLEND_GREEN_F32(float val)
|
||||
{
|
||||
return ((fui(val)) << A4XX_RB_BLEND_GREEN_F32__SHIFT) & A4XX_RB_BLEND_GREEN_F32__MASK;
|
||||
}
|
||||
|
||||
#define REG_A4XX_RB_BLEND_BLUE 0x000020f4
|
||||
#define A4XX_RB_BLEND_BLUE_UINT__MASK 0x0000ffff
|
||||
#define A4XX_RB_BLEND_BLUE_UINT__SHIFT 0
|
||||
static inline uint32_t A4XX_RB_BLEND_BLUE_UINT(uint32_t val)
|
||||
{
|
||||
|
@ -531,8 +548,16 @@ static inline uint32_t A4XX_RB_BLEND_BLUE_FLOAT(float val)
|
|||
return ((util_float_to_half(val)) << A4XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A4XX_RB_BLEND_BLUE_FLOAT__MASK;
|
||||
}
|
||||
|
||||
#define REG_A4XX_RB_BLEND_BLUE_F32 0x000020f5
|
||||
#define A4XX_RB_BLEND_BLUE_F32__MASK 0xffffffff
|
||||
#define A4XX_RB_BLEND_BLUE_F32__SHIFT 0
|
||||
static inline uint32_t A4XX_RB_BLEND_BLUE_F32(float val)
|
||||
{
|
||||
return ((fui(val)) << A4XX_RB_BLEND_BLUE_F32__SHIFT) & A4XX_RB_BLEND_BLUE_F32__MASK;
|
||||
}
|
||||
|
||||
#define REG_A4XX_RB_BLEND_ALPHA 0x000020f6
|
||||
#define A4XX_RB_BLEND_ALPHA_UINT__MASK 0x00007fff
|
||||
#define A4XX_RB_BLEND_ALPHA_UINT__MASK 0x0000ffff
|
||||
#define A4XX_RB_BLEND_ALPHA_UINT__SHIFT 0
|
||||
static inline uint32_t A4XX_RB_BLEND_ALPHA_UINT(uint32_t val)
|
||||
{
|
||||
|
@ -545,6 +570,14 @@ static inline uint32_t A4XX_RB_BLEND_ALPHA_FLOAT(float val)
|
|||
return ((util_float_to_half(val)) << A4XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A4XX_RB_BLEND_ALPHA_FLOAT__MASK;
|
||||
}
|
||||
|
||||
#define REG_A4XX_RB_BLEND_ALPHA_F32 0x000020f7
|
||||
#define A4XX_RB_BLEND_ALPHA_F32__MASK 0xffffffff
|
||||
#define A4XX_RB_BLEND_ALPHA_F32__SHIFT 0
|
||||
static inline uint32_t A4XX_RB_BLEND_ALPHA_F32(float val)
|
||||
{
|
||||
return ((fui(val)) << A4XX_RB_BLEND_ALPHA_F32__SHIFT) & A4XX_RB_BLEND_ALPHA_F32__MASK;
|
||||
}
|
||||
|
||||
#define REG_A4XX_RB_ALPHA_CONTROL 0x000020f8
|
||||
#define A4XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK 0x000000ff
|
||||
#define A4XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT 0
|
||||
|
@ -2645,20 +2678,6 @@ static inline uint32_t A4XX_PC_HS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val)
|
|||
|
||||
#define REG_A4XX_UNKNOWN_20EF 0x000020ef
|
||||
|
||||
#define REG_A4XX_UNKNOWN_20F0 0x000020f0
|
||||
|
||||
#define REG_A4XX_UNKNOWN_20F1 0x000020f1
|
||||
|
||||
#define REG_A4XX_UNKNOWN_20F2 0x000020f2
|
||||
|
||||
#define REG_A4XX_UNKNOWN_20F7 0x000020f7
|
||||
#define A4XX_UNKNOWN_20F7__MASK 0xffffffff
|
||||
#define A4XX_UNKNOWN_20F7__SHIFT 0
|
||||
static inline uint32_t A4XX_UNKNOWN_20F7(float val)
|
||||
{
|
||||
return ((fui(val)) << A4XX_UNKNOWN_20F7__SHIFT) & A4XX_UNKNOWN_20F7__MASK;
|
||||
}
|
||||
|
||||
#define REG_A4XX_UNKNOWN_2152 0x00002152
|
||||
|
||||
#define REG_A4XX_UNKNOWN_2153 0x00002153
|
||||
|
|
|
@ -613,15 +613,19 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
|
||||
if (dirty & FD_DIRTY_BLEND_COLOR) {
|
||||
struct pipe_blend_color *bcolor = &ctx->blend_color;
|
||||
OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4);
|
||||
OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 255.0) |
|
||||
OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 8);
|
||||
OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 65535.0) |
|
||||
A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0]));
|
||||
OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 255.0) |
|
||||
OUT_RING(ring, A4XX_RB_BLEND_RED_F32(bcolor->color[0]));
|
||||
OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 65535.0) |
|
||||
A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]));
|
||||
OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 255.0) |
|
||||
OUT_RING(ring, A4XX_RB_BLEND_GREEN_F32(bcolor->color[1]));
|
||||
OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 65535.0) |
|
||||
A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]));
|
||||
OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 255.0) |
|
||||
OUT_RING(ring, A4XX_RB_BLEND_BLUE_F32(bcolor->color[2]));
|
||||
OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 65535.0) |
|
||||
A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]));
|
||||
OUT_RING(ring, A4XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
|
||||
}
|
||||
|
||||
if (dirty & FD_DIRTY_VERTTEX) {
|
||||
|
@ -699,15 +703,6 @@ fd4_emit_restore(struct fd_context *ctx)
|
|||
OUT_PKT0(ring, REG_A4XX_UNKNOWN_20EF, 1);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
|
||||
OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F0, 1);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
|
||||
OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F1, 1);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
|
||||
OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F2, 1);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
|
||||
OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4);
|
||||
OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(0) |
|
||||
A4XX_RB_BLEND_RED_FLOAT(0.0));
|
||||
|
@ -718,9 +713,6 @@ fd4_emit_restore(struct fd_context *ctx)
|
|||
OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(0x7fff) |
|
||||
A4XX_RB_BLEND_ALPHA_FLOAT(1.0));
|
||||
|
||||
OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F7, 1);
|
||||
OUT_RING(ring, 0x3f800000);
|
||||
|
||||
OUT_PKT0(ring, REG_A4XX_UNKNOWN_2152, 1);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
|
||||
|
|
|
@ -8,13 +8,14 @@ http://github.com/freedreno/envytools/
|
|||
git clone https://github.com/freedreno/envytools.git
|
||||
|
||||
The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63970 bytes, from 2015-09-14 20:50:12)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
|
||||
|
||||
Copyright (C) 2013-2015 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
|
|
|
@ -8,13 +8,14 @@ http://github.com/freedreno/envytools/
|
|||
git clone https://github.com/freedreno/envytools.git
|
||||
|
||||
The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63970 bytes, from 2015-09-14 20:50:12)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63914 bytes, from 2015-10-27 17:13:16)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
|
||||
|
||||
Copyright (C) 2013-2015 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
|
|
|
@ -239,6 +239,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
|
||||
case PIPE_CAP_CLEAR_TEXTURE:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_MAX_VIEWPORTS:
|
||||
|
@ -549,6 +550,7 @@ fd_screen_create(struct fd_device *dev)
|
|||
case 220:
|
||||
fd2_screen_init(pscreen);
|
||||
break;
|
||||
case 305:
|
||||
case 307:
|
||||
case 320:
|
||||
case 330:
|
||||
|
|
|
@ -2325,17 +2325,17 @@ emit_instructions(struct ir3_compile *ctx)
|
|||
}
|
||||
|
||||
/* Setup inputs: */
|
||||
foreach_list_typed(nir_variable, var, node, &ctx->s->inputs) {
|
||||
nir_foreach_variable(var, &ctx->s->inputs) {
|
||||
setup_input(ctx, var);
|
||||
}
|
||||
|
||||
/* Setup outputs: */
|
||||
foreach_list_typed(nir_variable, var, node, &ctx->s->outputs) {
|
||||
nir_foreach_variable(var, &ctx->s->outputs) {
|
||||
setup_output(ctx, var);
|
||||
}
|
||||
|
||||
/* Setup variables (which should only be arrays): */
|
||||
foreach_list_typed(nir_variable, var, node, &ctx->s->globals) {
|
||||
nir_foreach_variable(var, &ctx->s->globals) {
|
||||
declare_var(ctx, var);
|
||||
}
|
||||
|
||||
|
|
|
@ -253,6 +253,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
|
|||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
|
||||
case PIPE_CAP_CLEAR_TEXTURE:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
|
||||
|
|
|
@ -475,6 +475,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
|
||||
case PIPE_CAP_CLEAR_TEXTURE:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
|
|
@ -746,7 +746,12 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
|
|||
|
||||
pos_init(bld, x0, y0);
|
||||
|
||||
if (coeff_type.length > 4) {
|
||||
/*
|
||||
* Simple method (single step interpolation) may be slower if vector length
|
||||
* is just 4, but the results are different (generally less accurate) with
|
||||
* the other method, so always use more accurate version.
|
||||
*/
|
||||
if (1) {
|
||||
bld->simple_interp = TRUE;
|
||||
{
|
||||
/* XXX this should use a global static table */
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#include "util/u_memory.h"
|
||||
#include "gallivm/lp_bld_init.h"
|
||||
#include "gallivm/lp_bld_debug.h"
|
||||
#include "gallivm/lp_bld_format.h"
|
||||
#include "lp_context.h"
|
||||
#include "lp_jit.h"
|
||||
|
||||
|
@ -208,6 +209,8 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
|
|||
LLVMTypeRef elem_types[LP_JIT_THREAD_DATA_COUNT];
|
||||
LLVMTypeRef thread_data_type;
|
||||
|
||||
elem_types[LP_JIT_THREAD_DATA_CACHE] =
|
||||
LLVMPointerType(lp_build_format_cache_type(gallivm), 0);
|
||||
elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt64TypeInContext(lc);
|
||||
elem_types[LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX] =
|
||||
LLVMInt32TypeInContext(lc);
|
||||
|
|
|
@ -43,6 +43,7 @@
|
|||
#include "lp_texture.h"
|
||||
|
||||
|
||||
struct lp_build_format_cache;
|
||||
struct lp_fragment_shader_variant;
|
||||
struct llvmpipe_screen;
|
||||
|
||||
|
@ -189,6 +190,7 @@ enum {
|
|||
|
||||
struct lp_jit_thread_data
|
||||
{
|
||||
struct lp_build_format_cache *cache;
|
||||
uint64_t vis_counter;
|
||||
|
||||
/*
|
||||
|
@ -201,12 +203,16 @@ struct lp_jit_thread_data
|
|||
|
||||
|
||||
enum {
|
||||
LP_JIT_THREAD_DATA_COUNTER = 0,
|
||||
LP_JIT_THREAD_DATA_CACHE = 0,
|
||||
LP_JIT_THREAD_DATA_COUNTER,
|
||||
LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX,
|
||||
LP_JIT_THREAD_DATA_COUNT
|
||||
};
|
||||
|
||||
|
||||
#define lp_jit_thread_data_cache(_gallivm, _ptr) \
|
||||
lp_build_struct_get(_gallivm, _ptr, LP_JIT_THREAD_DATA_CACHE, "cache")
|
||||
|
||||
#define lp_jit_thread_data_counter(_gallivm, _ptr) \
|
||||
lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_THREAD_DATA_COUNTER, "counter")
|
||||
|
||||
|
|
|
@ -43,6 +43,7 @@
|
|||
#include "lp_query.h"
|
||||
#include "lp_rast.h"
|
||||
#include "lp_rast_priv.h"
|
||||
#include "gallivm/lp_bld_format.h"
|
||||
#include "gallivm/lp_bld_debug.h"
|
||||
#include "lp_scene.h"
|
||||
#include "lp_tex_sample.h"
|
||||
|
@ -664,6 +665,17 @@ rasterize_scene(struct lp_rasterizer_task *task,
|
|||
{
|
||||
task->scene = scene;
|
||||
|
||||
/* Clear the cache tags. This should not always be necessary but
|
||||
simpler for now. */
|
||||
#if LP_USE_TEXTURE_CACHE
|
||||
memset(task->thread_data.cache->cache_tags, 0,
|
||||
sizeof(task->thread_data.cache->cache_tags));
|
||||
#if LP_BUILD_FORMAT_CACHE_DEBUG
|
||||
task->thread_data.cache->cache_access_total = 0;
|
||||
task->thread_data.cache->cache_access_miss = 0;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (!task->rast->no_rast && !scene->discard) {
|
||||
/* loop over scene bins, rasterize each */
|
||||
{
|
||||
|
@ -679,6 +691,20 @@ rasterize_scene(struct lp_rasterizer_task *task,
|
|||
}
|
||||
|
||||
|
||||
#if LP_BUILD_FORMAT_CACHE_DEBUG
|
||||
{
|
||||
uint64_t total, miss;
|
||||
total = task->thread_data.cache->cache_access_total;
|
||||
miss = task->thread_data.cache->cache_access_miss;
|
||||
if (total) {
|
||||
debug_printf("thread %d cache access %llu miss %llu hit rate %f\n",
|
||||
task->thread_index, (long long unsigned)total,
|
||||
(long long unsigned)miss,
|
||||
(float)(total - miss)/(float)total);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (scene->fence) {
|
||||
lp_fence_signal(scene->fence);
|
||||
}
|
||||
|
@ -866,10 +892,15 @@ lp_rast_create( unsigned num_threads )
|
|||
goto no_full_scenes;
|
||||
}
|
||||
|
||||
for (i = 0; i < Elements(rast->tasks); i++) {
|
||||
for (i = 0; i < MAX2(1, num_threads); i++) {
|
||||
struct lp_rasterizer_task *task = &rast->tasks[i];
|
||||
task->rast = rast;
|
||||
task->thread_index = i;
|
||||
task->thread_data.cache = align_malloc(sizeof(struct lp_build_format_cache),
|
||||
16);
|
||||
if (!task->thread_data.cache) {
|
||||
goto no_thread_data_cache;
|
||||
}
|
||||
}
|
||||
|
||||
rast->num_threads = num_threads;
|
||||
|
@ -885,6 +916,14 @@ lp_rast_create( unsigned num_threads )
|
|||
|
||||
return rast;
|
||||
|
||||
no_thread_data_cache:
|
||||
for (i = 0; i < MAX2(1, rast->num_threads); i++) {
|
||||
if (rast->tasks[i].thread_data.cache) {
|
||||
align_free(rast->tasks[i].thread_data.cache);
|
||||
}
|
||||
}
|
||||
|
||||
lp_scene_queue_destroy(rast->full_scenes);
|
||||
no_full_scenes:
|
||||
FREE(rast);
|
||||
no_rast:
|
||||
|
@ -923,6 +962,9 @@ void lp_rast_destroy( struct lp_rasterizer *rast )
|
|||
pipe_semaphore_destroy(&rast->tasks[i].work_ready);
|
||||
pipe_semaphore_destroy(&rast->tasks[i].work_done);
|
||||
}
|
||||
for (i = 0; i < MAX2(1, rast->num_threads); i++) {
|
||||
align_free(rast->tasks[i].thread_data.cache);
|
||||
}
|
||||
|
||||
/* for synchronizing rasterization threads */
|
||||
pipe_barrier_destroy( &rast->barrier );
|
||||
|
|
|
@ -300,6 +300,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
|||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
|
||||
case PIPE_CAP_CLEAR_TEXTURE:
|
||||
return 0;
|
||||
}
|
||||
/* should only get here on unhandled cases */
|
||||
|
|
|
@ -421,7 +421,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
|
|||
lp_build_tgsi_soa(gallivm, tokens, type, &mask,
|
||||
consts_ptr, num_consts_ptr, &system_values,
|
||||
interp->inputs,
|
||||
outputs, context_ptr,
|
||||
outputs, context_ptr, thread_data_ptr,
|
||||
sampler, &shader->info.base, NULL);
|
||||
|
||||
/* Alpha test */
|
||||
|
@ -2303,8 +2303,8 @@ generate_fragment(struct llvmpipe_context *lp,
|
|||
lp_build_name(dady_ptr, "dady");
|
||||
lp_build_name(color_ptr_ptr, "color_ptr_ptr");
|
||||
lp_build_name(depth_ptr, "depth");
|
||||
lp_build_name(thread_data_ptr, "thread_data");
|
||||
lp_build_name(mask_input, "mask_input");
|
||||
lp_build_name(thread_data_ptr, "thread_data");
|
||||
lp_build_name(stride_ptr, "stride_ptr");
|
||||
lp_build_name(depth_stride, "depth_stride");
|
||||
|
||||
|
|
|
@ -44,6 +44,9 @@
|
|||
|
||||
#include "lp_test.h"
|
||||
|
||||
#define USE_TEXTURE_CACHE 1
|
||||
|
||||
static struct lp_build_format_cache *cache_ptr;
|
||||
|
||||
void
|
||||
write_tsv_header(FILE *fp)
|
||||
|
@ -71,7 +74,7 @@ write_tsv_row(FILE *fp,
|
|||
|
||||
typedef void
|
||||
(*fetch_ptr_t)(void *unpacked, const void *packed,
|
||||
unsigned i, unsigned j);
|
||||
unsigned i, unsigned j, struct lp_build_format_cache *cache);
|
||||
|
||||
|
||||
static LLVMValueRef
|
||||
|
@ -83,7 +86,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
|
|||
LLVMContextRef context = gallivm->context;
|
||||
LLVMModuleRef module = gallivm->module;
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
LLVMTypeRef args[4];
|
||||
LLVMTypeRef args[5];
|
||||
LLVMValueRef func;
|
||||
LLVMValueRef packed_ptr;
|
||||
LLVMValueRef offset = LLVMConstNull(LLVMInt32TypeInContext(context));
|
||||
|
@ -92,6 +95,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
|
|||
LLVMValueRef j;
|
||||
LLVMBasicBlockRef block;
|
||||
LLVMValueRef rgba;
|
||||
LLVMValueRef cache = NULL;
|
||||
|
||||
util_snprintf(name, sizeof name, "fetch_%s_%s", desc->short_name,
|
||||
type.floating ? "float" : "unorm8");
|
||||
|
@ -99,6 +103,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
|
|||
args[0] = LLVMPointerType(lp_build_vec_type(gallivm, type), 0);
|
||||
args[1] = LLVMPointerType(LLVMInt8TypeInContext(context), 0);
|
||||
args[3] = args[2] = LLVMInt32TypeInContext(context);
|
||||
args[4] = LLVMPointerType(lp_build_format_cache_type(gallivm), 0);
|
||||
|
||||
func = LLVMAddFunction(module, name,
|
||||
LLVMFunctionType(LLVMVoidTypeInContext(context),
|
||||
|
@ -109,11 +114,15 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
|
|||
i = LLVMGetParam(func, 2);
|
||||
j = LLVMGetParam(func, 3);
|
||||
|
||||
if (cache_ptr) {
|
||||
cache = LLVMGetParam(func, 4);
|
||||
}
|
||||
|
||||
block = LLVMAppendBasicBlockInContext(context, func, "entry");
|
||||
LLVMPositionBuilderAtEnd(builder, block);
|
||||
|
||||
rgba = lp_build_fetch_rgba_aos(gallivm, desc, type, TRUE,
|
||||
packed_ptr, offset, i, j);
|
||||
packed_ptr, offset, i, j, cache);
|
||||
|
||||
LLVMBuildStore(builder, rgba, rgba_ptr);
|
||||
|
||||
|
@ -170,7 +179,7 @@ test_format_float(unsigned verbose, FILE *fp,
|
|||
|
||||
memset(unpacked, 0, sizeof unpacked);
|
||||
|
||||
fetch_ptr(unpacked, packed, j, i);
|
||||
fetch_ptr(unpacked, packed, j, i, cache_ptr);
|
||||
|
||||
for(k = 0; k < 4; ++k) {
|
||||
if (util_double_inf_sign(test->unpacked[i][j][k]) != util_inf_sign(unpacked[k])) {
|
||||
|
@ -187,6 +196,11 @@ test_format_float(unsigned verbose, FILE *fp,
|
|||
}
|
||||
}
|
||||
|
||||
/* Ignore errors in S3TC for now */
|
||||
if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
|
||||
match = TRUE;
|
||||
}
|
||||
|
||||
if (!match) {
|
||||
printf("FAILED\n");
|
||||
printf(" Packed: %02x %02x %02x %02x\n",
|
||||
|
@ -261,7 +275,7 @@ test_format_unorm8(unsigned verbose, FILE *fp,
|
|||
|
||||
memset(unpacked, 0, sizeof unpacked);
|
||||
|
||||
fetch_ptr(unpacked, packed, j, i);
|
||||
fetch_ptr(unpacked, packed, j, i, cache_ptr);
|
||||
|
||||
match = TRUE;
|
||||
for(k = 0; k < 4; ++k) {
|
||||
|
@ -277,6 +291,11 @@ test_format_unorm8(unsigned verbose, FILE *fp,
|
|||
match = FALSE;
|
||||
}
|
||||
|
||||
/* Ignore errors in S3TC as we only implement a poor man approach */
|
||||
if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
|
||||
match = TRUE;
|
||||
}
|
||||
|
||||
if (!match) {
|
||||
printf("FAILED\n");
|
||||
printf(" Packed: %02x %02x %02x %02x\n",
|
||||
|
@ -334,6 +353,10 @@ test_all(unsigned verbose, FILE *fp)
|
|||
|
||||
util_format_s3tc_init();
|
||||
|
||||
#if USE_TEXTURE_CACHE
|
||||
cache_ptr = align_malloc(sizeof(struct lp_build_format_cache), 16);
|
||||
#endif
|
||||
|
||||
for (format = 1; format < PIPE_FORMAT_COUNT; ++format) {
|
||||
const struct util_format_description *format_desc;
|
||||
|
||||
|
@ -363,6 +386,9 @@ test_all(unsigned verbose, FILE *fp)
|
|||
success = FALSE;
|
||||
}
|
||||
}
|
||||
#if USE_TEXTURE_CACHE
|
||||
align_free(cache_ptr);
|
||||
#endif
|
||||
|
||||
return success;
|
||||
}
|
||||
|
|
|
@ -221,6 +221,21 @@ LP_LLVM_SAMPLER_MEMBER(lod_bias, LP_JIT_SAMPLER_LOD_BIAS, TRUE)
|
|||
LP_LLVM_SAMPLER_MEMBER(border_color, LP_JIT_SAMPLER_BORDER_COLOR, FALSE)
|
||||
|
||||
|
||||
#if LP_USE_TEXTURE_CACHE
|
||||
/**
 * Return the texture cache pointer for a sampler unit.
 *
 * Simply forwards gallivm and thread_data_ptr to
 * lp_jit_thread_data_cache(); neither base nor unit is consulted, so
 * every unit receives the same cache.
 */
static LLVMValueRef
|
||||
lp_llvm_texture_cache_ptr(const struct lp_sampler_dynamic_state *base,
|
||||
struct gallivm_state *gallivm,
|
||||
LLVMValueRef thread_data_ptr,
|
||||
unsigned unit)
|
||||
{
|
||||
/* We use the same cache for all units */
|
||||
(void)unit;
|
||||
|
||||
return lp_jit_thread_data_cache(gallivm, thread_data_ptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
static void
|
||||
lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
|
||||
{
|
||||
|
@ -314,6 +329,10 @@ lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state)
|
|||
sampler->dynamic_state.base.lod_bias = lp_llvm_sampler_lod_bias;
|
||||
sampler->dynamic_state.base.border_color = lp_llvm_sampler_border_color;
|
||||
|
||||
#if LP_USE_TEXTURE_CACHE
|
||||
sampler->dynamic_state.base.cache_ptr = lp_llvm_texture_cache_ptr;
|
||||
#endif
|
||||
|
||||
sampler->dynamic_state.static_state = static_state;
|
||||
|
||||
return &sampler->base;
|
||||
|
|
|
@ -34,6 +34,10 @@
|
|||
|
||||
struct lp_sampler_static_state;
|
||||
|
||||
/**
|
||||
* Whether texture cache is used for s3tc textures.
|
||||
*/
|
||||
#define LP_USE_TEXTURE_CACHE 0
|
||||
|
||||
/**
|
||||
* Pure-LLVM texture sampling code generator.
|
||||
|
@ -42,5 +46,4 @@ struct lp_sampler_static_state;
|
|||
struct lp_build_sampler_soa *
|
||||
lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *key);
|
||||
|
||||
|
||||
#endif /* LP_TEX_SAMPLE_H */
|
||||
|
|
|
@ -805,7 +805,7 @@ llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen)
|
|||
#endif
|
||||
|
||||
screen->resource_create = llvmpipe_resource_create;
|
||||
screen->resource_create_front = llvmpipe_resource_create_front;
|
||||
/* screen->resource_create_front = llvmpipe_resource_create_front; */
|
||||
screen->resource_destroy = llvmpipe_resource_destroy;
|
||||
screen->resource_from_handle = llvmpipe_resource_from_handle;
|
||||
screen->resource_get_handle = llvmpipe_resource_get_handle;
|
||||
|
|
|
@ -389,6 +389,7 @@ enum SVSemantic
|
|||
SV_SBASE,
|
||||
SV_VERTEX_STRIDE,
|
||||
SV_INVOCATION_INFO,
|
||||
SV_THREAD_KILL,
|
||||
SV_UNDEFINED,
|
||||
SV_LAST
|
||||
};
|
||||
|
|
|
@ -392,12 +392,24 @@ BuildUtil::mkImm(float f)
|
|||
return mkImm(u.u32);
|
||||
}
|
||||
|
||||
// Create an immediate value from the double d.
// The value is obtained from new_ImmediateValue(), which is handed this
// builder's prog together with d.
ImmediateValue *
|
||||
BuildUtil::mkImm(double d)
|
||||
{
|
||||
return new_ImmediateValue(prog, d);
|
||||
}
|
||||
|
||||
// Emit an OP_MOV of type TYPE_F32 whose source is the immediate f.
// The destination is dst, or a value from getScratch() when dst is NULL.
// Returns whatever mkOp1v() returns for that instruction.
Value *
|
||||
BuildUtil::loadImm(Value *dst, float f)
|
||||
{
|
||||
return mkOp1v(OP_MOV, TYPE_F32, dst ? dst : getScratch(), mkImm(f));
|
||||
}
|
||||
|
||||
// Emit an OP_MOV of type TYPE_F64 whose source is the immediate d.
// The destination is dst, or a value from getScratch() when dst is NULL.
// Returns whatever mkOp1v() returns for that instruction.
Value *
|
||||
BuildUtil::loadImm(Value *dst, double d)
|
||||
{
|
||||
return mkOp1v(OP_MOV, TYPE_F64, dst ? dst : getScratch(), mkImm(d));
|
||||
}
|
||||
|
||||
Value *
|
||||
BuildUtil::loadImm(Value *dst, uint32_t u)
|
||||
{
|
||||
|
@ -555,6 +567,12 @@ BuildUtil::split64BitOpPostRA(Function *fn, Instruction *i,
|
|||
switch (i->dType) {
|
||||
case TYPE_U64: hTy = TYPE_U32; break;
|
||||
case TYPE_S64: hTy = TYPE_S32; break;
|
||||
case TYPE_F64:
|
||||
if (i->op == OP_MOV) {
|
||||
hTy = TYPE_U32;
|
||||
break;
|
||||
}
|
||||
/* fallthrough */
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
@ -90,12 +90,14 @@ public:
|
|||
void mkClobber(DataFile file, uint32_t regMask, int regUnitLog2);
|
||||
|
||||
ImmediateValue *mkImm(float);
|
||||
ImmediateValue *mkImm(double);
|
||||
ImmediateValue *mkImm(uint32_t);
|
||||
ImmediateValue *mkImm(uint64_t);
|
||||
|
||||
ImmediateValue *mkImm(int i) { return mkImm((uint32_t)i); }
|
||||
|
||||
Value *loadImm(Value *dst, float);
|
||||
Value *loadImm(Value *dst, double);
|
||||
Value *loadImm(Value *dst, uint32_t);
|
||||
Value *loadImm(Value *dst, uint64_t);
|
||||
|
||||
|
|
|
@ -96,6 +96,7 @@ struct nv50_ir_prog_info
|
|||
uint32_t tlsSpace; /* required local memory per thread */
|
||||
uint32_t *code;
|
||||
uint32_t codeSize;
|
||||
uint32_t instructions;
|
||||
uint8_t sourceRep; /* NV50_PROGRAM_IR */
|
||||
const void *source;
|
||||
void *relocData;
|
||||
|
|
|
@ -1644,6 +1644,7 @@ CodeEmitterGK110::getSRegEncoding(const ValueRef& ref)
|
|||
case SV_VERTEX_COUNT: return 0x10;
|
||||
case SV_INVOCATION_ID: return 0x11;
|
||||
case SV_YDIR: return 0x12;
|
||||
case SV_THREAD_KILL: return 0x13;
|
||||
case SV_TID: return 0x21 + SDATA(ref).sv.index;
|
||||
case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
|
||||
case SV_NTID: return 0x29 + SDATA(ref).sv.index;
|
||||
|
|
|
@ -244,6 +244,7 @@ CodeEmitterGM107::emitSYS(int pos, const Value *val)
|
|||
case SV_LANEID : id = 0x00; break;
|
||||
case SV_VERTEX_COUNT : id = 0x10; break;
|
||||
case SV_INVOCATION_ID : id = 0x11; break;
|
||||
case SV_THREAD_KILL : id = 0x13; break;
|
||||
case SV_INVOCATION_INFO: id = 0x1d; break;
|
||||
default:
|
||||
assert(!"invalid system value");
|
||||
|
@ -310,9 +311,12 @@ CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
|
|||
uint32_t val = imm->reg.data.u32;
|
||||
|
||||
if (len == 19) {
|
||||
if (isFloatType(insn->sType)) {
|
||||
if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
|
||||
assert(!(val & 0x00000fff));
|
||||
val >>= 12;
|
||||
} else if (insn->sType == TYPE_F64) {
|
||||
assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
|
||||
val = imm->reg.data.u64 >> 44;
|
||||
}
|
||||
assert(!(val & 0xfff00000) || (val & 0xfff00000) == 0xfff00000);
|
||||
emitField( 56, 1, (val & 0x80000) >> 19);
|
||||
|
|
|
@ -96,9 +96,12 @@ private:
|
|||
void emitUADD(const Instruction *);
|
||||
void emitAADD(const Instruction *);
|
||||
void emitFADD(const Instruction *);
|
||||
void emitDADD(const Instruction *);
|
||||
void emitIMUL(const Instruction *);
|
||||
void emitFMUL(const Instruction *);
|
||||
void emitDMUL(const Instruction *);
|
||||
void emitFMAD(const Instruction *);
|
||||
void emitDMAD(const Instruction *);
|
||||
void emitIMAD(const Instruction *);
|
||||
void emitISAD(const Instruction *);
|
||||
|
||||
|
@ -438,9 +441,9 @@ CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
|
|||
return;
|
||||
|
||||
if ((mode & 3) == 1) {
|
||||
const int pos = i->src(1).getFile() == FILE_IMMEDIATE ? 13 : 14;
|
||||
const int pos = ((mode >> 2) & 3) == 3 ? 13 : 14;
|
||||
|
||||
switch (i->getSrc(0)->reg.type) {
|
||||
switch (i->sType) {
|
||||
case TYPE_U8:
|
||||
break;
|
||||
case TYPE_U16:
|
||||
|
@ -954,11 +957,13 @@ CodeEmitterNV50::emitMINMAX(const Instruction *i)
|
|||
assert(0);
|
||||
break;
|
||||
}
|
||||
code[1] |= i->src(0).mod.abs() << 20;
|
||||
code[1] |= i->src(0).mod.neg() << 26;
|
||||
code[1] |= i->src(1).mod.abs() << 19;
|
||||
code[1] |= i->src(1).mod.neg() << 27;
|
||||
}
|
||||
|
||||
code[1] |= i->src(0).mod.abs() << 20;
|
||||
code[1] |= i->src(0).mod.neg() << 26;
|
||||
code[1] |= i->src(1).mod.abs() << 19;
|
||||
code[1] |= i->src(1).mod.neg() << 27;
|
||||
|
||||
emitForm_MAD(i);
|
||||
}
|
||||
|
||||
|
@ -993,6 +998,26 @@ CodeEmitterNV50::emitFMAD(const Instruction *i)
|
|||
}
|
||||
}
|
||||
|
||||
// Encode a double-precision multiply-add into code[0]/code[1].
// Requires the 8-byte encoding and no saturation (both asserted).
void
|
||||
CodeEmitterNV50::emitDMAD(const Instruction *i)
|
||||
{
// the product is negated when exactly one of src0/src1 has the neg modifier
const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
|
||||
const int neg_add = i->src(2).mod.neg();
|
||||
|
||||
assert(i->encSize == 8);
|
||||
assert(!i->saturate);
|
||||
|
||||
// base opcode words; modifier bits are OR'ed into code[1] below
code[1] = 0x40000000;
|
||||
code[0] = 0xe0000000;
|
||||
|
||||
// bit 26 of code[1]: negate product, bit 27: negate addend
code[1] |= neg_mul << 26;
|
||||
code[1] |= neg_add << 27;
|
||||
|
||||
roundMode_MAD(i);
|
||||
|
||||
emitForm_MAD(i);
|
||||
}
|
||||
|
||||
void
|
||||
CodeEmitterNV50::emitFADD(const Instruction *i)
|
||||
{
|
||||
|
@ -1027,6 +1052,25 @@ CodeEmitterNV50::emitFADD(const Instruction *i)
|
|||
}
|
||||
}
|
||||
|
||||
// Encode a double-precision add (or subtract) into code[0]/code[1].
// Abs modifiers and saturation are not accepted, and the 8-byte encoding
// is required (all asserted).
void
|
||||
CodeEmitterNV50::emitDADD(const Instruction *i)
|
||||
{
|
||||
const int neg0 = i->src(0).mod.neg();
|
||||
// OP_SUB is expressed by flipping the negation of the second source
const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
|
||||
|
||||
assert(!(i->src(0).mod | i->src(1).mod).abs());
|
||||
assert(!i->saturate);
|
||||
|
||||
assert(i->encSize == 8);
|
||||
|
||||
code[1] = 0x60000000;
|
||||
code[0] = 0xe0000000;
|
||||
|
||||
emitForm_ADD(i);
|
||||
|
||||
// negate bits are OR'ed in after emitForm_ADD(): bit 26 = src0, bit 27 = src1
code[1] |= neg0 << 26;
|
||||
code[1] |= neg1 << 27;
|
||||
}
|
||||
|
||||
void
|
||||
CodeEmitterNV50::emitUADD(const Instruction *i)
|
||||
{
|
||||
|
@ -1081,7 +1125,10 @@ CodeEmitterNV50::emitIMUL(const Instruction *i)
|
|||
|
||||
if (i->encSize == 8) {
|
||||
code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
|
||||
emitForm_MAD(i);
|
||||
if (i->src(1).getFile() == FILE_IMMEDIATE)
|
||||
emitForm_IMM(i);
|
||||
else
|
||||
emitForm_MAD(i);
|
||||
} else {
|
||||
if (i->sType == TYPE_S16)
|
||||
code[0] |= 0x8100;
|
||||
|
@ -1120,6 +1167,25 @@ CodeEmitterNV50::emitFMUL(const Instruction *i)
|
|||
}
|
||||
}
|
||||
|
||||
// Encode a double-precision multiply into code[0]/code[1].
// Saturation is not accepted and the 8-byte encoding is required
// (both asserted).
void
|
||||
CodeEmitterNV50::emitDMUL(const Instruction *i)
|
||||
{
|
||||
// neg() of the XOR-combined modifiers of src0 and src1
const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
|
||||
|
||||
assert(!i->saturate);
|
||||
assert(i->encSize == 8);
|
||||
|
||||
code[1] = 0x80000000;
|
||||
code[0] = 0xe0000000;
|
||||
|
||||
// 0x08000000 is bit 27 of code[1], set when the result is negated
if (neg)
|
||||
code[1] |= 0x08000000;
|
||||
|
||||
// NOTE(review): uses roundMode_CVT(i->rnd) where emitDMAD uses
// roundMode_MAD(i) — presumably intentional, confirm against the encoding
roundMode_CVT(i->rnd);
|
||||
|
||||
emitForm_MAD(i);
|
||||
}
|
||||
|
||||
void
|
||||
CodeEmitterNV50::emitIMAD(const Instruction *i)
|
||||
{
|
||||
|
@ -1136,7 +1202,10 @@ CodeEmitterNV50::emitIMAD(const Instruction *i)
|
|||
code[1] |= neg1 << 27;
|
||||
code[1] |= neg2 << 26;
|
||||
|
||||
emitForm_MAD(i);
|
||||
if (i->src(1).getFile() == FILE_IMMEDIATE)
|
||||
emitForm_IMM(i);
|
||||
else
|
||||
emitForm_MAD(i);
|
||||
|
||||
if (i->flagsSrc >= 0) {
|
||||
// add with carry from $cX
|
||||
|
@ -1181,9 +1250,11 @@ CodeEmitterNV50::emitSET(const Instruction *i)
|
|||
code[0] = 0x30000000;
|
||||
code[1] = 0x60000000;
|
||||
|
||||
emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
|
||||
|
||||
switch (i->sType) {
|
||||
case TYPE_F64:
|
||||
code[0] = 0xe0000000;
|
||||
code[1] = 0xe0000000;
|
||||
break;
|
||||
case TYPE_F32: code[0] |= 0x80000000; break;
|
||||
case TYPE_S32: code[1] |= 0x0c000000; break;
|
||||
case TYPE_U32: code[1] |= 0x04000000; break;
|
||||
|
@ -1193,6 +1264,9 @@ CodeEmitterNV50::emitSET(const Instruction *i)
|
|||
assert(0);
|
||||
break;
|
||||
}
|
||||
|
||||
emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
|
||||
|
||||
if (i->src(0).mod.neg()) code[1] |= 0x04000000;
|
||||
if (i->src(1).mod.neg()) code[1] |= 0x08000000;
|
||||
if (i->src(0).mod.abs()) code[1] |= 0x00100000;
|
||||
|
@ -1756,7 +1830,9 @@ CodeEmitterNV50::emitInstruction(Instruction *insn)
|
|||
break;
|
||||
case OP_ADD:
|
||||
case OP_SUB:
|
||||
if (isFloatType(insn->dType))
|
||||
if (insn->dType == TYPE_F64)
|
||||
emitDADD(insn);
|
||||
else if (isFloatType(insn->dType))
|
||||
emitFADD(insn);
|
||||
else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
|
||||
emitAADD(insn);
|
||||
|
@ -1764,14 +1840,18 @@ CodeEmitterNV50::emitInstruction(Instruction *insn)
|
|||
emitUADD(insn);
|
||||
break;
|
||||
case OP_MUL:
|
||||
if (isFloatType(insn->dType))
|
||||
if (insn->dType == TYPE_F64)
|
||||
emitDMUL(insn);
|
||||
else if (isFloatType(insn->dType))
|
||||
emitFMUL(insn);
|
||||
else
|
||||
emitIMUL(insn);
|
||||
break;
|
||||
case OP_MAD:
|
||||
case OP_FMA:
|
||||
if (isFloatType(insn->dType))
|
||||
if (insn->dType == TYPE_F64)
|
||||
emitDMAD(insn);
|
||||
else if (isFloatType(insn->dType))
|
||||
emitFMAD(insn);
|
||||
else
|
||||
emitIMAD(insn);
|
||||
|
@ -1943,7 +2023,7 @@ CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
|
|||
{
|
||||
const Target::OpInfo &info = targ->getOpInfo(i);
|
||||
|
||||
if (info.minEncSize > 4)
|
||||
if (info.minEncSize > 4 || i->dType == TYPE_F64)
|
||||
return 8;
|
||||
|
||||
// check constraints on dst and src operands
|
||||
|
|
|
@ -323,6 +323,14 @@ CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
|
|||
assert(imm);
|
||||
u32 = imm->reg.data.u32;
|
||||
|
||||
if ((code[0] & 0xf) == 0x1) {
|
||||
// double immediate
|
||||
uint64_t u64 = imm->reg.data.u64;
|
||||
assert(!(u64 & 0x00000fffffffffffULL));
|
||||
assert(!(code[1] & 0xc000));
|
||||
code[0] |= ((u64 >> 44) & 0x3f) << 26;
|
||||
code[1] |= 0xc000 | (u64 >> 50);
|
||||
} else
|
||||
if ((code[0] & 0xf) == 0x2) {
|
||||
// LIMM
|
||||
code[0] |= (u32 & 0x3f) << 26;
|
||||
|
@ -1831,6 +1839,7 @@ CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
|
|||
case SV_VERTEX_COUNT: return 0x10;
|
||||
case SV_INVOCATION_ID: return 0x11;
|
||||
case SV_YDIR: return 0x12;
|
||||
case SV_THREAD_KILL: return 0x13;
|
||||
case SV_TID: return 0x21 + SDATA(ref).sv.index;
|
||||
case SV_CTAID: return 0x25 + SDATA(ref).sv.index;
|
||||
case SV_NTID: return 0x29 + SDATA(ref).sv.index;
|
||||
|
|
|
@ -376,6 +376,7 @@ static nv50_ir::SVSemantic translateSysVal(uint sysval)
|
|||
case TGSI_SEMANTIC_TESSOUTER: return nv50_ir::SV_TESS_OUTER;
|
||||
case TGSI_SEMANTIC_TESSINNER: return nv50_ir::SV_TESS_INNER;
|
||||
case TGSI_SEMANTIC_VERTICESIN: return nv50_ir::SV_VERTEX_COUNT;
|
||||
case TGSI_SEMANTIC_HELPER_INVOCATION: return nv50_ir::SV_THREAD_KILL;
|
||||
default:
|
||||
assert(0);
|
||||
return nv50_ir::SV_CLOCK;
|
||||
|
|
|
@ -75,7 +75,7 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul)
|
|||
s[0] = mul->getSrc(0);
|
||||
s[1] = mul->getSrc(1);
|
||||
|
||||
if (isSignedType(mul->sType)) {
|
||||
if (isSignedType(mul->sType) && highResult) {
|
||||
s[0] = bld->getSSA(fullSize);
|
||||
s[1] = bld->getSSA(fullSize);
|
||||
bld->mkOp1(OP_ABS, mul->sType, s[0], mul->getSrc(0));
|
||||
|
|
|
@ -155,7 +155,7 @@ private:
|
|||
void checkSwapSrc01(Instruction *);
|
||||
|
||||
bool isCSpaceLoad(Instruction *);
|
||||
bool isImmd32Load(Instruction *);
|
||||
bool isImmdLoad(Instruction *);
|
||||
bool isAttribOrSharedLoad(Instruction *);
|
||||
};
|
||||
|
||||
|
@ -166,9 +166,10 @@ LoadPropagation::isCSpaceLoad(Instruction *ld)
|
|||
}
|
||||
|
||||
bool
|
||||
LoadPropagation::isImmd32Load(Instruction *ld)
|
||||
LoadPropagation::isImmdLoad(Instruction *ld)
|
||||
{
|
||||
if (!ld || (ld->op != OP_MOV) || (typeSizeof(ld->dType) != 4))
|
||||
if (!ld || (ld->op != OP_MOV) ||
|
||||
((typeSizeof(ld->dType) != 4) && (typeSizeof(ld->dType) != 8)))
|
||||
return false;
|
||||
return ld->src(0).getFile() == FILE_IMMEDIATE;
|
||||
}
|
||||
|
@ -201,8 +202,8 @@ LoadPropagation::checkSwapSrc01(Instruction *insn)
|
|||
else
|
||||
return;
|
||||
} else
|
||||
if (isImmd32Load(i0)) {
|
||||
if (!isCSpaceLoad(i1) && !isImmd32Load(i1))
|
||||
if (isImmdLoad(i0)) {
|
||||
if (!isCSpaceLoad(i1) && !isImmdLoad(i1))
|
||||
insn->swapSources(0, 1);
|
||||
else
|
||||
return;
|
||||
|
@ -447,6 +448,7 @@ ConstantFolding::expr(Instruction *i,
|
|||
{
|
||||
struct Storage *const a = &imm0.reg, *const b = &imm1.reg;
|
||||
struct Storage res;
|
||||
DataType type = i->dType;
|
||||
|
||||
memset(&res.data, 0, sizeof(res.data));
|
||||
|
||||
|
@ -588,6 +590,18 @@ ConstantFolding::expr(Instruction *i,
|
|||
// The two arguments to pfetch are logically added together. Normally
|
||||
// the second argument will not be constant, but that can happen.
|
||||
res.data.u32 = a->data.u32 + b->data.u32;
|
||||
type = TYPE_U32;
|
||||
break;
|
||||
case OP_MERGE:
|
||||
switch (i->dType) {
|
||||
case TYPE_U64:
|
||||
case TYPE_S64:
|
||||
case TYPE_F64:
|
||||
res.data.u64 = (((uint64_t)b->data.u32) << 32) | a->data.u32;
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
|
@ -602,6 +616,8 @@ ConstantFolding::expr(Instruction *i,
|
|||
i->setSrc(1, NULL);
|
||||
|
||||
i->getSrc(0)->reg.data = res.data;
|
||||
i->getSrc(0)->reg.type = type;
|
||||
i->getSrc(0)->reg.size = typeSizeof(type);
|
||||
|
||||
switch (i->op) {
|
||||
case OP_MAD:
|
||||
|
@ -1148,6 +1164,11 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
|
|||
#define CASE(type, dst, fmin, fmax, imin, imax, umin, umax) \
|
||||
case type: \
|
||||
switch (i->sType) { \
|
||||
case TYPE_F64: \
|
||||
res.data.dst = util_iround(i->saturate ? \
|
||||
CLAMP(imm0.reg.data.f64, fmin, fmax) : \
|
||||
imm0.reg.data.f64); \
|
||||
break; \
|
||||
case TYPE_F32: \
|
||||
res.data.dst = util_iround(i->saturate ? \
|
||||
CLAMP(imm0.reg.data.f32, fmin, fmax) : \
|
||||
|
@ -1185,6 +1206,11 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
|
|||
CASE(TYPE_S32, s32, INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX, 0, INT32_MAX);
|
||||
case TYPE_F32:
|
||||
switch (i->sType) {
|
||||
case TYPE_F64:
|
||||
res.data.f32 = i->saturate ?
|
||||
CLAMP(imm0.reg.data.f64, 0.0f, 1.0f) :
|
||||
imm0.reg.data.f64;
|
||||
break;
|
||||
case TYPE_F32:
|
||||
res.data.f32 = i->saturate ?
|
||||
CLAMP(imm0.reg.data.f32, 0.0f, 1.0f) :
|
||||
|
@ -1199,6 +1225,27 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
|
|||
}
|
||||
i->setSrc(0, bld.mkImm(res.data.f32));
|
||||
break;
|
||||
case TYPE_F64:
|
||||
switch (i->sType) {
|
||||
case TYPE_F64:
|
||||
res.data.f64 = i->saturate ?
|
||||
CLAMP(imm0.reg.data.f64, 0.0f, 1.0f) :
|
||||
imm0.reg.data.f64;
|
||||
break;
|
||||
case TYPE_F32:
|
||||
res.data.f64 = i->saturate ?
|
||||
CLAMP(imm0.reg.data.f32, 0.0f, 1.0f) :
|
||||
imm0.reg.data.f32;
|
||||
break;
|
||||
case TYPE_U16: res.data.f64 = (double) imm0.reg.data.u16; break;
|
||||
case TYPE_U32: res.data.f64 = (double) imm0.reg.data.u32; break;
|
||||
case TYPE_S16: res.data.f64 = (double) imm0.reg.data.s16; break;
|
||||
case TYPE_S32: res.data.f64 = (double) imm0.reg.data.s32; break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
i->setSrc(0, bld.mkImm(res.data.f64));
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -275,6 +275,7 @@ static const char *SemanticStr[SV_LAST + 1] =
|
|||
"SBASE",
|
||||
"VERTEX_STRIDE",
|
||||
"INVOCATION_INFO",
|
||||
"THREAD_KILL",
|
||||
"?",
|
||||
"(INVALID)"
|
||||
};
|
||||
|
|
|
@ -373,6 +373,7 @@ Program::emitBinary(struct nv50_ir_prog_info *info)
|
|||
if (!code)
|
||||
return false;
|
||||
emit->setCodeLocation(code, binSize);
|
||||
info->bin.instructions = 0;
|
||||
|
||||
for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
|
||||
Function *fn = reinterpret_cast<Function *>(fi.get());
|
||||
|
@ -382,6 +383,7 @@ Program::emitBinary(struct nv50_ir_prog_info *info)
|
|||
for (int b = 0; b < fn->bbCount; ++b) {
|
||||
for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) {
|
||||
emit->emitInstruction(i);
|
||||
info->bin.instructions++;
|
||||
if (i->sType == TYPE_F64 || i->dType == TYPE_F64)
|
||||
info->io.fp64 = true;
|
||||
}
|
||||
|
|
|
@ -343,7 +343,7 @@ TargetNV50::insnCanLoad(const Instruction *i, int s,
|
|||
}
|
||||
|
||||
if (sf == FILE_IMMEDIATE)
|
||||
return true;
|
||||
return ldSize <= 4;
|
||||
|
||||
|
||||
// Check if memory access is encodable:
|
||||
|
|
|
@ -338,17 +338,30 @@ TargetNVC0::insnCanLoad(const Instruction *i, int s,
|
|||
if (sf == FILE_IMMEDIATE) {
|
||||
Storage &reg = ld->getSrc(0)->asImm()->reg;
|
||||
|
||||
if (typeSizeof(i->sType) > 4)
|
||||
return false;
|
||||
if (opInfo[i->op].immdBits != 0xffffffff) {
|
||||
if (i->sType == TYPE_F32) {
|
||||
if (opInfo[i->op].immdBits != 0xffffffff || typeSizeof(i->sType) > 4) {
|
||||
switch (i->sType) {
|
||||
case TYPE_F64:
|
||||
if (reg.data.u64 & 0x00000fffffffffffULL)
|
||||
return false;
|
||||
break;
|
||||
case TYPE_F32:
|
||||
if (reg.data.u32 & 0xfff)
|
||||
return false;
|
||||
} else
|
||||
if (i->sType == TYPE_S32 || i->sType == TYPE_U32) {
|
||||
break;
|
||||
case TYPE_S32:
|
||||
case TYPE_U32:
|
||||
// with u32, 0xfffff counts as 0xffffffff as well
|
||||
if (reg.data.s32 > 0x7ffff || reg.data.s32 < -0x80000)
|
||||
return false;
|
||||
break;
|
||||
case TYPE_U8:
|
||||
case TYPE_S8:
|
||||
case TYPE_U16:
|
||||
case TYPE_S16:
|
||||
case TYPE_F16:
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
} else
|
||||
if (i->op == OP_MAD || i->op == OP_FMA) {
|
||||
|
|
|
@ -225,21 +225,22 @@ nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
|
|||
* for write/read by waiting on the buffer's relevant fences.
|
||||
*/
|
||||
static inline bool
|
||||
nouveau_buffer_sync(struct nv04_resource *buf, unsigned rw)
|
||||
nouveau_buffer_sync(struct nouveau_context *nv,
|
||||
struct nv04_resource *buf, unsigned rw)
|
||||
{
|
||||
if (rw == PIPE_TRANSFER_READ) {
|
||||
if (!buf->fence_wr)
|
||||
return true;
|
||||
NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
|
||||
!nouveau_fence_signalled(buf->fence_wr));
|
||||
if (!nouveau_fence_wait(buf->fence_wr))
|
||||
if (!nouveau_fence_wait(buf->fence_wr, &nv->debug))
|
||||
return false;
|
||||
} else {
|
||||
if (!buf->fence)
|
||||
return true;
|
||||
NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
|
||||
!nouveau_fence_signalled(buf->fence));
|
||||
if (!nouveau_fence_wait(buf->fence))
|
||||
if (!nouveau_fence_wait(buf->fence, &nv->debug))
|
||||
return false;
|
||||
|
||||
nouveau_fence_ref(NULL, &buf->fence);
|
||||
|
@ -478,7 +479,7 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
|
|||
if (unlikely(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)) {
|
||||
/* Discarding was not possible, must sync because
|
||||
* subsequent transfers might use UNSYNCHRONIZED. */
|
||||
nouveau_buffer_sync(buf, usage & PIPE_TRANSFER_READ_WRITE);
|
||||
nouveau_buffer_sync(nv, buf, usage & PIPE_TRANSFER_READ_WRITE);
|
||||
} else
|
||||
if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
|
||||
/* The whole range is being discarded, so it doesn't matter what was
|
||||
|
@ -490,7 +491,7 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
|
|||
if (usage & PIPE_TRANSFER_DONTBLOCK)
|
||||
map = NULL;
|
||||
else
|
||||
nouveau_buffer_sync(buf, usage & PIPE_TRANSFER_READ_WRITE);
|
||||
nouveau_buffer_sync(nv, buf, usage & PIPE_TRANSFER_READ_WRITE);
|
||||
} else {
|
||||
/* It is expected that the returned buffer be a representation of the
|
||||
* data in question, so we must copy it over from the buffer. */
|
||||
|
@ -615,7 +616,7 @@ nouveau_resource_map_offset(struct nouveau_context *nv,
|
|||
if (res->mm) {
|
||||
unsigned rw;
|
||||
rw = (flags & NOUVEAU_BO_WR) ? PIPE_TRANSFER_WRITE : PIPE_TRANSFER_READ;
|
||||
nouveau_buffer_sync(res, rw);
|
||||
nouveau_buffer_sync(nv, res, rw);
|
||||
if (nouveau_bo_map(res->bo, 0, NULL))
|
||||
return NULL;
|
||||
} else {
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
#define __NOUVEAU_CONTEXT_H__
|
||||
|
||||
#include "pipe/p_context.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include <nouveau.h>
|
||||
|
||||
#define NOUVEAU_MAX_SCRATCH_BUFS 4
|
||||
|
@ -14,6 +15,7 @@ struct nouveau_context {
|
|||
|
||||
struct nouveau_client *client;
|
||||
struct nouveau_pushbuf *pushbuf;
|
||||
struct pipe_debug_callback debug;
|
||||
|
||||
bool vbo_dirty;
|
||||
|
||||
|
@ -63,6 +65,9 @@ nouveau_context(struct pipe_context *pipe)
|
|||
void
|
||||
nouveau_context_init_vdec(struct nouveau_context *);
|
||||
|
||||
void
|
||||
nouveau_context_init(struct nouveau_context *);
|
||||
|
||||
void
|
||||
nouveau_scratch_runout_release(struct nouveau_context *);
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "nouveau_screen.h"
|
||||
#include "nouveau_winsys.h"
|
||||
#include "nouveau_fence.h"
|
||||
#include "os/os_time.h"
|
||||
|
||||
#ifdef PIPE_OS_UNIX
|
||||
#include <sched.h>
|
||||
|
@ -58,26 +59,6 @@ nouveau_fence_trigger_work(struct nouveau_fence *fence)
|
|||
}
|
||||
}
|
||||
|
||||
bool
|
||||
nouveau_fence_work(struct nouveau_fence *fence,
|
||||
void (*func)(void *), void *data)
|
||||
{
|
||||
struct nouveau_fence_work *work;
|
||||
|
||||
if (!fence || fence->state == NOUVEAU_FENCE_STATE_SIGNALLED) {
|
||||
func(data);
|
||||
return true;
|
||||
}
|
||||
|
||||
work = CALLOC_STRUCT(nouveau_fence_work);
|
||||
if (!work)
|
||||
return false;
|
||||
work->func = func;
|
||||
work->data = data;
|
||||
LIST_ADD(&work->list, &fence->work);
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
nouveau_fence_emit(struct nouveau_fence *fence)
|
||||
{
|
||||
|
@ -181,11 +162,10 @@ nouveau_fence_signalled(struct nouveau_fence *fence)
|
|||
return fence->state == NOUVEAU_FENCE_STATE_SIGNALLED;
|
||||
}
|
||||
|
||||
bool
|
||||
nouveau_fence_wait(struct nouveau_fence *fence)
|
||||
static bool
|
||||
nouveau_fence_kick(struct nouveau_fence *fence)
|
||||
{
|
||||
struct nouveau_screen *screen = fence->screen;
|
||||
uint32_t spins = 0;
|
||||
|
||||
/* wtf, someone is waiting on a fence in flush_notify handler? */
|
||||
assert(fence->state != NOUVEAU_FENCE_STATE_EMITTING);
|
||||
|
@ -206,11 +186,32 @@ nouveau_fence_wait(struct nouveau_fence *fence)
|
|||
if (fence == screen->fence.current)
|
||||
nouveau_fence_next(screen);
|
||||
|
||||
do {
|
||||
nouveau_fence_update(screen, false);
|
||||
nouveau_fence_update(screen, false);
|
||||
|
||||
if (fence->state == NOUVEAU_FENCE_STATE_SIGNALLED)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
nouveau_fence_wait(struct nouveau_fence *fence, struct pipe_debug_callback *debug)
|
||||
{
|
||||
struct nouveau_screen *screen = fence->screen;
|
||||
uint32_t spins = 0;
|
||||
int64_t start = 0;
|
||||
|
||||
if (debug && debug->debug_message)
|
||||
start = os_time_get_nano();
|
||||
|
||||
if (!nouveau_fence_kick(fence))
|
||||
return false;
|
||||
|
||||
do {
|
||||
if (fence->state == NOUVEAU_FENCE_STATE_SIGNALLED) {
|
||||
if (debug && debug->debug_message)
|
||||
pipe_debug_message(debug, PERF_INFO,
|
||||
"stalled %.3f ms waiting for fence",
|
||||
(os_time_get_nano() - start) / 1000000.f);
|
||||
return true;
|
||||
}
|
||||
if (!spins)
|
||||
NOUVEAU_DRV_STAT(screen, any_non_kernel_fence_sync_count, 1);
|
||||
spins++;
|
||||
|
@ -218,6 +219,8 @@ nouveau_fence_wait(struct nouveau_fence *fence)
|
|||
if (!(spins % 8)) /* donate a few cycles */
|
||||
sched_yield();
|
||||
#endif
|
||||
|
||||
nouveau_fence_update(screen, false);
|
||||
} while (spins < NOUVEAU_FENCE_MAX_SPINS);
|
||||
|
||||
debug_printf("Wait on fence %u (ack = %u, next = %u) timed out !\n",
|
||||
|
@ -249,3 +252,26 @@ nouveau_fence_unref_bo(void *data)
|
|||
|
||||
nouveau_bo_ref(NULL, &bo);
|
||||
}
|
||||
|
||||
/* Attach a callback to a fence.
 *
 * If fence is NULL or already in NOUVEAU_FENCE_STATE_SIGNALLED, func(data)
 * is called immediately and true is returned. Otherwise a work item holding
 * func and data is allocated and added to fence->work (presumably run later
 * by nouveau_fence_trigger_work() - confirm).
 *
 * Returns false only when the work item cannot be allocated; func is not
 * called in that case.
 */
bool
|
||||
nouveau_fence_work(struct nouveau_fence *fence,
|
||||
void (*func)(void *), void *data)
|
||||
{
|
||||
struct nouveau_fence_work *work;
|
||||
|
||||
if (!fence || fence->state == NOUVEAU_FENCE_STATE_SIGNALLED) {
|
||||
func(data);
|
||||
return true;
|
||||
}
|
||||
|
||||
work = CALLOC_STRUCT(nouveau_fence_work);
|
||||
if (!work)
|
||||
return false;
|
||||
work->func = func;
|
||||
work->data = data;
|
||||
LIST_ADD(&work->list, &fence->work);
|
||||
p_atomic_inc(&fence->work_count);
|
||||
/* once more than 64 items have been counted on this fence, kick it */
if (fence->work_count > 64)
|
||||
nouveau_fence_kick(fence);
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -11,6 +11,8 @@
|
|||
#define NOUVEAU_FENCE_STATE_FLUSHED 3
|
||||
#define NOUVEAU_FENCE_STATE_SIGNALLED 4
|
||||
|
||||
struct pipe_debug_callback;
|
||||
|
||||
struct nouveau_fence_work {
|
||||
struct list_head list;
|
||||
void (*func)(void *);
|
||||
|
@ -23,6 +25,7 @@ struct nouveau_fence {
|
|||
int state;
|
||||
int ref;
|
||||
uint32_t sequence;
|
||||
uint32_t work_count;
|
||||
struct list_head work;
|
||||
};
|
||||
|
||||
|
@ -34,7 +37,7 @@ bool nouveau_fence_new(struct nouveau_screen *, struct nouveau_fence **,
|
|||
bool nouveau_fence_work(struct nouveau_fence *, void (*)(void *), void *);
|
||||
void nouveau_fence_update(struct nouveau_screen *, bool flushed);
|
||||
void nouveau_fence_next(struct nouveau_screen *);
|
||||
bool nouveau_fence_wait(struct nouveau_fence *);
|
||||
bool nouveau_fence_wait(struct nouveau_fence *, struct pipe_debug_callback *);
|
||||
bool nouveau_fence_signalled(struct nouveau_fence *);
|
||||
|
||||
void nouveau_fence_unref_bo(void *data); /* generic unref bo callback */
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
|
||||
#include "nouveau_winsys.h"
|
||||
#include "nouveau_screen.h"
|
||||
#include "nouveau_context.h"
|
||||
#include "nouveau_fence.h"
|
||||
#include "nouveau_mm.h"
|
||||
#include "nouveau_buffer.h"
|
||||
|
@ -75,7 +76,7 @@ nouveau_screen_fence_finish(struct pipe_screen *screen,
|
|||
if (!timeout)
|
||||
return nouveau_fence_signalled(nouveau_fence(pfence));
|
||||
|
||||
return nouveau_fence_wait(nouveau_fence(pfence));
|
||||
return nouveau_fence_wait(nouveau_fence(pfence), NULL);
|
||||
}
|
||||
|
||||
|
||||
|
@ -238,3 +239,21 @@ nouveau_screen_fini(struct nouveau_screen *screen)
|
|||
|
||||
nouveau_device_del(&screen->device);
|
||||
}
|
||||
|
||||
/* Store the debug callback for a context.
 *
 * Copies *cb by value into the nouveau_context's debug member; when cb is
 * NULL the stored callback is zeroed instead.
 */
static void
|
||||
nouveau_set_debug_callback(struct pipe_context *pipe,
|
||||
const struct pipe_debug_callback *cb)
|
||||
{
|
||||
struct nouveau_context *context = nouveau_context(pipe);
|
||||
|
||||
if (cb)
|
||||
context->debug = *cb;
|
||||
else
|
||||
memset(&context->debug, 0, sizeof(context->debug));
|
||||
}
|
||||
|
||||
/* Common context setup: installs nouveau_set_debug_callback() as the
 * context's pipe.set_debug_callback hook. Nothing else is initialized here.
 */
void
|
||||
nouveau_context_init(struct nouveau_context *context)
|
||||
{
|
||||
context->pipe.set_debug_callback = nouveau_set_debug_callback;
|
||||
}
|
||||
|
|
|
@ -437,6 +437,7 @@ nouveau_vp3_screen_get_video_param(struct pipe_screen *pscreen,
|
|||
/* VP3 does not support MPEG4, VP4+ do. */
|
||||
return entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM &&
|
||||
profile >= PIPE_VIDEO_PROFILE_MPEG1 &&
|
||||
profile < PIPE_VIDEO_PROFILE_HEVC_MAIN &&
|
||||
(!vp3 || codec != PIPE_VIDEO_FORMAT_MPEG4) &&
|
||||
firmware_present(pscreen, profile);
|
||||
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
|
||||
|
|
|
@ -242,6 +242,7 @@ nv30_context_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
|
|||
if (debug_get_bool_option("NV30_SWTNL", false))
|
||||
nv30->draw_flags |= NV30_NEW_SWTNL;
|
||||
|
||||
nouveau_context_init(&nv30->base);
|
||||
nv30->sample_mask = 0xffff;
|
||||
nv30_vbo_init(pipe);
|
||||
nv30_query_init(pipe);
|
||||
|
|
|
@ -173,6 +173,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
|
||||
case PIPE_CAP_CLEAR_TEXTURE:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
|
@ -353,7 +354,7 @@ nv30_screen_fence_emit(struct pipe_screen *pscreen, uint32_t *sequence)
|
|||
|
||||
*sequence = ++screen->base.fence.sequence;
|
||||
|
||||
assert(PUSH_AVAIL(push) >= 3);
|
||||
assert(PUSH_AVAIL(push) + push->rsvd_kick >= 3);
|
||||
PUSH_DATA (push, NV30_3D_FENCE_OFFSET |
|
||||
(2 /* size */ << 18) | (7 /* subchan */ << 13));
|
||||
PUSH_DATA (push, 0);
|
||||
|
@ -383,7 +384,7 @@ nv30_screen_destroy(struct pipe_screen *pscreen)
|
|||
* _current_ one, and remove both.
|
||||
*/
|
||||
nouveau_fence_ref(screen->base.fence.current, &current);
|
||||
nouveau_fence_wait(current);
|
||||
nouveau_fence_wait(current, NULL);
|
||||
nouveau_fence_ref(NULL, &current);
|
||||
nouveau_fence_ref(NULL, &screen->base.fence.current);
|
||||
}
|
||||
|
|
|
@ -306,6 +306,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
|
|||
}
|
||||
nv50->base.pushbuf->kick_notify = nv50_default_kick_notify;
|
||||
|
||||
nouveau_context_init(&nv50->base);
|
||||
nv50_init_query_functions(nv50);
|
||||
nv50_init_surface_functions(nv50);
|
||||
nv50_init_state_functions(nv50);
|
||||
|
|
|
@ -203,10 +203,8 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
|
|||
F3B(B5G6R5_UNORM, B5G6R5_UNORM, C2, C1, C0, xx, UNORM, 5_6_5, TD),
|
||||
C4B(B5G5R5A1_UNORM, BGR5_A1_UNORM, C2, C1, C0, C3, UNORM, 5_5_5_1, TD),
|
||||
F3B(B5G5R5X1_UNORM, BGR5_X1_UNORM, C2, C1, C0, xx, UNORM, 5_5_5_1, TD),
|
||||
#if NOUVEAU_DRIVER != 0xc0
|
||||
C4B(B4G4R4A4_UNORM, NONE, C2, C1, C0, C3, UNORM, 4_4_4_4, T),
|
||||
F3B(B4G4R4X4_UNORM, NONE, C2, C1, C0, xx, UNORM, 4_4_4_4, T),
|
||||
#endif
|
||||
F3B(R9G9B9E5_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 9_9_9_E5, T),
|
||||
|
||||
C4A(R10G10B10A2_UNORM, RGB10_A2_UNORM, C0, C1, C2, C3, UNORM, 10_10_10_2,
|
||||
|
|
|
@ -318,7 +318,8 @@ nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info,
|
|||
}
|
||||
|
||||
bool
|
||||
nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
|
||||
nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
|
||||
struct pipe_debug_callback *debug)
|
||||
{
|
||||
struct nv50_ir_prog_info *info;
|
||||
int ret;
|
||||
|
@ -406,6 +407,11 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
|
|||
prog->so = nv50_program_create_strmout_state(info,
|
||||
&prog->pipe.stream_output);
|
||||
|
||||
pipe_debug_message(debug, SHADER_INFO,
|
||||
"type: %d, local: %d, gpr: %d, inst: %d, bytes: %d",
|
||||
prog->type, info->bin.tlsSpace, prog->max_gpr,
|
||||
info->bin.instructions, info->bin.codeSize);
|
||||
|
||||
out:
|
||||
FREE(info);
|
||||
return !ret;
|
||||
|
|
|
@ -106,7 +106,8 @@ struct nv50_program {
|
|||
struct nv50_stream_output_state *so;
|
||||
};
|
||||
|
||||
bool nv50_program_translate(struct nv50_program *, uint16_t chipset);
|
||||
bool nv50_program_translate(struct nv50_program *, uint16_t chipset,
|
||||
struct pipe_debug_callback *);
|
||||
bool nv50_program_upload_code(struct nv50_context *, struct nv50_program *);
|
||||
void nv50_program_destroy(struct nv50_context *, struct nv50_program *);
|
||||
|
||||
|
|
|
@ -151,4 +151,11 @@ nv50_surface_from_buffer(struct pipe_context *pipe,
|
|||
void
|
||||
nv50_surface_destroy(struct pipe_context *, struct pipe_surface *);
|
||||
|
||||
void
|
||||
nv50_clear_texture(struct pipe_context *pipe,
|
||||
struct pipe_resource *res,
|
||||
unsigned level,
|
||||
const struct pipe_box *box,
|
||||
const void *data);
|
||||
|
||||
#endif /* __NV50_RESOURCE_H__ */
|
||||
|
|
|
@ -182,6 +182,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_TGSI_TXQS:
|
||||
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
case PIPE_CAP_CLEAR_TEXTURE:
|
||||
return 1;
|
||||
case PIPE_CAP_SEAMLESS_CUBE_MAP:
|
||||
return 1; /* class_3d >= NVA0_3D_CLASS; */
|
||||
|
@ -350,7 +351,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
|
|||
* _current_ one, and remove both.
|
||||
*/
|
||||
nouveau_fence_ref(screen->base.fence.current, &current);
|
||||
nouveau_fence_wait(current);
|
||||
nouveau_fence_wait(current, NULL);
|
||||
nouveau_fence_ref(NULL, &current);
|
||||
nouveau_fence_ref(NULL, &screen->base.fence.current);
|
||||
}
|
||||
|
@ -392,7 +393,7 @@ nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
|
|||
/* we need to do it after possible flush in MARK_RING */
|
||||
*sequence = ++screen->base.fence.sequence;
|
||||
|
||||
assert(PUSH_AVAIL(push) >= 5);
|
||||
assert(PUSH_AVAIL(push) + push->rsvd_kick >= 5);
|
||||
PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4));
|
||||
PUSH_DATAh(push, screen->fence.bo->offset);
|
||||
PUSH_DATA (push, screen->fence.bo->offset);
|
||||
|
|
|
@ -113,7 +113,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog)
|
|||
{
|
||||
if (!prog->translated) {
|
||||
prog->translated = nv50_program_translate(
|
||||
prog, nv50->screen->base.device->chipset);
|
||||
prog, nv50->screen->base.device->chipset, &nv50->base.debug);
|
||||
if (!prog->translated)
|
||||
return false;
|
||||
} else
|
||||
|
|
|
@ -727,7 +727,8 @@ nv50_sp_state_create(struct pipe_context *pipe,
|
|||
prog->pipe.stream_output = cso->stream_output;
|
||||
|
||||
prog->translated = nv50_program_translate(
|
||||
prog, nv50_context(pipe)->screen->base.device->chipset);
|
||||
prog, nv50_context(pipe)->screen->base.device->chipset,
|
||||
&nouveau_context(pipe)->debug);
|
||||
|
||||
return (void *)prog;
|
||||
}
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include "util/u_inlines.h"
|
||||
#include "util/u_pack_color.h"
|
||||
#include "util/u_format.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_surface.h"
|
||||
|
||||
#include "tgsi/tgsi_ureg.h"
|
||||
|
@ -324,6 +325,9 @@ nv50_clear_render_target(struct pipe_context *pipe,
|
|||
else
|
||||
PUSH_DATA(push, 512);
|
||||
|
||||
BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
|
||||
PUSH_DATA (push, mt->ms_mode);
|
||||
|
||||
if (!nouveau_bo_memtype(bo)) {
|
||||
BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
|
@ -404,6 +408,9 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
|
|||
BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1);
|
||||
PUSH_DATA (push, 512);
|
||||
|
||||
BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
|
||||
PUSH_DATA (push, mt->ms_mode);
|
||||
|
||||
BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2);
|
||||
PUSH_DATA (push, (width << 16) | dstx);
|
||||
PUSH_DATA (push, (height << 16) | dsty);
|
||||
|
@ -417,6 +424,80 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
|
|||
nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
|
||||
}
|
||||
|
||||
void
|
||||
nv50_clear_texture(struct pipe_context *pipe,
|
||||
struct pipe_resource *res,
|
||||
unsigned level,
|
||||
const struct pipe_box *box,
|
||||
const void *data)
|
||||
{
|
||||
struct pipe_surface tmpl = {{0}}, *sf;
|
||||
|
||||
tmpl.format = res->format;
|
||||
tmpl.u.tex.first_layer = box->z;
|
||||
tmpl.u.tex.last_layer = box->z + box->depth - 1;
|
||||
tmpl.u.tex.level = level;
|
||||
sf = pipe->create_surface(pipe, res, &tmpl);
|
||||
if (!sf)
|
||||
return;
|
||||
|
||||
if (util_format_is_depth_or_stencil(res->format)) {
|
||||
float depth = 0;
|
||||
uint8_t stencil = 0;
|
||||
unsigned clear = 0;
|
||||
const struct util_format_description *desc =
|
||||
util_format_description(res->format);
|
||||
|
||||
if (util_format_has_depth(desc)) {
|
||||
clear |= PIPE_CLEAR_DEPTH;
|
||||
desc->unpack_z_float(&depth, 0, data, 0, 1, 1);
|
||||
}
|
||||
if (util_format_has_stencil(desc)) {
|
||||
clear |= PIPE_CLEAR_STENCIL;
|
||||
desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1);
|
||||
}
|
||||
pipe->clear_depth_stencil(pipe, sf, clear, depth, stencil,
|
||||
box->x, box->y, box->width, box->height);
|
||||
} else {
|
||||
union pipe_color_union color;
|
||||
|
||||
switch (util_format_get_blocksizebits(res->format)) {
|
||||
case 128:
|
||||
sf->format = PIPE_FORMAT_R32G32B32A32_UINT;
|
||||
memcpy(&color.ui, data, 128 / 8);
|
||||
break;
|
||||
case 64:
|
||||
sf->format = PIPE_FORMAT_R32G32_UINT;
|
||||
memcpy(&color.ui, data, 64 / 8);
|
||||
memset(&color.ui[2], 0, 64 / 8);
|
||||
break;
|
||||
case 32:
|
||||
sf->format = PIPE_FORMAT_R32_UINT;
|
||||
memcpy(&color.ui, data, 32 / 8);
|
||||
memset(&color.ui[1], 0, 96 / 8);
|
||||
break;
|
||||
case 16:
|
||||
sf->format = PIPE_FORMAT_R16_UINT;
|
||||
color.ui[0] = util_cpu_to_le32(
|
||||
util_le16_to_cpu(*(unsigned short *)data));
|
||||
memset(&color.ui[1], 0, 96 / 8);
|
||||
break;
|
||||
case 8:
|
||||
sf->format = PIPE_FORMAT_R8_UINT;
|
||||
color.ui[0] = util_cpu_to_le32(*(unsigned char *)data);
|
||||
memset(&color.ui[1], 0, 96 / 8);
|
||||
break;
|
||||
default:
|
||||
assert(!"Unknown texel element size");
|
||||
return;
|
||||
}
|
||||
|
||||
pipe->clear_render_target(pipe, sf, &color,
|
||||
box->x, box->y, box->width, box->height);
|
||||
}
|
||||
pipe->surface_destroy(pipe, sf);
|
||||
}
|
||||
|
||||
void
|
||||
nv50_clear(struct pipe_context *pipe, unsigned buffers,
|
||||
const union pipe_color_union *color,
|
||||
|
@ -464,11 +545,9 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
|
|||
if (mode) {
|
||||
int zs_layers = 0, color0_layers = 0;
|
||||
if (fb->cbufs[0] && (mode & 0x3c))
|
||||
color0_layers = fb->cbufs[0]->u.tex.last_layer -
|
||||
fb->cbufs[0]->u.tex.first_layer + 1;
|
||||
color0_layers = nv50_surface(fb->cbufs[0])->depth;
|
||||
if (fb->zsbuf && (mode & ~0x3c))
|
||||
zs_layers = fb->zsbuf->u.tex.last_layer -
|
||||
fb->zsbuf->u.tex.first_layer + 1;
|
||||
zs_layers = nv50_surface(fb->zsbuf)->depth;
|
||||
|
||||
for (j = 0; j < MIN2(zs_layers, color0_layers); j++) {
|
||||
BEGIN_NV04(push, NV50_3D(CLEAR_BUFFERS), 1);
|
||||
|
@ -488,7 +567,7 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
|
|||
struct pipe_surface *sf = fb->cbufs[i];
|
||||
if (!sf || !(buffers & (PIPE_CLEAR_COLOR0 << i)))
|
||||
continue;
|
||||
for (j = 0; j <= sf->u.tex.last_layer - sf->u.tex.first_layer; j++) {
|
||||
for (j = 0; j < nv50_surface(sf)->depth; j++) {
|
||||
BEGIN_NV04(push, NV50_3D(CLEAR_BUFFERS), 1);
|
||||
PUSH_DATA (push, (i << 6) | 0x3c |
|
||||
(j << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT));
|
||||
|
@ -585,6 +664,8 @@ nv50_clear_buffer(struct pipe_context *pipe,
|
|||
PUSH_DATA (push, height);
|
||||
BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
|
||||
/* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */
|
||||
|
||||
|
@ -1593,6 +1674,7 @@ nv50_init_surface_functions(struct nv50_context *nv50)
|
|||
pipe->resource_copy_region = nv50_resource_copy_region;
|
||||
pipe->blit = nv50_blit;
|
||||
pipe->flush_resource = nv50_flush_resource;
|
||||
pipe->clear_texture = nv50_clear_texture;
|
||||
pipe->clear_render_target = nv50_clear_render_target;
|
||||
pipe->clear_depth_stencil = nv50_clear_depth_stencil;
|
||||
pipe->clear_buffer = nv50_clear_buffer;
|
||||
|
|
|
@ -636,7 +636,7 @@ nv50_draw_elements(struct nv50_context *nv50, bool shorten,
|
|||
* pushbuf submit, but it's probably not a big performance difference.
|
||||
*/
|
||||
if (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr))
|
||||
nouveau_fence_wait(buf->fence_wr);
|
||||
nouveau_fence_wait(buf->fence_wr, &nv50->base.debug);
|
||||
|
||||
while (instance_count--) {
|
||||
BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
|
||||
|
|
|
@ -120,7 +120,7 @@ nvc0_compute_validate_program(struct nvc0_context *nvc0)
|
|||
|
||||
if (!prog->translated) {
|
||||
prog->translated = nvc0_program_translate(
|
||||
prog, nvc0->screen->base.device->chipset);
|
||||
prog, nvc0->screen->base.device->chipset, &nvc0->base.debug);
|
||||
if (!prog->translated)
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -309,6 +309,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
|
|||
pipe->memory_barrier = nvc0_memory_barrier;
|
||||
pipe->get_sample_position = nvc0_context_get_sample_position;
|
||||
|
||||
nouveau_context_init(&nvc0->base);
|
||||
nvc0_init_query_functions(nvc0);
|
||||
nvc0_init_surface_functions(nvc0);
|
||||
nvc0_init_state_functions(nvc0);
|
||||
|
|
|
@ -224,7 +224,8 @@ void nvc0_default_kick_notify(struct nouveau_pushbuf *);
|
|||
extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
|
||||
|
||||
/* nvc0_program.c */
|
||||
bool nvc0_program_translate(struct nvc0_program *, uint16_t chipset);
|
||||
bool nvc0_program_translate(struct nvc0_program *, uint16_t chipset,
|
||||
struct pipe_debug_callback *);
|
||||
bool nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *);
|
||||
void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
|
||||
void nvc0_program_library_upload(struct nvc0_context *);
|
||||
|
|
|
@ -517,7 +517,8 @@ nvc0_program_dump(struct nvc0_program *prog)
|
|||
#endif
|
||||
|
||||
bool
|
||||
nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
|
||||
nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
|
||||
struct pipe_debug_callback *debug)
|
||||
{
|
||||
struct nv50_ir_prog_info *info;
|
||||
int ret;
|
||||
|
@ -639,6 +640,11 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
|
|||
prog->tfb = nvc0_program_create_tfb_state(info,
|
||||
&prog->pipe.stream_output);
|
||||
|
||||
pipe_debug_message(debug, SHADER_INFO,
|
||||
"type: %d, local: %d, gpr: %d, inst: %d, bytes: %d",
|
||||
prog->type, info->bin.tlsSpace, prog->num_gprs,
|
||||
info->bin.instructions, info->bin.codeSize);
|
||||
|
||||
out:
|
||||
FREE(info);
|
||||
return !ret;
|
||||
|
|
|
@ -182,11 +182,12 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
|
||||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
case PIPE_CAP_CLEAR_TEXTURE:
|
||||
return 1;
|
||||
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
|
||||
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
|
||||
case PIPE_CAP_COMPUTE:
|
||||
return (class_3d == NVE4_3D_CLASS) ? 1 : 0;
|
||||
return (class_3d <= NVE4_3D_CLASS) ? 1 : 0;
|
||||
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
|
||||
return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0;
|
||||
|
||||
|
@ -245,7 +246,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
|||
return 0;
|
||||
break;
|
||||
case PIPE_SHADER_COMPUTE:
|
||||
if (class_3d != NVE4_3D_CLASS)
|
||||
if (class_3d > NVE4_3D_CLASS)
|
||||
return 0;
|
||||
break;
|
||||
default:
|
||||
|
@ -415,7 +416,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
|
|||
* _current_ one, and remove both.
|
||||
*/
|
||||
nouveau_fence_ref(screen->base.fence.current, &current);
|
||||
nouveau_fence_wait(current);
|
||||
nouveau_fence_wait(current, NULL);
|
||||
nouveau_fence_ref(NULL, &current);
|
||||
nouveau_fence_ref(NULL, &screen->base.fence.current);
|
||||
}
|
||||
|
@ -547,7 +548,7 @@ nvc0_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
|
|||
/* we need to do it after possible flush in MARK_RING */
|
||||
*sequence = ++screen->base.fence.sequence;
|
||||
|
||||
assert(PUSH_AVAIL(push) >= 5);
|
||||
assert(PUSH_AVAIL(push) + push->rsvd_kick >= 5);
|
||||
PUSH_DATA (push, NVC0_FIFO_PKHDR_SQ(NVC0_3D(QUERY_ADDRESS_HIGH), 4));
|
||||
PUSH_DATAh(push, screen->fence.bo->offset);
|
||||
PUSH_DATA (push, screen->fence.bo->offset);
|
||||
|
|
|
@ -72,7 +72,7 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
|
|||
|
||||
if (!prog->translated) {
|
||||
prog->translated = nvc0_program_translate(
|
||||
prog, nvc0->screen->base.device->chipset);
|
||||
prog, nvc0->screen->base.device->chipset, &nvc0->base.debug);
|
||||
if (!prog->translated)
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -681,7 +681,8 @@ nvc0_sp_state_create(struct pipe_context *pipe,
|
|||
prog->pipe.stream_output = cso->stream_output;
|
||||
|
||||
prog->translated = nvc0_program_translate(
|
||||
prog, nvc0_context(pipe)->screen->base.device->chipset);
|
||||
prog, nvc0_context(pipe)->screen->base.device->chipset,
|
||||
&nouveau_context(pipe)->debug);
|
||||
|
||||
return (void *)prog;
|
||||
}
|
||||
|
|
|
@ -67,7 +67,7 @@ nvc0_2d_format(enum pipe_format format, bool dst, bool dst_src_equal)
|
|||
case 1:
|
||||
return NV50_SURFACE_FORMAT_R8_UNORM;
|
||||
case 2:
|
||||
return NV50_SURFACE_FORMAT_R16_UNORM;
|
||||
return NV50_SURFACE_FORMAT_RG8_UNORM;
|
||||
case 4:
|
||||
return NV50_SURFACE_FORMAT_BGRA8_UNORM;
|
||||
case 8:
|
||||
|
@ -319,6 +319,7 @@ nvc0_clear_render_target(struct pipe_context *pipe,
|
|||
PUSH_DATA(push, dst->u.tex.first_layer + sf->depth);
|
||||
PUSH_DATA(push, mt->layer_stride >> 2);
|
||||
PUSH_DATA(push, dst->u.tex.first_layer);
|
||||
IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode);
|
||||
} else {
|
||||
if (res->base.target == PIPE_BUFFER) {
|
||||
PUSH_DATA(push, 262144);
|
||||
|
@ -334,6 +335,7 @@ nvc0_clear_render_target(struct pipe_context *pipe,
|
|||
PUSH_DATA(push, 0);
|
||||
|
||||
IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0);
|
||||
IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 0);
|
||||
|
||||
/* tiled textures don't have to be fenced, they're not mapped directly */
|
||||
nvc0_resource_fence(res, NOUVEAU_BO_WR);
|
||||
|
@ -466,6 +468,7 @@ nvc0_clear_buffer(struct pipe_context *pipe,
|
|||
PUSH_DATA (push, 0);
|
||||
|
||||
IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0);
|
||||
IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 0);
|
||||
|
||||
IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
|
||||
|
||||
|
@ -540,6 +543,7 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,
|
|||
PUSH_DATA (push, (unk << 16) | (dst->u.tex.first_layer + sf->depth));
|
||||
BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1);
|
||||
PUSH_DATA (push, dst->u.tex.first_layer);
|
||||
IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode);
|
||||
|
||||
BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
|
||||
for (z = 0; z < sf->depth; ++z) {
|
||||
|
@ -1541,5 +1545,6 @@ nvc0_init_surface_functions(struct nvc0_context *nvc0)
|
|||
pipe->flush_resource = nvc0_flush_resource;
|
||||
pipe->clear_render_target = nvc0_clear_render_target;
|
||||
pipe->clear_depth_stencil = nvc0_clear_depth_stencil;
|
||||
pipe->clear_texture = nv50_clear_texture;
|
||||
pipe->clear_buffer = nvc0_clear_buffer;
|
||||
}
|
||||
|
|
|
@ -340,8 +340,8 @@ nvc0_mt_sync(struct nvc0_context *nvc0, struct nv50_miptree *mt, unsigned usage)
|
|||
return !nouveau_bo_wait(mt->base.bo, access, nvc0->base.client);
|
||||
}
|
||||
if (usage & PIPE_TRANSFER_WRITE)
|
||||
return !mt->base.fence || nouveau_fence_wait(mt->base.fence);
|
||||
return !mt->base.fence_wr || nouveau_fence_wait(mt->base.fence_wr);
|
||||
return !mt->base.fence || nouveau_fence_wait(mt->base.fence, &nvc0->base.debug);
|
||||
return !mt->base.fence_wr || nouveau_fence_wait(mt->base.fence_wr, &nvc0->base.debug);
|
||||
}
|
||||
|
||||
void *
|
||||
|
|
|
@ -199,6 +199,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
|
||||
case PIPE_CAP_SHAREABLE_SHADERS:
|
||||
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
|
||||
case PIPE_CAP_CLEAR_TEXTURE:
|
||||
return 0;
|
||||
|
||||
/* SWTCL-only features. */
|
||||
|
|
|
@ -346,7 +346,7 @@ static void evergreen_emit_direct_dispatch(
|
|||
const uint *block_layout, const uint *grid_layout)
|
||||
{
|
||||
int i;
|
||||
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
|
||||
struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
|
||||
struct r600_pipe_compute *shader = rctx->cs_shader_state.shader;
|
||||
unsigned num_waves;
|
||||
unsigned num_pipes = rctx->screen->b.info.r600_max_pipes;
|
||||
|
@ -417,12 +417,12 @@ static void evergreen_emit_direct_dispatch(
|
|||
static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
|
||||
const uint *grid_layout)
|
||||
{
|
||||
struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
|
||||
struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
|
||||
unsigned i;
|
||||
|
||||
/* make sure that the gfx ring is only one active */
|
||||
if (ctx->b.rings.dma.cs && ctx->b.rings.dma.cs->cdw) {
|
||||
ctx->b.rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
|
||||
if (ctx->b.dma.cs && ctx->b.dma.cs->cdw) {
|
||||
ctx->b.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
|
||||
}
|
||||
|
||||
/* Initialize all the compute-related registers.
|
||||
|
@ -439,7 +439,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
|
|||
/* XXX support more than 8 colorbuffers (the offsets are not a multiple of 0x3C for CB8-11) */
|
||||
for (i = 0; i < 8 && i < ctx->framebuffer.state.nr_cbufs; i++) {
|
||||
struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i];
|
||||
unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.gfx,
|
||||
unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.gfx,
|
||||
(struct r600_resource*)cb->base.texture,
|
||||
RADEON_USAGE_READWRITE,
|
||||
RADEON_PRIO_SHADER_RW_BUFFER);
|
||||
|
@ -538,7 +538,7 @@ void evergreen_emit_cs_shader(
|
|||
struct r600_cs_shader_state *state =
|
||||
(struct r600_cs_shader_state*)atom;
|
||||
struct r600_pipe_compute *shader = state->shader;
|
||||
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
|
||||
struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
|
||||
uint64_t va;
|
||||
struct r600_resource *code_bo;
|
||||
unsigned ngpr, nstack;
|
||||
|
@ -564,7 +564,7 @@ void evergreen_emit_cs_shader(
|
|||
radeon_emit(cs, 0); /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */
|
||||
|
||||
radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
|
||||
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
|
||||
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
|
||||
code_bo, RADEON_USAGE_READ,
|
||||
RADEON_PRIO_USER_SHADER));
|
||||
}
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue