/* mesa/src/gallium/drivers/radeon/r600_texture.c */

/*
* Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
* Copyright 2018 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "radeonsi/si_pipe.h"
#include "r600_cs.h"
#include "r600_query.h"
#include "util/u_format.h"
#include "util/u_log.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
#include "util/u_resource.h"
#include "util/u_surface.h"
#include "util/os_time.h"
#include <errno.h>
#include <inttypes.h>
#include "state_tracker/drm_driver.h"
#include "amd/common/sid.h"
static enum radeon_surf_mode
r600_choose_tiling(struct si_screen *sscreen,
const struct pipe_resource *templ);
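/* Decide whether a region copy can be handled by the SDMA engine.
 * Returns false for cases that need the 3D path (MSAA, depth/stencil,
 * DCC on either side, partial overwrites of a fast-cleared level).
 * On success, CMASK on the destination may be discarded and the source
 * may be flushed so that SDMA sees fully decompressed data.
 */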
bool si_prepare_for_dma_blit(struct si_context *sctx,
struct r600_texture *rdst,
unsigned dst_level, unsigned dstx,
unsigned dsty, unsigned dstz,
struct r600_texture *rsrc,
unsigned src_level,
const struct pipe_box *src_box)
{
if (!sctx->b.dma_cs)
return false;
if (rdst->surface.bpe != rsrc->surface.bpe)
return false;
/* MSAA: Blits don't exist in the real world. */
if (rsrc->resource.b.b.nr_samples > 1 ||
rdst->resource.b.b.nr_samples > 1)
return false;
/* Depth-stencil surfaces:
* When dst is linear, the DB->CB copy preserves HTILE.
* When dst is tiled, the 3D path must be used to update HTILE.
*/
if (rsrc->is_depth || rdst->is_depth)
return false;
/* DCC as:
* src: Use the 3D path. DCC decompression is expensive.
* dst: Use the 3D path to compress the pixels with DCC.
*/
if (vi_dcc_enabled(rsrc, src_level) ||
vi_dcc_enabled(rdst, dst_level))
return false;
/* CMASK as:
* src: Both texture and SDMA paths need decompression. Use SDMA.
* dst: If overwriting the whole texture, discard CMASK and use
* SDMA. Otherwise, use the 3D path.
*/
if (rdst->cmask.size && rdst->dirty_level_mask & (1 << dst_level)) {
/* The CMASK clear is only enabled for the first level. */
assert(dst_level == 0);
if (!util_texrange_covers_whole_level(&rdst->resource.b.b, dst_level,
dstx, dsty, dstz, src_box->width,
src_box->height, src_box->depth))
return false;
si_texture_discard_cmask(sctx->screen, rdst);
}
/* All requirements are met. Prepare textures for SDMA. */
if (rsrc->cmask.size && rsrc->dirty_level_mask & (1 << src_level))
sctx->b.b.flush_resource(&sctx->b.b, &rsrc->resource.b.b);
assert(!(rsrc->dirty_level_mask & (1 << src_level)));
assert(!(rdst->dirty_level_mask & (1 << dst_level)));
return true;
}
/* Same as resource_copy_region, except that both upsampling and downsampling are allowed. */
static void r600_copy_region_with_blit(struct pipe_context *pipe,
struct pipe_resource *dst,
unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct pipe_resource *src,
unsigned src_level,
const struct pipe_box *src_box)
{
struct pipe_blit_info blit;
memset(&blit, 0, sizeof(blit));
blit.src.resource = src;
blit.src.format = src->format;
blit.src.level = src_level;
blit.src.box = *src_box;
blit.dst.resource = dst;
blit.dst.format = dst->format;
blit.dst.level = dst_level;
blit.dst.box.x = dstx;
blit.dst.box.y = dsty;
blit.dst.box.z = dstz;
blit.dst.box.width = src_box->width;
blit.dst.box.height = src_box->height;
blit.dst.box.depth = src_box->depth;
blit.mask = util_format_get_mask(src->format) &
util_format_get_mask(dst->format);
blit.filter = PIPE_TEX_FILTER_NEAREST;
if (blit.mask) {
pipe->blit(pipe, &blit);
}
}
/* Copy from a full GPU texture to a transfer's staging one. */
static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
{
struct si_context *sctx = (struct si_context*)ctx;
struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
struct pipe_resource *dst = &rtransfer->staging->b.b;
struct pipe_resource *src = transfer->resource;
if (src->nr_samples > 1) {
r600_copy_region_with_blit(ctx, dst, 0, 0, 0, 0,
src, transfer->level, &transfer->box);
return;
}
sctx->b.dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level,
&transfer->box);
}
/* Copy from a transfer's staging texture to a full GPU one. */
static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
{
struct si_context *sctx = (struct si_context*)ctx;
struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
struct pipe_resource *dst = transfer->resource;
struct pipe_resource *src = &rtransfer->staging->b.b;
struct pipe_box sbox;
u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, transfer->box.depth, &sbox);
if (dst->nr_samples > 1) {
r600_copy_region_with_blit(ctx, dst, transfer->level,
transfer->box.x, transfer->box.y, transfer->box.z,
src, 0, &sbox);
return;
}
sctx->b.dma_copy(ctx, dst, transfer->level,
transfer->box.x, transfer->box.y, transfer->box.z,
src, 0, &sbox);
}
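/* Return the byte offset of the (box->x, box->y, box->z) block within mip
 * "level" and report the row and layer strides in bytes. GFX9 stores all
 * mip levels of a slice together, while older chips store all slices of a
 * mip level together, e.g. on the legacy path:
 *
 *   offset = level_offset + box->z * slice_size
 *            + (box->y / blk_h * nblk_x + box->x / blk_w) * bpe
 */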
static unsigned r600_texture_get_offset(struct si_screen *sscreen,
struct r600_texture *rtex, unsigned level,
const struct pipe_box *box,
unsigned *stride,
unsigned *layer_stride)
{
if (sscreen->info.chip_class >= GFX9) {
*stride = rtex->surface.u.gfx9.surf_pitch * rtex->surface.bpe;
*layer_stride = rtex->surface.u.gfx9.surf_slice_size;
if (!box)
return 0;
/* Each texture is an array of slices. Each slice is an array
* of mipmap levels. */
return box->z * rtex->surface.u.gfx9.surf_slice_size +
rtex->surface.u.gfx9.offset[level] +
(box->y / rtex->surface.blk_h *
rtex->surface.u.gfx9.surf_pitch +
box->x / rtex->surface.blk_w) * rtex->surface.bpe;
} else {
*stride = rtex->surface.u.legacy.level[level].nblk_x *
rtex->surface.bpe;
assert((uint64_t)rtex->surface.u.legacy.level[level].slice_size_dw * 4 <= UINT_MAX);
*layer_stride = (uint64_t)rtex->surface.u.legacy.level[level].slice_size_dw * 4;
if (!box)
return rtex->surface.u.legacy.level[level].offset;
/* Each texture is an array of mipmap levels. Each level is
* an array of slices. */
return rtex->surface.u.legacy.level[level].offset +
box->z * (uint64_t)rtex->surface.u.legacy.level[level].slice_size_dw * 4 +
(box->y / rtex->surface.blk_h *
rtex->surface.u.legacy.level[level].nblk_x +
box->x / rtex->surface.blk_w) * rtex->surface.bpe;
}
}
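/* Compute the surface layout for ptex via the winsys surface allocator.
 * Gallium texture properties (depth/stencil, scanout, shared/imported,
 * DCC restrictions) are translated into RADEON_SURF_* flags first, and any
 * caller-provided pitch or offset override is applied to the result.
 */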
static int r600_init_surface(struct si_screen *sscreen,
struct radeon_surf *surface,
const struct pipe_resource *ptex,
enum radeon_surf_mode array_mode,
unsigned pitch_in_bytes_override,
unsigned offset,
bool is_imported,
bool is_scanout,
bool is_flushed_depth,
bool tc_compatible_htile)
{
const struct util_format_description *desc =
util_format_description(ptex->format);
bool is_depth, is_stencil;
int r;
unsigned i, bpe, flags = 0;
is_depth = util_format_has_depth(desc);
is_stencil = util_format_has_stencil(desc);
if (!is_flushed_depth &&
ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
bpe = 4; /* stencil is allocated separately on evergreen */
} else {
bpe = util_format_get_blocksize(ptex->format);
assert(util_is_power_of_two_or_zero(bpe));
}
if (!is_flushed_depth && is_depth) {
flags |= RADEON_SURF_ZBUFFER;
if (tc_compatible_htile &&
(sscreen->info.chip_class >= GFX9 ||
array_mode == RADEON_SURF_MODE_2D)) {
/* TC-compatible HTILE only supports Z32_FLOAT.
* GFX9 also supports Z16_UNORM.
* On VI, promote Z16 to Z32. DB->CB copies will convert
* the format for transfers.
*/
if (sscreen->info.chip_class == VI)
bpe = 4;
flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
}
if (is_stencil)
flags |= RADEON_SURF_SBUFFER;
}
if (sscreen->info.chip_class >= VI &&
(ptex->flags & R600_RESOURCE_FLAG_DISABLE_DCC ||
ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT ||
/* DCC MSAA array textures are disallowed due to incomplete clear impl. */
(ptex->nr_samples >= 2 &&
(!sscreen->dcc_msaa_allowed || ptex->array_size > 1))))
flags |= RADEON_SURF_DISABLE_DCC;
if (ptex->bind & PIPE_BIND_SCANOUT || is_scanout) {
/* This should catch bugs in gallium users setting incorrect flags. */
assert(ptex->nr_samples <= 1 &&
ptex->array_size == 1 &&
ptex->depth0 == 1 &&
ptex->last_level == 0 &&
!(flags & RADEON_SURF_Z_OR_SBUFFER));
flags |= RADEON_SURF_SCANOUT;
}
if (ptex->bind & PIPE_BIND_SHARED)
flags |= RADEON_SURF_SHAREABLE;
if (is_imported)
flags |= RADEON_SURF_IMPORTED | RADEON_SURF_SHAREABLE;
if (!(ptex->flags & R600_RESOURCE_FLAG_FORCE_TILING))
flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;
r = sscreen->ws->surface_init(sscreen->ws, ptex, flags, bpe,
array_mode, surface);
if (r) {
return r;
}
unsigned pitch = pitch_in_bytes_override / bpe;
if (sscreen->info.chip_class >= GFX9) {
if (pitch) {
surface->u.gfx9.surf_pitch = pitch;
surface->u.gfx9.surf_slice_size =
(uint64_t)pitch * surface->u.gfx9.surf_height * bpe;
}
surface->u.gfx9.surf_offset = offset;
} else {
if (pitch) {
surface->u.legacy.level[0].nblk_x = pitch;
surface->u.legacy.level[0].slice_size_dw =
((uint64_t)pitch * surface->u.legacy.level[0].nblk_y * bpe) / 4;
}
if (offset) {
for (i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
surface->u.legacy.level[i].offset += offset;
}
}
return 0;
}
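/* Pack the surface layout into radeon_bo_metadata so that other processes
 * importing the BO can reconstruct the tiling parameters.
 */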
static void r600_texture_init_metadata(struct si_screen *sscreen,
struct r600_texture *rtex,
struct radeon_bo_metadata *metadata)
{
struct radeon_surf *surface = &rtex->surface;
memset(metadata, 0, sizeof(*metadata));
if (sscreen->info.chip_class >= GFX9) {
metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
} else {
metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
metadata->u.legacy.bankw = surface->u.legacy.bankw;
metadata->u.legacy.bankh = surface->u.legacy.bankh;
metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
}
}
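/* The inverse of r600_texture_init_metadata: recover the array mode,
 * scanout flag, and tiling parameters of an imported BO from its metadata.
 */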
static void r600_surface_import_metadata(struct si_screen *sscreen,
struct radeon_surf *surf,
struct radeon_bo_metadata *metadata,
enum radeon_surf_mode *array_mode,
bool *is_scanout)
{
if (sscreen->info.chip_class >= GFX9) {
if (metadata->u.gfx9.swizzle_mode > 0)
*array_mode = RADEON_SURF_MODE_2D;
else
*array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
*is_scanout = metadata->u.gfx9.swizzle_mode == 0 ||
metadata->u.gfx9.swizzle_mode % 4 == 2;
surf->u.gfx9.surf.swizzle_mode = metadata->u.gfx9.swizzle_mode;
} else {
surf->u.legacy.pipe_config = metadata->u.legacy.pipe_config;
surf->u.legacy.bankw = metadata->u.legacy.bankw;
surf->u.legacy.bankh = metadata->u.legacy.bankh;
surf->u.legacy.tile_split = metadata->u.legacy.tile_split;
surf->u.legacy.mtilea = metadata->u.legacy.mtilea;
surf->u.legacy.num_banks = metadata->u.legacy.num_banks;
if (metadata->u.legacy.macrotile == RADEON_LAYOUT_TILED)
*array_mode = RADEON_SURF_MODE_2D;
else if (metadata->u.legacy.microtile == RADEON_LAYOUT_TILED)
*array_mode = RADEON_SURF_MODE_1D;
else
*array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
*is_scanout = metadata->u.legacy.scanout;
}
}
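/* Decompress any fast color clear by flushing the resource, and submit the
 * work right away if a decompression pass was actually recorded. The
 * aux_context lock guards the case where this runs on the screen's helper
 * context.
 */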
void si_eliminate_fast_color_clear(struct si_context *sctx,
struct r600_texture *rtex)
{
struct si_screen *sscreen = sctx->screen;
struct pipe_context *ctx = &sctx->b.b;
if (ctx == sscreen->aux_context)
mtx_lock(&sscreen->aux_context_lock);
unsigned n = sctx->b.num_decompress_calls;
ctx->flush_resource(ctx, &rtex->resource.b.b);
/* Flush only if any fast clear elimination took place. */
if (n != sctx->b.num_decompress_calls)
ctx->flush(ctx, NULL, 0);
if (ctx == sscreen->aux_context)
mtx_unlock(&sscreen->aux_context_lock);
}
void si_texture_discard_cmask(struct si_screen *sscreen,
struct r600_texture *rtex)
{
if (!rtex->cmask.size)
return;
assert(rtex->resource.b.b.nr_samples <= 1);
/* Disable CMASK. */
memset(&rtex->cmask, 0, sizeof(rtex->cmask));
rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8;
rtex->dirty_level_mask = 0;
rtex->cb_color_info &= ~S_028C70_FAST_CLEAR(1);
if (rtex->cmask_buffer != &rtex->resource)
r600_resource_reference(&rtex->cmask_buffer, NULL);
/* Notify all contexts about the change. */
p_atomic_inc(&sscreen->dirty_tex_counter);
p_atomic_inc(&sscreen->compressed_colortex_counter);
}
static bool r600_can_disable_dcc(struct r600_texture *rtex)
{
/* We can't disable DCC if it can be written by another process. */
return rtex->dcc_offset &&
(!rtex->resource.b.is_shared ||
!(rtex->resource.external_usage & PIPE_HANDLE_USAGE_WRITE));
}
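/* Forget the DCC metadata without decompressing anything. Callers must
 * ensure the compressed contents are either already decompressed or about
 * to be discarded (see si_texture_disable_dcc below).
 */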
static bool r600_texture_discard_dcc(struct si_screen *sscreen,
struct r600_texture *rtex)
{
if (!r600_can_disable_dcc(rtex))
return false;
assert(rtex->dcc_separate_buffer == NULL);
/* Disable DCC. */
rtex->dcc_offset = 0;
/* Notify all contexts about the change. */
p_atomic_inc(&sscreen->dirty_tex_counter);
return true;
}
/**
* Disable DCC for the texture. (first decompress, then discard metadata).
*
* There is an unresolved multi-context synchronization issue between
* screen::aux_context and the current context. If applications do this with
* multiple contexts, it's already undefined behavior for them and we don't
* have to worry about that. The scenario is:
*
* If context 1 disables DCC and context 2 has queued commands that write
* to the texture via CB with DCC enabled, and the order of operations is
* as follows:
* context 2 queues draw calls rendering to the texture, but doesn't flush
* context 1 disables DCC and flushes
* context 1 & 2 reset descriptors and FB state
* context 2 flushes (new compressed tiles written by the draw calls)
* context 1 & 2 read garbage, because DCC is disabled, yet there are
* compressed tiles.
*
* \param sctx the current context if you have one, or sscreen->aux_context
* if you don't.
*/
bool si_texture_disable_dcc(struct si_context *sctx,
struct r600_texture *rtex)
{
struct si_screen *sscreen = sctx->screen;
if (!r600_can_disable_dcc(rtex))
return false;
if (&sctx->b.b == sscreen->aux_context)
mtx_lock(&sscreen->aux_context_lock);
/* Decompress DCC. */
si_decompress_dcc(sctx, rtex);
sctx->b.b.flush(&sctx->b.b, NULL, 0);
if (&sctx->b.b == sscreen->aux_context)
mtx_unlock(&sscreen->aux_context_lock);
return r600_texture_discard_dcc(sscreen, rtex);
}
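/* Recreate the texture with an extra bind flag (PIPE_BIND_SHARED or
 * PIPE_BIND_LINEAR), optionally copy the old contents over, and then make
 * rtex point at the new allocation by replacing its buffer and layout
 * fields in place.
 */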
static void r600_reallocate_texture_inplace(struct si_context *sctx,
struct r600_texture *rtex,
unsigned new_bind_flag,
bool invalidate_storage)
{
struct pipe_screen *screen = sctx->b.b.screen;
struct r600_texture *new_tex;
struct pipe_resource templ = rtex->resource.b.b;
unsigned i;
templ.bind |= new_bind_flag;
if (rtex->resource.b.is_shared)
return;
if (new_bind_flag == PIPE_BIND_LINEAR) {
if (rtex->surface.is_linear)
return;
/* This fails with MSAA, depth, and compressed textures. */
if (r600_choose_tiling(sctx->screen, &templ) !=
RADEON_SURF_MODE_LINEAR_ALIGNED)
return;
}
new_tex = (struct r600_texture*)screen->resource_create(screen, &templ);
if (!new_tex)
return;
/* Copy the pixels to the new texture. */
if (!invalidate_storage) {
for (i = 0; i <= templ.last_level; i++) {
struct pipe_box box;
u_box_3d(0, 0, 0,
u_minify(templ.width0, i), u_minify(templ.height0, i),
util_num_layers(&templ, i), &box);
sctx->b.dma_copy(&sctx->b.b, &new_tex->resource.b.b, i, 0, 0, 0,
&rtex->resource.b.b, i, &box);
}
}
if (new_bind_flag == PIPE_BIND_LINEAR) {
si_texture_discard_cmask(sctx->screen, rtex);
r600_texture_discard_dcc(sctx->screen, rtex);
}
/* Replace the structure fields of rtex. */
rtex->resource.b.b.bind = templ.bind;
pb_reference(&rtex->resource.buf, new_tex->resource.buf);
rtex->resource.gpu_address = new_tex->resource.gpu_address;
rtex->resource.vram_usage = new_tex->resource.vram_usage;
rtex->resource.gart_usage = new_tex->resource.gart_usage;
rtex->resource.bo_size = new_tex->resource.bo_size;
rtex->resource.bo_alignment = new_tex->resource.bo_alignment;
rtex->resource.domains = new_tex->resource.domains;
rtex->resource.flags = new_tex->resource.flags;
rtex->size = new_tex->size;
rtex->db_render_format = new_tex->db_render_format;
rtex->db_compatible = new_tex->db_compatible;
rtex->can_sample_z = new_tex->can_sample_z;
rtex->can_sample_s = new_tex->can_sample_s;
rtex->surface = new_tex->surface;
rtex->fmask = new_tex->fmask;
rtex->cmask = new_tex->cmask;
rtex->cb_color_info = new_tex->cb_color_info;
rtex->last_msaa_resolve_target_micro_mode = new_tex->last_msaa_resolve_target_micro_mode;
rtex->htile_offset = new_tex->htile_offset;
rtex->tc_compatible_htile = new_tex->tc_compatible_htile;
rtex->depth_cleared = new_tex->depth_cleared;
rtex->stencil_cleared = new_tex->stencil_cleared;
rtex->dcc_gather_statistics = new_tex->dcc_gather_statistics;
rtex->framebuffers_bound = new_tex->framebuffers_bound;
if (new_bind_flag == PIPE_BIND_LINEAR) {
assert(!rtex->htile_offset);
assert(!rtex->cmask.size);
assert(!rtex->fmask.size);
assert(!rtex->dcc_offset);
assert(!rtex->is_depth);
}
r600_texture_reference(&new_tex, NULL);
p_atomic_inc(&sctx->screen->dirty_tex_counter);
}
static uint32_t si_get_bo_metadata_word1(struct si_screen *sscreen)
{
return (ATI_VENDOR_ID << 16) | sscreen->info.pci_id;
}
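/* Serialize an image descriptor plus per-level offsets into the opaque
 * metadata words attached to exported BOs, so other drivers and processes
 * can sample the texture without re-deriving the layout. Only written for
 * DRM 3.x.
 */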
static void si_query_opaque_metadata(struct si_screen *sscreen,
struct r600_texture *rtex,
struct radeon_bo_metadata *md)
{
struct pipe_resource *res = &rtex->resource.b.b;
static const unsigned char swizzle[] = {
PIPE_SWIZZLE_X,
PIPE_SWIZZLE_Y,
PIPE_SWIZZLE_Z,
PIPE_SWIZZLE_W
};
uint32_t desc[8], i;
bool is_array = util_texture_is_array(res->target);
/* DRM 2.x.x doesn't support this. */
if (sscreen->info.drm_major != 3)
return;
assert(rtex->dcc_separate_buffer == NULL);
assert(rtex->fmask.size == 0);
* Metadata image format version 1:
* [0] = 1 (metadata format identifier)
* [1] = (VENDOR_ID << 16) | PCI_ID
* [2:9] = image descriptor for the whole resource
* [2] is always 0, because the base address is cleared
* [9] is the DCC offset bits [39:8] from the beginning of
* the buffer
* [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
*/
md->metadata[0] = 1; /* metadata image format version 1 */
/* TILE_MODE_INDEX is ambiguous without a PCI ID. */
md->metadata[1] = si_get_bo_metadata_word1(sscreen);
si_make_texture_descriptor(sscreen, rtex, true,
res->target, res->format,
swizzle, 0, res->last_level, 0,
is_array ? res->array_size - 1 : 0,
res->width0, res->height0, res->depth0,
desc, NULL);
si_set_mutable_tex_desc_fields(sscreen, rtex, &rtex->surface.u.legacy.level[0],
0, 0, rtex->surface.blk_w, false, desc);
/* Clear the base address and set the relative DCC offset. */
desc[0] = 0;
desc[1] &= C_008F14_BASE_ADDRESS_HI;
desc[7] = rtex->dcc_offset >> 8;
/* Dwords [2:9] contain the image descriptor. */
memcpy(&md->metadata[2], desc, sizeof(desc));
md->size_metadata = 10 * 4;
/* Dwords [10:..] contain the mipmap level offsets. */
if (sscreen->info.chip_class <= VI) {
for (i = 0; i <= res->last_level; i++)
md->metadata[10+i] = rtex->surface.u.legacy.level[i].offset >> 8;
md->size_metadata += (1 + res->last_level) * 4;
}
}
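/* Parse the opaque metadata of an imported BO: if it contains a valid
 * descriptor with DCC compression enabled, adopt the DCC offset; otherwise
 * make sure DCC stays disabled for this texture.
 */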
static void si_apply_opaque_metadata(struct si_screen *sscreen,
struct r600_texture *rtex,
struct radeon_bo_metadata *md)
{
uint32_t *desc = &md->metadata[2];
if (sscreen->info.chip_class < VI)
return;
/* Return if DCC is enabled. The texture should be set up with it
* already.
*/
if (md->size_metadata >= 10 * 4 && /* at least 2(header) + 8(desc) dwords */
md->metadata[0] != 0 &&
md->metadata[1] == si_get_bo_metadata_word1(sscreen) &&
G_008F28_COMPRESSION_EN(desc[6])) {
rtex->dcc_offset = (uint64_t)desc[7] << 8;
return;
}
/* Disable DCC. These are always set by texture_from_handle and must
* be cleared here.
*/
rtex->dcc_offset = 0;
}
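/* Export a texture or buffer as a winsys handle. Suballocated and
 * non-shareable BOs are reallocated as shareable first, fast clears are
 * eliminated, DCC is disabled for external writers, and the layout
 * metadata is attached before the handle is returned.
 */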
static boolean r600_texture_get_handle(struct pipe_screen* screen,
struct pipe_context *ctx,
struct pipe_resource *resource,
struct winsys_handle *whandle,
unsigned usage)
{
struct si_screen *sscreen = (struct si_screen*)screen;
struct si_context *sctx;
struct r600_resource *res = (struct r600_resource*)resource;
struct r600_texture *rtex = (struct r600_texture*)resource;
struct radeon_bo_metadata metadata;
bool update_metadata = false;
unsigned stride, offset, slice_size;
bool flush = false;
ctx = threaded_context_unwrap_sync(ctx);
sctx = (struct si_context*)(ctx ? ctx : sscreen->aux_context);
if (resource->target != PIPE_BUFFER) {
/* This is not supported now, but it might be required for OpenCL
* interop in the future.
*/
if (resource->nr_samples > 1 || rtex->is_depth)
return false;
/* Move a suballocated texture into a non-suballocated allocation. */
if (sscreen->ws->buffer_is_suballocated(res->buf) ||
rtex->surface.tile_swizzle ||
(rtex->resource.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
sscreen->info.has_local_buffers &&
whandle->type != DRM_API_HANDLE_TYPE_KMS)) {
assert(!res->b.is_shared);
r600_reallocate_texture_inplace(sctx, rtex,
PIPE_BIND_SHARED, false);
flush = true;
assert(res->b.b.bind & PIPE_BIND_SHARED);
assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
assert(!(res->flags & RADEON_FLAG_NO_INTERPROCESS_SHARING));
assert(rtex->surface.tile_swizzle == 0);
}
/* Since shader image stores don't support DCC on VI,
* disable it for external clients that want write
* access.
*/
if (usage & PIPE_HANDLE_USAGE_WRITE && rtex->dcc_offset) {
if (si_texture_disable_dcc(sctx, rtex)) {
update_metadata = true;
/* si_texture_disable_dcc flushes the context */
flush = false;
}
}
if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
(rtex->cmask.size || rtex->dcc_offset)) {
/* Eliminate fast clear (both CMASK and DCC) */
si_eliminate_fast_color_clear(sctx, rtex);
/* eliminate_fast_color_clear flushes the context */
flush = false;
/* Disable CMASK if flush_resource isn't going
* to be called.
*/
if (rtex->cmask.size)
si_texture_discard_cmask(sscreen, rtex);
}
/* Set metadata. */
if (!res->b.is_shared || update_metadata) {
r600_texture_init_metadata(sscreen, rtex, &metadata);
si_query_opaque_metadata(sscreen, rtex, &metadata);
sscreen->ws->buffer_set_metadata(res->buf, &metadata);
}
if (sscreen->info.chip_class >= GFX9) {
offset = rtex->surface.u.gfx9.surf_offset;
stride = rtex->surface.u.gfx9.surf_pitch *
rtex->surface.bpe;
slice_size = rtex->surface.u.gfx9.surf_slice_size;
} else {
offset = rtex->surface.u.legacy.level[0].offset;
stride = rtex->surface.u.legacy.level[0].nblk_x *
rtex->surface.bpe;
slice_size = (uint64_t)rtex->surface.u.legacy.level[0].slice_size_dw * 4;
}
} else {
/* Buffer exports are for the OpenCL interop. */
/* Move a suballocated buffer into a non-suballocated allocation. */
if (sscreen->ws->buffer_is_suballocated(res->buf) ||
/* A DMABUF export always fails if the BO is local. */
(rtex->resource.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
sscreen->info.has_local_buffers)) {
assert(!res->b.is_shared);
/* Allocate a new buffer with PIPE_BIND_SHARED. */
struct pipe_resource templ = res->b.b;
templ.bind |= PIPE_BIND_SHARED;
struct pipe_resource *newb =
screen->resource_create(screen, &templ);
if (!newb)
return false;
/* Copy the old buffer contents to the new one. */
struct pipe_box box;
u_box_1d(0, newb->width0, &box);
sctx->b.b.resource_copy_region(&sctx->b.b, newb, 0, 0, 0, 0,
&res->b.b, 0, &box);
flush = true;
/* Move the new buffer storage to the old pipe_resource. */
si_replace_buffer_storage(&sctx->b.b, &res->b.b, newb);
pipe_resource_reference(&newb, NULL);
assert(res->b.b.bind & PIPE_BIND_SHARED);
assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
}
/* Buffers */
offset = 0;
stride = 0;
slice_size = 0;
}
if (flush)
sctx->b.b.flush(&sctx->b.b, NULL, 0);
if (res->b.is_shared) {
/* USAGE_EXPLICIT_FLUSH must be cleared if at least one user
* doesn't set it.
*/
res->external_usage |= usage & ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
res->external_usage &= ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
} else {
res->b.is_shared = true;
res->external_usage = usage;
}
return sscreen->ws->buffer_get_handle(res->buf, stride, offset,
slice_size, whandle);
}
static void r600_texture_destroy(struct pipe_screen *screen,
struct pipe_resource *ptex)
{
struct r600_texture *rtex = (struct r600_texture*)ptex;
struct r600_resource *resource = &rtex->resource;
r600_texture_reference(&rtex->flushed_depth_texture, NULL);
if (rtex->cmask_buffer != &rtex->resource) {
r600_resource_reference(&rtex->cmask_buffer, NULL);
}
pb_reference(&resource->buf, NULL);
r600_resource_reference(&rtex->dcc_separate_buffer, NULL);
r600_resource_reference(&rtex->last_dcc_separate_buffer, NULL);
FREE(rtex);
}
static const struct u_resource_vtbl r600_texture_vtbl;
/* The number of samples can be specified independently of the texture. */
void si_texture_get_fmask_info(struct si_screen *sscreen,
struct r600_texture *rtex,
unsigned nr_samples,
struct r600_fmask_info *out)
{
/* FMASK is allocated like an ordinary texture. */
struct pipe_resource templ = rtex->resource.b.b;
struct radeon_surf fmask = {};
unsigned flags, bpe;
memset(out, 0, sizeof(*out));
if (sscreen->info.chip_class >= GFX9) {
out->alignment = rtex->surface.u.gfx9.fmask_alignment;
out->size = rtex->surface.u.gfx9.fmask_size;
out->tile_swizzle = rtex->surface.u.gfx9.fmask_tile_swizzle;
return;
}
templ.nr_samples = 1;
flags = rtex->surface.flags | RADEON_SURF_FMASK;
switch (nr_samples) {
case 2:
case 4:
bpe = 1;
break;
case 8:
bpe = 4;
break;
default:
R600_ERR("Invalid sample count for FMASK allocation.\n");
return;
}
if (sscreen->ws->surface_init(sscreen->ws, &templ, flags, bpe,
RADEON_SURF_MODE_2D, &fmask)) {
R600_ERR("Got error in surface_init while allocating FMASK.\n");
return;
}
assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
out->slice_tile_max = (fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64;
if (out->slice_tile_max)
out->slice_tile_max -= 1;
out->tile_mode_index = fmask.u.legacy.tiling_index[0];
out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
out->bank_height = fmask.u.legacy.bankh;
out->tile_swizzle = fmask.tile_swizzle;
out->alignment = MAX2(256, fmask.surf_alignment);
out->size = fmask.surf_size;
}
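/* Note: the FMASK/CMASK/HTILE/DCC allocators below all follow the same
 * pattern: the metadata surface is appended to the end of the texture
 * allocation by aligning the current rtex->size to the metadata alignment
 * and then growing rtex->size past it. Illustrative example (hypothetical
 * numbers, the real values come from the winsys surface computation):
 * a 1000000-byte surface with a 65536-byte FMASK alignment would place
 * FMASK at offset align64(1000000, 65536) = 1048576.
 */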
static void r600_texture_allocate_fmask(struct si_screen *sscreen,
struct r600_texture *rtex)
{
si_texture_get_fmask_info(sscreen, rtex,
rtex->resource.b.b.nr_samples, &rtex->fmask);
rtex->fmask.offset = align64(rtex->size, rtex->fmask.alignment);
rtex->size = rtex->fmask.offset + rtex->fmask.size;
}
void si_texture_get_cmask_info(struct si_screen *sscreen,
struct r600_texture *rtex,
struct r600_cmask_info *out)
{
unsigned pipe_interleave_bytes = sscreen->info.pipe_interleave_bytes;
unsigned num_pipes = sscreen->info.num_tile_pipes;
unsigned cl_width, cl_height;
if (sscreen->info.chip_class >= GFX9) {
out->alignment = rtex->surface.u.gfx9.cmask_alignment;
out->size = rtex->surface.u.gfx9.cmask_size;
return;
}
switch (num_pipes) {
case 2:
cl_width = 32;
cl_height = 16;
break;
case 4:
cl_width = 32;
cl_height = 32;
break;
case 8:
cl_width = 64;
cl_height = 32;
break;
case 16: /* Hawaii */
cl_width = 64;
cl_height = 64;
break;
default:
assert(0);
return;
}
unsigned base_align = num_pipes * pipe_interleave_bytes;
unsigned width = align(rtex->resource.b.b.width0, cl_width*8);
unsigned height = align(rtex->resource.b.b.height0, cl_height*8);
unsigned slice_elements = (width * height) / (8*8);
/* Each element of CMASK is a nibble. */
unsigned slice_bytes = slice_elements / 2;
out->slice_tile_max = (width * height) / (128*128);
if (out->slice_tile_max)
out->slice_tile_max -= 1;
out->alignment = MAX2(256, base_align);
out->size = util_num_layers(&rtex->resource.b.b, 0) *
align(slice_bytes, base_align);
}
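/* Worked example for the legacy CMASK path above (illustrative only;
 * assumes num_pipes = 8 and pipe_interleave_bytes = 256, which is not
 * guaranteed for every chip): a 1920x1080 single-layer texture is padded
 * to 2048x1280 (multiples of cl_width*8 = 512 and cl_height*8 = 256).
 * With one nibble per 8x8 block that is 2048*1280/64 = 40960 elements,
 * i.e. 20480 bytes per slice, already a multiple of base_align = 2048,
 * and slice_tile_max = 2048*1280/(128*128) - 1 = 159.
 */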
static void r600_texture_allocate_cmask(struct si_screen *sscreen,
struct r600_texture *rtex)
{
si_texture_get_cmask_info(sscreen, rtex, &rtex->cmask);
rtex->cmask.offset = align64(rtex->size, rtex->cmask.alignment);
rtex->size = rtex->cmask.offset + rtex->cmask.size;
rtex->cb_color_info |= S_028C70_FAST_CLEAR(1);
}
static void r600_texture_get_htile_size(struct si_screen *sscreen,
struct r600_texture *rtex)
{
unsigned cl_width, cl_height, width, height;
unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
unsigned num_pipes = sscreen->info.num_tile_pipes;
assert(sscreen->info.chip_class <= VI);
rtex->surface.htile_size = 0;
/* HTILE is broken with 1D tiling on CIK and newer when running on old kernels (DRM 2.x, minor < 38). */
if (sscreen->info.chip_class >= CIK &&
rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
sscreen->info.drm_major == 2 && sscreen->info.drm_minor < 38)
return;
/* Overalign HTILE on P2 configs to work around GPU hangs in
* piglit/depthstencil-render-miplevels 585.
*
* This has been confirmed to help Kabini & Stoney, where the hangs
* are always reproducible. I think I have seen the test hang
* on Carrizo too, though it was very rare there.
*/
if (sscreen->info.chip_class >= CIK && num_pipes < 4)
num_pipes = 4;
switch (num_pipes) {
case 1:
cl_width = 32;
cl_height = 16;
break;
case 2:
cl_width = 32;
cl_height = 32;
break;
case 4:
cl_width = 64;
cl_height = 32;
break;
case 8:
cl_width = 64;
cl_height = 64;
break;
case 16:
cl_width = 128;
cl_height = 64;
break;
default:
assert(0);
return;
}
width = align(rtex->resource.b.b.width0, cl_width * 8);
height = align(rtex->resource.b.b.height0, cl_height * 8);
slice_elements = (width * height) / (8 * 8);
slice_bytes = slice_elements * 4;
pipe_interleave_bytes = sscreen->info.pipe_interleave_bytes;
base_align = num_pipes * pipe_interleave_bytes;
rtex->surface.htile_alignment = base_align;
rtex->surface.htile_size =
util_num_layers(&rtex->resource.b.b, 0) *
align(slice_bytes, base_align);
}
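/* Illustrative HTILE sizing for the legacy path above (hypothetical
 * values; assumes num_pipes = 4 and pipe_interleave_bytes = 256):
 * a 1920x1080 depth buffer is padded to 2048x1280 (cl_width*8 = 512,
 * cl_height*8 = 256), giving 2048*1280/64 = 40960 8x8 tiles at 4 bytes
 * each, i.e. 163840 bytes per slice, which is already a multiple of
 * base_align = 4*256 = 1024.
 */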
static void r600_texture_allocate_htile(struct si_screen *sscreen,
struct r600_texture *rtex)
{
if (sscreen->info.chip_class <= VI && !rtex->tc_compatible_htile)
r600_texture_get_htile_size(sscreen, rtex);
if (!rtex->surface.htile_size)
return;
rtex->htile_offset = align(rtex->size, rtex->surface.htile_alignment);
rtex->size = rtex->htile_offset + rtex->surface.htile_size;
}
void si_print_texture_info(struct si_screen *sscreen,
struct r600_texture *rtex, struct u_log_context *log)
{
int i;
/* Common parameters. */
u_log_printf(log, " Info: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
"blk_h=%u, array_size=%u, last_level=%u, "
"bpe=%u, nsamples=%u, flags=0x%x, %s\n",
rtex->resource.b.b.width0, rtex->resource.b.b.height0,
rtex->resource.b.b.depth0, rtex->surface.blk_w,
rtex->surface.blk_h,
rtex->resource.b.b.array_size, rtex->resource.b.b.last_level,
rtex->surface.bpe, rtex->resource.b.b.nr_samples,
rtex->surface.flags, util_format_short_name(rtex->resource.b.b.format));
if (sscreen->info.chip_class >= GFX9) {
u_log_printf(log, " Surf: size=%"PRIu64", slice_size=%"PRIu64", "
"alignment=%u, swmode=%u, epitch=%u, pitch=%u\n",
rtex->surface.surf_size,
rtex->surface.u.gfx9.surf_slice_size,
rtex->surface.surf_alignment,
rtex->surface.u.gfx9.surf.swizzle_mode,
rtex->surface.u.gfx9.surf.epitch,
rtex->surface.u.gfx9.surf_pitch);
if (rtex->fmask.size) {
u_log_printf(log, " FMASK: offset=%"PRIu64", size=%"PRIu64", "
"alignment=%u, swmode=%u, epitch=%u\n",
rtex->fmask.offset,
rtex->surface.u.gfx9.fmask_size,
rtex->surface.u.gfx9.fmask_alignment,
rtex->surface.u.gfx9.fmask.swizzle_mode,
rtex->surface.u.gfx9.fmask.epitch);
}
if (rtex->cmask.size) {
u_log_printf(log, " CMask: offset=%"PRIu64", size=%"PRIu64", "
"alignment=%u, rb_aligned=%u, pipe_aligned=%u\n",
rtex->cmask.offset,
rtex->surface.u.gfx9.cmask_size,
rtex->surface.u.gfx9.cmask_alignment,
rtex->surface.u.gfx9.cmask.rb_aligned,
rtex->surface.u.gfx9.cmask.pipe_aligned);
}
if (rtex->htile_offset) {
u_log_printf(log, " HTile: offset=%"PRIu64", size=%u, alignment=%u, "
"rb_aligned=%u, pipe_aligned=%u\n",
rtex->htile_offset,
rtex->surface.htile_size,
rtex->surface.htile_alignment,
rtex->surface.u.gfx9.htile.rb_aligned,
rtex->surface.u.gfx9.htile.pipe_aligned);
}
if (rtex->dcc_offset) {
u_log_printf(log, " DCC: offset=%"PRIu64", size=%u, "
"alignment=%u, pitch_max=%u, num_dcc_levels=%u\n",
rtex->dcc_offset, rtex->surface.dcc_size,
rtex->surface.dcc_alignment,
rtex->surface.u.gfx9.dcc_pitch_max,
rtex->surface.num_dcc_levels);
}
if (rtex->surface.u.gfx9.stencil_offset) {
u_log_printf(log, " Stencil: offset=%"PRIu64", swmode=%u, epitch=%u\n",
rtex->surface.u.gfx9.stencil_offset,
rtex->surface.u.gfx9.stencil.swizzle_mode,
rtex->surface.u.gfx9.stencil.epitch);
}
return;
}
u_log_printf(log, " Layout: size=%"PRIu64", alignment=%u, bankw=%u, "
"bankh=%u, nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n",
rtex->surface.surf_size, rtex->surface.surf_alignment, rtex->surface.u.legacy.bankw,
rtex->surface.u.legacy.bankh, rtex->surface.u.legacy.num_banks, rtex->surface.u.legacy.mtilea,
rtex->surface.u.legacy.tile_split, rtex->surface.u.legacy.pipe_config,
(rtex->surface.flags & RADEON_SURF_SCANOUT) != 0);
if (rtex->fmask.size)
u_log_printf(log, " FMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch_in_pixels=%u, "
"bankh=%u, slice_tile_max=%u, tile_mode_index=%u\n",
rtex->fmask.offset, rtex->fmask.size, rtex->fmask.alignment,
rtex->fmask.pitch_in_pixels, rtex->fmask.bank_height,
rtex->fmask.slice_tile_max, rtex->fmask.tile_mode_index);
if (rtex->cmask.size)
u_log_printf(log, " CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, "
"slice_tile_max=%u\n",
rtex->cmask.offset, rtex->cmask.size, rtex->cmask.alignment,
rtex->cmask.slice_tile_max);
if (rtex->htile_offset)
u_log_printf(log, " HTile: offset=%"PRIu64", size=%u, "
"alignment=%u, TC_compatible = %u\n",
rtex->htile_offset, rtex->surface.htile_size,
rtex->surface.htile_alignment,
rtex->tc_compatible_htile);
if (rtex->dcc_offset) {
u_log_printf(log, " DCC: offset=%"PRIu64", size=%u, alignment=%u\n",
rtex->dcc_offset, rtex->surface.dcc_size,
rtex->surface.dcc_alignment);
for (i = 0; i <= rtex->resource.b.b.last_level; i++)
u_log_printf(log, " DCCLevel[%i]: enabled=%u, offset=%u, "
"fast_clear_size=%u\n",
i, i < rtex->surface.num_dcc_levels,
rtex->surface.u.legacy.level[i].dcc_offset,
rtex->surface.u.legacy.level[i].dcc_fast_clear_size);
}
for (i = 0; i <= rtex->resource.b.b.last_level; i++)
u_log_printf(log, " Level[%i]: offset=%"PRIu64", slice_size=%"PRIu64", "
"npix_x=%u, npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
"mode=%u, tiling_index = %u\n",
i, rtex->surface.u.legacy.level[i].offset,
(uint64_t)rtex->surface.u.legacy.level[i].slice_size_dw * 4,
u_minify(rtex->resource.b.b.width0, i),
u_minify(rtex->resource.b.b.height0, i),
u_minify(rtex->resource.b.b.depth0, i),
rtex->surface.u.legacy.level[i].nblk_x,
rtex->surface.u.legacy.level[i].nblk_y,
rtex->surface.u.legacy.level[i].mode,
rtex->surface.u.legacy.tiling_index[i]);
if (rtex->surface.has_stencil) {
u_log_printf(log, " StencilLayout: tilesplit=%u\n",
rtex->surface.u.legacy.stencil_tile_split);
for (i = 0; i <= rtex->resource.b.b.last_level; i++) {
u_log_printf(log, " StencilLevel[%i]: offset=%"PRIu64", "
"slice_size=%"PRIu64", npix_x=%u, "
"npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
"mode=%u, tiling_index = %u\n",
i, rtex->surface.u.legacy.stencil_level[i].offset,
(uint64_t)rtex->surface.u.legacy.stencil_level[i].slice_size_dw * 4,
u_minify(rtex->resource.b.b.width0, i),
u_minify(rtex->resource.b.b.height0, i),
u_minify(rtex->resource.b.b.depth0, i),
rtex->surface.u.legacy.stencil_level[i].nblk_x,
rtex->surface.u.legacy.stencil_level[i].nblk_y,
rtex->surface.u.legacy.stencil_level[i].mode,
rtex->surface.u.legacy.stencil_tiling_index[i]);
}
}
}
/* Common processing for r600_texture_create and r600_texture_from_handle */
static struct r600_texture *
r600_texture_create_object(struct pipe_screen *screen,
const struct pipe_resource *base,
struct pb_buffer *buf,
struct radeon_surf *surface)
{
struct r600_texture *rtex;
struct r600_resource *resource;
struct si_screen *sscreen = (struct si_screen*)screen;
rtex = CALLOC_STRUCT(r600_texture);
if (!rtex)
return NULL;
resource = &rtex->resource;
resource->b.b = *base;
resource->b.b.next = NULL;
resource->b.vtbl = &r600_texture_vtbl;
pipe_reference_init(&resource->b.b.reference, 1);
resource->b.b.screen = screen;
/* don't include stencil-only formats which we don't support for rendering */
rtex->is_depth = util_format_has_depth(util_format_description(rtex->resource.b.b.format));
rtex->surface = *surface;
rtex->size = rtex->surface.surf_size;
rtex->tc_compatible_htile = rtex->surface.htile_size != 0 &&
(rtex->surface.flags &
RADEON_SURF_TC_COMPATIBLE_HTILE);
/* TC-compatible HTILE:
* - VI only supports Z32_FLOAT.
* - GFX9 only supports Z32_FLOAT and Z16_UNORM. */
if (rtex->tc_compatible_htile) {
if (sscreen->info.chip_class >= GFX9 &&
base->format == PIPE_FORMAT_Z16_UNORM)
rtex->db_render_format = base->format;
else {
rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
rtex->upgraded_depth = base->format != PIPE_FORMAT_Z32_FLOAT &&
base->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT;
}
} else {
rtex->db_render_format = base->format;
}
/* Applies to GCN. */
rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode;
/* Disable separate DCC at the beginning. DRI2 doesn't reuse buffers
* between frames, so the only thing that can enable separate DCC
* with DRI2 is multiple slow clears within a frame.
*/
rtex->ps_draw_ratio = 0;
if (rtex->is_depth) {
if (sscreen->info.chip_class >= GFX9) {
rtex->can_sample_z = true;
rtex->can_sample_s = true;
} else {
rtex->can_sample_z = !rtex->surface.u.legacy.depth_adjusted;
rtex->can_sample_s = !rtex->surface.u.legacy.stencil_adjusted;
}
if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
R600_RESOURCE_FLAG_FLUSHED_DEPTH))) {
rtex->db_compatible = true;
if (!(sscreen->debug_flags & DBG(NO_HYPERZ)))
r600_texture_allocate_htile(sscreen, rtex);
}
} else {
if (base->nr_samples > 1 &&
!buf &&
!(sscreen->debug_flags & DBG(NO_FMASK))) {
r600_texture_allocate_fmask(sscreen, rtex);
r600_texture_allocate_cmask(sscreen, rtex);
rtex->cmask_buffer = &rtex->resource;
if (!rtex->fmask.size || !rtex->cmask.size) {
FREE(rtex);
return NULL;
}
}
/* Shared textures must always set up DCC here.
* If it's not present, it will be disabled by
* apply_opaque_metadata later.
*/
if (rtex->surface.dcc_size &&
(buf || !(sscreen->debug_flags & DBG(NO_DCC))) &&
!(rtex->surface.flags & RADEON_SURF_SCANOUT)) {
/* Reserve space for the DCC buffer. */
rtex->dcc_offset = align64(rtex->size, rtex->surface.dcc_alignment);
rtex->size = rtex->dcc_offset + rtex->surface.dcc_size;
}
}
/* Now create the backing buffer. */
if (!buf) {
si_init_resource_fields(sscreen, resource, rtex->size,
rtex->surface.surf_alignment);
if (!si_alloc_resource(sscreen, resource)) {
FREE(rtex);
return NULL;
}
} else {
resource->buf = buf;
resource->gpu_address = sscreen->ws->buffer_get_virtual_address(resource->buf);
resource->bo_size = buf->size;
resource->bo_alignment = buf->alignment;
resource->domains = sscreen->ws->buffer_get_initial_domain(resource->buf);
if (resource->domains & RADEON_DOMAIN_VRAM)
resource->vram_usage = buf->size;
else if (resource->domains & RADEON_DOMAIN_GTT)
resource->gart_usage = buf->size;
}
if (rtex->cmask.size) {
/* Initialize the cmask to 0xCC (= compressed state). */
si_screen_clear_buffer(sscreen, &rtex->cmask_buffer->b.b,
rtex->cmask.offset, rtex->cmask.size,
0xCCCCCCCC);
}
if (rtex->htile_offset) {
uint32_t clear_value = 0;
if (sscreen->info.chip_class >= GFX9 || rtex->tc_compatible_htile)
clear_value = 0x0000030F;
si_screen_clear_buffer(sscreen, &rtex->resource.b.b,
rtex->htile_offset,
rtex->surface.htile_size,
clear_value);
}
/* Initialize DCC only if the texture is not being imported. */
if (!buf && rtex->dcc_offset) {
si_screen_clear_buffer(sscreen, &rtex->resource.b.b,
rtex->dcc_offset,
rtex->surface.dcc_size,
0xFFFFFFFF);
}
/* Initialize the CMASK base register value. */
rtex->cmask.base_address_reg =
(rtex->resource.gpu_address + rtex->cmask.offset) >> 8;
if (sscreen->debug_flags & DBG(VM)) {
fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Texture %ix%ix%i, %i levels, %i samples, %s\n",
rtex->resource.gpu_address,
rtex->resource.gpu_address + rtex->resource.buf->size,
base->width0, base->height0, util_num_layers(base, 0), base->last_level+1,
base->nr_samples ? base->nr_samples : 1, util_format_short_name(base->format));
}
if (sscreen->debug_flags & DBG(TEX)) {
puts("Texture:");
struct u_log_context log;
u_log_context_init(&log);
si_print_texture_info(sscreen, rtex, &log);
u_log_new_page_print(&log, stdout);
fflush(stdout);
u_log_context_destroy(&log);
}
return rtex;
}
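/* Tiling selection policy for the function below, roughly in order:
 * - MSAA resources and, on VI, depth/stencil that is likely to be sampled
 *   (for TC-compatible HTILE) always get 2D tiling,
 * - transfers, staging/stream usage, subsampled (422) formats, cursors and
 *   PIPE_BIND_LINEAR get a linear layout (never for compressed or Z/S),
 * - small textures (or DBG(NO_2D_TILING)) fall back to 1D tiling,
 * - everything else defaults to 2D tiling.
 */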
static enum radeon_surf_mode
r600_choose_tiling(struct si_screen *sscreen,
const struct pipe_resource *templ)
{
const struct util_format_description *desc = util_format_description(templ->format);
bool force_tiling = templ->flags & R600_RESOURCE_FLAG_FORCE_TILING;
bool is_depth_stencil = util_format_is_depth_or_stencil(templ->format) &&
!(templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH);
/* MSAA resources must be 2D tiled. */
if (templ->nr_samples > 1)
return RADEON_SURF_MODE_2D;
/* Transfer resources should be linear. */
if (templ->flags & R600_RESOURCE_FLAG_TRANSFER)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
/* Avoid Z/S decompress blits by forcing TC-compatible HTILE on VI,
* which requires 2D tiling.
*/
if (sscreen->info.chip_class == VI &&
is_depth_stencil &&
(templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY))
return RADEON_SURF_MODE_2D;
/* Handle common candidates for the linear mode.
* Compressed textures and DB surfaces must always be tiled.
*/
if (!force_tiling &&
!is_depth_stencil &&
!util_format_is_compressed(templ->format)) {
if (sscreen->debug_flags & DBG(NO_TILING))
return RADEON_SURF_MODE_LINEAR_ALIGNED;
/* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600+. */
if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
/* Cursors are linear on SI.
* (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */
if (templ->bind & PIPE_BIND_CURSOR)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
if (templ->bind & PIPE_BIND_LINEAR)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
/* Textures with a very small height are recommended to be linear. */
if (templ->target == PIPE_TEXTURE_1D ||
templ->target == PIPE_TEXTURE_1D_ARRAY ||
/* Only very thin and long 2D textures should benefit from
* linear_aligned. */
(templ->width0 > 8 && templ->height0 <= 2))
return RADEON_SURF_MODE_LINEAR_ALIGNED;
/* Textures likely to be mapped often. */
if (templ->usage == PIPE_USAGE_STAGING ||
templ->usage == PIPE_USAGE_STREAM)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
}
/* Make small textures 1D tiled. */
if (templ->width0 <= 16 || templ->height0 <= 16 ||
(sscreen->debug_flags & DBG(NO_2D_TILING)))
return RADEON_SURF_MODE_1D;
/* The allocator will switch to 1D if needed. */
return RADEON_SURF_MODE_2D;
}
struct pipe_resource *si_texture_create(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
struct si_screen *sscreen = (struct si_screen*)screen;
struct radeon_surf surface = {0};
bool is_flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH;
bool tc_compatible_htile =
sscreen->info.chip_class >= VI &&
(templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) &&
!(sscreen->debug_flags & DBG(NO_HYPERZ)) &&
!is_flushed_depth &&
templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */
util_format_is_depth_or_stencil(templ->format);
int r;
r = r600_init_surface(sscreen, &surface, templ,
r600_choose_tiling(sscreen, templ), 0, 0,
false, false, is_flushed_depth,
tc_compatible_htile);
if (r) {
return NULL;
}
return (struct pipe_resource *)
r600_texture_create_object(screen, templ, NULL, &surface);
}
static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
const struct pipe_resource *templ,
struct winsys_handle *whandle,
unsigned usage)
{
struct si_screen *sscreen = (struct si_screen*)screen;
struct pb_buffer *buf = NULL;
unsigned stride = 0, offset = 0;
enum radeon_surf_mode array_mode;
struct radeon_surf surface = {};
int r;
struct radeon_bo_metadata metadata = {};
struct r600_texture *rtex;
bool is_scanout;
/* Support only 2D textures without mipmaps */
if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) ||
templ->depth0 != 1 || templ->last_level != 0)
return NULL;
buf = sscreen->ws->buffer_from_handle(sscreen->ws, whandle, &stride, &offset);
if (!buf)
return NULL;
sscreen->ws->buffer_get_metadata(buf, &metadata);
r600_surface_import_metadata(sscreen, &surface, &metadata,
&array_mode, &is_scanout);
r = r600_init_surface(sscreen, &surface, templ, array_mode, stride,
offset, true, is_scanout, false, false);
if (r) {
return NULL;
}
rtex = r600_texture_create_object(screen, templ, buf, &surface);
if (!rtex)
return NULL;
rtex->resource.b.is_shared = true;
rtex->resource.external_usage = usage;
si_apply_opaque_metadata(sscreen, rtex, &metadata);
assert(rtex->surface.tile_swizzle == 0);
return &rtex->resource.b.b;
}
bool si_init_flushed_depth_texture(struct pipe_context *ctx,
struct pipe_resource *texture,
struct r600_texture **staging)
{
struct r600_texture *rtex = (struct r600_texture*)texture;
struct pipe_resource resource;
struct r600_texture **flushed_depth_texture = staging ?
staging : &rtex->flushed_depth_texture;
enum pipe_format pipe_format = texture->format;
if (!staging) {
if (rtex->flushed_depth_texture)
return true; /* it's ready */
if (!rtex->can_sample_z && rtex->can_sample_s) {
switch (pipe_format) {
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
/* Save memory by not allocating the S plane. */
pipe_format = PIPE_FORMAT_Z32_FLOAT;
break;
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
/* Save memory bandwidth by not copying the
* stencil part during flush.
*
* This potentially increases memory bandwidth
* if an application uses both Z and S texturing
* simultaneously (a flushed Z24S8 texture
* would be stored compactly), but how often
* does that really happen?
*/
pipe_format = PIPE_FORMAT_Z24X8_UNORM;
break;
default:;
}
} else if (!rtex->can_sample_s && rtex->can_sample_z) {
assert(util_format_has_stencil(util_format_description(pipe_format)));
/* DB->CB copies to an 8bpp surface don't work. */
pipe_format = PIPE_FORMAT_X24S8_UINT;
}
}
memset(&resource, 0, sizeof(resource));
resource.target = texture->target;
resource.format = pipe_format;
resource.width0 = texture->width0;
resource.height0 = texture->height0;
resource.depth0 = texture->depth0;
resource.array_size = texture->array_size;
resource.last_level = texture->last_level;
resource.nr_samples = texture->nr_samples;
resource.usage = staging ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT;
resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL;
resource.flags = texture->flags | R600_RESOURCE_FLAG_FLUSHED_DEPTH;
if (staging)
resource.flags |= R600_RESOURCE_FLAG_TRANSFER;
*flushed_depth_texture = (struct r600_texture *)ctx->screen->resource_create(ctx->screen, &resource);
if (*flushed_depth_texture == NULL) {
R600_ERR("failed to create temporary texture to hold flushed depth\n");
return false;
}
return true;
}
/**
* Initialize the pipe_resource descriptor to be of the same size as the box,
* which is supposed to hold a subregion of the texture "orig" at the given
* mipmap level.
*/
static void r600_init_temp_resource_from_box(struct pipe_resource *res,
struct pipe_resource *orig,
const struct pipe_box *box,
unsigned level, unsigned flags)
{
memset(res, 0, sizeof(*res));
res->format = orig->format;
res->width0 = box->width;
res->height0 = box->height;
res->depth0 = 1;
res->array_size = 1;
res->usage = flags & R600_RESOURCE_FLAG_TRANSFER ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT;
res->flags = flags;
/* We must set the correct texture target and dimensions for a 3D box. */
if (box->depth > 1 && util_max_layer(orig, level) > 0) {
res->target = PIPE_TEXTURE_2D_ARRAY;
res->array_size = box->depth;
} else {
res->target = PIPE_TEXTURE_2D;
}
}
static bool r600_can_invalidate_texture(struct si_screen *sscreen,
struct r600_texture *rtex,
unsigned transfer_usage,
const struct pipe_box *box)
{
return !rtex->resource.b.is_shared &&
!(transfer_usage & PIPE_TRANSFER_READ) &&
rtex->resource.b.b.last_level == 0 &&
util_texrange_covers_whole_level(&rtex->resource.b.b, 0,
box->x, box->y, box->z,
box->width, box->height,
box->depth);
}
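/* Discard the current contents of a linear color texture by allocating a
 * fresh backing buffer for the same pipe_resource (callers are expected to
 * check r600_can_invalidate_texture first). This lets a write-only transfer
 * proceed without stalling on a busy buffer; the old contents are lost.
 */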
static void r600_texture_invalidate_storage(struct si_context *sctx,
struct r600_texture *rtex)
{
struct si_screen *sscreen = sctx->screen;
/* There is no point in discarding depth and tiled buffers. */
assert(!rtex->is_depth);
assert(rtex->surface.is_linear);
/* Reallocate the buffer in the same pipe_resource. */
si_alloc_resource(sscreen, &rtex->resource);
/* Initialize the CMASK base address (needed even without CMASK). */
rtex->cmask.base_address_reg =
(rtex->resource.gpu_address + rtex->cmask.offset) >> 8;
p_atomic_inc(&sscreen->dirty_tex_counter);
sctx->b.num_alloc_tex_transfer_bytes += rtex->size;
}
static void *r600_texture_transfer_map(struct pipe_context *ctx,
struct pipe_resource *texture,
unsigned level,
unsigned usage,
const struct pipe_box *box,
struct pipe_transfer **ptransfer)
{
struct si_context *sctx = (struct si_context*)ctx;
struct r600_texture *rtex = (struct r600_texture*)texture;
struct r600_transfer *trans;
struct r600_resource *buf;
unsigned offset = 0;
char *map;
bool use_staging_texture = false;
assert(!(texture->flags & R600_RESOURCE_FLAG_TRANSFER));
assert(box->width && box->height && box->depth);
/* Depth textures use staging unconditionally. */
if (!rtex->is_depth) {
/* Degrade the tile mode if we get too many transfers on APUs.
* On dGPUs, the staging texture is always faster.
* Only count uploads that are at least 4x4 pixels large.
*/
if (!sctx->screen->info.has_dedicated_vram &&
level == 0 &&
box->width >= 4 && box->height >= 4 &&
p_atomic_inc_return(&rtex->num_level0_transfers) == 10) {
bool can_invalidate =
r600_can_invalidate_texture(sctx->screen, rtex,
usage, box);
r600_reallocate_texture_inplace(sctx, rtex,
PIPE_BIND_LINEAR,
can_invalidate);
}
/* Tiled textures need to be converted into a linear texture for CPU
* access. The staging texture is always linear and is placed in GART.
*
* Reading from VRAM or GTT WC is slow, so always use the staging
* texture in this case.
*
* Use the staging texture for uploads if the underlying BO
* is busy.
*/
if (!rtex->surface.is_linear)
use_staging_texture = true;
else if (usage & PIPE_TRANSFER_READ)
use_staging_texture =
rtex->resource.domains & RADEON_DOMAIN_VRAM ||
rtex->resource.flags & RADEON_FLAG_GTT_WC;
/* Write & linear only: */
else if (si_rings_is_buffer_referenced(sctx, rtex->resource.buf,
RADEON_USAGE_READWRITE) ||
!sctx->b.ws->buffer_wait(rtex->resource.buf, 0,
RADEON_USAGE_READWRITE)) {
/* It's busy. */
if (r600_can_invalidate_texture(sctx->screen, rtex,
usage, box))
r600_texture_invalidate_storage(sctx, rtex);
else
use_staging_texture = true;
}
}
trans = CALLOC_STRUCT(r600_transfer);
if (!trans)
return NULL;
pipe_resource_reference(&trans->b.b.resource, texture);
trans->b.b.level = level;
trans->b.b.usage = usage;
trans->b.b.box = *box;
if (rtex->is_depth) {
struct r600_texture *staging_depth;
if (rtex->resource.b.b.nr_samples > 1) {
/* MSAA depth buffers need to be converted to single sample buffers.
*
* Mapping MSAA depth buffers can occur if ReadPixels is called
* with a multisample GLX visual.
*
* First downsample the depth buffer to a temporary texture,
* then decompress the temporary one to staging.
*
* Only the region being mapped is transferred.
*/
struct pipe_resource resource;
r600_init_temp_resource_from_box(&resource, texture, box, level, 0);
if (!si_init_flushed_depth_texture(ctx, &resource, &staging_depth)) {
R600_ERR("failed to create temporary texture to hold untiled copy\n");
FREE(trans);
return NULL;
}
if (usage & PIPE_TRANSFER_READ) {
struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource);
if (!temp) {
R600_ERR("failed to create a temporary depth texture\n");
FREE(trans);
return NULL;
}
r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box);
si_blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth,
0, 0, 0, box->depth, 0, 0);
pipe_resource_reference(&temp, NULL);
}
/* Just get the strides. */
r600_texture_get_offset(sctx->screen, staging_depth, level, NULL,
&trans->b.b.stride,
&trans->b.b.layer_stride);
} else {
/* XXX: only readback the rectangle which is being mapped? */
/* XXX: when discard is true, no need to read back from depth texture */
if (!si_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
R600_ERR("failed to create temporary texture to hold untiled copy\n");
FREE(trans);
return NULL;
}
si_blit_decompress_depth(ctx, rtex, staging_depth,
level, level,
box->z, box->z + box->depth - 1,
0, 0);
offset = r600_texture_get_offset(sctx->screen, staging_depth,
level, box,
&trans->b.b.stride,
&trans->b.b.layer_stride);
}
trans->staging = (struct r600_resource*)staging_depth;
buf = trans->staging;
} else if (use_staging_texture) {
struct pipe_resource resource;
struct r600_texture *staging;
r600_init_temp_resource_from_box(&resource, texture, box, level,
R600_RESOURCE_FLAG_TRANSFER);
resource.usage = (usage & PIPE_TRANSFER_READ) ?
PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
/* Create the temporary texture. */
staging = (struct r600_texture*)ctx->screen->resource_create(ctx->screen, &resource);
if (!staging) {
R600_ERR("failed to create temporary texture to hold untiled copy\n");
FREE(trans);
return NULL;
}
trans->staging = &staging->resource;
/* Just get the strides. */
r600_texture_get_offset(sctx->screen, staging, 0, NULL,
&trans->b.b.stride,
&trans->b.b.layer_stride);
if (usage & PIPE_TRANSFER_READ)
r600_copy_to_staging_texture(ctx, trans);
else
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
buf = trans->staging;
} else {
/* the resource is mapped directly */
offset = r600_texture_get_offset(sctx->screen, rtex, level, box,
&trans->b.b.stride,
&trans->b.b.layer_stride);
buf = &rtex->resource;
}
if (!(map = si_buffer_map_sync_with_rings(sctx, buf, usage))) {
r600_resource_reference(&trans->staging, NULL);
FREE(trans);
return NULL;
}
*ptransfer = &trans->b.b;
return map + offset;
}
static void r600_texture_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer* transfer)
{
struct si_context *sctx = (struct si_context*)ctx;
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
struct pipe_resource *texture = transfer->resource;
struct r600_texture *rtex = (struct r600_texture*)texture;
if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtransfer->staging) {
if (rtex->is_depth && rtex->resource.b.b.nr_samples <= 1) {
ctx->resource_copy_region(ctx, texture, transfer->level,
transfer->box.x, transfer->box.y, transfer->box.z,
&rtransfer->staging->b.b, transfer->level,
&transfer->box);
} else {
r600_copy_from_staging_texture(ctx, rtransfer);
}
}
if (rtransfer->staging) {
sctx->b.num_alloc_tex_transfer_bytes += rtransfer->staging->buf->size;
r600_resource_reference(&rtransfer->staging, NULL);
}
/* Heuristic for {upload, draw, upload, draw, ..}:
*
* Flush the gfx IB if we've allocated too much texture storage.
*
* The idea is that we don't want to build IBs that use too much
* memory and put pressure on the kernel memory manager, and we also
* want to make temporary and invalidated buffers go idle ASAP to
* decrease the total memory usage or make them reusable. The memory
* usage will be slightly higher than given here because of the buffer
* cache in the winsys.
*
* The result is that the kernel memory manager is never a bottleneck.
*/
if (sctx->b.num_alloc_tex_transfer_bytes > sctx->screen->info.gart_size / 4) {
si_flush_gfx_cs(sctx, PIPE_FLUSH_ASYNC, NULL);
sctx->b.num_alloc_tex_transfer_bytes = 0;
}
pipe_resource_reference(&transfer->resource, NULL);
FREE(transfer);
}
static const struct u_resource_vtbl r600_texture_vtbl =
{
NULL, /* get_handle */
r600_texture_destroy, /* resource_destroy */
r600_texture_transfer_map, /* transfer_map */
u_default_transfer_flush_region, /* transfer_flush_region */
r600_texture_transfer_unmap, /* transfer_unmap */
};
/* DCC channel type categories within which formats can be reinterpreted
* while keeping the same DCC encoding. The swizzle must also match. */
enum dcc_channel_type {
dcc_channel_float,
/* uint and sint can be merged if we never use TC-compatible DCC clear
* encoding with the clear value of 1. */
dcc_channel_uint,
dcc_channel_sint,
dcc_channel_uint_10_10_10_2,
dcc_channel_incompatible,
};
/* Return the type of DCC encoding. */
static enum dcc_channel_type
vi_get_dcc_channel_type(const struct util_format_description *desc)
{
int i;
/* Find the first non-void channel. */
for (i = 0; i < desc->nr_channels; i++)
if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID)
break;
if (i == desc->nr_channels)
return dcc_channel_incompatible;
switch (desc->channel[i].size) {
case 32:
case 16:
case 8:
if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
return dcc_channel_float;
if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)
return dcc_channel_uint;
return dcc_channel_sint;
case 10:
return dcc_channel_uint_10_10_10_2;
default:
return dcc_channel_incompatible;
}
}
/* Return if it's allowed to reinterpret one format as another with DCC enabled. */
bool vi_dcc_formats_compatible(enum pipe_format format1,
enum pipe_format format2)
{
const struct util_format_description *desc1, *desc2;
enum dcc_channel_type type1, type2;
int i;
if (format1 == format2)
return true;
desc1 = util_format_description(format1);
desc2 = util_format_description(format2);
if (desc1->nr_channels != desc2->nr_channels)
return false;
/* Swizzles must be the same. */
for (i = 0; i < desc1->nr_channels; i++)
if (desc1->swizzle[i] <= PIPE_SWIZZLE_W &&
desc2->swizzle[i] <= PIPE_SWIZZLE_W &&
desc1->swizzle[i] != desc2->swizzle[i])
return false;
type1 = vi_get_dcc_channel_type(desc1);
type2 = vi_get_dcc_channel_type(desc2);
return type1 != dcc_channel_incompatible &&
type2 != dcc_channel_incompatible &&
type1 == type2;
}
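/* Illustrative consequences of the rules above (not an exhaustive list):
 * - R8G8B8A8_UNORM <-> R8G8B8A8_SRGB should be compatible (same swizzle,
 *   both unsigned),
 * - R8G8B8A8_UNORM <-> B8G8R8A8_UNORM is incompatible (different swizzle),
 * - R32_FLOAT <-> R32_UINT is incompatible (float vs. uint encoding).
 */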
bool vi_dcc_formats_are_incompatible(struct pipe_resource *tex,
unsigned level,
enum pipe_format view_format)
{
struct r600_texture *rtex = (struct r600_texture *)tex;
return vi_dcc_enabled(rtex, level) &&
!vi_dcc_formats_compatible(tex->format, view_format);
}
/* This can't be merged with the above function, because
* vi_dcc_formats_compatible should be called only when DCC is enabled. */
void vi_disable_dcc_if_incompatible_format(struct si_context *sctx,
struct pipe_resource *tex,
unsigned level,
enum pipe_format view_format)
{
struct r600_texture *rtex = (struct r600_texture *)tex;
if (vi_dcc_formats_are_incompatible(tex, level, view_format))
if (!si_texture_disable_dcc(sctx, (struct r600_texture*)tex))
si_decompress_dcc(sctx, rtex);
}
struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe,
struct pipe_resource *texture,
const struct pipe_surface *templ,
unsigned width0, unsigned height0,
unsigned width, unsigned height)
{
struct r600_surface *surface = CALLOC_STRUCT(r600_surface);
if (!surface)
return NULL;
assert(templ->u.tex.first_layer <= util_max_layer(texture, templ->u.tex.level));
assert(templ->u.tex.last_layer <= util_max_layer(texture, templ->u.tex.level));
pipe_reference_init(&surface->base.reference, 1);
pipe_resource_reference(&surface->base.texture, texture);
surface->base.context = pipe;
surface->base.format = templ->format;
surface->base.width = width;
surface->base.height = height;
surface->base.u = templ->u;
surface->width0 = width0;
surface->height0 = height0;
surface->dcc_incompatible =
texture->target != PIPE_BUFFER &&
vi_dcc_formats_are_incompatible(texture, templ->u.tex.level,
templ->format);
return &surface->base;
}
static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
struct pipe_resource *tex,
const struct pipe_surface *templ)
{
unsigned level = templ->u.tex.level;
unsigned width = u_minify(tex->width0, level);
unsigned height = u_minify(tex->height0, level);
unsigned width0 = tex->width0;
unsigned height0 = tex->height0;
if (tex->target != PIPE_BUFFER && templ->format != tex->format) {
const struct util_format_description *tex_desc
= util_format_description(tex->format);
const struct util_format_description *templ_desc
= util_format_description(templ->format);
assert(tex_desc->block.bits == templ_desc->block.bits);
/* Adjust size of surface if and only if the block width or
* height is changed. */
if (tex_desc->block.width != templ_desc->block.width ||
tex_desc->block.height != templ_desc->block.height) {
unsigned nblks_x = util_format_get_nblocksx(tex->format, width);
unsigned nblks_y = util_format_get_nblocksy(tex->format, height);
width = nblks_x * templ_desc->block.width;
height = nblks_y * templ_desc->block.height;
width0 = util_format_get_nblocksx(tex->format, width0);
height0 = util_format_get_nblocksy(tex->format, height0);
}
}
return si_create_surface_custom(pipe, tex, templ,
width0, height0,
width, height);
}
static void r600_surface_destroy(struct pipe_context *pipe,
struct pipe_surface *surface)
{
pipe_resource_reference(&surface->texture, NULL);
FREE(surface);
}
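/* Translate the gallium format swizzle into the V_028C70_SWAP_* colorbuffer
 * component-swap encoding used by the CB hardware; returns ~0U when the
 * format has no matching swap.
 */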
unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap)
{
const struct util_format_description *desc = util_format_description(format);
#define HAS_SWIZZLE(chan,swz) (desc->swizzle[chan] == PIPE_SWIZZLE_##swz)
if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
return V_028C70_SWAP_STD;
if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
return ~0U;
switch (desc->nr_channels) {
case 1:
if (HAS_SWIZZLE(0,X))
return V_028C70_SWAP_STD; /* X___ */
else if (HAS_SWIZZLE(3,X))
return V_028C70_SWAP_ALT_REV; /* ___X */
break;
case 2:
if ((HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,Y)) ||
(HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,NONE)) ||
(HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,Y)))
return V_028C70_SWAP_STD; /* XY__ */
else if ((HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,X)) ||
(HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,NONE)) ||
(HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,X)))
/* YX__ */
return (do_endian_swap ? V_028C70_SWAP_STD : V_028C70_SWAP_STD_REV);
else if (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(3,Y))
return V_028C70_SWAP_ALT; /* X__Y */
else if (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(3,X))
return V_028C70_SWAP_ALT_REV; /* Y__X */
break;
case 3:
if (HAS_SWIZZLE(0,X))
return (do_endian_swap ? V_028C70_SWAP_STD_REV : V_028C70_SWAP_STD);
else if (HAS_SWIZZLE(0,Z))
return V_028C70_SWAP_STD_REV; /* ZYX */
break;
case 4:
/* check the middle channels, the 1st and 4th channel can be NONE */
if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z)) {
return V_028C70_SWAP_STD; /* XYZW */
} else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y)) {
return V_028C70_SWAP_STD_REV; /* WZYX */
} else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X)) {
return V_028C70_SWAP_ALT; /* ZYXW */
} else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W)) {
/* YZWX */
if (desc->is_array)
return V_028C70_SWAP_ALT_REV;
else
return (do_endian_swap ? V_028C70_SWAP_ALT : V_028C70_SWAP_ALT_REV);
}
break;
}
return ~0U;
}
/* PIPELINE_STAT-BASED DCC ENABLEMENT FOR DISPLAYABLE SURFACES */
static void vi_dcc_clean_up_context_slot(struct si_context *sctx,
int slot)
{
int i;
if (sctx->b.dcc_stats[slot].query_active)
vi_separate_dcc_stop_query(sctx,
sctx->b.dcc_stats[slot].tex);
for (i = 0; i < ARRAY_SIZE(sctx->b.dcc_stats[slot].ps_stats); i++)
if (sctx->b.dcc_stats[slot].ps_stats[i]) {
sctx->b.b.destroy_query(&sctx->b.b,
sctx->b.dcc_stats[slot].ps_stats[i]);
sctx->b.dcc_stats[slot].ps_stats[i] = NULL;
}
r600_texture_reference(&sctx->b.dcc_stats[slot].tex, NULL);
}
/**
* Return the per-context slot where DCC statistics queries for the texture live.
*/
static unsigned vi_get_context_dcc_stats_index(struct si_context *sctx,
struct r600_texture *tex)
{
int i, empty_slot = -1;
/* Remove zombie textures (textures kept alive by this array only). */
for (i = 0; i < ARRAY_SIZE(sctx->b.dcc_stats); i++)
if (sctx->b.dcc_stats[i].tex &&
sctx->b.dcc_stats[i].tex->resource.b.b.reference.count == 1)
vi_dcc_clean_up_context_slot(sctx, i);
/* Find the texture. */
for (i = 0; i < ARRAY_SIZE(sctx->b.dcc_stats); i++) {
/* Return if found. */
if (sctx->b.dcc_stats[i].tex == tex) {
sctx->b.dcc_stats[i].last_use_timestamp = os_time_get();
return i;
}
/* Record the first seen empty slot. */
if (empty_slot == -1 && !sctx->b.dcc_stats[i].tex)
empty_slot = i;
}
/* Not found. Remove the oldest member to make space in the array. */
if (empty_slot == -1) {
int oldest_slot = 0;
/* Find the oldest slot. */
for (i = 1; i < ARRAY_SIZE(sctx->b.dcc_stats); i++)
if (sctx->b.dcc_stats[oldest_slot].last_use_timestamp >
sctx->b.dcc_stats[i].last_use_timestamp)
oldest_slot = i;
/* Clean up the oldest slot. */
vi_dcc_clean_up_context_slot(sctx, oldest_slot);
empty_slot = oldest_slot;
}
/* Add the texture to the new slot. */
r600_texture_reference(&sctx->b.dcc_stats[empty_slot].tex, tex);
sctx->b.dcc_stats[empty_slot].last_use_timestamp = os_time_get();
return empty_slot;
}
static struct pipe_query *
vi_create_resuming_pipestats_query(struct si_context *sctx)
{
struct r600_query_hw *query = (struct r600_query_hw*)
sctx->b.b.create_query(&sctx->b.b, PIPE_QUERY_PIPELINE_STATISTICS, 0);
query->flags |= R600_QUERY_HW_FLAG_BEGIN_RESUMES;
return (struct pipe_query*)query;
}
/**
* Called when binding a color buffer.
*/
void vi_separate_dcc_start_query(struct si_context *sctx,
struct r600_texture *tex)
{
unsigned i = vi_get_context_dcc_stats_index(sctx, tex);
assert(!sctx->b.dcc_stats[i].query_active);
if (!sctx->b.dcc_stats[i].ps_stats[0])
sctx->b.dcc_stats[i].ps_stats[0] = vi_create_resuming_pipestats_query(sctx);
/* begin or resume the query */
sctx->b.b.begin_query(&sctx->b.b, sctx->b.dcc_stats[i].ps_stats[0]);
sctx->b.dcc_stats[i].query_active = true;
}
/**
* Called when unbinding a color buffer.
*/
void vi_separate_dcc_stop_query(struct si_context *sctx,
struct r600_texture *tex)
{
unsigned i = vi_get_context_dcc_stats_index(sctx, tex);
assert(sctx->b.dcc_stats[i].query_active);
assert(sctx->b.dcc_stats[i].ps_stats[0]);
/* pause or end the query */
sctx->b.b.end_query(&sctx->b.b, sctx->b.dcc_stats[i].ps_stats[0]);
sctx->b.dcc_stats[i].query_active = false;
}
static bool vi_should_enable_separate_dcc(struct r600_texture *tex)
{
/* The minimum number of fullscreen draws per frame that is required
* to enable DCC. */
return tex->ps_draw_ratio + tex->num_slow_clears >= 5;
}
/* Called by fast clear. */
void vi_separate_dcc_try_enable(struct si_context *sctx,
struct r600_texture *tex)
{
/* The intent is to use this with shared displayable back buffers,
* but it's not strictly limited only to them.
*/
if (!tex->resource.b.is_shared ||
!(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) ||
tex->resource.b.b.target != PIPE_TEXTURE_2D ||
tex->resource.b.b.last_level > 0 ||
!tex->surface.dcc_size)
return;
if (tex->dcc_offset)
return; /* already enabled */
/* Enable the DCC stat gathering. */
if (!tex->dcc_gather_statistics) {
tex->dcc_gather_statistics = true;
vi_separate_dcc_start_query(sctx, tex);
}
if (!vi_should_enable_separate_dcc(tex))
return; /* stats show that DCC decompression is too expensive */
assert(tex->surface.num_dcc_levels);
assert(!tex->dcc_separate_buffer);
si_texture_discard_cmask(sctx->screen, tex);
/* Get a DCC buffer. */
if (tex->last_dcc_separate_buffer) {
assert(tex->dcc_gather_statistics);
assert(!tex->dcc_separate_buffer);
tex->dcc_separate_buffer = tex->last_dcc_separate_buffer;
tex->last_dcc_separate_buffer = NULL;
} else {
tex->dcc_separate_buffer = (struct r600_resource*)
si_aligned_buffer_create(sctx->b.b.screen,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
tex->surface.dcc_size,
tex->surface.dcc_alignment);
if (!tex->dcc_separate_buffer)
return;
}
/* dcc_offset is the absolute GPUVM address. */
tex->dcc_offset = tex->dcc_separate_buffer->gpu_address;
/* no need to flag anything since this is called by fast clear that
* flags framebuffer state
*/
}
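/* Note on the per-slot query ring used below: ps_stats[0] is the query that
 * is currently accumulating samples, ps_stats[1] holds the previous interval,
 * and ps_stats[2] is the oldest one whose result is read back to estimate
 * ps_draw_ratio. Each call to vi_separate_dcc_process_and_reset_stats rotates
 * the three entries by one.
 */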
/**
* Called by pipe_context::flush_resource, the place where DCC decompression
* takes place.
*/
void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
struct r600_texture *tex)
{
struct si_context *sctx = (struct si_context*)ctx;
struct pipe_query *tmp;
unsigned i = vi_get_context_dcc_stats_index(sctx, tex);
bool query_active = sctx->b.dcc_stats[i].query_active;
bool disable = false;
if (sctx->b.dcc_stats[i].ps_stats[2]) {
union pipe_query_result result;
/* Read the results. */
ctx->get_query_result(ctx, sctx->b.dcc_stats[i].ps_stats[2],
true, &result);
si_query_hw_reset_buffers(sctx,
(struct r600_query_hw*)
sctx->b.dcc_stats[i].ps_stats[2]);
/* Compute the approximate number of fullscreen draws. */
tex->ps_draw_ratio =
result.pipeline_statistics.ps_invocations /
(tex->resource.b.b.width0 * tex->resource.b.b.height0);
sctx->b.last_tex_ps_draw_ratio = tex->ps_draw_ratio;
disable = tex->dcc_separate_buffer &&
!vi_should_enable_separate_dcc(tex);
}
tex->num_slow_clears = 0;
/* stop the statistics query for ps_stats[0] */
if (query_active)
vi_separate_dcc_stop_query(sctx, tex);
/* Move the queries in the queue by one. */
tmp = sctx->b.dcc_stats[i].ps_stats[2];
sctx->b.dcc_stats[i].ps_stats[2] = sctx->b.dcc_stats[i].ps_stats[1];
sctx->b.dcc_stats[i].ps_stats[1] = sctx->b.dcc_stats[i].ps_stats[0];
sctx->b.dcc_stats[i].ps_stats[0] = tmp;
/* create and start a new query as ps_stats[0] */
if (query_active)
vi_separate_dcc_start_query(sctx, tex);
if (disable) {
assert(!tex->last_dcc_separate_buffer);
tex->last_dcc_separate_buffer = tex->dcc_separate_buffer;
tex->dcc_separate_buffer = NULL;
tex->dcc_offset = 0;
/* no need to flag anything since this is called after
* decompression that re-sets framebuffer state
*/
}
}
static struct pipe_memory_object *
r600_memobj_from_handle(struct pipe_screen *screen,
struct winsys_handle *whandle,
bool dedicated)
{
struct si_screen *sscreen = (struct si_screen*)screen;
struct r600_memory_object *memobj = CALLOC_STRUCT(r600_memory_object);
struct pb_buffer *buf = NULL;
uint32_t stride, offset;
if (!memobj)
return NULL;
buf = sscreen->ws->buffer_from_handle(sscreen->ws, whandle,
&stride, &offset);
if (!buf) {
free(memobj);
return NULL;
}
memobj->b.dedicated = dedicated;
memobj->buf = buf;
memobj->stride = stride;
memobj->offset = offset;
return (struct pipe_memory_object *)memobj;
}
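
/* Destroy a memory object created by r600_memobj_from_handle, dropping
 * the reference to its winsys buffer.
 */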
static void
r600_memobj_destroy(struct pipe_screen *screen,
struct pipe_memory_object *_memobj)
{
struct r600_memory_object *memobj = (struct r600_memory_object *)_memobj;
pb_reference(&memobj->buf, NULL);
free(memobj);
}
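
/* Create a texture backed by a previously imported memory object at the
 * given offset. For dedicated allocations the tiling metadata is read
 * from the BO; otherwise the layout falls back to linear (see the
 * comment below).
 */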
static struct pipe_resource *
r600_texture_from_memobj(struct pipe_screen *screen,
const struct pipe_resource *templ,
struct pipe_memory_object *_memobj,
uint64_t offset)
{
int r;
struct si_screen *sscreen = (struct si_screen*)screen;
struct r600_memory_object *memobj = (struct r600_memory_object *)_memobj;
struct r600_texture *rtex;
struct radeon_surf surface = {};
struct radeon_bo_metadata metadata = {};
enum radeon_surf_mode array_mode;
bool is_scanout;
struct pb_buffer *buf = NULL;
if (memobj->b.dedicated) {
sscreen->ws->buffer_get_metadata(memobj->buf, &metadata);
r600_surface_import_metadata(sscreen, &surface, &metadata,
&array_mode, &is_scanout);
} else {
/**
 * The BO metadata is unset for non-dedicated images, so we fall
 * back to linear. See the answer to question 5 of the
 * VK_KHX_external_memory spec for some details.
 *
 * This case may not work if the surface pitch isn't correctly
 * aligned by default.
 *
 * Supporting it correctly would require multi-image metadata to
 * be synchronized between radv and radeonsi. The semantics of
 * associating multiple image metadata with a memory object on
 * the Vulkan export side are not concretely defined either.
 *
 * All the use cases we are aware of at the moment for memory
 * objects use dedicated allocations, so let's keep the initial
 * implementation simple.
 *
 * A possible alternative is to attempt to reconstruct the tiling
 * information when the TexParameter TEXTURE_TILING_EXT is set.
 */
array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
is_scanout = false;
}
r = r600_init_surface(sscreen, &surface, templ,
array_mode, memobj->stride,
offset, true, is_scanout,
false, false);
if (r)
return NULL;
rtex = r600_texture_create_object(screen, templ, memobj->buf, &surface);
if (!rtex)
return NULL;
/* r600_texture_create_object doesn't increment the refcount of
 * memobj->buf, so increment it here.
 */
pb_reference(&buf, memobj->buf);
rtex->resource.b.is_shared = true;
rtex->resource.external_usage = PIPE_HANDLE_USAGE_READ_WRITE;
si_apply_opaque_metadata(sscreen, rtex, &metadata);
return &rtex->resource.b.b;
}
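
/* Report whether an existing resource is compatible with the given bind
 * flags, e.g. whether it can be mapped linearly or scanned out without
 * reallocation.
 */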
static bool si_check_resource_capability(struct pipe_screen *screen,
struct pipe_resource *resource,
unsigned bind)
{
struct r600_texture *tex = (struct r600_texture*)resource;
/* Buffers only support the linear flag. */
if (resource->target == PIPE_BUFFER)
return (bind & ~PIPE_BIND_LINEAR) == 0;
if (bind & PIPE_BIND_LINEAR && !tex->surface.is_linear)
return false;
if (bind & PIPE_BIND_SCANOUT && !tex->surface.is_displayable)
return false;
/* TODO: PIPE_BIND_CURSOR - do we care? */
return true;
}
void si_init_screen_texture_functions(struct si_screen *sscreen)
{
sscreen->b.resource_from_handle = r600_texture_from_handle;
sscreen->b.resource_get_handle = r600_texture_get_handle;
sscreen->b.resource_from_memobj = r600_texture_from_memobj;
sscreen->b.memobj_create_from_handle = r600_memobj_from_handle;
sscreen->b.memobj_destroy = r600_memobj_destroy;
sscreen->b.check_resource_capability = si_check_resource_capability;
}
void si_init_context_texture_functions(struct si_context *sctx)
{
sctx->b.b.create_surface = r600_create_surface;
sctx->b.b.surface_destroy = r600_surface_destroy;
}