r600: fork and import gallium/radeon

This marks the end of code sharing between r600 and radeonsi.
It's getting difficult to work on radeonsi without breaking r600.

A lot of functions had to be renamed to prevent linker conflicts.
There are also minor cleanups.

Acked-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Marek Olšák 2017-09-13 02:26:26 +02:00
parent e1623da818
commit 06bfb2d28f
66 changed files with 15238 additions and 977 deletions


@@ -2704,8 +2704,7 @@ AM_CONDITIONAL(HAVE_GALLIUM_PL111, test "x$HAVE_GALLIUM_PL111" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_R300, test "x$HAVE_GALLIUM_R300" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_R600, test "x$HAVE_GALLIUM_R600" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_RADEONSI, test "x$HAVE_GALLIUM_RADEONSI" = xyes)
-AM_CONDITIONAL(HAVE_GALLIUM_RADEON_COMMON, test "x$HAVE_GALLIUM_R600" = xyes -o \
-                                                "x$HAVE_GALLIUM_RADEONSI" = xyes)
+AM_CONDITIONAL(HAVE_GALLIUM_RADEON_COMMON, test "x$HAVE_GALLIUM_RADEONSI" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_NOUVEAU, test "x$HAVE_GALLIUM_NOUVEAU" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_FREEDRENO, test "x$HAVE_GALLIUM_FREEDRENO" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_ETNAVIV, test "x$HAVE_GALLIUM_ETNAVIV" = xyes)


@@ -42,7 +42,7 @@ if HAVE_GALLIUM_R300
 SUBDIRS += drivers/r300
 endif
-## radeon - linked into r600 and radeonsi
+## radeon - linked into radeonsi
 if HAVE_GALLIUM_RADEON_COMMON
 SUBDIRS += drivers/radeon
 endif


@@ -11,6 +11,4 @@ TARGET_LIB_DEPS += \
 TARGET_RADEON_WINSYS = \
	$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la
-TARGET_RADEON_COMMON = \
-	$(top_builddir)/src/gallium/drivers/radeon/libradeon.la
 endif


@@ -27,8 +27,7 @@ libr600_la_SOURCES = \
 if HAVE_GALLIUM_LLVM
 AM_CFLAGS += \
-	$(LLVM_CFLAGS) \
-	-I$(top_srcdir)/src/gallium/drivers/radeon/
+	$(LLVM_CFLAGS)
 endif


@@ -29,7 +29,26 @@ C_SOURCES = \
	r600_state_common.c \
	r600_uvd.c \
	r700_asm.c \
-	r700_sq.h
+	r700_sq.h \
+	cayman_msaa.c \
+	r600_buffer_common.c \
+	r600_cs.h \
+	r600_gpu_load.c \
+	r600_perfcounter.c \
+	r600_pipe_common.c \
+	r600_pipe_common.h \
+	r600_query.c \
+	r600_query.h \
+	r600_streamout.c \
+	r600_test_dma.c \
+	r600_texture.c \
+	r600_viewport.c \
+	radeon_uvd.c \
+	radeon_uvd.h \
+	radeon_vce.c \
+	radeon_vce.h \
+	radeon_video.c \
+	radeon_video.h
 CXX_SOURCES = \
	sb/sb_bc_builder.cpp \


@@ -0,0 +1,269 @@
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Marek Olšák <maraeo@gmail.com>
*
*/
#include "r600_cs.h"
/* 2xMSAA
* There are two locations (4, 4), (-4, -4). */
const uint32_t eg_sample_locs_2x[4] = {
FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
};
const unsigned eg_max_dist_2x = 4;
/* 4xMSAA
* There are 4 locations: (-2, -6), (6, -2), (-6, 2), (2, 6). */
const uint32_t eg_sample_locs_4x[4] = {
FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
};
const unsigned eg_max_dist_4x = 6;
/* Cayman 8xMSAA */
static const uint32_t cm_sample_locs_8x[] = {
FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
};
static const unsigned cm_max_dist_8x = 8;
/* Cayman 16xMSAA */
static const uint32_t cm_sample_locs_16x[] = {
FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
};
static const unsigned cm_max_dist_16x = 8;
void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
unsigned sample_index, float *out_value)
{
int offset, index;
struct {
int idx:4;
} val;
switch (sample_count) {
case 1:
default:
out_value[0] = out_value[1] = 0.5;
break;
case 2:
offset = 4 * (sample_index * 2);
val.idx = (eg_sample_locs_2x[0] >> offset) & 0xf;
out_value[0] = (float)(val.idx + 8) / 16.0f;
val.idx = (eg_sample_locs_2x[0] >> (offset + 4)) & 0xf;
out_value[1] = (float)(val.idx + 8) / 16.0f;
break;
case 4:
offset = 4 * (sample_index * 2);
val.idx = (eg_sample_locs_4x[0] >> offset) & 0xf;
out_value[0] = (float)(val.idx + 8) / 16.0f;
val.idx = (eg_sample_locs_4x[0] >> (offset + 4)) & 0xf;
out_value[1] = (float)(val.idx + 8) / 16.0f;
break;
case 8:
offset = 4 * (sample_index % 4 * 2);
index = (sample_index / 4) * 4;
val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf;
out_value[0] = (float)(val.idx + 8) / 16.0f;
val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf;
out_value[1] = (float)(val.idx + 8) / 16.0f;
break;
case 16:
offset = 4 * (sample_index % 4 * 2);
index = (sample_index / 4) * 4;
val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf;
out_value[0] = (float)(val.idx + 8) / 16.0f;
val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf;
out_value[1] = (float)(val.idx + 8) / 16.0f;
break;
}
}
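/* Worked example of the decoding above: for 2x MSAA, sample 0 occupies the
 * low nibble pair of eg_sample_locs_2x[0]. The 4-bit signed offsets are
 * (4, 4) in 1/16-pixel units, so the reported position is
 * ((4 + 8) / 16, (4 + 8) / 16) = (0.75, 0.75); sample 1 decodes from
 * (-4, -4) to (0.25, 0.25). */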
void cayman_init_msaa(struct pipe_context *ctx)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
int i;
cayman_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]);
for (i = 0; i < 2; i++)
cayman_get_sample_position(ctx, 2, i, rctx->sample_locations_2x[i]);
for (i = 0; i < 4; i++)
cayman_get_sample_position(ctx, 4, i, rctx->sample_locations_4x[i]);
for (i = 0; i < 8; i++)
cayman_get_sample_position(ctx, 8, i, rctx->sample_locations_8x[i]);
for (i = 0; i < 16; i++)
cayman_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]);
}
void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
{
switch (nr_samples) {
default:
case 1:
radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0);
radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0);
radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0);
radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0);
break;
case 2:
radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]);
radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]);
break;
case 4:
radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]);
radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]);
radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]);
radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]);
break;
case 8:
radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
radeon_emit(cs, cm_sample_locs_8x[0]);
radeon_emit(cs, cm_sample_locs_8x[4]);
radeon_emit(cs, 0);
radeon_emit(cs, 0);
radeon_emit(cs, cm_sample_locs_8x[1]);
radeon_emit(cs, cm_sample_locs_8x[5]);
radeon_emit(cs, 0);
radeon_emit(cs, 0);
radeon_emit(cs, cm_sample_locs_8x[2]);
radeon_emit(cs, cm_sample_locs_8x[6]);
radeon_emit(cs, 0);
radeon_emit(cs, 0);
radeon_emit(cs, cm_sample_locs_8x[3]);
radeon_emit(cs, cm_sample_locs_8x[7]);
break;
case 16:
radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
radeon_emit(cs, cm_sample_locs_16x[0]);
radeon_emit(cs, cm_sample_locs_16x[4]);
radeon_emit(cs, cm_sample_locs_16x[8]);
radeon_emit(cs, cm_sample_locs_16x[12]);
radeon_emit(cs, cm_sample_locs_16x[1]);
radeon_emit(cs, cm_sample_locs_16x[5]);
radeon_emit(cs, cm_sample_locs_16x[9]);
radeon_emit(cs, cm_sample_locs_16x[13]);
radeon_emit(cs, cm_sample_locs_16x[2]);
radeon_emit(cs, cm_sample_locs_16x[6]);
radeon_emit(cs, cm_sample_locs_16x[10]);
radeon_emit(cs, cm_sample_locs_16x[14]);
radeon_emit(cs, cm_sample_locs_16x[3]);
radeon_emit(cs, cm_sample_locs_16x[7]);
radeon_emit(cs, cm_sample_locs_16x[11]);
radeon_emit(cs, cm_sample_locs_16x[15]);
break;
}
}
void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
int ps_iter_samples, int overrast_samples,
unsigned sc_mode_cntl_1)
{
int setup_samples = nr_samples > 1 ? nr_samples :
overrast_samples > 1 ? overrast_samples : 0;
/* Required by OpenGL line rasterization.
*
* TODO: We should also enable perpendicular endcaps for AA lines,
* but that requires implementing line stippling in the pixel
* shader. SC can only do line stippling with axis-aligned
* endcaps.
*/
unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);
if (setup_samples > 1) {
/* indexed by log2(nr_samples) */
unsigned max_dist[] = {
0,
eg_max_dist_2x,
eg_max_dist_4x,
cm_max_dist_8x,
cm_max_dist_16x
};
unsigned log_samples = util_logbase2(setup_samples);
unsigned log_ps_iter_samples =
util_logbase2(util_next_power_of_two(ps_iter_samples));
radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
radeon_emit(cs, sc_line_cntl |
S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */
if (nr_samples > 1) {
radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
sc_mode_cntl_1);
} else if (overrast_samples > 1) {
radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
sc_mode_cntl_1);
}
} else {
radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
radeon_emit(cs, sc_line_cntl); /* CM_R_028BDC_PA_SC_LINE_CNTL */
radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
sc_mode_cntl_1);
}
}
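/* For illustration: with nr_samples = 4 and ps_iter_samples = 1, log_samples
 * is 2, so PA_SC_AA_CONFIG is programmed with MSAA_NUM_SAMPLES = 2,
 * MSAA_EXPOSED_SAMPLES = 2 and MAX_SAMPLE_DIST = eg_max_dist_4x = 6, while
 * DB_EQAA gets PS_ITER_SAMPLES = 0. */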


@@ -0,0 +1,687 @@
/*
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Marek Olšák
*/
#include "r600_cs.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include <inttypes.h>
#include <stdio.h>
bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
struct pb_buffer *buf,
enum radeon_bo_usage usage)
{
if (ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, buf, usage)) {
return true;
}
if (radeon_emitted(ctx->dma.cs, 0) &&
ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, buf, usage)) {
return true;
}
return false;
}
void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
struct r600_resource *resource,
unsigned usage)
{
enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
bool busy = false;
assert(!(resource->flags & RADEON_FLAG_SPARSE));
if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
return ctx->ws->buffer_map(resource->buf, NULL, usage);
}
if (!(usage & PIPE_TRANSFER_WRITE)) {
/* have to wait for the last write */
rusage = RADEON_USAGE_WRITE;
}
if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs,
resource->buf, rusage)) {
if (usage & PIPE_TRANSFER_DONTBLOCK) {
ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
return NULL;
} else {
ctx->gfx.flush(ctx, 0, NULL);
busy = true;
}
}
if (radeon_emitted(ctx->dma.cs, 0) &&
ctx->ws->cs_is_buffer_referenced(ctx->dma.cs,
resource->buf, rusage)) {
if (usage & PIPE_TRANSFER_DONTBLOCK) {
ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
return NULL;
} else {
ctx->dma.flush(ctx, 0, NULL);
busy = true;
}
}
if (busy || !ctx->ws->buffer_wait(resource->buf, 0, rusage)) {
if (usage & PIPE_TRANSFER_DONTBLOCK) {
return NULL;
} else {
/* We will have to wait for the GPU. Wait for any offloaded
* CS flush to complete to avoid busy-waiting in the winsys. */
ctx->ws->cs_sync_flush(ctx->gfx.cs);
if (ctx->dma.cs)
ctx->ws->cs_sync_flush(ctx->dma.cs);
}
}
/* Passing NULL as the CS avoids redoing the busy checks we have already done above. */
return ctx->ws->buffer_map(resource->buf, NULL, usage);
}
void r600_init_resource_fields(struct r600_common_screen *rscreen,
struct r600_resource *res,
uint64_t size, unsigned alignment)
{
struct r600_texture *rtex = (struct r600_texture*)res;
res->bo_size = size;
res->bo_alignment = alignment;
res->flags = 0;
res->texture_handle_allocated = false;
res->image_handle_allocated = false;
switch (res->b.b.usage) {
case PIPE_USAGE_STREAM:
res->flags = RADEON_FLAG_GTT_WC;
/* fall through */
case PIPE_USAGE_STAGING:
/* Transfers are likely to occur more often with these
* resources. */
res->domains = RADEON_DOMAIN_GTT;
break;
case PIPE_USAGE_DYNAMIC:
/* Older kernels didn't always flush the HDP cache before
* CS execution
*/
if (rscreen->info.drm_major == 2 &&
rscreen->info.drm_minor < 40) {
res->domains = RADEON_DOMAIN_GTT;
res->flags |= RADEON_FLAG_GTT_WC;
break;
}
/* fall through */
case PIPE_USAGE_DEFAULT:
case PIPE_USAGE_IMMUTABLE:
default:
/* Not listing GTT here improves performance in some
* apps. */
res->domains = RADEON_DOMAIN_VRAM;
res->flags |= RADEON_FLAG_GTT_WC;
break;
}
if (res->b.b.target == PIPE_BUFFER &&
res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
/* Use GTT for all persistent mappings with older
* kernels, because they didn't always flush the HDP
* cache before CS execution.
*
* Write-combined CPU mappings are fine, the kernel
* ensures all CPU writes finish before the GPU
* executes a command stream.
*/
if (rscreen->info.drm_major == 2 &&
rscreen->info.drm_minor < 40)
res->domains = RADEON_DOMAIN_GTT;
}
/* Tiled textures are unmappable. Always put them in VRAM. */
if ((res->b.b.target != PIPE_BUFFER && !rtex->surface.is_linear) ||
res->flags & R600_RESOURCE_FLAG_UNMAPPABLE) {
res->domains = RADEON_DOMAIN_VRAM;
res->flags |= RADEON_FLAG_NO_CPU_ACCESS |
RADEON_FLAG_GTT_WC;
}
/* Only displayable single-sample textures can be shared between
* processes. */
if (res->b.b.target == PIPE_BUFFER ||
res->b.b.nr_samples >= 2 ||
(rtex->surface.micro_tile_mode != RADEON_MICRO_MODE_DISPLAY &&
/* Raven doesn't use display micro mode for 32bpp, so check this: */
!(res->b.b.bind & PIPE_BIND_SCANOUT)))
res->flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
/* If VRAM is just stolen system memory, allow both VRAM and
* GTT, whichever has free space. If a buffer is evicted from
* VRAM to GTT, it will stay there.
*
* DRM 3.6.0 has good BO move throttling, so we can allow VRAM-only
* placements even with a low amount of stolen VRAM.
*/
if (!rscreen->info.has_dedicated_vram &&
(rscreen->info.drm_major < 3 || rscreen->info.drm_minor < 6) &&
res->domains == RADEON_DOMAIN_VRAM) {
res->domains = RADEON_DOMAIN_VRAM_GTT;
res->flags &= ~RADEON_FLAG_NO_CPU_ACCESS; /* disallowed with VRAM_GTT */
}
if (rscreen->debug_flags & DBG_NO_WC)
res->flags &= ~RADEON_FLAG_GTT_WC;
if (res->b.b.bind & PIPE_BIND_SHARED)
res->flags |= RADEON_FLAG_NO_SUBALLOC;
/* Set expected VRAM and GART usage for the buffer. */
res->vram_usage = 0;
res->gart_usage = 0;
if (res->domains & RADEON_DOMAIN_VRAM)
res->vram_usage = size;
else if (res->domains & RADEON_DOMAIN_GTT)
res->gart_usage = size;
}
bool r600_alloc_resource(struct r600_common_screen *rscreen,
struct r600_resource *res)
{
struct pb_buffer *old_buf, *new_buf;
/* Allocate a new resource. */
new_buf = rscreen->ws->buffer_create(rscreen->ws, res->bo_size,
res->bo_alignment,
res->domains, res->flags);
if (!new_buf) {
return false;
}
/* Replace the pointer such that if res->buf wasn't NULL, it won't be
* NULL. This should prevent crashes with multiple contexts using
* the same buffer where one of the contexts invalidates it while
* the others are using it. */
old_buf = res->buf;
res->buf = new_buf; /* should be atomic */
if (rscreen->info.has_virtual_memory)
res->gpu_address = rscreen->ws->buffer_get_virtual_address(res->buf);
else
res->gpu_address = 0;
pb_reference(&old_buf, NULL);
util_range_set_empty(&res->valid_buffer_range);
res->TC_L2_dirty = false;
/* Print debug information. */
if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) {
fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %"PRIu64" bytes\n",
res->gpu_address, res->gpu_address + res->buf->size,
res->buf->size);
}
return true;
}
static void r600_buffer_destroy(struct pipe_screen *screen,
struct pipe_resource *buf)
{
struct r600_resource *rbuffer = r600_resource(buf);
threaded_resource_deinit(buf);
util_range_destroy(&rbuffer->valid_buffer_range);
pb_reference(&rbuffer->buf, NULL);
FREE(rbuffer);
}
static bool
r600_invalidate_buffer(struct r600_common_context *rctx,
struct r600_resource *rbuffer)
{
/* Shared buffers can't be reallocated. */
if (rbuffer->b.is_shared)
return false;
/* Sparse buffers can't be reallocated. */
if (rbuffer->flags & RADEON_FLAG_SPARSE)
return false;
/* In AMD_pinned_memory, the user pointer association only gets
* broken when the buffer is explicitly re-allocated.
*/
if (rbuffer->b.is_user_ptr)
return false;
/* Check if mapping this buffer would cause waiting for the GPU. */
if (r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
!rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
} else {
util_range_set_empty(&rbuffer->valid_buffer_range);
}
return true;
}
/* Replace the storage of dst with src. */
void r600_replace_buffer_storage(struct pipe_context *ctx,
struct pipe_resource *dst,
struct pipe_resource *src)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_resource *rdst = r600_resource(dst);
struct r600_resource *rsrc = r600_resource(src);
uint64_t old_gpu_address = rdst->gpu_address;
pb_reference(&rdst->buf, rsrc->buf);
rdst->gpu_address = rsrc->gpu_address;
rdst->b.b.bind = rsrc->b.b.bind;
rdst->flags = rsrc->flags;
assert(rdst->vram_usage == rsrc->vram_usage);
assert(rdst->gart_usage == rsrc->gart_usage);
assert(rdst->bo_size == rsrc->bo_size);
assert(rdst->bo_alignment == rsrc->bo_alignment);
assert(rdst->domains == rsrc->domains);
rctx->rebind_buffer(ctx, dst, old_gpu_address);
}
void r600_invalidate_resource(struct pipe_context *ctx,
struct pipe_resource *resource)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_resource *rbuffer = r600_resource(resource);
/* We currently only do anything here for buffers */
if (resource->target == PIPE_BUFFER)
(void)r600_invalidate_buffer(rctx, rbuffer);
}
static void *r600_buffer_get_transfer(struct pipe_context *ctx,
struct pipe_resource *resource,
unsigned usage,
const struct pipe_box *box,
struct pipe_transfer **ptransfer,
void *data, struct r600_resource *staging,
unsigned offset)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_transfer *transfer;
if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC)
transfer = slab_alloc(&rctx->pool_transfers_unsync);
else
transfer = slab_alloc(&rctx->pool_transfers);
transfer->b.b.resource = NULL;
pipe_resource_reference(&transfer->b.b.resource, resource);
transfer->b.b.level = 0;
transfer->b.b.usage = usage;
transfer->b.b.box = *box;
transfer->b.b.stride = 0;
transfer->b.b.layer_stride = 0;
transfer->b.staging = NULL;
transfer->offset = offset;
transfer->staging = staging;
*ptransfer = &transfer->b.b;
return data;
}
static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx,
unsigned dstx, unsigned srcx, unsigned size)
{
bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4);
return rctx->screen->has_cp_dma ||
(dword_aligned && (rctx->dma.cs ||
rctx->screen->has_streamout));
}
static void *r600_buffer_transfer_map(struct pipe_context *ctx,
struct pipe_resource *resource,
unsigned level,
unsigned usage,
const struct pipe_box *box,
struct pipe_transfer **ptransfer)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
struct r600_resource *rbuffer = r600_resource(resource);
uint8_t *data;
assert(box->x + box->width <= resource->width0);
/* From GL_AMD_pinned_memory issues:
*
* 4) Is glMapBuffer on a shared buffer guaranteed to return the
* same system address which was specified at creation time?
*
* RESOLVED: NO. The GL implementation might return a different
* virtual mapping of that memory, although the same physical
* page will be used.
*
* So don't ever use staging buffers.
*/
if (rbuffer->b.is_user_ptr)
usage |= PIPE_TRANSFER_PERSISTENT;
/* See if the buffer range being mapped has never been initialized,
* in which case it can be mapped unsynchronized. */
if (!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED)) &&
usage & PIPE_TRANSFER_WRITE &&
!rbuffer->b.is_shared &&
!util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
}
/* If discarding the entire range, discard the whole resource instead. */
if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
box->x == 0 && box->width == resource->width0) {
usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
}
if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
TC_TRANSFER_MAP_NO_INVALIDATE))) {
assert(usage & PIPE_TRANSFER_WRITE);
if (r600_invalidate_buffer(rctx, rbuffer)) {
/* At this point, the buffer is always idle. */
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
} else {
/* Fall back to a temporary buffer. */
usage |= PIPE_TRANSFER_DISCARD_RANGE;
}
}
if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
!(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
((!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
PIPE_TRANSFER_PERSISTENT)) &&
r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) ||
(rbuffer->flags & RADEON_FLAG_SPARSE))) {
assert(usage & PIPE_TRANSFER_WRITE);
/* Check if mapping this buffer would cause waiting for the GPU.
*/
if (rbuffer->flags & RADEON_FLAG_SPARSE ||
r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
!rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
/* Do a wait-free write-only transfer using a temporary buffer. */
unsigned offset;
struct r600_resource *staging = NULL;
u_upload_alloc(ctx->stream_uploader, 0,
box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
rctx->screen->info.tcc_cache_line_size,
&offset, (struct pipe_resource**)&staging,
(void**)&data);
if (staging) {
data += box->x % R600_MAP_BUFFER_ALIGNMENT;
return r600_buffer_get_transfer(ctx, resource, usage, box,
ptransfer, data, staging, offset);
} else if (rbuffer->flags & RADEON_FLAG_SPARSE) {
return NULL;
}
} else {
/* At this point, the buffer is always idle (we checked it above). */
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
}
}
/* Use a staging buffer in cached GTT for reads. */
else if (((usage & PIPE_TRANSFER_READ) &&
!(usage & PIPE_TRANSFER_PERSISTENT) &&
(rbuffer->domains & RADEON_DOMAIN_VRAM ||
rbuffer->flags & RADEON_FLAG_GTT_WC) &&
r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) ||
(rbuffer->flags & RADEON_FLAG_SPARSE)) {
struct r600_resource *staging;
assert(!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC));
staging = (struct r600_resource*) pipe_buffer_create(
ctx->screen, 0, PIPE_USAGE_STAGING,
box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT));
if (staging) {
/* Copy the VRAM buffer to the staging buffer. */
rctx->dma_copy(ctx, &staging->b.b, 0,
box->x % R600_MAP_BUFFER_ALIGNMENT,
0, 0, resource, 0, box);
data = r600_buffer_map_sync_with_rings(rctx, staging,
usage & ~PIPE_TRANSFER_UNSYNCHRONIZED);
if (!data) {
r600_resource_reference(&staging, NULL);
return NULL;
}
data += box->x % R600_MAP_BUFFER_ALIGNMENT;
return r600_buffer_get_transfer(ctx, resource, usage, box,
ptransfer, data, staging, 0);
} else if (rbuffer->flags & RADEON_FLAG_SPARSE) {
return NULL;
}
}
data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage);
if (!data) {
return NULL;
}
data += box->x;
return r600_buffer_get_transfer(ctx, resource, usage, box,
ptransfer, data, NULL, 0);
}
static void r600_buffer_do_flush_region(struct pipe_context *ctx,
struct pipe_transfer *transfer,
const struct pipe_box *box)
{
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
struct r600_resource *rbuffer = r600_resource(transfer->resource);
if (rtransfer->staging) {
struct pipe_resource *dst, *src;
unsigned soffset;
struct pipe_box dma_box;
dst = transfer->resource;
src = &rtransfer->staging->b.b;
soffset = rtransfer->offset + box->x % R600_MAP_BUFFER_ALIGNMENT;
u_box_1d(soffset, box->width, &dma_box);
/* Copy the staging buffer into the original one. */
ctx->resource_copy_region(ctx, dst, 0, box->x, 0, 0, src, 0, &dma_box);
}
util_range_add(&rbuffer->valid_buffer_range, box->x,
box->x + box->width);
}
static void r600_buffer_flush_region(struct pipe_context *ctx,
struct pipe_transfer *transfer,
const struct pipe_box *rel_box)
{
unsigned required_usage = PIPE_TRANSFER_WRITE |
PIPE_TRANSFER_FLUSH_EXPLICIT;
if ((transfer->usage & required_usage) == required_usage) {
struct pipe_box box;
u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box);
r600_buffer_do_flush_region(ctx, transfer, &box);
}
}
static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer *transfer)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
if (transfer->usage & PIPE_TRANSFER_WRITE &&
!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
r600_buffer_do_flush_region(ctx, transfer, &transfer->box);
r600_resource_reference(&rtransfer->staging, NULL);
assert(rtransfer->b.staging == NULL); /* for threaded context only */
pipe_resource_reference(&transfer->resource, NULL);
/* Don't use pool_transfers_unsync. We are always in the driver
* thread. */
slab_free(&rctx->pool_transfers, transfer);
}
void r600_buffer_subdata(struct pipe_context *ctx,
struct pipe_resource *buffer,
unsigned usage, unsigned offset,
unsigned size, const void *data)
{
struct pipe_transfer *transfer = NULL;
struct pipe_box box;
uint8_t *map = NULL;
u_box_1d(offset, size, &box);
map = r600_buffer_transfer_map(ctx, buffer, 0,
PIPE_TRANSFER_WRITE |
PIPE_TRANSFER_DISCARD_RANGE |
usage,
&box, &transfer);
if (!map)
return;
memcpy(map, data, size);
r600_buffer_transfer_unmap(ctx, transfer);
}
static const struct u_resource_vtbl r600_buffer_vtbl =
{
NULL, /* get_handle */
r600_buffer_destroy, /* resource_destroy */
r600_buffer_transfer_map, /* transfer_map */
r600_buffer_flush_region, /* transfer_flush_region */
r600_buffer_transfer_unmap, /* transfer_unmap */
};
static struct r600_resource *
r600_alloc_buffer_struct(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
struct r600_resource *rbuffer;
rbuffer = MALLOC_STRUCT(r600_resource);
rbuffer->b.b = *templ;
rbuffer->b.b.next = NULL;
pipe_reference_init(&rbuffer->b.b.reference, 1);
rbuffer->b.b.screen = screen;
rbuffer->b.vtbl = &r600_buffer_vtbl;
threaded_resource_init(&rbuffer->b.b);
rbuffer->buf = NULL;
rbuffer->bind_history = 0;
rbuffer->TC_L2_dirty = false;
util_range_init(&rbuffer->valid_buffer_range);
return rbuffer;
}
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ,
unsigned alignment)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
r600_init_resource_fields(rscreen, rbuffer, templ->width0, alignment);
if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE)
rbuffer->flags |= RADEON_FLAG_SPARSE;
if (!r600_alloc_resource(rscreen, rbuffer)) {
FREE(rbuffer);
return NULL;
}
return &rbuffer->b.b;
}
struct pipe_resource *r600_aligned_buffer_create(struct pipe_screen *screen,
unsigned flags,
unsigned usage,
unsigned size,
unsigned alignment)
{
struct pipe_resource buffer;
memset(&buffer, 0, sizeof buffer);
buffer.target = PIPE_BUFFER;
buffer.format = PIPE_FORMAT_R8_UNORM;
buffer.bind = 0;
buffer.usage = usage;
buffer.flags = flags;
buffer.width0 = size;
buffer.height0 = 1;
buffer.depth0 = 1;
buffer.array_size = 1;
return r600_buffer_create(screen, &buffer, alignment);
}
struct pipe_resource *
r600_buffer_from_user_memory(struct pipe_screen *screen,
const struct pipe_resource *templ,
void *user_memory)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct radeon_winsys *ws = rscreen->ws;
struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
rbuffer->domains = RADEON_DOMAIN_GTT;
rbuffer->flags = 0;
rbuffer->b.is_user_ptr = true;
util_range_add(&rbuffer->valid_buffer_range, 0, templ->width0);
util_range_add(&rbuffer->b.valid_buffer_range, 0, templ->width0);
/* Convert a user pointer to a buffer. */
rbuffer->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0);
if (!rbuffer->buf) {
FREE(rbuffer);
return NULL;
}
if (rscreen->info.has_virtual_memory)
rbuffer->gpu_address =
ws->buffer_get_virtual_address(rbuffer->buf);
else
rbuffer->gpu_address = 0;
rbuffer->vram_usage = 0;
rbuffer->gart_usage = templ->width0;
return &rbuffer->b.b;
}


@@ -0,0 +1,209 @@
/*
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Marek Olšák <maraeo@gmail.com>
*/
/**
* This file contains helpers for writing commands to command streams.
*/
#ifndef R600_CS_H
#define R600_CS_H
#include "r600_pipe_common.h"
#include "amd/common/r600d_common.h"
/**
* Return true if there is enough memory in VRAM and GTT for the buffers
* added so far.
*
* \param vram VRAM memory size not added to the buffer list yet
* \param gtt GTT memory size not added to the buffer list yet
*/
static inline bool
radeon_cs_memory_below_limit(struct r600_common_screen *screen,
struct radeon_winsys_cs *cs,
uint64_t vram, uint64_t gtt)
{
vram += cs->used_vram;
gtt += cs->used_gart;
/* Anything that goes above the VRAM size should go to GTT. */
if (vram > screen->info.vram_size)
gtt += vram - screen->info.vram_size;
/* Now we just need to check if we have enough GTT. */
return gtt < screen->info.gart_size * 0.7;
}
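/* For illustration (numbers are hypothetical): with vram_size = 4 GiB and
 * gart_size = 4 GiB, a total VRAM request of 4.5 GiB spills 0.5 GiB into
 * the GTT estimate, and the check passes only while the resulting GTT
 * total stays below 0.7 * 4 GiB = 2.8 GiB. */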
/**
* Add a buffer to the buffer list for the given command stream (CS).
*
* All buffers used by a CS must be added to the list. This tells the kernel
* driver which buffers are used by GPU commands. Other buffers can
* be swapped out (not accessible) during execution.
*
* The buffer list becomes empty after every context flush and must be
* rebuilt.
*/
static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rctx,
struct r600_ring *ring,
struct r600_resource *rbo,
enum radeon_bo_usage usage,
enum radeon_bo_priority priority)
{
assert(usage);
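/* cs_add_buffer returns the index of the buffer in the CS relocation list;
 * each relocation entry is 4 dwords, so the NOP relocation packet
 * (see r600_emit_reloc below) stores index * 4. */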
return rctx->ws->cs_add_buffer(
ring->cs, rbo->buf,
(enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED),
rbo->domains, priority) * 4;
}
/**
* Same as above, but also checks memory usage and flushes the context
* accordingly.
*
* When this SHOULD NOT be used:
*
* - if r600_context_add_resource_size has been called for the buffer
* followed by *_need_cs_space for checking the memory usage
*
* - if r600_need_dma_space has been called for the buffer
*
* - when emitting state packets and draw packets (because preceding packets
* can't be re-emitted at that point)
*
* - if shader resource "enabled_mask" is not up-to-date or there is
* a different constraint disallowing a context flush
*/
static inline unsigned
radeon_add_to_buffer_list_check_mem(struct r600_common_context *rctx,
struct r600_ring *ring,
struct r600_resource *rbo,
enum radeon_bo_usage usage,
enum radeon_bo_priority priority,
bool check_mem)
{
if (check_mem &&
!radeon_cs_memory_below_limit(rctx->screen, ring->cs,
rctx->vram + rbo->vram_usage,
rctx->gtt + rbo->gart_usage))
ring->flush(rctx, RADEON_FLUSH_ASYNC, NULL);
return radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority);
}
static inline void r600_emit_reloc(struct r600_common_context *rctx,
struct r600_ring *ring, struct r600_resource *rbo,
enum radeon_bo_usage usage,
enum radeon_bo_priority priority)
{
struct radeon_winsys_cs *cs = ring->cs;
bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.has_virtual_memory;
unsigned reloc = radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority);
if (!has_vm) {
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, reloc);
}
}
static inline void radeon_set_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg < R600_CONTEXT_REG_OFFSET);
assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2);
}
static inline void radeon_set_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
radeon_set_config_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
static inline void radeon_set_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= R600_CONTEXT_REG_OFFSET);
assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2);
}
static inline void radeon_set_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
radeon_set_context_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
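/* For illustration: radeon_set_context_reg(cs, reg, value) emits three dwords:
 * the PKT3_SET_CONTEXT_REG header with a count of 1, the register offset
 * (reg - R600_CONTEXT_REG_OFFSET) >> 2, and the value. E.g. a context register
 * at 0x28800 yields an offset dword of (0x28800 - 0x28000) >> 2 = 0x200,
 * assuming R600_CONTEXT_REG_OFFSET is 0x28000 as on r600/evergreen. */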
static inline void radeon_set_context_reg_idx(struct radeon_winsys_cs *cs,
unsigned reg, unsigned idx,
unsigned value)
{
assert(reg >= R600_CONTEXT_REG_OFFSET);
assert(cs->current.cdw + 3 <= cs->current.max_dw);
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
radeon_emit(cs, value);
}
static inline void radeon_set_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
}
static inline void radeon_set_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
radeon_set_sh_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
static inline void radeon_set_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
}
static inline void radeon_set_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
radeon_set_uconfig_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
static inline void radeon_set_uconfig_reg_idx(struct radeon_winsys_cs *cs,
unsigned reg, unsigned idx,
unsigned value)
{
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
assert(cs->current.cdw + 3 <= cs->current.max_dw);
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0));
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2 | (idx << 28));
radeon_emit(cs, value);
}
#endif


@@ -0,0 +1,283 @@
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Marek Olšák <maraeo@gmail.com>
*
*/
/* The GPU load is measured as follows.
*
* There is a thread which samples the GRBM_STATUS register at a certain
* frequency and the "busy" or "idle" counter is incremented based on
* whether the GUI_ACTIVE bit is set or not.
*
* Then, the user can sample the counters twice and calculate the average
* GPU load between the two samples.
*/
#include "r600_pipe_common.h"
#include "r600_query.h"
#include "os/os_time.h"
/* For good accuracy at 1000 fps or lower. This will be inaccurate for higher
* fps (there are too few samples per frame). */
#define SAMPLES_PER_SEC 10000
#define GRBM_STATUS 0x8010
#define TA_BUSY(x) (((x) >> 14) & 0x1)
#define GDS_BUSY(x) (((x) >> 15) & 0x1)
#define VGT_BUSY(x) (((x) >> 17) & 0x1)
#define IA_BUSY(x) (((x) >> 19) & 0x1)
#define SX_BUSY(x) (((x) >> 20) & 0x1)
#define WD_BUSY(x) (((x) >> 21) & 0x1)
#define SPI_BUSY(x) (((x) >> 22) & 0x1)
#define BCI_BUSY(x) (((x) >> 23) & 0x1)
#define SC_BUSY(x) (((x) >> 24) & 0x1)
#define PA_BUSY(x) (((x) >> 25) & 0x1)
#define DB_BUSY(x) (((x) >> 26) & 0x1)
#define CP_BUSY(x) (((x) >> 29) & 0x1)
#define CB_BUSY(x) (((x) >> 30) & 0x1)
#define GUI_ACTIVE(x) (((x) >> 31) & 0x1)
#define SRBM_STATUS2 0x0e4c
#define SDMA_BUSY(x) (((x) >> 5) & 0x1)
#define CP_STAT 0x8680
#define PFP_BUSY(x) (((x) >> 15) & 0x1)
#define MEQ_BUSY(x) (((x) >> 16) & 0x1)
#define ME_BUSY(x) (((x) >> 17) & 0x1)
#define SURFACE_SYNC_BUSY(x) (((x) >> 21) & 0x1)
#define DMA_BUSY(x) (((x) >> 22) & 0x1)
#define SCRATCH_RAM_BUSY(x) (((x) >> 24) & 0x1)
#define IDENTITY(x) x
#define UPDATE_COUNTER(field, mask) \
do { \
if (mask(value)) \
p_atomic_inc(&counters->named.field.busy); \
else \
p_atomic_inc(&counters->named.field.idle); \
} while (0)
static void r600_update_mmio_counters(struct r600_common_screen *rscreen,
union r600_mmio_counters *counters)
{
uint32_t value = 0;
bool gui_busy, sdma_busy = false;
/* GRBM_STATUS */
rscreen->ws->read_registers(rscreen->ws, GRBM_STATUS, 1, &value);
UPDATE_COUNTER(ta, TA_BUSY);
UPDATE_COUNTER(gds, GDS_BUSY);
UPDATE_COUNTER(vgt, VGT_BUSY);
UPDATE_COUNTER(ia, IA_BUSY);
UPDATE_COUNTER(sx, SX_BUSY);
UPDATE_COUNTER(wd, WD_BUSY);
UPDATE_COUNTER(spi, SPI_BUSY);
UPDATE_COUNTER(bci, BCI_BUSY);
UPDATE_COUNTER(sc, SC_BUSY);
UPDATE_COUNTER(pa, PA_BUSY);
UPDATE_COUNTER(db, DB_BUSY);
UPDATE_COUNTER(cp, CP_BUSY);
UPDATE_COUNTER(cb, CB_BUSY);
UPDATE_COUNTER(gui, GUI_ACTIVE);
gui_busy = GUI_ACTIVE(value);
if (rscreen->chip_class == CIK || rscreen->chip_class == VI) {
/* SRBM_STATUS2 */
rscreen->ws->read_registers(rscreen->ws, SRBM_STATUS2, 1, &value);
UPDATE_COUNTER(sdma, SDMA_BUSY);
sdma_busy = SDMA_BUSY(value);
}
if (rscreen->chip_class >= VI) {
/* CP_STAT */
rscreen->ws->read_registers(rscreen->ws, CP_STAT, 1, &value);
UPDATE_COUNTER(pfp, PFP_BUSY);
UPDATE_COUNTER(meq, MEQ_BUSY);
UPDATE_COUNTER(me, ME_BUSY);
UPDATE_COUNTER(surf_sync, SURFACE_SYNC_BUSY);
UPDATE_COUNTER(cp_dma, DMA_BUSY);
UPDATE_COUNTER(scratch_ram, SCRATCH_RAM_BUSY);
}
value = gui_busy || sdma_busy;
UPDATE_COUNTER(gpu, IDENTITY);
}
#undef UPDATE_COUNTER
static int
r600_gpu_load_thread(void *param)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)param;
const int period_us = 1000000 / SAMPLES_PER_SEC;
int sleep_us = period_us;
int64_t cur_time, last_time = os_time_get();
while (!p_atomic_read(&rscreen->gpu_load_stop_thread)) {
if (sleep_us)
os_time_sleep(sleep_us);
/* Make sure we sleep the ideal amount of time to match
* the expected frequency. */
cur_time = os_time_get();
if (os_time_timeout(last_time, last_time + period_us,
cur_time))
sleep_us = MAX2(sleep_us - 1, 1);
else
sleep_us += 1;
/*printf("Hz: %.1f\n", 1000000.0 / (cur_time - last_time));*/
last_time = cur_time;
/* Update the counters. */
r600_update_mmio_counters(rscreen, &rscreen->mmio_counters);
}
p_atomic_dec(&rscreen->gpu_load_stop_thread);
return 0;
}
void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen)
{
if (!rscreen->gpu_load_thread)
return;
p_atomic_inc(&rscreen->gpu_load_stop_thread);
thrd_join(rscreen->gpu_load_thread, NULL);
rscreen->gpu_load_thread = 0;
}
static uint64_t r600_read_mmio_counter(struct r600_common_screen *rscreen,
unsigned busy_index)
{
/* Start the thread if needed. */
if (!rscreen->gpu_load_thread) {
mtx_lock(&rscreen->gpu_load_mutex);
/* Check again inside the mutex. */
if (!rscreen->gpu_load_thread)
rscreen->gpu_load_thread =
u_thread_create(r600_gpu_load_thread, rscreen);
mtx_unlock(&rscreen->gpu_load_mutex);
}
unsigned busy = p_atomic_read(&rscreen->mmio_counters.array[busy_index]);
unsigned idle = p_atomic_read(&rscreen->mmio_counters.array[busy_index + 1]);
return busy | ((uint64_t)idle << 32);
}
static unsigned r600_end_mmio_counter(struct r600_common_screen *rscreen,
uint64_t begin, unsigned busy_index)
{
uint64_t end = r600_read_mmio_counter(rscreen, busy_index);
unsigned busy = (end & 0xffffffff) - (begin & 0xffffffff);
unsigned idle = (end >> 32) - (begin >> 32);
/* Calculate the % of time the busy counter was being incremented.
*
* If no counters were incremented, return the current counter status.
* It's for the case when the load is queried faster than
* the counters are updated.
*/
if (idle || busy) {
return busy*100 / (busy + idle);
} else {
union r600_mmio_counters counters;
memset(&counters, 0, sizeof(counters));
r600_update_mmio_counters(rscreen, &counters);
return counters.array[busy_index] ? 100 : 0;
}
}
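/* For illustration: if the busy counter advanced by 50 ticks and the idle
 * counter by 950 between begin and end, the reported load is
 * 50 * 100 / (50 + 950) = 5 percent. */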
#define BUSY_INDEX(rscreen, field) (&rscreen->mmio_counters.named.field.busy - \
rscreen->mmio_counters.array)
static unsigned busy_index_from_type(struct r600_common_screen *rscreen,
unsigned type)
{
switch (type) {
case R600_QUERY_GPU_LOAD:
return BUSY_INDEX(rscreen, gpu);
case R600_QUERY_GPU_SHADERS_BUSY:
return BUSY_INDEX(rscreen, spi);
case R600_QUERY_GPU_TA_BUSY:
return BUSY_INDEX(rscreen, ta);
case R600_QUERY_GPU_GDS_BUSY:
return BUSY_INDEX(rscreen, gds);
case R600_QUERY_GPU_VGT_BUSY:
return BUSY_INDEX(rscreen, vgt);
case R600_QUERY_GPU_IA_BUSY:
return BUSY_INDEX(rscreen, ia);
case R600_QUERY_GPU_SX_BUSY:
return BUSY_INDEX(rscreen, sx);
case R600_QUERY_GPU_WD_BUSY:
return BUSY_INDEX(rscreen, wd);
case R600_QUERY_GPU_BCI_BUSY:
return BUSY_INDEX(rscreen, bci);
case R600_QUERY_GPU_SC_BUSY:
return BUSY_INDEX(rscreen, sc);
case R600_QUERY_GPU_PA_BUSY:
return BUSY_INDEX(rscreen, pa);
case R600_QUERY_GPU_DB_BUSY:
return BUSY_INDEX(rscreen, db);
case R600_QUERY_GPU_CP_BUSY:
return BUSY_INDEX(rscreen, cp);
case R600_QUERY_GPU_CB_BUSY:
return BUSY_INDEX(rscreen, cb);
case R600_QUERY_GPU_SDMA_BUSY:
return BUSY_INDEX(rscreen, sdma);
case R600_QUERY_GPU_PFP_BUSY:
return BUSY_INDEX(rscreen, pfp);
case R600_QUERY_GPU_MEQ_BUSY:
return BUSY_INDEX(rscreen, meq);
case R600_QUERY_GPU_ME_BUSY:
return BUSY_INDEX(rscreen, me);
case R600_QUERY_GPU_SURF_SYNC_BUSY:
return BUSY_INDEX(rscreen, surf_sync);
case R600_QUERY_GPU_CP_DMA_BUSY:
return BUSY_INDEX(rscreen, cp_dma);
case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
return BUSY_INDEX(rscreen, scratch_ram);
default:
unreachable("invalid query type");
}
}
uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type)
{
unsigned busy_index = busy_index_from_type(rscreen, type);
return r600_read_mmio_counter(rscreen, busy_index);
}
unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,
uint64_t begin)
{
unsigned busy_index = busy_index_from_type(rscreen, type);
return r600_end_mmio_counter(rscreen, begin, busy_index);
}
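/* Usage sketch (illustrative only; the variable names are not from this
 * patch): a software query for R600_QUERY_GPU_LOAD would do roughly
 *
 *   uint64_t begin = r600_begin_counter(rscreen, R600_QUERY_GPU_LOAD);
 *   ... let some work run ...
 *   unsigned load = r600_end_counter(rscreen, R600_QUERY_GPU_LOAD, begin);
 *
 * where "load" is the percentage of samples for which the GPU was busy. */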


@@ -0,0 +1,649 @@
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Nicolai Hähnle <nicolai.haehnle@amd.com>
*
*/
#include "util/u_memory.h"
#include "r600_query.h"
#include "r600_pipe_common.h"
#include "amd/common/r600d_common.h"
/* Max counters per HW block */
#define R600_QUERY_MAX_COUNTERS 16
static struct r600_perfcounter_block *
lookup_counter(struct r600_perfcounters *pc, unsigned index,
unsigned *base_gid, unsigned *sub_index)
{
struct r600_perfcounter_block *block = pc->blocks;
unsigned bid;
*base_gid = 0;
for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
unsigned total = block->num_groups * block->num_selectors;
if (index < total) {
*sub_index = index;
return block;
}
index -= total;
*base_gid += block->num_groups;
}
return NULL;
}
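/* The flat perfcounter index decomposes as
 *   index = (sum of num_groups * num_selectors over preceding blocks)
 *         + group_within_block * num_selectors + selector,
 * which is why callers later split the returned *sub_index into
 * sub_index / num_selectors (the group) and sub_index % num_selectors. */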
static struct r600_perfcounter_block *
lookup_group(struct r600_perfcounters *pc, unsigned *index)
{
unsigned bid;
struct r600_perfcounter_block *block = pc->blocks;
for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
if (*index < block->num_groups)
return block;
*index -= block->num_groups;
}
return NULL;
}
struct r600_pc_group {
struct r600_pc_group *next;
struct r600_perfcounter_block *block;
unsigned sub_gid; /* only used during init */
unsigned result_base; /* only used during init */
int se;
int instance;
unsigned num_counters;
unsigned selectors[R600_QUERY_MAX_COUNTERS];
};
struct r600_pc_counter {
unsigned base;
unsigned qwords;
unsigned stride; /* in uint64s */
};
#define R600_PC_SHADERS_WINDOWING (1 << 31)
struct r600_query_pc {
struct r600_query_hw b;
unsigned shaders;
unsigned num_counters;
struct r600_pc_counter *counters;
struct r600_pc_group *groups;
};
static void r600_pc_query_destroy(struct r600_common_screen *rscreen,
struct r600_query *rquery)
{
struct r600_query_pc *query = (struct r600_query_pc *)rquery;
while (query->groups) {
struct r600_pc_group *group = query->groups;
query->groups = group->next;
FREE(group);
}
FREE(query->counters);
r600_query_hw_destroy(rscreen, rquery);
}
static bool r600_pc_query_prepare_buffer(struct r600_common_screen *screen,
struct r600_query_hw *hwquery,
struct r600_resource *buffer)
{
/* no-op */
return true;
}
static void r600_pc_query_emit_start(struct r600_common_context *ctx,
struct r600_query_hw *hwquery,
struct r600_resource *buffer, uint64_t va)
{
struct r600_perfcounters *pc = ctx->screen->perfcounters;
struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
struct r600_pc_group *group;
int current_se = -1;
int current_instance = -1;
if (query->shaders)
pc->emit_shaders(ctx, query->shaders);
for (group = query->groups; group; group = group->next) {
struct r600_perfcounter_block *block = group->block;
if (group->se != current_se || group->instance != current_instance) {
current_se = group->se;
current_instance = group->instance;
pc->emit_instance(ctx, group->se, group->instance);
}
pc->emit_select(ctx, block, group->num_counters, group->selectors);
}
if (current_se != -1 || current_instance != -1)
pc->emit_instance(ctx, -1, -1);
pc->emit_start(ctx, buffer, va);
}
static void r600_pc_query_emit_stop(struct r600_common_context *ctx,
struct r600_query_hw *hwquery,
struct r600_resource *buffer, uint64_t va)
{
struct r600_perfcounters *pc = ctx->screen->perfcounters;
struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
struct r600_pc_group *group;
pc->emit_stop(ctx, buffer, va);
for (group = query->groups; group; group = group->next) {
struct r600_perfcounter_block *block = group->block;
unsigned se = group->se >= 0 ? group->se : 0;
unsigned se_end = se + 1;
if ((block->flags & R600_PC_BLOCK_SE) && (group->se < 0))
se_end = ctx->screen->info.max_se;
do {
unsigned instance = group->instance >= 0 ? group->instance : 0;
do {
pc->emit_instance(ctx, se, instance);
pc->emit_read(ctx, block,
group->num_counters, group->selectors,
buffer, va);
va += sizeof(uint64_t) * group->num_counters;
} while (group->instance < 0 && ++instance < block->num_instances);
} while (++se < se_end);
}
pc->emit_instance(ctx, -1, -1);
}
static void r600_pc_query_clear_result(struct r600_query_hw *hwquery,
union pipe_query_result *result)
{
struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
memset(result, 0, sizeof(result->batch[0]) * query->num_counters);
}
static void r600_pc_query_add_result(struct r600_common_screen *rscreen,
struct r600_query_hw *hwquery,
void *buffer,
union pipe_query_result *result)
{
struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
uint64_t *results = buffer;
unsigned i, j;
for (i = 0; i < query->num_counters; ++i) {
struct r600_pc_counter *counter = &query->counters[i];
for (j = 0; j < counter->qwords; ++j) {
uint32_t value = results[counter->base + j * counter->stride];
result->batch[i].u64 += value;
}
}
}
static struct r600_query_ops batch_query_ops = {
.destroy = r600_pc_query_destroy,
.begin = r600_query_hw_begin,
.end = r600_query_hw_end,
.get_result = r600_query_hw_get_result
};
static struct r600_query_hw_ops batch_query_hw_ops = {
.prepare_buffer = r600_pc_query_prepare_buffer,
.emit_start = r600_pc_query_emit_start,
.emit_stop = r600_pc_query_emit_stop,
.clear_result = r600_pc_query_clear_result,
.add_result = r600_pc_query_add_result,
};
static struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
struct r600_query_pc *query,
struct r600_perfcounter_block *block,
unsigned sub_gid)
{
struct r600_pc_group *group = query->groups;
while (group) {
if (group->block == block && group->sub_gid == sub_gid)
return group;
group = group->next;
}
group = CALLOC_STRUCT(r600_pc_group);
if (!group)
return NULL;
group->block = block;
group->sub_gid = sub_gid;
if (block->flags & R600_PC_BLOCK_SHADER) {
unsigned sub_gids = block->num_instances;
unsigned shader_id;
unsigned shaders;
unsigned query_shaders;
if (block->flags & R600_PC_BLOCK_SE_GROUPS)
sub_gids = sub_gids * screen->info.max_se;
shader_id = sub_gid / sub_gids;
sub_gid = sub_gid % sub_gids;
shaders = screen->perfcounters->shader_type_bits[shader_id];
query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING;
if (query_shaders && query_shaders != shaders) {
fprintf(stderr, "r600_perfcounter: incompatible shader groups\n");
FREE(group);
return NULL;
}
query->shaders = shaders;
}
if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
// A non-zero value in query->shaders ensures that the shader
// masking is reset unless the user explicitly requests one.
query->shaders = R600_PC_SHADERS_WINDOWING;
}
if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
group->se = sub_gid / block->num_instances;
sub_gid = sub_gid % block->num_instances;
} else {
group->se = -1;
}
if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
group->instance = sub_gid;
} else {
group->instance = -1;
}
group->next = query->groups;
query->groups = group;
return group;
}
struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
unsigned num_queries,
unsigned *query_types)
{
struct r600_common_screen *screen =
(struct r600_common_screen *)ctx->screen;
struct r600_perfcounters *pc = screen->perfcounters;
struct r600_perfcounter_block *block;
struct r600_pc_group *group;
struct r600_query_pc *query;
unsigned base_gid, sub_gid, sub_index;
unsigned i, j;
if (!pc)
return NULL;
query = CALLOC_STRUCT(r600_query_pc);
if (!query)
return NULL;
query->b.b.ops = &batch_query_ops;
query->b.ops = &batch_query_hw_ops;
query->num_counters = num_queries;
/* Collect selectors per group */
for (i = 0; i < num_queries; ++i) {
unsigned sub_gid;
if (query_types[i] < R600_QUERY_FIRST_PERFCOUNTER)
goto error;
block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
&base_gid, &sub_index);
if (!block)
goto error;
sub_gid = sub_index / block->num_selectors;
sub_index = sub_index % block->num_selectors;
group = get_group_state(screen, query, block, sub_gid);
if (!group)
goto error;
if (group->num_counters >= block->num_counters) {
fprintf(stderr,
"perfcounter group %s: too many selected\n",
block->basename);
goto error;
}
group->selectors[group->num_counters] = sub_index;
++group->num_counters;
}
/* Compute result bases and CS size per group */
query->b.num_cs_dw_begin = pc->num_start_cs_dwords;
query->b.num_cs_dw_end = pc->num_stop_cs_dwords;
query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
query->b.num_cs_dw_end += pc->num_instance_cs_dwords;
i = 0;
for (group = query->groups; group; group = group->next) {
struct r600_perfcounter_block *block = group->block;
unsigned select_dw, read_dw;
unsigned instances = 1;
if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
instances = screen->info.max_se;
if (group->instance < 0)
instances *= block->num_instances;
group->result_base = i;
query->b.result_size += sizeof(uint64_t) * instances * group->num_counters;
i += instances * group->num_counters;
pc->get_size(block, group->num_counters, group->selectors,
&select_dw, &read_dw);
query->b.num_cs_dw_begin += select_dw;
query->b.num_cs_dw_end += instances * read_dw;
query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords;
}
if (query->shaders) {
if (query->shaders == R600_PC_SHADERS_WINDOWING)
query->shaders = 0xffffffff;
query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords;
}
/* Map user-supplied query array to result indices */
query->counters = CALLOC(num_queries, sizeof(*query->counters));
for (i = 0; i < num_queries; ++i) {
struct r600_pc_counter *counter = &query->counters[i];
struct r600_perfcounter_block *block;
block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
&base_gid, &sub_index);
sub_gid = sub_index / block->num_selectors;
sub_index = sub_index % block->num_selectors;
group = get_group_state(screen, query, block, sub_gid);
assert(group != NULL);
for (j = 0; j < group->num_counters; ++j) {
if (group->selectors[j] == sub_index)
break;
}
counter->base = group->result_base + j;
counter->stride = group->num_counters;
counter->qwords = 1;
if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
counter->qwords = screen->info.max_se;
if (group->instance < 0)
counter->qwords *= block->num_instances;
}
if (!r600_query_hw_init(screen, &query->b))
goto error;
return (struct pipe_query *)query;
error:
r600_pc_query_destroy(screen, &query->b.b);
return NULL;
}
static bool r600_init_block_names(struct r600_common_screen *screen,
struct r600_perfcounter_block *block)
{
unsigned i, j, k;
unsigned groups_shader = 1, groups_se = 1, groups_instance = 1;
unsigned namelen;
char *groupname;
char *p;
if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
groups_instance = block->num_instances;
if (block->flags & R600_PC_BLOCK_SE_GROUPS)
groups_se = screen->info.max_se;
if (block->flags & R600_PC_BLOCK_SHADER)
groups_shader = screen->perfcounters->num_shader_types;
namelen = strlen(block->basename);
block->group_name_stride = namelen + 1;
if (block->flags & R600_PC_BLOCK_SHADER)
block->group_name_stride += 3;
if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
assert(groups_se <= 10);
block->group_name_stride += 1;
if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
block->group_name_stride += 1;
}
if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
assert(groups_instance <= 100);
block->group_name_stride += 2;
}
block->group_names = MALLOC(block->num_groups * block->group_name_stride);
if (!block->group_names)
return false;
groupname = block->group_names;
for (i = 0; i < groups_shader; ++i) {
const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i];
unsigned shaderlen = strlen(shader_suffix);
for (j = 0; j < groups_se; ++j) {
for (k = 0; k < groups_instance; ++k) {
strcpy(groupname, block->basename);
p = groupname + namelen;
if (block->flags & R600_PC_BLOCK_SHADER) {
strcpy(p, shader_suffix);
p += shaderlen;
}
if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
p += sprintf(p, "%d", j);
if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
*p++ = '_';
}
if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
p += sprintf(p, "%d", k);
groupname += block->group_name_stride;
}
}
}
assert(block->num_selectors <= 1000);
block->selector_name_stride = block->group_name_stride + 4;
block->selector_names = MALLOC(block->num_groups * block->num_selectors *
block->selector_name_stride);
if (!block->selector_names)
return false;
groupname = block->group_names;
p = block->selector_names;
for (i = 0; i < block->num_groups; ++i) {
for (j = 0; j < block->num_selectors; ++j) {
sprintf(p, "%s_%03d", groupname, j);
p += block->selector_name_stride;
}
groupname += block->group_name_stride;
}
return true;
}
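/* Worked example with a hypothetical block: basename "SQ" (2 chars) with the
 * shader, SE-group and instance-group flags set gets
 *   group_name_stride = 2 + 1 (NUL) + 3 (shader suffix) + 1 (SE digit)
 *                       + 1 ('_') + 2 (instance digits) = 10
 * which fits names like "SQ_ES3_15".  Selector names append "_%03d", so
 * selector_name_stride = 10 + 4 = 14, e.g. "SQ_ES3_15_007". */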
int r600_get_perfcounter_info(struct r600_common_screen *screen,
unsigned index,
struct pipe_driver_query_info *info)
{
struct r600_perfcounters *pc = screen->perfcounters;
struct r600_perfcounter_block *block;
unsigned base_gid, sub;
if (!pc)
return 0;
if (!info) {
unsigned bid, num_queries = 0;
for (bid = 0; bid < pc->num_blocks; ++bid) {
num_queries += pc->blocks[bid].num_selectors *
pc->blocks[bid].num_groups;
}
return num_queries;
}
block = lookup_counter(pc, index, &base_gid, &sub);
if (!block)
return 0;
if (!block->selector_names) {
if (!r600_init_block_names(screen, block))
return 0;
}
info->name = block->selector_names + sub * block->selector_name_stride;
info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index;
info->max_value.u64 = 0;
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
info->group_id = base_gid + sub / block->num_selectors;
info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
if (sub > 0 && sub + 1 < block->num_selectors * block->num_groups)
info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST;
return 1;
}
int r600_get_perfcounter_group_info(struct r600_common_screen *screen,
unsigned index,
struct pipe_driver_query_group_info *info)
{
struct r600_perfcounters *pc = screen->perfcounters;
struct r600_perfcounter_block *block;
if (!pc)
return 0;
if (!info)
return pc->num_groups;
block = lookup_group(pc, &index);
if (!block)
return 0;
if (!block->group_names) {
if (!r600_init_block_names(screen, block))
return 0;
}
info->name = block->group_names + index * block->group_name_stride;
info->num_queries = block->num_selectors;
info->max_active_queries = block->num_counters;
return 1;
}
void r600_perfcounters_destroy(struct r600_common_screen *rscreen)
{
if (rscreen->perfcounters)
rscreen->perfcounters->cleanup(rscreen);
}
bool r600_perfcounters_init(struct r600_perfcounters *pc,
unsigned num_blocks)
{
pc->blocks = CALLOC(num_blocks, sizeof(struct r600_perfcounter_block));
if (!pc->blocks)
return false;
pc->separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", false);
pc->separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false);
return true;
}
void r600_perfcounters_add_block(struct r600_common_screen *rscreen,
struct r600_perfcounters *pc,
const char *name, unsigned flags,
unsigned counters, unsigned selectors,
unsigned instances, void *data)
{
struct r600_perfcounter_block *block = &pc->blocks[pc->num_blocks];
assert(counters <= R600_QUERY_MAX_COUNTERS);
block->basename = name;
block->flags = flags;
block->num_counters = counters;
block->num_selectors = selectors;
block->num_instances = MAX2(instances, 1);
block->data = data;
if (pc->separate_se && (block->flags & R600_PC_BLOCK_SE))
block->flags |= R600_PC_BLOCK_SE_GROUPS;
if (pc->separate_instance && block->num_instances > 1)
block->flags |= R600_PC_BLOCK_INSTANCE_GROUPS;
if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
block->num_groups = block->num_instances;
} else {
block->num_groups = 1;
}
if (block->flags & R600_PC_BLOCK_SE_GROUPS)
block->num_groups *= rscreen->info.max_se;
if (block->flags & R600_PC_BLOCK_SHADER)
block->num_groups *= pc->num_shader_types;
++pc->num_blocks;
pc->num_groups += block->num_groups;
}
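/* Worked example with hypothetical numbers: a block added with 2 instances
 * on a 4-SE chip exposes a single group by default (instances and SEs are
 * summed).  With RADEON_PC_SEPARATE_INSTANCE and RADEON_PC_SEPARATE_SE set,
 * and the block carrying the SE flag, num_groups becomes 2 * 4 = 8; a shader
 * block is further multiplied by num_shader_types. */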
void r600_perfcounters_do_destroy(struct r600_perfcounters *pc)
{
unsigned i;
for (i = 0; i < pc->num_blocks; ++i) {
FREE(pc->blocks[i].group_names);
FREE(pc->blocks[i].selector_names);
}
FREE(pc->blocks);
FREE(pc);
}

View File

@ -37,8 +37,8 @@
#include "util/u_math.h"
#include "vl/vl_decoder.h"
#include "vl/vl_video_buffer.h"
#include "radeon/radeon_video.h"
#include "radeon/radeon_uvd.h"
#include "radeon_video.h"
#include "radeon_uvd.h"
#include "os/os_time.h"
static const struct debug_named_value r600_debug_options[] = {

View File

@ -26,8 +26,8 @@
#ifndef R600_PIPE_H
#define R600_PIPE_H
#include "radeon/r600_pipe_common.h"
#include "radeon/r600_cs.h"
#include "r600_pipe_common.h"
#include "r600_cs.h"
#include "r600_public.h"
#include "pipe/p_defines.h"

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,327 @@
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Nicolai Hähnle <nicolai.haehnle@amd.com>
*
*/
#ifndef R600_QUERY_H
#define R600_QUERY_H
#include "util/u_threaded_context.h"
struct pipe_context;
struct pipe_query;
struct pipe_resource;
struct r600_common_context;
struct r600_common_screen;
struct r600_query;
struct r600_query_hw;
struct r600_resource;
enum {
R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC,
R600_QUERY_DECOMPRESS_CALLS,
R600_QUERY_MRT_DRAW_CALLS,
R600_QUERY_PRIM_RESTART_CALLS,
R600_QUERY_SPILL_DRAW_CALLS,
R600_QUERY_COMPUTE_CALLS,
R600_QUERY_SPILL_COMPUTE_CALLS,
R600_QUERY_DMA_CALLS,
R600_QUERY_CP_DMA_CALLS,
R600_QUERY_NUM_VS_FLUSHES,
R600_QUERY_NUM_PS_FLUSHES,
R600_QUERY_NUM_CS_FLUSHES,
R600_QUERY_NUM_CB_CACHE_FLUSHES,
R600_QUERY_NUM_DB_CACHE_FLUSHES,
R600_QUERY_NUM_L2_INVALIDATES,
R600_QUERY_NUM_L2_WRITEBACKS,
R600_QUERY_NUM_RESIDENT_HANDLES,
R600_QUERY_TC_OFFLOADED_SLOTS,
R600_QUERY_TC_DIRECT_SLOTS,
R600_QUERY_TC_NUM_SYNCS,
R600_QUERY_CS_THREAD_BUSY,
R600_QUERY_GALLIUM_THREAD_BUSY,
R600_QUERY_REQUESTED_VRAM,
R600_QUERY_REQUESTED_GTT,
R600_QUERY_MAPPED_VRAM,
R600_QUERY_MAPPED_GTT,
R600_QUERY_BUFFER_WAIT_TIME,
R600_QUERY_NUM_MAPPED_BUFFERS,
R600_QUERY_NUM_GFX_IBS,
R600_QUERY_NUM_SDMA_IBS,
R600_QUERY_GFX_BO_LIST_SIZE,
R600_QUERY_NUM_BYTES_MOVED,
R600_QUERY_NUM_EVICTIONS,
R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS,
R600_QUERY_VRAM_USAGE,
R600_QUERY_VRAM_VIS_USAGE,
R600_QUERY_GTT_USAGE,
R600_QUERY_GPU_TEMPERATURE,
R600_QUERY_CURRENT_GPU_SCLK,
R600_QUERY_CURRENT_GPU_MCLK,
R600_QUERY_GPU_LOAD,
R600_QUERY_GPU_SHADERS_BUSY,
R600_QUERY_GPU_TA_BUSY,
R600_QUERY_GPU_GDS_BUSY,
R600_QUERY_GPU_VGT_BUSY,
R600_QUERY_GPU_IA_BUSY,
R600_QUERY_GPU_SX_BUSY,
R600_QUERY_GPU_WD_BUSY,
R600_QUERY_GPU_BCI_BUSY,
R600_QUERY_GPU_SC_BUSY,
R600_QUERY_GPU_PA_BUSY,
R600_QUERY_GPU_DB_BUSY,
R600_QUERY_GPU_CP_BUSY,
R600_QUERY_GPU_CB_BUSY,
R600_QUERY_GPU_SDMA_BUSY,
R600_QUERY_GPU_PFP_BUSY,
R600_QUERY_GPU_MEQ_BUSY,
R600_QUERY_GPU_ME_BUSY,
R600_QUERY_GPU_SURF_SYNC_BUSY,
R600_QUERY_GPU_CP_DMA_BUSY,
R600_QUERY_GPU_SCRATCH_RAM_BUSY,
R600_QUERY_NUM_COMPILATIONS,
R600_QUERY_NUM_SHADERS_CREATED,
R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO,
R600_QUERY_NUM_SHADER_CACHE_HITS,
R600_QUERY_GPIN_ASIC_ID,
R600_QUERY_GPIN_NUM_SIMD,
R600_QUERY_GPIN_NUM_RB,
R600_QUERY_GPIN_NUM_SPI,
R600_QUERY_GPIN_NUM_SE,
R600_QUERY_FIRST_PERFCOUNTER = PIPE_QUERY_DRIVER_SPECIFIC + 100,
};
enum {
R600_QUERY_GROUP_GPIN = 0,
R600_NUM_SW_QUERY_GROUPS
};
struct r600_query_ops {
void (*destroy)(struct r600_common_screen *, struct r600_query *);
bool (*begin)(struct r600_common_context *, struct r600_query *);
bool (*end)(struct r600_common_context *, struct r600_query *);
bool (*get_result)(struct r600_common_context *,
struct r600_query *, bool wait,
union pipe_query_result *result);
void (*get_result_resource)(struct r600_common_context *,
struct r600_query *, bool wait,
enum pipe_query_value_type result_type,
int index,
struct pipe_resource *resource,
unsigned offset);
};
struct r600_query {
struct threaded_query b;
struct r600_query_ops *ops;
/* The type of query */
unsigned type;
};
enum {
R600_QUERY_HW_FLAG_NO_START = (1 << 0),
/* gap */
/* whether begin_query doesn't clear the result */
R600_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2),
};
struct r600_query_hw_ops {
bool (*prepare_buffer)(struct r600_common_screen *,
struct r600_query_hw *,
struct r600_resource *);
void (*emit_start)(struct r600_common_context *,
struct r600_query_hw *,
struct r600_resource *buffer, uint64_t va);
void (*emit_stop)(struct r600_common_context *,
struct r600_query_hw *,
struct r600_resource *buffer, uint64_t va);
void (*clear_result)(struct r600_query_hw *, union pipe_query_result *);
void (*add_result)(struct r600_common_screen *screen,
struct r600_query_hw *, void *buffer,
union pipe_query_result *result);
};
struct r600_query_buffer {
/* The buffer where query results are stored. */
struct r600_resource *buf;
/* Offset of the next free result after current query data */
unsigned results_end;
/* If a query buffer is full, a new buffer is created and the old one
* is put in here. When we calculate the result, we sum up the samples
* from all buffers. */
struct r600_query_buffer *previous;
};
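/* Illustrative sketch, not driver code: computing a result walks this chain
 * from the newest buffer back through ->previous and accumulates the samples
 * of every buffer.  The type and helper below are simplified stand-ins; in
 * the driver the per-buffer work is done by the add_result hook. */
struct example_qbuf {
	uint64_t partial_sum;		/* stands in for the mapped samples */
	struct example_qbuf *previous;	/* older, already filled buffer */
};

static inline uint64_t example_sum_chain(const struct example_qbuf *buf)
{
	uint64_t total = 0;

	for (; buf; buf = buf->previous)
		total += buf->partial_sum;
	return total;
}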
struct r600_query_hw {
struct r600_query b;
struct r600_query_hw_ops *ops;
unsigned flags;
/* The query buffer and how many results are in it. */
struct r600_query_buffer buffer;
/* Size of the result in memory for both begin_query and end_query,
* this can be one or two numbers, or it could even be the size of a structure. */
unsigned result_size;
/* The number of dwords for begin_query or end_query. */
unsigned num_cs_dw_begin;
unsigned num_cs_dw_end;
/* Linked list of queries */
struct list_head list;
/* For transform feedback: which stream the query is for */
unsigned stream;
/* Workaround via compute shader */
struct r600_resource *workaround_buf;
unsigned workaround_offset;
};
bool r600_query_hw_init(struct r600_common_screen *rscreen,
struct r600_query_hw *query);
void r600_query_hw_destroy(struct r600_common_screen *rscreen,
struct r600_query *rquery);
bool r600_query_hw_begin(struct r600_common_context *rctx,
struct r600_query *rquery);
bool r600_query_hw_end(struct r600_common_context *rctx,
struct r600_query *rquery);
bool r600_query_hw_get_result(struct r600_common_context *rctx,
struct r600_query *rquery,
bool wait,
union pipe_query_result *result);
/* Performance counters */
enum {
/* This block is part of the shader engine */
R600_PC_BLOCK_SE = (1 << 0),
/* Expose per-instance groups instead of summing all instances (within
* an SE). */
R600_PC_BLOCK_INSTANCE_GROUPS = (1 << 1),
/* Expose per-SE groups instead of summing instances across SEs. */
R600_PC_BLOCK_SE_GROUPS = (1 << 2),
/* Shader block */
R600_PC_BLOCK_SHADER = (1 << 3),
/* Non-shader block with perfcounters windowed by shaders. */
R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4),
};
/* Describes a hardware block with performance counters. Multiple instances of
* each block, possibly per-SE, may exist on the chip. Depending on the block
* and on the user's configuration, we either
* (a) expose every instance as a performance counter group,
* (b) expose a single performance counter group that reports the sum over all
* instances, or
* (c) expose one performance counter group per instance, but summed over all
* shader engines.
*/
struct r600_perfcounter_block {
const char *basename;
unsigned flags;
unsigned num_counters;
unsigned num_selectors;
unsigned num_instances;
unsigned num_groups;
char *group_names;
unsigned group_name_stride;
char *selector_names;
unsigned selector_name_stride;
void *data;
};
struct r600_perfcounters {
unsigned num_groups;
unsigned num_blocks;
struct r600_perfcounter_block *blocks;
unsigned num_start_cs_dwords;
unsigned num_stop_cs_dwords;
unsigned num_instance_cs_dwords;
unsigned num_shaders_cs_dwords;
unsigned num_shader_types;
const char * const *shader_type_suffixes;
const unsigned *shader_type_bits;
void (*get_size)(struct r600_perfcounter_block *,
unsigned count, unsigned *selectors,
unsigned *num_select_dw, unsigned *num_read_dw);
void (*emit_instance)(struct r600_common_context *,
int se, int instance);
void (*emit_shaders)(struct r600_common_context *, unsigned shaders);
void (*emit_select)(struct r600_common_context *,
struct r600_perfcounter_block *,
unsigned count, unsigned *selectors);
void (*emit_start)(struct r600_common_context *,
struct r600_resource *buffer, uint64_t va);
void (*emit_stop)(struct r600_common_context *,
struct r600_resource *buffer, uint64_t va);
void (*emit_read)(struct r600_common_context *,
struct r600_perfcounter_block *,
unsigned count, unsigned *selectors,
struct r600_resource *buffer, uint64_t va);
void (*cleanup)(struct r600_common_screen *);
bool separate_se;
bool separate_instance;
};
struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
unsigned num_queries,
unsigned *query_types);
int r600_get_perfcounter_info(struct r600_common_screen *,
unsigned index,
struct pipe_driver_query_info *info);
int r600_get_perfcounter_group_info(struct r600_common_screen *,
unsigned index,
struct pipe_driver_query_group_info *info);
bool r600_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks);
void r600_perfcounters_add_block(struct r600_common_screen *,
struct r600_perfcounters *,
const char *name, unsigned flags,
unsigned counters, unsigned selectors,
unsigned instances, void *data);
void r600_perfcounters_do_destroy(struct r600_perfcounters *);
void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
struct r600_query_hw *query);
struct r600_qbo_state {
void *saved_compute;
struct pipe_constant_buffer saved_const0;
struct pipe_shader_buffer saved_ssbo[3];
};
#endif /* R600_QUERY_H */

View File

@ -0,0 +1,381 @@
/*
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Marek Olšák <maraeo@gmail.com>
*
*/
#include "r600_pipe_common.h"
#include "r600_cs.h"
#include "util/u_memory.h"
static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable);
static struct pipe_stream_output_target *
r600_create_so_target(struct pipe_context *ctx,
struct pipe_resource *buffer,
unsigned buffer_offset,
unsigned buffer_size)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_so_target *t;
struct r600_resource *rbuffer = (struct r600_resource*)buffer;
t = CALLOC_STRUCT(r600_so_target);
if (!t) {
return NULL;
}
u_suballocator_alloc(rctx->allocator_zeroed_memory, 4, 4,
&t->buf_filled_size_offset,
(struct pipe_resource**)&t->buf_filled_size);
if (!t->buf_filled_size) {
FREE(t);
return NULL;
}
t->b.reference.count = 1;
t->b.context = ctx;
pipe_resource_reference(&t->b.buffer, buffer);
t->b.buffer_offset = buffer_offset;
t->b.buffer_size = buffer_size;
util_range_add(&rbuffer->valid_buffer_range, buffer_offset,
buffer_offset + buffer_size);
return &t->b;
}
static void r600_so_target_destroy(struct pipe_context *ctx,
struct pipe_stream_output_target *target)
{
struct r600_so_target *t = (struct r600_so_target*)target;
pipe_resource_reference(&t->b.buffer, NULL);
r600_resource_reference(&t->buf_filled_size, NULL);
FREE(t);
}
void r600_streamout_buffers_dirty(struct r600_common_context *rctx)
{
struct r600_atom *begin = &rctx->streamout.begin_atom;
unsigned num_bufs = util_bitcount(rctx->streamout.enabled_mask);
unsigned num_bufs_appended = util_bitcount(rctx->streamout.enabled_mask &
rctx->streamout.append_bitmask);
if (!num_bufs)
return;
rctx->streamout.num_dw_for_end =
12 + /* flush_vgt_streamout */
num_bufs * 11; /* STRMOUT_BUFFER_UPDATE, BUFFER_SIZE */
begin->num_dw = 12; /* flush_vgt_streamout */
if (rctx->chip_class >= SI) {
begin->num_dw += num_bufs * 4; /* SET_CONTEXT_REG */
} else {
begin->num_dw += num_bufs * 7; /* SET_CONTEXT_REG */
if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740)
begin->num_dw += num_bufs * 5; /* STRMOUT_BASE_UPDATE */
}
begin->num_dw +=
num_bufs_appended * 8 + /* STRMOUT_BUFFER_UPDATE */
(num_bufs - num_bufs_appended) * 6 + /* STRMOUT_BUFFER_UPDATE */
(rctx->family > CHIP_R600 && rctx->family < CHIP_RS780 ? 2 : 0); /* SURFACE_BASE_UPDATE */
rctx->set_atom_dirty(rctx, begin, true);
r600_set_streamout_enable(rctx, true);
}
void r600_set_streamout_targets(struct pipe_context *ctx,
unsigned num_targets,
struct pipe_stream_output_target **targets,
const unsigned *offsets)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
unsigned i;
unsigned enabled_mask = 0, append_bitmask = 0;
/* Stop streamout. */
if (rctx->streamout.num_targets && rctx->streamout.begin_emitted) {
r600_emit_streamout_end(rctx);
}
/* Set the new targets. */
for (i = 0; i < num_targets; i++) {
pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], targets[i]);
if (!targets[i])
continue;
r600_context_add_resource_size(ctx, targets[i]->buffer);
enabled_mask |= 1 << i;
if (offsets[i] == ((unsigned)-1))
append_bitmask |= 1 << i;
}
for (; i < rctx->streamout.num_targets; i++) {
pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], NULL);
}
rctx->streamout.enabled_mask = enabled_mask;
rctx->streamout.num_targets = num_targets;
rctx->streamout.append_bitmask = append_bitmask;
if (num_targets) {
r600_streamout_buffers_dirty(rctx);
} else {
rctx->set_atom_dirty(rctx, &rctx->streamout.begin_atom, false);
r600_set_streamout_enable(rctx, false);
}
}
static void r600_flush_vgt_streamout(struct r600_common_context *rctx)
{
struct radeon_winsys_cs *cs = rctx->gfx.cs;
unsigned reg_strmout_cntl;
/* The register is at different places on different ASICs. */
if (rctx->chip_class >= CIK) {
reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
} else if (rctx->chip_class >= EVERGREEN) {
reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL;
} else {
reg_strmout_cntl = R_008490_CP_STRMOUT_CNTL;
}
if (rctx->chip_class >= CIK) {
radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0);
} else {
radeon_set_config_reg(cs, reg_strmout_cntl, 0);
}
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0));
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
radeon_emit(cs, reg_strmout_cntl >> 2); /* register */
radeon_emit(cs, 0);
radeon_emit(cs, S_008490_OFFSET_UPDATE_DONE(1)); /* reference value */
radeon_emit(cs, S_008490_OFFSET_UPDATE_DONE(1)); /* mask */
radeon_emit(cs, 4); /* poll interval */
}
static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = rctx->gfx.cs;
struct r600_so_target **t = rctx->streamout.targets;
uint16_t *stride_in_dw = rctx->streamout.stride_in_dw;
unsigned i, update_flags = 0;
r600_flush_vgt_streamout(rctx);
for (i = 0; i < rctx->streamout.num_targets; i++) {
if (!t[i])
continue;
t[i]->stride_in_dw = stride_in_dw[i];
if (rctx->chip_class >= SI) {
/* SI binds streamout buffers as shader resources.
* VGT only counts primitives and tells the shader
* through SGPRs what to do. */
radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
radeon_emit(cs, (t[i]->b.buffer_offset +
t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */
radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */
} else {
uint64_t va = r600_resource(t[i]->b.buffer)->gpu_address;
update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);
radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3);
radeon_emit(cs, (t[i]->b.buffer_offset +
t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */
radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */
radeon_emit(cs, va >> 8); /* BUFFER_BASE */
r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer),
RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER);
/* R7xx requires this packet after updating BUFFER_BASE.
* Without this, R7xx locks up. */
if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740) {
radeon_emit(cs, PKT3(PKT3_STRMOUT_BASE_UPDATE, 1, 0));
radeon_emit(cs, i);
radeon_emit(cs, va >> 8);
r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer),
RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER);
}
}
if (rctx->streamout.append_bitmask & (1 << i) && t[i]->buf_filled_size_valid) {
uint64_t va = t[i]->buf_filled_size->gpu_address +
t[i]->buf_filled_size_offset;
/* Append. */
radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, va); /* src address lo */
radeon_emit(cs, va >> 32); /* src address hi */
r600_emit_reloc(rctx, &rctx->gfx, t[i]->buf_filled_size,
RADEON_USAGE_READ, RADEON_PRIO_SO_FILLED_SIZE);
} else {
/* Start from the beginning. */
radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, t[i]->b.buffer_offset >> 2); /* buffer offset in DW */
radeon_emit(cs, 0); /* unused */
}
}
if (rctx->family > CHIP_R600 && rctx->family < CHIP_RV770) {
radeon_emit(cs, PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0));
radeon_emit(cs, update_flags);
}
rctx->streamout.begin_emitted = true;
}
void r600_emit_streamout_end(struct r600_common_context *rctx)
{
struct radeon_winsys_cs *cs = rctx->gfx.cs;
struct r600_so_target **t = rctx->streamout.targets;
unsigned i;
uint64_t va;
r600_flush_vgt_streamout(rctx);
for (i = 0; i < rctx->streamout.num_targets; i++) {
if (!t[i])
continue;
va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset;
radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */
radeon_emit(cs, va); /* dst address lo */
radeon_emit(cs, va >> 32); /* dst address hi */
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, 0); /* unused */
r600_emit_reloc(rctx, &rctx->gfx, t[i]->buf_filled_size,
RADEON_USAGE_WRITE, RADEON_PRIO_SO_FILLED_SIZE);
/* Zero the buffer size. The counters (primitives generated,
* primitives emitted) may be enabled even if there is no
* buffer bound. This ensures that the primitives-emitted query
* won't increment. */
radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
t[i]->buf_filled_size_valid = true;
}
rctx->streamout.begin_emitted = false;
rctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH;
}
/* STREAMOUT CONFIG DERIVED STATE
*
* Streamout must be enabled for the PRIMITIVES_GENERATED query to work.
* The buffer mask is an independent state, so no writes occur if there
* are no buffers bound.
*/
static void r600_emit_streamout_enable(struct r600_common_context *rctx,
struct r600_atom *atom)
{
unsigned strmout_config_reg = R_028AB0_VGT_STRMOUT_EN;
unsigned strmout_config_val = S_028B94_STREAMOUT_0_EN(r600_get_strmout_en(rctx));
unsigned strmout_buffer_reg = R_028B20_VGT_STRMOUT_BUFFER_EN;
unsigned strmout_buffer_val = rctx->streamout.hw_enabled_mask &
rctx->streamout.enabled_stream_buffers_mask;
if (rctx->chip_class >= EVERGREEN) {
strmout_buffer_reg = R_028B98_VGT_STRMOUT_BUFFER_CONFIG;
strmout_config_reg = R_028B94_VGT_STRMOUT_CONFIG;
strmout_config_val |=
S_028B94_RAST_STREAM(0) |
S_028B94_STREAMOUT_1_EN(r600_get_strmout_en(rctx)) |
S_028B94_STREAMOUT_2_EN(r600_get_strmout_en(rctx)) |
S_028B94_STREAMOUT_3_EN(r600_get_strmout_en(rctx));
}
radeon_set_context_reg(rctx->gfx.cs, strmout_buffer_reg, strmout_buffer_val);
radeon_set_context_reg(rctx->gfx.cs, strmout_config_reg, strmout_config_val);
}
static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable)
{
bool old_strmout_en = r600_get_strmout_en(rctx);
unsigned old_hw_enabled_mask = rctx->streamout.hw_enabled_mask;
rctx->streamout.streamout_enabled = enable;
rctx->streamout.hw_enabled_mask = rctx->streamout.enabled_mask |
(rctx->streamout.enabled_mask << 4) |
(rctx->streamout.enabled_mask << 8) |
(rctx->streamout.enabled_mask << 12);
if ((old_strmout_en != r600_get_strmout_en(rctx)) ||
(old_hw_enabled_mask != rctx->streamout.hw_enabled_mask)) {
rctx->set_atom_dirty(rctx, &rctx->streamout.enable_atom, true);
}
}
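/* Worked example (hypothetical state): with buffers 0 and 2 bound,
 * enabled_mask = 0x5, and replicating it for the four streams gives
 * hw_enabled_mask = 0x5 | 0x50 | 0x500 | 0x5000 = 0x5555.  At emit time this
 * is further ANDed with enabled_stream_buffers_mask, so only buffers of
 * streams that are actually written end up in the buffer-enable register. */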
void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
unsigned type, int diff)
{
if (type == PIPE_QUERY_PRIMITIVES_GENERATED) {
bool old_strmout_en = r600_get_strmout_en(rctx);
rctx->streamout.num_prims_gen_queries += diff;
assert(rctx->streamout.num_prims_gen_queries >= 0);
rctx->streamout.prims_gen_query_enabled =
rctx->streamout.num_prims_gen_queries != 0;
if (old_strmout_en != r600_get_strmout_en(rctx)) {
rctx->set_atom_dirty(rctx, &rctx->streamout.enable_atom, true);
}
}
}
void r600_streamout_init(struct r600_common_context *rctx)
{
rctx->b.create_stream_output_target = r600_create_so_target;
rctx->b.stream_output_target_destroy = r600_so_target_destroy;
rctx->streamout.begin_atom.emit = r600_emit_streamout_begin;
rctx->streamout.enable_atom.emit = r600_emit_streamout_enable;
rctx->streamout.enable_atom.num_dw = 6;
}

View File

@ -0,0 +1,398 @@
/*
* Copyright 2016 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
/* This file implements randomized SDMA texture blit tests. */
#include "r600_pipe_common.h"
#include "util/u_surface.h"
#include "util/rand_xor.h"
static uint64_t seed_xorshift128plus[2];
#define RAND_NUM_SIZE 8
/* The GPU blits are emulated on the CPU using these CPU textures. */
struct cpu_texture {
uint8_t *ptr;
uint64_t size;
uint64_t layer_stride;
unsigned stride;
};
static void alloc_cpu_texture(struct cpu_texture *tex,
struct pipe_resource *templ, int bpp)
{
tex->stride = align(templ->width0 * bpp, RAND_NUM_SIZE);
tex->layer_stride = (uint64_t)tex->stride * templ->height0;
tex->size = tex->layer_stride * templ->array_size;
tex->ptr = malloc(tex->size);
assert(tex->ptr);
}
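/* Illustrative note: with this layout the CPU copy of texel (x, y, layer z)
 * starts at byte offset z * layer_stride + y * stride + x * bpp, which is the
 * addressing used by set_random_pixels() and compare_textures() below.  The
 * stride is padded to a multiple of RAND_NUM_SIZE so whole rows can be filled
 * with 64-bit random values. */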
static void set_random_pixels(struct pipe_context *ctx,
struct pipe_resource *tex,
struct cpu_texture *cpu)
{
struct pipe_transfer *t;
uint8_t *map;
int x,y,z;
map = pipe_transfer_map_3d(ctx, tex, 0, PIPE_TRANSFER_WRITE,
0, 0, 0, tex->width0, tex->height0,
tex->array_size, &t);
assert(map);
for (z = 0; z < tex->array_size; z++) {
for (y = 0; y < tex->height0; y++) {
uint64_t *ptr = (uint64_t*)
(map + t->layer_stride*z + t->stride*y);
uint64_t *ptr_cpu = (uint64_t*)
(cpu->ptr + cpu->layer_stride*z + cpu->stride*y);
unsigned size = cpu->stride / RAND_NUM_SIZE;
assert(t->stride % RAND_NUM_SIZE == 0);
assert(cpu->stride % RAND_NUM_SIZE == 0);
for (x = 0; x < size; x++) {
*ptr++ = *ptr_cpu++ =
rand_xorshift128plus(seed_xorshift128plus);
}
}
}
pipe_transfer_unmap(ctx, t);
}
static bool compare_textures(struct pipe_context *ctx,
struct pipe_resource *tex,
struct cpu_texture *cpu, int bpp)
{
struct pipe_transfer *t;
uint8_t *map;
int y,z;
bool pass = true;
map = pipe_transfer_map_3d(ctx, tex, 0, PIPE_TRANSFER_READ,
0, 0, 0, tex->width0, tex->height0,
tex->array_size, &t);
assert(map);
for (z = 0; z < tex->array_size; z++) {
for (y = 0; y < tex->height0; y++) {
uint8_t *ptr = map + t->layer_stride*z + t->stride*y;
uint8_t *cpu_ptr = cpu->ptr +
cpu->layer_stride*z + cpu->stride*y;
if (memcmp(ptr, cpu_ptr, tex->width0 * bpp)) {
pass = false;
goto done;
}
}
}
done:
pipe_transfer_unmap(ctx, t);
return pass;
}
static enum pipe_format get_format_from_bpp(int bpp)
{
switch (bpp) {
case 1:
return PIPE_FORMAT_R8_UINT;
case 2:
return PIPE_FORMAT_R16_UINT;
case 4:
return PIPE_FORMAT_R32_UINT;
case 8:
return PIPE_FORMAT_R32G32_UINT;
case 16:
return PIPE_FORMAT_R32G32B32A32_UINT;
default:
assert(0);
return PIPE_FORMAT_NONE;
}
}
static const char *array_mode_to_string(struct r600_common_screen *rscreen,
struct radeon_surf *surf)
{
if (rscreen->chip_class >= GFX9) {
/* TODO */
return " UNKNOWN";
} else {
switch (surf->u.legacy.level[0].mode) {
case RADEON_SURF_MODE_LINEAR_ALIGNED:
return "LINEAR_ALIGNED";
case RADEON_SURF_MODE_1D:
return "1D_TILED_THIN1";
case RADEON_SURF_MODE_2D:
return "2D_TILED_THIN1";
default:
assert(0);
return " UNKNOWN";
}
}
}
static unsigned generate_max_tex_side(unsigned max_tex_side)
{
switch (rand() % 4) {
case 0:
/* Try to hit large sizes in 1/4 of the cases. */
return max_tex_side;
case 1:
/* Try to hit 1D tiling in 1/4 of the cases. */
return 128;
default:
/* Try to hit common sizes in 2/4 of the cases. */
return 2048;
}
}
void r600_test_dma(struct r600_common_screen *rscreen)
{
struct pipe_screen *screen = &rscreen->b;
struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
uint64_t max_alloc_size;
unsigned i, iterations, num_partial_copies, max_levels, max_tex_side;
unsigned num_pass = 0, num_fail = 0;
max_levels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS);
max_tex_side = 1 << (max_levels - 1);
/* Max 128 MB allowed for both textures. */
max_alloc_size = 128 * 1024 * 1024;
/* the seed for random test parameters */
srand(0x9b47d95b);
/* the seed for random pixel data */
s_rand_xorshift128plus(seed_xorshift128plus, false);
iterations = 1000000000; /* just kill it when you are bored */
num_partial_copies = 30;
/* These parameters are randomly generated per test:
* - whether to do one whole-surface copy or N partial copies per test
* - which tiling modes to use (LINEAR_ALIGNED, 1D, 2D)
* - which texture dimensions to use
* - whether to use VRAM (all tiling modes) and GTT (staging, linear
* only) allocations
* - random initial pixels in src
* - generate random subrectangle copies for partial blits
*/
for (i = 0; i < iterations; i++) {
struct pipe_resource tsrc = {}, tdst = {}, *src, *dst;
struct r600_texture *rdst;
struct r600_texture *rsrc;
struct cpu_texture src_cpu, dst_cpu;
unsigned bpp, max_width, max_height, max_depth, j, num;
unsigned gfx_blits = 0, dma_blits = 0, max_tex_side_gen;
unsigned max_tex_layers;
bool pass;
bool do_partial_copies = rand() & 1;
/* generate a random test case */
tsrc.target = tdst.target = PIPE_TEXTURE_2D_ARRAY;
tsrc.depth0 = tdst.depth0 = 1;
bpp = 1 << (rand() % 5);
tsrc.format = tdst.format = get_format_from_bpp(bpp);
max_tex_side_gen = generate_max_tex_side(max_tex_side);
max_tex_layers = rand() % 4 ? 1 : 5;
tsrc.width0 = (rand() % max_tex_side_gen) + 1;
tsrc.height0 = (rand() % max_tex_side_gen) + 1;
tsrc.array_size = (rand() % max_tex_layers) + 1;
/* Have a 1/4 chance of getting power-of-two dimensions. */
if (rand() % 4 == 0) {
tsrc.width0 = util_next_power_of_two(tsrc.width0);
tsrc.height0 = util_next_power_of_two(tsrc.height0);
}
if (!do_partial_copies) {
/* whole-surface copies only, same dimensions */
tdst = tsrc;
} else {
max_tex_side_gen = generate_max_tex_side(max_tex_side);
max_tex_layers = rand() % 4 ? 1 : 5;
/* many partial copies, dimensions can be different */
tdst.width0 = (rand() % max_tex_side_gen) + 1;
tdst.height0 = (rand() % max_tex_side_gen) + 1;
tdst.array_size = (rand() % max_tex_layers) + 1;
/* Have a 1/4 chance of getting power-of-two dimensions. */
if (rand() % 4 == 0) {
tdst.width0 = util_next_power_of_two(tdst.width0);
tdst.height0 = util_next_power_of_two(tdst.height0);
}
}
/* check texture sizes */
if ((uint64_t)tsrc.width0 * tsrc.height0 * tsrc.array_size * bpp +
(uint64_t)tdst.width0 * tdst.height0 * tdst.array_size * bpp >
max_alloc_size) {
/* too large, try again */
i--;
continue;
}
/* VRAM + the tiling mode depends on dimensions (3/4 of cases),
* or GTT + linear only (1/4 of cases)
*/
tsrc.usage = rand() % 4 ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STAGING;
tdst.usage = rand() % 4 ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STAGING;
/* Allocate textures (both the GPU and CPU copies).
* The CPU will emulate what the GPU should be doing.
*/
src = screen->resource_create(screen, &tsrc);
dst = screen->resource_create(screen, &tdst);
assert(src);
assert(dst);
rdst = (struct r600_texture*)dst;
rsrc = (struct r600_texture*)src;
alloc_cpu_texture(&src_cpu, &tsrc, bpp);
alloc_cpu_texture(&dst_cpu, &tdst, bpp);
printf("%4u: dst = (%5u x %5u x %u, %s), "
" src = (%5u x %5u x %u, %s), bpp = %2u, ",
i, tdst.width0, tdst.height0, tdst.array_size,
array_mode_to_string(rscreen, &rdst->surface),
tsrc.width0, tsrc.height0, tsrc.array_size,
array_mode_to_string(rscreen, &rsrc->surface), bpp);
fflush(stdout);
/* set src pixels */
set_random_pixels(ctx, src, &src_cpu);
/* clear dst pixels */
rctx->clear_buffer(ctx, dst, 0, rdst->surface.surf_size, 0, true);
memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size);
/* preparation */
max_width = MIN2(tsrc.width0, tdst.width0);
max_height = MIN2(tsrc.height0, tdst.height0);
max_depth = MIN2(tsrc.array_size, tdst.array_size);
num = do_partial_copies ? num_partial_copies : 1;
for (j = 0; j < num; j++) {
int width, height, depth;
int srcx, srcy, srcz, dstx, dsty, dstz;
struct pipe_box box;
unsigned old_num_draw_calls = rctx->num_draw_calls;
unsigned old_num_dma_calls = rctx->num_dma_calls;
if (!do_partial_copies) {
/* copy whole src to dst */
width = max_width;
height = max_height;
depth = max_depth;
srcx = srcy = srcz = dstx = dsty = dstz = 0;
} else {
/* random sub-rectangle copies from src to dst */
depth = (rand() % max_depth) + 1;
srcz = rand() % (tsrc.array_size - depth + 1);
dstz = rand() % (tdst.array_size - depth + 1);
/* special code path to hit the tiled partial copies */
if (!rsrc->surface.is_linear &&
!rdst->surface.is_linear &&
rand() & 1) {
if (max_width < 8 || max_height < 8)
continue;
width = ((rand() % (max_width / 8)) + 1) * 8;
height = ((rand() % (max_height / 8)) + 1) * 8;
srcx = rand() % (tsrc.width0 - width + 1) & ~0x7;
srcy = rand() % (tsrc.height0 - height + 1) & ~0x7;
dstx = rand() % (tdst.width0 - width + 1) & ~0x7;
dsty = rand() % (tdst.height0 - height + 1) & ~0x7;
} else {
/* just make sure that it doesn't divide by zero */
assert(max_width > 0 && max_height > 0);
width = (rand() % max_width) + 1;
height = (rand() % max_height) + 1;
srcx = rand() % (tsrc.width0 - width + 1);
srcy = rand() % (tsrc.height0 - height + 1);
dstx = rand() % (tdst.width0 - width + 1);
dsty = rand() % (tdst.height0 - height + 1);
}
/* special code path to hit out-of-bounds reads in L2T */
if (rsrc->surface.is_linear &&
!rdst->surface.is_linear &&
rand() % 4 == 0) {
srcx = 0;
srcy = 0;
srcz = 0;
}
}
/* GPU copy */
u_box_3d(srcx, srcy, srcz, width, height, depth, &box);
rctx->dma_copy(ctx, dst, 0, dstx, dsty, dstz, src, 0, &box);
/* See which engine was used. */
gfx_blits += rctx->num_draw_calls > old_num_draw_calls;
dma_blits += rctx->num_dma_calls > old_num_dma_calls;
/* CPU copy */
util_copy_box(dst_cpu.ptr, tdst.format, dst_cpu.stride,
dst_cpu.layer_stride,
dstx, dsty, dstz, width, height, depth,
src_cpu.ptr, src_cpu.stride,
src_cpu.layer_stride,
srcx, srcy, srcz);
}
pass = compare_textures(ctx, dst, &dst_cpu, bpp);
if (pass)
num_pass++;
else
num_fail++;
printf("BLITs: GFX = %2u, DMA = %2u, %s [%u/%u]\n",
gfx_blits, dma_blits, pass ? "pass" : "fail",
num_pass, num_pass+num_fail);
/* cleanup */
pipe_resource_reference(&src, NULL);
pipe_resource_reference(&dst, NULL);
free(src_cpu.ptr);
free(dst_cpu.ptr);
}
ctx->destroy(ctx);
exit(0);
}

File diff suppressed because it is too large

View File

@ -45,9 +45,9 @@
#include "vl/vl_mpeg12_decoder.h"
#include "r600_pipe.h"
#include "radeon/radeon_video.h"
#include "radeon/radeon_uvd.h"
#include "radeon/radeon_vce.h"
#include "radeon_video.h"
#include "radeon_uvd.h"
#include "radeon_vce.h"
#include "r600d.h"
#define R600_UVD_ENABLE_TILING 0

View File

@ -0,0 +1,433 @@
/*
* Copyright 2012 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "r600_cs.h"
#include "util/u_viewport.h"
#include "tgsi/tgsi_scan.h"
#define GET_MAX_SCISSOR(rctx) (rctx->chip_class >= EVERGREEN ? 16384 : 8192)
static void r600_set_scissor_states(struct pipe_context *ctx,
unsigned start_slot,
unsigned num_scissors,
const struct pipe_scissor_state *state)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
int i;
for (i = 0; i < num_scissors; i++)
rctx->scissors.states[start_slot + i] = state[i];
if (!rctx->scissor_enabled)
return;
rctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
}
/* Since the guard band disables clipping, we have to clip per-pixel
* using a scissor.
*/
static void r600_get_scissor_from_viewport(struct r600_common_context *rctx,
const struct pipe_viewport_state *vp,
struct r600_signed_scissor *scissor)
{
float tmp, minx, miny, maxx, maxy;
/* Convert (-1, -1) and (1, 1) from clip space into window space. */
minx = -vp->scale[0] + vp->translate[0];
miny = -vp->scale[1] + vp->translate[1];
maxx = vp->scale[0] + vp->translate[0];
maxy = vp->scale[1] + vp->translate[1];
/* r600_draw_rectangle sets this. Disable the scissor. */
if (minx == -1 && miny == -1 && maxx == 1 && maxy == 1) {
scissor->minx = scissor->miny = 0;
scissor->maxx = scissor->maxy = GET_MAX_SCISSOR(rctx);
return;
}
/* Handle inverted viewports. */
if (minx > maxx) {
tmp = minx;
minx = maxx;
maxx = tmp;
}
if (miny > maxy) {
tmp = miny;
miny = maxy;
maxy = tmp;
}
/* Convert to integer and round up the max bounds. */
scissor->minx = minx;
scissor->miny = miny;
scissor->maxx = ceilf(maxx);
scissor->maxy = ceilf(maxy);
}
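/* Worked example (hypothetical GL-style viewport): a 640x480 viewport with a
 * flipped Y axis has scale = (320, -240) and translate = (320, 240).  Then
 *   minx = -320 + 320 = 0     maxx = 320 + 320 = 640
 *   miny =  240 + 240 = 480   maxy = -240 + 240 = 0
 * and after swapping the inverted Y bounds the derived scissor is
 * (0, 0) - (640, 480). */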
static void r600_clamp_scissor(struct r600_common_context *rctx,
struct pipe_scissor_state *out,
struct r600_signed_scissor *scissor)
{
unsigned max_scissor = GET_MAX_SCISSOR(rctx);
out->minx = CLAMP(scissor->minx, 0, max_scissor);
out->miny = CLAMP(scissor->miny, 0, max_scissor);
out->maxx = CLAMP(scissor->maxx, 0, max_scissor);
out->maxy = CLAMP(scissor->maxy, 0, max_scissor);
}
static void r600_clip_scissor(struct pipe_scissor_state *out,
struct pipe_scissor_state *clip)
{
out->minx = MAX2(out->minx, clip->minx);
out->miny = MAX2(out->miny, clip->miny);
out->maxx = MIN2(out->maxx, clip->maxx);
out->maxy = MIN2(out->maxy, clip->maxy);
}
static void r600_scissor_make_union(struct r600_signed_scissor *out,
struct r600_signed_scissor *in)
{
out->minx = MIN2(out->minx, in->minx);
out->miny = MIN2(out->miny, in->miny);
out->maxx = MAX2(out->maxx, in->maxx);
out->maxy = MAX2(out->maxy, in->maxy);
}
void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
struct pipe_scissor_state *scissor)
{
if (rctx->chip_class == EVERGREEN || rctx->chip_class == CAYMAN) {
if (scissor->maxx == 0)
scissor->minx = 1;
if (scissor->maxy == 0)
scissor->miny = 1;
if (rctx->chip_class == CAYMAN &&
scissor->maxx == 1 && scissor->maxy == 1)
scissor->maxx = 2;
}
}
static void r600_emit_one_scissor(struct r600_common_context *rctx,
struct radeon_winsys_cs *cs,
struct r600_signed_scissor *vp_scissor,
struct pipe_scissor_state *scissor)
{
struct pipe_scissor_state final;
if (rctx->vs_disables_clipping_viewport) {
final.minx = final.miny = 0;
final.maxx = final.maxy = GET_MAX_SCISSOR(rctx);
} else {
r600_clamp_scissor(rctx, &final, vp_scissor);
}
if (scissor)
r600_clip_scissor(&final, scissor);
evergreen_apply_scissor_bug_workaround(rctx, &final);
radeon_emit(cs, S_028250_TL_X(final.minx) |
S_028250_TL_Y(final.miny) |
S_028250_WINDOW_OFFSET_DISABLE(1));
radeon_emit(cs, S_028254_BR_X(final.maxx) |
S_028254_BR_Y(final.maxy));
}
/* the range is [-MAX, MAX] */
#define GET_MAX_VIEWPORT_RANGE(rctx) (rctx->chip_class >= EVERGREEN ? 32768 : 16384)
static void r600_emit_guardband(struct r600_common_context *rctx,
struct r600_signed_scissor *vp_as_scissor)
{
struct radeon_winsys_cs *cs = rctx->gfx.cs;
struct pipe_viewport_state vp;
float left, top, right, bottom, max_range, guardband_x, guardband_y;
/* Reconstruct the viewport transformation from the scissor. */
vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0;
vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0;
vp.scale[0] = vp_as_scissor->maxx - vp.translate[0];
vp.scale[1] = vp_as_scissor->maxy - vp.translate[1];
/* Treat a 0x0 viewport as 1x1 to prevent division by zero. */
if (vp_as_scissor->minx == vp_as_scissor->maxx)
vp.scale[0] = 0.5;
if (vp_as_scissor->miny == vp_as_scissor->maxy)
vp.scale[1] = 0.5;
/* Find the biggest guard band that is inside the supported viewport
* range. The guard band is specified as a horizontal and vertical
* distance from (0,0) in clip space.
*
* This is done by applying the inverse viewport transformation
* on the viewport limits to get those limits in clip space.
*
* Use a limit one pixel smaller to allow for some precision error.
*/
max_range = GET_MAX_VIEWPORT_RANGE(rctx) - 1;
left = (-max_range - vp.translate[0]) / vp.scale[0];
right = ( max_range - vp.translate[0]) / vp.scale[0];
top = (-max_range - vp.translate[1]) / vp.scale[1];
bottom = ( max_range - vp.translate[1]) / vp.scale[1];
assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1);
guardband_x = MIN2(-left, right);
guardband_y = MIN2(-top, bottom);
/* If any of the GB registers is updated, all of them must be updated. */
if (rctx->chip_class >= CAYMAN)
radeon_set_context_reg_seq(cs, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
else
radeon_set_context_reg_seq(cs, R600_R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 4);
radeon_emit(cs, fui(guardband_y)); /* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
radeon_emit(cs, fui(1.0)); /* R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
radeon_emit(cs, fui(guardband_x)); /* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
radeon_emit(cs, fui(1.0)); /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
}
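/* Worked example (hypothetical numbers): for a viewport covering a 640x480
 * scissor, the reconstruction above gives translate = (320, 240) and
 * scale = (320, 240).  On Evergreen the viewport range is +/-32768, so with
 * max_range = 32767:
 *   left = (-32767 - 320) / 320 ~= -103.4    right  ~= 101.4
 *   top  = (-32767 - 240) / 240 ~= -137.5    bottom ~= 135.5
 * giving guardband_x ~= 101.4 and guardband_y ~= 135.5 in clip-space units. */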
static void r600_emit_scissors(struct r600_common_context *rctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = rctx->gfx.cs;
struct pipe_scissor_state *states = rctx->scissors.states;
unsigned mask = rctx->scissors.dirty_mask;
bool scissor_enabled = rctx->scissor_enabled;
struct r600_signed_scissor max_vp_scissor;
int i;
/* The simple case: Only 1 viewport is active. */
if (!rctx->vs_writes_viewport_index) {
struct r600_signed_scissor *vp = &rctx->viewports.as_scissor[0];
if (!(mask & 1))
return;
radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
r600_emit_one_scissor(rctx, cs, vp, scissor_enabled ? &states[0] : NULL);
r600_emit_guardband(rctx, vp);
rctx->scissors.dirty_mask &= ~1; /* clear one bit */
return;
}
/* Shaders can draw to any viewport. Make a union of all viewports. */
max_vp_scissor = rctx->viewports.as_scissor[0];
for (i = 1; i < R600_MAX_VIEWPORTS; i++)
r600_scissor_make_union(&max_vp_scissor,
&rctx->viewports.as_scissor[i]);
while (mask) {
int start, count, i;
u_bit_scan_consecutive_range(&mask, &start, &count);
radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL +
start * 4 * 2, count * 2);
for (i = start; i < start+count; i++) {
r600_emit_one_scissor(rctx, cs, &rctx->viewports.as_scissor[i],
scissor_enabled ? &states[i] : NULL);
}
}
r600_emit_guardband(rctx, &max_vp_scissor);
rctx->scissors.dirty_mask = 0;
}
static void r600_set_viewport_states(struct pipe_context *ctx,
unsigned start_slot,
unsigned num_viewports,
const struct pipe_viewport_state *state)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
unsigned mask;
int i;
for (i = 0; i < num_viewports; i++) {
unsigned index = start_slot + i;
rctx->viewports.states[index] = state[i];
r600_get_scissor_from_viewport(rctx, &state[i],
&rctx->viewports.as_scissor[index]);
}
mask = ((1 << num_viewports) - 1) << start_slot;
rctx->viewports.dirty_mask |= mask;
rctx->viewports.depth_range_dirty_mask |= mask;
rctx->scissors.dirty_mask |= mask;
rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
}
static void r600_emit_one_viewport(struct r600_common_context *rctx,
struct pipe_viewport_state *state)
{
struct radeon_winsys_cs *cs = rctx->gfx.cs;
radeon_emit(cs, fui(state->scale[0]));
radeon_emit(cs, fui(state->translate[0]));
radeon_emit(cs, fui(state->scale[1]));
radeon_emit(cs, fui(state->translate[1]));
radeon_emit(cs, fui(state->scale[2]));
radeon_emit(cs, fui(state->translate[2]));
}
static void r600_emit_viewports(struct r600_common_context *rctx)
{
struct radeon_winsys_cs *cs = rctx->gfx.cs;
struct pipe_viewport_state *states = rctx->viewports.states;
unsigned mask = rctx->viewports.dirty_mask;
/* The simple case: Only 1 viewport is active. */
if (!rctx->vs_writes_viewport_index) {
if (!(mask & 1))
return;
radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
r600_emit_one_viewport(rctx, &states[0]);
rctx->viewports.dirty_mask &= ~1; /* clear one bit */
return;
}
while (mask) {
int start, count, i;
u_bit_scan_consecutive_range(&mask, &start, &count);
radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
start * 4 * 6, count * 6);
for (i = start; i < start+count; i++)
r600_emit_one_viewport(rctx, &states[i]);
}
rctx->viewports.dirty_mask = 0;
}
static void r600_emit_depth_ranges(struct r600_common_context *rctx)
{
struct radeon_winsys_cs *cs = rctx->gfx.cs;
struct pipe_viewport_state *states = rctx->viewports.states;
unsigned mask = rctx->viewports.depth_range_dirty_mask;
float zmin, zmax;
/* The simple case: Only 1 viewport is active. */
if (!rctx->vs_writes_viewport_index) {
if (!(mask & 1))
return;
util_viewport_zmin_zmax(&states[0], rctx->clip_halfz, &zmin, &zmax);
radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
radeon_emit(cs, fui(zmin));
radeon_emit(cs, fui(zmax));
rctx->viewports.depth_range_dirty_mask &= ~1; /* clear one bit */
return;
}
while (mask) {
int start, count, i;
u_bit_scan_consecutive_range(&mask, &start, &count);
radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 +
start * 4 * 2, count * 2);
for (i = start; i < start+count; i++) {
util_viewport_zmin_zmax(&states[i], rctx->clip_halfz, &zmin, &zmax);
radeon_emit(cs, fui(zmin));
radeon_emit(cs, fui(zmax));
}
}
rctx->viewports.depth_range_dirty_mask = 0;
}
static void r600_emit_viewport_states(struct r600_common_context *rctx,
struct r600_atom *atom)
{
r600_emit_viewports(rctx);
r600_emit_depth_ranges(rctx);
}
/* Set viewport dependencies on pipe_rasterizer_state. */
void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
bool scissor_enable, bool clip_halfz)
{
if (rctx->scissor_enabled != scissor_enable) {
rctx->scissor_enabled = scissor_enable;
rctx->scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
}
if (rctx->clip_halfz != clip_halfz) {
rctx->clip_halfz = clip_halfz;
rctx->viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
}
}
/**
* Normally, we only emit 1 viewport and 1 scissor if no shader is using
* the VIEWPORT_INDEX output, and emitting the other viewports and scissors
* is delayed. When a shader with VIEWPORT_INDEX appears, this should be
* called to emit the rest.
*/
void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
struct tgsi_shader_info *info)
{
bool vs_window_space;
if (!info)
return;
/* When the VS disables clipping and viewport transformation. */
vs_window_space =
info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
if (rctx->vs_disables_clipping_viewport != vs_window_space) {
rctx->vs_disables_clipping_viewport = vs_window_space;
rctx->scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
}
/* Viewport index handling. */
rctx->vs_writes_viewport_index = info->writes_viewport_index;
if (!rctx->vs_writes_viewport_index)
return;
if (rctx->scissors.dirty_mask)
rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
if (rctx->viewports.dirty_mask ||
rctx->viewports.depth_range_dirty_mask)
rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
}
void r600_init_viewport_functions(struct r600_common_context *rctx)
{
rctx->scissors.atom.emit = r600_emit_scissors;
rctx->viewports.atom.emit = r600_emit_viewport_states;
rctx->scissors.atom.num_dw = (2 + 16 * 2) + 6;
rctx->viewports.atom.num_dw = 2 + 16 * 6;
rctx->b.set_scissor_states = r600_set_scissor_states;
rctx->b.set_viewport_states = r600_set_viewport_states;
}

File diff suppressed because it is too large

View File

@ -0,0 +1,447 @@
/**************************************************************************
*
* Copyright 2011 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/*
* Authors:
* Christian König <christian.koenig@amd.com>
*
*/
#ifndef RADEON_UVD_H
#define RADEON_UVD_H
#include "radeon/radeon_winsys.h"
#include "vl/vl_video_buffer.h"
/* UVD uses PM4 packet type 0 and 2 */
#define RUVD_PKT_TYPE_S(x) (((unsigned)(x) & 0x3) << 30)
#define RUVD_PKT_TYPE_G(x) (((x) >> 30) & 0x3)
#define RUVD_PKT_TYPE_C 0x3FFFFFFF
#define RUVD_PKT_COUNT_S(x) (((unsigned)(x) & 0x3FFF) << 16)
#define RUVD_PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF)
#define RUVD_PKT_COUNT_C 0xC000FFFF
#define RUVD_PKT0_BASE_INDEX_S(x) (((unsigned)(x) & 0xFFFF) << 0)
#define RUVD_PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF)
#define RUVD_PKT0_BASE_INDEX_C 0xFFFF0000
#define RUVD_PKT0(index, count) (RUVD_PKT_TYPE_S(0) | RUVD_PKT0_BASE_INDEX_S(index) | RUVD_PKT_COUNT_S(count))
#define RUVD_PKT2() (RUVD_PKT_TYPE_S(2))
/* registers involved with UVD */
#define RUVD_GPCOM_VCPU_CMD 0xEF0C
#define RUVD_GPCOM_VCPU_DATA0 0xEF10
#define RUVD_GPCOM_VCPU_DATA1 0xEF14
#define RUVD_ENGINE_CNTL 0xEF18
#define RUVD_GPCOM_VCPU_CMD_SOC15 0x2070c
#define RUVD_GPCOM_VCPU_DATA0_SOC15 0x20710
#define RUVD_GPCOM_VCPU_DATA1_SOC15 0x20714
#define RUVD_ENGINE_CNTL_SOC15 0x20718
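/*
* Typical usage, sketched from the matching radeon_uvd.c: a register write is
* one PKT0 header followed by the value, with the register byte offset
* converted to a dword index.
*
* radeon_emit(cs, RUVD_PKT0(RUVD_GPCOM_VCPU_DATA0 >> 2, 0));
* radeon_emit(cs, addr);
* radeon_emit(cs, RUVD_PKT0(RUVD_GPCOM_VCPU_CMD >> 2, 0));
* radeon_emit(cs, cmd << 1);
*/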
/* UVD commands to VCPU */
#define RUVD_CMD_MSG_BUFFER 0x00000000
#define RUVD_CMD_DPB_BUFFER 0x00000001
#define RUVD_CMD_DECODING_TARGET_BUFFER 0x00000002
#define RUVD_CMD_FEEDBACK_BUFFER 0x00000003
#define RUVD_CMD_SESSION_CONTEXT_BUFFER 0x00000005
#define RUVD_CMD_BITSTREAM_BUFFER 0x00000100
#define RUVD_CMD_ITSCALING_TABLE_BUFFER 0x00000204
#define RUVD_CMD_CONTEXT_BUFFER 0x00000206
/* UVD message types */
#define RUVD_MSG_CREATE 0
#define RUVD_MSG_DECODE 1
#define RUVD_MSG_DESTROY 2
/* UVD stream types */
#define RUVD_CODEC_H264 0x00000000
#define RUVD_CODEC_VC1 0x00000001
#define RUVD_CODEC_MPEG2 0x00000003
#define RUVD_CODEC_MPEG4 0x00000004
#define RUVD_CODEC_H264_PERF 0x00000007
#define RUVD_CODEC_MJPEG 0x00000008
#define RUVD_CODEC_H265 0x00000010
/* UVD decode target buffer tiling mode */
#define RUVD_TILE_LINEAR 0x00000000
#define RUVD_TILE_8X4 0x00000001
#define RUVD_TILE_8X8 0x00000002
#define RUVD_TILE_32AS8 0x00000003
/* UVD decode target buffer array mode */
#define RUVD_ARRAY_MODE_LINEAR 0x00000000
#define RUVD_ARRAY_MODE_MACRO_LINEAR_MICRO_TILED 0x00000001
#define RUVD_ARRAY_MODE_1D_THIN 0x00000002
#define RUVD_ARRAY_MODE_2D_THIN 0x00000004
#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_LINEAR 0x00000004
#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_TILED 0x00000005
/* UVD tile config */
#define RUVD_BANK_WIDTH(x) ((x) << 0)
#define RUVD_BANK_HEIGHT(x) ((x) << 3)
#define RUVD_MACRO_TILE_ASPECT_RATIO(x) ((x) << 6)
#define RUVD_NUM_BANKS(x) ((x) << 9)
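/*
* These fields combine into the db/dt_surf_tile_config words; a sketch of the
* usual encoding, where bank_w, bank_h, mtilea and num_banks are power-of-two
* values taken from the surface/winsys tiling info:
*
* uint32_t tile_config = RUVD_BANK_WIDTH(util_logbase2(bank_w)) |
*                        RUVD_BANK_HEIGHT(util_logbase2(bank_h)) |
*                        RUVD_MACRO_TILE_ASPECT_RATIO(util_logbase2(mtilea)) |
*                        RUVD_NUM_BANKS(util_logbase2(num_banks));
*/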
/* H.264 profile definitions */
#define RUVD_H264_PROFILE_BASELINE 0x00000000
#define RUVD_H264_PROFILE_MAIN 0x00000001
#define RUVD_H264_PROFILE_HIGH 0x00000002
#define RUVD_H264_PROFILE_STEREO_HIGH 0x00000003
#define RUVD_H264_PROFILE_MVC 0x00000004
/* VC-1 profile definitions */
#define RUVD_VC1_PROFILE_SIMPLE 0x00000000
#define RUVD_VC1_PROFILE_MAIN 0x00000001
#define RUVD_VC1_PROFILE_ADVANCED 0x00000002
enum ruvd_surface_type {
RUVD_SURFACE_TYPE_LEGACY = 0,
RUVD_SURFACE_TYPE_GFX9
};
struct ruvd_mvc_element {
uint16_t viewOrderIndex;
uint16_t viewId;
uint16_t numOfAnchorRefsInL0;
uint16_t viewIdOfAnchorRefsInL0[15];
uint16_t numOfAnchorRefsInL1;
uint16_t viewIdOfAnchorRefsInL1[15];
uint16_t numOfNonAnchorRefsInL0;
uint16_t viewIdOfNonAnchorRefsInL0[15];
uint16_t numOfNonAnchorRefsInL1;
uint16_t viewIdOfNonAnchorRefsInL1[15];
};
struct ruvd_h264 {
uint32_t profile;
uint32_t level;
uint32_t sps_info_flags;
uint32_t pps_info_flags;
uint8_t chroma_format;
uint8_t bit_depth_luma_minus8;
uint8_t bit_depth_chroma_minus8;
uint8_t log2_max_frame_num_minus4;
uint8_t pic_order_cnt_type;
uint8_t log2_max_pic_order_cnt_lsb_minus4;
uint8_t num_ref_frames;
uint8_t reserved_8bit;
int8_t pic_init_qp_minus26;
int8_t pic_init_qs_minus26;
int8_t chroma_qp_index_offset;
int8_t second_chroma_qp_index_offset;
uint8_t num_slice_groups_minus1;
uint8_t slice_group_map_type;
uint8_t num_ref_idx_l0_active_minus1;
uint8_t num_ref_idx_l1_active_minus1;
uint16_t slice_group_change_rate_minus1;
uint16_t reserved_16bit_1;
uint8_t scaling_list_4x4[6][16];
uint8_t scaling_list_8x8[2][64];
uint32_t frame_num;
uint32_t frame_num_list[16];
int32_t curr_field_order_cnt_list[2];
int32_t field_order_cnt_list[16][2];
uint32_t decoded_pic_idx;
uint32_t curr_pic_ref_frame_num;
uint8_t ref_frame_list[16];
uint32_t reserved[122];
struct {
uint32_t numViews;
uint32_t viewId0;
struct ruvd_mvc_element mvcElements[1];
} mvc;
};
struct ruvd_h265 {
uint32_t sps_info_flags;
uint32_t pps_info_flags;
uint8_t chroma_format;
uint8_t bit_depth_luma_minus8;
uint8_t bit_depth_chroma_minus8;
uint8_t log2_max_pic_order_cnt_lsb_minus4;
uint8_t sps_max_dec_pic_buffering_minus1;
uint8_t log2_min_luma_coding_block_size_minus3;
uint8_t log2_diff_max_min_luma_coding_block_size;
uint8_t log2_min_transform_block_size_minus2;
uint8_t log2_diff_max_min_transform_block_size;
uint8_t max_transform_hierarchy_depth_inter;
uint8_t max_transform_hierarchy_depth_intra;
uint8_t pcm_sample_bit_depth_luma_minus1;
uint8_t pcm_sample_bit_depth_chroma_minus1;
uint8_t log2_min_pcm_luma_coding_block_size_minus3;
uint8_t log2_diff_max_min_pcm_luma_coding_block_size;
uint8_t num_extra_slice_header_bits;
uint8_t num_short_term_ref_pic_sets;
uint8_t num_long_term_ref_pic_sps;
uint8_t num_ref_idx_l0_default_active_minus1;
uint8_t num_ref_idx_l1_default_active_minus1;
int8_t pps_cb_qp_offset;
int8_t pps_cr_qp_offset;
int8_t pps_beta_offset_div2;
int8_t pps_tc_offset_div2;
uint8_t diff_cu_qp_delta_depth;
uint8_t num_tile_columns_minus1;
uint8_t num_tile_rows_minus1;
uint8_t log2_parallel_merge_level_minus2;
uint16_t column_width_minus1[19];
uint16_t row_height_minus1[21];
int8_t init_qp_minus26;
uint8_t num_delta_pocs_ref_rps_idx;
uint8_t curr_idx;
uint8_t reserved1;
int32_t curr_poc;
uint8_t ref_pic_list[16];
int32_t poc_list[16];
uint8_t ref_pic_set_st_curr_before[8];
uint8_t ref_pic_set_st_curr_after[8];
uint8_t ref_pic_set_lt_curr[8];
uint8_t ucScalingListDCCoefSizeID2[6];
uint8_t ucScalingListDCCoefSizeID3[2];
uint8_t highestTid;
uint8_t isNonRef;
uint8_t p010_mode;
uint8_t msb_mode;
uint8_t luma_10to8;
uint8_t chroma_10to8;
uint8_t sclr_luma10to8;
uint8_t sclr_chroma10to8;
uint8_t direct_reflist[2][15];
};
struct ruvd_vc1 {
uint32_t profile;
uint32_t level;
uint32_t sps_info_flags;
uint32_t pps_info_flags;
uint32_t pic_structure;
uint32_t chroma_format;
};
struct ruvd_mpeg2 {
uint32_t decoded_pic_idx;
uint32_t ref_pic_idx[2];
uint8_t load_intra_quantiser_matrix;
uint8_t load_nonintra_quantiser_matrix;
uint8_t reserved_quantiser_alignement[2];
uint8_t intra_quantiser_matrix[64];
uint8_t nonintra_quantiser_matrix[64];
uint8_t profile_and_level_indication;
uint8_t chroma_format;
uint8_t picture_coding_type;
uint8_t reserved_1;
uint8_t f_code[2][2];
uint8_t intra_dc_precision;
uint8_t pic_structure;
uint8_t top_field_first;
uint8_t frame_pred_frame_dct;
uint8_t concealment_motion_vectors;
uint8_t q_scale_type;
uint8_t intra_vlc_format;
uint8_t alternate_scan;
};
struct ruvd_mpeg4
{
uint32_t decoded_pic_idx;
uint32_t ref_pic_idx[2];
uint32_t variant_type;
uint8_t profile_and_level_indication;
uint8_t video_object_layer_verid;
uint8_t video_object_layer_shape;
uint8_t reserved_1;
uint16_t video_object_layer_width;
uint16_t video_object_layer_height;
uint16_t vop_time_increment_resolution;
uint16_t reserved_2;
uint32_t flags;
uint8_t quant_type;
uint8_t reserved_3[3];
uint8_t intra_quant_mat[64];
uint8_t nonintra_quant_mat[64];
struct {
uint8_t sprite_enable;
uint8_t reserved_4[3];
uint16_t sprite_width;
uint16_t sprite_height;
int16_t sprite_left_coordinate;
int16_t sprite_top_coordinate;
uint8_t no_of_sprite_warping_points;
uint8_t sprite_warping_accuracy;
uint8_t sprite_brightness_change;
uint8_t low_latency_sprite_enable;
} sprite_config;
struct {
uint32_t flags;
uint8_t vol_mode;
uint8_t reserved_5[3];
} divx_311_config;
};
/* message between driver and hardware */
struct ruvd_msg {
uint32_t size;
uint32_t msg_type;
uint32_t stream_handle;
uint32_t status_report_feedback_number;
union {
struct {
uint32_t stream_type;
uint32_t session_flags;
uint32_t asic_id;
uint32_t width_in_samples;
uint32_t height_in_samples;
uint32_t dpb_buffer;
uint32_t dpb_size;
uint32_t dpb_model;
uint32_t version_info;
} create;
struct {
uint32_t stream_type;
uint32_t decode_flags;
uint32_t width_in_samples;
uint32_t height_in_samples;
uint32_t dpb_buffer;
uint32_t dpb_size;
uint32_t dpb_model;
uint32_t dpb_reserved;
uint32_t db_offset_alignment;
uint32_t db_pitch;
uint32_t db_tiling_mode;
uint32_t db_array_mode;
uint32_t db_field_mode;
uint32_t db_surf_tile_config;
uint32_t db_aligned_height;
uint32_t db_reserved;
uint32_t use_addr_macro;
uint32_t bsd_buffer;
uint32_t bsd_size;
uint32_t pic_param_buffer;
uint32_t pic_param_size;
uint32_t mb_cntl_buffer;
uint32_t mb_cntl_size;
uint32_t dt_buffer;
uint32_t dt_pitch;
uint32_t dt_tiling_mode;
uint32_t dt_array_mode;
uint32_t dt_field_mode;
uint32_t dt_luma_top_offset;
uint32_t dt_luma_bottom_offset;
uint32_t dt_chroma_top_offset;
uint32_t dt_chroma_bottom_offset;
uint32_t dt_surf_tile_config;
uint32_t dt_uv_surf_tile_config;
// re-use dt_wa_chroma_top_offset as dt_ext_info for UV pitch in stoney
uint32_t dt_wa_chroma_top_offset;
uint32_t dt_wa_chroma_bottom_offset;
uint32_t reserved[16];
union {
struct ruvd_h264 h264;
struct ruvd_h265 h265;
struct ruvd_vc1 vc1;
struct ruvd_mpeg2 mpeg2;
struct ruvd_mpeg4 mpeg4;
uint32_t info[768];
} codec;
uint8_t extension_support;
uint8_t reserved_8bit_1;
uint8_t reserved_8bit_2;
uint8_t reserved_8bit_3;
uint32_t extension_reserved[64];
} decode;
} body;
};
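/*
* A minimal RUVD_MSG_CREATE fill, roughly how the decoder prepares it before
* submission (the concrete values below are illustrative):
*
* struct ruvd_msg msg = {0};
* msg.size = sizeof(msg);
* msg.msg_type = RUVD_MSG_CREATE;
* msg.stream_handle = stream_handle;
* msg.body.create.stream_type = RUVD_CODEC_H264;
* msg.body.create.width_in_samples = width;
* msg.body.create.height_in_samples = height;
* msg.body.create.dpb_size = dpb_size;
*/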
/* driver dependent callback */
typedef struct pb_buffer* (*ruvd_set_dtb)
(struct ruvd_msg* msg, struct vl_video_buffer *vb);
/* create an UVD decode */
struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
const struct pipe_video_codec *templat,
ruvd_set_dtb set_dtb);
/* fill decoding target field from the luma and chroma surfaces */
void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
struct radeon_surf *chroma, enum ruvd_surface_type type);
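/*
* Usage sketch: the driver supplies the set_dtb callback so this common code
* can ask it to fill the decoding target fields (the callback name below is
* illustrative):
*
* static struct pb_buffer *driver_uvd_set_dtb(struct ruvd_msg *msg,
*                                             struct vl_video_buffer *vb);
* ...
* decoder = ruvd_create_decoder(pipe, templat, driver_uvd_set_dtb);
*
* The callback typically calls ruvd_set_dt_surfaces() with the buffer's luma
* and chroma surfaces and returns the backing pb_buffer.
*/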
#endif

View File

@ -0,0 +1,553 @@
/**************************************************************************
*
* Copyright 2013 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/*
* Authors:
* Christian König <christian.koenig@amd.com>
*
*/
#include <stdio.h>
#include "pipe/p_video_codec.h"
#include "util/u_video.h"
#include "util/u_memory.h"
#include "vl/vl_video_buffer.h"
#include "r600_pipe_common.h"
#include "radeon_video.h"
#include "radeon_vce.h"
#define FW_40_2_2 ((40 << 24) | (2 << 16) | (2 << 8))
#define FW_50_0_1 ((50 << 24) | (0 << 16) | (1 << 8))
#define FW_50_1_2 ((50 << 24) | (1 << 16) | (2 << 8))
#define FW_50_10_2 ((50 << 24) | (10 << 16) | (2 << 8))
#define FW_50_17_3 ((50 << 24) | (17 << 16) | (3 << 8))
#define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8))
#define FW_52_4_3 ((52 << 24) | (4 << 16) | (3 << 8))
#define FW_52_8_3 ((52 << 24) | (8 << 16) | (3 << 8))
#define FW_53 (53 << 24)
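/* The packed layout is major << 24 | minor << 16 | rev << 8, so e.g. FW_52_8_3
* is (52 << 24) | (8 << 16) | (3 << 8) = 0x34080300; rvce_is_fw_version_supported()
* compares these constants against rscreen->info.vce_fw_version. */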
/**
* flush commands to the hardware
*/
static void flush(struct rvce_encoder *enc)
{
enc->ws->cs_flush(enc->cs, RADEON_FLUSH_ASYNC, NULL);
enc->task_info_idx = 0;
enc->bs_idx = 0;
}
#if 0
static void dump_feedback(struct rvce_encoder *enc, struct rvid_buffer *fb)
{
uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
unsigned i = 0;
fprintf(stderr, "\n");
fprintf(stderr, "encStatus:\t\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encHasBitstream:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encHasAudioBitstream:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encBitstreamOffset:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encBitstreamSize:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encAudioBitstreamOffset:\t%08x\n", ptr[i++]);
fprintf(stderr, "encAudioBitstreamSize:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encExtrabytes:\t\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encAudioExtrabytes:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "videoTimeStamp:\t\t\t%08x\n", ptr[i++]);
fprintf(stderr, "audioTimeStamp:\t\t\t%08x\n", ptr[i++]);
fprintf(stderr, "videoOutputType:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "attributeFlags:\t\t\t%08x\n", ptr[i++]);
fprintf(stderr, "seiPrivatePackageOffset:\t%08x\n", ptr[i++]);
fprintf(stderr, "seiPrivatePackageSize:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "\n");
enc->ws->buffer_unmap(fb->res->buf);
}
#endif
/**
* reset the CPB handling
*/
static void reset_cpb(struct rvce_encoder *enc)
{
unsigned i;
LIST_INITHEAD(&enc->cpb_slots);
for (i = 0; i < enc->cpb_num; ++i) {
struct rvce_cpb_slot *slot = &enc->cpb_array[i];
slot->index = i;
slot->picture_type = PIPE_H264_ENC_PICTURE_TYPE_SKIP;
slot->frame_num = 0;
slot->pic_order_cnt = 0;
LIST_ADDTAIL(&slot->list, &enc->cpb_slots);
}
}
/**
* sort l0 and l1 to the top of the list
*/
static void sort_cpb(struct rvce_encoder *enc)
{
struct rvce_cpb_slot *i, *l0 = NULL, *l1 = NULL;
LIST_FOR_EACH_ENTRY(i, &enc->cpb_slots, list) {
if (i->frame_num == enc->pic.ref_idx_l0)
l0 = i;
if (i->frame_num == enc->pic.ref_idx_l1)
l1 = i;
if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P && l0)
break;
if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B &&
l0 && l1)
break;
}
if (l1) {
LIST_DEL(&l1->list);
LIST_ADD(&l1->list, &enc->cpb_slots);
}
if (l0) {
LIST_DEL(&l0->list);
LIST_ADD(&l0->list, &enc->cpb_slots);
}
}
/**
* get number of cpbs based on dpb
*/
static unsigned get_cpb_num(struct rvce_encoder *enc)
{
unsigned w = align(enc->base.width, 16) / 16;
unsigned h = align(enc->base.height, 16) / 16;
unsigned dpb;
switch (enc->base.level) {
case 10:
dpb = 396;
break;
case 11:
dpb = 900;
break;
case 12:
case 13:
case 20:
dpb = 2376;
break;
case 21:
dpb = 4752;
break;
case 22:
case 30:
dpb = 8100;
break;
case 31:
dpb = 18000;
break;
case 32:
dpb = 20480;
break;
case 40:
case 41:
dpb = 32768;
break;
case 42:
dpb = 34816;
break;
case 50:
dpb = 110400;
break;
default:
case 51:
case 52:
dpb = 184320;
break;
}
return MIN2(dpb / (w * h), 16);
}
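/*
* Worked example: a 1920x1080 stream at level 4.1 gives
* w = align(1920, 16) / 16 = 120, h = align(1080, 16) / 16 = 68 and
* dpb = 32768, so get_cpb_num() returns 32768 / (120 * 68) = 4 CPB slots.
*/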
/**
* Get the slot for the currently encoded frame
*/
struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc)
{
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list);
}
/**
* Get the slot for L0
*/
struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc)
{
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list);
}
/**
* Get the slot for L1
*/
struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc)
{
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list);
}
/**
* Calculate the offsets into the CPB
*/
void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
signed *luma_offset, signed *chroma_offset)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)enc->screen;
unsigned pitch, vpitch, fsize;
if (rscreen->chip_class < GFX9) {
pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128);
vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16);
} else {
pitch = align(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe, 256);
vpitch = align(enc->luma->u.gfx9.surf_height, 16);
}
fsize = pitch * (vpitch + vpitch / 2);
*luma_offset = slot->index * fsize;
*chroma_offset = *luma_offset + pitch * vpitch;
}
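/*
* Example (pre-GFX9, assuming an NV12 surface with nblk_x = 1920,
* nblk_y = 1088 and bpe = 1): pitch = align(1920, 128) = 1920,
* vpitch = align(1088, 16) = 1088, fsize = 1920 * (1088 + 544) = 3133440;
* slot 1 then starts at luma_offset = 3133440 and
* chroma_offset = 3133440 + 1920 * 1088 = 5222400.
*/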
/**
* destroy this video encoder
*/
static void rvce_destroy(struct pipe_video_codec *encoder)
{
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
if (enc->stream_handle) {
struct rvid_buffer fb;
rvid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
enc->fb = &fb;
enc->session(enc);
enc->feedback(enc);
enc->destroy(enc);
flush(enc);
rvid_destroy_buffer(&fb);
}
rvid_destroy_buffer(&enc->cpb);
enc->ws->cs_destroy(enc->cs);
FREE(enc->cpb_array);
FREE(enc);
}
static void rvce_begin_frame(struct pipe_video_codec *encoder,
struct pipe_video_buffer *source,
struct pipe_picture_desc *picture)
{
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source;
struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture;
bool need_rate_control =
enc->pic.rate_ctrl.rate_ctrl_method != pic->rate_ctrl.rate_ctrl_method ||
enc->pic.quant_i_frames != pic->quant_i_frames ||
enc->pic.quant_p_frames != pic->quant_p_frames ||
enc->pic.quant_b_frames != pic->quant_b_frames;
enc->pic = *pic;
get_pic_param(enc, pic);
enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR)
reset_cpb(enc);
else if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_B)
sort_cpb(enc);
if (!enc->stream_handle) {
struct rvid_buffer fb;
enc->stream_handle = rvid_alloc_stream_handle();
rvid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
enc->fb = &fb;
enc->session(enc);
enc->create(enc);
enc->config(enc);
enc->feedback(enc);
flush(enc);
//dump_feedback(enc, &fb);
rvid_destroy_buffer(&fb);
need_rate_control = false;
}
if (need_rate_control) {
enc->session(enc);
enc->config(enc);
flush(enc);
}
}
static void rvce_encode_bitstream(struct pipe_video_codec *encoder,
struct pipe_video_buffer *source,
struct pipe_resource *destination,
void **fb)
{
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
enc->get_buffer(destination, &enc->bs_handle, NULL);
enc->bs_size = destination->width0;
*fb = enc->fb = CALLOC_STRUCT(rvid_buffer);
if (!rvid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) {
RVID_ERR("Can't create feedback buffer.\n");
return;
}
if (!radeon_emitted(enc->cs, 0))
enc->session(enc);
enc->encode(enc);
enc->feedback(enc);
}
static void rvce_end_frame(struct pipe_video_codec *encoder,
struct pipe_video_buffer *source,
struct pipe_picture_desc *picture)
{
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
struct rvce_cpb_slot *slot = LIST_ENTRY(
struct rvce_cpb_slot, enc->cpb_slots.prev, list);
if (!enc->dual_inst || enc->bs_idx > 1)
flush(enc);
/* update the CPB backtrack with the just encoded frame */
slot->picture_type = enc->pic.picture_type;
slot->frame_num = enc->pic.frame_num;
slot->pic_order_cnt = enc->pic.pic_order_cnt;
if (!enc->pic.not_referenced) {
LIST_DEL(&slot->list);
LIST_ADD(&slot->list, &enc->cpb_slots);
}
}
static void rvce_get_feedback(struct pipe_video_codec *encoder,
void *feedback, unsigned *size)
{
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
struct rvid_buffer *fb = feedback;
if (size) {
uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
if (ptr[1]) {
*size = ptr[4] - ptr[9];
} else {
*size = 0;
}
enc->ws->buffer_unmap(fb->res->buf);
}
//dump_feedback(enc, fb);
rvid_destroy_buffer(fb);
FREE(fb);
}
/**
* flush any outstanding command buffers to the hardware
*/
static void rvce_flush(struct pipe_video_codec *encoder)
{
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
flush(enc);
}
static void rvce_cs_flush(void *ctx, unsigned flags,
struct pipe_fence_handle **fence)
{
// just ignored
}
struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
const struct pipe_video_codec *templ,
struct radeon_winsys* ws,
rvce_get_buffer get_buffer)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)context->screen;
struct r600_common_context *rctx = (struct r600_common_context*)context;
struct rvce_encoder *enc;
struct pipe_video_buffer *tmp_buf, templat = {};
struct radeon_surf *tmp_surf;
unsigned cpb_size;
if (!rscreen->info.vce_fw_version) {
RVID_ERR("Kernel doesn't supports VCE!\n");
return NULL;
} else if (!rvce_is_fw_version_supported(rscreen)) {
RVID_ERR("Unsupported VCE fw version loaded!\n");
return NULL;
}
enc = CALLOC_STRUCT(rvce_encoder);
if (!enc)
return NULL;
if (rscreen->info.drm_major == 3)
enc->use_vm = true;
if ((rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42) ||
rscreen->info.drm_major == 3)
enc->use_vui = true;
if (rscreen->info.family >= CHIP_TONGA &&
rscreen->info.family != CHIP_STONEY &&
rscreen->info.family != CHIP_POLARIS11 &&
rscreen->info.family != CHIP_POLARIS12)
enc->dual_pipe = true;
/* TODO enable B frame with dual instance */
if ((rscreen->info.family >= CHIP_TONGA) &&
(templ->max_references == 1) &&
(rscreen->info.vce_harvest_config == 0))
enc->dual_inst = true;
enc->base = *templ;
enc->base.context = context;
enc->base.destroy = rvce_destroy;
enc->base.begin_frame = rvce_begin_frame;
enc->base.encode_bitstream = rvce_encode_bitstream;
enc->base.end_frame = rvce_end_frame;
enc->base.flush = rvce_flush;
enc->base.get_feedback = rvce_get_feedback;
enc->get_buffer = get_buffer;
enc->screen = context->screen;
enc->ws = ws;
enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc);
if (!enc->cs) {
RVID_ERR("Can't get command submission context.\n");
goto error;
}
templat.buffer_format = PIPE_FORMAT_NV12;
templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
templat.width = enc->base.width;
templat.height = enc->base.height;
templat.interlaced = false;
if (!(tmp_buf = context->create_video_buffer(context, &templat))) {
RVID_ERR("Can't create video buffer.\n");
goto error;
}
enc->cpb_num = get_cpb_num(enc);
if (!enc->cpb_num)
goto error;
get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf);
cpb_size = (rscreen->chip_class < GFX9) ?
align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) *
align(tmp_surf->u.legacy.level[0].nblk_y, 32) :
align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) *
align(tmp_surf->u.gfx9.surf_height, 32);
cpb_size = cpb_size * 3 / 2;
cpb_size = cpb_size * enc->cpb_num;
if (enc->dual_pipe)
cpb_size += RVCE_MAX_AUX_BUFFER_NUM *
RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
tmp_buf->destroy(tmp_buf);
if (!rvid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't create CPB buffer.\n");
goto error;
}
enc->cpb_array = CALLOC(enc->cpb_num, sizeof(struct rvce_cpb_slot));
if (!enc->cpb_array)
goto error;
reset_cpb(enc);
return &enc->base;
error:
if (enc->cs)
enc->ws->cs_destroy(enc->cs);
rvid_destroy_buffer(&enc->cpb);
FREE(enc->cpb_array);
FREE(enc);
return NULL;
}
/**
* check if kernel has the right fw version loaded
*/
bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
{
switch (rscreen->info.vce_fw_version) {
case FW_40_2_2:
case FW_50_0_1:
case FW_50_1_2:
case FW_50_10_2:
case FW_50_17_3:
case FW_52_0_3:
case FW_52_4_3:
case FW_52_8_3:
return true;
default:
if ((rscreen->info.vce_fw_version & (0xff << 24)) == FW_53)
return true;
else
return false;
}
}
/**
* Add the buffer as relocation to the current command submission
*/
void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
enum radeon_bo_usage usage, enum radeon_bo_domain domain,
signed offset)
{
int reloc_idx;
reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
domain, RADEON_PRIO_VCE);
if (enc->use_vm) {
uint64_t addr;
addr = enc->ws->buffer_get_virtual_address(buf);
addr = addr + offset;
RVCE_CS(addr >> 32);
RVCE_CS(addr);
} else {
offset += enc->ws->buffer_get_reloc_offset(buf);
RVCE_CS(reloc_idx * 4);
RVCE_CS(offset);
}
}

View File

@ -0,0 +1,462 @@
/**************************************************************************
*
* Copyright 2013 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/*
* Authors:
* Christian König <christian.koenig@amd.com>
*
*/
#ifndef RADEON_VCE_H
#define RADEON_VCE_H
#include "util/list.h"
#define RVCE_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value))
#define RVCE_BEGIN(cmd) { \
uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \
RVCE_CS(cmd)
#define RVCE_READ(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
#define RVCE_WRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
#define RVCE_READWRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
#define RVCE_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; }
#define RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE (4096 * 16 * 2.5)
#define RVCE_MAX_AUX_BUFFER_NUM 4
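/*
* Sketch of how the version-specific files build a command with these macros
* (the session command id shown is illustrative):
*
* RVCE_BEGIN(0x00000001); // session info
* RVCE_CS(enc->stream_handle);
* RVCE_END();
*
* RVCE_BEGIN() reserves a dword for the command size and emits the command id,
* RVCE_CS() appends payload dwords, and RVCE_END() patches the total size in
* bytes back into the first dword.
*/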
struct r600_common_screen;
/* driver dependent callback */
typedef void (*rvce_get_buffer)(struct pipe_resource *resource,
struct pb_buffer **handle,
struct radeon_surf **surface);
/* Coded picture buffer slot */
struct rvce_cpb_slot {
struct list_head list;
unsigned index;
enum pipe_h264_enc_picture_type picture_type;
unsigned frame_num;
unsigned pic_order_cnt;
};
struct rvce_rate_control {
uint32_t rc_method;
uint32_t target_bitrate;
uint32_t peak_bitrate;
uint32_t frame_rate_num;
uint32_t gop_size;
uint32_t quant_i_frames;
uint32_t quant_p_frames;
uint32_t quant_b_frames;
uint32_t vbv_buffer_size;
uint32_t frame_rate_den;
uint32_t vbv_buf_lv;
uint32_t max_au_size;
uint32_t qp_initial_mode;
uint32_t target_bits_picture;
uint32_t peak_bits_picture_integer;
uint32_t peak_bits_picture_fraction;
uint32_t min_qp;
uint32_t max_qp;
uint32_t skip_frame_enable;
uint32_t fill_data_enable;
uint32_t enforce_hrd;
uint32_t b_pics_delta_qp;
uint32_t ref_b_pics_delta_qp;
uint32_t rc_reinit_disable;
uint32_t enc_lcvbr_init_qp_flag;
uint32_t lcvbrsatd_based_nonlinear_bit_budget_flag;
};
struct rvce_motion_estimation {
uint32_t enc_ime_decimation_search;
uint32_t motion_est_half_pixel;
uint32_t motion_est_quarter_pixel;
uint32_t disable_favor_pmv_point;
uint32_t force_zero_point_center;
uint32_t lsmvert;
uint32_t enc_search_range_x;
uint32_t enc_search_range_y;
uint32_t enc_search1_range_x;
uint32_t enc_search1_range_y;
uint32_t disable_16x16_frame1;
uint32_t disable_satd;
uint32_t enable_amd;
uint32_t enc_disable_sub_mode;
uint32_t enc_ime_skip_x;
uint32_t enc_ime_skip_y;
uint32_t enc_en_ime_overw_dis_subm;
uint32_t enc_ime_overw_dis_subm_no;
uint32_t enc_ime2_search_range_x;
uint32_t enc_ime2_search_range_y;
uint32_t parallel_mode_speedup_enable;
uint32_t fme0_enc_disable_sub_mode;
uint32_t fme1_enc_disable_sub_mode;
uint32_t ime_sw_speedup_enable;
};
struct rvce_pic_control {
uint32_t enc_use_constrained_intra_pred;
uint32_t enc_cabac_enable;
uint32_t enc_cabac_idc;
uint32_t enc_loop_filter_disable;
int32_t enc_lf_beta_offset;
int32_t enc_lf_alpha_c0_offset;
uint32_t enc_crop_left_offset;
uint32_t enc_crop_right_offset;
uint32_t enc_crop_top_offset;
uint32_t enc_crop_bottom_offset;
uint32_t enc_num_mbs_per_slice;
uint32_t enc_intra_refresh_num_mbs_per_slot;
uint32_t enc_force_intra_refresh;
uint32_t enc_force_imb_period;
uint32_t enc_pic_order_cnt_type;
uint32_t log2_max_pic_order_cnt_lsb_minus4;
uint32_t enc_sps_id;
uint32_t enc_pps_id;
uint32_t enc_constraint_set_flags;
uint32_t enc_b_pic_pattern;
uint32_t weight_pred_mode_b_picture;
uint32_t enc_number_of_reference_frames;
uint32_t enc_max_num_ref_frames;
uint32_t enc_num_default_active_ref_l0;
uint32_t enc_num_default_active_ref_l1;
uint32_t enc_slice_mode;
uint32_t enc_max_slice_size;
};
struct rvce_task_info {
uint32_t offset_of_next_task_info;
uint32_t task_operation;
uint32_t reference_picture_dependency;
uint32_t collocate_flag_dependency;
uint32_t feedback_index;
uint32_t video_bitstream_ring_index;
};
struct rvce_feedback_buf_pkg {
uint32_t feedback_ring_address_hi;
uint32_t feedback_ring_address_lo;
uint32_t feedback_ring_size;
};
struct rvce_rdo {
uint32_t enc_disable_tbe_pred_i_frame;
uint32_t enc_disable_tbe_pred_p_frame;
uint32_t use_fme_interpol_y;
uint32_t use_fme_interpol_uv;
uint32_t use_fme_intrapol_y;
uint32_t use_fme_intrapol_uv;
uint32_t use_fme_interpol_y_1;
uint32_t use_fme_interpol_uv_1;
uint32_t use_fme_intrapol_y_1;
uint32_t use_fme_intrapol_uv_1;
uint32_t enc_16x16_cost_adj;
uint32_t enc_skip_cost_adj;
uint32_t enc_force_16x16_skip;
uint32_t enc_disable_threshold_calc_a;
uint32_t enc_luma_coeff_cost;
uint32_t enc_luma_mb_coeff_cost;
uint32_t enc_chroma_coeff_cost;
};
struct rvce_vui {
uint32_t aspect_ratio_info_present_flag;
uint32_t aspect_ratio_idc;
uint32_t sar_width;
uint32_t sar_height;
uint32_t overscan_info_present_flag;
uint32_t overscan_Approp_flag;
uint32_t video_signal_type_present_flag;
uint32_t video_format;
uint32_t video_full_range_flag;
uint32_t color_description_present_flag;
uint32_t color_prim;
uint32_t transfer_char;
uint32_t matrix_coef;
uint32_t chroma_loc_info_present_flag;
uint32_t chroma_loc_top;
uint32_t chroma_loc_bottom;
uint32_t timing_info_present_flag;
uint32_t num_units_in_tick;
uint32_t time_scale;
uint32_t fixed_frame_rate_flag;
uint32_t nal_hrd_parameters_present_flag;
uint32_t cpb_cnt_minus1;
uint32_t bit_rate_scale;
uint32_t cpb_size_scale;
uint32_t bit_rate_value_minus;
uint32_t cpb_size_value_minus;
uint32_t cbr_flag;
uint32_t initial_cpb_removal_delay_length_minus1;
uint32_t cpb_removal_delay_length_minus1;
uint32_t dpb_output_delay_length_minus1;
uint32_t time_offset_length;
uint32_t low_delay_hrd_flag;
uint32_t pic_struct_present_flag;
uint32_t bitstream_restriction_present_flag;
uint32_t motion_vectors_over_pic_boundaries_flag;
uint32_t max_bytes_per_pic_denom;
uint32_t max_bits_per_mb_denom;
uint32_t log2_max_mv_length_hori;
uint32_t log2_max_mv_length_vert;
uint32_t num_reorder_frames;
uint32_t max_dec_frame_buffering;
};
struct rvce_enc_operation {
uint32_t insert_headers;
uint32_t picture_structure;
uint32_t allowed_max_bitstream_size;
uint32_t force_refresh_map;
uint32_t insert_aud;
uint32_t end_of_sequence;
uint32_t end_of_stream;
uint32_t input_picture_luma_address_hi;
uint32_t input_picture_luma_address_lo;
uint32_t input_picture_chroma_address_hi;
uint32_t input_picture_chroma_address_lo;
uint32_t enc_input_frame_y_pitch;
uint32_t enc_input_pic_luma_pitch;
uint32_t enc_input_pic_chroma_pitch;
uint32_t enc_input_pic_addr_array;
uint32_t enc_input_pic_addr_array_disable2pipe_disablemboffload;
uint32_t enc_input_pic_tile_config;
uint32_t enc_pic_type;
uint32_t enc_idr_flag;
uint32_t enc_idr_pic_id;
uint32_t enc_mgs_key_pic;
uint32_t enc_reference_flag;
uint32_t enc_temporal_layer_index;
uint32_t num_ref_idx_active_override_flag;
uint32_t num_ref_idx_l0_active_minus1;
uint32_t num_ref_idx_l1_active_minus1;
uint32_t enc_ref_list_modification_op;
uint32_t enc_ref_list_modification_num;
uint32_t enc_decoded_picture_marking_op;
uint32_t enc_decoded_picture_marking_num;
uint32_t enc_decoded_picture_marking_idx;
uint32_t enc_decoded_ref_base_picture_marking_op;
uint32_t enc_decoded_ref_base_picture_marking_num;
uint32_t l0_picture_structure;
uint32_t l0_enc_pic_type;
uint32_t l0_frame_number;
uint32_t l0_picture_order_count;
uint32_t l0_luma_offset;
uint32_t l0_chroma_offset;
uint32_t l1_picture_structure;
uint32_t l1_enc_pic_type;
uint32_t l1_frame_number;
uint32_t l1_picture_order_count;
uint32_t l1_luma_offset;
uint32_t l1_chroma_offset;
uint32_t enc_reconstructed_luma_offset;
uint32_t enc_reconstructed_chroma_offset;
uint32_t enc_coloc_buffer_offset;
uint32_t enc_reconstructed_ref_base_picture_luma_offset;
uint32_t enc_reconstructed_ref_base_picture_chroma_offset;
uint32_t enc_reference_ref_base_picture_luma_offset;
uint32_t enc_reference_ref_base_picture_chroma_offset;
uint32_t picture_count;
uint32_t frame_number;
uint32_t picture_order_count;
uint32_t num_i_pic_remain_in_rcgop;
uint32_t num_p_pic_remain_in_rcgop;
uint32_t num_b_pic_remain_in_rcgop;
uint32_t num_ir_pic_remain_in_rcgop;
uint32_t enable_intra_refresh;
uint32_t aq_variance_en;
uint32_t aq_block_size;
uint32_t aq_mb_variance_sel;
uint32_t aq_frame_variance_sel;
uint32_t aq_param_a;
uint32_t aq_param_b;
uint32_t aq_param_c;
uint32_t aq_param_d;
uint32_t aq_param_e;
uint32_t context_in_sfb;
};
struct rvce_enc_create {
uint32_t enc_use_circular_buffer;
uint32_t enc_profile;
uint32_t enc_level;
uint32_t enc_pic_struct_restriction;
uint32_t enc_image_width;
uint32_t enc_image_height;
uint32_t enc_ref_pic_luma_pitch;
uint32_t enc_ref_pic_chroma_pitch;
uint32_t enc_ref_y_height_in_qw;
uint32_t enc_ref_pic_addr_array_enc_pic_struct_restriction_disable_rdo;
uint32_t enc_pre_encode_context_buffer_offset;
uint32_t enc_pre_encode_input_luma_buffer_offset;
uint32_t enc_pre_encode_input_chroma_buffer_offset;
uint32_t enc_pre_encode_mode_chromaflag_vbaqmode_scenechangesensitivity;
};
struct rvce_config_ext {
uint32_t enc_enable_perf_logging;
};
struct rvce_h264_enc_pic {
struct rvce_rate_control rc;
struct rvce_motion_estimation me;
struct rvce_pic_control pc;
struct rvce_task_info ti;
struct rvce_feedback_buf_pkg fb;
struct rvce_rdo rdo;
struct rvce_vui vui;
struct rvce_enc_operation eo;
struct rvce_enc_create ec;
struct rvce_config_ext ce;
unsigned quant_i_frames;
unsigned quant_p_frames;
unsigned quant_b_frames;
enum pipe_h264_enc_picture_type picture_type;
unsigned frame_num;
unsigned frame_num_cnt;
unsigned p_remain;
unsigned i_remain;
unsigned idr_pic_id;
unsigned gop_cnt;
unsigned gop_size;
unsigned pic_order_cnt;
unsigned ref_idx_l0;
unsigned ref_idx_l1;
unsigned addrmode_arraymode_disrdo_distwoinstants;
bool not_referenced;
bool is_idr;
bool has_ref_pic_list;
bool enable_vui;
unsigned int ref_pic_list_0[32];
unsigned int ref_pic_list_1[32];
unsigned int frame_idx[32];
};
/* VCE encoder representation */
struct rvce_encoder {
struct pipe_video_codec base;
/* version specific packets */
void (*session)(struct rvce_encoder *enc);
void (*create)(struct rvce_encoder *enc);
void (*feedback)(struct rvce_encoder *enc);
void (*rate_control)(struct rvce_encoder *enc);
void (*config_extension)(struct rvce_encoder *enc);
void (*pic_control)(struct rvce_encoder *enc);
void (*motion_estimation)(struct rvce_encoder *enc);
void (*rdo)(struct rvce_encoder *enc);
void (*vui)(struct rvce_encoder *enc);
void (*config)(struct rvce_encoder *enc);
void (*encode)(struct rvce_encoder *enc);
void (*destroy)(struct rvce_encoder *enc);
void (*task_info)(struct rvce_encoder *enc, uint32_t op,
uint32_t dep, uint32_t fb_idx,
uint32_t ring_idx);
unsigned stream_handle;
struct pipe_screen *screen;
struct radeon_winsys* ws;
struct radeon_winsys_cs* cs;
rvce_get_buffer get_buffer;
struct pb_buffer* handle;
struct radeon_surf* luma;
struct radeon_surf* chroma;
struct pb_buffer* bs_handle;
unsigned bs_size;
struct rvce_cpb_slot *cpb_array;
struct list_head cpb_slots;
unsigned cpb_num;
struct rvid_buffer *fb;
struct rvid_buffer cpb;
struct pipe_h264_enc_picture_desc pic;
struct rvce_h264_enc_pic enc_pic;
unsigned task_info_idx;
unsigned bs_idx;
bool use_vm;
bool use_vui;
bool dual_pipe;
bool dual_inst;
};
/* CPB handling functions */
struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc);
struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc);
struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc);
void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
signed *luma_offset, signed *chroma_offset);
struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
const struct pipe_video_codec *templat,
struct radeon_winsys* ws,
rvce_get_buffer get_buffer);
bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen);
void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
enum radeon_bo_usage usage, enum radeon_bo_domain domain,
signed offset);
/* init vce fw 40.2.2 specific callbacks */
void radeon_vce_40_2_2_init(struct rvce_encoder *enc);
/* init vce fw 50 specific callbacks */
void radeon_vce_50_init(struct rvce_encoder *enc);
/* init vce fw 52 specific callbacks */
void radeon_vce_52_init(struct rvce_encoder *enc);
/* version specific function for getting parameters */
void (*get_pic_param)(struct rvce_encoder *enc,
struct pipe_h264_enc_picture_desc *pic);
/* get parameters for vce 40.2.2 */
void radeon_vce_40_2_2_get_param(struct rvce_encoder *enc,
struct pipe_h264_enc_picture_desc *pic);
/* get parameters for vce 50 */
void radeon_vce_50_get_param(struct rvce_encoder *enc,
struct pipe_h264_enc_picture_desc *pic);
/* get parameters for vce 52 */
void radeon_vce_52_get_param(struct rvce_encoder *enc,
struct pipe_h264_enc_picture_desc *pic);
#endif

View File

@ -0,0 +1,372 @@
/**************************************************************************
*
* Copyright 2013 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/*
* Authors:
* Christian König <christian.koenig@amd.com>
*
*/
#include <unistd.h>
#include "util/u_memory.h"
#include "util/u_video.h"
#include "vl/vl_defines.h"
#include "vl/vl_video_buffer.h"
#include "r600_pipe_common.h"
#include "radeon_video.h"
#include "radeon_vce.h"
#define UVD_FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8))
/* generate a stream handle */
unsigned rvid_alloc_stream_handle()
{
static unsigned counter = 0;
unsigned stream_handle = 0;
unsigned pid = getpid();
int i;
for (i = 0; i < 32; ++i)
stream_handle |= ((pid >> i) & 1) << (31 - i);
stream_handle ^= ++counter;
return stream_handle;
}
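/*
* The PID is bit-reversed into the handle and XOR'd with a running counter so
* handles are unlikely to collide across processes or streams; e.g. pid = 1
* reverses to 0x80000000 and the first call returns 0x80000001.
*/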
/* create a buffer in the winsys */
bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
unsigned size, unsigned usage)
{
memset(buffer, 0, sizeof(*buffer));
buffer->usage = usage;
/* Hardware buffer placement restrictions require the kernel to be
* able to move buffers around individually, so request a
* non-sub-allocated buffer.
*/
buffer->res = (struct r600_resource *)
pipe_buffer_create(screen, PIPE_BIND_SHARED,
usage, size);
return buffer->res != NULL;
}
/* destroy a buffer */
void rvid_destroy_buffer(struct rvid_buffer *buffer)
{
r600_resource_reference(&buffer->res, NULL);
}
/* reallocate a buffer, preserving its content */
bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
struct rvid_buffer *new_buf, unsigned new_size)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
struct radeon_winsys* ws = rscreen->ws;
unsigned bytes = MIN2(new_buf->res->buf->size, new_size);
struct rvid_buffer old_buf = *new_buf;
void *src = NULL, *dst = NULL;
if (!rvid_create_buffer(screen, new_buf, new_size, new_buf->usage))
goto error;
src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ);
if (!src)
goto error;
dst = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE);
if (!dst)
goto error;
memcpy(dst, src, bytes);
if (new_size > bytes) {
new_size -= bytes;
dst += bytes;
memset(dst, 0, new_size);
}
ws->buffer_unmap(new_buf->res->buf);
ws->buffer_unmap(old_buf.res->buf);
rvid_destroy_buffer(&old_buf);
return true;
error:
if (src)
ws->buffer_unmap(old_buf.res->buf);
rvid_destroy_buffer(new_buf);
*new_buf = old_buf;
return false;
}
/* clear the buffer with zeros */
void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
{
struct r600_common_context *rctx = (struct r600_common_context*)context;
rctx->dma_clear_buffer(context, &buffer->res->b.b, 0,
buffer->res->buf->size, 0);
context->flush(context, NULL, 0);
}
/**
* join surfaces into the same buffer with identical tiling params
* sum up their sizes and replace the backing buffers with a single bo
*/
void rvid_join_surfaces(struct r600_common_context *rctx,
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
struct radeon_surf *surfaces[VL_NUM_COMPONENTS])
{
struct radeon_winsys* ws;
unsigned best_tiling, best_wh, off;
unsigned size, alignment;
struct pb_buffer *pb;
unsigned i, j;
ws = rctx->ws;
for (i = 0, best_tiling = 0, best_wh = ~0; i < VL_NUM_COMPONENTS; ++i) {
unsigned wh;
if (!surfaces[i])
continue;
if (rctx->chip_class < GFX9) {
/* choose the smallest bank w/h for now */
wh = surfaces[i]->u.legacy.bankw * surfaces[i]->u.legacy.bankh;
if (wh < best_wh) {
best_wh = wh;
best_tiling = i;
}
}
}
for (i = 0, off = 0; i < VL_NUM_COMPONENTS; ++i) {
if (!surfaces[i])
continue;
/* adjust the texture layer offsets */
off = align(off, surfaces[i]->surf_alignment);
if (rctx->chip_class < GFX9) {
/* copy the tiling parameters */
surfaces[i]->u.legacy.bankw = surfaces[best_tiling]->u.legacy.bankw;
surfaces[i]->u.legacy.bankh = surfaces[best_tiling]->u.legacy.bankh;
surfaces[i]->u.legacy.mtilea = surfaces[best_tiling]->u.legacy.mtilea;
surfaces[i]->u.legacy.tile_split = surfaces[best_tiling]->u.legacy.tile_split;
for (j = 0; j < ARRAY_SIZE(surfaces[i]->u.legacy.level); ++j)
surfaces[i]->u.legacy.level[j].offset += off;
} else
surfaces[i]->u.gfx9.surf_offset += off;
off += surfaces[i]->surf_size;
}
for (i = 0, size = 0, alignment = 0; i < VL_NUM_COMPONENTS; ++i) {
if (!buffers[i] || !*buffers[i])
continue;
size = align(size, (*buffers[i])->alignment);
size += (*buffers[i])->size;
alignment = MAX2(alignment, (*buffers[i])->alignment);
}
if (!size)
return;
/* TODO: 2D tiling workaround */
alignment *= 2;
pb = ws->buffer_create(ws, size, alignment, RADEON_DOMAIN_VRAM,
RADEON_FLAG_GTT_WC);
if (!pb)
return;
for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
if (!buffers[i] || !*buffers[i])
continue;
pb_reference(buffers[i], pb);
}
pb_reference(&pb, NULL);
}
int rvid_get_video_param(struct pipe_screen *screen,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint,
enum pipe_video_cap param)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
enum pipe_video_format codec = u_reduce_video_profile(profile);
struct radeon_info info;
rscreen->ws->query_info(rscreen->ws, &info);
if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
switch (param) {
case PIPE_VIDEO_CAP_SUPPORTED:
return codec == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
rvce_is_fw_version_supported(rscreen);
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
return (rscreen->family < CHIP_TONGA) ? 2048 : 4096;
case PIPE_VIDEO_CAP_MAX_HEIGHT:
return (rscreen->family < CHIP_TONGA) ? 1152 : 2304;
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
return PIPE_FORMAT_NV12;
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
return false;
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
return false;
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
return true;
case PIPE_VIDEO_CAP_STACKED_FRAMES:
return (rscreen->family < CHIP_TONGA) ? 1 : 2;
default:
return 0;
}
}
switch (param) {
case PIPE_VIDEO_CAP_SUPPORTED:
switch (codec) {
case PIPE_VIDEO_FORMAT_MPEG12:
return profile != PIPE_VIDEO_PROFILE_MPEG1;
case PIPE_VIDEO_FORMAT_MPEG4:
/* no support for MPEG4 on older hw */
return rscreen->family >= CHIP_PALM;
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
if ((rscreen->family == CHIP_POLARIS10 ||
rscreen->family == CHIP_POLARIS11) &&
info.uvd_fw_version < UVD_FW_1_66_16 ) {
RVID_ERR("POLARIS10/11 firmware version need to be updated.\n");
return false;
}
return true;
case PIPE_VIDEO_FORMAT_VC1:
return true;
case PIPE_VIDEO_FORMAT_HEVC:
/* Carrizo only supports HEVC Main */
if (rscreen->family >= CHIP_STONEY)
return (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN ||
profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10);
else if (rscreen->family >= CHIP_CARRIZO)
return profile == PIPE_VIDEO_PROFILE_HEVC_MAIN;
return false;
case PIPE_VIDEO_FORMAT_JPEG:
if (rscreen->family < CHIP_CARRIZO || rscreen->family >= CHIP_VEGA10)
return false;
if (!(rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 19)) {
RVID_ERR("No MJPEG support for the kernel version\n");
return false;
}
return true;
default:
return false;
}
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
return (rscreen->family < CHIP_TONGA) ? 2048 : 4096;
case PIPE_VIDEO_CAP_MAX_HEIGHT:
return (rscreen->family < CHIP_TONGA) ? 1152 : 4096;
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
return PIPE_FORMAT_P016;
else
return PIPE_FORMAT_NV12;
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
if (rscreen->family < CHIP_PALM) {
/* MPEG2 only with shaders and no support for
interlacing on R6xx style UVD */
return codec != PIPE_VIDEO_FORMAT_MPEG12 &&
rscreen->family > CHIP_RV770;
} else {
enum pipe_video_format format = u_reduce_video_profile(profile);
if (format == PIPE_VIDEO_FORMAT_HEVC)
return false; //The firmware doesn't support interlaced HEVC.
else if (format == PIPE_VIDEO_FORMAT_JPEG)
return false;
return true;
}
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
return true;
case PIPE_VIDEO_CAP_MAX_LEVEL:
switch (profile) {
case PIPE_VIDEO_PROFILE_MPEG1:
return 0;
case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
return 3;
case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE:
return 3;
case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE:
return 5;
case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
return 1;
case PIPE_VIDEO_PROFILE_VC1_MAIN:
return 2;
case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
return 4;
case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
return (rscreen->family < CHIP_TONGA) ? 41 : 52;
case PIPE_VIDEO_PROFILE_HEVC_MAIN:
case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
return 186;
default:
return 0;
}
default:
return 0;
}
}
boolean rvid_is_format_supported(struct pipe_screen *screen,
enum pipe_format format,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint)
{
/* HEVC 10 bit decoding should use P016 instead of NV12 if possible */
if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
return (format == PIPE_FORMAT_NV12) ||
(format == PIPE_FORMAT_P016);
/* we can only handle this one with UVD */
if (profile != PIPE_VIDEO_PROFILE_UNKNOWN)
return format == PIPE_FORMAT_NV12;
return vl_video_buffer_is_format_supported(screen, format, profile, entrypoint);
}
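/*
* Both entry points are meant to be hooked into the pipe_screen by the driver,
* e.g. (a sketch):
*
* rscreen->b.get_video_param = rvid_get_video_param;
* rscreen->b.is_video_format_supported = rvid_is_format_supported;
*/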

View File

@ -0,0 +1,85 @@
/**************************************************************************
*
* Copyright 2013 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/*
* Authors:
* Christian König <christian.koenig@amd.com>
*
*/
#ifndef RADEON_VIDEO_H
#define RADEON_VIDEO_H
#include "radeon/radeon_winsys.h"
#include "vl/vl_video_buffer.h"
#define RVID_ERR(fmt, args...) \
fprintf(stderr, "EE %s:%d %s UVD - "fmt, __FILE__, __LINE__, __func__, ##args)
/* video buffer representation */
struct rvid_buffer
{
unsigned usage;
struct r600_resource *res;
};
/* generate a stream handle */
unsigned rvid_alloc_stream_handle(void);
/* create a buffer in the winsys */
bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
unsigned size, unsigned usage);
/* destroy a buffer */
void rvid_destroy_buffer(struct rvid_buffer *buffer);
/* reallocate a buffer, preserving its content */
bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
struct rvid_buffer *new_buf, unsigned new_size);
/* clear the buffer with zeros */
void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer);
/* join surfaces into the same buffer with identical tiling params
sum up their sizes and replace the backing buffers with a single bo */
void rvid_join_surfaces(struct r600_common_context *rctx,
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
struct radeon_surf *surfaces[VL_NUM_COMPONENTS]);
/* returns supported codecs and other parameters */
int rvid_get_video_param(struct pipe_screen *screen,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint,
enum pipe_video_cap param);
/* the hardware only supports NV12 */
boolean rvid_is_format_supported(struct pipe_screen *screen,
enum pipe_format format,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint);
#endif // RADEON_VIDEO_H

View File

@ -28,22 +28,22 @@
/* 2xMSAA
* There are two locations (4, 4), (-4, -4). */
const uint32_t eg_sample_locs_2x[4] = {
static const uint32_t eg_sample_locs_2x[4] = {
FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
};
const unsigned eg_max_dist_2x = 4;
static const unsigned eg_max_dist_2x = 4;
/* 4xMSAA
* There are 4 locations: (-2, -6), (6, -2), (-6, 2), (2, 6). */
const uint32_t eg_sample_locs_4x[4] = {
static const uint32_t eg_sample_locs_4x[4] = {
FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
};
const unsigned eg_max_dist_4x = 6;
static const unsigned eg_max_dist_4x = 6;
/* Cayman 8xMSAA */
static const uint32_t cm_sample_locs_8x[] = {
@ -78,7 +78,7 @@ static const uint32_t cm_sample_locs_16x[] = {
};
static const unsigned cm_max_dist_16x = 8;
void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
void si_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
unsigned sample_index, float *out_value)
{
int offset, index;
@ -123,24 +123,24 @@ void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
}
}
void cayman_init_msaa(struct pipe_context *ctx)
void si_init_msaa(struct pipe_context *ctx)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
int i;
cayman_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]);
si_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]);
for (i = 0; i < 2; i++)
cayman_get_sample_position(ctx, 2, i, rctx->sample_locations_2x[i]);
si_get_sample_position(ctx, 2, i, rctx->sample_locations_2x[i]);
for (i = 0; i < 4; i++)
cayman_get_sample_position(ctx, 4, i, rctx->sample_locations_4x[i]);
si_get_sample_position(ctx, 4, i, rctx->sample_locations_4x[i]);
for (i = 0; i < 8; i++)
cayman_get_sample_position(ctx, 8, i, rctx->sample_locations_8x[i]);
si_get_sample_position(ctx, 8, i, rctx->sample_locations_8x[i]);
for (i = 0; i < 16; i++)
cayman_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]);
si_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]);
}
void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
void si_common_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
{
switch (nr_samples) {
default:
@ -201,9 +201,9 @@ void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
}
}
void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
int ps_iter_samples, int overrast_samples,
unsigned sc_mode_cntl_1)
void si_common_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
int ps_iter_samples, int overrast_samples,
unsigned sc_mode_cntl_1)
{
int setup_samples = nr_samples > 1 ? nr_samples :
overrast_samples > 1 ? overrast_samples : 0;

View File

@ -30,9 +30,9 @@
#include <inttypes.h>
#include <stdio.h>
bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
struct pb_buffer *buf,
enum radeon_bo_usage usage)
bool si_rings_is_buffer_referenced(struct r600_common_context *ctx,
struct pb_buffer *buf,
enum radeon_bo_usage usage)
{
if (ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, buf, usage)) {
return true;
@ -44,9 +44,9 @@ bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
return false;
}
void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
struct r600_resource *resource,
unsigned usage)
void *si_buffer_map_sync_with_rings(struct r600_common_context *ctx,
struct r600_resource *resource,
unsigned usage)
{
enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
bool busy = false;
@ -101,9 +101,9 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
return ctx->ws->buffer_map(resource->buf, NULL, usage);
}
void r600_init_resource_fields(struct r600_common_screen *rscreen,
struct r600_resource *res,
uint64_t size, unsigned alignment)
void si_init_resource_fields(struct r600_common_screen *rscreen,
struct r600_resource *res,
uint64_t size, unsigned alignment)
{
struct r600_texture *rtex = (struct r600_texture*)res;
@ -205,8 +205,8 @@ void r600_init_resource_fields(struct r600_common_screen *rscreen,
res->gart_usage = size;
}
bool r600_alloc_resource(struct r600_common_screen *rscreen,
struct r600_resource *res)
bool si_alloc_resource(struct r600_common_screen *rscreen,
struct r600_resource *res)
{
struct pb_buffer *old_buf, *new_buf;
@ -274,7 +274,7 @@ r600_invalidate_buffer(struct r600_common_context *rctx,
return false;
/* Check if mapping this buffer would cause waiting for the GPU. */
if (r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
if (si_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
!rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
} else {
@ -285,7 +285,7 @@ r600_invalidate_buffer(struct r600_common_context *rctx,
}
/* Replace the storage of dst with src. */
void r600_replace_buffer_storage(struct pipe_context *ctx,
void si_replace_buffer_storage(struct pipe_context *ctx,
struct pipe_resource *dst,
struct pipe_resource *src)
{
@ -308,8 +308,8 @@ void r600_replace_buffer_storage(struct pipe_context *ctx,
rctx->rebind_buffer(ctx, dst, old_gpu_address);
}
void r600_invalidate_resource(struct pipe_context *ctx,
struct pipe_resource *resource)
void si_invalidate_resource(struct pipe_context *ctx,
struct pipe_resource *resource)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_resource *rbuffer = r600_resource(resource);
@ -429,7 +429,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
/* Check if mapping this buffer would cause waiting for the GPU.
*/
if (rbuffer->flags & RADEON_FLAG_SPARSE ||
r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
si_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
!rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
/* Do a wait-free write-only transfer using a temporary buffer. */
unsigned offset;
@ -472,7 +472,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
box->x % R600_MAP_BUFFER_ALIGNMENT,
0, 0, resource, 0, box);
data = r600_buffer_map_sync_with_rings(rctx, staging,
data = si_buffer_map_sync_with_rings(rctx, staging,
usage & ~PIPE_TRANSFER_UNSYNCHRONIZED);
if (!data) {
r600_resource_reference(&staging, NULL);
@ -487,7 +487,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
}
}
data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage);
data = si_buffer_map_sync_with_rings(rctx, rbuffer, usage);
if (!data) {
return NULL;
}
@ -557,10 +557,10 @@ static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
slab_free(&rctx->pool_transfers, transfer);
}
void r600_buffer_subdata(struct pipe_context *ctx,
struct pipe_resource *buffer,
unsigned usage, unsigned offset,
unsigned size, const void *data)
void si_buffer_subdata(struct pipe_context *ctx,
struct pipe_resource *buffer,
unsigned usage, unsigned offset,
unsigned size, const void *data)
{
struct pipe_transfer *transfer = NULL;
struct pipe_box box;
@ -611,30 +611,30 @@ r600_alloc_buffer_struct(struct pipe_screen *screen,
return rbuffer;
}
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ,
unsigned alignment)
struct pipe_resource *si_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ,
unsigned alignment)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
r600_init_resource_fields(rscreen, rbuffer, templ->width0, alignment);
si_init_resource_fields(rscreen, rbuffer, templ->width0, alignment);
if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE)
rbuffer->flags |= RADEON_FLAG_SPARSE;
if (!r600_alloc_resource(rscreen, rbuffer)) {
if (!si_alloc_resource(rscreen, rbuffer)) {
FREE(rbuffer);
return NULL;
}
return &rbuffer->b.b;
}
struct pipe_resource *r600_aligned_buffer_create(struct pipe_screen *screen,
unsigned flags,
unsigned usage,
unsigned size,
unsigned alignment)
struct pipe_resource *si_aligned_buffer_create(struct pipe_screen *screen,
unsigned flags,
unsigned usage,
unsigned size,
unsigned alignment)
{
struct pipe_resource buffer;
@ -648,13 +648,13 @@ struct pipe_resource *r600_aligned_buffer_create(struct pipe_screen *screen,
buffer.height0 = 1;
buffer.depth0 = 1;
buffer.array_size = 1;
return r600_buffer_create(screen, &buffer, alignment);
return si_buffer_create(screen, &buffer, alignment);
}
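
Taken together, the renamed buffer helpers compose in the obvious way: allocate, then go through the ring-aware map path before touching the memory on the CPU. A minimal sketch assuming a 4 KiB scratch buffer — size, usage flag and variable names are placeholders for illustration only:

/* Hypothetical usage of the renamed buffer helpers. */
struct pipe_resource *buf =
        si_aligned_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT, 4096, 256);
if (buf) {
        void *ptr = si_buffer_map_sync_with_rings(rctx, r600_resource(buf),
                                                  PIPE_TRANSFER_WRITE);
        if (ptr)
                memset(ptr, 0, 4096); /* CPU write after a stall-free map */
        pipe_resource_reference(&buf, NULL);
}
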
struct pipe_resource *
r600_buffer_from_user_memory(struct pipe_screen *screen,
const struct pipe_resource *templ,
void *user_memory)
si_buffer_from_user_memory(struct pipe_screen *screen,
const struct pipe_resource *templ,
void *user_memory)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct radeon_winsys *ws = rscreen->ws;

View File

@ -162,7 +162,7 @@ r600_gpu_load_thread(void *param)
return 0;
}
void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen)
void si_gpu_load_kill_thread(struct r600_common_screen *rscreen)
{
if (!rscreen->gpu_load_thread)
return;
@ -269,14 +269,14 @@ static unsigned busy_index_from_type(struct r600_common_screen *rscreen,
}
}
uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type)
uint64_t si_begin_counter(struct r600_common_screen *rscreen, unsigned type)
{
unsigned busy_index = busy_index_from_type(rscreen, type);
return r600_read_mmio_counter(rscreen, busy_index);
}
unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,
uint64_t begin)
unsigned si_end_counter(struct r600_common_screen *rscreen, unsigned type,
uint64_t begin)
{
unsigned busy_index = busy_index_from_type(rscreen, type);
return r600_end_mmio_counter(rscreen, begin, busy_index);

View File

@ -112,7 +112,7 @@ static void r600_pc_query_destroy(struct r600_common_screen *rscreen,
FREE(query->counters);
r600_query_hw_destroy(rscreen, rquery);
si_query_hw_destroy(rscreen, rquery);
}
static bool r600_pc_query_prepare_buffer(struct r600_common_screen *screen,
@ -217,9 +217,9 @@ static void r600_pc_query_add_result(struct r600_common_screen *rscreen,
static struct r600_query_ops batch_query_ops = {
.destroy = r600_pc_query_destroy,
.begin = r600_query_hw_begin,
.end = r600_query_hw_end,
.get_result = r600_query_hw_get_result
.begin = si_query_hw_begin,
.end = si_query_hw_end,
.get_result = si_query_hw_get_result
};
static struct r600_query_hw_ops batch_query_hw_ops = {
@ -297,9 +297,9 @@ static struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
return group;
}
struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
unsigned num_queries,
unsigned *query_types)
struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
unsigned num_queries,
unsigned *query_types)
{
struct r600_common_screen *screen =
(struct r600_common_screen *)ctx->screen;
@ -417,7 +417,7 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
counter->qwords *= block->num_instances;
}
if (!r600_query_hw_init(screen, &query->b))
if (!si_query_hw_init(screen, &query->b))
goto error;
return (struct pipe_query *)query;
@ -511,9 +511,9 @@ static bool r600_init_block_names(struct r600_common_screen *screen,
return true;
}
int r600_get_perfcounter_info(struct r600_common_screen *screen,
unsigned index,
struct pipe_driver_query_info *info)
int si_get_perfcounter_info(struct r600_common_screen *screen,
unsigned index,
struct pipe_driver_query_info *info)
{
struct r600_perfcounters *pc = screen->perfcounters;
struct r600_perfcounter_block *block;
@ -553,9 +553,9 @@ int r600_get_perfcounter_info(struct r600_common_screen *screen,
return 1;
}
int r600_get_perfcounter_group_info(struct r600_common_screen *screen,
unsigned index,
struct pipe_driver_query_group_info *info)
int si_get_perfcounter_group_info(struct r600_common_screen *screen,
unsigned index,
struct pipe_driver_query_group_info *info)
{
struct r600_perfcounters *pc = screen->perfcounters;
struct r600_perfcounter_block *block;
@ -580,13 +580,13 @@ int r600_get_perfcounter_group_info(struct r600_common_screen *screen,
return 1;
}
void r600_perfcounters_destroy(struct r600_common_screen *rscreen)
void si_perfcounters_destroy(struct r600_common_screen *rscreen)
{
if (rscreen->perfcounters)
rscreen->perfcounters->cleanup(rscreen);
}
bool r600_perfcounters_init(struct r600_perfcounters *pc,
bool si_perfcounters_init(struct r600_perfcounters *pc,
unsigned num_blocks)
{
pc->blocks = CALLOC(num_blocks, sizeof(struct r600_perfcounter_block));
@ -599,11 +599,11 @@ bool r600_perfcounters_init(struct r600_perfcounters *pc,
return true;
}
void r600_perfcounters_add_block(struct r600_common_screen *rscreen,
struct r600_perfcounters *pc,
const char *name, unsigned flags,
unsigned counters, unsigned selectors,
unsigned instances, void *data)
void si_perfcounters_add_block(struct r600_common_screen *rscreen,
struct r600_perfcounters *pc,
const char *name, unsigned flags,
unsigned counters, unsigned selectors,
unsigned instances, void *data)
{
struct r600_perfcounter_block *block = &pc->blocks[pc->num_blocks];
@ -636,7 +636,7 @@ void r600_perfcounters_add_block(struct r600_common_screen *rscreen,
pc->num_groups += block->num_groups;
}
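
The renamed perfcounter setup helpers are intended to be called once from a screen's perfcounter bring-up. A rough sketch with made-up block parameters — the block name, counter and selector counts are placeholders, not real hardware values:

/* Hypothetical bring-up of one counter block. */
struct r600_perfcounters *pc = CALLOC_STRUCT(r600_perfcounters);
if (pc && si_perfcounters_init(pc, /*num_blocks=*/1))
        si_perfcounters_add_block(rscreen, pc, "EXAMPLE_BLOCK", /*flags=*/0,
                                  /*counters=*/2, /*selectors=*/16,
                                  /*instances=*/1, NULL);
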
void r600_perfcounters_do_destroy(struct r600_perfcounters *pc)
void si_perfcounters_do_destroy(struct r600_perfcounters *pc)
{
unsigned i;

View File

@ -39,17 +39,8 @@
#include <inttypes.h>
#include <sys/utsname.h>
#ifndef HAVE_LLVM
#define HAVE_LLVM 0
#endif
#if HAVE_LLVM
#include <llvm-c/TargetMachine.h>
#endif
#ifndef MESA_LLVM_VERSION_PATCH
#define MESA_LLVM_VERSION_PATCH 0
#endif
struct r600_multi_fence {
struct pipe_reference reference;
@ -66,12 +57,12 @@ struct r600_multi_fence {
/*
* shader binary helpers.
*/
void radeon_shader_binary_init(struct ac_shader_binary *b)
void si_radeon_shader_binary_init(struct ac_shader_binary *b)
{
memset(b, 0, sizeof(*b));
}
void radeon_shader_binary_clean(struct ac_shader_binary *b)
void si_radeon_shader_binary_clean(struct ac_shader_binary *b)
{
if (!b)
return;
@ -99,11 +90,11 @@ void radeon_shader_binary_clean(struct ac_shader_binary *b)
* \param old_value Previous fence value (for a bug workaround)
* \param new_value Fence value to write for this event.
*/
void r600_gfx_write_event_eop(struct r600_common_context *ctx,
unsigned event, unsigned event_flags,
unsigned data_sel,
struct r600_resource *buf, uint64_t va,
uint32_t new_fence, unsigned query_type)
void si_gfx_write_event_eop(struct r600_common_context *ctx,
unsigned event, unsigned event_flags,
unsigned data_sel,
struct r600_resource *buf, uint64_t va,
uint32_t new_fence, unsigned query_type)
{
struct radeon_winsys_cs *cs = ctx->gfx.cs;
unsigned op = EVENT_TYPE(event) |
@ -183,7 +174,7 @@ void r600_gfx_write_event_eop(struct r600_common_context *ctx,
RADEON_PRIO_QUERY);
}
unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen)
unsigned si_gfx_write_fence_dwords(struct r600_common_screen *screen)
{
unsigned dwords = 6;
@ -197,8 +188,8 @@ unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen)
return dwords;
}
void r600_gfx_wait_fence(struct r600_common_context *ctx,
uint64_t va, uint32_t ref, uint32_t mask)
void si_gfx_wait_fence(struct r600_common_context *ctx,
uint64_t va, uint32_t ref, uint32_t mask)
{
struct radeon_winsys_cs *cs = ctx->gfx.cs;
@ -211,11 +202,11 @@ void r600_gfx_wait_fence(struct r600_common_context *ctx,
radeon_emit(cs, 4); /* poll interval */
}
void r600_draw_rectangle(struct blitter_context *blitter,
int x1, int y1, int x2, int y2,
float depth, unsigned num_instances,
enum blitter_attrib_type type,
const union blitter_attrib *attrib)
void si_draw_rectangle(struct blitter_context *blitter,
int x1, int y1, int x2, int y2,
float depth, unsigned num_instances,
enum blitter_attrib_type type,
const union blitter_attrib *attrib)
{
struct r600_common_context *rctx =
(struct r600_common_context*)util_blitter_get_pipe(blitter);
@ -309,8 +300,8 @@ static void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
}
}
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
struct r600_resource *dst, struct r600_resource *src)
void si_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
struct r600_resource *dst, struct r600_resource *src)
{
uint64_t vram = ctx->dma.cs->used_vram;
uint64_t gtt = ctx->dma.cs->used_gart;
@ -387,29 +378,29 @@ static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
{
}
void r600_preflush_suspend_features(struct r600_common_context *ctx)
void si_preflush_suspend_features(struct r600_common_context *ctx)
{
/* suspend queries */
if (!LIST_IS_EMPTY(&ctx->active_queries))
r600_suspend_queries(ctx);
si_suspend_queries(ctx);
ctx->streamout.suspended = false;
if (ctx->streamout.begin_emitted) {
r600_emit_streamout_end(ctx);
si_emit_streamout_end(ctx);
ctx->streamout.suspended = true;
}
}
void r600_postflush_resume_features(struct r600_common_context *ctx)
void si_postflush_resume_features(struct r600_common_context *ctx)
{
if (ctx->streamout.suspended) {
ctx->streamout.append_bitmask = ctx->streamout.enabled_mask;
r600_streamout_buffers_dirty(ctx);
si_streamout_buffers_dirty(ctx);
}
/* resume queries */
if (!LIST_IS_EMPTY(&ctx->active_queries))
r600_resume_queries(ctx);
si_resume_queries(ctx);
}
static void r600_add_fence_dependency(struct r600_common_context *rctx,
@ -542,7 +533,7 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags,
}
if (check_vm)
radeon_save_cs(rctx->ws, cs, &saved, true);
si_save_cs(rctx->ws, cs, &saved, true);
rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence);
if (fence)
@ -555,7 +546,7 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags,
rctx->ws->fence_wait(rctx->ws, rctx->last_sdma_fence, 800*1000*1000);
rctx->check_vm_faults(rctx, &saved, RING_DMA);
radeon_clear_saved_cs(&saved);
si_clear_saved_cs(&saved);
}
}
@ -563,8 +554,8 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags,
* Store a linearized copy of all chunks of \p cs together with the buffer
* list in \p saved.
*/
void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
struct radeon_saved_cs *saved, bool get_buffer_list)
void si_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
struct radeon_saved_cs *saved, bool get_buffer_list)
{
uint32_t *buf;
unsigned i;
@ -602,7 +593,7 @@ oom:
memset(saved, 0, sizeof(*saved));
}
void radeon_clear_saved_cs(struct radeon_saved_cs *saved)
void si_clear_saved_cs(struct radeon_saved_cs *saved)
{
FREE(saved->ib);
FREE(saved->bo_list);
@ -646,7 +637,7 @@ static void r600_set_device_reset_callback(struct pipe_context *ctx,
sizeof(rctx->device_reset_callback));
}
bool r600_check_device_reset(struct r600_common_context *rctx)
bool si_check_device_reset(struct r600_common_context *rctx)
{
enum pipe_reset_status status;
@ -708,9 +699,9 @@ static bool r600_resource_commit(struct pipe_context *pctx,
return ctx->ws->buffer_commit(res->buf, box->x, box->width, commit);
}
bool r600_common_context_init(struct r600_common_context *rctx,
struct r600_common_screen *rscreen,
unsigned context_flags)
bool si_common_context_init(struct r600_common_context *rctx,
struct r600_common_screen *rscreen,
unsigned context_flags)
{
slab_create_child(&rctx->pool_transfers, &rscreen->pool_transfers);
slab_create_child(&rctx->pool_transfers_unsync, &rscreen->pool_transfers);
@ -720,7 +711,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,
rctx->family = rscreen->family;
rctx->chip_class = rscreen->chip_class;
rctx->b.invalidate_resource = r600_invalidate_resource;
rctx->b.invalidate_resource = si_invalidate_resource;
rctx->b.resource_commit = r600_resource_commit;
rctx->b.transfer_map = u_transfer_map_vtbl;
rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl;
@ -731,15 +722,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,
rctx->b.set_debug_callback = r600_set_debug_callback;
rctx->b.fence_server_sync = r600_fence_server_sync;
rctx->dma_clear_buffer = r600_dma_clear_buffer_fallback;
/* evergreen_compute.c has a special codepath for global buffers.
* Everything else can use the direct path.
*/
if ((rscreen->chip_class == EVERGREEN || rscreen->chip_class == CAYMAN) &&
(context_flags & PIPE_CONTEXT_COMPUTE_ONLY))
rctx->b.buffer_subdata = u_default_buffer_subdata;
else
rctx->b.buffer_subdata = r600_buffer_subdata;
rctx->b.buffer_subdata = si_buffer_subdata;
if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) {
rctx->b.get_device_reset_status = r600_get_reset_status;
@ -750,11 +733,11 @@ bool r600_common_context_init(struct r600_common_context *rctx,
rctx->b.set_device_reset_callback = r600_set_device_reset_callback;
r600_init_context_texture_functions(rctx);
r600_init_viewport_functions(rctx);
r600_streamout_init(rctx);
r600_query_init(rctx);
cayman_init_msaa(&rctx->b);
si_init_context_texture_functions(rctx);
si_init_viewport_functions(rctx);
si_streamout_init(rctx);
si_init_query_functions(rctx);
si_init_msaa(&rctx->b);
if (rctx->chip_class == CIK ||
rctx->chip_class == VI ||
@ -796,7 +779,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,
return true;
}
void r600_common_context_cleanup(struct r600_common_context *rctx)
void si_common_context_cleanup(struct r600_common_context *rctx)
{
unsigned i,j;
@ -976,19 +959,14 @@ static void r600_disk_cache_create(struct r600_common_screen *rscreen)
&mesa_timestamp)) {
char *timestamp_str;
int res = -1;
if (rscreen->chip_class < SI) {
res = asprintf(&timestamp_str, "%u",mesa_timestamp);
uint32_t llvm_timestamp;
if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo,
&llvm_timestamp)) {
res = asprintf(&timestamp_str, "%u_%u",
mesa_timestamp, llvm_timestamp);
}
#if HAVE_LLVM
else {
uint32_t llvm_timestamp;
if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo,
&llvm_timestamp)) {
res = asprintf(&timestamp_str, "%u_%u",
mesa_timestamp, llvm_timestamp);
}
}
#endif
if (res != -1) {
/* These flags affect shader compilation. */
uint64_t shader_debug_flags =
@ -1074,7 +1052,7 @@ static int r600_get_video_param(struct pipe_screen *screen,
}
}
const char *r600_get_llvm_processor_name(enum radeon_family family)
const char *si_get_llvm_processor_name(enum radeon_family family)
{
switch (family) {
case CHIP_R600:
@ -1161,10 +1139,7 @@ static unsigned get_max_threads_per_block(struct r600_common_screen *screen,
/* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice
* round number.
*/
if (screen->chip_class >= SI)
return 2048;
return 256;
return 2048;
}
static int r600_get_compute_param(struct pipe_screen *screen,
@ -1193,7 +1168,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
* GPUs, so we need to use the name of a similar GPU.
*/
default:
gpu = r600_get_llvm_processor_name(rscreen->family);
gpu = si_get_llvm_processor_name(rscreen->family);
break;
}
if (ret) {
@ -1237,9 +1212,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
case PIPE_COMPUTE_CAP_ADDRESS_BITS:
if (ret) {
uint32_t *address_bits = ret;
address_bits[0] = 32;
if (rscreen->chip_class >= SI)
address_bits[0] = 64;
address_bits[0] = 64;
}
return 1 * sizeof(uint32_t);
@ -1319,8 +1292,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
if (ret) {
uint64_t *max_variable_threads_per_block = ret;
if (rscreen->chip_class >= SI &&
ir_type == PIPE_SHADER_IR_TGSI)
if (ir_type == PIPE_SHADER_IR_TGSI)
*max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
else
*max_variable_threads_per_block = 0;
@ -1444,18 +1416,18 @@ static void r600_query_memory_info(struct pipe_screen *screen,
info->nr_device_memory_evictions = info->device_memory_evicted / 64;
}
struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
const struct pipe_resource *templ)
struct pipe_resource *si_resource_create_common(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
if (templ->target == PIPE_BUFFER) {
return r600_buffer_create(screen, templ, 256);
return si_buffer_create(screen, templ, 256);
} else {
return r600_texture_create(screen, templ);
return si_texture_create(screen, templ);
}
}
bool r600_common_screen_init(struct r600_common_screen *rscreen,
struct radeon_winsys *ws)
bool si_common_screen_init(struct r600_common_screen *rscreen,
struct radeon_winsys *ws)
{
char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {};
struct utsname uname_data;
@ -1496,19 +1468,19 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
rscreen->b.fence_finish = r600_fence_finish;
rscreen->b.fence_reference = r600_fence_reference;
rscreen->b.resource_destroy = u_resource_destroy_vtbl;
rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory;
rscreen->b.resource_from_user_memory = si_buffer_from_user_memory;
rscreen->b.query_memory_info = r600_query_memory_info;
if (rscreen->info.has_hw_decode) {
rscreen->b.get_video_param = rvid_get_video_param;
rscreen->b.is_video_format_supported = rvid_is_format_supported;
rscreen->b.get_video_param = si_vid_get_video_param;
rscreen->b.is_video_format_supported = si_vid_is_format_supported;
} else {
rscreen->b.get_video_param = r600_get_video_param;
rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
}
r600_init_screen_texture_functions(rscreen);
r600_init_screen_query_functions(rscreen);
si_init_screen_texture_functions(rscreen);
si_init_screen_query_functions(rscreen);
rscreen->family = rscreen->info.family;
rscreen->chip_class = rscreen->info.chip_class;
@ -1587,10 +1559,10 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
return true;
}
void r600_destroy_common_screen(struct r600_common_screen *rscreen)
void si_destroy_common_screen(struct r600_common_screen *rscreen)
{
r600_perfcounters_destroy(rscreen);
r600_gpu_load_kill_thread(rscreen);
si_perfcounters_destroy(rscreen);
si_gpu_load_kill_thread(rscreen);
mtx_destroy(&rscreen->gpu_load_mutex);
mtx_destroy(&rscreen->aux_context_lock);
@ -1603,20 +1575,20 @@ void r600_destroy_common_screen(struct r600_common_screen *rscreen)
FREE(rscreen);
}
bool r600_can_dump_shader(struct r600_common_screen *rscreen,
unsigned processor)
bool si_can_dump_shader(struct r600_common_screen *rscreen,
unsigned processor)
{
return rscreen->debug_flags & (1 << processor);
}
bool r600_extra_shader_checks(struct r600_common_screen *rscreen, unsigned processor)
bool si_extra_shader_checks(struct r600_common_screen *rscreen, unsigned processor)
{
return (rscreen->debug_flags & DBG_CHECK_IR) ||
r600_can_dump_shader(rscreen, processor);
si_can_dump_shader(rscreen, processor);
}
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
uint64_t offset, uint64_t size, unsigned value)
void si_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
uint64_t offset, uint64_t size, unsigned value)
{
struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;

View File

@ -141,8 +141,8 @@ struct r600_perfcounters;
struct tgsi_shader_info;
struct r600_qbo_state;
void radeon_shader_binary_init(struct ac_shader_binary *b);
void radeon_shader_binary_clean(struct ac_shader_binary *b);
void si_radeon_shader_binary_init(struct ac_shader_binary *b);
void si_radeon_shader_binary_clean(struct ac_shader_binary *b);
/* Only 32-bit buffer allocations are supported, gallium doesn't support more
* at the moment.
@ -723,130 +723,125 @@ struct r600_common_context {
};
/* r600_buffer_common.c */
bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
struct pb_buffer *buf,
enum radeon_bo_usage usage);
void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
struct r600_resource *resource,
unsigned usage);
void r600_buffer_subdata(struct pipe_context *ctx,
struct pipe_resource *buffer,
unsigned usage, unsigned offset,
unsigned size, const void *data);
void r600_init_resource_fields(struct r600_common_screen *rscreen,
struct r600_resource *res,
uint64_t size, unsigned alignment);
bool r600_alloc_resource(struct r600_common_screen *rscreen,
struct r600_resource *res);
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ,
unsigned alignment);
struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen,
unsigned flags,
unsigned usage,
unsigned size,
unsigned alignment);
bool si_rings_is_buffer_referenced(struct r600_common_context *ctx,
struct pb_buffer *buf,
enum radeon_bo_usage usage);
void *si_buffer_map_sync_with_rings(struct r600_common_context *ctx,
struct r600_resource *resource,
unsigned usage);
void si_buffer_subdata(struct pipe_context *ctx,
struct pipe_resource *buffer,
unsigned usage, unsigned offset,
unsigned size, const void *data);
void si_init_resource_fields(struct r600_common_screen *rscreen,
struct r600_resource *res,
uint64_t size, unsigned alignment);
bool si_alloc_resource(struct r600_common_screen *rscreen,
struct r600_resource *res);
struct pipe_resource *si_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ,
unsigned alignment);
struct pipe_resource *si_aligned_buffer_create(struct pipe_screen *screen,
unsigned flags,
unsigned usage,
unsigned size,
unsigned alignment);
struct pipe_resource *
r600_buffer_from_user_memory(struct pipe_screen *screen,
const struct pipe_resource *templ,
void *user_memory);
void
r600_invalidate_resource(struct pipe_context *ctx,
struct pipe_resource *resource);
void r600_replace_buffer_storage(struct pipe_context *ctx,
struct pipe_resource *dst,
struct pipe_resource *src);
si_buffer_from_user_memory(struct pipe_screen *screen,
const struct pipe_resource *templ,
void *user_memory);
void si_invalidate_resource(struct pipe_context *ctx,
struct pipe_resource *resource);
void si_replace_buffer_storage(struct pipe_context *ctx,
struct pipe_resource *dst,
struct pipe_resource *src);
/* r600_common_pipe.c */
void r600_gfx_write_event_eop(struct r600_common_context *ctx,
unsigned event, unsigned event_flags,
unsigned data_sel,
struct r600_resource *buf, uint64_t va,
uint32_t new_fence, unsigned query_type);
unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen);
void r600_gfx_wait_fence(struct r600_common_context *ctx,
uint64_t va, uint32_t ref, uint32_t mask);
void r600_draw_rectangle(struct blitter_context *blitter,
int x1, int y1, int x2, int y2,
float depth, unsigned num_instances,
enum blitter_attrib_type type,
const union blitter_attrib *attrib);
bool r600_common_screen_init(struct r600_common_screen *rscreen,
struct radeon_winsys *ws);
void r600_destroy_common_screen(struct r600_common_screen *rscreen);
void r600_preflush_suspend_features(struct r600_common_context *ctx);
void r600_postflush_resume_features(struct r600_common_context *ctx);
bool r600_common_context_init(struct r600_common_context *rctx,
struct r600_common_screen *rscreen,
unsigned context_flags);
void r600_common_context_cleanup(struct r600_common_context *rctx);
bool r600_can_dump_shader(struct r600_common_screen *rscreen,
unsigned processor);
bool r600_extra_shader_checks(struct r600_common_screen *rscreen,
unsigned processor);
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
uint64_t offset, uint64_t size, unsigned value);
struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
const struct pipe_resource *templ);
const char *r600_get_llvm_processor_name(enum radeon_family family);
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
struct r600_resource *dst, struct r600_resource *src);
void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
struct radeon_saved_cs *saved, bool get_buffer_list);
void radeon_clear_saved_cs(struct radeon_saved_cs *saved);
bool r600_check_device_reset(struct r600_common_context *rctx);
void si_gfx_write_event_eop(struct r600_common_context *ctx,
unsigned event, unsigned event_flags,
unsigned data_sel,
struct r600_resource *buf, uint64_t va,
uint32_t new_fence, unsigned query_type);
unsigned si_gfx_write_fence_dwords(struct r600_common_screen *screen);
void si_gfx_wait_fence(struct r600_common_context *ctx,
uint64_t va, uint32_t ref, uint32_t mask);
void si_draw_rectangle(struct blitter_context *blitter,
int x1, int y1, int x2, int y2,
float depth, unsigned num_instances,
enum blitter_attrib_type type,
const union blitter_attrib *attrib);
bool si_common_screen_init(struct r600_common_screen *rscreen,
struct radeon_winsys *ws);
void si_destroy_common_screen(struct r600_common_screen *rscreen);
void si_preflush_suspend_features(struct r600_common_context *ctx);
void si_postflush_resume_features(struct r600_common_context *ctx);
bool si_common_context_init(struct r600_common_context *rctx,
struct r600_common_screen *rscreen,
unsigned context_flags);
void si_common_context_cleanup(struct r600_common_context *rctx);
bool si_can_dump_shader(struct r600_common_screen *rscreen,
unsigned processor);
bool si_extra_shader_checks(struct r600_common_screen *rscreen,
unsigned processor);
void si_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
uint64_t offset, uint64_t size, unsigned value);
struct pipe_resource *si_resource_create_common(struct pipe_screen *screen,
const struct pipe_resource *templ);
const char *si_get_llvm_processor_name(enum radeon_family family);
void si_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
struct r600_resource *dst, struct r600_resource *src);
void si_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
struct radeon_saved_cs *saved, bool get_buffer_list);
void si_clear_saved_cs(struct radeon_saved_cs *saved);
bool si_check_device_reset(struct r600_common_context *rctx);
/* r600_gpu_load.c */
void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen);
uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type);
unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,
uint64_t begin);
void si_gpu_load_kill_thread(struct r600_common_screen *rscreen);
uint64_t si_begin_counter(struct r600_common_screen *rscreen, unsigned type);
unsigned si_end_counter(struct r600_common_screen *rscreen, unsigned type,
uint64_t begin);
/* r600_perfcounters.c */
void r600_perfcounters_destroy(struct r600_common_screen *rscreen);
void si_perfcounters_destroy(struct r600_common_screen *rscreen);
/* r600_query.c */
void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
void r600_query_init(struct r600_common_context *rctx);
void r600_suspend_queries(struct r600_common_context *ctx);
void r600_resume_queries(struct r600_common_context *ctx);
void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen);
void si_init_screen_query_functions(struct r600_common_screen *rscreen);
void si_init_query_functions(struct r600_common_context *rctx);
void si_suspend_queries(struct r600_common_context *ctx);
void si_resume_queries(struct r600_common_context *ctx);
/* r600_streamout.c */
void r600_streamout_buffers_dirty(struct r600_common_context *rctx);
void r600_set_streamout_targets(struct pipe_context *ctx,
unsigned num_targets,
struct pipe_stream_output_target **targets,
const unsigned *offset);
void r600_emit_streamout_end(struct r600_common_context *rctx);
void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
unsigned type, int diff);
void r600_streamout_init(struct r600_common_context *rctx);
void si_streamout_buffers_dirty(struct r600_common_context *rctx);
void si_common_set_streamout_targets(struct pipe_context *ctx,
unsigned num_targets,
struct pipe_stream_output_target **targets,
const unsigned *offset);
void si_emit_streamout_end(struct r600_common_context *rctx);
void si_update_prims_generated_query_state(struct r600_common_context *rctx,
unsigned type, int diff);
void si_streamout_init(struct r600_common_context *rctx);
/* r600_test_dma.c */
void r600_test_dma(struct r600_common_screen *rscreen);
void si_test_dma(struct r600_common_screen *rscreen);
/* r600_texture.c */
bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
struct r600_texture *rdst,
unsigned dst_level, unsigned dstx,
unsigned dsty, unsigned dstz,
struct r600_texture *rsrc,
unsigned src_level,
const struct pipe_box *src_box);
void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex,
unsigned nr_samples,
struct r600_fmask_info *out);
void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex,
struct r600_cmask_info *out);
bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
struct pipe_resource *texture,
struct r600_texture **staging);
void r600_print_texture_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex, struct u_log_context *log);
struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
bool si_prepare_for_dma_blit(struct r600_common_context *rctx,
struct r600_texture *rdst,
unsigned dst_level, unsigned dstx,
unsigned dsty, unsigned dstz,
struct r600_texture *rsrc,
unsigned src_level,
const struct pipe_box *src_box);
void si_texture_get_fmask_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex,
unsigned nr_samples,
struct r600_fmask_info *out);
bool si_init_flushed_depth_texture(struct pipe_context *ctx,
struct pipe_resource *texture,
struct r600_texture **staging);
void si_print_texture_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex, struct u_log_context *log);
struct pipe_resource *si_texture_create(struct pipe_screen *screen,
const struct pipe_resource *templ);
bool vi_dcc_formats_compatible(enum pipe_format format1,
enum pipe_format format2);
@ -857,12 +852,12 @@ void vi_disable_dcc_if_incompatible_format(struct r600_common_context *rctx,
struct pipe_resource *tex,
unsigned level,
enum pipe_format view_format);
struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
struct pipe_resource *texture,
const struct pipe_surface *templ,
unsigned width0, unsigned height0,
unsigned width, unsigned height);
unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap);
struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe,
struct pipe_resource *texture,
const struct pipe_surface *templ,
unsigned width0, unsigned height0,
unsigned width, unsigned height);
unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap);
void vi_separate_dcc_start_query(struct pipe_context *ctx,
struct r600_texture *tex);
void vi_separate_dcc_stop_query(struct pipe_context *ctx,
@ -872,37 +867,33 @@ void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
void vi_dcc_clear_level(struct r600_common_context *rctx,
struct r600_texture *rtex,
unsigned level, unsigned clear_value);
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
struct pipe_framebuffer_state *fb,
struct r600_atom *fb_state,
unsigned *buffers, ubyte *dirty_cbufs,
const union pipe_color_union *color);
bool r600_texture_disable_dcc(struct r600_common_context *rctx,
struct r600_texture *rtex);
void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
void r600_init_context_texture_functions(struct r600_common_context *rctx);
void si_do_fast_color_clear(struct r600_common_context *rctx,
struct pipe_framebuffer_state *fb,
struct r600_atom *fb_state,
unsigned *buffers, ubyte *dirty_cbufs,
const union pipe_color_union *color);
bool si_texture_disable_dcc(struct r600_common_context *rctx,
struct r600_texture *rtex);
void si_init_screen_texture_functions(struct r600_common_screen *rscreen);
void si_init_context_texture_functions(struct r600_common_context *rctx);
/* r600_viewport.c */
void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
struct pipe_scissor_state *scissor);
void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
bool scissor_enable, bool clip_halfz);
void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
struct tgsi_shader_info *info);
void r600_init_viewport_functions(struct r600_common_context *rctx);
void si_apply_scissor_bug_workaround(struct r600_common_context *rctx,
struct pipe_scissor_state *scissor);
void si_viewport_set_rast_deps(struct r600_common_context *rctx,
bool scissor_enable, bool clip_halfz);
void si_update_vs_writes_viewport_index(struct r600_common_context *rctx,
struct tgsi_shader_info *info);
void si_init_viewport_functions(struct r600_common_context *rctx);
/* cayman_msaa.c */
extern const uint32_t eg_sample_locs_2x[4];
extern const unsigned eg_max_dist_2x;
extern const uint32_t eg_sample_locs_4x[4];
extern const unsigned eg_max_dist_4x;
void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
unsigned sample_index, float *out_value);
void cayman_init_msaa(struct pipe_context *ctx);
void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples);
void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
int ps_iter_samples, int overrast_samples,
unsigned sc_mode_cntl_1);
void si_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
unsigned sample_index, float *out_value);
void si_init_msaa(struct pipe_context *ctx);
void si_common_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples);
void si_common_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
int ps_iter_samples, int overrast_samples,
unsigned sc_mode_cntl_1);
/* Inline helpers. */

View File

@ -219,7 +219,7 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
case R600_QUERY_GPU_SURF_SYNC_BUSY:
case R600_QUERY_GPU_CP_DMA_BUSY:
case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
query->begin_result = r600_begin_counter(rctx->screen,
query->begin_result = si_begin_counter(rctx->screen,
query->b.type);
break;
case R600_QUERY_NUM_COMPILATIONS:
@ -375,7 +375,7 @@ static bool r600_query_sw_end(struct r600_common_context *rctx,
case R600_QUERY_GPU_SURF_SYNC_BUSY:
case R600_QUERY_GPU_CP_DMA_BUSY:
case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
query->end_result = r600_end_counter(rctx->screen,
query->end_result = si_end_counter(rctx->screen,
query->b.type,
query->begin_result);
query->begin_result = 0;
@ -494,8 +494,8 @@ static struct pipe_query *r600_query_sw_create(unsigned query_type)
return (struct pipe_query *)query;
}
void r600_query_hw_destroy(struct r600_common_screen *rscreen,
struct r600_query *rquery)
void si_query_hw_destroy(struct r600_common_screen *rscreen,
struct r600_query *rquery)
{
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
struct r600_query_buffer *prev = query->buffer.previous;
@ -583,10 +583,10 @@ static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
unsigned offset);
static struct r600_query_ops query_hw_ops = {
.destroy = r600_query_hw_destroy,
.begin = r600_query_hw_begin,
.end = r600_query_hw_end,
.get_result = r600_query_hw_get_result,
.destroy = si_query_hw_destroy,
.begin = si_query_hw_begin,
.end = si_query_hw_end,
.get_result = si_query_hw_get_result,
.get_result_resource = r600_query_hw_get_result_resource,
};
@ -612,8 +612,8 @@ static struct r600_query_hw_ops query_hw_default_hw_ops = {
.add_result = r600_query_hw_add_result,
};
bool r600_query_hw_init(struct r600_common_screen *rscreen,
struct r600_query_hw *query)
bool si_query_hw_init(struct r600_common_screen *rscreen,
struct r600_query_hw *query)
{
query->buffer.buf = r600_new_query_buffer(rscreen, query);
if (!query->buffer.buf)
@ -641,16 +641,16 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscree
query->result_size = 16 * rscreen->info.num_render_backends;
query->result_size += 16; /* for the fence + alignment */
query->num_cs_dw_begin = 6;
query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rscreen);
query->num_cs_dw_end = 6 + si_gfx_write_fence_dwords(rscreen);
break;
case PIPE_QUERY_TIME_ELAPSED:
query->result_size = 24;
query->num_cs_dw_begin = 8;
query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rscreen);
query->num_cs_dw_end = 8 + si_gfx_write_fence_dwords(rscreen);
break;
case PIPE_QUERY_TIMESTAMP:
query->result_size = 16;
query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rscreen);
query->num_cs_dw_end = 8 + si_gfx_write_fence_dwords(rscreen);
query->flags = R600_QUERY_HW_FLAG_NO_START;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
@ -670,11 +670,11 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscree
query->num_cs_dw_end = 6 * R600_MAX_STREAMS;
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
/* 11 values on EG, 8 on R600. */
query->result_size = (rscreen->chip_class >= EVERGREEN ? 11 : 8) * 16;
/* 11 values on GCN. */
query->result_size = 11 * 16;
query->result_size += 8; /* for the fence + alignment */
query->num_cs_dw_begin = 6;
query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rscreen);
query->num_cs_dw_end = 6 + si_gfx_write_fence_dwords(rscreen);
break;
default:
assert(0);
@ -682,7 +682,7 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscree
return NULL;
}
if (!r600_query_hw_init(rscreen, query)) {
if (!si_query_hw_init(rscreen, query)) {
FREE(query);
return NULL;
}
@ -782,7 +782,7 @@ static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
/* Write the timestamp after the last draw is done.
* (bottom-of-pipe)
*/
r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
si_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
0, EOP_DATA_SEL_TIMESTAMP,
NULL, va, 0, query->b.type);
}
@ -809,7 +809,7 @@ static void r600_query_hw_emit_start(struct r600_common_context *ctx,
return; // previous buffer allocation failure
r600_update_occlusion_query_state(ctx, query->b.type, 1);
r600_update_prims_generated_query_state(ctx, query->b.type, 1);
si_update_prims_generated_query_state(ctx, query->b.type, 1);
ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_begin + query->num_cs_dw_end,
true);
@ -869,7 +869,7 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
va += 8;
/* fall through */
case PIPE_QUERY_TIMESTAMP:
r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
si_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
0, EOP_DATA_SEL_TIMESTAMP, NULL, va,
0, query->b.type);
fence_va = va + 8;
@ -893,7 +893,7 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
RADEON_PRIO_QUERY);
if (fence_va)
r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0,
si_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0,
EOP_DATA_SEL_VALUE_32BIT,
query->buffer.buf, fence_va, 0x80000000,
query->b.type);
@ -923,7 +923,7 @@ static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
ctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end;
r600_update_occlusion_query_state(ctx, query->b.type, -1);
r600_update_prims_generated_query_state(ctx, query->b.type, -1);
si_update_prims_generated_query_state(ctx, query->b.type, -1);
}
static void emit_set_predicate(struct r600_common_context *ctx,
@ -1057,8 +1057,8 @@ static boolean r600_begin_query(struct pipe_context *ctx,
return rquery->ops->begin(rctx, rquery);
}
void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
struct r600_query_hw *query)
void si_query_hw_reset_buffers(struct r600_common_context *rctx,
struct r600_query_hw *query)
{
struct r600_query_buffer *prev = query->buffer.previous;
@ -1074,7 +1074,7 @@ void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
query->buffer.previous = NULL;
/* Obtain a new buffer if the current one can't be mapped without a stall. */
if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) ||
if (si_rings_is_buffer_referenced(rctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) ||
!rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
r600_resource_reference(&query->buffer.buf, NULL);
query->buffer.buf = r600_new_query_buffer(rctx->screen, query);
@ -1084,8 +1084,8 @@ void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
}
}
bool r600_query_hw_begin(struct r600_common_context *rctx,
struct r600_query *rquery)
bool si_query_hw_begin(struct r600_common_context *rctx,
struct r600_query *rquery)
{
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
@ -1095,7 +1095,7 @@ bool r600_query_hw_begin(struct r600_common_context *rctx,
}
if (!(query->flags & R600_QUERY_HW_FLAG_BEGIN_RESUMES))
r600_query_hw_reset_buffers(rctx, query);
si_query_hw_reset_buffers(rctx, query);
r600_resource_reference(&query->workaround_buf, NULL);
@ -1115,13 +1115,13 @@ static bool r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
return rquery->ops->end(rctx, rquery);
}
bool r600_query_hw_end(struct r600_common_context *rctx,
struct r600_query *rquery)
bool si_query_hw_end(struct r600_common_context *rctx,
struct r600_query *rquery)
{
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
if (query->flags & R600_QUERY_HW_FLAG_NO_START)
r600_query_hw_reset_buffers(rctx, query);
si_query_hw_reset_buffers(rctx, query);
r600_query_hw_emit_stop(rctx, query);
@ -1287,47 +1287,28 @@ static void r600_query_hw_add_result(struct r600_common_screen *rscreen,
}
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
if (rscreen->chip_class >= EVERGREEN) {
result->pipeline_statistics.ps_invocations +=
r600_query_read_result(buffer, 0, 22, false);
result->pipeline_statistics.c_primitives +=
r600_query_read_result(buffer, 2, 24, false);
result->pipeline_statistics.c_invocations +=
r600_query_read_result(buffer, 4, 26, false);
result->pipeline_statistics.vs_invocations +=
r600_query_read_result(buffer, 6, 28, false);
result->pipeline_statistics.gs_invocations +=
r600_query_read_result(buffer, 8, 30, false);
result->pipeline_statistics.gs_primitives +=
r600_query_read_result(buffer, 10, 32, false);
result->pipeline_statistics.ia_primitives +=
r600_query_read_result(buffer, 12, 34, false);
result->pipeline_statistics.ia_vertices +=
r600_query_read_result(buffer, 14, 36, false);
result->pipeline_statistics.hs_invocations +=
r600_query_read_result(buffer, 16, 38, false);
result->pipeline_statistics.ds_invocations +=
r600_query_read_result(buffer, 18, 40, false);
result->pipeline_statistics.cs_invocations +=
r600_query_read_result(buffer, 20, 42, false);
} else {
result->pipeline_statistics.ps_invocations +=
r600_query_read_result(buffer, 0, 16, false);
result->pipeline_statistics.c_primitives +=
r600_query_read_result(buffer, 2, 18, false);
result->pipeline_statistics.c_invocations +=
r600_query_read_result(buffer, 4, 20, false);
result->pipeline_statistics.vs_invocations +=
r600_query_read_result(buffer, 6, 22, false);
result->pipeline_statistics.gs_invocations +=
r600_query_read_result(buffer, 8, 24, false);
result->pipeline_statistics.gs_primitives +=
r600_query_read_result(buffer, 10, 26, false);
result->pipeline_statistics.ia_primitives +=
r600_query_read_result(buffer, 12, 28, false);
result->pipeline_statistics.ia_vertices +=
r600_query_read_result(buffer, 14, 30, false);
}
result->pipeline_statistics.ps_invocations +=
r600_query_read_result(buffer, 0, 22, false);
result->pipeline_statistics.c_primitives +=
r600_query_read_result(buffer, 2, 24, false);
result->pipeline_statistics.c_invocations +=
r600_query_read_result(buffer, 4, 26, false);
result->pipeline_statistics.vs_invocations +=
r600_query_read_result(buffer, 6, 28, false);
result->pipeline_statistics.gs_invocations +=
r600_query_read_result(buffer, 8, 30, false);
result->pipeline_statistics.gs_primitives +=
r600_query_read_result(buffer, 10, 32, false);
result->pipeline_statistics.ia_primitives +=
r600_query_read_result(buffer, 12, 34, false);
result->pipeline_statistics.ia_vertices +=
r600_query_read_result(buffer, 14, 36, false);
result->pipeline_statistics.hs_invocations +=
r600_query_read_result(buffer, 16, 38, false);
result->pipeline_statistics.ds_invocations +=
r600_query_read_result(buffer, 18, 40, false);
result->pipeline_statistics.cs_invocations +=
r600_query_read_result(buffer, 20, 42, false);
#if 0 /* for testing */
printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
"DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, "
@ -1381,9 +1362,9 @@ static void r600_query_hw_clear_result(struct r600_query_hw *query,
util_query_clear_result(result, query->b.type);
}
bool r600_query_hw_get_result(struct r600_common_context *rctx,
struct r600_query *rquery,
bool wait, union pipe_query_result *result)
bool si_query_hw_get_result(struct r600_common_context *rctx,
struct r600_query *rquery,
bool wait, union pipe_query_result *result)
{
struct r600_common_screen *rscreen = rctx->screen;
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
@ -1400,7 +1381,7 @@ bool r600_query_hw_get_result(struct r600_common_context *rctx,
if (rquery->b.flushed)
map = rctx->ws->buffer_map(qbuf->buf->buf, NULL, usage);
else
map = r600_buffer_map_sync_with_rings(rctx, qbuf->buf, usage);
map = si_buffer_map_sync_with_rings(rctx, qbuf->buf, usage);
if (!map)
return false;
@ -1787,7 +1768,7 @@ static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
va = qbuf->buf->gpu_address + qbuf->results_end - query->result_size;
va += params.fence_offset;
r600_gfx_wait_fence(rctx, va, 0x80000000, 0x80000000);
si_gfx_wait_fence(rctx, va, 0x80000000, 0x80000000);
}
rctx->b.launch_grid(&rctx->b, &grid);
@ -1871,7 +1852,7 @@ static void r600_render_condition(struct pipe_context *ctx,
rctx->set_atom_dirty(rctx, atom, query != NULL);
}
void r600_suspend_queries(struct r600_common_context *ctx)
void si_suspend_queries(struct r600_common_context *ctx)
{
struct r600_query_hw *query;
@ -1906,7 +1887,7 @@ static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *
return num_dw;
}
void r600_resume_queries(struct r600_common_context *ctx)
void si_resume_queries(struct r600_common_context *ctx)
{
struct r600_query_hw *query;
unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, &ctx->active_queries);
@ -1921,84 +1902,6 @@ void r600_resume_queries(struct r600_common_context *ctx)
}
}
/* Fix radeon_info::enabled_rb_mask for R600, R700, EVERGREEN, NI. */
void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen)
{
struct r600_common_context *ctx =
(struct r600_common_context*)rscreen->aux_context;
struct radeon_winsys_cs *cs = ctx->gfx.cs;
struct r600_resource *buffer;
uint32_t *results;
unsigned i, mask = 0;
unsigned max_rbs = ctx->screen->info.num_render_backends;
assert(rscreen->chip_class <= CAYMAN);
/* if backend_map query is supported by the kernel */
if (rscreen->info.r600_gb_backend_map_valid) {
unsigned num_tile_pipes = rscreen->info.num_tile_pipes;
unsigned backend_map = rscreen->info.r600_gb_backend_map;
unsigned item_width, item_mask;
if (ctx->chip_class >= EVERGREEN) {
item_width = 4;
item_mask = 0x7;
} else {
item_width = 2;
item_mask = 0x3;
}
while (num_tile_pipes--) {
i = backend_map & item_mask;
mask |= (1<<i);
backend_map >>= item_width;
}
if (mask != 0) {
rscreen->info.enabled_rb_mask = mask;
return;
}
}
/* otherwise backup path for older kernels */
/* create buffer for event data */
buffer = (struct r600_resource*)
pipe_buffer_create(ctx->b.screen, 0,
PIPE_USAGE_STAGING, max_rbs * 16);
if (!buffer)
return;
/* initialize buffer with zeroes */
results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE);
if (results) {
memset(results, 0, max_rbs * 4 * 4);
/* emit EVENT_WRITE for ZPASS_DONE */
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, buffer->gpu_address);
radeon_emit(cs, buffer->gpu_address >> 32);
r600_emit_reloc(ctx, &ctx->gfx, buffer,
RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
/* analyze results */
results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
if (results) {
for(i = 0; i < max_rbs; i++) {
/* at least highest bit will be set if backend is used */
if (results[i*4 + 1])
mask |= (1<<i);
}
}
}
r600_resource_reference(&buffer, NULL);
if (mask)
rscreen->info.enabled_rb_mask = mask;
}
#define XFULL(name_, query_type_, type_, result_type_, group_id_) \
{ \
.name = name_, \
@ -2124,13 +2027,13 @@ static int r600_get_driver_query_info(struct pipe_screen *screen,
if (!info) {
unsigned num_perfcounters =
r600_get_perfcounter_info(rscreen, 0, NULL);
si_get_perfcounter_info(rscreen, 0, NULL);
return num_queries + num_perfcounters;
}
if (index >= num_queries)
return r600_get_perfcounter_info(rscreen, index - num_queries, info);
return si_get_perfcounter_info(rscreen, index - num_queries, info);
*info = r600_driver_query_list[index];
@ -2177,7 +2080,7 @@ static int r600_get_driver_query_group_info(struct pipe_screen *screen,
return num_pc_groups + R600_NUM_SW_QUERY_GROUPS;
if (index < num_pc_groups)
return r600_get_perfcounter_group_info(rscreen, index, info);
return si_get_perfcounter_group_info(rscreen, index, info);
index -= num_pc_groups;
if (index >= R600_NUM_SW_QUERY_GROUPS)
@ -2189,10 +2092,10 @@ static int r600_get_driver_query_group_info(struct pipe_screen *screen,
return 1;
}
void r600_query_init(struct r600_common_context *rctx)
void si_init_query_functions(struct r600_common_context *rctx)
{
rctx->b.create_query = r600_create_query;
rctx->b.create_batch_query = r600_create_batch_query;
rctx->b.create_batch_query = si_create_batch_query;
rctx->b.destroy_query = r600_destroy_query;
rctx->b.begin_query = r600_begin_query;
rctx->b.end_query = r600_end_query;
@ -2206,7 +2109,7 @@ void r600_query_init(struct r600_common_context *rctx)
LIST_INITHEAD(&rctx->active_queries);
}
void r600_init_screen_query_functions(struct r600_common_screen *rscreen)
void si_init_screen_query_functions(struct r600_common_screen *rscreen)
{
rscreen->b.get_driver_query_info = r600_get_driver_query_info;
rscreen->b.get_driver_query_group_info = r600_get_driver_query_group_info;

View File

@ -200,18 +200,18 @@ struct r600_query_hw {
unsigned workaround_offset;
};
bool r600_query_hw_init(struct r600_common_screen *rscreen,
struct r600_query_hw *query);
void r600_query_hw_destroy(struct r600_common_screen *rscreen,
struct r600_query *rquery);
bool r600_query_hw_begin(struct r600_common_context *rctx,
bool si_query_hw_init(struct r600_common_screen *rscreen,
struct r600_query_hw *query);
void si_query_hw_destroy(struct r600_common_screen *rscreen,
struct r600_query *rquery);
bool r600_query_hw_end(struct r600_common_context *rctx,
bool si_query_hw_begin(struct r600_common_context *rctx,
struct r600_query *rquery);
bool r600_query_hw_get_result(struct r600_common_context *rctx,
struct r600_query *rquery,
bool wait,
union pipe_query_result *result);
bool si_query_hw_end(struct r600_common_context *rctx,
struct r600_query *rquery);
bool si_query_hw_get_result(struct r600_common_context *rctx,
struct r600_query *rquery,
bool wait,
union pipe_query_result *result);
/* Performance counters */
enum {
@ -297,26 +297,26 @@ struct r600_perfcounters {
bool separate_instance;
};
struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
unsigned num_queries,
unsigned *query_types);
struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
unsigned num_queries,
unsigned *query_types);
int r600_get_perfcounter_info(struct r600_common_screen *,
unsigned index,
struct pipe_driver_query_info *info);
int r600_get_perfcounter_group_info(struct r600_common_screen *,
unsigned index,
struct pipe_driver_query_group_info *info);
int si_get_perfcounter_info(struct r600_common_screen *,
unsigned index,
struct pipe_driver_query_info *info);
int si_get_perfcounter_group_info(struct r600_common_screen *,
unsigned index,
struct pipe_driver_query_group_info *info);
bool r600_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks);
void r600_perfcounters_add_block(struct r600_common_screen *,
struct r600_perfcounters *,
const char *name, unsigned flags,
unsigned counters, unsigned selectors,
unsigned instances, void *data);
void r600_perfcounters_do_destroy(struct r600_perfcounters *);
void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
struct r600_query_hw *query);
bool si_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks);
void si_perfcounters_add_block(struct r600_common_screen *,
struct r600_perfcounters *,
const char *name, unsigned flags,
unsigned counters, unsigned selectors,
unsigned instances, void *data);
void si_perfcounters_do_destroy(struct r600_perfcounters *);
void si_query_hw_reset_buffers(struct r600_common_context *rctx,
struct r600_query_hw *query);
struct r600_qbo_state {
void *saved_compute;

View File

@ -74,7 +74,7 @@ static void r600_so_target_destroy(struct pipe_context *ctx,
FREE(t);
}
void r600_streamout_buffers_dirty(struct r600_common_context *rctx)
void si_streamout_buffers_dirty(struct r600_common_context *rctx)
{
struct r600_atom *begin = &rctx->streamout.begin_atom;
unsigned num_bufs = util_bitcount(rctx->streamout.enabled_mask);
@ -109,10 +109,10 @@ void r600_streamout_buffers_dirty(struct r600_common_context *rctx)
r600_set_streamout_enable(rctx, true);
}
void r600_set_streamout_targets(struct pipe_context *ctx,
unsigned num_targets,
struct pipe_stream_output_target **targets,
const unsigned *offsets)
void si_common_set_streamout_targets(struct pipe_context *ctx,
unsigned num_targets,
struct pipe_stream_output_target **targets,
const unsigned *offsets)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
unsigned i;
@ -120,7 +120,7 @@ void r600_set_streamout_targets(struct pipe_context *ctx,
/* Stop streamout. */
if (rctx->streamout.num_targets && rctx->streamout.begin_emitted) {
r600_emit_streamout_end(rctx);
si_emit_streamout_end(rctx);
}
/* Set the new targets. */
@ -144,7 +144,7 @@ void r600_set_streamout_targets(struct pipe_context *ctx,
rctx->streamout.append_bitmask = append_bitmask;
if (num_targets) {
r600_streamout_buffers_dirty(rctx);
si_streamout_buffers_dirty(rctx);
} else {
rctx->set_atom_dirty(rctx, &rctx->streamout.begin_atom, false);
r600_set_streamout_enable(rctx, false);
@ -266,7 +266,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
rctx->streamout.begin_emitted = true;
}
void r600_emit_streamout_end(struct r600_common_context *rctx)
void si_emit_streamout_end(struct r600_common_context *rctx)
{
struct radeon_winsys_cs *cs = rctx->gfx.cs;
struct r600_so_target **t = rctx->streamout.targets;
@ -353,8 +353,8 @@ static void r600_set_streamout_enable(struct r600_common_context *rctx, bool ena
}
}
void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
unsigned type, int diff)
void si_update_prims_generated_query_state(struct r600_common_context *rctx,
unsigned type, int diff)
{
if (type == PIPE_QUERY_PRIMITIVES_GENERATED) {
bool old_strmout_en = r600_get_strmout_en(rctx);
@ -371,7 +371,7 @@ void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
}
}
void r600_streamout_init(struct r600_common_context *rctx)
void si_streamout_init(struct r600_common_context *rctx)
{
rctx->b.create_stream_output_target = r600_create_so_target;
rctx->b.stream_output_target_destroy = r600_so_target_destroy;


@ -171,7 +171,7 @@ static unsigned generate_max_tex_side(unsigned max_tex_side)
}
}
void r600_test_dma(struct r600_common_screen *rscreen)
void si_test_dma(struct r600_common_screen *rscreen)
{
struct pipe_screen *screen = &rscreen->b;
struct pipe_context *ctx = screen->context_create(screen, NULL, 0);


@ -44,13 +44,13 @@ r600_choose_tiling(struct r600_common_screen *rscreen,
const struct pipe_resource *templ);
bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
struct r600_texture *rdst,
unsigned dst_level, unsigned dstx,
unsigned dsty, unsigned dstz,
struct r600_texture *rsrc,
unsigned src_level,
const struct pipe_box *src_box)
bool si_prepare_for_dma_blit(struct r600_common_context *rctx,
struct r600_texture *rdst,
unsigned dst_level, unsigned dstx,
unsigned dsty, unsigned dstz,
struct r600_texture *rsrc,
unsigned src_level,
const struct pipe_box *src_box)
{
if (!rctx->dma.cs)
return false;
@ -237,7 +237,7 @@ static int r600_init_surface(struct r600_common_screen *rscreen,
is_depth = util_format_has_depth(desc);
is_stencil = util_format_has_stencil(desc);
if (rscreen->chip_class >= EVERGREEN && !is_flushed_depth &&
if (!is_flushed_depth &&
ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
bpe = 4; /* stencil is allocated separately on evergreen */
} else {
@ -408,10 +408,7 @@ static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8;
rtex->dirty_level_mask = 0;
if (rscreen->chip_class >= SI)
rtex->cb_color_info &= ~SI_S_028C70_FAST_CLEAR(1);
else
rtex->cb_color_info &= ~EG_S_028C70_FAST_CLEAR(1);
rtex->cb_color_info &= ~SI_S_028C70_FAST_CLEAR(1);
if (rtex->cmask_buffer != &rtex->resource)
r600_resource_reference(&rtex->cmask_buffer, NULL);
@ -466,8 +463,8 @@ static bool r600_texture_discard_dcc(struct r600_common_screen *rscreen,
* \param rctx the current context if you have one, or rscreen->aux_context
* if you don't.
*/
bool r600_texture_disable_dcc(struct r600_common_context *rctx,
struct r600_texture *rtex)
bool si_texture_disable_dcc(struct r600_common_context *rctx,
struct r600_texture *rtex)
{
struct r600_common_screen *rscreen = rctx->screen;
@ -624,7 +621,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
* access.
*/
if (usage & PIPE_HANDLE_USAGE_WRITE && rtex->dcc_offset) {
if (r600_texture_disable_dcc(rctx, rtex))
if (si_texture_disable_dcc(rctx, rtex))
update_metadata = true;
}
@ -681,7 +678,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
rctx->b.resource_copy_region(&rctx->b, newb, 0, 0, 0, 0,
&res->b.b, 0, &box);
/* Move the new buffer storage to the old pipe_resource. */
r600_replace_buffer_storage(&rctx->b, &res->b.b, newb);
si_replace_buffer_storage(&rctx->b, &res->b.b, newb);
pipe_resource_reference(&newb, NULL);
assert(res->b.b.bind & PIPE_BIND_SHARED);
@ -730,10 +727,10 @@ static void r600_texture_destroy(struct pipe_screen *screen,
static const struct u_resource_vtbl r600_texture_vtbl;
/* The number of samples can be specified independently of the texture. */
void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex,
unsigned nr_samples,
struct r600_fmask_info *out)
void si_texture_get_fmask_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex,
unsigned nr_samples,
struct r600_fmask_info *out)
{
/* FMASK is allocated like an ordinary texture. */
struct pipe_resource templ = rtex->resource.b.b;
@ -751,17 +748,6 @@ void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
templ.nr_samples = 1;
flags = rtex->surface.flags | RADEON_SURF_FMASK;
if (rscreen->chip_class <= CAYMAN) {
/* Use the same parameters and tile mode. */
fmask.u.legacy.bankw = rtex->surface.u.legacy.bankw;
fmask.u.legacy.bankh = rtex->surface.u.legacy.bankh;
fmask.u.legacy.mtilea = rtex->surface.u.legacy.mtilea;
fmask.u.legacy.tile_split = rtex->surface.u.legacy.tile_split;
if (nr_samples <= 4)
fmask.u.legacy.bankh = 4;
}
switch (nr_samples) {
case 2:
case 4:
@ -775,13 +761,6 @@ void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
return;
}
/* Overallocate FMASK on R600-R700 to fix colorbuffer corruption.
* This can be fixed by writing a separate FMASK allocator specifically
* for R600-R700 asics. */
if (rscreen->chip_class <= R700) {
bpe *= 2;
}
if (rscreen->ws->surface_init(rscreen->ws, &templ, flags, bpe,
RADEON_SURF_MODE_2D, &fmask)) {
R600_ERR("Got error in surface_init while allocating FMASK.\n");
@ -805,47 +784,13 @@ void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
r600_texture_get_fmask_info(rscreen, rtex,
si_texture_get_fmask_info(rscreen, rtex,
rtex->resource.b.b.nr_samples, &rtex->fmask);
rtex->fmask.offset = align64(rtex->size, rtex->fmask.alignment);
rtex->size = rtex->fmask.offset + rtex->fmask.size;
}
void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex,
struct r600_cmask_info *out)
{
unsigned cmask_tile_width = 8;
unsigned cmask_tile_height = 8;
unsigned cmask_tile_elements = cmask_tile_width * cmask_tile_height;
unsigned element_bits = 4;
unsigned cmask_cache_bits = 1024;
unsigned num_pipes = rscreen->info.num_tile_pipes;
unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes;
unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements;
unsigned sqrt_pixels_per_macro_tile = sqrt(pixels_per_macro_tile);
unsigned macro_tile_width = util_next_power_of_two(sqrt_pixels_per_macro_tile);
unsigned macro_tile_height = pixels_per_macro_tile / macro_tile_width;
unsigned pitch_elements = align(rtex->resource.b.b.width0, macro_tile_width);
unsigned height = align(rtex->resource.b.b.height0, macro_tile_height);
unsigned base_align = num_pipes * pipe_interleave_bytes;
unsigned slice_bytes =
((pitch_elements * height * element_bits + 7) / 8) / cmask_tile_elements;
assert(macro_tile_width % 128 == 0);
assert(macro_tile_height % 128 == 0);
out->slice_tile_max = ((pitch_elements * height) / (128*128)) - 1;
out->alignment = MAX2(256, base_align);
out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
align(slice_bytes, base_align);
}
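
For reference (not part of this change), the legacy pre-SI CMASK sizing removed above is just a handful of alignment and macro-tile calculations. The standalone sketch below reproduces the same arithmetic for a made-up GPU (4 tile pipes, 256-byte pipe interleave) and a hypothetical 1920x1080 single-layer surface, with align_to() and next_pow2() standing in for Mesa's align() and util_next_power_of_two().

    /* Hypothetical worked example of the legacy CMASK sizing. */
    #include <math.h>
    #include <stdio.h>

    static unsigned align_to(unsigned v, unsigned a) { return (v + a - 1) / a * a; }
    static unsigned next_pow2(unsigned v) { unsigned p = 1; while (p < v) p <<= 1; return p; }

    int main(void)
    {
        unsigned width0 = 1920, height0 = 1080, layers = 1; /* example surface */
        unsigned num_pipes = 4, pipe_interleave_bytes = 256; /* made-up GPU info */

        unsigned cmask_tile_elements = 8 * 8; /* one CMASK element covers 8x8 pixels */
        unsigned element_bits = 4;
        unsigned cmask_cache_bits = 1024;

        unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes;
        unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements;
        unsigned macro_tile_width = next_pow2((unsigned)sqrt(pixels_per_macro_tile));
        unsigned macro_tile_height = pixels_per_macro_tile / macro_tile_width;

        unsigned pitch_elements = align_to(width0, macro_tile_width);
        unsigned height = align_to(height0, macro_tile_height);
        unsigned base_align = num_pipes * pipe_interleave_bytes;
        unsigned slice_bytes =
            ((pitch_elements * height * element_bits + 7) / 8) / cmask_tile_elements;

        unsigned slice_tile_max = (pitch_elements * height) / (128 * 128) - 1;
        unsigned alignment = base_align > 256 ? base_align : 256;
        unsigned size = layers * align_to(slice_bytes, base_align);

        /* Prints: slice_tile_max=159 alignment=1024 size=20480 */
        printf("slice_tile_max=%u alignment=%u size=%u\n",
               slice_tile_max, alignment, size);
        return 0;
    }
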
static void si_texture_get_cmask_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex,
struct r600_cmask_info *out)
@ -903,19 +848,12 @@ static void si_texture_get_cmask_info(struct r600_common_screen *rscreen,
static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
if (rscreen->chip_class >= SI) {
si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
} else {
r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
}
si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
rtex->cmask.offset = align64(rtex->size, rtex->cmask.alignment);
rtex->size = rtex->cmask.offset + rtex->cmask.size;
if (rscreen->chip_class >= SI)
rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
else
rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
}
static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen,
@ -926,14 +864,10 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen
assert(rtex->cmask.size == 0);
if (rscreen->chip_class >= SI) {
si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
} else {
r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
}
si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
rtex->cmask_buffer = (struct r600_resource *)
r600_aligned_buffer_create(&rscreen->b,
si_aligned_buffer_create(&rscreen->b,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
rtex->cmask.size,
@ -946,10 +880,7 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen
/* update colorbuffer state bits */
rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
if (rscreen->chip_class >= SI)
rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
else
rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
p_atomic_inc(&rscreen->compressed_colortex_counter);
}
@ -965,16 +896,6 @@ static void r600_texture_get_htile_size(struct r600_common_screen *rscreen,
rtex->surface.htile_size = 0;
if (rscreen->chip_class <= EVERGREEN &&
rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 26)
return;
/* HW bug on R6xx. */
if (rscreen->chip_class == R600 &&
(rtex->resource.b.b.width0 > 7680 ||
rtex->resource.b.b.height0 > 7680))
return;
/* HTILE is broken with 1D tiling on old kernels and CIK. */
if (rscreen->chip_class >= CIK &&
rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
@ -1045,8 +966,8 @@ static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
rtex->size = rtex->htile_offset + rtex->surface.htile_size;
}
void r600_print_texture_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex, struct u_log_context *log)
void si_print_texture_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex, struct u_log_context *log)
{
int i;
@ -1252,21 +1173,12 @@ r600_texture_create_object(struct pipe_screen *screen,
rtex->ps_draw_ratio = 0;
if (rtex->is_depth) {
if (base->flags & (R600_RESOURCE_FLAG_TRANSFER |
R600_RESOURCE_FLAG_FLUSHED_DEPTH) ||
rscreen->chip_class >= EVERGREEN) {
if (rscreen->chip_class >= GFX9) {
rtex->can_sample_z = true;
rtex->can_sample_s = true;
} else {
rtex->can_sample_z = !rtex->surface.u.legacy.depth_adjusted;
rtex->can_sample_s = !rtex->surface.u.legacy.stencil_adjusted;
}
if (rscreen->chip_class >= GFX9) {
rtex->can_sample_z = true;
rtex->can_sample_s = true;
} else {
if (rtex->resource.b.b.nr_samples <= 1 &&
(rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM ||
rtex->resource.b.b.format == PIPE_FORMAT_Z32_FLOAT))
rtex->can_sample_z = true;
rtex->can_sample_z = !rtex->surface.u.legacy.depth_adjusted;
rtex->can_sample_s = !rtex->surface.u.legacy.stencil_adjusted;
}
if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
@ -1304,14 +1216,14 @@ r600_texture_create_object(struct pipe_screen *screen,
/* Now create the backing buffer. */
if (!buf) {
r600_init_resource_fields(rscreen, resource, rtex->size,
si_init_resource_fields(rscreen, resource, rtex->size,
rtex->surface.surf_alignment);
/* Displayable surfaces are not suballocated. */
if (resource->b.b.bind & PIPE_BIND_SCANOUT)
resource->flags |= RADEON_FLAG_NO_SUBALLOC;
if (!r600_alloc_resource(rscreen, resource)) {
if (!si_alloc_resource(rscreen, resource)) {
FREE(rtex);
return NULL;
}
@ -1329,7 +1241,7 @@ r600_texture_create_object(struct pipe_screen *screen,
if (rtex->cmask.size) {
/* Initialize the cmask to 0xCC (= compressed state). */
r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
si_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
rtex->cmask.offset, rtex->cmask.size,
0xCCCCCCCC);
}
@ -1339,7 +1251,7 @@ r600_texture_create_object(struct pipe_screen *screen,
if (rscreen->chip_class >= GFX9 || rtex->tc_compatible_htile)
clear_value = 0x0000030F;
r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
si_screen_clear_buffer(rscreen, &rtex->resource.b.b,
rtex->htile_offset,
rtex->surface.htile_size,
clear_value);
@ -1347,7 +1259,7 @@ r600_texture_create_object(struct pipe_screen *screen,
/* Initialize DCC only if the texture is not being imported. */
if (!buf && rtex->dcc_offset) {
r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
si_screen_clear_buffer(rscreen, &rtex->resource.b.b,
rtex->dcc_offset,
rtex->surface.dcc_size,
0xFFFFFFFF);
@ -1369,7 +1281,7 @@ r600_texture_create_object(struct pipe_screen *screen,
puts("Texture:");
struct u_log_context log;
u_log_context_init(&log);
r600_print_texture_info(rscreen, rtex, &log);
si_print_texture_info(rscreen, rtex, &log);
u_log_new_page_print(&log, stdout);
fflush(stdout);
u_log_context_destroy(&log);
@ -1403,13 +1315,6 @@ r600_choose_tiling(struct r600_common_screen *rscreen,
(templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY))
return RADEON_SURF_MODE_2D;
/* r600g: force tiling on TEXTURE_2D and TEXTURE_3D compute resources. */
if (rscreen->chip_class >= R600 && rscreen->chip_class <= CAYMAN &&
(templ->bind & PIPE_BIND_COMPUTE_RESOURCE) &&
(templ->target == PIPE_TEXTURE_2D ||
templ->target == PIPE_TEXTURE_3D))
force_tiling = true;
/* Handle common candidates for the linear mode.
* Compressed textures and DB surfaces must always be tiled.
*/
@ -1425,8 +1330,7 @@ r600_choose_tiling(struct r600_common_screen *rscreen,
/* Cursors are linear on SI.
* (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */
if (rscreen->chip_class >= SI &&
(templ->bind & PIPE_BIND_CURSOR))
if (templ->bind & PIPE_BIND_CURSOR)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
if (templ->bind & PIPE_BIND_LINEAR)
@ -1455,8 +1359,8 @@ r600_choose_tiling(struct r600_common_screen *rscreen,
return RADEON_SURF_MODE_2D;
}
struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
const struct pipe_resource *templ)
struct pipe_resource *si_texture_create(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct radeon_surf surface = {0};
@ -1531,9 +1435,9 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen
return &rtex->resource.b.b;
}
bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
struct pipe_resource *texture,
struct r600_texture **staging)
bool si_init_flushed_depth_texture(struct pipe_context *ctx,
struct pipe_resource *texture,
struct r600_texture **staging)
{
struct r600_texture *rtex = (struct r600_texture*)texture;
struct pipe_resource resource;
@ -1633,9 +1537,7 @@ static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen,
unsigned transfer_usage,
const struct pipe_box *box)
{
/* r600g doesn't react to dirty_tex_descriptor_counter */
return rscreen->chip_class >= SI &&
!rtex->resource.b.is_shared &&
return !rtex->resource.b.is_shared &&
!(transfer_usage & PIPE_TRANSFER_READ) &&
rtex->resource.b.b.last_level == 0 &&
util_texrange_covers_whole_level(&rtex->resource.b.b, 0,
@ -1654,7 +1556,7 @@ static void r600_texture_invalidate_storage(struct r600_common_context *rctx,
assert(rtex->surface.is_linear);
/* Reallocate the buffer in the same pipe_resource. */
r600_alloc_resource(rscreen, &rtex->resource);
si_alloc_resource(rscreen, &rtex->resource);
/* Initialize the CMASK base address (needed even without CMASK). */
rtex->cmask.base_address_reg =
@ -1718,7 +1620,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
rtex->resource.domains & RADEON_DOMAIN_VRAM ||
rtex->resource.flags & RADEON_FLAG_GTT_WC;
/* Write & linear only: */
else if (r600_rings_is_buffer_referenced(rctx, rtex->resource.buf,
else if (si_rings_is_buffer_referenced(rctx, rtex->resource.buf,
RADEON_USAGE_READWRITE) ||
!rctx->ws->buffer_wait(rtex->resource.buf, 0,
RADEON_USAGE_READWRITE)) {
@ -1757,7 +1659,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
r600_init_temp_resource_from_box(&resource, texture, box, level, 0);
if (!r600_init_flushed_depth_texture(ctx, &resource, &staging_depth)) {
if (!si_init_flushed_depth_texture(ctx, &resource, &staging_depth)) {
R600_ERR("failed to create temporary texture to hold untiled copy\n");
FREE(trans);
return NULL;
@ -1784,7 +1686,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
} else {
/* XXX: only readback the rectangle which is being mapped? */
/* XXX: when discard is true, no need to read back from depth texture */
if (!r600_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
if (!si_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
R600_ERR("failed to create temporary texture to hold untiled copy\n");
FREE(trans);
return NULL;
@ -1840,7 +1742,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
buf = &rtex->resource;
}
if (!(map = r600_buffer_map_sync_with_rings(rctx, buf, usage))) {
if (!(map = si_buffer_map_sync_with_rings(rctx, buf, usage))) {
r600_resource_reference(&trans->staging, NULL);
FREE(trans);
return NULL;
@ -2010,15 +1912,15 @@ void vi_disable_dcc_if_incompatible_format(struct r600_common_context *rctx,
if (vi_dcc_enabled(rtex, level) &&
!vi_dcc_formats_compatible(tex->format, view_format))
if (!r600_texture_disable_dcc(rctx, (struct r600_texture*)tex))
if (!si_texture_disable_dcc(rctx, (struct r600_texture*)tex))
rctx->decompress_dcc(&rctx->b, rtex);
}
struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
struct pipe_resource *texture,
const struct pipe_surface *templ,
unsigned width0, unsigned height0,
unsigned width, unsigned height)
struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe,
struct pipe_resource *texture,
const struct pipe_surface *templ,
unsigned width0, unsigned height0,
unsigned width, unsigned height)
{
struct r600_surface *surface = CALLOC_STRUCT(r600_surface);
@ -2079,7 +1981,7 @@ static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
}
}
return r600_create_surface_custom(pipe, tex, templ,
return si_create_surface_custom(pipe, tex, templ,
width0, height0,
width, height);
}
@ -2159,7 +2061,7 @@ static void r600_clear_texture(struct pipe_context *pipe,
pipe_surface_reference(&sf, NULL);
}
unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap)
unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap)
{
const struct util_format_description *desc = util_format_description(format);
@ -2380,7 +2282,7 @@ static void vi_separate_dcc_try_enable(struct r600_common_context *rctx,
tex->last_dcc_separate_buffer = NULL;
} else {
tex->dcc_separate_buffer = (struct r600_resource*)
r600_aligned_buffer_create(rctx->b.screen,
si_aligned_buffer_create(rctx->b.screen,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
tex->surface.dcc_size,
@ -2416,7 +2318,7 @@ void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
/* Read the results. */
ctx->get_query_result(ctx, rctx->dcc_stats[i].ps_stats[2],
true, &result);
r600_query_hw_reset_buffers(rctx,
si_query_hw_reset_buffers(rctx,
(struct r600_query_hw*)
rctx->dcc_stats[i].ps_stats[2]);
@ -2527,7 +2429,7 @@ static bool vi_get_fast_clear_parameters(enum pipe_format surface_format,
util_format_is_alpha(surface_format)) {
extra_channel = -1;
} else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
if(r600_translate_colorswap(surface_format, false) <= 1)
if(si_translate_colorswap(surface_format, false) <= 1)
extra_channel = desc->nr_channels - 1;
else
extra_channel = 0;
@ -2725,7 +2627,7 @@ static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen,
p_atomic_inc(&rscreen->dirty_tex_counter);
}
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
void si_do_fast_color_clear(struct r600_common_context *rctx,
struct pipe_framebuffer_state *fb,
struct r600_atom *fb_state,
unsigned *buffers, ubyte *dirty_cbufs,
@ -2858,8 +2760,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
}
/* We can change the micro tile mode before a full clear. */
if (rctx->screen->chip_class >= SI)
si_set_optimal_micro_tile_mode(rctx->screen, tex);
si_set_optimal_micro_tile_mode(rctx->screen, tex);
evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
@ -2982,7 +2883,7 @@ r600_texture_from_memobj(struct pipe_screen *screen,
return &rtex->resource.b.b;
}
void r600_init_screen_texture_functions(struct r600_common_screen *rscreen)
void si_init_screen_texture_functions(struct r600_common_screen *rscreen)
{
rscreen->b.resource_from_handle = r600_texture_from_handle;
rscreen->b.resource_get_handle = r600_texture_get_handle;
@ -2991,7 +2892,7 @@ void r600_init_screen_texture_functions(struct r600_common_screen *rscreen)
rscreen->b.memobj_destroy = r600_memobj_destroy;
}
void r600_init_context_texture_functions(struct r600_common_context *rctx)
void si_init_context_texture_functions(struct r600_common_context *rctx)
{
rctx->b.create_surface = r600_create_surface;
rctx->b.surface_destroy = r600_surface_destroy;


@ -115,8 +115,8 @@ static void r600_scissor_make_union(struct r600_signed_scissor *out,
out->maxy = MAX2(out->maxy, in->maxy);
}
void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
struct pipe_scissor_state *scissor)
void si_apply_scissor_bug_workaround(struct r600_common_context *rctx,
struct pipe_scissor_state *scissor)
{
if (rctx->chip_class == EVERGREEN || rctx->chip_class == CAYMAN) {
if (scissor->maxx == 0)
@ -147,7 +147,7 @@ static void r600_emit_one_scissor(struct r600_common_context *rctx,
if (scissor)
r600_clip_scissor(&final, scissor);
evergreen_apply_scissor_bug_workaround(rctx, &final);
si_apply_scissor_bug_workaround(rctx, &final);
radeon_emit(cs, S_028250_TL_X(final.minx) |
S_028250_TL_Y(final.miny) |
@ -368,8 +368,8 @@ static void r600_emit_viewport_states(struct r600_common_context *rctx,
}
/* Set viewport dependencies on pipe_rasterizer_state. */
void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
bool scissor_enable, bool clip_halfz)
void si_viewport_set_rast_deps(struct r600_common_context *rctx,
bool scissor_enable, bool clip_halfz)
{
if (rctx->scissor_enabled != scissor_enable) {
rctx->scissor_enabled = scissor_enable;
@ -389,8 +389,8 @@ void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
* is delayed. When a shader with VIEWPORT_INDEX appears, this should be
* called to emit the rest.
*/
void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
struct tgsi_shader_info *info)
void si_update_vs_writes_viewport_index(struct r600_common_context *rctx,
struct tgsi_shader_info *info)
{
bool vs_window_space;
@ -420,7 +420,7 @@ void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
}
void r600_init_viewport_functions(struct r600_common_context *rctx)
void si_init_viewport_functions(struct r600_common_context *rctx)
{
rctx->scissors.atom.emit = r600_emit_scissors;
rctx->viewports.atom.emit = r600_emit_viewport_states;


@ -1101,13 +1101,13 @@ static void ruvd_destroy(struct pipe_video_codec *decoder)
dec->ws->cs_destroy(dec->cs);
for (i = 0; i < NUM_BUFFERS; ++i) {
rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
rvid_destroy_buffer(&dec->bs_buffers[i]);
si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
si_vid_destroy_buffer(&dec->bs_buffers[i]);
}
rvid_destroy_buffer(&dec->dpb);
rvid_destroy_buffer(&dec->ctx);
rvid_destroy_buffer(&dec->sessionctx);
si_vid_destroy_buffer(&dec->dpb);
si_vid_destroy_buffer(&dec->ctx);
si_vid_destroy_buffer(&dec->sessionctx);
FREE(dec);
}
@ -1178,7 +1178,7 @@ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,
if (new_size > buf->res->buf->size) {
dec->ws->buffer_unmap(buf->res->buf);
if (!rvid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
RVID_ERR("Can't resize bitstream buffer!");
return;
}
@ -1271,10 +1271,10 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
ctx_size = calc_ctx_size_h265_main10(dec, (struct pipe_h265_picture_desc*)picture);
else
ctx_size = calc_ctx_size_h265_main(dec);
if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocated context buffer.\n");
}
rvid_clear_buffer(decoder->context, &dec->ctx);
si_vid_clear_buffer(decoder->context, &dec->ctx);
}
if (dec->ctx.res)
@ -1341,9 +1341,9 @@ static void ruvd_flush(struct pipe_video_codec *decoder)
/**
* create and UVD decoder
*/
struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
const struct pipe_video_codec *templ,
ruvd_set_dtb set_dtb)
struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *context,
const struct pipe_video_codec *templ,
ruvd_set_dtb set_dtb)
{
struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws;
struct r600_common_context *rctx = (struct r600_common_context*)context;
@ -1398,7 +1398,7 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
dec->stream_type = profile2stream_type(dec, info.family);
dec->set_dtb = set_dtb;
dec->stream_handle = rvid_alloc_stream_handle();
dec->stream_handle = si_vid_alloc_stream_handle();
dec->screen = context->screen;
dec->ws = ws;
dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL);
@ -1415,48 +1415,48 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET);
if (have_it(dec))
msg_fb_it_size += IT_SCALING_TABLE_SIZE;
if (!rvid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
msg_fb_it_size, PIPE_USAGE_STAGING)) {
RVID_ERR("Can't allocated message buffers.\n");
goto error;
}
if (!rvid_create_buffer(dec->screen, &dec->bs_buffers[i],
if (!si_vid_create_buffer(dec->screen, &dec->bs_buffers[i],
bs_buf_size, PIPE_USAGE_STAGING)) {
RVID_ERR("Can't allocated bitstream buffers.\n");
goto error;
}
rvid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
rvid_clear_buffer(context, &dec->bs_buffers[i]);
si_vid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
si_vid_clear_buffer(context, &dec->bs_buffers[i]);
}
dpb_size = calc_dpb_size(dec);
if (dpb_size) {
if (!rvid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocated dpb.\n");
goto error;
}
rvid_clear_buffer(context, &dec->dpb);
si_vid_clear_buffer(context, &dec->dpb);
}
if (dec->stream_type == RUVD_CODEC_H264_PERF && info.family >= CHIP_POLARIS10) {
unsigned ctx_size = calc_ctx_size_h264_perf(dec);
if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocated context buffer.\n");
goto error;
}
rvid_clear_buffer(context, &dec->ctx);
si_vid_clear_buffer(context, &dec->ctx);
}
if (info.family >= CHIP_POLARIS10 && info.drm_minor >= 3) {
if (!rvid_create_buffer(dec->screen, &dec->sessionctx,
if (!si_vid_create_buffer(dec->screen, &dec->sessionctx,
UVD_SESSION_CONTEXT_SIZE,
PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocated session ctx.\n");
goto error;
}
rvid_clear_buffer(context, &dec->sessionctx);
si_vid_clear_buffer(context, &dec->sessionctx);
}
if (info.family >= CHIP_VEGA10) {
@ -1492,13 +1492,13 @@ error:
if (dec->cs) dec->ws->cs_destroy(dec->cs);
for (i = 0; i < NUM_BUFFERS; ++i) {
rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
rvid_destroy_buffer(&dec->bs_buffers[i]);
si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
si_vid_destroy_buffer(&dec->bs_buffers[i]);
}
rvid_destroy_buffer(&dec->dpb);
rvid_destroy_buffer(&dec->ctx);
rvid_destroy_buffer(&dec->sessionctx);
si_vid_destroy_buffer(&dec->dpb);
si_vid_destroy_buffer(&dec->ctx);
si_vid_destroy_buffer(&dec->sessionctx);
FREE(dec);
@ -1551,8 +1551,8 @@ static unsigned bank_wh(unsigned bankwh)
/**
* fill decoding target field from the luma and chroma surfaces
*/
void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
struct radeon_surf *chroma, enum ruvd_surface_type type)
void si_uvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
struct radeon_surf *chroma, enum ruvd_surface_type type)
{
switch (type) {
default:

View File

@ -437,11 +437,11 @@ typedef struct pb_buffer* (*ruvd_set_dtb)
(struct ruvd_msg* msg, struct vl_video_buffer *vb);
/* create an UVD decode */
struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
const struct pipe_video_codec *templat,
ruvd_set_dtb set_dtb);
struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *context,
const struct pipe_video_codec *templat,
ruvd_set_dtb set_dtb);
/* fill decoding target field from the luma and chroma surfaces */
void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
struct radeon_surf *chroma, enum ruvd_surface_type type);
void si_uvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
struct radeon_surf *chroma, enum ruvd_surface_type type);
#endif


@ -198,7 +198,7 @@ static unsigned get_cpb_num(struct rvce_encoder *enc)
/**
* Get the slot for the currently encoded frame
*/
struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc)
struct rvce_cpb_slot *si_current_slot(struct rvce_encoder *enc)
{
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list);
}
@ -206,7 +206,7 @@ struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc)
/**
* Get the slot for L0
*/
struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc)
struct rvce_cpb_slot *si_l0_slot(struct rvce_encoder *enc)
{
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list);
}
@ -214,7 +214,7 @@ struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc)
/**
* Get the slot for L1
*/
struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc)
struct rvce_cpb_slot *si_l1_slot(struct rvce_encoder *enc)
{
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list);
}
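
The three slot helpers above are pure pointer arithmetic over the encoder's circular CPB list: LIST_ENTRY is a container_of-style macro that recovers the enclosing rvce_cpb_slot from its embedded list node, and the current/L0/L1 slots are simply the tail, first and second entries of that list. A minimal standalone sketch of the lookup pattern follows; the list type, list_add_tail() helper and slot fields are simplified stand-ins for Mesa's list utilities, not the driver's own definitions.

    #include <stddef.h>
    #include <stdio.h>

    struct list_head { struct list_head *prev, *next; };

    /* container_of-style lookup, mirroring Mesa's LIST_ENTRY(type, node, member) */
    #define LIST_ENTRY(type, node, member) \
        ((type *)((char *)(node) - offsetof(type, member)))

    struct cpb_slot {
        int frame_num;
        struct list_head list; /* linkage into the encoder's CPB list */
    };

    static void list_add_tail(struct list_head *node, struct list_head *head)
    {
        node->prev = head->prev; node->next = head;
        head->prev->next = node; head->prev = node;
    }

    int main(void)
    {
        struct list_head cpb = { &cpb, &cpb }; /* empty circular list */
        struct cpb_slot slots[3] = { { 10 }, { 11 }, { 12 } };

        for (int i = 0; i < 3; i++)
            list_add_tail(&slots[i].list, &cpb);

        /* tail (head.prev), first (head.next) and second (head.next->next)
         * entries, as in si_current_slot/si_l0_slot/si_l1_slot */
        struct cpb_slot *cur = LIST_ENTRY(struct cpb_slot, cpb.prev, list);
        struct cpb_slot *l0  = LIST_ENTRY(struct cpb_slot, cpb.next, list);
        struct cpb_slot *l1  = LIST_ENTRY(struct cpb_slot, cpb.next->next, list);

        printf("current=%d l0=%d l1=%d\n", cur->frame_num, l0->frame_num, l1->frame_num);
        return 0;
    }
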
@ -222,8 +222,8 @@ struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc)
/**
* Calculate the offsets into the CPB
*/
void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
signed *luma_offset, signed *chroma_offset)
void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
signed *luma_offset, signed *chroma_offset)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)enc->screen;
unsigned pitch, vpitch, fsize;
@ -249,15 +249,15 @@ static void rvce_destroy(struct pipe_video_codec *encoder)
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
if (enc->stream_handle) {
struct rvid_buffer fb;
rvid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
enc->fb = &fb;
enc->session(enc);
enc->feedback(enc);
enc->destroy(enc);
flush(enc);
rvid_destroy_buffer(&fb);
si_vid_destroy_buffer(&fb);
}
rvid_destroy_buffer(&enc->cpb);
si_vid_destroy_buffer(&enc->cpb);
enc->ws->cs_destroy(enc->cs);
FREE(enc->cpb_array);
FREE(enc);
@ -278,7 +278,7 @@ static void rvce_begin_frame(struct pipe_video_codec *encoder,
enc->pic.quant_b_frames != pic->quant_b_frames;
enc->pic = *pic;
get_pic_param(enc, pic);
si_get_pic_param(enc, pic);
enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
@ -291,8 +291,8 @@ static void rvce_begin_frame(struct pipe_video_codec *encoder,
if (!enc->stream_handle) {
struct rvid_buffer fb;
enc->stream_handle = rvid_alloc_stream_handle();
rvid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
enc->stream_handle = si_vid_alloc_stream_handle();
si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
enc->fb = &fb;
enc->session(enc);
enc->create(enc);
@ -300,7 +300,7 @@ static void rvce_begin_frame(struct pipe_video_codec *encoder,
enc->feedback(enc);
flush(enc);
//dump_feedback(enc, &fb);
rvid_destroy_buffer(&fb);
si_vid_destroy_buffer(&fb);
need_rate_control = false;
}
@ -321,7 +321,7 @@ static void rvce_encode_bitstream(struct pipe_video_codec *encoder,
enc->bs_size = destination->width0;
*fb = enc->fb = CALLOC_STRUCT(rvid_buffer);
if (!rvid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) {
if (!si_vid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) {
RVID_ERR("Can't create feedback buffer.\n");
return;
}
@ -370,7 +370,7 @@ static void rvce_get_feedback(struct pipe_video_codec *encoder,
enc->ws->buffer_unmap(fb->res->buf);
}
//dump_feedback(enc, fb);
rvid_destroy_buffer(fb);
si_vid_destroy_buffer(fb);
FREE(fb);
}
@ -390,10 +390,10 @@ static void rvce_cs_flush(void *ctx, unsigned flags,
// just ignored
}
struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
const struct pipe_video_codec *templ,
struct radeon_winsys* ws,
rvce_get_buffer get_buffer)
struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context,
const struct pipe_video_codec *templ,
struct radeon_winsys* ws,
rvce_get_buffer get_buffer)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)context->screen;
struct r600_common_context *rctx = (struct r600_common_context*)context;
@ -406,7 +406,7 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
RVID_ERR("Kernel doesn't supports VCE!\n");
return NULL;
} else if (!rvce_is_fw_version_supported(rscreen)) {
} else if (!si_vce_is_fw_version_supported(rscreen)) {
RVID_ERR("Unsupported VCE fw version loaded!\n");
return NULL;
}
@ -479,7 +479,7 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
cpb_size += RVCE_MAX_AUX_BUFFER_NUM *
RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
tmp_buf->destroy(tmp_buf);
if (!rvid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
if (!si_vid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't create CPB buffer.\n");
goto error;
}
@ -492,29 +492,29 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
switch (rscreen->info.vce_fw_version) {
case FW_40_2_2:
radeon_vce_40_2_2_init(enc);
get_pic_param = radeon_vce_40_2_2_get_param;
si_vce_40_2_2_init(enc);
si_get_pic_param = si_vce_40_2_2_get_param;
break;
case FW_50_0_1:
case FW_50_1_2:
case FW_50_10_2:
case FW_50_17_3:
radeon_vce_50_init(enc);
get_pic_param = radeon_vce_50_get_param;
si_vce_50_init(enc);
si_get_pic_param = si_vce_50_get_param;
break;
case FW_52_0_3:
case FW_52_4_3:
case FW_52_8_3:
radeon_vce_52_init(enc);
get_pic_param = radeon_vce_52_get_param;
si_vce_52_init(enc);
si_get_pic_param = si_vce_52_get_param;
break;
default:
if ((rscreen->info.vce_fw_version & (0xff << 24)) == FW_53) {
radeon_vce_52_init(enc);
get_pic_param = radeon_vce_52_get_param;
si_vce_52_init(enc);
si_get_pic_param = si_vce_52_get_param;
} else
goto error;
}
@ -525,7 +525,7 @@ error:
if (enc->cs)
enc->ws->cs_destroy(enc->cs);
rvid_destroy_buffer(&enc->cpb);
si_vid_destroy_buffer(&enc->cpb);
FREE(enc->cpb_array);
FREE(enc);
@ -535,7 +535,7 @@ error:
/**
* check if kernel has the right fw version loaded
*/
bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
bool si_vce_is_fw_version_supported(struct r600_common_screen *rscreen)
{
switch (rscreen->info.vce_fw_version) {
case FW_40_2_2:
@ -558,9 +558,9 @@ bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
/**
* Add the buffer as relocation to the current command submission
*/
void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
enum radeon_bo_usage usage, enum radeon_bo_domain domain,
signed offset)
void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
enum radeon_bo_usage usage, enum radeon_bo_domain domain,
signed offset)
{
int reloc_idx;


@ -40,9 +40,9 @@
#define RVCE_BEGIN(cmd) { \
uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \
RVCE_CS(cmd)
#define RVCE_READ(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
#define RVCE_WRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
#define RVCE_READWRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
#define RVCE_READ(buf, domain, off) si_vce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
#define RVCE_WRITE(buf, domain, off) si_vce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
#define RVCE_READWRITE(buf, domain, off) si_vce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
#define RVCE_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; }
#define RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE (4096 * 16 * 2.5)
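
The RVCE_BEGIN/RVCE_CS/RVCE_END macros renamed above build each VCE command packet by reserving one dword for the packet size, appending the command id and payload dwords, and back-patching the size in bytes when the packet is closed. Here is a self-contained sketch of that reserve-and-patch pattern; the buffer type, command id and payload value are made up for illustration and do not match the real radeon_winsys_cs layout.

    #include <stdint.h>
    #include <stdio.h>

    struct cmd_buf { uint32_t buf[64]; unsigned cdw; };
    static struct cmd_buf cs;

    #define CS(value)  (cs.buf[cs.cdw++] = (value))
    #define BEGIN(cmd) { uint32_t *begin = &cs.buf[cs.cdw++]; CS(cmd)
    #define END()      *begin = (uint32_t)((&cs.buf[cs.cdw] - begin) * 4); }

    int main(void)
    {
        /* Emit one packet: [size in bytes][command id][payload dword]. */
        BEGIN(0x00000001);  /* hypothetical command id */
        CS(0xdeadbeef);     /* hypothetical payload, e.g. a session handle */
        END();

        /* The patched size covers the size dword itself plus the payload,
         * so this prints 12 bytes spread over 3 dwords. */
        for (unsigned i = 0; i < cs.cdw; i++)
            printf("dw%u: 0x%08x\n", i, cs.buf[i]);
        return 0;
    }
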
@ -417,46 +417,46 @@ struct rvce_encoder {
};
/* CPB handling functions */
struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc);
struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc);
struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc);
void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
signed *luma_offset, signed *chroma_offset);
struct rvce_cpb_slot *si_current_slot(struct rvce_encoder *enc);
struct rvce_cpb_slot *si_l0_slot(struct rvce_encoder *enc);
struct rvce_cpb_slot *si_l1_slot(struct rvce_encoder *enc);
void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
signed *luma_offset, signed *chroma_offset);
struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
const struct pipe_video_codec *templat,
struct radeon_winsys* ws,
rvce_get_buffer get_buffer);
struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context,
const struct pipe_video_codec *templat,
struct radeon_winsys* ws,
rvce_get_buffer get_buffer);
bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen);
bool si_vce_is_fw_version_supported(struct r600_common_screen *rscreen);
void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
enum radeon_bo_usage usage, enum radeon_bo_domain domain,
signed offset);
void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
enum radeon_bo_usage usage, enum radeon_bo_domain domain,
signed offset);
/* init vce fw 40.2.2 specific callbacks */
void radeon_vce_40_2_2_init(struct rvce_encoder *enc);
void si_vce_40_2_2_init(struct rvce_encoder *enc);
/* init vce fw 50 specific callbacks */
void radeon_vce_50_init(struct rvce_encoder *enc);
void si_vce_50_init(struct rvce_encoder *enc);
/* init vce fw 52 specific callbacks */
void radeon_vce_52_init(struct rvce_encoder *enc);
void si_vce_52_init(struct rvce_encoder *enc);
/* version specific function for getting parameters */
void (*get_pic_param)(struct rvce_encoder *enc,
void (*si_get_pic_param)(struct rvce_encoder *enc,
struct pipe_h264_enc_picture_desc *pic);
/* get parameters for vce 40.2.2 */
void radeon_vce_40_2_2_get_param(struct rvce_encoder *enc,
struct pipe_h264_enc_picture_desc *pic);
void si_vce_40_2_2_get_param(struct rvce_encoder *enc,
struct pipe_h264_enc_picture_desc *pic);
/* get parameters for vce 50 */
void radeon_vce_50_get_param(struct rvce_encoder *enc,
struct pipe_h264_enc_picture_desc *pic);
void si_vce_50_get_param(struct rvce_encoder *enc,
struct pipe_h264_enc_picture_desc *pic);
/* get parameters for vce 52 */
void radeon_vce_52_get_param(struct rvce_encoder *enc,
struct pipe_h264_enc_picture_desc *pic);
void si_vce_52_get_param(struct rvce_encoder *enc,
struct pipe_h264_enc_picture_desc *pic);
#endif


@ -363,8 +363,8 @@ static void encode(struct rvce_encoder *enc)
RVCE_CS(0x00000000); // pictureStructure
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
struct rvce_cpb_slot *l0 = l0_slot(enc);
rvce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
struct rvce_cpb_slot *l0 = si_l0_slot(enc);
si_vce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
RVCE_CS(l0->picture_type); // encPicType
RVCE_CS(l0->frame_num); // frameNumber
RVCE_CS(l0->pic_order_cnt); // pictureOrderCount
@ -389,8 +389,8 @@ static void encode(struct rvce_encoder *enc)
// encReferencePictureL1[0]
RVCE_CS(0x00000000); // pictureStructure
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
struct rvce_cpb_slot *l1 = l1_slot(enc);
rvce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
struct rvce_cpb_slot *l1 = si_l1_slot(enc);
si_vce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
RVCE_CS(l1->picture_type); // encPicType
RVCE_CS(l1->frame_num); // frameNumber
RVCE_CS(l1->pic_order_cnt); // pictureOrderCount
@ -404,7 +404,7 @@ static void encode(struct rvce_encoder *enc)
RVCE_CS(0xffffffff); // chromaOffset
}
rvce_frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset);
si_vce_frame_offset(enc, si_current_slot(enc), &luma_offset, &chroma_offset);
RVCE_CS(luma_offset); // encReconstructedLumaOffset
RVCE_CS(chroma_offset); // encReconstructedChromaOffset
RVCE_CS(0x00000000); // encColocBufferOffset
@ -431,11 +431,11 @@ static void destroy(struct rvce_encoder *enc)
RVCE_END();
}
void radeon_vce_40_2_2_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic)
void si_vce_40_2_2_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic)
{
}
void radeon_vce_40_2_2_init(struct rvce_encoder *enc)
void si_vce_40_2_2_init(struct rvce_encoder *enc)
{
enc->session = session;
enc->task_info = task_info;


@ -173,8 +173,8 @@ static void encode(struct rvce_encoder *enc)
RVCE_CS(0x00000000); // pictureStructure
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
struct rvce_cpb_slot *l0 = l0_slot(enc);
rvce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
struct rvce_cpb_slot *l0 = si_l0_slot(enc);
si_vce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
RVCE_CS(l0->picture_type); // encPicType
RVCE_CS(l0->frame_num); // frameNumber
RVCE_CS(l0->pic_order_cnt); // pictureOrderCount
@ -199,8 +199,8 @@ static void encode(struct rvce_encoder *enc)
// encReferencePictureL1[0]
RVCE_CS(0x00000000); // pictureStructure
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
struct rvce_cpb_slot *l1 = l1_slot(enc);
rvce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
struct rvce_cpb_slot *l1 = si_l1_slot(enc);
si_vce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
RVCE_CS(l1->picture_type); // encPicType
RVCE_CS(l1->frame_num); // frameNumber
RVCE_CS(l1->pic_order_cnt); // pictureOrderCount
@ -214,7 +214,7 @@ static void encode(struct rvce_encoder *enc)
RVCE_CS(0xffffffff); // chromaOffset
}
rvce_frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset);
si_vce_frame_offset(enc, si_current_slot(enc), &luma_offset, &chroma_offset);
RVCE_CS(luma_offset); // encReconstructedLumaOffset
RVCE_CS(chroma_offset); // encReconstructedChromaOffset
RVCE_CS(0x00000000); // encColocBufferOffset
@ -233,13 +233,13 @@ static void encode(struct rvce_encoder *enc)
RVCE_END();
}
void radeon_vce_50_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic)
void si_vce_50_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic)
{
}
void radeon_vce_50_init(struct rvce_encoder *enc)
void si_vce_50_init(struct rvce_encoder *enc)
{
radeon_vce_40_2_2_init(enc);
si_vce_40_2_2_init(enc);
/* only the two below are different */
enc->rate_control = rate_control;


@ -138,7 +138,7 @@ static void get_vui_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture
enc->enc_pic.vui.max_dec_frame_buffering = 0x00000003;
}
void radeon_vce_52_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic)
void si_vce_52_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic)
{
get_rate_control_param(enc, pic);
get_motion_estimation_param(enc, pic);
@ -319,8 +319,8 @@ static void encode(struct rvce_encoder *enc)
RVCE_CS(0x00000000); // pictureStructure
if(enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
struct rvce_cpb_slot *l0 = l0_slot(enc);
rvce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
struct rvce_cpb_slot *l0 = si_l0_slot(enc);
si_vce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
RVCE_CS(l0->picture_type);
RVCE_CS(l0->frame_num);
RVCE_CS(l0->pic_order_cnt);
@ -356,8 +356,8 @@ static void encode(struct rvce_encoder *enc)
// encReferencePictureL1[0]
RVCE_CS(0x00000000); // pictureStructure
if(enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
struct rvce_cpb_slot *l1 = l1_slot(enc);
rvce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
struct rvce_cpb_slot *l1 = si_l1_slot(enc);
si_vce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
RVCE_CS(l1->picture_type);
RVCE_CS(l1->frame_num);
RVCE_CS(l1->pic_order_cnt);
@ -376,7 +376,7 @@ static void encode(struct rvce_encoder *enc)
RVCE_CS(enc->enc_pic.eo.l1_chroma_offset);
}
rvce_frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset);
si_vce_frame_offset(enc, si_current_slot(enc), &luma_offset, &chroma_offset);
RVCE_CS(luma_offset);
RVCE_CS(chroma_offset);
RVCE_CS(enc->enc_pic.eo.enc_coloc_buffer_offset);
@ -646,7 +646,7 @@ static void vui(struct rvce_encoder *enc)
RVCE_END();
}
void radeon_vce_52_init(struct rvce_encoder *enc)
void si_vce_52_init(struct rvce_encoder *enc)
{
enc->session = session;
enc->task_info = task_info;


@ -678,9 +678,9 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec,
(struct pipe_h265_picture_desc*)picture);
else
ctx_size = calc_ctx_size_h265_main(dec);
if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT))
if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT))
RVID_ERR("Can't allocated context buffer.\n");
rvid_clear_buffer(dec->base.context, &dec->ctx);
si_vid_clear_buffer(dec->base.context, &dec->ctx);
}
break;
}
@ -1026,13 +1026,13 @@ static void radeon_dec_destroy(struct pipe_video_codec *decoder)
dec->ws->cs_destroy(dec->cs);
for (i = 0; i < NUM_BUFFERS; ++i) {
rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
rvid_destroy_buffer(&dec->bs_buffers[i]);
si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
si_vid_destroy_buffer(&dec->bs_buffers[i]);
}
rvid_destroy_buffer(&dec->dpb);
rvid_destroy_buffer(&dec->ctx);
rvid_destroy_buffer(&dec->sessionctx);
si_vid_destroy_buffer(&dec->dpb);
si_vid_destroy_buffer(&dec->ctx);
si_vid_destroy_buffer(&dec->sessionctx);
FREE(dec);
}
@ -1096,7 +1096,7 @@ static void radeon_dec_decode_bitstream(struct pipe_video_codec *decoder,
if (new_size > buf->res->buf->size) {
dec->ws->buffer_unmap(buf->res->buf);
if (!rvid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
RVID_ERR("Can't resize bitstream buffer!");
return;
}
@ -1227,7 +1227,7 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
dec->base.flush = radeon_dec_flush;
dec->stream_type = stream_type;
dec->stream_handle = rvid_alloc_stream_handle();
dec->stream_handle = si_vid_alloc_stream_handle();
dec->screen = context->screen;
dec->ws = ws;
dec->cs = ws->cs_create(rctx->ctx, RING_VCN_DEC, NULL, NULL);
@ -1242,47 +1242,47 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
if (have_it(dec))
msg_fb_it_size += IT_SCALING_TABLE_SIZE;
/* use vram to improve performance, workaround an unknown bug */
if (!rvid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
msg_fb_it_size, PIPE_USAGE_DEFAULT)) {
if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
msg_fb_it_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocated message buffers.\n");
goto error;
}
if (!rvid_create_buffer(dec->screen, &dec->bs_buffers[i],
bs_buf_size, PIPE_USAGE_STAGING)) {
if (!si_vid_create_buffer(dec->screen, &dec->bs_buffers[i],
bs_buf_size, PIPE_USAGE_STAGING)) {
RVID_ERR("Can't allocated bitstream buffers.\n");
goto error;
}
rvid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
rvid_clear_buffer(context, &dec->bs_buffers[i]);
si_vid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
si_vid_clear_buffer(context, &dec->bs_buffers[i]);
}
dpb_size = calc_dpb_size(dec);
if (!rvid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocated dpb.\n");
goto error;
}
rvid_clear_buffer(context, &dec->dpb);
si_vid_clear_buffer(context, &dec->dpb);
if (dec->stream_type == RDECODE_CODEC_H264_PERF) {
unsigned ctx_size = calc_ctx_size_h264_perf(dec);
if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocated context buffer.\n");
goto error;
}
rvid_clear_buffer(context, &dec->ctx);
si_vid_clear_buffer(context, &dec->ctx);
}
if (!rvid_create_buffer(dec->screen, &dec->sessionctx,
RDECODE_SESSION_CONTEXT_SIZE,
PIPE_USAGE_DEFAULT)) {
if (!si_vid_create_buffer(dec->screen, &dec->sessionctx,
RDECODE_SESSION_CONTEXT_SIZE,
PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocated session ctx.\n");
goto error;
}
rvid_clear_buffer(context, &dec->sessionctx);
si_vid_clear_buffer(context, &dec->sessionctx);
map_msg_fb_it_buf(dec);
rvcn_dec_message_create(dec);
@ -1299,13 +1299,13 @@ error:
if (dec->cs) dec->ws->cs_destroy(dec->cs);
for (i = 0; i < NUM_BUFFERS; ++i) {
rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
rvid_destroy_buffer(&dec->bs_buffers[i]);
si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
si_vid_destroy_buffer(&dec->bs_buffers[i]);
}
rvid_destroy_buffer(&dec->dpb);
rvid_destroy_buffer(&dec->ctx);
rvid_destroy_buffer(&dec->sessionctx);
si_vid_destroy_buffer(&dec->dpb);
si_vid_destroy_buffer(&dec->ctx);
si_vid_destroy_buffer(&dec->sessionctx);
FREE(dec);


@ -46,7 +46,7 @@
#define UVD_FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8))
/* generate an stream handle */
unsigned rvid_alloc_stream_handle()
unsigned si_vid_alloc_stream_handle()
{
static unsigned counter = 0;
unsigned stream_handle = 0;
@ -61,8 +61,8 @@ unsigned rvid_alloc_stream_handle()
}
/* create a buffer in the winsys */
bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
unsigned size, unsigned usage)
bool si_vid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
unsigned size, unsigned usage)
{
memset(buffer, 0, sizeof(*buffer));
buffer->usage = usage;
@ -79,14 +79,14 @@ bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
}
/* destroy a buffer */
void rvid_destroy_buffer(struct rvid_buffer *buffer)
void si_vid_destroy_buffer(struct rvid_buffer *buffer)
{
r600_resource_reference(&buffer->res, NULL);
}
/* reallocate a buffer, preserving its content */
bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
struct rvid_buffer *new_buf, unsigned new_size)
bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
struct rvid_buffer *new_buf, unsigned new_size)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
struct radeon_winsys* ws = rscreen->ws;
@ -94,7 +94,7 @@ bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
struct rvid_buffer old_buf = *new_buf;
void *src = NULL, *dst = NULL;
if (!rvid_create_buffer(screen, new_buf, new_size, new_buf->usage))
if (!si_vid_create_buffer(screen, new_buf, new_size, new_buf->usage))
goto error;
src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ);
@ -113,19 +113,19 @@ bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
}
ws->buffer_unmap(new_buf->res->buf);
ws->buffer_unmap(old_buf.res->buf);
rvid_destroy_buffer(&old_buf);
si_vid_destroy_buffer(&old_buf);
return true;
error:
if (src)
ws->buffer_unmap(old_buf.res->buf);
rvid_destroy_buffer(new_buf);
si_vid_destroy_buffer(new_buf);
*new_buf = old_buf;
return false;
}
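
si_vid_resize_buffer above follows a copy-on-grow pattern: create a replacement buffer, copy the old contents across, destroy the old storage, and fall back to the original buffer if anything fails. A plain-C sketch of the same pattern on heap memory (the struct and helper names here are illustrative, not the driver API):

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct vid_buffer { void *data; size_t size; };

    static bool vid_buffer_create(struct vid_buffer *buf, size_t size)
    {
        buf->data = calloc(1, size);
        buf->size = size;
        return buf->data != NULL;
    }

    static void vid_buffer_destroy(struct vid_buffer *buf)
    {
        free(buf->data);
        buf->data = NULL;
        buf->size = 0;
    }

    /* Resize *buf while preserving its contents; on failure the original
     * buffer is left untouched, mirroring the error path above. */
    static bool vid_buffer_resize(struct vid_buffer *buf, size_t new_size)
    {
        struct vid_buffer old = *buf;
        if (!vid_buffer_create(buf, new_size)) {
            *buf = old;
            return false;
        }
        memcpy(buf->data, old.data, old.size < new_size ? old.size : new_size);
        vid_buffer_destroy(&old);
        return true;
    }

    int main(void)
    {
        struct vid_buffer buf;
        if (!vid_buffer_create(&buf, 16))
            return 1;
        memset(buf.data, 0xAB, buf.size);
        if (vid_buffer_resize(&buf, 64))
            printf("resized to %zu bytes, first byte 0x%02x\n",
                   buf.size, ((unsigned char *)buf.data)[0]);
        vid_buffer_destroy(&buf);
        return 0;
    }
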
/* clear the buffer with zeros */
void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
{
struct r600_common_context *rctx = (struct r600_common_context*)context;
@ -138,9 +138,9 @@ void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
* join surfaces into the same buffer with identical tiling params
* sumup their sizes and replace the backend buffers with a single bo
*/
void rvid_join_surfaces(struct r600_common_context *rctx,
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
struct radeon_surf *surfaces[VL_NUM_COMPONENTS])
void si_vid_join_surfaces(struct r600_common_context *rctx,
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
struct radeon_surf *surfaces[VL_NUM_COMPONENTS])
{
struct radeon_winsys* ws;
unsigned best_tiling, best_wh, off;
@ -218,10 +218,10 @@ void rvid_join_surfaces(struct r600_common_context *rctx,
pb_reference(&pb, NULL);
}
int rvid_get_video_param(struct pipe_screen *screen,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint,
enum pipe_video_cap param)
int si_vid_get_video_param(struct pipe_screen *screen,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint,
enum pipe_video_cap param)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
enum pipe_video_format codec = u_reduce_video_profile(profile);
@ -233,7 +233,7 @@ int rvid_get_video_param(struct pipe_screen *screen,
switch (param) {
case PIPE_VIDEO_CAP_SUPPORTED:
return codec == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
rvce_is_fw_version_supported(rscreen);
si_vce_is_fw_version_supported(rscreen);
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
@ -354,10 +354,10 @@ int rvid_get_video_param(struct pipe_screen *screen,
}
}
boolean rvid_is_format_supported(struct pipe_screen *screen,
enum pipe_format format,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint)
boolean si_vid_is_format_supported(struct pipe_screen *screen,
enum pipe_format format,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint)
{
/* HEVC 10 bit decoding should use P016 instead of NV12 if possible */
if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)


@ -48,38 +48,38 @@ struct rvid_buffer
};
/* generate an stream handle */
unsigned rvid_alloc_stream_handle(void);
unsigned si_vid_alloc_stream_handle(void);
/* create a buffer in the winsys */
bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
unsigned size, unsigned usage);
bool si_vid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
unsigned size, unsigned usage);
/* destroy a buffer */
void rvid_destroy_buffer(struct rvid_buffer *buffer);
void si_vid_destroy_buffer(struct rvid_buffer *buffer);
/* reallocate a buffer, preserving its content */
bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
struct rvid_buffer *new_buf, unsigned new_size);
bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
struct rvid_buffer *new_buf, unsigned new_size);
/* clear the buffer with zeros */
void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer);
void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer);
/* join surfaces into the same buffer with identical tiling params
sumup their sizes and replace the backend buffers with a single bo */
void rvid_join_surfaces(struct r600_common_context *rctx,
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
struct radeon_surf *surfaces[VL_NUM_COMPONENTS]);
void si_vid_join_surfaces(struct r600_common_context *rctx,
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
struct radeon_surf *surfaces[VL_NUM_COMPONENTS]);
/* returns supported codecs and other parameters */
int rvid_get_video_param(struct pipe_screen *screen,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint,
enum pipe_video_cap param);
int si_vid_get_video_param(struct pipe_screen *screen,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint,
enum pipe_video_cap param);
/* the hardware only supports NV12 */
boolean rvid_is_format_supported(struct pipe_screen *screen,
enum pipe_format format,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint);
boolean si_vid_is_format_supported(struct pipe_screen *screen,
enum pipe_format format,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint);
#endif // RADEON_VIDEO_H
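
As a quick orientation to the renamed API above, here is a minimal sketch of how the si_vid_* buffer helpers fit together. It relies only on the declarations shown in this header; the wrapper name alloc_and_clear_bitstream_buffer and the PIPE_USAGE_STAGING usage flag are illustrative assumptions, not taken from the driver.

#include "radeon_video.h"

/* sketch only: the wrapper name and usage flag are illustrative, not from the driver */
static bool alloc_and_clear_bitstream_buffer(struct pipe_screen *screen,
                                             struct pipe_context *pipe,
                                             struct rvid_buffer *buf,
                                             unsigned size)
{
	/* create a buffer in the winsys */
	if (!si_vid_create_buffer(screen, buf, size, PIPE_USAGE_STAGING))
		return false;

	/* clear it with zeros so the hardware never reads stale data */
	si_vid_clear_buffer(pipe, buf);
	return true;
}

/* the matching teardown is a single si_vid_destroy_buffer(buf) call */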

View File

@ -50,7 +50,7 @@ static void cik_sdma_copy_buffer(struct si_context *ctx,
src_offset += rsrc->gpu_address;
ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
r600_need_dma_space(&ctx->b, ncopy * 7, rdst, rsrc);
si_need_dma_space(&ctx->b, ncopy * 7, rdst, rsrc);
for (i = 0; i < ncopy; i++) {
csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE);
@ -95,7 +95,7 @@ static void cik_sdma_clear_buffer(struct pipe_context *ctx,
/* the same maximum size as for copying */
ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
r600_need_dma_space(&sctx->b, ncopy * 5, rdst, NULL);
si_need_dma_space(&sctx->b, ncopy * 5, rdst, NULL);
for (i = 0; i < ncopy; i++) {
csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE);
@ -194,7 +194,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
src_slice_pitch * bpp * (srcz + src_box->depth) <=
rsrc->resource.buf->size);
if (!r600_prepare_for_dma_blit(&sctx->b, rdst, dst_level, dstx, dsty,
if (!si_prepare_for_dma_blit(&sctx->b, rdst, dst_level, dstx, dsty,
dstz, rsrc, src_level, src_box))
return false;
@ -235,7 +235,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
srcy + copy_height != (1 << 14)))) {
struct radeon_winsys_cs *cs = sctx->b.dma.cs;
r600_need_dma_space(&sctx->b, 13, &rdst->resource, &rsrc->resource);
si_need_dma_space(&sctx->b, 13, &rdst->resource, &rsrc->resource);
radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW, 0) |
@ -398,7 +398,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
struct radeon_winsys_cs *cs = sctx->b.dma.cs;
uint32_t direction = linear == rdst ? 1u << 31 : 0;
r600_need_dma_space(&sctx->b, 14, &rdst->resource, &rsrc->resource);
si_need_dma_space(&sctx->b, 14, &rdst->resource, &rsrc->resource);
radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW, 0) |
@ -492,7 +492,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
dstx + copy_width != (1 << 14)))) {
struct radeon_winsys_cs *cs = sctx->b.dma.cs;
r600_need_dma_space(&sctx->b, 15, &rdst->resource, &rsrc->resource);
si_need_dma_space(&sctx->b, 15, &rdst->resource, &rsrc->resource);
radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW, 0));
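
Every copy path in this file follows the same chunking scheme: compute the number of packets with DIV_ROUND_UP, reserve space for all of them up front with si_need_dma_space, then emit one packet per MIN2-limited chunk. The standalone sketch below reproduces just that arithmetic; COPY_MAX_SIZE is an illustrative placeholder, not the driver's actual CIK_SDMA_COPY_MAX_SIZE value.

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#define MIN2(a, b)         ((a) < (b) ? (a) : (b))
#define COPY_MAX_SIZE      (1u << 21)   /* placeholder chunk limit */

int main(void)
{
	unsigned size = 5u << 20;   /* 5 MiB left to copy */
	unsigned ncopy = DIV_ROUND_UP(size, COPY_MAX_SIZE);

	/* the driver reserves ncopy * <dwords per packet> here, e.g.
	 * si_need_dma_space(&ctx->b, ncopy * 7, rdst, rsrc); */
	for (unsigned i = 0; i < ncopy; i++) {
		unsigned csize = MIN2(size, COPY_MAX_SIZE);
		printf("packet %u copies %u bytes\n", i, csize);
		size -= csize;
	}
	return 0;
}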

View File

@ -350,7 +350,7 @@ si_decompress_depth(struct si_context *sctx,
*/
if (copy_planes &&
(tex->flushed_depth_texture ||
r600_init_flushed_depth_texture(&sctx->b.b, &tex->resource.b.b, NULL))) {
si_init_flushed_depth_texture(&sctx->b.b, &tex->resource.b.b, NULL))) {
struct r600_texture *dst = tex->flushed_depth_texture;
unsigned fully_copied_levels;
unsigned levels = 0;
@ -621,7 +621,7 @@ static void si_check_render_feedback_texture(struct si_context *sctx,
}
if (render_feedback)
r600_texture_disable_dcc(&sctx->b, tex);
si_texture_disable_dcc(&sctx->b, tex);
}
static void si_check_render_feedback_textures(struct si_context *sctx,
@ -835,7 +835,7 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
zsbuf ? (struct r600_texture*)zsbuf->texture : NULL;
if (buffers & PIPE_CLEAR_COLOR) {
evergreen_do_fast_color_clear(&sctx->b, fb,
si_do_fast_color_clear(&sctx->b, fb,
&sctx->framebuffer.atom, &buffers,
&sctx->framebuffer.dirty_cbufs,
color);
@ -1175,7 +1175,7 @@ void si_resource_copy_region(struct pipe_context *ctx,
src_templ.format);
/* Initialize the surface. */
dst_view = r600_create_surface_custom(ctx, dst, &dst_templ,
dst_view = si_create_surface_custom(ctx, dst, &dst_templ,
dst_width0, dst_height0,
dst_width, dst_height);

View File

@ -175,7 +175,7 @@ static void *si_create_compute_state(
if ((sctx->b.debug.debug_message && !sctx->b.debug.async) ||
sctx->is_debug ||
r600_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE))
si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE))
si_create_compute_state_async(program, -1);
else
util_queue_add_job(&sscreen->shader_compiler_queue,
@ -328,7 +328,7 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx,
r600_resource_reference(&sctx->compute_scratch_buffer, NULL);
sctx->compute_scratch_buffer = (struct r600_resource*)
r600_aligned_buffer_create(&sctx->screen->b.b,
si_aligned_buffer_create(&sctx->screen->b.b,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
scratch_needed, 256);

View File

@ -309,7 +309,7 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size,
sctx->scratch_buffer->b.b.width0 < scratch_size) {
r600_resource_reference(&sctx->scratch_buffer, NULL);
sctx->scratch_buffer = (struct r600_resource*)
r600_aligned_buffer_create(&sctx->screen->b.b,
si_aligned_buffer_create(&sctx->screen->b.b,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
scratch_size, 256);
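
This hunk (and the matching ones in si_compute.c and si_state_shaders.c) grows the scratch buffer through the renamed si_aligned_buffer_create helper whenever the current allocation is too small. A condensed sketch of that pattern follows; it assumes the Mesa-internal si_context and r600_resource types, and ensure_scratch_size is a hypothetical wrapper name.

/* sketch only: types and helpers are Mesa-internal, the wrapper name is hypothetical */
static void ensure_scratch_size(struct si_context *sctx, unsigned scratch_size)
{
	if (sctx->scratch_buffer &&
	    sctx->scratch_buffer->b.b.width0 >= scratch_size)
		return;   /* the existing buffer is already large enough */

	/* drop the old allocation and create a new, 256-byte-aligned one */
	r600_resource_reference(&sctx->scratch_buffer, NULL);
	sctx->scratch_buffer = (struct r600_resource*)
		si_aligned_buffer_create(&sctx->screen->b.b,
					 R600_RESOURCE_FLAG_UNMAPPABLE,
					 PIPE_USAGE_DEFAULT,
					 scratch_size, 256);
}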

View File

@ -540,14 +540,14 @@ static void si_dump_framebuffer(struct si_context *sctx, struct u_log_context *l
rtex = (struct r600_texture*)state->cbufs[i]->texture;
u_log_printf(log, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i);
r600_print_texture_info(sctx->b.screen, rtex, log);
si_print_texture_info(sctx->b.screen, rtex, log);
u_log_printf(log, "\n");
}
if (state->zsbuf) {
rtex = (struct r600_texture*)state->zsbuf->texture;
u_log_printf(log, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
r600_print_texture_info(sctx->b.screen, rtex, log);
si_print_texture_info(sctx->b.screen, rtex, log);
u_log_printf(log, "\n");
}
}

View File

@ -390,7 +390,7 @@ static void si_set_sampler_view_desc(struct si_context *sctx,
if (unlikely(!is_buffer && sview->dcc_incompatible)) {
if (vi_dcc_enabled(rtex, view->u.tex.first_level))
if (!r600_texture_disable_dcc(&sctx->b, rtex))
if (!si_texture_disable_dcc(&sctx->b, rtex))
sctx->b.decompress_dcc(&sctx->b.b, rtex);
sview->dcc_incompatible = false;
@ -674,7 +674,7 @@ static void si_set_shader_image_desc(struct si_context *ctx,
* The decompression is relatively cheap if the surface
* has been decompressed already.
*/
if (!r600_texture_disable_dcc(&ctx->b, tex))
if (!si_texture_disable_dcc(&ctx->b, tex))
ctx->b.decompress_dcc(&ctx->b.b, tex);
}
@ -1404,7 +1404,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
*/
/* Set the VGT regs. */
r600_set_streamout_targets(ctx, num_targets, targets, offsets);
si_common_set_streamout_targets(ctx, num_targets, targets, offsets);
/* Set the shader resources. */
for (i = 0; i < num_targets; i++) {
@ -1636,10 +1636,10 @@ static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf
/* Update the streamout state. */
if (sctx->b.streamout.begin_emitted)
r600_emit_streamout_end(&sctx->b);
si_emit_streamout_end(&sctx->b);
sctx->b.streamout.append_bitmask =
sctx->b.streamout.enabled_mask;
r600_streamout_buffers_dirty(&sctx->b);
si_streamout_buffers_dirty(&sctx->b);
}
}
@ -1795,7 +1795,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
uint64_t old_va = rbuffer->gpu_address;
/* Reallocate the buffer in the same pipe_resource. */
r600_alloc_resource(&sctx->screen->b, rbuffer);
si_alloc_resource(&sctx->screen->b, rbuffer);
si_rebind_buffer(ctx, buf, old_va);
}

View File

@ -62,7 +62,7 @@ static void si_dma_copy_buffer(struct si_context *ctx,
}
ncopy = DIV_ROUND_UP(size, max_size);
r600_need_dma_space(&ctx->b, ncopy * 5, rdst, rsrc);
si_need_dma_space(&ctx->b, ncopy * 5, rdst, rsrc);
for (i = 0; i < ncopy; i++) {
count = MIN2(size, max_size);
@ -104,7 +104,7 @@ static void si_dma_clear_buffer(struct pipe_context *ctx,
/* the same maximum size as for copying */
ncopy = DIV_ROUND_UP(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
r600_need_dma_space(&sctx->b, ncopy * 4, rdst, NULL);
si_need_dma_space(&sctx->b, ncopy * 4, rdst, NULL);
for (i = 0; i < ncopy; i++) {
csize = MIN2(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
@ -193,7 +193,7 @@ static void si_dma_copy_tile(struct si_context *ctx,
mt = G_009910_MICRO_TILE_MODE(tile_mode);
size = copy_height * pitch;
ncopy = DIV_ROUND_UP(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
r600_need_dma_space(&ctx->b, ncopy * 9, &rdst->resource, &rsrc->resource);
si_need_dma_space(&ctx->b, ncopy * 9, &rdst->resource, &rsrc->resource);
for (i = 0; i < ncopy; i++) {
cheight = copy_height;
@ -261,7 +261,7 @@ static void si_dma_copy(struct pipe_context *ctx,
goto fallback;
if (src_box->depth > 1 ||
!r600_prepare_for_dma_blit(&sctx->b, rdst, dst_level, dstx, dsty,
!si_prepare_for_dma_blit(&sctx->b, rdst, dst_level, dstx, dsty,
dstz, rsrc, src_level, src_box))
goto fallback;

View File

@ -29,7 +29,7 @@
void si_destroy_saved_cs(struct si_saved_cs *scs)
{
radeon_clear_saved_cs(&scs->gfx);
si_clear_saved_cs(&scs->gfx);
r600_resource_reference(&scs->trace_buf, NULL);
free(scs);
}
@ -80,7 +80,7 @@ void si_context_gfx_flush(void *context, unsigned flags,
if (!radeon_emitted(cs, ctx->b.initial_gfx_cs_size))
return;
if (r600_check_device_reset(&ctx->b))
if (si_check_device_reset(&ctx->b))
return;
if (ctx->screen->b.debug_flags & DBG_CHECK_VM)
@ -98,7 +98,7 @@ void si_context_gfx_flush(void *context, unsigned flags,
ctx->gfx_flush_in_progress = true;
r600_preflush_suspend_features(&ctx->b);
si_preflush_suspend_features(&ctx->b);
ctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
SI_CONTEXT_PS_PARTIAL_FLUSH;
@ -115,7 +115,7 @@ void si_context_gfx_flush(void *context, unsigned flags,
si_log_hw_flush(ctx);
/* Save the IB for debug contexts. */
radeon_save_cs(ws, cs, &ctx->current_saved_cs->gfx, true);
si_save_cs(ws, cs, &ctx->current_saved_cs->gfx, true);
ctx->current_saved_cs->flushed = true;
}
@ -260,7 +260,7 @@ void si_begin_new_cs(struct si_context *ctx)
&ctx->scratch_buffer->b.b);
}
r600_postflush_resume_features(&ctx->b);
si_postflush_resume_features(&ctx->b);
assert(!ctx->b.gfx.cs->prev_dw);
ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->current.cdw;

View File

@ -614,10 +614,10 @@ static void si_pc_emit_stop(struct r600_common_context *ctx,
{
struct radeon_winsys_cs *cs = ctx->gfx.cs;
r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0,
si_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0,
EOP_DATA_SEL_VALUE_32BIT,
buffer, va, 0, R600_NOT_QUERY);
r600_gfx_wait_fence(ctx, va, 0, 0xffffffff);
si_gfx_wait_fence(ctx, va, 0, 0xffffffff);
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
@ -676,7 +676,7 @@ static void si_pc_emit_read(struct r600_common_context *ctx,
static void si_pc_cleanup(struct r600_common_screen *rscreen)
{
r600_perfcounters_do_destroy(rscreen->perfcounters);
si_perfcounters_do_destroy(rscreen->perfcounters);
rscreen->perfcounters = NULL;
}
@ -717,7 +717,7 @@ void si_init_perfcounters(struct si_screen *screen)
return;
pc->num_start_cs_dwords = 14;
pc->num_stop_cs_dwords = 14 + r600_gfx_write_fence_dwords(&screen->b);
pc->num_stop_cs_dwords = 14 + si_gfx_write_fence_dwords(&screen->b);
pc->num_instance_cs_dwords = 3;
pc->num_shaders_cs_dwords = 4;
@ -734,7 +734,7 @@ void si_init_perfcounters(struct si_screen *screen)
pc->emit_read = si_pc_emit_read;
pc->cleanup = si_pc_cleanup;
if (!r600_perfcounters_init(pc, num_blocks))
if (!si_perfcounters_init(pc, num_blocks))
goto error;
for (i = 0; i < num_blocks; ++i) {
@ -746,7 +746,7 @@ void si_init_perfcounters(struct si_screen *screen)
instances = 2;
}
r600_perfcounters_add_block(&screen->b, pc,
si_perfcounters_add_block(&screen->b, pc,
block->b->name,
block->b->flags,
block->b->num_counters,
@ -759,5 +759,5 @@ void si_init_perfcounters(struct si_screen *screen)
return;
error:
r600_perfcounters_do_destroy(pc);
si_perfcounters_do_destroy(pc);
}

View File

@ -88,7 +88,7 @@ static void si_destroy_context(struct pipe_context *context)
if (sctx->blitter)
util_blitter_destroy(sctx->blitter);
r600_common_context_cleanup(&sctx->b);
si_common_context_cleanup(&sctx->b);
LLVMDisposeTargetMachine(sctx->tm);
@ -145,7 +145,7 @@ si_create_llvm_target_machine(struct si_screen *sscreen)
sscreen->b.debug_flags & DBG_SI_SCHED ? ",+si-scheduler" : "");
return LLVMCreateTargetMachine(ac_get_llvm_target(triple), triple,
r600_get_llvm_processor_name(sscreen->b.family),
si_get_llvm_processor_name(sscreen->b.family),
features,
LLVMCodeGenLevelDefault,
LLVMRelocDefault,
@ -185,7 +185,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
if (!r600_common_context_init(&sctx->b, &sscreen->b, flags))
if (!si_common_context_init(&sctx->b, &sscreen->b, flags))
goto fail;
if (sscreen->b.info.drm_major == 3)
@ -243,7 +243,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
sctx->blitter = util_blitter_create(&sctx->b.b);
if (sctx->blitter == NULL)
goto fail;
sctx->blitter->draw_rectangle = r600_draw_rectangle;
sctx->blitter->draw_rectangle = si_draw_rectangle;
sctx->sample_mask.sample_mask = 0xffff;
@ -271,7 +271,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
* if NUM_RECORDS == 0). We need to use a dummy buffer instead. */
if (sctx->b.chip_class == CIK) {
sctx->null_const_buf.buffer =
r600_aligned_buffer_create(screen,
si_aligned_buffer_create(screen,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT, 16,
sctx->screen->b.info.tcc_cache_line_size);
@ -375,7 +375,7 @@ static struct pipe_context *si_pipe_create_context(struct pipe_screen *screen,
return ctx;
return threaded_context_create(ctx, &sscreen->b.pool_transfers,
r600_replace_buffer_storage,
si_replace_buffer_storage,
&((struct si_context*)ctx)->b.tc);
}
@ -835,13 +835,13 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
struct si_shader_part *part = parts[i];
parts[i] = part->next;
radeon_shader_binary_clean(&part->binary);
si_radeon_shader_binary_clean(&part->binary);
FREE(part);
}
}
mtx_destroy(&sscreen->shader_parts_mutex);
si_destroy_shader_cache(sscreen);
r600_destroy_common_screen(&sscreen->b);
si_destroy_common_screen(&sscreen->b);
}
static bool si_init_gs_info(struct si_screen *sscreen)
@ -885,7 +885,7 @@ static void si_handle_env_var_force_family(struct si_screen *sscreen)
return;
for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
if (!strcmp(family, r600_get_llvm_processor_name(i))) {
if (!strcmp(family, si_get_llvm_processor_name(i))) {
/* Override family and chip_class. */
sscreen->b.family = sscreen->b.info.family = i;
@ -969,7 +969,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
sscreen->b.b.get_compiler_options = si_get_compiler_options;
sscreen->b.b.get_device_uuid = radeonsi_get_device_uuid;
sscreen->b.b.get_driver_uuid = radeonsi_get_driver_uuid;
sscreen->b.b.resource_create = r600_resource_create_common;
sscreen->b.b.resource_create = si_resource_create_common;
si_init_screen_state_functions(sscreen);
@ -982,7 +982,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
if (driQueryOptionb(config->options, "radeonsi_enable_sisched"))
sscreen->b.debug_flags |= DBG_SI_SCHED;
if (!r600_common_screen_init(&sscreen->b, ws) ||
if (!si_common_screen_init(&sscreen->b, ws) ||
!si_init_gs_info(sscreen) ||
!si_init_shader_cache(sscreen)) {
FREE(sscreen);
@ -1110,7 +1110,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
sscreen->b.aux_context = si_create_context(&sscreen->b.b, 0);
if (sscreen->b.debug_flags & DBG_TEST_DMA)
r600_test_dma(&sscreen->b);
si_test_dma(&sscreen->b);
if (sscreen->b.debug_flags & (DBG_TEST_VMFAULT_CP |
DBG_TEST_VMFAULT_SDMA |

View File

@ -5109,7 +5109,7 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
if (!check_debug_option ||
r600_can_dump_shader(&sscreen->b, processor)) {
si_can_dump_shader(&sscreen->b, processor)) {
if (processor == PIPE_SHADER_FRAGMENT) {
fprintf(file, "*** SHADER CONFIG ***\n"
"SPI_PS_INPUT_ADDR = 0x%04x\n"
@ -5181,7 +5181,7 @@ void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
FILE *file, bool check_debug_option)
{
if (!check_debug_option ||
r600_can_dump_shader(&sscreen->b, processor))
si_can_dump_shader(&sscreen->b, processor))
si_dump_shader_key(processor, shader, file);
if (!check_debug_option && shader->binary.llvm_ir_string) {
@ -5198,7 +5198,7 @@ void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
}
if (!check_debug_option ||
(r600_can_dump_shader(&sscreen->b, processor) &&
(si_can_dump_shader(&sscreen->b, processor) &&
!(sscreen->b.debug_flags & DBG_NO_ASM))) {
fprintf(file, "\n%s:\n", si_get_shader_name(shader, processor));
@ -5236,7 +5236,7 @@ static int si_compile_llvm(struct si_screen *sscreen,
int r = 0;
unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations);
if (r600_can_dump_shader(&sscreen->b, processor)) {
if (si_can_dump_shader(&sscreen->b, processor)) {
fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR))) {
@ -5434,7 +5434,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
debug, PIPE_SHADER_GEOMETRY,
"GS Copy Shader");
if (!r) {
if (r600_can_dump_shader(&sscreen->b, PIPE_SHADER_GEOMETRY))
if (si_can_dump_shader(&sscreen->b, PIPE_SHADER_GEOMETRY))
fprintf(stderr, "GS Copy Shader:\n");
si_shader_dump(sscreen, ctx.shader, debug,
PIPE_SHADER_GEOMETRY, stderr, true);
@ -6352,7 +6352,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
/* Dump TGSI code before doing TGSI->LLVM conversion in case the
* conversion fails. */
if (r600_can_dump_shader(&sscreen->b, sel->info.processor) &&
if (si_can_dump_shader(&sscreen->b, sel->info.processor) &&
!(sscreen->b.debug_flags & DBG_NO_TGSI)) {
if (sel->tokens)
tgsi_dump(sel->tokens, 0);
@ -6561,7 +6561,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
si_optimize_vs_outputs(&ctx);
if ((debug && debug->debug_message) ||
r600_can_dump_shader(&sscreen->b, ctx.type))
si_can_dump_shader(&sscreen->b, ctx.type))
si_count_scratch_private_memory(&ctx);
/* Compile to bytecode. */
@ -7750,7 +7750,7 @@ void si_shader_destroy(struct si_shader *shader)
r600_resource_reference(&shader->bo, NULL);
if (!shader->is_binary_shared)
radeon_shader_binary_clean(&shader->binary);
si_radeon_shader_binary_clean(&shader->binary);
free(shader->shader_log);
}

View File

@ -1388,7 +1388,7 @@ void si_llvm_optimize_module(struct si_shader_context *ctx)
/* Dump LLVM IR before any optimization passes */
if (ctx->screen->b.debug_flags & DBG_PREOPT_IR &&
r600_can_dump_shader(&ctx->screen->b, ctx->type))
si_can_dump_shader(&ctx->screen->b, ctx->type))
LLVMDumpModule(ctx->gallivm.module);
/* Create the pass manager */
@ -1397,7 +1397,7 @@ void si_llvm_optimize_module(struct si_shader_context *ctx)
target_library_info = gallivm_create_target_library_info(triple);
LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
if (r600_extra_shader_checks(&ctx->screen->b, ctx->type))
if (si_extra_shader_checks(&ctx->screen->b, ctx->type))
LLVMAddVerifierPass(gallivm->passmgr);
LLVMAddAlwaysInlinerPass(gallivm->passmgr);

View File

@ -1003,7 +1003,7 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR;
sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color);
r600_viewport_set_rast_deps(&sctx->b, rs->scissor_enable, rs->clip_halfz);
si_viewport_set_rast_deps(&sctx->b, rs->scissor_enable, rs->clip_halfz);
si_pm4_bind_state(sctx, rasterizer, rs);
si_update_poly_offset_state(sctx);
@ -2093,7 +2093,7 @@ static unsigned si_is_vertex_format_supported(struct pipe_screen *screen,
static bool si_is_colorbuffer_format_supported(enum pipe_format format)
{
return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
r600_translate_colorswap(format, false) != ~0U;
si_translate_colorswap(format, false) != ~0U;
}
static bool si_is_zs_format_supported(enum pipe_format format)
@ -2354,7 +2354,7 @@ static void si_initialize_color_surface(struct si_context *sctx,
R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
}
assert(format != V_028C70_COLOR_INVALID);
swap = r600_translate_colorswap(surf->base.format, false);
swap = si_translate_colorswap(surf->base.format, false);
endian = si_colorformat_endian_swap(format);
/* blend clamp should be set for all NORM/SRGB types */
@ -2719,7 +2719,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
}
if (vi_dcc_enabled(rtex, surf->base.u.tex.level))
if (!r600_texture_disable_dcc(&sctx->b, rtex))
if (!si_texture_disable_dcc(&sctx->b, rtex))
sctx->b.decompress_dcc(ctx, rtex);
surf->dcc_incompatible = false;
@ -3184,7 +3184,7 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx,
if (nr_samples != sctx->msaa_sample_locs.nr_samples) {
sctx->msaa_sample_locs.nr_samples = nr_samples;
cayman_emit_msaa_sample_locs(cs, nr_samples);
si_common_emit_msaa_sample_locs(cs, nr_samples);
}
if (sctx->b.family >= CHIP_POLARIS10) {
@ -3296,7 +3296,7 @@ static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
S_028A4C_FORCE_EOV_REZ_ENABLE(1);
cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
si_common_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
sctx->ps_iter_samples,
sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0,
sc_mode_cntl_1);
@ -3629,7 +3629,7 @@ si_make_texture_descriptor(struct si_screen *screen,
}
if (tex->dcc_offset) {
unsigned swap = r600_translate_colorswap(pipe_format, false);
unsigned swap = si_translate_colorswap(pipe_format, false);
state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
} else {
@ -3805,7 +3805,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
/* Depth/stencil texturing sometimes needs separate texture. */
if (tmp->is_depth && !r600_can_sample_zs(tmp, view->is_stencil_sampler)) {
if (!tmp->flushed_depth_texture &&
!r600_init_flushed_depth_texture(ctx, texture, NULL)) {
!si_init_flushed_depth_texture(ctx, texture, NULL)) {
pipe_resource_reference(&view->base.texture, NULL);
FREE(view);
return NULL;
@ -4413,7 +4413,7 @@ void si_init_state_functions(struct si_context *sctx)
sctx->b.b.set_stencil_ref = si_set_stencil_ref;
sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
sctx->b.b.get_sample_position = cayman_get_sample_position;
sctx->b.b.get_sample_position = si_get_sample_position;
sctx->b.b.create_sampler_state = si_create_sampler_state;
sctx->b.b.delete_sampler_state = si_delete_sampler_state;

View File

@ -893,7 +893,7 @@ void si_emit_cache_flush(struct si_context *sctx)
/* Necessary for DCC */
if (rctx->chip_class == VI)
r600_gfx_write_event_eop(rctx, V_028A90_FLUSH_AND_INV_CB_DATA_TS,
si_gfx_write_event_eop(rctx, V_028A90_FLUSH_AND_INV_CB_DATA_TS,
0, EOP_DATA_SEL_DISCARD, NULL,
0, 0, R600_NOT_QUERY);
}
@ -1008,11 +1008,11 @@ void si_emit_cache_flush(struct si_context *sctx)
va = sctx->wait_mem_scratch->gpu_address;
sctx->wait_mem_number++;
r600_gfx_write_event_eop(rctx, cb_db_event, tc_flags,
si_gfx_write_event_eop(rctx, cb_db_event, tc_flags,
EOP_DATA_SEL_VALUE_32BIT,
sctx->wait_mem_scratch, va,
sctx->wait_mem_number, R600_NOT_QUERY);
r600_gfx_wait_fence(rctx, va, sctx->wait_mem_number, 0xffffffff);
si_gfx_wait_fence(rctx, va, sctx->wait_mem_number, 0xffffffff);
}
/* Make sure ME is idle (it executes most packets) before continuing.

View File

@ -2226,7 +2226,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
if ((sctx->b.debug.debug_message && !sctx->b.debug.async) ||
sctx->is_debug ||
r600_can_dump_shader(&sscreen->b, sel->info.processor))
si_can_dump_shader(&sscreen->b, sel->info.processor))
si_init_shader_selector_async(sel, -1);
else
util_queue_add_job(&sscreen->shader_compiler_queue, sel,
@ -2299,7 +2299,7 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
sctx->vs_shader.current = sel ? sel->first_variant : NULL;
si_update_common_shader_state(sctx);
r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
si_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
si_set_active_descriptors_for_shader(sctx, sel);
si_update_streamout_state(sctx);
si_update_clip_regs(sctx, old_hw_vs, old_hw_vs_variant,
@ -2342,7 +2342,7 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
if (sctx->ia_multi_vgt_param_key.u.uses_tess)
si_update_tess_uses_prim_id(sctx);
}
r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
si_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
si_set_active_descriptors_for_shader(sctx, sel);
si_update_streamout_state(sctx);
si_update_clip_regs(sctx, old_hw_vs, old_hw_vs_variant,
@ -2393,7 +2393,7 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
si_shader_change_notify(sctx);
sctx->last_tes_sh_base = -1; /* invalidate derived tess state */
}
r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
si_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
si_set_active_descriptors_for_shader(sctx, sel);
si_update_streamout_state(sctx);
si_update_clip_regs(sctx, old_hw_vs, old_hw_vs_variant,
@ -2710,7 +2710,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
if (update_esgs) {
pipe_resource_reference(&sctx->esgs_ring, NULL);
sctx->esgs_ring =
r600_aligned_buffer_create(sctx->b.b.screen,
si_aligned_buffer_create(sctx->b.b.screen,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
esgs_ring_size, alignment);
@ -2721,7 +2721,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
if (update_gsvs) {
pipe_resource_reference(&sctx->gsvs_ring, NULL);
sctx->gsvs_ring =
r600_aligned_buffer_create(sctx->b.b.screen,
si_aligned_buffer_create(sctx->b.b.screen,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
gsvs_ring_size, alignment);
@ -2963,7 +2963,7 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
r600_resource_reference(&sctx->scratch_buffer, NULL);
sctx->scratch_buffer = (struct r600_resource*)
r600_aligned_buffer_create(&sctx->screen->b.b,
si_aligned_buffer_create(&sctx->screen->b.b,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
scratch_needed_size, 256);
@ -3021,7 +3021,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
/* Use 64K alignment for both rings, so that we can pass the address
* to shaders as one SGPR containing bits [16:47].
*/
sctx->tf_ring = r600_aligned_buffer_create(sctx->b.b.screen,
sctx->tf_ring = si_aligned_buffer_create(sctx->b.b.screen,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
32768 * sctx->screen->b.info.max_se,
@ -3032,7 +3032,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
sctx->tess_offchip_ring =
r600_aligned_buffer_create(sctx->b.b.screen,
si_aligned_buffer_create(sctx->b.b.screen,
R600_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
max_offchip_buffers *

View File

@ -98,7 +98,7 @@ struct pipe_video_buffer *si_video_buffer_create(struct pipe_context *pipe,
pbs[i] = &resources[i]->resource.buf;
}
rvid_join_surfaces(&ctx->b, pbs, surfaces);
si_vid_join_surfaces(&ctx->b, pbs, surfaces);
for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
if (!resources[i])
@ -131,7 +131,7 @@ static struct pb_buffer* si_uvd_set_dtb(struct ruvd_msg *msg, struct vl_video_bu
msg->body.decode.dt_field_mode = buf->base.interlaced;
ruvd_set_dt_surfaces(msg, &luma->surface, (chroma) ? &chroma->surface : NULL, type);
si_uvd_set_dt_surfaces(msg, &luma->surface, (chroma) ? &chroma->surface : NULL, type);
return luma->resource.buf;
}
@ -160,8 +160,8 @@ struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context *context,
bool vcn = (ctx->b.family == CHIP_RAVEN) ? true : false;
if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE)
return rvce_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer);
return si_vce_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer);
return (vcn) ? radeon_create_decoder(context, templ) :
ruvd_create_decoder(context, templ, si_uvd_set_dtb);
si_common_uvd_create_decoder(context, templ, si_uvd_set_dtb);
}

View File

@ -128,7 +128,6 @@ nodist_EXTRA_pipe_r600_la_SOURCES = dummy.cpp
pipe_r600_la_LIBADD = \
$(PIPE_LIBS) \
$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la \
$(top_builddir)/src/gallium/drivers/radeon/libradeon.la \
$(top_builddir)/src/gallium/drivers/r600/libr600.la \
$(LIBDRM_LIBS) \
$(RADEON_LIBS) \