r600: fork and import gallium/radeon
This marks the end of code sharing between r600 and radeonsi. It's getting difficult to work on radeonsi without breaking r600. A lot of functions had to be renamed to prevent linker conflicts. There are also minor cleanups. Acked-by: Dave Airlie <airlied@redhat.com> Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
parent
e1623da818
commit
06bfb2d28f
|
@ -2704,8 +2704,7 @@ AM_CONDITIONAL(HAVE_GALLIUM_PL111, test "x$HAVE_GALLIUM_PL111" = xyes)
|
||||||
AM_CONDITIONAL(HAVE_GALLIUM_R300, test "x$HAVE_GALLIUM_R300" = xyes)
|
AM_CONDITIONAL(HAVE_GALLIUM_R300, test "x$HAVE_GALLIUM_R300" = xyes)
|
||||||
AM_CONDITIONAL(HAVE_GALLIUM_R600, test "x$HAVE_GALLIUM_R600" = xyes)
|
AM_CONDITIONAL(HAVE_GALLIUM_R600, test "x$HAVE_GALLIUM_R600" = xyes)
|
||||||
AM_CONDITIONAL(HAVE_GALLIUM_RADEONSI, test "x$HAVE_GALLIUM_RADEONSI" = xyes)
|
AM_CONDITIONAL(HAVE_GALLIUM_RADEONSI, test "x$HAVE_GALLIUM_RADEONSI" = xyes)
|
||||||
AM_CONDITIONAL(HAVE_GALLIUM_RADEON_COMMON, test "x$HAVE_GALLIUM_R600" = xyes -o \
|
AM_CONDITIONAL(HAVE_GALLIUM_RADEON_COMMON, test "x$HAVE_GALLIUM_RADEONSI" = xyes)
|
||||||
"x$HAVE_GALLIUM_RADEONSI" = xyes)
|
|
||||||
AM_CONDITIONAL(HAVE_GALLIUM_NOUVEAU, test "x$HAVE_GALLIUM_NOUVEAU" = xyes)
|
AM_CONDITIONAL(HAVE_GALLIUM_NOUVEAU, test "x$HAVE_GALLIUM_NOUVEAU" = xyes)
|
||||||
AM_CONDITIONAL(HAVE_GALLIUM_FREEDRENO, test "x$HAVE_GALLIUM_FREEDRENO" = xyes)
|
AM_CONDITIONAL(HAVE_GALLIUM_FREEDRENO, test "x$HAVE_GALLIUM_FREEDRENO" = xyes)
|
||||||
AM_CONDITIONAL(HAVE_GALLIUM_ETNAVIV, test "x$HAVE_GALLIUM_ETNAVIV" = xyes)
|
AM_CONDITIONAL(HAVE_GALLIUM_ETNAVIV, test "x$HAVE_GALLIUM_ETNAVIV" = xyes)
|
||||||
|
|
|
@ -42,7 +42,7 @@ if HAVE_GALLIUM_R300
|
||||||
SUBDIRS += drivers/r300
|
SUBDIRS += drivers/r300
|
||||||
endif
|
endif
|
||||||
|
|
||||||
## radeon - linked into r600 and radeonsi
|
## radeon - linked into radeonsi
|
||||||
if HAVE_GALLIUM_RADEON_COMMON
|
if HAVE_GALLIUM_RADEON_COMMON
|
||||||
SUBDIRS += drivers/radeon
|
SUBDIRS += drivers/radeon
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -11,6 +11,4 @@ TARGET_LIB_DEPS += \
|
||||||
TARGET_RADEON_WINSYS = \
|
TARGET_RADEON_WINSYS = \
|
||||||
$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la
|
$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la
|
||||||
|
|
||||||
TARGET_RADEON_COMMON = \
|
|
||||||
$(top_builddir)/src/gallium/drivers/radeon/libradeon.la
|
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -27,8 +27,7 @@ libr600_la_SOURCES = \
|
||||||
if HAVE_GALLIUM_LLVM
|
if HAVE_GALLIUM_LLVM
|
||||||
|
|
||||||
AM_CFLAGS += \
|
AM_CFLAGS += \
|
||||||
$(LLVM_CFLAGS) \
|
$(LLVM_CFLAGS)
|
||||||
-I$(top_srcdir)/src/gallium/drivers/radeon/
|
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
|
@ -29,7 +29,26 @@ C_SOURCES = \
|
||||||
r600_state_common.c \
|
r600_state_common.c \
|
||||||
r600_uvd.c \
|
r600_uvd.c \
|
||||||
r700_asm.c \
|
r700_asm.c \
|
||||||
r700_sq.h
|
r700_sq.h \
|
||||||
|
cayman_msaa.c \
|
||||||
|
r600_buffer_common.c \
|
||||||
|
r600_cs.h \
|
||||||
|
r600_gpu_load.c \
|
||||||
|
r600_perfcounter.c \
|
||||||
|
r600_pipe_common.c \
|
||||||
|
r600_pipe_common.h \
|
||||||
|
r600_query.c \
|
||||||
|
r600_query.h \
|
||||||
|
r600_streamout.c \
|
||||||
|
r600_test_dma.c \
|
||||||
|
r600_texture.c \
|
||||||
|
r600_viewport.c \
|
||||||
|
radeon_uvd.c \
|
||||||
|
radeon_uvd.h \
|
||||||
|
radeon_vce.c \
|
||||||
|
radeon_vce.h \
|
||||||
|
radeon_video.c \
|
||||||
|
radeon_video.h
|
||||||
|
|
||||||
CXX_SOURCES = \
|
CXX_SOURCES = \
|
||||||
sb/sb_bc_builder.cpp \
|
sb/sb_bc_builder.cpp \
|
||||||
|
|
|
@ -0,0 +1,269 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2014 Advanced Micro Devices, Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*
|
||||||
|
* Authors: Marek Olšák <maraeo@gmail.com>
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "r600_cs.h"
|
||||||
|
|
||||||
|
/* 2xMSAA
 * There are two locations (4, 4), (-4, -4).
 * Each dword packs four (x, y) pairs as signed 4-bit nibbles on a
 * 1/16th-pixel grid (see cayman_get_sample_position for the decoding);
 * the same pair is repeated for all four pixels of the quad. */
const uint32_t eg_sample_locs_2x[4] = {
	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
};
/* Maximum distance of any sample from the pixel center, in 1/16th pixels. */
const unsigned eg_max_dist_2x = 4;
|
||||||
|
/* 4xMSAA
 * There are 4 locations: (-2, -6), (6, -2), (-6, 2), (2, 6).
 * (Same packing as eg_sample_locs_2x; repeated for all four quad pixels.) */
const uint32_t eg_sample_locs_4x[4] = {
	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
};
/* Maximum distance of any sample from the pixel center, in 1/16th pixels. */
const unsigned eg_max_dist_4x = 6;
|
||||||
|
|
||||||
|
/* Cayman 8xMSAA
 * Eight locations, split across two dwords per pixel: entries [0..3] hold
 * samples 0-3 and entries [4..7] hold samples 4-7 (one entry per quad
 * pixel, all identical here). */
static const uint32_t cm_sample_locs_8x[] = {
	FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
	FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
	FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
	FILL_SREG( 1, -3, -1, 3, 5, 1, -3, -5),
	FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
	FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
	FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
	FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
};
/* Maximum distance of any sample from the pixel center, in 1/16th pixels. */
static const unsigned cm_max_dist_8x = 8;
|
||||||
|
/* Cayman 16xMSAA
 * Sixteen locations, four dwords per pixel: entries [0..3] hold samples
 * 0-3, [4..7] samples 4-7, [8..11] samples 8-11, [12..15] samples 12-15
 * (one entry per quad pixel, all identical here). */
static const uint32_t cm_sample_locs_16x[] = {
	FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
	FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
	FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
	FILL_SREG( 1, 1, -1, -3, -3, 2, 4, -1),
	FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
	FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
	FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
	FILL_SREG(-5, -2, 2, 5, 5, 3, 3, -5),
	FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
	FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
	FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
	FILL_SREG(-2, 6, 0, -7, -4, -6, -6, 4),
	FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
	FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
	FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
	FILL_SREG(-8, 0, 7, -4, 6, 7, -7, -8),
};
/* Maximum distance of any sample from the pixel center, in 1/16th pixels. */
static const unsigned cm_max_dist_16x = 8;
|
||||||
|
|
||||||
|
void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
|
||||||
|
unsigned sample_index, float *out_value)
|
||||||
|
{
|
||||||
|
int offset, index;
|
||||||
|
struct {
|
||||||
|
int idx:4;
|
||||||
|
} val;
|
||||||
|
switch (sample_count) {
|
||||||
|
case 1:
|
||||||
|
default:
|
||||||
|
out_value[0] = out_value[1] = 0.5;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
offset = 4 * (sample_index * 2);
|
||||||
|
val.idx = (eg_sample_locs_2x[0] >> offset) & 0xf;
|
||||||
|
out_value[0] = (float)(val.idx + 8) / 16.0f;
|
||||||
|
val.idx = (eg_sample_locs_2x[0] >> (offset + 4)) & 0xf;
|
||||||
|
out_value[1] = (float)(val.idx + 8) / 16.0f;
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
offset = 4 * (sample_index * 2);
|
||||||
|
val.idx = (eg_sample_locs_4x[0] >> offset) & 0xf;
|
||||||
|
out_value[0] = (float)(val.idx + 8) / 16.0f;
|
||||||
|
val.idx = (eg_sample_locs_4x[0] >> (offset + 4)) & 0xf;
|
||||||
|
out_value[1] = (float)(val.idx + 8) / 16.0f;
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
offset = 4 * (sample_index % 4 * 2);
|
||||||
|
index = (sample_index / 4) * 4;
|
||||||
|
val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf;
|
||||||
|
out_value[0] = (float)(val.idx + 8) / 16.0f;
|
||||||
|
val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf;
|
||||||
|
out_value[1] = (float)(val.idx + 8) / 16.0f;
|
||||||
|
break;
|
||||||
|
case 16:
|
||||||
|
offset = 4 * (sample_index % 4 * 2);
|
||||||
|
index = (sample_index / 4) * 4;
|
||||||
|
val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf;
|
||||||
|
out_value[0] = (float)(val.idx + 8) / 16.0f;
|
||||||
|
val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf;
|
||||||
|
out_value[1] = (float)(val.idx + 8) / 16.0f;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void cayman_init_msaa(struct pipe_context *ctx)
|
||||||
|
{
|
||||||
|
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
cayman_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]);
|
||||||
|
|
||||||
|
for (i = 0; i < 2; i++)
|
||||||
|
cayman_get_sample_position(ctx, 2, i, rctx->sample_locations_2x[i]);
|
||||||
|
for (i = 0; i < 4; i++)
|
||||||
|
cayman_get_sample_position(ctx, 4, i, rctx->sample_locations_4x[i]);
|
||||||
|
for (i = 0; i < 8; i++)
|
||||||
|
cayman_get_sample_position(ctx, 8, i, rctx->sample_locations_8x[i]);
|
||||||
|
for (i = 0; i < 16; i++)
|
||||||
|
cayman_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
|
||||||
|
{
|
||||||
|
switch (nr_samples) {
|
||||||
|
default:
|
||||||
|
case 1:
|
||||||
|
radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0);
|
||||||
|
radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0);
|
||||||
|
radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0);
|
||||||
|
radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
|
||||||
|
radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
|
||||||
|
radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]);
|
||||||
|
radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]);
|
||||||
|
radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]);
|
||||||
|
radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]);
|
||||||
|
radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]);
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
|
||||||
|
radeon_emit(cs, cm_sample_locs_8x[0]);
|
||||||
|
radeon_emit(cs, cm_sample_locs_8x[4]);
|
||||||
|
radeon_emit(cs, 0);
|
||||||
|
radeon_emit(cs, 0);
|
||||||
|
radeon_emit(cs, cm_sample_locs_8x[1]);
|
||||||
|
radeon_emit(cs, cm_sample_locs_8x[5]);
|
||||||
|
radeon_emit(cs, 0);
|
||||||
|
radeon_emit(cs, 0);
|
||||||
|
radeon_emit(cs, cm_sample_locs_8x[2]);
|
||||||
|
radeon_emit(cs, cm_sample_locs_8x[6]);
|
||||||
|
radeon_emit(cs, 0);
|
||||||
|
radeon_emit(cs, 0);
|
||||||
|
radeon_emit(cs, cm_sample_locs_8x[3]);
|
||||||
|
radeon_emit(cs, cm_sample_locs_8x[7]);
|
||||||
|
break;
|
||||||
|
case 16:
|
||||||
|
radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
|
||||||
|
radeon_emit(cs, cm_sample_locs_16x[0]);
|
||||||
|
radeon_emit(cs, cm_sample_locs_16x[4]);
|
||||||
|
radeon_emit(cs, cm_sample_locs_16x[8]);
|
||||||
|
radeon_emit(cs, cm_sample_locs_16x[12]);
|
||||||
|
radeon_emit(cs, cm_sample_locs_16x[1]);
|
||||||
|
radeon_emit(cs, cm_sample_locs_16x[5]);
|
||||||
|
radeon_emit(cs, cm_sample_locs_16x[9]);
|
||||||
|
radeon_emit(cs, cm_sample_locs_16x[13]);
|
||||||
|
radeon_emit(cs, cm_sample_locs_16x[2]);
|
||||||
|
radeon_emit(cs, cm_sample_locs_16x[6]);
|
||||||
|
radeon_emit(cs, cm_sample_locs_16x[10]);
|
||||||
|
radeon_emit(cs, cm_sample_locs_16x[14]);
|
||||||
|
radeon_emit(cs, cm_sample_locs_16x[3]);
|
||||||
|
radeon_emit(cs, cm_sample_locs_16x[7]);
|
||||||
|
radeon_emit(cs, cm_sample_locs_16x[11]);
|
||||||
|
radeon_emit(cs, cm_sample_locs_16x[15]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Program the MSAA/EQAA configuration registers (PA_SC_LINE_CNTL,
 * PA_SC_AA_CONFIG, DB_EQAA, PA_SC_MODE_CNTL_1).
 *
 * nr_samples       - color/depth sample count (<= 1 means single-sampled)
 * ps_iter_samples  - how many samples the pixel shader runs per pixel
 * overrast_samples - overrasterization sample count, used only when
 *                    nr_samples <= 1
 * sc_mode_cntl_1   - extra bits OR'd into PA_SC_MODE_CNTL_1
 */
void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
			     int ps_iter_samples, int overrast_samples,
			     unsigned sc_mode_cntl_1)
{
	/* Rasterizer setup uses nr_samples if multisampled, otherwise the
	 * overrasterization count; 0 selects the single-sampled path. */
	int setup_samples = nr_samples > 1 ? nr_samples :
			    overrast_samples > 1 ? overrast_samples : 0;
	/* Required by OpenGL line rasterization.
	 *
	 * TODO: We should also enable perpendicular endcaps for AA lines,
	 *       but that requires implementing line stippling in the pixel
	 *       shader. SC can only do line stippling with axis-aligned
	 *       endcaps.
	 */
	unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);

	if (setup_samples > 1) {
		/* indexed by log2(nr_samples) */
		unsigned max_dist[] = {
			0,
			eg_max_dist_2x,
			eg_max_dist_4x,
			cm_max_dist_8x,
			cm_max_dist_16x
		};
		unsigned log_samples = util_logbase2(setup_samples);
		/* ps_iter_samples may not be a power of two; round up first. */
		unsigned log_ps_iter_samples =
			util_logbase2(util_next_power_of_two(ps_iter_samples));

		radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
		radeon_emit(cs, sc_line_cntl |
			    S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
		radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
			    S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
			    S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */

		if (nr_samples > 1) {
			/* Real MSAA. */
			radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
					       S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
					       S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
					       S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
					       S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
			radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
					       EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
					       sc_mode_cntl_1);
		} else if (overrast_samples > 1) {
			/* Single-sampled target rasterized at a higher rate. */
			radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
					       S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
			radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
					       sc_mode_cntl_1);
		}
	} else {
		/* Single-sampled path: clear the AA config. */
		radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
		radeon_emit(cs, sc_line_cntl); /* CM_R_028BDC_PA_SC_LINE_CNTL */
		radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */

		radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
				       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
				       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
		radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
				       sc_mode_cntl_1);
	}
}
|
|
@ -0,0 +1,687 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2013 Advanced Micro Devices, Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||||
|
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||||
|
* the Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||||
|
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||||
|
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
* Authors:
|
||||||
|
* Marek Olšák
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "r600_cs.h"
|
||||||
|
#include "util/u_memory.h"
|
||||||
|
#include "util/u_upload_mgr.h"
|
||||||
|
#include <inttypes.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
|
||||||
|
struct pb_buffer *buf,
|
||||||
|
enum radeon_bo_usage usage)
|
||||||
|
{
|
||||||
|
if (ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, buf, usage)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (radeon_emitted(ctx->dma.cs, 0) &&
|
||||||
|
ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, buf, usage)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Map a buffer, synchronizing with the GFX and DMA rings as required by
 * the transfer usage flags.
 *
 * Flushes any CS that still references the buffer, then waits for the GPU
 * unless PIPE_TRANSFER_UNSYNCHRONIZED is set. With PIPE_TRANSFER_DONTBLOCK,
 * returns NULL instead of waiting (flushes are issued asynchronously).
 * Returns the CPU pointer, or NULL on DONTBLOCK contention.
 */
void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
				      struct r600_resource *resource,
				      unsigned usage)
{
	enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
	bool busy = false;

	assert(!(resource->flags & RADEON_FLAG_SPARSE));

	if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
		/* Caller takes responsibility for synchronization. */
		return ctx->ws->buffer_map(resource->buf, NULL, usage);
	}

	if (!(usage & PIPE_TRANSFER_WRITE)) {
		/* Read-only mapping: only have to wait for the last write. */
		rusage = RADEON_USAGE_WRITE;
	}

	if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) &&
	    ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs,
					     resource->buf, rusage)) {
		if (usage & PIPE_TRANSFER_DONTBLOCK) {
			ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
			return NULL;
		} else {
			ctx->gfx.flush(ctx, 0, NULL);
			busy = true;
		}
	}
	if (radeon_emitted(ctx->dma.cs, 0) &&
	    ctx->ws->cs_is_buffer_referenced(ctx->dma.cs,
					     resource->buf, rusage)) {
		if (usage & PIPE_TRANSFER_DONTBLOCK) {
			ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
			return NULL;
		} else {
			ctx->dma.flush(ctx, 0, NULL);
			busy = true;
		}
	}

	if (busy || !ctx->ws->buffer_wait(resource->buf, 0, rusage)) {
		if (usage & PIPE_TRANSFER_DONTBLOCK) {
			return NULL;
		} else {
			/* We will be waiting for the GPU. Wait for any offloaded
			 * CS flush to complete to avoid busy-waiting in the winsys. */
			ctx->ws->cs_sync_flush(ctx->gfx.cs);
			if (ctx->dma.cs)
				ctx->ws->cs_sync_flush(ctx->dma.cs);
		}
	}

	/* Setting the CS to NULL will prevent doing checks we have done already. */
	return ctx->ws->buffer_map(resource->buf, NULL, usage);
}
|
||||||
|
|
||||||
|
/* Initialize the allocation parameters (size, alignment, memory domains,
 * winsys flags) of a resource from its pipe_resource description, without
 * actually allocating a buffer. r600_alloc_resource does the allocation.
 *
 * NOTE(review): for buffers, res is not actually a texture; rtex is only
 * dereferenced on the texture paths below.
 */
void r600_init_resource_fields(struct r600_common_screen *rscreen,
			       struct r600_resource *res,
			       uint64_t size, unsigned alignment)
{
	struct r600_texture *rtex = (struct r600_texture*)res;

	res->bo_size = size;
	res->bo_alignment = alignment;
	res->flags = 0;
	res->texture_handle_allocated = false;
	res->image_handle_allocated = false;

	/* Pick the memory domain from the expected usage pattern. */
	switch (res->b.b.usage) {
	case PIPE_USAGE_STREAM:
		res->flags = RADEON_FLAG_GTT_WC;
		/* fall through */
	case PIPE_USAGE_STAGING:
		/* Transfers are likely to occur more often with these
		 * resources. */
		res->domains = RADEON_DOMAIN_GTT;
		break;
	case PIPE_USAGE_DYNAMIC:
		/* Older kernels didn't always flush the HDP cache before
		 * CS execution
		 */
		if (rscreen->info.drm_major == 2 &&
		    rscreen->info.drm_minor < 40) {
			res->domains = RADEON_DOMAIN_GTT;
			res->flags |= RADEON_FLAG_GTT_WC;
			break;
		}
		/* fall through */
	case PIPE_USAGE_DEFAULT:
	case PIPE_USAGE_IMMUTABLE:
	default:
		/* Not listing GTT here improves performance in some
		 * apps. */
		res->domains = RADEON_DOMAIN_VRAM;
		res->flags |= RADEON_FLAG_GTT_WC;
		break;
	}

	if (res->b.b.target == PIPE_BUFFER &&
	    res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
			      PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
		/* Use GTT for all persistent mappings with older
		 * kernels, because they didn't always flush the HDP
		 * cache before CS execution.
		 *
		 * Write-combined CPU mappings are fine, the kernel
		 * ensures all CPU writes finish before the GPU
		 * executes a command stream.
		 */
		if (rscreen->info.drm_major == 2 &&
		    rscreen->info.drm_minor < 40)
			res->domains = RADEON_DOMAIN_GTT;
	}

	/* Tiled textures are unmappable. Always put them in VRAM. */
	if ((res->b.b.target != PIPE_BUFFER && !rtex->surface.is_linear) ||
	    res->flags & R600_RESOURCE_FLAG_UNMAPPABLE) {
		res->domains = RADEON_DOMAIN_VRAM;
		res->flags |= RADEON_FLAG_NO_CPU_ACCESS |
			 RADEON_FLAG_GTT_WC;
	}

	/* Only displayable single-sample textures can be shared between
	 * processes. */
	if (res->b.b.target == PIPE_BUFFER ||
	    res->b.b.nr_samples >= 2 ||
	    (rtex->surface.micro_tile_mode != RADEON_MICRO_MODE_DISPLAY &&
	     /* Raven doesn't use display micro mode for 32bpp, so check this: */
	     !(res->b.b.bind & PIPE_BIND_SCANOUT)))
		res->flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;

	/* If VRAM is just stolen system memory, allow both VRAM and
	 * GTT, whichever has free space. If a buffer is evicted from
	 * VRAM to GTT, it will stay there.
	 *
	 * DRM 3.6.0 has good BO move throttling, so we can allow VRAM-only
	 * placements even with a low amount of stolen VRAM.
	 */
	if (!rscreen->info.has_dedicated_vram &&
	    (rscreen->info.drm_major < 3 || rscreen->info.drm_minor < 6) &&
	    res->domains == RADEON_DOMAIN_VRAM) {
		res->domains = RADEON_DOMAIN_VRAM_GTT;
		res->flags &= ~RADEON_FLAG_NO_CPU_ACCESS; /* disallowed with VRAM_GTT */
	}

	if (rscreen->debug_flags & DBG_NO_WC)
		res->flags &= ~RADEON_FLAG_GTT_WC;

	/* Shared buffers must come from a dedicated allocation. */
	if (res->b.b.bind & PIPE_BIND_SHARED)
		res->flags |= RADEON_FLAG_NO_SUBALLOC;

	/* Set expected VRAM and GART usage for the buffer. */
	res->vram_usage = 0;
	res->gart_usage = 0;

	if (res->domains & RADEON_DOMAIN_VRAM)
		res->vram_usage = size;
	else if (res->domains & RADEON_DOMAIN_GTT)
		res->gart_usage = size;
}
|
||||||
|
|
||||||
|
/* Allocate (or re-allocate) the winsys buffer backing a resource, using
 * the fields set up by r600_init_resource_fields. Any previous buffer is
 * released. Returns false on allocation failure (the old buffer is kept). */
bool r600_alloc_resource(struct r600_common_screen *rscreen,
			 struct r600_resource *res)
{
	struct pb_buffer *old_buf, *new_buf;

	/* Allocate a new resource. */
	new_buf = rscreen->ws->buffer_create(rscreen->ws, res->bo_size,
					     res->bo_alignment,
					     res->domains, res->flags);
	if (!new_buf) {
		return false;
	}

	/* Replace the pointer such that if res->buf wasn't NULL, it won't be
	 * NULL. This should prevent crashes with multiple contexts using
	 * the same buffer where one of the contexts invalidates it while
	 * the others are using it. */
	old_buf = res->buf;
	res->buf = new_buf; /* should be atomic */

	if (rscreen->info.has_virtual_memory)
		res->gpu_address = rscreen->ws->buffer_get_virtual_address(res->buf);
	else
		res->gpu_address = 0;

	/* Drop the reference to the old buffer last. */
	pb_reference(&old_buf, NULL);

	/* The new storage holds no valid data yet. */
	util_range_set_empty(&res->valid_buffer_range);
	res->TC_L2_dirty = false;

	/* Print debug information. */
	if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) {
		fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %"PRIu64" bytes\n",
			res->gpu_address, res->gpu_address + res->buf->size,
			res->buf->size);
	}
	return true;
}
|
||||||
|
|
||||||
|
static void r600_buffer_destroy(struct pipe_screen *screen,
|
||||||
|
struct pipe_resource *buf)
|
||||||
|
{
|
||||||
|
struct r600_resource *rbuffer = r600_resource(buf);
|
||||||
|
|
||||||
|
threaded_resource_deinit(buf);
|
||||||
|
util_range_destroy(&rbuffer->valid_buffer_range);
|
||||||
|
pb_reference(&rbuffer->buf, NULL);
|
||||||
|
FREE(rbuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
r600_invalidate_buffer(struct r600_common_context *rctx,
|
||||||
|
struct r600_resource *rbuffer)
|
||||||
|
{
|
||||||
|
/* Shared buffers can't be reallocated. */
|
||||||
|
if (rbuffer->b.is_shared)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Sparse buffers can't be reallocated. */
|
||||||
|
if (rbuffer->flags & RADEON_FLAG_SPARSE)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* In AMD_pinned_memory, the user pointer association only gets
|
||||||
|
* broken when the buffer is explicitly re-allocated.
|
||||||
|
*/
|
||||||
|
if (rbuffer->b.is_user_ptr)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Check if mapping this buffer would cause waiting for the GPU. */
|
||||||
|
if (r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
|
||||||
|
!rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
|
||||||
|
rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
|
||||||
|
} else {
|
||||||
|
util_range_set_empty(&rbuffer->valid_buffer_range);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Replace the storage of dst with src. */
|
||||||
|
void r600_replace_buffer_storage(struct pipe_context *ctx,
|
||||||
|
struct pipe_resource *dst,
|
||||||
|
struct pipe_resource *src)
|
||||||
|
{
|
||||||
|
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
|
||||||
|
struct r600_resource *rdst = r600_resource(dst);
|
||||||
|
struct r600_resource *rsrc = r600_resource(src);
|
||||||
|
uint64_t old_gpu_address = rdst->gpu_address;
|
||||||
|
|
||||||
|
pb_reference(&rdst->buf, rsrc->buf);
|
||||||
|
rdst->gpu_address = rsrc->gpu_address;
|
||||||
|
rdst->b.b.bind = rsrc->b.b.bind;
|
||||||
|
rdst->flags = rsrc->flags;
|
||||||
|
|
||||||
|
assert(rdst->vram_usage == rsrc->vram_usage);
|
||||||
|
assert(rdst->gart_usage == rsrc->gart_usage);
|
||||||
|
assert(rdst->bo_size == rsrc->bo_size);
|
||||||
|
assert(rdst->bo_alignment == rsrc->bo_alignment);
|
||||||
|
assert(rdst->domains == rsrc->domains);
|
||||||
|
|
||||||
|
rctx->rebind_buffer(ctx, dst, old_gpu_address);
|
||||||
|
}
|
||||||
|
|
||||||
|
void r600_invalidate_resource(struct pipe_context *ctx,
|
||||||
|
struct pipe_resource *resource)
|
||||||
|
{
|
||||||
|
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
|
||||||
|
struct r600_resource *rbuffer = r600_resource(resource);
|
||||||
|
|
||||||
|
/* We currently only do anyting here for buffers */
|
||||||
|
if (resource->target == PIPE_BUFFER)
|
||||||
|
(void)r600_invalidate_buffer(rctx, rbuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void *r600_buffer_get_transfer(struct pipe_context *ctx,
|
||||||
|
struct pipe_resource *resource,
|
||||||
|
unsigned usage,
|
||||||
|
const struct pipe_box *box,
|
||||||
|
struct pipe_transfer **ptransfer,
|
||||||
|
void *data, struct r600_resource *staging,
|
||||||
|
unsigned offset)
|
||||||
|
{
|
||||||
|
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
|
||||||
|
struct r600_transfer *transfer;
|
||||||
|
|
||||||
|
if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC)
|
||||||
|
transfer = slab_alloc(&rctx->pool_transfers_unsync);
|
||||||
|
else
|
||||||
|
transfer = slab_alloc(&rctx->pool_transfers);
|
||||||
|
|
||||||
|
transfer->b.b.resource = NULL;
|
||||||
|
pipe_resource_reference(&transfer->b.b.resource, resource);
|
||||||
|
transfer->b.b.level = 0;
|
||||||
|
transfer->b.b.usage = usage;
|
||||||
|
transfer->b.b.box = *box;
|
||||||
|
transfer->b.b.stride = 0;
|
||||||
|
transfer->b.b.layer_stride = 0;
|
||||||
|
transfer->b.staging = NULL;
|
||||||
|
transfer->offset = offset;
|
||||||
|
transfer->staging = staging;
|
||||||
|
*ptransfer = &transfer->b.b;
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx,
|
||||||
|
unsigned dstx, unsigned srcx, unsigned size)
|
||||||
|
{
|
||||||
|
bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4);
|
||||||
|
|
||||||
|
return rctx->screen->has_cp_dma ||
|
||||||
|
(dword_aligned && (rctx->dma.cs ||
|
||||||
|
rctx->screen->has_streamout));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
static void *r600_buffer_transfer_map(struct pipe_context *ctx,
|
||||||
|
struct pipe_resource *resource,
|
||||||
|
unsigned level,
|
||||||
|
unsigned usage,
|
||||||
|
const struct pipe_box *box,
|
||||||
|
struct pipe_transfer **ptransfer)
|
||||||
|
{
|
||||||
|
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
|
||||||
|
struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
|
||||||
|
struct r600_resource *rbuffer = r600_resource(resource);
|
||||||
|
uint8_t *data;
|
||||||
|
|
||||||
|
assert(box->x + box->width <= resource->width0);
|
||||||
|
|
||||||
|
/* From GL_AMD_pinned_memory issues:
|
||||||
|
*
|
||||||
|
* 4) Is glMapBuffer on a shared buffer guaranteed to return the
|
||||||
|
* same system address which was specified at creation time?
|
||||||
|
*
|
||||||
|
* RESOLVED: NO. The GL implementation might return a different
|
||||||
|
* virtual mapping of that memory, although the same physical
|
||||||
|
* page will be used.
|
||||||
|
*
|
||||||
|
* So don't ever use staging buffers.
|
||||||
|
*/
|
||||||
|
if (rbuffer->b.is_user_ptr)
|
||||||
|
usage |= PIPE_TRANSFER_PERSISTENT;
|
||||||
|
|
||||||
|
/* See if the buffer range being mapped has never been initialized,
|
||||||
|
* in which case it can be mapped unsynchronized. */
|
||||||
|
if (!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
|
||||||
|
TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED)) &&
|
||||||
|
usage & PIPE_TRANSFER_WRITE &&
|
||||||
|
!rbuffer->b.is_shared &&
|
||||||
|
!util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
|
||||||
|
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If discarding the entire range, discard the whole resource instead. */
|
||||||
|
if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
|
||||||
|
box->x == 0 && box->width == resource->width0) {
|
||||||
|
usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
|
||||||
|
!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
|
||||||
|
TC_TRANSFER_MAP_NO_INVALIDATE))) {
|
||||||
|
assert(usage & PIPE_TRANSFER_WRITE);
|
||||||
|
|
||||||
|
if (r600_invalidate_buffer(rctx, rbuffer)) {
|
||||||
|
/* At this point, the buffer is always idle. */
|
||||||
|
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
|
||||||
|
} else {
|
||||||
|
/* Fall back to a temporary buffer. */
|
||||||
|
usage |= PIPE_TRANSFER_DISCARD_RANGE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
|
||||||
|
!(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
|
||||||
|
((!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
|
||||||
|
PIPE_TRANSFER_PERSISTENT)) &&
|
||||||
|
r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) ||
|
||||||
|
(rbuffer->flags & RADEON_FLAG_SPARSE))) {
|
||||||
|
assert(usage & PIPE_TRANSFER_WRITE);
|
||||||
|
|
||||||
|
/* Check if mapping this buffer would cause waiting for the GPU.
|
||||||
|
*/
|
||||||
|
if (rbuffer->flags & RADEON_FLAG_SPARSE ||
|
||||||
|
r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
|
||||||
|
!rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
|
||||||
|
/* Do a wait-free write-only transfer using a temporary buffer. */
|
||||||
|
unsigned offset;
|
||||||
|
struct r600_resource *staging = NULL;
|
||||||
|
|
||||||
|
u_upload_alloc(ctx->stream_uploader, 0,
|
||||||
|
box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
|
||||||
|
rctx->screen->info.tcc_cache_line_size,
|
||||||
|
&offset, (struct pipe_resource**)&staging,
|
||||||
|
(void**)&data);
|
||||||
|
|
||||||
|
if (staging) {
|
||||||
|
data += box->x % R600_MAP_BUFFER_ALIGNMENT;
|
||||||
|
return r600_buffer_get_transfer(ctx, resource, usage, box,
|
||||||
|
ptransfer, data, staging, offset);
|
||||||
|
} else if (rbuffer->flags & RADEON_FLAG_SPARSE) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* At this point, the buffer is always idle (we checked it above). */
|
||||||
|
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Use a staging buffer in cached GTT for reads. */
|
||||||
|
else if (((usage & PIPE_TRANSFER_READ) &&
|
||||||
|
!(usage & PIPE_TRANSFER_PERSISTENT) &&
|
||||||
|
(rbuffer->domains & RADEON_DOMAIN_VRAM ||
|
||||||
|
rbuffer->flags & RADEON_FLAG_GTT_WC) &&
|
||||||
|
r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) ||
|
||||||
|
(rbuffer->flags & RADEON_FLAG_SPARSE)) {
|
||||||
|
struct r600_resource *staging;
|
||||||
|
|
||||||
|
assert(!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC));
|
||||||
|
staging = (struct r600_resource*) pipe_buffer_create(
|
||||||
|
ctx->screen, 0, PIPE_USAGE_STAGING,
|
||||||
|
box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT));
|
||||||
|
if (staging) {
|
||||||
|
/* Copy the VRAM buffer to the staging buffer. */
|
||||||
|
rctx->dma_copy(ctx, &staging->b.b, 0,
|
||||||
|
box->x % R600_MAP_BUFFER_ALIGNMENT,
|
||||||
|
0, 0, resource, 0, box);
|
||||||
|
|
||||||
|
data = r600_buffer_map_sync_with_rings(rctx, staging,
|
||||||
|
usage & ~PIPE_TRANSFER_UNSYNCHRONIZED);
|
||||||
|
if (!data) {
|
||||||
|
r600_resource_reference(&staging, NULL);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
data += box->x % R600_MAP_BUFFER_ALIGNMENT;
|
||||||
|
|
||||||
|
return r600_buffer_get_transfer(ctx, resource, usage, box,
|
||||||
|
ptransfer, data, staging, 0);
|
||||||
|
} else if (rbuffer->flags & RADEON_FLAG_SPARSE) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage);
|
||||||
|
if (!data) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
data += box->x;
|
||||||
|
|
||||||
|
return r600_buffer_get_transfer(ctx, resource, usage, box,
|
||||||
|
ptransfer, data, NULL, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void r600_buffer_do_flush_region(struct pipe_context *ctx,
|
||||||
|
struct pipe_transfer *transfer,
|
||||||
|
const struct pipe_box *box)
|
||||||
|
{
|
||||||
|
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
|
||||||
|
struct r600_resource *rbuffer = r600_resource(transfer->resource);
|
||||||
|
|
||||||
|
if (rtransfer->staging) {
|
||||||
|
struct pipe_resource *dst, *src;
|
||||||
|
unsigned soffset;
|
||||||
|
struct pipe_box dma_box;
|
||||||
|
|
||||||
|
dst = transfer->resource;
|
||||||
|
src = &rtransfer->staging->b.b;
|
||||||
|
soffset = rtransfer->offset + box->x % R600_MAP_BUFFER_ALIGNMENT;
|
||||||
|
|
||||||
|
u_box_1d(soffset, box->width, &dma_box);
|
||||||
|
|
||||||
|
/* Copy the staging buffer into the original one. */
|
||||||
|
ctx->resource_copy_region(ctx, dst, 0, box->x, 0, 0, src, 0, &dma_box);
|
||||||
|
}
|
||||||
|
|
||||||
|
util_range_add(&rbuffer->valid_buffer_range, box->x,
|
||||||
|
box->x + box->width);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void r600_buffer_flush_region(struct pipe_context *ctx,
|
||||||
|
struct pipe_transfer *transfer,
|
||||||
|
const struct pipe_box *rel_box)
|
||||||
|
{
|
||||||
|
unsigned required_usage = PIPE_TRANSFER_WRITE |
|
||||||
|
PIPE_TRANSFER_FLUSH_EXPLICIT;
|
||||||
|
|
||||||
|
if ((transfer->usage & required_usage) == required_usage) {
|
||||||
|
struct pipe_box box;
|
||||||
|
|
||||||
|
u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box);
|
||||||
|
r600_buffer_do_flush_region(ctx, transfer, &box);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
|
||||||
|
struct pipe_transfer *transfer)
|
||||||
|
{
|
||||||
|
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
|
||||||
|
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
|
||||||
|
|
||||||
|
if (transfer->usage & PIPE_TRANSFER_WRITE &&
|
||||||
|
!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
|
||||||
|
r600_buffer_do_flush_region(ctx, transfer, &transfer->box);
|
||||||
|
|
||||||
|
r600_resource_reference(&rtransfer->staging, NULL);
|
||||||
|
assert(rtransfer->b.staging == NULL); /* for threaded context only */
|
||||||
|
pipe_resource_reference(&transfer->resource, NULL);
|
||||||
|
|
||||||
|
/* Don't use pool_transfers_unsync. We are always in the driver
|
||||||
|
* thread. */
|
||||||
|
slab_free(&rctx->pool_transfers, transfer);
|
||||||
|
}
|
||||||
|
|
||||||
|
void r600_buffer_subdata(struct pipe_context *ctx,
|
||||||
|
struct pipe_resource *buffer,
|
||||||
|
unsigned usage, unsigned offset,
|
||||||
|
unsigned size, const void *data)
|
||||||
|
{
|
||||||
|
struct pipe_transfer *transfer = NULL;
|
||||||
|
struct pipe_box box;
|
||||||
|
uint8_t *map = NULL;
|
||||||
|
|
||||||
|
u_box_1d(offset, size, &box);
|
||||||
|
map = r600_buffer_transfer_map(ctx, buffer, 0,
|
||||||
|
PIPE_TRANSFER_WRITE |
|
||||||
|
PIPE_TRANSFER_DISCARD_RANGE |
|
||||||
|
usage,
|
||||||
|
&box, &transfer);
|
||||||
|
if (!map)
|
||||||
|
return;
|
||||||
|
|
||||||
|
memcpy(map, data, size);
|
||||||
|
r600_buffer_transfer_unmap(ctx, transfer);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct u_resource_vtbl r600_buffer_vtbl =
|
||||||
|
{
|
||||||
|
NULL, /* get_handle */
|
||||||
|
r600_buffer_destroy, /* resource_destroy */
|
||||||
|
r600_buffer_transfer_map, /* transfer_map */
|
||||||
|
r600_buffer_flush_region, /* transfer_flush_region */
|
||||||
|
r600_buffer_transfer_unmap, /* transfer_unmap */
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct r600_resource *
|
||||||
|
r600_alloc_buffer_struct(struct pipe_screen *screen,
|
||||||
|
const struct pipe_resource *templ)
|
||||||
|
{
|
||||||
|
struct r600_resource *rbuffer;
|
||||||
|
|
||||||
|
rbuffer = MALLOC_STRUCT(r600_resource);
|
||||||
|
|
||||||
|
rbuffer->b.b = *templ;
|
||||||
|
rbuffer->b.b.next = NULL;
|
||||||
|
pipe_reference_init(&rbuffer->b.b.reference, 1);
|
||||||
|
rbuffer->b.b.screen = screen;
|
||||||
|
|
||||||
|
rbuffer->b.vtbl = &r600_buffer_vtbl;
|
||||||
|
threaded_resource_init(&rbuffer->b.b);
|
||||||
|
|
||||||
|
rbuffer->buf = NULL;
|
||||||
|
rbuffer->bind_history = 0;
|
||||||
|
rbuffer->TC_L2_dirty = false;
|
||||||
|
util_range_init(&rbuffer->valid_buffer_range);
|
||||||
|
return rbuffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
|
||||||
|
const struct pipe_resource *templ,
|
||||||
|
unsigned alignment)
|
||||||
|
{
|
||||||
|
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
|
||||||
|
struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
|
||||||
|
|
||||||
|
r600_init_resource_fields(rscreen, rbuffer, templ->width0, alignment);
|
||||||
|
|
||||||
|
if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE)
|
||||||
|
rbuffer->flags |= RADEON_FLAG_SPARSE;
|
||||||
|
|
||||||
|
if (!r600_alloc_resource(rscreen, rbuffer)) {
|
||||||
|
FREE(rbuffer);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return &rbuffer->b.b;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct pipe_resource *r600_aligned_buffer_create(struct pipe_screen *screen,
|
||||||
|
unsigned flags,
|
||||||
|
unsigned usage,
|
||||||
|
unsigned size,
|
||||||
|
unsigned alignment)
|
||||||
|
{
|
||||||
|
struct pipe_resource buffer;
|
||||||
|
|
||||||
|
memset(&buffer, 0, sizeof buffer);
|
||||||
|
buffer.target = PIPE_BUFFER;
|
||||||
|
buffer.format = PIPE_FORMAT_R8_UNORM;
|
||||||
|
buffer.bind = 0;
|
||||||
|
buffer.usage = usage;
|
||||||
|
buffer.flags = flags;
|
||||||
|
buffer.width0 = size;
|
||||||
|
buffer.height0 = 1;
|
||||||
|
buffer.depth0 = 1;
|
||||||
|
buffer.array_size = 1;
|
||||||
|
return r600_buffer_create(screen, &buffer, alignment);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct pipe_resource *
|
||||||
|
r600_buffer_from_user_memory(struct pipe_screen *screen,
|
||||||
|
const struct pipe_resource *templ,
|
||||||
|
void *user_memory)
|
||||||
|
{
|
||||||
|
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
|
||||||
|
struct radeon_winsys *ws = rscreen->ws;
|
||||||
|
struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
|
||||||
|
|
||||||
|
rbuffer->domains = RADEON_DOMAIN_GTT;
|
||||||
|
rbuffer->flags = 0;
|
||||||
|
rbuffer->b.is_user_ptr = true;
|
||||||
|
util_range_add(&rbuffer->valid_buffer_range, 0, templ->width0);
|
||||||
|
util_range_add(&rbuffer->b.valid_buffer_range, 0, templ->width0);
|
||||||
|
|
||||||
|
/* Convert a user pointer to a buffer. */
|
||||||
|
rbuffer->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0);
|
||||||
|
if (!rbuffer->buf) {
|
||||||
|
FREE(rbuffer);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rscreen->info.has_virtual_memory)
|
||||||
|
rbuffer->gpu_address =
|
||||||
|
ws->buffer_get_virtual_address(rbuffer->buf);
|
||||||
|
else
|
||||||
|
rbuffer->gpu_address = 0;
|
||||||
|
|
||||||
|
rbuffer->vram_usage = 0;
|
||||||
|
rbuffer->gart_usage = templ->width0;
|
||||||
|
|
||||||
|
return &rbuffer->b.b;
|
||||||
|
}
|
|
@ -0,0 +1,209 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2013 Advanced Micro Devices, Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||||
|
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||||
|
* the Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||||
|
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||||
|
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
* Authors: Marek Olšák <maraeo@gmail.com>
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This file contains helpers for writing commands to commands streams.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef R600_CS_H
|
||||||
|
#define R600_CS_H
|
||||||
|
|
||||||
|
#include "r600_pipe_common.h"
|
||||||
|
#include "amd/common/r600d_common.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return true if there is enough memory in VRAM and GTT for the buffers
|
||||||
|
* added so far.
|
||||||
|
*
|
||||||
|
* \param vram VRAM memory size not added to the buffer list yet
|
||||||
|
* \param gtt GTT memory size not added to the buffer list yet
|
||||||
|
*/
|
||||||
|
static inline bool
|
||||||
|
radeon_cs_memory_below_limit(struct r600_common_screen *screen,
|
||||||
|
struct radeon_winsys_cs *cs,
|
||||||
|
uint64_t vram, uint64_t gtt)
|
||||||
|
{
|
||||||
|
vram += cs->used_vram;
|
||||||
|
gtt += cs->used_gart;
|
||||||
|
|
||||||
|
/* Anything that goes above the VRAM size should go to GTT. */
|
||||||
|
if (vram > screen->info.vram_size)
|
||||||
|
gtt += vram - screen->info.vram_size;
|
||||||
|
|
||||||
|
/* Now we just need to check if we have enough GTT. */
|
||||||
|
return gtt < screen->info.gart_size * 0.7;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add a buffer to the buffer list for the given command stream (CS).
|
||||||
|
*
|
||||||
|
* All buffers used by a CS must be added to the list. This tells the kernel
|
||||||
|
* driver which buffers are used by GPU commands. Other buffers can
|
||||||
|
* be swapped out (not accessible) during execution.
|
||||||
|
*
|
||||||
|
* The buffer list becomes empty after every context flush and must be
|
||||||
|
* rebuilt.
|
||||||
|
*/
|
||||||
|
static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rctx,
|
||||||
|
struct r600_ring *ring,
|
||||||
|
struct r600_resource *rbo,
|
||||||
|
enum radeon_bo_usage usage,
|
||||||
|
enum radeon_bo_priority priority)
|
||||||
|
{
|
||||||
|
assert(usage);
|
||||||
|
return rctx->ws->cs_add_buffer(
|
||||||
|
ring->cs, rbo->buf,
|
||||||
|
(enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED),
|
||||||
|
rbo->domains, priority) * 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Same as above, but also checks memory usage and flushes the context
|
||||||
|
* accordingly.
|
||||||
|
*
|
||||||
|
* When this SHOULD NOT be used:
|
||||||
|
*
|
||||||
|
* - if r600_context_add_resource_size has been called for the buffer
|
||||||
|
* followed by *_need_cs_space for checking the memory usage
|
||||||
|
*
|
||||||
|
* - if r600_need_dma_space has been called for the buffer
|
||||||
|
*
|
||||||
|
* - when emitting state packets and draw packets (because preceding packets
|
||||||
|
* can't be re-emitted at that point)
|
||||||
|
*
|
||||||
|
* - if shader resource "enabled_mask" is not up-to-date or there is
|
||||||
|
* a different constraint disallowing a context flush
|
||||||
|
*/
|
||||||
|
static inline unsigned
|
||||||
|
radeon_add_to_buffer_list_check_mem(struct r600_common_context *rctx,
|
||||||
|
struct r600_ring *ring,
|
||||||
|
struct r600_resource *rbo,
|
||||||
|
enum radeon_bo_usage usage,
|
||||||
|
enum radeon_bo_priority priority,
|
||||||
|
bool check_mem)
|
||||||
|
{
|
||||||
|
if (check_mem &&
|
||||||
|
!radeon_cs_memory_below_limit(rctx->screen, ring->cs,
|
||||||
|
rctx->vram + rbo->vram_usage,
|
||||||
|
rctx->gtt + rbo->gart_usage))
|
||||||
|
ring->flush(rctx, RADEON_FLUSH_ASYNC, NULL);
|
||||||
|
|
||||||
|
return radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void r600_emit_reloc(struct r600_common_context *rctx,
|
||||||
|
struct r600_ring *ring, struct r600_resource *rbo,
|
||||||
|
enum radeon_bo_usage usage,
|
||||||
|
enum radeon_bo_priority priority)
|
||||||
|
{
|
||||||
|
struct radeon_winsys_cs *cs = ring->cs;
|
||||||
|
bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.has_virtual_memory;
|
||||||
|
unsigned reloc = radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority);
|
||||||
|
|
||||||
|
if (!has_vm) {
|
||||||
|
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
|
||||||
|
radeon_emit(cs, reloc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void radeon_set_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
|
||||||
|
{
|
||||||
|
assert(reg < R600_CONTEXT_REG_OFFSET);
|
||||||
|
assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
|
||||||
|
radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
|
||||||
|
radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Write a single config register. */
static inline void radeon_set_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
	radeon_set_config_reg_seq(cs, reg, 1);
	radeon_emit(cs, value);
}
|
||||||
|
|
||||||
|
static inline void radeon_set_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
|
||||||
|
{
|
||||||
|
assert(reg >= R600_CONTEXT_REG_OFFSET);
|
||||||
|
assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
|
||||||
|
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
|
||||||
|
radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Write a single context register. */
static inline void radeon_set_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
	radeon_set_context_reg_seq(cs, reg, 1);
	radeon_emit(cs, value);
}
|
||||||
|
|
||||||
|
static inline void radeon_set_context_reg_idx(struct radeon_winsys_cs *cs,
|
||||||
|
unsigned reg, unsigned idx,
|
||||||
|
unsigned value)
|
||||||
|
{
|
||||||
|
assert(reg >= R600_CONTEXT_REG_OFFSET);
|
||||||
|
assert(cs->current.cdw + 3 <= cs->current.max_dw);
|
||||||
|
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
|
||||||
|
radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
|
||||||
|
radeon_emit(cs, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void radeon_set_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
|
||||||
|
{
|
||||||
|
assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
|
||||||
|
assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
|
||||||
|
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
|
||||||
|
radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Write a single shader register. */
static inline void radeon_set_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
	radeon_set_sh_reg_seq(cs, reg, 1);
	radeon_emit(cs, value);
}
|
||||||
|
|
||||||
|
static inline void radeon_set_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
|
||||||
|
{
|
||||||
|
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
|
||||||
|
assert(cs->current.cdw + 2 + num <= cs->current.max_dw);
|
||||||
|
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
|
||||||
|
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Write a single user-config register. */
static inline void radeon_set_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
	radeon_set_uconfig_reg_seq(cs, reg, 1);
	radeon_emit(cs, value);
}
|
||||||
|
|
||||||
|
static inline void radeon_set_uconfig_reg_idx(struct radeon_winsys_cs *cs,
|
||||||
|
unsigned reg, unsigned idx,
|
||||||
|
unsigned value)
|
||||||
|
{
|
||||||
|
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
|
||||||
|
assert(cs->current.cdw + 3 <= cs->current.max_dw);
|
||||||
|
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0));
|
||||||
|
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2 | (idx << 28));
|
||||||
|
radeon_emit(cs, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,283 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2015 Advanced Micro Devices, Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*
|
||||||
|
* Authors: Marek Olšák <maraeo@gmail.com>
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* The GPU load is measured as follows.
|
||||||
|
*
|
||||||
|
* There is a thread which samples the GRBM_STATUS register at a certain
|
||||||
|
* frequency and the "busy" or "idle" counter is incremented based on
|
||||||
|
* whether the GUI_ACTIVE bit is set or not.
|
||||||
|
*
|
||||||
|
* Then, the user can sample the counters twice and calculate the average
|
||||||
|
* GPU load between the two samples.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "r600_pipe_common.h"
|
||||||
|
#include "r600_query.h"
|
||||||
|
#include "os/os_time.h"
|
||||||
|
|
||||||
|
/* For good accuracy at 1000 fps or lower. This will be inaccurate for higher
 * fps (there are too few samples per frame). */
#define SAMPLES_PER_SEC 10000

/* GRBM_STATUS: one busy bit per GPU block, sampled by the load thread. */
#define GRBM_STATUS		0x8010
#define TA_BUSY(x)		(((x) >> 14) & 0x1)
#define GDS_BUSY(x)		(((x) >> 15) & 0x1)
#define VGT_BUSY(x)		(((x) >> 17) & 0x1)
#define IA_BUSY(x)		(((x) >> 19) & 0x1)
#define SX_BUSY(x)		(((x) >> 20) & 0x1)
#define WD_BUSY(x)		(((x) >> 21) & 0x1)
#define SPI_BUSY(x)		(((x) >> 22) & 0x1)
#define BCI_BUSY(x)		(((x) >> 23) & 0x1)
#define SC_BUSY(x)		(((x) >> 24) & 0x1)
#define PA_BUSY(x)		(((x) >> 25) & 0x1)
#define DB_BUSY(x)		(((x) >> 26) & 0x1)
#define CP_BUSY(x)		(((x) >> 29) & 0x1)
#define CB_BUSY(x)		(((x) >> 30) & 0x1)
#define GUI_ACTIVE(x)		(((x) >> 31) & 0x1)

/* SRBM_STATUS2: SDMA engine busy bit. */
#define SRBM_STATUS2		0x0e4c
#define SDMA_BUSY(x)		(((x) >> 5) & 0x1)

/* CP_STAT: command-processor sub-unit busy bits. */
#define CP_STAT			0x8680
#define PFP_BUSY(x)		(((x) >> 15) & 0x1)
#define MEQ_BUSY(x)		(((x) >> 16) & 0x1)
#define ME_BUSY(x)		(((x) >> 17) & 0x1)
#define SURFACE_SYNC_BUSY(x)	(((x) >> 21) & 0x1)
#define DMA_BUSY(x)		(((x) >> 22) & 0x1)
#define SCRATCH_RAM_BUSY(x)	(((x) >> 24) & 0x1)

/* Pass-through "mask" used for the combined GPU-busy counter. */
#define IDENTITY(x) x
|
||||||
|
/* Increment the busy or idle counter for 'field' depending on whether the
 * corresponding bit of the local 'value' is set. Expects 'counters' and
 * 'value' to be in scope at the expansion site. */
#define UPDATE_COUNTER(field, mask)					\
	do {								\
		if (mask(value))					\
			p_atomic_inc(&counters->named.field.busy);	\
		else							\
			p_atomic_inc(&counters->named.field.idle);	\
	} while (0)
|
||||||
|
|
||||||
|
static void r600_update_mmio_counters(struct r600_common_screen *rscreen,
|
||||||
|
union r600_mmio_counters *counters)
|
||||||
|
{
|
||||||
|
uint32_t value = 0;
|
||||||
|
bool gui_busy, sdma_busy = false;
|
||||||
|
|
||||||
|
/* GRBM_STATUS */
|
||||||
|
rscreen->ws->read_registers(rscreen->ws, GRBM_STATUS, 1, &value);
|
||||||
|
|
||||||
|
UPDATE_COUNTER(ta, TA_BUSY);
|
||||||
|
UPDATE_COUNTER(gds, GDS_BUSY);
|
||||||
|
UPDATE_COUNTER(vgt, VGT_BUSY);
|
||||||
|
UPDATE_COUNTER(ia, IA_BUSY);
|
||||||
|
UPDATE_COUNTER(sx, SX_BUSY);
|
||||||
|
UPDATE_COUNTER(wd, WD_BUSY);
|
||||||
|
UPDATE_COUNTER(spi, SPI_BUSY);
|
||||||
|
UPDATE_COUNTER(bci, BCI_BUSY);
|
||||||
|
UPDATE_COUNTER(sc, SC_BUSY);
|
||||||
|
UPDATE_COUNTER(pa, PA_BUSY);
|
||||||
|
UPDATE_COUNTER(db, DB_BUSY);
|
||||||
|
UPDATE_COUNTER(cp, CP_BUSY);
|
||||||
|
UPDATE_COUNTER(cb, CB_BUSY);
|
||||||
|
UPDATE_COUNTER(gui, GUI_ACTIVE);
|
||||||
|
gui_busy = GUI_ACTIVE(value);
|
||||||
|
|
||||||
|
if (rscreen->chip_class == CIK || rscreen->chip_class == VI) {
|
||||||
|
/* SRBM_STATUS2 */
|
||||||
|
rscreen->ws->read_registers(rscreen->ws, SRBM_STATUS2, 1, &value);
|
||||||
|
|
||||||
|
UPDATE_COUNTER(sdma, SDMA_BUSY);
|
||||||
|
sdma_busy = SDMA_BUSY(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rscreen->chip_class >= VI) {
|
||||||
|
/* CP_STAT */
|
||||||
|
rscreen->ws->read_registers(rscreen->ws, CP_STAT, 1, &value);
|
||||||
|
|
||||||
|
UPDATE_COUNTER(pfp, PFP_BUSY);
|
||||||
|
UPDATE_COUNTER(meq, MEQ_BUSY);
|
||||||
|
UPDATE_COUNTER(me, ME_BUSY);
|
||||||
|
UPDATE_COUNTER(surf_sync, SURFACE_SYNC_BUSY);
|
||||||
|
UPDATE_COUNTER(cp_dma, DMA_BUSY);
|
||||||
|
UPDATE_COUNTER(scratch_ram, SCRATCH_RAM_BUSY);
|
||||||
|
}
|
||||||
|
|
||||||
|
value = gui_busy || sdma_busy;
|
||||||
|
UPDATE_COUNTER(gpu, IDENTITY);
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef UPDATE_COUNTER
|
||||||
|
|
||||||
|
static int
|
||||||
|
r600_gpu_load_thread(void *param)
|
||||||
|
{
|
||||||
|
struct r600_common_screen *rscreen = (struct r600_common_screen*)param;
|
||||||
|
const int period_us = 1000000 / SAMPLES_PER_SEC;
|
||||||
|
int sleep_us = period_us;
|
||||||
|
int64_t cur_time, last_time = os_time_get();
|
||||||
|
|
||||||
|
while (!p_atomic_read(&rscreen->gpu_load_stop_thread)) {
|
||||||
|
if (sleep_us)
|
||||||
|
os_time_sleep(sleep_us);
|
||||||
|
|
||||||
|
/* Make sure we sleep the ideal amount of time to match
|
||||||
|
* the expected frequency. */
|
||||||
|
cur_time = os_time_get();
|
||||||
|
|
||||||
|
if (os_time_timeout(last_time, last_time + period_us,
|
||||||
|
cur_time))
|
||||||
|
sleep_us = MAX2(sleep_us - 1, 1);
|
||||||
|
else
|
||||||
|
sleep_us += 1;
|
||||||
|
|
||||||
|
/*printf("Hz: %.1f\n", 1000000.0 / (cur_time - last_time));*/
|
||||||
|
last_time = cur_time;
|
||||||
|
|
||||||
|
/* Update the counters. */
|
||||||
|
r600_update_mmio_counters(rscreen, &rscreen->mmio_counters);
|
||||||
|
}
|
||||||
|
p_atomic_dec(&rscreen->gpu_load_stop_thread);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen)
|
||||||
|
{
|
||||||
|
if (!rscreen->gpu_load_thread)
|
||||||
|
return;
|
||||||
|
|
||||||
|
p_atomic_inc(&rscreen->gpu_load_stop_thread);
|
||||||
|
thrd_join(rscreen->gpu_load_thread, NULL);
|
||||||
|
rscreen->gpu_load_thread = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint64_t r600_read_mmio_counter(struct r600_common_screen *rscreen,
|
||||||
|
unsigned busy_index)
|
||||||
|
{
|
||||||
|
/* Start the thread if needed. */
|
||||||
|
if (!rscreen->gpu_load_thread) {
|
||||||
|
mtx_lock(&rscreen->gpu_load_mutex);
|
||||||
|
/* Check again inside the mutex. */
|
||||||
|
if (!rscreen->gpu_load_thread)
|
||||||
|
rscreen->gpu_load_thread =
|
||||||
|
u_thread_create(r600_gpu_load_thread, rscreen);
|
||||||
|
mtx_unlock(&rscreen->gpu_load_mutex);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned busy = p_atomic_read(&rscreen->mmio_counters.array[busy_index]);
|
||||||
|
unsigned idle = p_atomic_read(&rscreen->mmio_counters.array[busy_index + 1]);
|
||||||
|
|
||||||
|
return busy | ((uint64_t)idle << 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned r600_end_mmio_counter(struct r600_common_screen *rscreen,
|
||||||
|
uint64_t begin, unsigned busy_index)
|
||||||
|
{
|
||||||
|
uint64_t end = r600_read_mmio_counter(rscreen, busy_index);
|
||||||
|
unsigned busy = (end & 0xffffffff) - (begin & 0xffffffff);
|
||||||
|
unsigned idle = (end >> 32) - (begin >> 32);
|
||||||
|
|
||||||
|
/* Calculate the % of time the busy counter was being incremented.
|
||||||
|
*
|
||||||
|
* If no counters were incremented, return the current counter status.
|
||||||
|
* It's for the case when the load is queried faster than
|
||||||
|
* the counters are updated.
|
||||||
|
*/
|
||||||
|
if (idle || busy) {
|
||||||
|
return busy*100 / (busy + idle);
|
||||||
|
} else {
|
||||||
|
union r600_mmio_counters counters;
|
||||||
|
|
||||||
|
memset(&counters, 0, sizeof(counters));
|
||||||
|
r600_update_mmio_counters(rscreen, &counters);
|
||||||
|
return counters.array[busy_index] ? 100 : 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Index of a counter's 'busy' slot inside the flat counters array; the
 * matching 'idle' slot is at index + 1. */
#define BUSY_INDEX(rscreen, field) (&rscreen->mmio_counters.named.field.busy - \
				    rscreen->mmio_counters.array)
|
||||||
|
|
||||||
|
static unsigned busy_index_from_type(struct r600_common_screen *rscreen,
|
||||||
|
unsigned type)
|
||||||
|
{
|
||||||
|
switch (type) {
|
||||||
|
case R600_QUERY_GPU_LOAD:
|
||||||
|
return BUSY_INDEX(rscreen, gpu);
|
||||||
|
case R600_QUERY_GPU_SHADERS_BUSY:
|
||||||
|
return BUSY_INDEX(rscreen, spi);
|
||||||
|
case R600_QUERY_GPU_TA_BUSY:
|
||||||
|
return BUSY_INDEX(rscreen, ta);
|
||||||
|
case R600_QUERY_GPU_GDS_BUSY:
|
||||||
|
return BUSY_INDEX(rscreen, gds);
|
||||||
|
case R600_QUERY_GPU_VGT_BUSY:
|
||||||
|
return BUSY_INDEX(rscreen, vgt);
|
||||||
|
case R600_QUERY_GPU_IA_BUSY:
|
||||||
|
return BUSY_INDEX(rscreen, ia);
|
||||||
|
case R600_QUERY_GPU_SX_BUSY:
|
||||||
|
return BUSY_INDEX(rscreen, sx);
|
||||||
|
case R600_QUERY_GPU_WD_BUSY:
|
||||||
|
return BUSY_INDEX(rscreen, wd);
|
||||||
|
case R600_QUERY_GPU_BCI_BUSY:
|
||||||
|
return BUSY_INDEX(rscreen, bci);
|
||||||
|
case R600_QUERY_GPU_SC_BUSY:
|
||||||
|
return BUSY_INDEX(rscreen, sc);
|
||||||
|
case R600_QUERY_GPU_PA_BUSY:
|
||||||
|
return BUSY_INDEX(rscreen, pa);
|
||||||
|
case R600_QUERY_GPU_DB_BUSY:
|
||||||
|
return BUSY_INDEX(rscreen, db);
|
||||||
|
case R600_QUERY_GPU_CP_BUSY:
|
||||||
|
return BUSY_INDEX(rscreen, cp);
|
||||||
|
case R600_QUERY_GPU_CB_BUSY:
|
||||||
|
return BUSY_INDEX(rscreen, cb);
|
||||||
|
case R600_QUERY_GPU_SDMA_BUSY:
|
||||||
|
return BUSY_INDEX(rscreen, sdma);
|
||||||
|
case R600_QUERY_GPU_PFP_BUSY:
|
||||||
|
return BUSY_INDEX(rscreen, pfp);
|
||||||
|
case R600_QUERY_GPU_MEQ_BUSY:
|
||||||
|
return BUSY_INDEX(rscreen, meq);
|
||||||
|
case R600_QUERY_GPU_ME_BUSY:
|
||||||
|
return BUSY_INDEX(rscreen, me);
|
||||||
|
case R600_QUERY_GPU_SURF_SYNC_BUSY:
|
||||||
|
return BUSY_INDEX(rscreen, surf_sync);
|
||||||
|
case R600_QUERY_GPU_CP_DMA_BUSY:
|
||||||
|
return BUSY_INDEX(rscreen, cp_dma);
|
||||||
|
case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
|
||||||
|
return BUSY_INDEX(rscreen, scratch_ram);
|
||||||
|
default:
|
||||||
|
unreachable("invalid query type");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Take the initial busy/idle counter sample for a GPU-load query. */
uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type)
{
	unsigned busy_index = busy_index_from_type(rscreen, type);
	return r600_read_mmio_counter(rscreen, busy_index);
}
|
||||||
|
|
||||||
|
unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,
|
||||||
|
uint64_t begin)
|
||||||
|
{
|
||||||
|
unsigned busy_index = busy_index_from_type(rscreen, type);
|
||||||
|
return r600_end_mmio_counter(rscreen, begin, busy_index);
|
||||||
|
}
|
|
@ -0,0 +1,649 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2015 Advanced Micro Devices, Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*
|
||||||
|
* Authors:
|
||||||
|
* Nicolai Hähnle <nicolai.haehnle@amd.com>
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "util/u_memory.h"
|
||||||
|
#include "r600_query.h"
|
||||||
|
#include "r600_pipe_common.h"
|
||||||
|
#include "amd/common/r600d_common.h"
|
||||||
|
|
||||||
|
/* Max counters per HW block */
|
||||||
|
#define R600_QUERY_MAX_COUNTERS 16
|
||||||
|
|
||||||
|
static struct r600_perfcounter_block *
|
||||||
|
lookup_counter(struct r600_perfcounters *pc, unsigned index,
|
||||||
|
unsigned *base_gid, unsigned *sub_index)
|
||||||
|
{
|
||||||
|
struct r600_perfcounter_block *block = pc->blocks;
|
||||||
|
unsigned bid;
|
||||||
|
|
||||||
|
*base_gid = 0;
|
||||||
|
for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
|
||||||
|
unsigned total = block->num_groups * block->num_selectors;
|
||||||
|
|
||||||
|
if (index < total) {
|
||||||
|
*sub_index = index;
|
||||||
|
return block;
|
||||||
|
}
|
||||||
|
|
||||||
|
index -= total;
|
||||||
|
*base_gid += block->num_groups;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct r600_perfcounter_block *
|
||||||
|
lookup_group(struct r600_perfcounters *pc, unsigned *index)
|
||||||
|
{
|
||||||
|
unsigned bid;
|
||||||
|
struct r600_perfcounter_block *block = pc->blocks;
|
||||||
|
|
||||||
|
for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
|
||||||
|
if (*index < block->num_groups)
|
||||||
|
return block;
|
||||||
|
*index -= block->num_groups;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct r600_pc_group {
|
||||||
|
struct r600_pc_group *next;
|
||||||
|
struct r600_perfcounter_block *block;
|
||||||
|
unsigned sub_gid; /* only used during init */
|
||||||
|
unsigned result_base; /* only used during init */
|
||||||
|
int se;
|
||||||
|
int instance;
|
||||||
|
unsigned num_counters;
|
||||||
|
unsigned selectors[R600_QUERY_MAX_COUNTERS];
|
||||||
|
};
|
||||||
|
|
||||||
|
struct r600_pc_counter {
|
||||||
|
unsigned base;
|
||||||
|
unsigned qwords;
|
||||||
|
unsigned stride; /* in uint64s */
|
||||||
|
};
|
||||||
|
|
||||||
|
#define R600_PC_SHADERS_WINDOWING (1 << 31)
|
||||||
|
|
||||||
|
struct r600_query_pc {
|
||||||
|
struct r600_query_hw b;
|
||||||
|
|
||||||
|
unsigned shaders;
|
||||||
|
unsigned num_counters;
|
||||||
|
struct r600_pc_counter *counters;
|
||||||
|
struct r600_pc_group *groups;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void r600_pc_query_destroy(struct r600_common_screen *rscreen,
|
||||||
|
struct r600_query *rquery)
|
||||||
|
{
|
||||||
|
struct r600_query_pc *query = (struct r600_query_pc *)rquery;
|
||||||
|
|
||||||
|
while (query->groups) {
|
||||||
|
struct r600_pc_group *group = query->groups;
|
||||||
|
query->groups = group->next;
|
||||||
|
FREE(group);
|
||||||
|
}
|
||||||
|
|
||||||
|
FREE(query->counters);
|
||||||
|
|
||||||
|
r600_query_hw_destroy(rscreen, rquery);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool r600_pc_query_prepare_buffer(struct r600_common_screen *screen,
|
||||||
|
struct r600_query_hw *hwquery,
|
||||||
|
struct r600_resource *buffer)
|
||||||
|
{
|
||||||
|
/* no-op */
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void r600_pc_query_emit_start(struct r600_common_context *ctx,
|
||||||
|
struct r600_query_hw *hwquery,
|
||||||
|
struct r600_resource *buffer, uint64_t va)
|
||||||
|
{
|
||||||
|
struct r600_perfcounters *pc = ctx->screen->perfcounters;
|
||||||
|
struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
|
||||||
|
struct r600_pc_group *group;
|
||||||
|
int current_se = -1;
|
||||||
|
int current_instance = -1;
|
||||||
|
|
||||||
|
if (query->shaders)
|
||||||
|
pc->emit_shaders(ctx, query->shaders);
|
||||||
|
|
||||||
|
for (group = query->groups; group; group = group->next) {
|
||||||
|
struct r600_perfcounter_block *block = group->block;
|
||||||
|
|
||||||
|
if (group->se != current_se || group->instance != current_instance) {
|
||||||
|
current_se = group->se;
|
||||||
|
current_instance = group->instance;
|
||||||
|
pc->emit_instance(ctx, group->se, group->instance);
|
||||||
|
}
|
||||||
|
|
||||||
|
pc->emit_select(ctx, block, group->num_counters, group->selectors);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (current_se != -1 || current_instance != -1)
|
||||||
|
pc->emit_instance(ctx, -1, -1);
|
||||||
|
|
||||||
|
pc->emit_start(ctx, buffer, va);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void r600_pc_query_emit_stop(struct r600_common_context *ctx,
|
||||||
|
struct r600_query_hw *hwquery,
|
||||||
|
struct r600_resource *buffer, uint64_t va)
|
||||||
|
{
|
||||||
|
struct r600_perfcounters *pc = ctx->screen->perfcounters;
|
||||||
|
struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
|
||||||
|
struct r600_pc_group *group;
|
||||||
|
|
||||||
|
pc->emit_stop(ctx, buffer, va);
|
||||||
|
|
||||||
|
for (group = query->groups; group; group = group->next) {
|
||||||
|
struct r600_perfcounter_block *block = group->block;
|
||||||
|
unsigned se = group->se >= 0 ? group->se : 0;
|
||||||
|
unsigned se_end = se + 1;
|
||||||
|
|
||||||
|
if ((block->flags & R600_PC_BLOCK_SE) && (group->se < 0))
|
||||||
|
se_end = ctx->screen->info.max_se;
|
||||||
|
|
||||||
|
do {
|
||||||
|
unsigned instance = group->instance >= 0 ? group->instance : 0;
|
||||||
|
|
||||||
|
do {
|
||||||
|
pc->emit_instance(ctx, se, instance);
|
||||||
|
pc->emit_read(ctx, block,
|
||||||
|
group->num_counters, group->selectors,
|
||||||
|
buffer, va);
|
||||||
|
va += sizeof(uint64_t) * group->num_counters;
|
||||||
|
} while (group->instance < 0 && ++instance < block->num_instances);
|
||||||
|
} while (++se < se_end);
|
||||||
|
}
|
||||||
|
|
||||||
|
pc->emit_instance(ctx, -1, -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void r600_pc_query_clear_result(struct r600_query_hw *hwquery,
|
||||||
|
union pipe_query_result *result)
|
||||||
|
{
|
||||||
|
struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
|
||||||
|
|
||||||
|
memset(result, 0, sizeof(result->batch[0]) * query->num_counters);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void r600_pc_query_add_result(struct r600_common_screen *rscreen,
|
||||||
|
struct r600_query_hw *hwquery,
|
||||||
|
void *buffer,
|
||||||
|
union pipe_query_result *result)
|
||||||
|
{
|
||||||
|
struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
|
||||||
|
uint64_t *results = buffer;
|
||||||
|
unsigned i, j;
|
||||||
|
|
||||||
|
for (i = 0; i < query->num_counters; ++i) {
|
||||||
|
struct r600_pc_counter *counter = &query->counters[i];
|
||||||
|
|
||||||
|
for (j = 0; j < counter->qwords; ++j) {
|
||||||
|
uint32_t value = results[counter->base + j * counter->stride];
|
||||||
|
result->batch[i].u64 += value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct r600_query_ops batch_query_ops = {
|
||||||
|
.destroy = r600_pc_query_destroy,
|
||||||
|
.begin = r600_query_hw_begin,
|
||||||
|
.end = r600_query_hw_end,
|
||||||
|
.get_result = r600_query_hw_get_result
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct r600_query_hw_ops batch_query_hw_ops = {
|
||||||
|
.prepare_buffer = r600_pc_query_prepare_buffer,
|
||||||
|
.emit_start = r600_pc_query_emit_start,
|
||||||
|
.emit_stop = r600_pc_query_emit_stop,
|
||||||
|
.clear_result = r600_pc_query_clear_result,
|
||||||
|
.add_result = r600_pc_query_add_result,
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
|
||||||
|
struct r600_query_pc *query,
|
||||||
|
struct r600_perfcounter_block *block,
|
||||||
|
unsigned sub_gid)
|
||||||
|
{
|
||||||
|
struct r600_pc_group *group = query->groups;
|
||||||
|
|
||||||
|
while (group) {
|
||||||
|
if (group->block == block && group->sub_gid == sub_gid)
|
||||||
|
return group;
|
||||||
|
group = group->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
group = CALLOC_STRUCT(r600_pc_group);
|
||||||
|
if (!group)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
group->block = block;
|
||||||
|
group->sub_gid = sub_gid;
|
||||||
|
|
||||||
|
if (block->flags & R600_PC_BLOCK_SHADER) {
|
||||||
|
unsigned sub_gids = block->num_instances;
|
||||||
|
unsigned shader_id;
|
||||||
|
unsigned shaders;
|
||||||
|
unsigned query_shaders;
|
||||||
|
|
||||||
|
if (block->flags & R600_PC_BLOCK_SE_GROUPS)
|
||||||
|
sub_gids = sub_gids * screen->info.max_se;
|
||||||
|
shader_id = sub_gid / sub_gids;
|
||||||
|
sub_gid = sub_gid % sub_gids;
|
||||||
|
|
||||||
|
shaders = screen->perfcounters->shader_type_bits[shader_id];
|
||||||
|
|
||||||
|
query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING;
|
||||||
|
if (query_shaders && query_shaders != shaders) {
|
||||||
|
fprintf(stderr, "r600_perfcounter: incompatible shader groups\n");
|
||||||
|
FREE(group);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
query->shaders = shaders;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
|
||||||
|
// A non-zero value in query->shaders ensures that the shader
|
||||||
|
// masking is reset unless the user explicitly requests one.
|
||||||
|
query->shaders = R600_PC_SHADERS_WINDOWING;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
|
||||||
|
group->se = sub_gid / block->num_instances;
|
||||||
|
sub_gid = sub_gid % block->num_instances;
|
||||||
|
} else {
|
||||||
|
group->se = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
|
||||||
|
group->instance = sub_gid;
|
||||||
|
} else {
|
||||||
|
group->instance = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
group->next = query->groups;
|
||||||
|
query->groups = group;
|
||||||
|
|
||||||
|
return group;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
|
||||||
|
unsigned num_queries,
|
||||||
|
unsigned *query_types)
|
||||||
|
{
|
||||||
|
struct r600_common_screen *screen =
|
||||||
|
(struct r600_common_screen *)ctx->screen;
|
||||||
|
struct r600_perfcounters *pc = screen->perfcounters;
|
||||||
|
struct r600_perfcounter_block *block;
|
||||||
|
struct r600_pc_group *group;
|
||||||
|
struct r600_query_pc *query;
|
||||||
|
unsigned base_gid, sub_gid, sub_index;
|
||||||
|
unsigned i, j;
|
||||||
|
|
||||||
|
if (!pc)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
query = CALLOC_STRUCT(r600_query_pc);
|
||||||
|
if (!query)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
query->b.b.ops = &batch_query_ops;
|
||||||
|
query->b.ops = &batch_query_hw_ops;
|
||||||
|
|
||||||
|
query->num_counters = num_queries;
|
||||||
|
|
||||||
|
/* Collect selectors per group */
|
||||||
|
for (i = 0; i < num_queries; ++i) {
|
||||||
|
unsigned sub_gid;
|
||||||
|
|
||||||
|
if (query_types[i] < R600_QUERY_FIRST_PERFCOUNTER)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
|
||||||
|
&base_gid, &sub_index);
|
||||||
|
if (!block)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
sub_gid = sub_index / block->num_selectors;
|
||||||
|
sub_index = sub_index % block->num_selectors;
|
||||||
|
|
||||||
|
group = get_group_state(screen, query, block, sub_gid);
|
||||||
|
if (!group)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
if (group->num_counters >= block->num_counters) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"perfcounter group %s: too many selected\n",
|
||||||
|
block->basename);
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
group->selectors[group->num_counters] = sub_index;
|
||||||
|
++group->num_counters;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compute result bases and CS size per group */
|
||||||
|
query->b.num_cs_dw_begin = pc->num_start_cs_dwords;
|
||||||
|
query->b.num_cs_dw_end = pc->num_stop_cs_dwords;
|
||||||
|
|
||||||
|
query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
|
||||||
|
query->b.num_cs_dw_end += pc->num_instance_cs_dwords;
|
||||||
|
|
||||||
|
i = 0;
|
||||||
|
for (group = query->groups; group; group = group->next) {
|
||||||
|
struct r600_perfcounter_block *block = group->block;
|
||||||
|
unsigned select_dw, read_dw;
|
||||||
|
unsigned instances = 1;
|
||||||
|
|
||||||
|
if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
|
||||||
|
instances = screen->info.max_se;
|
||||||
|
if (group->instance < 0)
|
||||||
|
instances *= block->num_instances;
|
||||||
|
|
||||||
|
group->result_base = i;
|
||||||
|
query->b.result_size += sizeof(uint64_t) * instances * group->num_counters;
|
||||||
|
i += instances * group->num_counters;
|
||||||
|
|
||||||
|
pc->get_size(block, group->num_counters, group->selectors,
|
||||||
|
&select_dw, &read_dw);
|
||||||
|
query->b.num_cs_dw_begin += select_dw;
|
||||||
|
query->b.num_cs_dw_end += instances * read_dw;
|
||||||
|
query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
|
||||||
|
query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (query->shaders) {
|
||||||
|
if (query->shaders == R600_PC_SHADERS_WINDOWING)
|
||||||
|
query->shaders = 0xffffffff;
|
||||||
|
query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Map user-supplied query array to result indices */
|
||||||
|
query->counters = CALLOC(num_queries, sizeof(*query->counters));
|
||||||
|
for (i = 0; i < num_queries; ++i) {
|
||||||
|
struct r600_pc_counter *counter = &query->counters[i];
|
||||||
|
struct r600_perfcounter_block *block;
|
||||||
|
|
||||||
|
block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
|
||||||
|
&base_gid, &sub_index);
|
||||||
|
|
||||||
|
sub_gid = sub_index / block->num_selectors;
|
||||||
|
sub_index = sub_index % block->num_selectors;
|
||||||
|
|
||||||
|
group = get_group_state(screen, query, block, sub_gid);
|
||||||
|
assert(group != NULL);
|
||||||
|
|
||||||
|
for (j = 0; j < group->num_counters; ++j) {
|
||||||
|
if (group->selectors[j] == sub_index)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
counter->base = group->result_base + j;
|
||||||
|
counter->stride = group->num_counters;
|
||||||
|
|
||||||
|
counter->qwords = 1;
|
||||||
|
if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
|
||||||
|
counter->qwords = screen->info.max_se;
|
||||||
|
if (group->instance < 0)
|
||||||
|
counter->qwords *= block->num_instances;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!r600_query_hw_init(screen, &query->b))
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
return (struct pipe_query *)query;
|
||||||
|
|
||||||
|
error:
|
||||||
|
r600_pc_query_destroy(screen, &query->b.b);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool r600_init_block_names(struct r600_common_screen *screen,
|
||||||
|
struct r600_perfcounter_block *block)
|
||||||
|
{
|
||||||
|
unsigned i, j, k;
|
||||||
|
unsigned groups_shader = 1, groups_se = 1, groups_instance = 1;
|
||||||
|
unsigned namelen;
|
||||||
|
char *groupname;
|
||||||
|
char *p;
|
||||||
|
|
||||||
|
if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
|
||||||
|
groups_instance = block->num_instances;
|
||||||
|
if (block->flags & R600_PC_BLOCK_SE_GROUPS)
|
||||||
|
groups_se = screen->info.max_se;
|
||||||
|
if (block->flags & R600_PC_BLOCK_SHADER)
|
||||||
|
groups_shader = screen->perfcounters->num_shader_types;
|
||||||
|
|
||||||
|
namelen = strlen(block->basename);
|
||||||
|
block->group_name_stride = namelen + 1;
|
||||||
|
if (block->flags & R600_PC_BLOCK_SHADER)
|
||||||
|
block->group_name_stride += 3;
|
||||||
|
if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
|
||||||
|
assert(groups_se <= 10);
|
||||||
|
block->group_name_stride += 1;
|
||||||
|
|
||||||
|
if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
|
||||||
|
block->group_name_stride += 1;
|
||||||
|
}
|
||||||
|
if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
|
||||||
|
assert(groups_instance <= 100);
|
||||||
|
block->group_name_stride += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
block->group_names = MALLOC(block->num_groups * block->group_name_stride);
|
||||||
|
if (!block->group_names)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
groupname = block->group_names;
|
||||||
|
for (i = 0; i < groups_shader; ++i) {
|
||||||
|
const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i];
|
||||||
|
unsigned shaderlen = strlen(shader_suffix);
|
||||||
|
for (j = 0; j < groups_se; ++j) {
|
||||||
|
for (k = 0; k < groups_instance; ++k) {
|
||||||
|
strcpy(groupname, block->basename);
|
||||||
|
p = groupname + namelen;
|
||||||
|
|
||||||
|
if (block->flags & R600_PC_BLOCK_SHADER) {
|
||||||
|
strcpy(p, shader_suffix);
|
||||||
|
p += shaderlen;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
|
||||||
|
p += sprintf(p, "%d", j);
|
||||||
|
if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
|
||||||
|
*p++ = '_';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
|
||||||
|
p += sprintf(p, "%d", k);
|
||||||
|
|
||||||
|
groupname += block->group_name_stride;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(block->num_selectors <= 1000);
|
||||||
|
block->selector_name_stride = block->group_name_stride + 4;
|
||||||
|
block->selector_names = MALLOC(block->num_groups * block->num_selectors *
|
||||||
|
block->selector_name_stride);
|
||||||
|
if (!block->selector_names)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
groupname = block->group_names;
|
||||||
|
p = block->selector_names;
|
||||||
|
for (i = 0; i < block->num_groups; ++i) {
|
||||||
|
for (j = 0; j < block->num_selectors; ++j) {
|
||||||
|
sprintf(p, "%s_%03d", groupname, j);
|
||||||
|
p += block->selector_name_stride;
|
||||||
|
}
|
||||||
|
groupname += block->group_name_stride;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
int r600_get_perfcounter_info(struct r600_common_screen *screen,
|
||||||
|
unsigned index,
|
||||||
|
struct pipe_driver_query_info *info)
|
||||||
|
{
|
||||||
|
struct r600_perfcounters *pc = screen->perfcounters;
|
||||||
|
struct r600_perfcounter_block *block;
|
||||||
|
unsigned base_gid, sub;
|
||||||
|
|
||||||
|
if (!pc)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (!info) {
|
||||||
|
unsigned bid, num_queries = 0;
|
||||||
|
|
||||||
|
for (bid = 0; bid < pc->num_blocks; ++bid) {
|
||||||
|
num_queries += pc->blocks[bid].num_selectors *
|
||||||
|
pc->blocks[bid].num_groups;
|
||||||
|
}
|
||||||
|
|
||||||
|
return num_queries;
|
||||||
|
}
|
||||||
|
|
||||||
|
block = lookup_counter(pc, index, &base_gid, &sub);
|
||||||
|
if (!block)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (!block->selector_names) {
|
||||||
|
if (!r600_init_block_names(screen, block))
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
info->name = block->selector_names + sub * block->selector_name_stride;
|
||||||
|
info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index;
|
||||||
|
info->max_value.u64 = 0;
|
||||||
|
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
|
||||||
|
info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
|
||||||
|
info->group_id = base_gid + sub / block->num_selectors;
|
||||||
|
info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
|
||||||
|
if (sub > 0 && sub + 1 < block->num_selectors * block->num_groups)
|
||||||
|
info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int r600_get_perfcounter_group_info(struct r600_common_screen *screen,
|
||||||
|
unsigned index,
|
||||||
|
struct pipe_driver_query_group_info *info)
|
||||||
|
{
|
||||||
|
struct r600_perfcounters *pc = screen->perfcounters;
|
||||||
|
struct r600_perfcounter_block *block;
|
||||||
|
|
||||||
|
if (!pc)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (!info)
|
||||||
|
return pc->num_groups;
|
||||||
|
|
||||||
|
block = lookup_group(pc, &index);
|
||||||
|
if (!block)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (!block->group_names) {
|
||||||
|
if (!r600_init_block_names(screen, block))
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
info->name = block->group_names + index * block->group_name_stride;
|
||||||
|
info->num_queries = block->num_selectors;
|
||||||
|
info->max_active_queries = block->num_counters;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
void r600_perfcounters_destroy(struct r600_common_screen *rscreen)
|
||||||
|
{
|
||||||
|
if (rscreen->perfcounters)
|
||||||
|
rscreen->perfcounters->cleanup(rscreen);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool r600_perfcounters_init(struct r600_perfcounters *pc,
|
||||||
|
unsigned num_blocks)
|
||||||
|
{
|
||||||
|
pc->blocks = CALLOC(num_blocks, sizeof(struct r600_perfcounter_block));
|
||||||
|
if (!pc->blocks)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
pc->separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", false);
|
||||||
|
pc->separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void r600_perfcounters_add_block(struct r600_common_screen *rscreen,
|
||||||
|
struct r600_perfcounters *pc,
|
||||||
|
const char *name, unsigned flags,
|
||||||
|
unsigned counters, unsigned selectors,
|
||||||
|
unsigned instances, void *data)
|
||||||
|
{
|
||||||
|
struct r600_perfcounter_block *block = &pc->blocks[pc->num_blocks];
|
||||||
|
|
||||||
|
assert(counters <= R600_QUERY_MAX_COUNTERS);
|
||||||
|
|
||||||
|
block->basename = name;
|
||||||
|
block->flags = flags;
|
||||||
|
block->num_counters = counters;
|
||||||
|
block->num_selectors = selectors;
|
||||||
|
block->num_instances = MAX2(instances, 1);
|
||||||
|
block->data = data;
|
||||||
|
|
||||||
|
if (pc->separate_se && (block->flags & R600_PC_BLOCK_SE))
|
||||||
|
block->flags |= R600_PC_BLOCK_SE_GROUPS;
|
||||||
|
if (pc->separate_instance && block->num_instances > 1)
|
||||||
|
block->flags |= R600_PC_BLOCK_INSTANCE_GROUPS;
|
||||||
|
|
||||||
|
if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
|
||||||
|
block->num_groups = block->num_instances;
|
||||||
|
} else {
|
||||||
|
block->num_groups = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (block->flags & R600_PC_BLOCK_SE_GROUPS)
|
||||||
|
block->num_groups *= rscreen->info.max_se;
|
||||||
|
if (block->flags & R600_PC_BLOCK_SHADER)
|
||||||
|
block->num_groups *= pc->num_shader_types;
|
||||||
|
|
||||||
|
++pc->num_blocks;
|
||||||
|
pc->num_groups += block->num_groups;
|
||||||
|
}
|
||||||
|
|
||||||
|
void r600_perfcounters_do_destroy(struct r600_perfcounters *pc)
|
||||||
|
{
|
||||||
|
unsigned i;
|
||||||
|
|
||||||
|
for (i = 0; i < pc->num_blocks; ++i) {
|
||||||
|
FREE(pc->blocks[i].group_names);
|
||||||
|
FREE(pc->blocks[i].selector_names);
|
||||||
|
}
|
||||||
|
FREE(pc->blocks);
|
||||||
|
FREE(pc);
|
||||||
|
}
|
|
@ -37,8 +37,8 @@
|
||||||
#include "util/u_math.h"
|
#include "util/u_math.h"
|
||||||
#include "vl/vl_decoder.h"
|
#include "vl/vl_decoder.h"
|
||||||
#include "vl/vl_video_buffer.h"
|
#include "vl/vl_video_buffer.h"
|
||||||
#include "radeon/radeon_video.h"
|
#include "radeon_video.h"
|
||||||
#include "radeon/radeon_uvd.h"
|
#include "radeon_uvd.h"
|
||||||
#include "os/os_time.h"
|
#include "os/os_time.h"
|
||||||
|
|
||||||
static const struct debug_named_value r600_debug_options[] = {
|
static const struct debug_named_value r600_debug_options[] = {
|
||||||
|
|
|
@ -26,8 +26,8 @@
|
||||||
#ifndef R600_PIPE_H
|
#ifndef R600_PIPE_H
|
||||||
#define R600_PIPE_H
|
#define R600_PIPE_H
|
||||||
|
|
||||||
#include "radeon/r600_pipe_common.h"
|
#include "r600_pipe_common.h"
|
||||||
#include "radeon/r600_cs.h"
|
#include "r600_cs.h"
|
||||||
#include "r600_public.h"
|
#include "r600_public.h"
|
||||||
#include "pipe/p_defines.h"
|
#include "pipe/p_defines.h"
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,327 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2015 Advanced Micro Devices, Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*
|
||||||
|
* Authors:
|
||||||
|
* Nicolai Hähnle <nicolai.haehnle@amd.com>
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef R600_QUERY_H
|
||||||
|
#define R600_QUERY_H
|
||||||
|
|
||||||
|
#include "util/u_threaded_context.h"
|
||||||
|
|
||||||
|
struct pipe_context;
|
||||||
|
struct pipe_query;
|
||||||
|
struct pipe_resource;
|
||||||
|
|
||||||
|
struct r600_common_context;
|
||||||
|
struct r600_common_screen;
|
||||||
|
struct r600_query;
|
||||||
|
struct r600_query_hw;
|
||||||
|
struct r600_resource;
|
||||||
|
|
||||||
|
enum {
|
||||||
|
R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC,
|
||||||
|
R600_QUERY_DECOMPRESS_CALLS,
|
||||||
|
R600_QUERY_MRT_DRAW_CALLS,
|
||||||
|
R600_QUERY_PRIM_RESTART_CALLS,
|
||||||
|
R600_QUERY_SPILL_DRAW_CALLS,
|
||||||
|
R600_QUERY_COMPUTE_CALLS,
|
||||||
|
R600_QUERY_SPILL_COMPUTE_CALLS,
|
||||||
|
R600_QUERY_DMA_CALLS,
|
||||||
|
R600_QUERY_CP_DMA_CALLS,
|
||||||
|
R600_QUERY_NUM_VS_FLUSHES,
|
||||||
|
R600_QUERY_NUM_PS_FLUSHES,
|
||||||
|
R600_QUERY_NUM_CS_FLUSHES,
|
||||||
|
R600_QUERY_NUM_CB_CACHE_FLUSHES,
|
||||||
|
R600_QUERY_NUM_DB_CACHE_FLUSHES,
|
||||||
|
R600_QUERY_NUM_L2_INVALIDATES,
|
||||||
|
R600_QUERY_NUM_L2_WRITEBACKS,
|
||||||
|
R600_QUERY_NUM_RESIDENT_HANDLES,
|
||||||
|
R600_QUERY_TC_OFFLOADED_SLOTS,
|
||||||
|
R600_QUERY_TC_DIRECT_SLOTS,
|
||||||
|
R600_QUERY_TC_NUM_SYNCS,
|
||||||
|
R600_QUERY_CS_THREAD_BUSY,
|
||||||
|
R600_QUERY_GALLIUM_THREAD_BUSY,
|
||||||
|
R600_QUERY_REQUESTED_VRAM,
|
||||||
|
R600_QUERY_REQUESTED_GTT,
|
||||||
|
R600_QUERY_MAPPED_VRAM,
|
||||||
|
R600_QUERY_MAPPED_GTT,
|
||||||
|
R600_QUERY_BUFFER_WAIT_TIME,
|
||||||
|
R600_QUERY_NUM_MAPPED_BUFFERS,
|
||||||
|
R600_QUERY_NUM_GFX_IBS,
|
||||||
|
R600_QUERY_NUM_SDMA_IBS,
|
||||||
|
R600_QUERY_GFX_BO_LIST_SIZE,
|
||||||
|
R600_QUERY_NUM_BYTES_MOVED,
|
||||||
|
R600_QUERY_NUM_EVICTIONS,
|
||||||
|
R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS,
|
||||||
|
R600_QUERY_VRAM_USAGE,
|
||||||
|
R600_QUERY_VRAM_VIS_USAGE,
|
||||||
|
R600_QUERY_GTT_USAGE,
|
||||||
|
R600_QUERY_GPU_TEMPERATURE,
|
||||||
|
R600_QUERY_CURRENT_GPU_SCLK,
|
||||||
|
R600_QUERY_CURRENT_GPU_MCLK,
|
||||||
|
R600_QUERY_GPU_LOAD,
|
||||||
|
R600_QUERY_GPU_SHADERS_BUSY,
|
||||||
|
R600_QUERY_GPU_TA_BUSY,
|
||||||
|
R600_QUERY_GPU_GDS_BUSY,
|
||||||
|
R600_QUERY_GPU_VGT_BUSY,
|
||||||
|
R600_QUERY_GPU_IA_BUSY,
|
||||||
|
R600_QUERY_GPU_SX_BUSY,
|
||||||
|
R600_QUERY_GPU_WD_BUSY,
|
||||||
|
R600_QUERY_GPU_BCI_BUSY,
|
||||||
|
R600_QUERY_GPU_SC_BUSY,
|
||||||
|
R600_QUERY_GPU_PA_BUSY,
|
||||||
|
R600_QUERY_GPU_DB_BUSY,
|
||||||
|
R600_QUERY_GPU_CP_BUSY,
|
||||||
|
R600_QUERY_GPU_CB_BUSY,
|
||||||
|
R600_QUERY_GPU_SDMA_BUSY,
|
||||||
|
R600_QUERY_GPU_PFP_BUSY,
|
||||||
|
R600_QUERY_GPU_MEQ_BUSY,
|
||||||
|
R600_QUERY_GPU_ME_BUSY,
|
||||||
|
R600_QUERY_GPU_SURF_SYNC_BUSY,
|
||||||
|
R600_QUERY_GPU_CP_DMA_BUSY,
|
||||||
|
R600_QUERY_GPU_SCRATCH_RAM_BUSY,
|
||||||
|
R600_QUERY_NUM_COMPILATIONS,
|
||||||
|
R600_QUERY_NUM_SHADERS_CREATED,
|
||||||
|
R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO,
|
||||||
|
R600_QUERY_NUM_SHADER_CACHE_HITS,
|
||||||
|
R600_QUERY_GPIN_ASIC_ID,
|
||||||
|
R600_QUERY_GPIN_NUM_SIMD,
|
||||||
|
R600_QUERY_GPIN_NUM_RB,
|
||||||
|
R600_QUERY_GPIN_NUM_SPI,
|
||||||
|
R600_QUERY_GPIN_NUM_SE,
|
||||||
|
|
||||||
|
R600_QUERY_FIRST_PERFCOUNTER = PIPE_QUERY_DRIVER_SPECIFIC + 100,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum {
|
||||||
|
R600_QUERY_GROUP_GPIN = 0,
|
||||||
|
R600_NUM_SW_QUERY_GROUPS
|
||||||
|
};
|
||||||
|
|
||||||
|
struct r600_query_ops {
|
||||||
|
void (*destroy)(struct r600_common_screen *, struct r600_query *);
|
||||||
|
bool (*begin)(struct r600_common_context *, struct r600_query *);
|
||||||
|
bool (*end)(struct r600_common_context *, struct r600_query *);
|
||||||
|
bool (*get_result)(struct r600_common_context *,
|
||||||
|
struct r600_query *, bool wait,
|
||||||
|
union pipe_query_result *result);
|
||||||
|
void (*get_result_resource)(struct r600_common_context *,
|
||||||
|
struct r600_query *, bool wait,
|
||||||
|
enum pipe_query_value_type result_type,
|
||||||
|
int index,
|
||||||
|
struct pipe_resource *resource,
|
||||||
|
unsigned offset);
|
||||||
|
};
|
||||||
|
|
||||||
|
struct r600_query {
|
||||||
|
struct threaded_query b;
|
||||||
|
struct r600_query_ops *ops;
|
||||||
|
|
||||||
|
/* The type of query */
|
||||||
|
unsigned type;
|
||||||
|
};
|
||||||
|
|
||||||
|
enum {
|
||||||
|
R600_QUERY_HW_FLAG_NO_START = (1 << 0),
|
||||||
|
/* gap */
|
||||||
|
/* whether begin_query doesn't clear the result */
|
||||||
|
R600_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2),
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Callbacks specializing the generic hardware-query machinery for a
 * particular query type. */
struct r600_query_hw_ops {
	/* Initialize a newly allocated results buffer; returns false on
	 * failure. */
	bool (*prepare_buffer)(struct r600_common_screen *,
			       struct r600_query_hw *,
			       struct r600_resource *);
	/* Emit the packets that begin sampling into "buffer" at GPU
	 * address "va". */
	void (*emit_start)(struct r600_common_context *,
			   struct r600_query_hw *,
			   struct r600_resource *buffer, uint64_t va);
	/* Emit the packets that end sampling into "buffer" at "va". */
	void (*emit_stop)(struct r600_common_context *,
			  struct r600_query_hw *,
			  struct r600_resource *buffer, uint64_t va);
	/* Reset *result before samples are accumulated into it. */
	void (*clear_result)(struct r600_query_hw *, union pipe_query_result *);
	/* Accumulate one sample from the CPU-mapped "buffer" into *result. */
	void (*add_result)(struct r600_common_screen *screen,
			   struct r600_query_hw *, void *buffer,
			   union pipe_query_result *result);
};
|
||||||
|
|
||||||
|
/* One link in the chain of GPU buffers holding raw query samples. */
struct r600_query_buffer {
	/* The buffer where query results are stored. */
	struct r600_resource *buf;
	/* Offset of the next free result after current query data */
	unsigned results_end;
	/* If a query buffer is full, a new buffer is created and the old one
	 * is put in here. When we calculate the result, we sum up the samples
	 * from all buffers. */
	struct r600_query_buffer *previous;
};
|
||||||
|
|
||||||
|
/* A query whose results are written by the GPU into a buffer. */
struct r600_query_hw {
	/* Common query state (must be first for casts). */
	struct r600_query b;
	/* Type-specific hardware callbacks. */
	struct r600_query_hw_ops *ops;
	/* R600_QUERY_HW_FLAG_* bits. */
	unsigned flags;

	/* The query buffer and how many results are in it. */
	struct r600_query_buffer buffer;
	/* Size of the result in memory for both begin_query and end_query,
	 * this can be one or two numbers, or it could even be a size of a structure. */
	unsigned result_size;
	/* The number of dwords for begin_query or end_query. */
	unsigned num_cs_dw_begin;
	unsigned num_cs_dw_end;
	/* Linked list of queries */
	struct list_head list;
	/* For transform feedback: which stream the query is for */
	unsigned stream;

	/* Workaround via compute shader */
	struct r600_resource *workaround_buf;
	unsigned workaround_offset;
};
|
||||||
|
|
||||||
|
/* Generic hardware-query entry points; these back the r600_query_ops
 * callbacks for all GPU-sampled query types. */
bool r600_query_hw_init(struct r600_common_screen *rscreen,
			struct r600_query_hw *query);
void r600_query_hw_destroy(struct r600_common_screen *rscreen,
			   struct r600_query *rquery);
bool r600_query_hw_begin(struct r600_common_context *rctx,
			 struct r600_query *rquery);
bool r600_query_hw_end(struct r600_common_context *rctx,
		       struct r600_query *rquery);
bool r600_query_hw_get_result(struct r600_common_context *rctx,
			      struct r600_query *rquery,
			      bool wait,
			      union pipe_query_result *result);
|
||||||
|
|
||||||
|
/* Performance counters */

/* Flags describing a performance-counter hardware block
 * (r600_perfcounter_block::flags). */
enum {
	/* This block is part of the shader engine */
	R600_PC_BLOCK_SE = (1 << 0),

	/* Expose per-instance groups instead of summing all instances (within
	 * an SE). */
	R600_PC_BLOCK_INSTANCE_GROUPS = (1 << 1),

	/* Expose per-SE groups instead of summing instances across SEs. */
	R600_PC_BLOCK_SE_GROUPS = (1 << 2),

	/* Shader block */
	R600_PC_BLOCK_SHADER = (1 << 3),

	/* Non-shader block with perfcounters windowed by shaders. */
	R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4),
};
|
||||||
|
|
||||||
|
/* Describes a hardware block with performance counters. Multiple instances of
 * each block, possibly per-SE, may exist on the chip. Depending on the block
 * and on the user's configuration, we either
 * (a) expose every instance as a performance counter group,
 * (b) expose a single performance counter group that reports the sum over all
 *     instances, or
 * (c) expose one performance counter group per instance, but summed over all
 *     shader engines.
 */
struct r600_perfcounter_block {
	const char *basename;		/* base name of the block, e.g. "GRBM" */
	unsigned flags;			/* R600_PC_BLOCK_* */
	unsigned num_counters;		/* counters usable simultaneously */
	unsigned num_selectors;		/* number of selectable events */
	unsigned num_instances;		/* instances of the block on the chip */

	unsigned num_groups;		/* groups exposed to the state tracker */
	char *group_names;		/* flat array of group-name strings */
	unsigned group_name_stride;	/* bytes between consecutive names */

	char *selector_names;		/* flat array of selector-name strings */
	unsigned selector_name_stride;	/* bytes between consecutive names */

	void *data;			/* chip-specific private data */
};
|
||||||
|
|
||||||
|
/* Per-screen performance-counter state: the list of counter blocks plus the
 * chip-specific callbacks used to program and read them. */
struct r600_perfcounters {
	unsigned num_groups;		/* total groups across all blocks */
	unsigned num_blocks;		/* entries in "blocks" */
	struct r600_perfcounter_block *blocks;

	/* CS space estimates for the fixed parts of a counter sequence. */
	unsigned num_start_cs_dwords;
	unsigned num_stop_cs_dwords;
	unsigned num_instance_cs_dwords;
	unsigned num_shaders_cs_dwords;

	/* Shader-stage filtering for R600_PC_BLOCK_SHADER blocks. */
	unsigned num_shader_types;
	const char * const *shader_type_suffixes;
	const unsigned *shader_type_bits;

	/* Compute the CS space needed to select and read back "count"
	 * counters of a block. */
	void (*get_size)(struct r600_perfcounter_block *,
			 unsigned count, unsigned *selectors,
			 unsigned *num_select_dw, unsigned *num_read_dw);

	/* Restrict following packets to one SE/instance (-1 = broadcast). */
	void (*emit_instance)(struct r600_common_context *,
			      int se, int instance);
	/* Program which shader stages are sampled. */
	void (*emit_shaders)(struct r600_common_context *, unsigned shaders);
	/* Program the event selectors of a block. */
	void (*emit_select)(struct r600_common_context *,
			    struct r600_perfcounter_block *,
			    unsigned count, unsigned *selectors);
	/* Start/stop counting, writing results relative to "va". */
	void (*emit_start)(struct r600_common_context *,
			   struct r600_resource *buffer, uint64_t va);
	void (*emit_stop)(struct r600_common_context *,
			  struct r600_resource *buffer, uint64_t va);
	/* Copy counter values of a block into the buffer at "va". */
	void (*emit_read)(struct r600_common_context *,
			  struct r600_perfcounter_block *,
			  unsigned count, unsigned *selectors,
			  struct r600_resource *buffer, uint64_t va);

	/* Free chip-specific data. */
	void (*cleanup)(struct r600_common_screen *);

	/* User-requested overrides of the default grouping. */
	bool separate_se;
	bool separate_instance;
};
|
||||||
|
|
||||||
|
/* Create a query sampling several performance counters at once. */
struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
					   unsigned num_queries,
					   unsigned *query_types);

/* Enumeration entry points for pipe_screen driver-query introspection. */
int r600_get_perfcounter_info(struct r600_common_screen *,
			      unsigned index,
			      struct pipe_driver_query_info *info);
int r600_get_perfcounter_group_info(struct r600_common_screen *,
				    unsigned index,
				    struct pipe_driver_query_group_info *info);

/* Setup/teardown of the per-screen perfcounter tables. */
bool r600_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks);
void r600_perfcounters_add_block(struct r600_common_screen *,
				 struct r600_perfcounters *,
				 const char *name, unsigned flags,
				 unsigned counters, unsigned selectors,
				 unsigned instances, void *data);
void r600_perfcounters_do_destroy(struct r600_perfcounters *);
/* Release a hardware query's buffer chain and reallocate a fresh buffer. */
void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
				 struct r600_query_hw *query);
|
||||||
|
|
||||||
|
/* Pipeline state saved around the compute-shader pass that writes query
 * results to a buffer object, so it can be restored afterwards.
 * NOTE(review): presumably used by the get_result_resource workaround path —
 * confirm against the .c implementation. */
struct r600_qbo_state {
	void *saved_compute;				/* bound compute shader */
	struct pipe_constant_buffer saved_const0;	/* constant buffer slot 0 */
	struct pipe_shader_buffer saved_ssbo[3];	/* shader buffer slots 0-2 */
};
|
||||||
|
|
||||||
|
#endif /* R600_QUERY_H */
|
|
@ -0,0 +1,381 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2013 Advanced Micro Devices, Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*
|
||||||
|
* Authors: Marek Olšák <maraeo@gmail.com>
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "r600_pipe_common.h"
|
||||||
|
#include "r600_cs.h"
|
||||||
|
|
||||||
|
#include "util/u_memory.h"
|
||||||
|
|
||||||
|
static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable);
|
||||||
|
|
||||||
|
static struct pipe_stream_output_target *
|
||||||
|
r600_create_so_target(struct pipe_context *ctx,
|
||||||
|
struct pipe_resource *buffer,
|
||||||
|
unsigned buffer_offset,
|
||||||
|
unsigned buffer_size)
|
||||||
|
{
|
||||||
|
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
|
||||||
|
struct r600_so_target *t;
|
||||||
|
struct r600_resource *rbuffer = (struct r600_resource*)buffer;
|
||||||
|
|
||||||
|
t = CALLOC_STRUCT(r600_so_target);
|
||||||
|
if (!t) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
u_suballocator_alloc(rctx->allocator_zeroed_memory, 4, 4,
|
||||||
|
&t->buf_filled_size_offset,
|
||||||
|
(struct pipe_resource**)&t->buf_filled_size);
|
||||||
|
if (!t->buf_filled_size) {
|
||||||
|
FREE(t);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
t->b.reference.count = 1;
|
||||||
|
t->b.context = ctx;
|
||||||
|
pipe_resource_reference(&t->b.buffer, buffer);
|
||||||
|
t->b.buffer_offset = buffer_offset;
|
||||||
|
t->b.buffer_size = buffer_size;
|
||||||
|
|
||||||
|
util_range_add(&rbuffer->valid_buffer_range, buffer_offset,
|
||||||
|
buffer_offset + buffer_size);
|
||||||
|
return &t->b;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void r600_so_target_destroy(struct pipe_context *ctx,
|
||||||
|
struct pipe_stream_output_target *target)
|
||||||
|
{
|
||||||
|
struct r600_so_target *t = (struct r600_so_target*)target;
|
||||||
|
pipe_resource_reference(&t->b.buffer, NULL);
|
||||||
|
r600_resource_reference(&t->buf_filled_size, NULL);
|
||||||
|
FREE(t);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Mark the streamout-begin atom dirty and recompute how much CS space the
 * begin and end packet sequences will need for the currently enabled
 * buffers. */
void r600_streamout_buffers_dirty(struct r600_common_context *rctx)
{
	struct r600_atom *begin = &rctx->streamout.begin_atom;
	unsigned num_bufs = util_bitcount(rctx->streamout.enabled_mask);
	unsigned num_bufs_appended = util_bitcount(rctx->streamout.enabled_mask &
						   rctx->streamout.append_bitmask);

	if (!num_bufs)
		return;

	rctx->streamout.num_dw_for_end =
		12 + /* flush_vgt_streamout */
		num_bufs * 11; /* STRMOUT_BUFFER_UPDATE, BUFFER_SIZE */

	begin->num_dw = 12; /* flush_vgt_streamout */

	if (rctx->chip_class >= SI) {
		begin->num_dw += num_bufs * 4; /* SET_CONTEXT_REG */
	} else {
		begin->num_dw += num_bufs * 7; /* SET_CONTEXT_REG */

		/* R7xx needs an extra packet per buffer (see the BUFFER_BASE
		 * lockup workaround in r600_emit_streamout_begin). */
		if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740)
			begin->num_dw += num_bufs * 5; /* STRMOUT_BASE_UPDATE */
	}

	begin->num_dw +=
		num_bufs_appended * 8 + /* STRMOUT_BUFFER_UPDATE */
		(num_bufs - num_bufs_appended) * 6 + /* STRMOUT_BUFFER_UPDATE */
		(rctx->family > CHIP_R600 && rctx->family < CHIP_RS780 ? 2 : 0); /* SURFACE_BASE_UPDATE */
	/* NOTE(review): r600_emit_streamout_begin() emits SURFACE_BASE_UPDATE
	 * for family < CHIP_RV770, but the 2-dword estimate above uses
	 * < CHIP_RS780 — confirm the two ranges are meant to match. */

	rctx->set_atom_dirty(rctx, begin, true);

	r600_set_streamout_enable(rctx, true);
}
|
||||||
|
|
||||||
|
/* pipe_context::set_stream_output_targets implementation.
 * Stops any active streamout, rebinds the target list, and recomputes the
 * enabled/append masks. "offsets[i] == -1" means append to the existing
 * buffer contents instead of starting at the beginning. */
void r600_set_streamout_targets(struct pipe_context *ctx,
				unsigned num_targets,
				struct pipe_stream_output_target **targets,
				const unsigned *offsets)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	unsigned i;
	unsigned enabled_mask = 0, append_bitmask = 0;

	/* Stop streamout. */
	if (rctx->streamout.num_targets && rctx->streamout.begin_emitted) {
		r600_emit_streamout_end(rctx);
	}

	/* Set the new targets. */
	for (i = 0; i < num_targets; i++) {
		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], targets[i]);
		if (!targets[i])
			continue;

		r600_context_add_resource_size(ctx, targets[i]->buffer);
		enabled_mask |= 1 << i;
		if (offsets[i] == ((unsigned)-1))
			append_bitmask |= 1 << i;
	}
	/* Unreference any previously bound targets past the new count. */
	for (; i < rctx->streamout.num_targets; i++) {
		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], NULL);
	}

	rctx->streamout.enabled_mask = enabled_mask;

	rctx->streamout.num_targets = num_targets;
	rctx->streamout.append_bitmask = append_bitmask;

	if (num_targets) {
		r600_streamout_buffers_dirty(rctx);
	} else {
		/* Nothing bound: no begin packets needed, disable streamout. */
		rctx->set_atom_dirty(rctx, &rctx->streamout.begin_atom, false);
		r600_set_streamout_enable(rctx, false);
	}
}
|
||||||
|
|
||||||
|
/* Stall VGT streamout: clear CP_STRMOUT_CNTL, fire a
 * SO_VGTSTREAMOUT_FLUSH event, then make the CP wait until the
 * OFFSET_UPDATE_DONE bit is set in that register. Required before changing
 * streamout state. */
static void r600_flush_vgt_streamout(struct r600_common_context *rctx)
{
	struct radeon_winsys_cs *cs = rctx->gfx.cs;
	unsigned reg_strmout_cntl;

	/* The register is at different places on different ASICs. */
	if (rctx->chip_class >= CIK) {
		reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
	} else if (rctx->chip_class >= EVERGREEN) {
		reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL;
	} else {
		reg_strmout_cntl = R_008490_CP_STRMOUT_CNTL;
	}

	/* CIK moved the register into uconfig space. */
	if (rctx->chip_class >= CIK) {
		radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0);
	} else {
		radeon_set_config_reg(cs, reg_strmout_cntl, 0);
	}

	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0));

	radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
	radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
	radeon_emit(cs, reg_strmout_cntl >> 2);  /* register */
	radeon_emit(cs, 0);
	radeon_emit(cs, S_008490_OFFSET_UPDATE_DONE(1)); /* reference value */
	radeon_emit(cs, S_008490_OFFSET_UPDATE_DONE(1)); /* mask */
	radeon_emit(cs, 4); /* poll interval */
}
|
||||||
|
|
||||||
|
/* Atom callback: emit the packets that (re)start streamout into all bound
 * targets. For each target this programs the buffer size/stride (and, pre-SI,
 * the buffer base address), then either resumes from the saved filled-size
 * counter or starts writing from the target's buffer offset. */
static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = rctx->gfx.cs;
	struct r600_so_target **t = rctx->streamout.targets;
	uint16_t *stride_in_dw = rctx->streamout.stride_in_dw;
	unsigned i, update_flags = 0;

	/* Streamout must be idle before reprogramming it. */
	r600_flush_vgt_streamout(rctx);

	for (i = 0; i < rctx->streamout.num_targets; i++) {
		if (!t[i])
			continue;

		t[i]->stride_in_dw = stride_in_dw[i];

		if (rctx->chip_class >= SI) {
			/* SI binds streamout buffers as shader resources.
			 * VGT only counts primitives and tells the shader
			 * through SGPRs what to do. */
			radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
			radeon_emit(cs, (t[i]->b.buffer_offset +
					 t[i]->b.buffer_size) >> 2);	/* BUFFER_SIZE (in DW) */
			radeon_emit(cs, stride_in_dw[i]);		/* VTX_STRIDE (in DW) */
		} else {
			uint64_t va = r600_resource(t[i]->b.buffer)->gpu_address;

			update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);

			radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3);
			radeon_emit(cs, (t[i]->b.buffer_offset +
					 t[i]->b.buffer_size) >> 2);	/* BUFFER_SIZE (in DW) */
			radeon_emit(cs, stride_in_dw[i]);		/* VTX_STRIDE (in DW) */
			radeon_emit(cs, va >> 8);			/* BUFFER_BASE */

			r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer),
					RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER);

			/* R7xx requires this packet after updating BUFFER_BASE.
			 * Without this, R7xx locks up. */
			if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740) {
				radeon_emit(cs, PKT3(PKT3_STRMOUT_BASE_UPDATE, 1, 0));
				radeon_emit(cs, i);
				radeon_emit(cs, va >> 8);

				r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer),
						RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER);
			}
		}

		if (rctx->streamout.append_bitmask & (1 << i) && t[i]->buf_filled_size_valid) {
			uint64_t va = t[i]->buf_filled_size->gpu_address +
				      t[i]->buf_filled_size_offset;

			/* Append: load the write offset from the saved
			 * filled-size counter in memory. */
			radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
			radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
				    STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */
			radeon_emit(cs, 0); /* unused */
			radeon_emit(cs, 0); /* unused */
			radeon_emit(cs, va); /* src address lo */
			radeon_emit(cs, va >> 32); /* src address hi */

			r600_emit_reloc(rctx, &rctx->gfx, t[i]->buf_filled_size,
					RADEON_USAGE_READ, RADEON_PRIO_SO_FILLED_SIZE);
		} else {
			/* Start from the beginning. */
			radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
			radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
				    STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */
			radeon_emit(cs, 0); /* unused */
			radeon_emit(cs, 0); /* unused */
			radeon_emit(cs, t[i]->b.buffer_offset >> 2); /* buffer offset in DW */
			radeon_emit(cs, 0); /* unused */
		}
	}

	/* R6xx (non-R600): flush the base-address updates made above. */
	if (rctx->family > CHIP_R600 && rctx->family < CHIP_RV770) {
		radeon_emit(cs, PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0));
		radeon_emit(cs, update_flags);
	}
	rctx->streamout.begin_emitted = true;
}
|
||||||
|
|
||||||
|
/* Emit the packets that stop streamout: store each target's filled-size
 * counter to memory and zero VGT_STRMOUT_BUFFER_SIZE so nothing more is
 * written. */
void r600_emit_streamout_end(struct r600_common_context *rctx)
{
	struct radeon_winsys_cs *cs = rctx->gfx.cs;
	struct r600_so_target **t = rctx->streamout.targets;
	unsigned i;
	uint64_t va;

	r600_flush_vgt_streamout(rctx);

	for (i = 0; i < rctx->streamout.num_targets; i++) {
		if (!t[i])
			continue;

		/* Save the current write offset so a later bind can append. */
		va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset;
		radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
		radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
			    STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
			    STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */
		radeon_emit(cs, va);     /* dst address lo */
		radeon_emit(cs, va >> 32); /* dst address hi */
		radeon_emit(cs, 0); /* unused */
		radeon_emit(cs, 0); /* unused */

		r600_emit_reloc(rctx, &rctx->gfx, t[i]->buf_filled_size,
				RADEON_USAGE_WRITE, RADEON_PRIO_SO_FILLED_SIZE);

		/* Zero the buffer size. The counters (primitives generated,
		 * primitives emitted) may be enabled even if there is no
		 * buffer bound. This ensures that the primitives-emitted query
		 * won't increment. */
		radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);

		t[i]->buf_filled_size_valid = true;
	}

	rctx->streamout.begin_emitted = false;
	rctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH;
}
|
||||||
|
|
||||||
|
/* STREAMOUT CONFIG DERIVED STATE
|
||||||
|
*
|
||||||
|
* Streamout must be enabled for the PRIMITIVES_GENERATED query to work.
|
||||||
|
* The buffer mask is an independent state, so no writes occur if there
|
||||||
|
* are no buffers bound.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Atom callback: program the streamout enable/config registers from the
 * derived state computed by r600_set_streamout_enable(). Evergreen+ uses
 * VGT_STRMOUT_CONFIG/BUFFER_CONFIG; older chips use VGT_STRMOUT_EN and
 * VGT_STRMOUT_BUFFER_EN. */
static void r600_emit_streamout_enable(struct r600_common_context *rctx,
				       struct r600_atom *atom)
{
	unsigned strmout_config_reg = R_028AB0_VGT_STRMOUT_EN;
	unsigned strmout_config_val = S_028B94_STREAMOUT_0_EN(r600_get_strmout_en(rctx));
	unsigned strmout_buffer_reg = R_028B20_VGT_STRMOUT_BUFFER_EN;
	/* Only enable buffers that are both bound and written by an active
	 * vertex stream. */
	unsigned strmout_buffer_val = rctx->streamout.hw_enabled_mask &
				      rctx->streamout.enabled_stream_buffers_mask;

	if (rctx->chip_class >= EVERGREEN) {
		strmout_buffer_reg = R_028B98_VGT_STRMOUT_BUFFER_CONFIG;

		strmout_config_reg = R_028B94_VGT_STRMOUT_CONFIG;
		strmout_config_val |=
			S_028B94_RAST_STREAM(0) |
			S_028B94_STREAMOUT_1_EN(r600_get_strmout_en(rctx)) |
			S_028B94_STREAMOUT_2_EN(r600_get_strmout_en(rctx)) |
			S_028B94_STREAMOUT_3_EN(r600_get_strmout_en(rctx));
	}
	radeon_set_context_reg(rctx->gfx.cs, strmout_buffer_reg, strmout_buffer_val);
	radeon_set_context_reg(rctx->gfx.cs, strmout_config_reg, strmout_config_val);
}
|
||||||
|
|
||||||
|
/* Update the derived streamout-enable state and mark the enable atom dirty
 * when the hardware-visible values changed. */
static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable)
{
	bool was_enabled = r600_get_strmout_en(rctx);
	unsigned prev_hw_mask = rctx->streamout.hw_enabled_mask;
	unsigned mask = rctx->streamout.enabled_mask;

	rctx->streamout.streamout_enabled = enable;

	/* Replicate the 4-bit buffer mask into all four stream slots. */
	rctx->streamout.hw_enabled_mask =
		mask | (mask << 4) | (mask << 8) | (mask << 12);

	/* Re-emit the enable registers only if something actually changed. */
	if (was_enabled != r600_get_strmout_en(rctx) ||
	    prev_hw_mask != rctx->streamout.hw_enabled_mask)
		rctx->set_atom_dirty(rctx, &rctx->streamout.enable_atom, true);
}
|
||||||
|
|
||||||
|
void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
|
||||||
|
unsigned type, int diff)
|
||||||
|
{
|
||||||
|
if (type == PIPE_QUERY_PRIMITIVES_GENERATED) {
|
||||||
|
bool old_strmout_en = r600_get_strmout_en(rctx);
|
||||||
|
|
||||||
|
rctx->streamout.num_prims_gen_queries += diff;
|
||||||
|
assert(rctx->streamout.num_prims_gen_queries >= 0);
|
||||||
|
|
||||||
|
rctx->streamout.prims_gen_query_enabled =
|
||||||
|
rctx->streamout.num_prims_gen_queries != 0;
|
||||||
|
|
||||||
|
if (old_strmout_en != r600_get_strmout_en(rctx)) {
|
||||||
|
rctx->set_atom_dirty(rctx, &rctx->streamout.enable_atom, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void r600_streamout_init(struct r600_common_context *rctx)
|
||||||
|
{
|
||||||
|
rctx->b.create_stream_output_target = r600_create_so_target;
|
||||||
|
rctx->b.stream_output_target_destroy = r600_so_target_destroy;
|
||||||
|
rctx->streamout.begin_atom.emit = r600_emit_streamout_begin;
|
||||||
|
rctx->streamout.enable_atom.emit = r600_emit_streamout_enable;
|
||||||
|
rctx->streamout.enable_atom.num_dw = 6;
|
||||||
|
}
|
|
@ -0,0 +1,398 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2016 Advanced Micro Devices, Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* This file implements randomized SDMA texture blit tests. */
|
||||||
|
|
||||||
|
#include "r600_pipe_common.h"
|
||||||
|
#include "util/u_surface.h"
|
||||||
|
#include "util/rand_xor.h"
|
||||||
|
|
||||||
|
static uint64_t seed_xorshift128plus[2];
|
||||||
|
|
||||||
|
#define RAND_NUM_SIZE 8
|
||||||
|
|
||||||
|
/* The GPU blits are emulated on the CPU using these CPU textures. */
struct cpu_texture {
	uint8_t *ptr;		/* malloc'ed pixel data */
	uint64_t size;		/* total size in bytes */
	uint64_t layer_stride;	/* bytes per array layer */
	unsigned stride;	/* bytes per row, aligned to RAND_NUM_SIZE */
};
|
||||||
|
|
||||||
|
static void alloc_cpu_texture(struct cpu_texture *tex,
|
||||||
|
struct pipe_resource *templ, int bpp)
|
||||||
|
{
|
||||||
|
tex->stride = align(templ->width0 * bpp, RAND_NUM_SIZE);
|
||||||
|
tex->layer_stride = (uint64_t)tex->stride * templ->height0;
|
||||||
|
tex->size = tex->layer_stride * templ->array_size;
|
||||||
|
tex->ptr = malloc(tex->size);
|
||||||
|
assert(tex->ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Fill both the GPU texture and its CPU mirror with identical random data,
 * one 8-byte xorshift word at a time (strides are aligned to RAND_NUM_SIZE
 * by alloc_cpu_texture, and asserted below). */
static void set_random_pixels(struct pipe_context *ctx,
			      struct pipe_resource *tex,
			      struct cpu_texture *cpu)
{
	struct pipe_transfer *t;
	uint8_t *map;
	int x,y,z;

	map = pipe_transfer_map_3d(ctx, tex, 0, PIPE_TRANSFER_WRITE,
				   0, 0, 0, tex->width0, tex->height0,
				   tex->array_size, &t);
	assert(map);

	for (z = 0; z < tex->array_size; z++) {
		for (y = 0; y < tex->height0; y++) {
			/* Row start in the mapped GPU texture. */
			uint64_t *ptr = (uint64_t*)
				(map + t->layer_stride*z + t->stride*y);
			/* Matching row start in the CPU mirror. */
			uint64_t *ptr_cpu = (uint64_t*)
				(cpu->ptr + cpu->layer_stride*z + cpu->stride*y);
			unsigned size = cpu->stride / RAND_NUM_SIZE;

			assert(t->stride % RAND_NUM_SIZE == 0);
			assert(cpu->stride % RAND_NUM_SIZE == 0);

			/* Same random word goes to both copies. */
			for (x = 0; x < size; x++) {
				*ptr++ = *ptr_cpu++ =
					rand_xorshift128plus(seed_xorshift128plus);
			}
		}
	}

	pipe_transfer_unmap(ctx, t);
}
|
||||||
|
|
||||||
|
/* Compare the GPU texture against its CPU mirror row by row; returns true
 * when every row matches. Only width0*bpp bytes per row are compared, so
 * row padding is ignored. */
static bool compare_textures(struct pipe_context *ctx,
			     struct pipe_resource *tex,
			     struct cpu_texture *cpu, int bpp)
{
	struct pipe_transfer *t;
	uint8_t *map;
	int y,z;
	bool pass = true;

	map = pipe_transfer_map_3d(ctx, tex, 0, PIPE_TRANSFER_READ,
				   0, 0, 0, tex->width0, tex->height0,
				   tex->array_size, &t);
	assert(map);

	for (z = 0; z < tex->array_size; z++) {
		for (y = 0; y < tex->height0; y++) {
			uint8_t *ptr = map + t->layer_stride*z + t->stride*y;
			uint8_t *cpu_ptr = cpu->ptr +
					   cpu->layer_stride*z + cpu->stride*y;

			/* Stop at the first mismatching row. */
			if (memcmp(ptr, cpu_ptr, tex->width0 * bpp)) {
				pass = false;
				goto done;
			}
		}
	}
done:
	pipe_transfer_unmap(ctx, t);
	return pass;
}
|
||||||
|
|
||||||
|
static enum pipe_format get_format_from_bpp(int bpp)
|
||||||
|
{
|
||||||
|
switch (bpp) {
|
||||||
|
case 1:
|
||||||
|
return PIPE_FORMAT_R8_UINT;
|
||||||
|
case 2:
|
||||||
|
return PIPE_FORMAT_R16_UINT;
|
||||||
|
case 4:
|
||||||
|
return PIPE_FORMAT_R32_UINT;
|
||||||
|
case 8:
|
||||||
|
return PIPE_FORMAT_R32G32_UINT;
|
||||||
|
case 16:
|
||||||
|
return PIPE_FORMAT_R32G32B32A32_UINT;
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
return PIPE_FORMAT_NONE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static const char *array_mode_to_string(struct r600_common_screen *rscreen,
|
||||||
|
struct radeon_surf *surf)
|
||||||
|
{
|
||||||
|
if (rscreen->chip_class >= GFX9) {
|
||||||
|
/* TODO */
|
||||||
|
return " UNKNOWN";
|
||||||
|
} else {
|
||||||
|
switch (surf->u.legacy.level[0].mode) {
|
||||||
|
case RADEON_SURF_MODE_LINEAR_ALIGNED:
|
||||||
|
return "LINEAR_ALIGNED";
|
||||||
|
case RADEON_SURF_MODE_1D:
|
||||||
|
return "1D_TILED_THIN1";
|
||||||
|
case RADEON_SURF_MODE_2D:
|
||||||
|
return "2D_TILED_THIN1";
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
return " UNKNOWN";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Pick a random upper bound for texture dimensions, biased toward
 * interesting cases: 1/4 large (max_tex_side), 1/4 small enough to hit
 * 1D tiling (128), 2/4 a common size (2048). Consumes one rand() call. */
static unsigned generate_max_tex_side(unsigned max_tex_side)
{
	unsigned bucket = rand() % 4;

	if (bucket == 0) {
		/* Try to hit large sizes in 1/4 of the cases. */
		return max_tex_side;
	}
	if (bucket == 1) {
		/* Try to hit 1D tiling in 1/4 of the cases. */
		return 128;
	}
	/* Try to hit common sizes in 2/4 of the cases. */
	return 2048;
}
|
||||||
|
|
||||||
|
void r600_test_dma(struct r600_common_screen *rscreen)
|
||||||
|
{
|
||||||
|
struct pipe_screen *screen = &rscreen->b;
|
||||||
|
struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
|
||||||
|
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
|
||||||
|
uint64_t max_alloc_size;
|
||||||
|
unsigned i, iterations, num_partial_copies, max_levels, max_tex_side;
|
||||||
|
unsigned num_pass = 0, num_fail = 0;
|
||||||
|
|
||||||
|
max_levels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS);
|
||||||
|
max_tex_side = 1 << (max_levels - 1);
|
||||||
|
|
||||||
|
/* Max 128 MB allowed for both textures. */
|
||||||
|
max_alloc_size = 128 * 1024 * 1024;
|
||||||
|
|
||||||
|
/* the seed for random test parameters */
|
||||||
|
srand(0x9b47d95b);
|
||||||
|
/* the seed for random pixel data */
|
||||||
|
s_rand_xorshift128plus(seed_xorshift128plus, false);
|
||||||
|
|
||||||
|
iterations = 1000000000; /* just kill it when you are bored */
|
||||||
|
num_partial_copies = 30;
|
||||||
|
|
||||||
|
/* These parameters are randomly generated per test:
|
||||||
|
* - whether to do one whole-surface copy or N partial copies per test
|
||||||
|
* - which tiling modes to use (LINEAR_ALIGNED, 1D, 2D)
|
||||||
|
* - which texture dimensions to use
|
||||||
|
* - whether to use VRAM (all tiling modes) and GTT (staging, linear
|
||||||
|
* only) allocations
|
||||||
|
* - random initial pixels in src
|
||||||
|
* - generate random subrectangle copies for partial blits
|
||||||
|
*/
|
||||||
|
for (i = 0; i < iterations; i++) {
|
||||||
|
struct pipe_resource tsrc = {}, tdst = {}, *src, *dst;
|
||||||
|
struct r600_texture *rdst;
|
||||||
|
struct r600_texture *rsrc;
|
||||||
|
struct cpu_texture src_cpu, dst_cpu;
|
||||||
|
unsigned bpp, max_width, max_height, max_depth, j, num;
|
||||||
|
unsigned gfx_blits = 0, dma_blits = 0, max_tex_side_gen;
|
||||||
|
unsigned max_tex_layers;
|
||||||
|
bool pass;
|
||||||
|
bool do_partial_copies = rand() & 1;
|
||||||
|
|
||||||
|
/* generate a random test case */
|
||||||
|
tsrc.target = tdst.target = PIPE_TEXTURE_2D_ARRAY;
|
||||||
|
tsrc.depth0 = tdst.depth0 = 1;
|
||||||
|
|
||||||
|
bpp = 1 << (rand() % 5);
|
||||||
|
tsrc.format = tdst.format = get_format_from_bpp(bpp);
|
||||||
|
|
||||||
|
max_tex_side_gen = generate_max_tex_side(max_tex_side);
|
||||||
|
max_tex_layers = rand() % 4 ? 1 : 5;
|
||||||
|
|
||||||
|
tsrc.width0 = (rand() % max_tex_side_gen) + 1;
|
||||||
|
tsrc.height0 = (rand() % max_tex_side_gen) + 1;
|
||||||
|
tsrc.array_size = (rand() % max_tex_layers) + 1;
|
||||||
|
|
||||||
|
/* Have a 1/4 chance of getting power-of-two dimensions. */
|
||||||
|
if (rand() % 4 == 0) {
|
||||||
|
tsrc.width0 = util_next_power_of_two(tsrc.width0);
|
||||||
|
tsrc.height0 = util_next_power_of_two(tsrc.height0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!do_partial_copies) {
|
||||||
|
/* whole-surface copies only, same dimensions */
|
||||||
|
tdst = tsrc;
|
||||||
|
} else {
|
||||||
|
max_tex_side_gen = generate_max_tex_side(max_tex_side);
|
||||||
|
max_tex_layers = rand() % 4 ? 1 : 5;
|
||||||
|
|
||||||
|
/* many partial copies, dimensions can be different */
|
||||||
|
tdst.width0 = (rand() % max_tex_side_gen) + 1;
|
||||||
|
tdst.height0 = (rand() % max_tex_side_gen) + 1;
|
||||||
|
tdst.array_size = (rand() % max_tex_layers) + 1;
|
||||||
|
|
||||||
|
/* Have a 1/4 chance of getting power-of-two dimensions. */
|
||||||
|
if (rand() % 4 == 0) {
|
||||||
|
tdst.width0 = util_next_power_of_two(tdst.width0);
|
||||||
|
tdst.height0 = util_next_power_of_two(tdst.height0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check texture sizes */
|
||||||
|
if ((uint64_t)tsrc.width0 * tsrc.height0 * tsrc.array_size * bpp +
|
||||||
|
(uint64_t)tdst.width0 * tdst.height0 * tdst.array_size * bpp >
|
||||||
|
max_alloc_size) {
|
||||||
|
/* too large, try again */
|
||||||
|
i--;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* VRAM + the tiling mode depends on dimensions (3/4 of cases),
|
||||||
|
* or GTT + linear only (1/4 of cases)
|
||||||
|
*/
|
||||||
|
tsrc.usage = rand() % 4 ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STAGING;
|
||||||
|
tdst.usage = rand() % 4 ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STAGING;
|
||||||
|
|
||||||
|
/* Allocate textures (both the GPU and CPU copies).
|
||||||
|
* The CPU will emulate what the GPU should be doing.
|
||||||
|
*/
|
||||||
|
src = screen->resource_create(screen, &tsrc);
|
||||||
|
dst = screen->resource_create(screen, &tdst);
|
||||||
|
assert(src);
|
||||||
|
assert(dst);
|
||||||
|
rdst = (struct r600_texture*)dst;
|
||||||
|
rsrc = (struct r600_texture*)src;
|
||||||
|
alloc_cpu_texture(&src_cpu, &tsrc, bpp);
|
||||||
|
alloc_cpu_texture(&dst_cpu, &tdst, bpp);
|
||||||
|
|
||||||
|
printf("%4u: dst = (%5u x %5u x %u, %s), "
|
||||||
|
" src = (%5u x %5u x %u, %s), bpp = %2u, ",
|
||||||
|
i, tdst.width0, tdst.height0, tdst.array_size,
|
||||||
|
array_mode_to_string(rscreen, &rdst->surface),
|
||||||
|
tsrc.width0, tsrc.height0, tsrc.array_size,
|
||||||
|
array_mode_to_string(rscreen, &rsrc->surface), bpp);
|
||||||
|
fflush(stdout);
|
||||||
|
|
||||||
|
/* set src pixels */
|
||||||
|
set_random_pixels(ctx, src, &src_cpu);
|
||||||
|
|
||||||
|
/* clear dst pixels */
|
||||||
|
rctx->clear_buffer(ctx, dst, 0, rdst->surface.surf_size, 0, true);
|
||||||
|
memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size);
|
||||||
|
|
||||||
|
/* preparation */
|
||||||
|
max_width = MIN2(tsrc.width0, tdst.width0);
|
||||||
|
max_height = MIN2(tsrc.height0, tdst.height0);
|
||||||
|
max_depth = MIN2(tsrc.array_size, tdst.array_size);
|
||||||
|
|
||||||
|
num = do_partial_copies ? num_partial_copies : 1;
|
||||||
|
for (j = 0; j < num; j++) {
|
||||||
|
int width, height, depth;
|
||||||
|
int srcx, srcy, srcz, dstx, dsty, dstz;
|
||||||
|
struct pipe_box box;
|
||||||
|
unsigned old_num_draw_calls = rctx->num_draw_calls;
|
||||||
|
unsigned old_num_dma_calls = rctx->num_dma_calls;
|
||||||
|
|
||||||
|
if (!do_partial_copies) {
|
||||||
|
/* copy whole src to dst */
|
||||||
|
width = max_width;
|
||||||
|
height = max_height;
|
||||||
|
depth = max_depth;
|
||||||
|
|
||||||
|
srcx = srcy = srcz = dstx = dsty = dstz = 0;
|
||||||
|
} else {
|
||||||
|
/* random sub-rectangle copies from src to dst */
|
||||||
|
depth = (rand() % max_depth) + 1;
|
||||||
|
srcz = rand() % (tsrc.array_size - depth + 1);
|
||||||
|
dstz = rand() % (tdst.array_size - depth + 1);
|
||||||
|
|
||||||
|
/* special code path to hit the tiled partial copies */
|
||||||
|
if (!rsrc->surface.is_linear &&
|
||||||
|
!rdst->surface.is_linear &&
|
||||||
|
rand() & 1) {
|
||||||
|
if (max_width < 8 || max_height < 8)
|
||||||
|
continue;
|
||||||
|
width = ((rand() % (max_width / 8)) + 1) * 8;
|
||||||
|
height = ((rand() % (max_height / 8)) + 1) * 8;
|
||||||
|
|
||||||
|
srcx = rand() % (tsrc.width0 - width + 1) & ~0x7;
|
||||||
|
srcy = rand() % (tsrc.height0 - height + 1) & ~0x7;
|
||||||
|
|
||||||
|
dstx = rand() % (tdst.width0 - width + 1) & ~0x7;
|
||||||
|
dsty = rand() % (tdst.height0 - height + 1) & ~0x7;
|
||||||
|
} else {
|
||||||
|
/* just make sure that it doesn't divide by zero */
|
||||||
|
assert(max_width > 0 && max_height > 0);
|
||||||
|
|
||||||
|
width = (rand() % max_width) + 1;
|
||||||
|
height = (rand() % max_height) + 1;
|
||||||
|
|
||||||
|
srcx = rand() % (tsrc.width0 - width + 1);
|
||||||
|
srcy = rand() % (tsrc.height0 - height + 1);
|
||||||
|
|
||||||
|
dstx = rand() % (tdst.width0 - width + 1);
|
||||||
|
dsty = rand() % (tdst.height0 - height + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* special code path to hit out-of-bounds reads in L2T */
|
||||||
|
if (rsrc->surface.is_linear &&
|
||||||
|
!rdst->surface.is_linear &&
|
||||||
|
rand() % 4 == 0) {
|
||||||
|
srcx = 0;
|
||||||
|
srcy = 0;
|
||||||
|
srcz = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* GPU copy */
|
||||||
|
u_box_3d(srcx, srcy, srcz, width, height, depth, &box);
|
||||||
|
rctx->dma_copy(ctx, dst, 0, dstx, dsty, dstz, src, 0, &box);
|
||||||
|
|
||||||
|
/* See which engine was used. */
|
||||||
|
gfx_blits += rctx->num_draw_calls > old_num_draw_calls;
|
||||||
|
dma_blits += rctx->num_dma_calls > old_num_dma_calls;
|
||||||
|
|
||||||
|
/* CPU copy */
|
||||||
|
util_copy_box(dst_cpu.ptr, tdst.format, dst_cpu.stride,
|
||||||
|
dst_cpu.layer_stride,
|
||||||
|
dstx, dsty, dstz, width, height, depth,
|
||||||
|
src_cpu.ptr, src_cpu.stride,
|
||||||
|
src_cpu.layer_stride,
|
||||||
|
srcx, srcy, srcz);
|
||||||
|
}
|
||||||
|
|
||||||
|
pass = compare_textures(ctx, dst, &dst_cpu, bpp);
|
||||||
|
if (pass)
|
||||||
|
num_pass++;
|
||||||
|
else
|
||||||
|
num_fail++;
|
||||||
|
|
||||||
|
printf("BLITs: GFX = %2u, DMA = %2u, %s [%u/%u]\n",
|
||||||
|
gfx_blits, dma_blits, pass ? "pass" : "fail",
|
||||||
|
num_pass, num_pass+num_fail);
|
||||||
|
|
||||||
|
/* cleanup */
|
||||||
|
pipe_resource_reference(&src, NULL);
|
||||||
|
pipe_resource_reference(&dst, NULL);
|
||||||
|
free(src_cpu.ptr);
|
||||||
|
free(dst_cpu.ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx->destroy(ctx);
|
||||||
|
exit(0);
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
|
@ -45,9 +45,9 @@
|
||||||
#include "vl/vl_mpeg12_decoder.h"
|
#include "vl/vl_mpeg12_decoder.h"
|
||||||
|
|
||||||
#include "r600_pipe.h"
|
#include "r600_pipe.h"
|
||||||
#include "radeon/radeon_video.h"
|
#include "radeon_video.h"
|
||||||
#include "radeon/radeon_uvd.h"
|
#include "radeon_uvd.h"
|
||||||
#include "radeon/radeon_vce.h"
|
#include "radeon_vce.h"
|
||||||
#include "r600d.h"
|
#include "r600d.h"
|
||||||
|
|
||||||
#define R600_UVD_ENABLE_TILING 0
|
#define R600_UVD_ENABLE_TILING 0
|
||||||
|
|
|
@ -0,0 +1,433 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2012 Advanced Micro Devices, Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||||
|
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||||
|
* the Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||||
|
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||||
|
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "r600_cs.h"
|
||||||
|
#include "util/u_viewport.h"
|
||||||
|
#include "tgsi/tgsi_scan.h"
|
||||||
|
|
||||||
|
#define GET_MAX_SCISSOR(rctx) (rctx->chip_class >= EVERGREEN ? 16384 : 8192)
|
||||||
|
|
||||||
|
static void r600_set_scissor_states(struct pipe_context *ctx,
|
||||||
|
unsigned start_slot,
|
||||||
|
unsigned num_scissors,
|
||||||
|
const struct pipe_scissor_state *state)
|
||||||
|
{
|
||||||
|
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < num_scissors; i++)
|
||||||
|
rctx->scissors.states[start_slot + i] = state[i];
|
||||||
|
|
||||||
|
if (!rctx->scissor_enabled)
|
||||||
|
return;
|
||||||
|
|
||||||
|
rctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
|
||||||
|
rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Since the guard band disables clipping, we have to clip per-pixel
|
||||||
|
* using a scissor.
|
||||||
|
*/
|
||||||
|
static void r600_get_scissor_from_viewport(struct r600_common_context *rctx,
|
||||||
|
const struct pipe_viewport_state *vp,
|
||||||
|
struct r600_signed_scissor *scissor)
|
||||||
|
{
|
||||||
|
float tmp, minx, miny, maxx, maxy;
|
||||||
|
|
||||||
|
/* Convert (-1, -1) and (1, 1) from clip space into window space. */
|
||||||
|
minx = -vp->scale[0] + vp->translate[0];
|
||||||
|
miny = -vp->scale[1] + vp->translate[1];
|
||||||
|
maxx = vp->scale[0] + vp->translate[0];
|
||||||
|
maxy = vp->scale[1] + vp->translate[1];
|
||||||
|
|
||||||
|
/* r600_draw_rectangle sets this. Disable the scissor. */
|
||||||
|
if (minx == -1 && miny == -1 && maxx == 1 && maxy == 1) {
|
||||||
|
scissor->minx = scissor->miny = 0;
|
||||||
|
scissor->maxx = scissor->maxy = GET_MAX_SCISSOR(rctx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Handle inverted viewports. */
|
||||||
|
if (minx > maxx) {
|
||||||
|
tmp = minx;
|
||||||
|
minx = maxx;
|
||||||
|
maxx = tmp;
|
||||||
|
}
|
||||||
|
if (miny > maxy) {
|
||||||
|
tmp = miny;
|
||||||
|
miny = maxy;
|
||||||
|
maxy = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Convert to integer and round up the max bounds. */
|
||||||
|
scissor->minx = minx;
|
||||||
|
scissor->miny = miny;
|
||||||
|
scissor->maxx = ceilf(maxx);
|
||||||
|
scissor->maxy = ceilf(maxy);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void r600_clamp_scissor(struct r600_common_context *rctx,
|
||||||
|
struct pipe_scissor_state *out,
|
||||||
|
struct r600_signed_scissor *scissor)
|
||||||
|
{
|
||||||
|
unsigned max_scissor = GET_MAX_SCISSOR(rctx);
|
||||||
|
out->minx = CLAMP(scissor->minx, 0, max_scissor);
|
||||||
|
out->miny = CLAMP(scissor->miny, 0, max_scissor);
|
||||||
|
out->maxx = CLAMP(scissor->maxx, 0, max_scissor);
|
||||||
|
out->maxy = CLAMP(scissor->maxy, 0, max_scissor);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void r600_clip_scissor(struct pipe_scissor_state *out,
|
||||||
|
struct pipe_scissor_state *clip)
|
||||||
|
{
|
||||||
|
out->minx = MAX2(out->minx, clip->minx);
|
||||||
|
out->miny = MAX2(out->miny, clip->miny);
|
||||||
|
out->maxx = MIN2(out->maxx, clip->maxx);
|
||||||
|
out->maxy = MIN2(out->maxy, clip->maxy);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void r600_scissor_make_union(struct r600_signed_scissor *out,
|
||||||
|
struct r600_signed_scissor *in)
|
||||||
|
{
|
||||||
|
out->minx = MIN2(out->minx, in->minx);
|
||||||
|
out->miny = MIN2(out->miny, in->miny);
|
||||||
|
out->maxx = MAX2(out->maxx, in->maxx);
|
||||||
|
out->maxy = MAX2(out->maxy, in->maxy);
|
||||||
|
}
|
||||||
|
|
||||||
|
void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
|
||||||
|
struct pipe_scissor_state *scissor)
|
||||||
|
{
|
||||||
|
if (rctx->chip_class == EVERGREEN || rctx->chip_class == CAYMAN) {
|
||||||
|
if (scissor->maxx == 0)
|
||||||
|
scissor->minx = 1;
|
||||||
|
if (scissor->maxy == 0)
|
||||||
|
scissor->miny = 1;
|
||||||
|
|
||||||
|
if (rctx->chip_class == CAYMAN &&
|
||||||
|
scissor->maxx == 1 && scissor->maxy == 1)
|
||||||
|
scissor->maxx = 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void r600_emit_one_scissor(struct r600_common_context *rctx,
|
||||||
|
struct radeon_winsys_cs *cs,
|
||||||
|
struct r600_signed_scissor *vp_scissor,
|
||||||
|
struct pipe_scissor_state *scissor)
|
||||||
|
{
|
||||||
|
struct pipe_scissor_state final;
|
||||||
|
|
||||||
|
if (rctx->vs_disables_clipping_viewport) {
|
||||||
|
final.minx = final.miny = 0;
|
||||||
|
final.maxx = final.maxy = GET_MAX_SCISSOR(rctx);
|
||||||
|
} else {
|
||||||
|
r600_clamp_scissor(rctx, &final, vp_scissor);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (scissor)
|
||||||
|
r600_clip_scissor(&final, scissor);
|
||||||
|
|
||||||
|
evergreen_apply_scissor_bug_workaround(rctx, &final);
|
||||||
|
|
||||||
|
radeon_emit(cs, S_028250_TL_X(final.minx) |
|
||||||
|
S_028250_TL_Y(final.miny) |
|
||||||
|
S_028250_WINDOW_OFFSET_DISABLE(1));
|
||||||
|
radeon_emit(cs, S_028254_BR_X(final.maxx) |
|
||||||
|
S_028254_BR_Y(final.maxy));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* the range is [-MAX, MAX] */
|
||||||
|
#define GET_MAX_VIEWPORT_RANGE(rctx) (rctx->chip_class >= EVERGREEN ? 32768 : 16384)
|
||||||
|
|
||||||
|
static void r600_emit_guardband(struct r600_common_context *rctx,
|
||||||
|
struct r600_signed_scissor *vp_as_scissor)
|
||||||
|
{
|
||||||
|
struct radeon_winsys_cs *cs = rctx->gfx.cs;
|
||||||
|
struct pipe_viewport_state vp;
|
||||||
|
float left, top, right, bottom, max_range, guardband_x, guardband_y;
|
||||||
|
|
||||||
|
/* Reconstruct the viewport transformation from the scissor. */
|
||||||
|
vp.translate[0] = (vp_as_scissor->minx + vp_as_scissor->maxx) / 2.0;
|
||||||
|
vp.translate[1] = (vp_as_scissor->miny + vp_as_scissor->maxy) / 2.0;
|
||||||
|
vp.scale[0] = vp_as_scissor->maxx - vp.translate[0];
|
||||||
|
vp.scale[1] = vp_as_scissor->maxy - vp.translate[1];
|
||||||
|
|
||||||
|
/* Treat a 0x0 viewport as 1x1 to prevent division by zero. */
|
||||||
|
if (vp_as_scissor->minx == vp_as_scissor->maxx)
|
||||||
|
vp.scale[0] = 0.5;
|
||||||
|
if (vp_as_scissor->miny == vp_as_scissor->maxy)
|
||||||
|
vp.scale[1] = 0.5;
|
||||||
|
|
||||||
|
/* Find the biggest guard band that is inside the supported viewport
|
||||||
|
* range. The guard band is specified as a horizontal and vertical
|
||||||
|
* distance from (0,0) in clip space.
|
||||||
|
*
|
||||||
|
* This is done by applying the inverse viewport transformation
|
||||||
|
* on the viewport limits to get those limits in clip space.
|
||||||
|
*
|
||||||
|
* Use a limit one pixel smaller to allow for some precision error.
|
||||||
|
*/
|
||||||
|
max_range = GET_MAX_VIEWPORT_RANGE(rctx) - 1;
|
||||||
|
left = (-max_range - vp.translate[0]) / vp.scale[0];
|
||||||
|
right = ( max_range - vp.translate[0]) / vp.scale[0];
|
||||||
|
top = (-max_range - vp.translate[1]) / vp.scale[1];
|
||||||
|
bottom = ( max_range - vp.translate[1]) / vp.scale[1];
|
||||||
|
|
||||||
|
assert(left <= -1 && top <= -1 && right >= 1 && bottom >= 1);
|
||||||
|
|
||||||
|
guardband_x = MIN2(-left, right);
|
||||||
|
guardband_y = MIN2(-top, bottom);
|
||||||
|
|
||||||
|
/* If any of the GB registers is updated, all of them must be updated. */
|
||||||
|
if (rctx->chip_class >= CAYMAN)
|
||||||
|
radeon_set_context_reg_seq(cs, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
|
||||||
|
else
|
||||||
|
radeon_set_context_reg_seq(cs, R600_R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 4);
|
||||||
|
|
||||||
|
radeon_emit(cs, fui(guardband_y)); /* R_028BE8_PA_CL_GB_VERT_CLIP_ADJ */
|
||||||
|
radeon_emit(cs, fui(1.0)); /* R_028BEC_PA_CL_GB_VERT_DISC_ADJ */
|
||||||
|
radeon_emit(cs, fui(guardband_x)); /* R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ */
|
||||||
|
radeon_emit(cs, fui(1.0)); /* R_028BF4_PA_CL_GB_HORZ_DISC_ADJ */
|
||||||
|
}
|
||||||
|
|
||||||
|
static void r600_emit_scissors(struct r600_common_context *rctx, struct r600_atom *atom)
|
||||||
|
{
|
||||||
|
struct radeon_winsys_cs *cs = rctx->gfx.cs;
|
||||||
|
struct pipe_scissor_state *states = rctx->scissors.states;
|
||||||
|
unsigned mask = rctx->scissors.dirty_mask;
|
||||||
|
bool scissor_enabled = rctx->scissor_enabled;
|
||||||
|
struct r600_signed_scissor max_vp_scissor;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
/* The simple case: Only 1 viewport is active. */
|
||||||
|
if (!rctx->vs_writes_viewport_index) {
|
||||||
|
struct r600_signed_scissor *vp = &rctx->viewports.as_scissor[0];
|
||||||
|
|
||||||
|
if (!(mask & 1))
|
||||||
|
return;
|
||||||
|
|
||||||
|
radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2);
|
||||||
|
r600_emit_one_scissor(rctx, cs, vp, scissor_enabled ? &states[0] : NULL);
|
||||||
|
r600_emit_guardband(rctx, vp);
|
||||||
|
rctx->scissors.dirty_mask &= ~1; /* clear one bit */
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Shaders can draw to any viewport. Make a union of all viewports. */
|
||||||
|
max_vp_scissor = rctx->viewports.as_scissor[0];
|
||||||
|
for (i = 1; i < R600_MAX_VIEWPORTS; i++)
|
||||||
|
r600_scissor_make_union(&max_vp_scissor,
|
||||||
|
&rctx->viewports.as_scissor[i]);
|
||||||
|
|
||||||
|
while (mask) {
|
||||||
|
int start, count, i;
|
||||||
|
|
||||||
|
u_bit_scan_consecutive_range(&mask, &start, &count);
|
||||||
|
|
||||||
|
radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL +
|
||||||
|
start * 4 * 2, count * 2);
|
||||||
|
for (i = start; i < start+count; i++) {
|
||||||
|
r600_emit_one_scissor(rctx, cs, &rctx->viewports.as_scissor[i],
|
||||||
|
scissor_enabled ? &states[i] : NULL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
r600_emit_guardband(rctx, &max_vp_scissor);
|
||||||
|
rctx->scissors.dirty_mask = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void r600_set_viewport_states(struct pipe_context *ctx,
|
||||||
|
unsigned start_slot,
|
||||||
|
unsigned num_viewports,
|
||||||
|
const struct pipe_viewport_state *state)
|
||||||
|
{
|
||||||
|
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
|
||||||
|
unsigned mask;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < num_viewports; i++) {
|
||||||
|
unsigned index = start_slot + i;
|
||||||
|
|
||||||
|
rctx->viewports.states[index] = state[i];
|
||||||
|
r600_get_scissor_from_viewport(rctx, &state[i],
|
||||||
|
&rctx->viewports.as_scissor[index]);
|
||||||
|
}
|
||||||
|
|
||||||
|
mask = ((1 << num_viewports) - 1) << start_slot;
|
||||||
|
rctx->viewports.dirty_mask |= mask;
|
||||||
|
rctx->viewports.depth_range_dirty_mask |= mask;
|
||||||
|
rctx->scissors.dirty_mask |= mask;
|
||||||
|
rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
|
||||||
|
rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void r600_emit_one_viewport(struct r600_common_context *rctx,
|
||||||
|
struct pipe_viewport_state *state)
|
||||||
|
{
|
||||||
|
struct radeon_winsys_cs *cs = rctx->gfx.cs;
|
||||||
|
|
||||||
|
radeon_emit(cs, fui(state->scale[0]));
|
||||||
|
radeon_emit(cs, fui(state->translate[0]));
|
||||||
|
radeon_emit(cs, fui(state->scale[1]));
|
||||||
|
radeon_emit(cs, fui(state->translate[1]));
|
||||||
|
radeon_emit(cs, fui(state->scale[2]));
|
||||||
|
radeon_emit(cs, fui(state->translate[2]));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void r600_emit_viewports(struct r600_common_context *rctx)
|
||||||
|
{
|
||||||
|
struct radeon_winsys_cs *cs = rctx->gfx.cs;
|
||||||
|
struct pipe_viewport_state *states = rctx->viewports.states;
|
||||||
|
unsigned mask = rctx->viewports.dirty_mask;
|
||||||
|
|
||||||
|
/* The simple case: Only 1 viewport is active. */
|
||||||
|
if (!rctx->vs_writes_viewport_index) {
|
||||||
|
if (!(mask & 1))
|
||||||
|
return;
|
||||||
|
|
||||||
|
radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
|
||||||
|
r600_emit_one_viewport(rctx, &states[0]);
|
||||||
|
rctx->viewports.dirty_mask &= ~1; /* clear one bit */
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (mask) {
|
||||||
|
int start, count, i;
|
||||||
|
|
||||||
|
u_bit_scan_consecutive_range(&mask, &start, &count);
|
||||||
|
|
||||||
|
radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
|
||||||
|
start * 4 * 6, count * 6);
|
||||||
|
for (i = start; i < start+count; i++)
|
||||||
|
r600_emit_one_viewport(rctx, &states[i]);
|
||||||
|
}
|
||||||
|
rctx->viewports.dirty_mask = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void r600_emit_depth_ranges(struct r600_common_context *rctx)
|
||||||
|
{
|
||||||
|
struct radeon_winsys_cs *cs = rctx->gfx.cs;
|
||||||
|
struct pipe_viewport_state *states = rctx->viewports.states;
|
||||||
|
unsigned mask = rctx->viewports.depth_range_dirty_mask;
|
||||||
|
float zmin, zmax;
|
||||||
|
|
||||||
|
/* The simple case: Only 1 viewport is active. */
|
||||||
|
if (!rctx->vs_writes_viewport_index) {
|
||||||
|
if (!(mask & 1))
|
||||||
|
return;
|
||||||
|
|
||||||
|
util_viewport_zmin_zmax(&states[0], rctx->clip_halfz, &zmin, &zmax);
|
||||||
|
|
||||||
|
radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
|
||||||
|
radeon_emit(cs, fui(zmin));
|
||||||
|
radeon_emit(cs, fui(zmax));
|
||||||
|
rctx->viewports.depth_range_dirty_mask &= ~1; /* clear one bit */
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (mask) {
|
||||||
|
int start, count, i;
|
||||||
|
|
||||||
|
u_bit_scan_consecutive_range(&mask, &start, &count);
|
||||||
|
|
||||||
|
radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 +
|
||||||
|
start * 4 * 2, count * 2);
|
||||||
|
for (i = start; i < start+count; i++) {
|
||||||
|
util_viewport_zmin_zmax(&states[i], rctx->clip_halfz, &zmin, &zmax);
|
||||||
|
radeon_emit(cs, fui(zmin));
|
||||||
|
radeon_emit(cs, fui(zmax));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rctx->viewports.depth_range_dirty_mask = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void r600_emit_viewport_states(struct r600_common_context *rctx,
|
||||||
|
struct r600_atom *atom)
|
||||||
|
{
|
||||||
|
r600_emit_viewports(rctx);
|
||||||
|
r600_emit_depth_ranges(rctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set viewport dependencies on pipe_rasterizer_state. */
|
||||||
|
void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
|
||||||
|
bool scissor_enable, bool clip_halfz)
|
||||||
|
{
|
||||||
|
if (rctx->scissor_enabled != scissor_enable) {
|
||||||
|
rctx->scissor_enabled = scissor_enable;
|
||||||
|
rctx->scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
|
||||||
|
rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
|
||||||
|
}
|
||||||
|
if (rctx->clip_halfz != clip_halfz) {
|
||||||
|
rctx->clip_halfz = clip_halfz;
|
||||||
|
rctx->viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
|
||||||
|
rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Normally, we only emit 1 viewport and 1 scissor if no shader is using
|
||||||
|
* the VIEWPORT_INDEX output, and emitting the other viewports and scissors
|
||||||
|
* is delayed. When a shader with VIEWPORT_INDEX appears, this should be
|
||||||
|
* called to emit the rest.
|
||||||
|
*/
|
||||||
|
void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
|
||||||
|
struct tgsi_shader_info *info)
|
||||||
|
{
|
||||||
|
bool vs_window_space;
|
||||||
|
|
||||||
|
if (!info)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* When the VS disables clipping and viewport transformation. */
|
||||||
|
vs_window_space =
|
||||||
|
info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
|
||||||
|
|
||||||
|
if (rctx->vs_disables_clipping_viewport != vs_window_space) {
|
||||||
|
rctx->vs_disables_clipping_viewport = vs_window_space;
|
||||||
|
rctx->scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
|
||||||
|
rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Viewport index handling. */
|
||||||
|
rctx->vs_writes_viewport_index = info->writes_viewport_index;
|
||||||
|
if (!rctx->vs_writes_viewport_index)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (rctx->scissors.dirty_mask)
|
||||||
|
rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
|
||||||
|
|
||||||
|
if (rctx->viewports.dirty_mask ||
|
||||||
|
rctx->viewports.depth_range_dirty_mask)
|
||||||
|
rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
void r600_init_viewport_functions(struct r600_common_context *rctx)
|
||||||
|
{
|
||||||
|
rctx->scissors.atom.emit = r600_emit_scissors;
|
||||||
|
rctx->viewports.atom.emit = r600_emit_viewport_states;
|
||||||
|
|
||||||
|
rctx->scissors.atom.num_dw = (2 + 16 * 2) + 6;
|
||||||
|
rctx->viewports.atom.num_dw = 2 + 16 * 6;
|
||||||
|
|
||||||
|
rctx->b.set_scissor_states = r600_set_scissor_states;
|
||||||
|
rctx->b.set_viewport_states = r600_set_viewport_states;
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,447 @@
|
||||||
|
/**************************************************************************
|
||||||
|
*
|
||||||
|
* Copyright 2011 Advanced Micro Devices, Inc.
|
||||||
|
* All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the
|
||||||
|
* "Software"), to deal in the Software without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sub license, and/or sell copies of the Software, and to
|
||||||
|
* permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the
|
||||||
|
* next paragraph) shall be included in all copies or substantial portions
|
||||||
|
* of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||||
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
|
||||||
|
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
**************************************************************************/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Authors:
|
||||||
|
* Christian König <christian.koenig@amd.com>
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef RADEON_UVD_H
|
||||||
|
#define RADEON_UVD_H
|
||||||
|
|
||||||
|
#include "radeon/radeon_winsys.h"
|
||||||
|
#include "vl/vl_video_buffer.h"
|
||||||
|
|
||||||
|
/* UVD uses PM4 packet type 0 and 2 */
|
||||||
|
#define RUVD_PKT_TYPE_S(x) (((unsigned)(x) & 0x3) << 30)
|
||||||
|
#define RUVD_PKT_TYPE_G(x) (((x) >> 30) & 0x3)
|
||||||
|
#define RUVD_PKT_TYPE_C 0x3FFFFFFF
|
||||||
|
#define RUVD_PKT_COUNT_S(x) (((unsigned)(x) & 0x3FFF) << 16)
|
||||||
|
#define RUVD_PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF)
|
||||||
|
#define RUVD_PKT_COUNT_C 0xC000FFFF
|
||||||
|
#define RUVD_PKT0_BASE_INDEX_S(x) (((unsigned)(x) & 0xFFFF) << 0)
|
||||||
|
#define RUVD_PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF)
|
||||||
|
#define RUVD_PKT0_BASE_INDEX_C 0xFFFF0000
|
||||||
|
#define RUVD_PKT0(index, count) (RUVD_PKT_TYPE_S(0) | RUVD_PKT0_BASE_INDEX_S(index) | RUVD_PKT_COUNT_S(count))
|
||||||
|
#define RUVD_PKT2() (RUVD_PKT_TYPE_S(2))
|
||||||
|
|
||||||
|
/* registers involved with UVD */
|
||||||
|
#define RUVD_GPCOM_VCPU_CMD 0xEF0C
|
||||||
|
#define RUVD_GPCOM_VCPU_DATA0 0xEF10
|
||||||
|
#define RUVD_GPCOM_VCPU_DATA1 0xEF14
|
||||||
|
#define RUVD_ENGINE_CNTL 0xEF18
|
||||||
|
|
||||||
|
#define RUVD_GPCOM_VCPU_CMD_SOC15 0x2070c
|
||||||
|
#define RUVD_GPCOM_VCPU_DATA0_SOC15 0x20710
|
||||||
|
#define RUVD_GPCOM_VCPU_DATA1_SOC15 0x20714
|
||||||
|
#define RUVD_ENGINE_CNTL_SOC15 0x20718
|
||||||
|
|
||||||
|
/* UVD commands to VCPU */
|
||||||
|
#define RUVD_CMD_MSG_BUFFER 0x00000000
|
||||||
|
#define RUVD_CMD_DPB_BUFFER 0x00000001
|
||||||
|
#define RUVD_CMD_DECODING_TARGET_BUFFER 0x00000002
|
||||||
|
#define RUVD_CMD_FEEDBACK_BUFFER 0x00000003
|
||||||
|
#define RUVD_CMD_SESSION_CONTEXT_BUFFER 0x00000005
|
||||||
|
#define RUVD_CMD_BITSTREAM_BUFFER 0x00000100
|
||||||
|
#define RUVD_CMD_ITSCALING_TABLE_BUFFER 0x00000204
|
||||||
|
#define RUVD_CMD_CONTEXT_BUFFER 0x00000206
|
||||||
|
|
||||||
|
/* UVD message types */
|
||||||
|
#define RUVD_MSG_CREATE 0
|
||||||
|
#define RUVD_MSG_DECODE 1
|
||||||
|
#define RUVD_MSG_DESTROY 2
|
||||||
|
|
||||||
|
/* UVD stream types */
|
||||||
|
#define RUVD_CODEC_H264 0x00000000
|
||||||
|
#define RUVD_CODEC_VC1 0x00000001
|
||||||
|
#define RUVD_CODEC_MPEG2 0x00000003
|
||||||
|
#define RUVD_CODEC_MPEG4 0x00000004
|
||||||
|
#define RUVD_CODEC_H264_PERF 0x00000007
|
||||||
|
#define RUVD_CODEC_MJPEG 0x00000008
|
||||||
|
#define RUVD_CODEC_H265 0x00000010
|
||||||
|
|
||||||
|
/* UVD decode target buffer tiling mode */
|
||||||
|
#define RUVD_TILE_LINEAR 0x00000000
|
||||||
|
#define RUVD_TILE_8X4 0x00000001
|
||||||
|
#define RUVD_TILE_8X8 0x00000002
|
||||||
|
#define RUVD_TILE_32AS8 0x00000003
|
||||||
|
|
||||||
|
/* UVD decode target buffer array mode */
|
||||||
|
#define RUVD_ARRAY_MODE_LINEAR 0x00000000
|
||||||
|
#define RUVD_ARRAY_MODE_MACRO_LINEAR_MICRO_TILED 0x00000001
|
||||||
|
#define RUVD_ARRAY_MODE_1D_THIN 0x00000002
|
||||||
|
#define RUVD_ARRAY_MODE_2D_THIN 0x00000004
|
||||||
|
#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_LINEAR 0x00000004
|
||||||
|
#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_TILED 0x00000005
|
||||||
|
|
||||||
|
/* UVD tile config */
|
||||||
|
#define RUVD_BANK_WIDTH(x) ((x) << 0)
|
||||||
|
#define RUVD_BANK_HEIGHT(x) ((x) << 3)
|
||||||
|
#define RUVD_MACRO_TILE_ASPECT_RATIO(x) ((x) << 6)
|
||||||
|
#define RUVD_NUM_BANKS(x) ((x) << 9)
|
||||||
|
|
||||||
|
/* H.264 profile definitions */
|
||||||
|
#define RUVD_H264_PROFILE_BASELINE 0x00000000
|
||||||
|
#define RUVD_H264_PROFILE_MAIN 0x00000001
|
||||||
|
#define RUVD_H264_PROFILE_HIGH 0x00000002
|
||||||
|
#define RUVD_H264_PROFILE_STEREO_HIGH 0x00000003
|
||||||
|
#define RUVD_H264_PROFILE_MVC 0x00000004
|
||||||
|
|
||||||
|
/* VC-1 profile definitions */
|
||||||
|
#define RUVD_VC1_PROFILE_SIMPLE 0x00000000
|
||||||
|
#define RUVD_VC1_PROFILE_MAIN 0x00000001
|
||||||
|
#define RUVD_VC1_PROFILE_ADVANCED 0x00000002
|
||||||
|
|
||||||
|
enum ruvd_surface_type {
|
||||||
|
RUVD_SURFACE_TYPE_LEGACY = 0,
|
||||||
|
RUVD_SURFACE_TYPE_GFX9
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ruvd_mvc_element {
|
||||||
|
uint16_t viewOrderIndex;
|
||||||
|
uint16_t viewId;
|
||||||
|
uint16_t numOfAnchorRefsInL0;
|
||||||
|
uint16_t viewIdOfAnchorRefsInL0[15];
|
||||||
|
uint16_t numOfAnchorRefsInL1;
|
||||||
|
uint16_t viewIdOfAnchorRefsInL1[15];
|
||||||
|
uint16_t numOfNonAnchorRefsInL0;
|
||||||
|
uint16_t viewIdOfNonAnchorRefsInL0[15];
|
||||||
|
uint16_t numOfNonAnchorRefsInL1;
|
||||||
|
uint16_t viewIdOfNonAnchorRefsInL1[15];
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ruvd_h264 {
|
||||||
|
uint32_t profile;
|
||||||
|
uint32_t level;
|
||||||
|
|
||||||
|
uint32_t sps_info_flags;
|
||||||
|
uint32_t pps_info_flags;
|
||||||
|
uint8_t chroma_format;
|
||||||
|
uint8_t bit_depth_luma_minus8;
|
||||||
|
uint8_t bit_depth_chroma_minus8;
|
||||||
|
uint8_t log2_max_frame_num_minus4;
|
||||||
|
|
||||||
|
uint8_t pic_order_cnt_type;
|
||||||
|
uint8_t log2_max_pic_order_cnt_lsb_minus4;
|
||||||
|
uint8_t num_ref_frames;
|
||||||
|
uint8_t reserved_8bit;
|
||||||
|
|
||||||
|
int8_t pic_init_qp_minus26;
|
||||||
|
int8_t pic_init_qs_minus26;
|
||||||
|
int8_t chroma_qp_index_offset;
|
||||||
|
int8_t second_chroma_qp_index_offset;
|
||||||
|
|
||||||
|
uint8_t num_slice_groups_minus1;
|
||||||
|
uint8_t slice_group_map_type;
|
||||||
|
uint8_t num_ref_idx_l0_active_minus1;
|
||||||
|
uint8_t num_ref_idx_l1_active_minus1;
|
||||||
|
|
||||||
|
uint16_t slice_group_change_rate_minus1;
|
||||||
|
uint16_t reserved_16bit_1;
|
||||||
|
|
||||||
|
uint8_t scaling_list_4x4[6][16];
|
||||||
|
uint8_t scaling_list_8x8[2][64];
|
||||||
|
|
||||||
|
uint32_t frame_num;
|
||||||
|
uint32_t frame_num_list[16];
|
||||||
|
int32_t curr_field_order_cnt_list[2];
|
||||||
|
int32_t field_order_cnt_list[16][2];
|
||||||
|
|
||||||
|
uint32_t decoded_pic_idx;
|
||||||
|
|
||||||
|
uint32_t curr_pic_ref_frame_num;
|
||||||
|
|
||||||
|
uint8_t ref_frame_list[16];
|
||||||
|
|
||||||
|
uint32_t reserved[122];
|
||||||
|
|
||||||
|
struct {
|
||||||
|
uint32_t numViews;
|
||||||
|
uint32_t viewId0;
|
||||||
|
struct ruvd_mvc_element mvcElements[1];
|
||||||
|
} mvc;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ruvd_h265 {
|
||||||
|
uint32_t sps_info_flags;
|
||||||
|
uint32_t pps_info_flags;
|
||||||
|
|
||||||
|
uint8_t chroma_format;
|
||||||
|
uint8_t bit_depth_luma_minus8;
|
||||||
|
uint8_t bit_depth_chroma_minus8;
|
||||||
|
uint8_t log2_max_pic_order_cnt_lsb_minus4;
|
||||||
|
|
||||||
|
uint8_t sps_max_dec_pic_buffering_minus1;
|
||||||
|
uint8_t log2_min_luma_coding_block_size_minus3;
|
||||||
|
uint8_t log2_diff_max_min_luma_coding_block_size;
|
||||||
|
uint8_t log2_min_transform_block_size_minus2;
|
||||||
|
|
||||||
|
uint8_t log2_diff_max_min_transform_block_size;
|
||||||
|
uint8_t max_transform_hierarchy_depth_inter;
|
||||||
|
uint8_t max_transform_hierarchy_depth_intra;
|
||||||
|
uint8_t pcm_sample_bit_depth_luma_minus1;
|
||||||
|
|
||||||
|
uint8_t pcm_sample_bit_depth_chroma_minus1;
|
||||||
|
uint8_t log2_min_pcm_luma_coding_block_size_minus3;
|
||||||
|
uint8_t log2_diff_max_min_pcm_luma_coding_block_size;
|
||||||
|
uint8_t num_extra_slice_header_bits;
|
||||||
|
|
||||||
|
uint8_t num_short_term_ref_pic_sets;
|
||||||
|
uint8_t num_long_term_ref_pic_sps;
|
||||||
|
uint8_t num_ref_idx_l0_default_active_minus1;
|
||||||
|
uint8_t num_ref_idx_l1_default_active_minus1;
|
||||||
|
|
||||||
|
int8_t pps_cb_qp_offset;
|
||||||
|
int8_t pps_cr_qp_offset;
|
||||||
|
int8_t pps_beta_offset_div2;
|
||||||
|
int8_t pps_tc_offset_div2;
|
||||||
|
|
||||||
|
uint8_t diff_cu_qp_delta_depth;
|
||||||
|
uint8_t num_tile_columns_minus1;
|
||||||
|
uint8_t num_tile_rows_minus1;
|
||||||
|
uint8_t log2_parallel_merge_level_minus2;
|
||||||
|
|
||||||
|
uint16_t column_width_minus1[19];
|
||||||
|
uint16_t row_height_minus1[21];
|
||||||
|
|
||||||
|
int8_t init_qp_minus26;
|
||||||
|
uint8_t num_delta_pocs_ref_rps_idx;
|
||||||
|
uint8_t curr_idx;
|
||||||
|
uint8_t reserved1;
|
||||||
|
int32_t curr_poc;
|
||||||
|
uint8_t ref_pic_list[16];
|
||||||
|
int32_t poc_list[16];
|
||||||
|
uint8_t ref_pic_set_st_curr_before[8];
|
||||||
|
uint8_t ref_pic_set_st_curr_after[8];
|
||||||
|
uint8_t ref_pic_set_lt_curr[8];
|
||||||
|
|
||||||
|
uint8_t ucScalingListDCCoefSizeID2[6];
|
||||||
|
uint8_t ucScalingListDCCoefSizeID3[2];
|
||||||
|
|
||||||
|
uint8_t highestTid;
|
||||||
|
uint8_t isNonRef;
|
||||||
|
|
||||||
|
uint8_t p010_mode;
|
||||||
|
uint8_t msb_mode;
|
||||||
|
uint8_t luma_10to8;
|
||||||
|
uint8_t chroma_10to8;
|
||||||
|
uint8_t sclr_luma10to8;
|
||||||
|
uint8_t sclr_chroma10to8;
|
||||||
|
|
||||||
|
uint8_t direct_reflist[2][15];
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ruvd_vc1 {
|
||||||
|
uint32_t profile;
|
||||||
|
uint32_t level;
|
||||||
|
uint32_t sps_info_flags;
|
||||||
|
uint32_t pps_info_flags;
|
||||||
|
uint32_t pic_structure;
|
||||||
|
uint32_t chroma_format;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ruvd_mpeg2 {
|
||||||
|
uint32_t decoded_pic_idx;
|
||||||
|
uint32_t ref_pic_idx[2];
|
||||||
|
|
||||||
|
uint8_t load_intra_quantiser_matrix;
|
||||||
|
uint8_t load_nonintra_quantiser_matrix;
|
||||||
|
uint8_t reserved_quantiser_alignement[2];
|
||||||
|
uint8_t intra_quantiser_matrix[64];
|
||||||
|
uint8_t nonintra_quantiser_matrix[64];
|
||||||
|
|
||||||
|
uint8_t profile_and_level_indication;
|
||||||
|
uint8_t chroma_format;
|
||||||
|
|
||||||
|
uint8_t picture_coding_type;
|
||||||
|
|
||||||
|
uint8_t reserved_1;
|
||||||
|
|
||||||
|
uint8_t f_code[2][2];
|
||||||
|
uint8_t intra_dc_precision;
|
||||||
|
uint8_t pic_structure;
|
||||||
|
uint8_t top_field_first;
|
||||||
|
uint8_t frame_pred_frame_dct;
|
||||||
|
uint8_t concealment_motion_vectors;
|
||||||
|
uint8_t q_scale_type;
|
||||||
|
uint8_t intra_vlc_format;
|
||||||
|
uint8_t alternate_scan;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ruvd_mpeg4
|
||||||
|
{
|
||||||
|
uint32_t decoded_pic_idx;
|
||||||
|
uint32_t ref_pic_idx[2];
|
||||||
|
|
||||||
|
uint32_t variant_type;
|
||||||
|
uint8_t profile_and_level_indication;
|
||||||
|
|
||||||
|
uint8_t video_object_layer_verid;
|
||||||
|
uint8_t video_object_layer_shape;
|
||||||
|
|
||||||
|
uint8_t reserved_1;
|
||||||
|
|
||||||
|
uint16_t video_object_layer_width;
|
||||||
|
uint16_t video_object_layer_height;
|
||||||
|
|
||||||
|
uint16_t vop_time_increment_resolution;
|
||||||
|
|
||||||
|
uint16_t reserved_2;
|
||||||
|
|
||||||
|
uint32_t flags;
|
||||||
|
|
||||||
|
uint8_t quant_type;
|
||||||
|
|
||||||
|
uint8_t reserved_3[3];
|
||||||
|
|
||||||
|
uint8_t intra_quant_mat[64];
|
||||||
|
uint8_t nonintra_quant_mat[64];
|
||||||
|
|
||||||
|
struct {
|
||||||
|
uint8_t sprite_enable;
|
||||||
|
|
||||||
|
uint8_t reserved_4[3];
|
||||||
|
|
||||||
|
uint16_t sprite_width;
|
||||||
|
uint16_t sprite_height;
|
||||||
|
int16_t sprite_left_coordinate;
|
||||||
|
int16_t sprite_top_coordinate;
|
||||||
|
|
||||||
|
uint8_t no_of_sprite_warping_points;
|
||||||
|
uint8_t sprite_warping_accuracy;
|
||||||
|
uint8_t sprite_brightness_change;
|
||||||
|
uint8_t low_latency_sprite_enable;
|
||||||
|
} sprite_config;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
uint32_t flags;
|
||||||
|
uint8_t vol_mode;
|
||||||
|
uint8_t reserved_5[3];
|
||||||
|
} divx_311_config;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* message between driver and hardware */
|
||||||
|
struct ruvd_msg {
|
||||||
|
|
||||||
|
uint32_t size;
|
||||||
|
uint32_t msg_type;
|
||||||
|
uint32_t stream_handle;
|
||||||
|
uint32_t status_report_feedback_number;
|
||||||
|
|
||||||
|
union {
|
||||||
|
struct {
|
||||||
|
uint32_t stream_type;
|
||||||
|
uint32_t session_flags;
|
||||||
|
uint32_t asic_id;
|
||||||
|
uint32_t width_in_samples;
|
||||||
|
uint32_t height_in_samples;
|
||||||
|
uint32_t dpb_buffer;
|
||||||
|
uint32_t dpb_size;
|
||||||
|
uint32_t dpb_model;
|
||||||
|
uint32_t version_info;
|
||||||
|
} create;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
uint32_t stream_type;
|
||||||
|
uint32_t decode_flags;
|
||||||
|
uint32_t width_in_samples;
|
||||||
|
uint32_t height_in_samples;
|
||||||
|
|
||||||
|
uint32_t dpb_buffer;
|
||||||
|
uint32_t dpb_size;
|
||||||
|
uint32_t dpb_model;
|
||||||
|
uint32_t dpb_reserved;
|
||||||
|
|
||||||
|
uint32_t db_offset_alignment;
|
||||||
|
uint32_t db_pitch;
|
||||||
|
uint32_t db_tiling_mode;
|
||||||
|
uint32_t db_array_mode;
|
||||||
|
uint32_t db_field_mode;
|
||||||
|
uint32_t db_surf_tile_config;
|
||||||
|
uint32_t db_aligned_height;
|
||||||
|
uint32_t db_reserved;
|
||||||
|
|
||||||
|
uint32_t use_addr_macro;
|
||||||
|
|
||||||
|
uint32_t bsd_buffer;
|
||||||
|
uint32_t bsd_size;
|
||||||
|
|
||||||
|
uint32_t pic_param_buffer;
|
||||||
|
uint32_t pic_param_size;
|
||||||
|
uint32_t mb_cntl_buffer;
|
||||||
|
uint32_t mb_cntl_size;
|
||||||
|
|
||||||
|
uint32_t dt_buffer;
|
||||||
|
uint32_t dt_pitch;
|
||||||
|
uint32_t dt_tiling_mode;
|
||||||
|
uint32_t dt_array_mode;
|
||||||
|
uint32_t dt_field_mode;
|
||||||
|
uint32_t dt_luma_top_offset;
|
||||||
|
uint32_t dt_luma_bottom_offset;
|
||||||
|
uint32_t dt_chroma_top_offset;
|
||||||
|
uint32_t dt_chroma_bottom_offset;
|
||||||
|
uint32_t dt_surf_tile_config;
|
||||||
|
uint32_t dt_uv_surf_tile_config;
|
||||||
|
// re-use dt_wa_chroma_top_offset as dt_ext_info for UV pitch in stoney
|
||||||
|
uint32_t dt_wa_chroma_top_offset;
|
||||||
|
uint32_t dt_wa_chroma_bottom_offset;
|
||||||
|
|
||||||
|
uint32_t reserved[16];
|
||||||
|
|
||||||
|
union {
|
||||||
|
struct ruvd_h264 h264;
|
||||||
|
struct ruvd_h265 h265;
|
||||||
|
struct ruvd_vc1 vc1;
|
||||||
|
struct ruvd_mpeg2 mpeg2;
|
||||||
|
struct ruvd_mpeg4 mpeg4;
|
||||||
|
|
||||||
|
uint32_t info[768];
|
||||||
|
} codec;
|
||||||
|
|
||||||
|
uint8_t extension_support;
|
||||||
|
uint8_t reserved_8bit_1;
|
||||||
|
uint8_t reserved_8bit_2;
|
||||||
|
uint8_t reserved_8bit_3;
|
||||||
|
uint32_t extension_reserved[64];
|
||||||
|
} decode;
|
||||||
|
} body;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* driver dependent callback */
|
||||||
|
typedef struct pb_buffer* (*ruvd_set_dtb)
|
||||||
|
(struct ruvd_msg* msg, struct vl_video_buffer *vb);
|
||||||
|
|
||||||
|
/* create an UVD decode */
|
||||||
|
struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
|
||||||
|
const struct pipe_video_codec *templat,
|
||||||
|
ruvd_set_dtb set_dtb);
|
||||||
|
|
||||||
|
/* fill decoding target field from the luma and chroma surfaces */
|
||||||
|
void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
|
||||||
|
struct radeon_surf *chroma, enum ruvd_surface_type type);
|
||||||
|
#endif
|
|
@ -0,0 +1,553 @@
|
||||||
|
/**************************************************************************
|
||||||
|
*
|
||||||
|
* Copyright 2013 Advanced Micro Devices, Inc.
|
||||||
|
* All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the
|
||||||
|
* "Software"), to deal in the Software without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sub license, and/or sell copies of the Software, and to
|
||||||
|
* permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the
|
||||||
|
* next paragraph) shall be included in all copies or substantial portions
|
||||||
|
* of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||||
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
|
||||||
|
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
**************************************************************************/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Authors:
|
||||||
|
* Christian König <christian.koenig@amd.com>
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "pipe/p_video_codec.h"
|
||||||
|
|
||||||
|
#include "util/u_video.h"
|
||||||
|
#include "util/u_memory.h"
|
||||||
|
|
||||||
|
#include "vl/vl_video_buffer.h"
|
||||||
|
|
||||||
|
#include "r600_pipe_common.h"
|
||||||
|
#include "radeon_video.h"
|
||||||
|
#include "radeon_vce.h"
|
||||||
|
|
||||||
|
#define FW_40_2_2 ((40 << 24) | (2 << 16) | (2 << 8))
|
||||||
|
#define FW_50_0_1 ((50 << 24) | (0 << 16) | (1 << 8))
|
||||||
|
#define FW_50_1_2 ((50 << 24) | (1 << 16) | (2 << 8))
|
||||||
|
#define FW_50_10_2 ((50 << 24) | (10 << 16) | (2 << 8))
|
||||||
|
#define FW_50_17_3 ((50 << 24) | (17 << 16) | (3 << 8))
|
||||||
|
#define FW_52_0_3 ((52 << 24) | (0 << 16) | (3 << 8))
|
||||||
|
#define FW_52_4_3 ((52 << 24) | (4 << 16) | (3 << 8))
|
||||||
|
#define FW_52_8_3 ((52 << 24) | (8 << 16) | (3 << 8))
|
||||||
|
#define FW_53 (53 << 24)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* flush commands to the hardware
|
||||||
|
*/
|
||||||
|
static void flush(struct rvce_encoder *enc)
|
||||||
|
{
|
||||||
|
enc->ws->cs_flush(enc->cs, RADEON_FLUSH_ASYNC, NULL);
|
||||||
|
enc->task_info_idx = 0;
|
||||||
|
enc->bs_idx = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
static void dump_feedback(struct rvce_encoder *enc, struct rvid_buffer *fb)
|
||||||
|
{
|
||||||
|
uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
|
||||||
|
unsigned i = 0;
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
fprintf(stderr, "encStatus:\t\t\t%08x\n", ptr[i++]);
|
||||||
|
fprintf(stderr, "encHasBitstream:\t\t%08x\n", ptr[i++]);
|
||||||
|
fprintf(stderr, "encHasAudioBitstream:\t\t%08x\n", ptr[i++]);
|
||||||
|
fprintf(stderr, "encBitstreamOffset:\t\t%08x\n", ptr[i++]);
|
||||||
|
fprintf(stderr, "encBitstreamSize:\t\t%08x\n", ptr[i++]);
|
||||||
|
fprintf(stderr, "encAudioBitstreamOffset:\t%08x\n", ptr[i++]);
|
||||||
|
fprintf(stderr, "encAudioBitstreamSize:\t\t%08x\n", ptr[i++]);
|
||||||
|
fprintf(stderr, "encExtrabytes:\t\t\t%08x\n", ptr[i++]);
|
||||||
|
fprintf(stderr, "encAudioExtrabytes:\t\t%08x\n", ptr[i++]);
|
||||||
|
fprintf(stderr, "videoTimeStamp:\t\t\t%08x\n", ptr[i++]);
|
||||||
|
fprintf(stderr, "audioTimeStamp:\t\t\t%08x\n", ptr[i++]);
|
||||||
|
fprintf(stderr, "videoOutputType:\t\t%08x\n", ptr[i++]);
|
||||||
|
fprintf(stderr, "attributeFlags:\t\t\t%08x\n", ptr[i++]);
|
||||||
|
fprintf(stderr, "seiPrivatePackageOffset:\t%08x\n", ptr[i++]);
|
||||||
|
fprintf(stderr, "seiPrivatePackageSize:\t\t%08x\n", ptr[i++]);
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
enc->ws->buffer_unmap(fb->res->buf);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* reset the CPB handling
|
||||||
|
*/
|
||||||
|
static void reset_cpb(struct rvce_encoder *enc)
|
||||||
|
{
|
||||||
|
unsigned i;
|
||||||
|
|
||||||
|
LIST_INITHEAD(&enc->cpb_slots);
|
||||||
|
for (i = 0; i < enc->cpb_num; ++i) {
|
||||||
|
struct rvce_cpb_slot *slot = &enc->cpb_array[i];
|
||||||
|
slot->index = i;
|
||||||
|
slot->picture_type = PIPE_H264_ENC_PICTURE_TYPE_SKIP;
|
||||||
|
slot->frame_num = 0;
|
||||||
|
slot->pic_order_cnt = 0;
|
||||||
|
LIST_ADDTAIL(&slot->list, &enc->cpb_slots);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* sort l0 and l1 to the top of the list
|
||||||
|
*/
|
||||||
|
static void sort_cpb(struct rvce_encoder *enc)
|
||||||
|
{
|
||||||
|
struct rvce_cpb_slot *i, *l0 = NULL, *l1 = NULL;
|
||||||
|
|
||||||
|
LIST_FOR_EACH_ENTRY(i, &enc->cpb_slots, list) {
|
||||||
|
if (i->frame_num == enc->pic.ref_idx_l0)
|
||||||
|
l0 = i;
|
||||||
|
|
||||||
|
if (i->frame_num == enc->pic.ref_idx_l1)
|
||||||
|
l1 = i;
|
||||||
|
|
||||||
|
if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P && l0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B &&
|
||||||
|
l0 && l1)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (l1) {
|
||||||
|
LIST_DEL(&l1->list);
|
||||||
|
LIST_ADD(&l1->list, &enc->cpb_slots);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (l0) {
|
||||||
|
LIST_DEL(&l0->list);
|
||||||
|
LIST_ADD(&l0->list, &enc->cpb_slots);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get number of cpbs based on dpb
|
||||||
|
*/
|
||||||
|
static unsigned get_cpb_num(struct rvce_encoder *enc)
|
||||||
|
{
|
||||||
|
unsigned w = align(enc->base.width, 16) / 16;
|
||||||
|
unsigned h = align(enc->base.height, 16) / 16;
|
||||||
|
unsigned dpb;
|
||||||
|
|
||||||
|
switch (enc->base.level) {
|
||||||
|
case 10:
|
||||||
|
dpb = 396;
|
||||||
|
break;
|
||||||
|
case 11:
|
||||||
|
dpb = 900;
|
||||||
|
break;
|
||||||
|
case 12:
|
||||||
|
case 13:
|
||||||
|
case 20:
|
||||||
|
dpb = 2376;
|
||||||
|
break;
|
||||||
|
case 21:
|
||||||
|
dpb = 4752;
|
||||||
|
break;
|
||||||
|
case 22:
|
||||||
|
case 30:
|
||||||
|
dpb = 8100;
|
||||||
|
break;
|
||||||
|
case 31:
|
||||||
|
dpb = 18000;
|
||||||
|
break;
|
||||||
|
case 32:
|
||||||
|
dpb = 20480;
|
||||||
|
break;
|
||||||
|
case 40:
|
||||||
|
case 41:
|
||||||
|
dpb = 32768;
|
||||||
|
break;
|
||||||
|
case 42:
|
||||||
|
dpb = 34816;
|
||||||
|
break;
|
||||||
|
case 50:
|
||||||
|
dpb = 110400;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
case 51:
|
||||||
|
case 52:
|
||||||
|
dpb = 184320;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return MIN2(dpb / (w * h), 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the slot for the currently encoded frame
|
||||||
|
*/
|
||||||
|
struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc)
|
||||||
|
{
|
||||||
|
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the slot for L0
|
||||||
|
*/
|
||||||
|
struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc)
|
||||||
|
{
|
||||||
|
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the slot for L1
|
||||||
|
*/
|
||||||
|
struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc)
|
||||||
|
{
|
||||||
|
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate the offsets into the CPB
|
||||||
|
*/
|
||||||
|
void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
|
||||||
|
signed *luma_offset, signed *chroma_offset)
|
||||||
|
{
|
||||||
|
struct r600_common_screen *rscreen = (struct r600_common_screen *)enc->screen;
|
||||||
|
unsigned pitch, vpitch, fsize;
|
||||||
|
|
||||||
|
if (rscreen->chip_class < GFX9) {
|
||||||
|
pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128);
|
||||||
|
vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16);
|
||||||
|
} else {
|
||||||
|
pitch = align(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe, 256);
|
||||||
|
vpitch = align(enc->luma->u.gfx9.surf_height, 16);
|
||||||
|
}
|
||||||
|
fsize = pitch * (vpitch + vpitch / 2);
|
||||||
|
|
||||||
|
*luma_offset = slot->index * fsize;
|
||||||
|
*chroma_offset = *luma_offset + pitch * vpitch;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* destroy this video encoder
|
||||||
|
*/
|
||||||
|
static void rvce_destroy(struct pipe_video_codec *encoder)
|
||||||
|
{
|
||||||
|
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
|
||||||
|
if (enc->stream_handle) {
|
||||||
|
struct rvid_buffer fb;
|
||||||
|
rvid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
|
||||||
|
enc->fb = &fb;
|
||||||
|
enc->session(enc);
|
||||||
|
enc->feedback(enc);
|
||||||
|
enc->destroy(enc);
|
||||||
|
flush(enc);
|
||||||
|
rvid_destroy_buffer(&fb);
|
||||||
|
}
|
||||||
|
rvid_destroy_buffer(&enc->cpb);
|
||||||
|
enc->ws->cs_destroy(enc->cs);
|
||||||
|
FREE(enc->cpb_array);
|
||||||
|
FREE(enc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void rvce_begin_frame(struct pipe_video_codec *encoder,
|
||||||
|
struct pipe_video_buffer *source,
|
||||||
|
struct pipe_picture_desc *picture)
|
||||||
|
{
|
||||||
|
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
|
||||||
|
struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source;
|
||||||
|
struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture;
|
||||||
|
|
||||||
|
bool need_rate_control =
|
||||||
|
enc->pic.rate_ctrl.rate_ctrl_method != pic->rate_ctrl.rate_ctrl_method ||
|
||||||
|
enc->pic.quant_i_frames != pic->quant_i_frames ||
|
||||||
|
enc->pic.quant_p_frames != pic->quant_p_frames ||
|
||||||
|
enc->pic.quant_b_frames != pic->quant_b_frames;
|
||||||
|
|
||||||
|
enc->pic = *pic;
|
||||||
|
get_pic_param(enc, pic);
|
||||||
|
|
||||||
|
enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
|
||||||
|
enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
|
||||||
|
|
||||||
|
if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR)
|
||||||
|
reset_cpb(enc);
|
||||||
|
else if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
|
||||||
|
pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_B)
|
||||||
|
sort_cpb(enc);
|
||||||
|
|
||||||
|
if (!enc->stream_handle) {
|
||||||
|
struct rvid_buffer fb;
|
||||||
|
enc->stream_handle = rvid_alloc_stream_handle();
|
||||||
|
rvid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
|
||||||
|
enc->fb = &fb;
|
||||||
|
enc->session(enc);
|
||||||
|
enc->create(enc);
|
||||||
|
enc->config(enc);
|
||||||
|
enc->feedback(enc);
|
||||||
|
flush(enc);
|
||||||
|
//dump_feedback(enc, &fb);
|
||||||
|
rvid_destroy_buffer(&fb);
|
||||||
|
need_rate_control = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (need_rate_control) {
|
||||||
|
enc->session(enc);
|
||||||
|
enc->config(enc);
|
||||||
|
flush(enc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void rvce_encode_bitstream(struct pipe_video_codec *encoder,
|
||||||
|
struct pipe_video_buffer *source,
|
||||||
|
struct pipe_resource *destination,
|
||||||
|
void **fb)
|
||||||
|
{
|
||||||
|
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
|
||||||
|
enc->get_buffer(destination, &enc->bs_handle, NULL);
|
||||||
|
enc->bs_size = destination->width0;
|
||||||
|
|
||||||
|
*fb = enc->fb = CALLOC_STRUCT(rvid_buffer);
|
||||||
|
if (!rvid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) {
|
||||||
|
RVID_ERR("Can't create feedback buffer.\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!radeon_emitted(enc->cs, 0))
|
||||||
|
enc->session(enc);
|
||||||
|
enc->encode(enc);
|
||||||
|
enc->feedback(enc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void rvce_end_frame(struct pipe_video_codec *encoder,
|
||||||
|
struct pipe_video_buffer *source,
|
||||||
|
struct pipe_picture_desc *picture)
|
||||||
|
{
|
||||||
|
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
|
||||||
|
struct rvce_cpb_slot *slot = LIST_ENTRY(
|
||||||
|
struct rvce_cpb_slot, enc->cpb_slots.prev, list);
|
||||||
|
|
||||||
|
if (!enc->dual_inst || enc->bs_idx > 1)
|
||||||
|
flush(enc);
|
||||||
|
|
||||||
|
/* update the CPB backtrack with the just encoded frame */
|
||||||
|
slot->picture_type = enc->pic.picture_type;
|
||||||
|
slot->frame_num = enc->pic.frame_num;
|
||||||
|
slot->pic_order_cnt = enc->pic.pic_order_cnt;
|
||||||
|
if (!enc->pic.not_referenced) {
|
||||||
|
LIST_DEL(&slot->list);
|
||||||
|
LIST_ADD(&slot->list, &enc->cpb_slots);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void rvce_get_feedback(struct pipe_video_codec *encoder,
|
||||||
|
void *feedback, unsigned *size)
|
||||||
|
{
|
||||||
|
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
|
||||||
|
struct rvid_buffer *fb = feedback;
|
||||||
|
|
||||||
|
if (size) {
|
||||||
|
uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
|
||||||
|
|
||||||
|
if (ptr[1]) {
|
||||||
|
*size = ptr[4] - ptr[9];
|
||||||
|
} else {
|
||||||
|
*size = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
enc->ws->buffer_unmap(fb->res->buf);
|
||||||
|
}
|
||||||
|
//dump_feedback(enc, fb);
|
||||||
|
rvid_destroy_buffer(fb);
|
||||||
|
FREE(fb);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* flush any outstanding command buffers to the hardware
|
||||||
|
*/
|
||||||
|
static void rvce_flush(struct pipe_video_codec *encoder)
|
||||||
|
{
|
||||||
|
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
|
||||||
|
|
||||||
|
flush(enc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void rvce_cs_flush(void *ctx, unsigned flags,
|
||||||
|
struct pipe_fence_handle **fence)
|
||||||
|
{
|
||||||
|
// just ignored
|
||||||
|
}
|
||||||
|
|
||||||
|
struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
|
||||||
|
const struct pipe_video_codec *templ,
|
||||||
|
struct radeon_winsys* ws,
|
||||||
|
rvce_get_buffer get_buffer)
|
||||||
|
{
|
||||||
|
struct r600_common_screen *rscreen = (struct r600_common_screen *)context->screen;
|
||||||
|
struct r600_common_context *rctx = (struct r600_common_context*)context;
|
||||||
|
struct rvce_encoder *enc;
|
||||||
|
struct pipe_video_buffer *tmp_buf, templat = {};
|
||||||
|
struct radeon_surf *tmp_surf;
|
||||||
|
unsigned cpb_size;
|
||||||
|
|
||||||
|
if (!rscreen->info.vce_fw_version) {
|
||||||
|
RVID_ERR("Kernel doesn't supports VCE!\n");
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
} else if (!rvce_is_fw_version_supported(rscreen)) {
|
||||||
|
RVID_ERR("Unsupported VCE fw version loaded!\n");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
enc = CALLOC_STRUCT(rvce_encoder);
|
||||||
|
if (!enc)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (rscreen->info.drm_major == 3)
|
||||||
|
enc->use_vm = true;
|
||||||
|
if ((rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42) ||
|
||||||
|
rscreen->info.drm_major == 3)
|
||||||
|
enc->use_vui = true;
|
||||||
|
if (rscreen->info.family >= CHIP_TONGA &&
|
||||||
|
rscreen->info.family != CHIP_STONEY &&
|
||||||
|
rscreen->info.family != CHIP_POLARIS11 &&
|
||||||
|
rscreen->info.family != CHIP_POLARIS12)
|
||||||
|
enc->dual_pipe = true;
|
||||||
|
/* TODO enable B frame with dual instance */
|
||||||
|
if ((rscreen->info.family >= CHIP_TONGA) &&
|
||||||
|
(templ->max_references == 1) &&
|
||||||
|
(rscreen->info.vce_harvest_config == 0))
|
||||||
|
enc->dual_inst = true;
|
||||||
|
|
||||||
|
enc->base = *templ;
|
||||||
|
enc->base.context = context;
|
||||||
|
|
||||||
|
enc->base.destroy = rvce_destroy;
|
||||||
|
enc->base.begin_frame = rvce_begin_frame;
|
||||||
|
enc->base.encode_bitstream = rvce_encode_bitstream;
|
||||||
|
enc->base.end_frame = rvce_end_frame;
|
||||||
|
enc->base.flush = rvce_flush;
|
||||||
|
enc->base.get_feedback = rvce_get_feedback;
|
||||||
|
enc->get_buffer = get_buffer;
|
||||||
|
|
||||||
|
enc->screen = context->screen;
|
||||||
|
enc->ws = ws;
|
||||||
|
enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc);
|
||||||
|
if (!enc->cs) {
|
||||||
|
RVID_ERR("Can't get command submission context.\n");
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
templat.buffer_format = PIPE_FORMAT_NV12;
|
||||||
|
templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
|
||||||
|
templat.width = enc->base.width;
|
||||||
|
templat.height = enc->base.height;
|
||||||
|
templat.interlaced = false;
|
||||||
|
if (!(tmp_buf = context->create_video_buffer(context, &templat))) {
|
||||||
|
RVID_ERR("Can't create video buffer.\n");
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
enc->cpb_num = get_cpb_num(enc);
|
||||||
|
if (!enc->cpb_num)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf);
|
||||||
|
|
||||||
|
cpb_size = (rscreen->chip_class < GFX9) ?
|
||||||
|
align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) *
|
||||||
|
align(tmp_surf->u.legacy.level[0].nblk_y, 32) :
|
||||||
|
|
||||||
|
align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) *
|
||||||
|
align(tmp_surf->u.gfx9.surf_height, 32);
|
||||||
|
|
||||||
|
cpb_size = cpb_size * 3 / 2;
|
||||||
|
cpb_size = cpb_size * enc->cpb_num;
|
||||||
|
if (enc->dual_pipe)
|
||||||
|
cpb_size += RVCE_MAX_AUX_BUFFER_NUM *
|
||||||
|
RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
|
||||||
|
tmp_buf->destroy(tmp_buf);
|
||||||
|
if (!rvid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
|
||||||
|
RVID_ERR("Can't create CPB buffer.\n");
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
enc->cpb_array = CALLOC(enc->cpb_num, sizeof(struct rvce_cpb_slot));
|
||||||
|
if (!enc->cpb_array)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
reset_cpb(enc);
|
||||||
|
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
return &enc->base;
|
||||||
|
|
||||||
|
error:
|
||||||
|
if (enc->cs)
|
||||||
|
enc->ws->cs_destroy(enc->cs);
|
||||||
|
|
||||||
|
rvid_destroy_buffer(&enc->cpb);
|
||||||
|
|
||||||
|
FREE(enc->cpb_array);
|
||||||
|
FREE(enc);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* check if kernel has the right fw version loaded
|
||||||
|
*/
|
||||||
|
bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
|
||||||
|
{
|
||||||
|
switch (rscreen->info.vce_fw_version) {
|
||||||
|
case FW_40_2_2:
|
||||||
|
case FW_50_0_1:
|
||||||
|
case FW_50_1_2:
|
||||||
|
case FW_50_10_2:
|
||||||
|
case FW_50_17_3:
|
||||||
|
case FW_52_0_3:
|
||||||
|
case FW_52_4_3:
|
||||||
|
case FW_52_8_3:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
if ((rscreen->info.vce_fw_version & (0xff << 24)) == FW_53)
|
||||||
|
return true;
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add the buffer as relocation to the current command submission
|
||||||
|
*/
|
||||||
|
void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
|
||||||
|
enum radeon_bo_usage usage, enum radeon_bo_domain domain,
|
||||||
|
signed offset)
|
||||||
|
{
|
||||||
|
int reloc_idx;
|
||||||
|
|
||||||
|
reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
|
||||||
|
domain, RADEON_PRIO_VCE);
|
||||||
|
if (enc->use_vm) {
|
||||||
|
uint64_t addr;
|
||||||
|
addr = enc->ws->buffer_get_virtual_address(buf);
|
||||||
|
addr = addr + offset;
|
||||||
|
RVCE_CS(addr >> 32);
|
||||||
|
RVCE_CS(addr);
|
||||||
|
} else {
|
||||||
|
offset += enc->ws->buffer_get_reloc_offset(buf);
|
||||||
|
RVCE_CS(reloc_idx * 4);
|
||||||
|
RVCE_CS(offset);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,462 @@
|
||||||
|
/**************************************************************************
|
||||||
|
*
|
||||||
|
* Copyright 2013 Advanced Micro Devices, Inc.
|
||||||
|
* All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the
|
||||||
|
* "Software"), to deal in the Software without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sub license, and/or sell copies of the Software, and to
|
||||||
|
* permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the
|
||||||
|
* next paragraph) shall be included in all copies or substantial portions
|
||||||
|
* of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||||
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
|
||||||
|
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
**************************************************************************/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Authors:
|
||||||
|
* Christian König <christian.koenig@amd.com>
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef RADEON_VCE_H
|
||||||
|
#define RADEON_VCE_H
|
||||||
|
|
||||||
|
#include "util/list.h"
|
||||||
|
|
||||||
|
#define RVCE_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value))
|
||||||
|
#define RVCE_BEGIN(cmd) { \
|
||||||
|
uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \
|
||||||
|
RVCE_CS(cmd)
|
||||||
|
#define RVCE_READ(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
|
||||||
|
#define RVCE_WRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
|
||||||
|
#define RVCE_READWRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
|
||||||
|
#define RVCE_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; }
|
||||||
|
|
||||||
|
#define RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE (4096 * 16 * 2.5)
|
||||||
|
#define RVCE_MAX_AUX_BUFFER_NUM 4
|
||||||
|
|
||||||
|
struct r600_common_screen;
|
||||||
|
|
||||||
|
/* driver dependent callback */
|
||||||
|
typedef void (*rvce_get_buffer)(struct pipe_resource *resource,
|
||||||
|
struct pb_buffer **handle,
|
||||||
|
struct radeon_surf **surface);
|
||||||
|
|
||||||
|
/* Coded picture buffer slot */
|
||||||
|
struct rvce_cpb_slot {
|
||||||
|
struct list_head list;
|
||||||
|
|
||||||
|
unsigned index;
|
||||||
|
enum pipe_h264_enc_picture_type picture_type;
|
||||||
|
unsigned frame_num;
|
||||||
|
unsigned pic_order_cnt;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct rvce_rate_control {
|
||||||
|
uint32_t rc_method;
|
||||||
|
uint32_t target_bitrate;
|
||||||
|
uint32_t peak_bitrate;
|
||||||
|
uint32_t frame_rate_num;
|
||||||
|
uint32_t gop_size;
|
||||||
|
uint32_t quant_i_frames;
|
||||||
|
uint32_t quant_p_frames;
|
||||||
|
uint32_t quant_b_frames;
|
||||||
|
uint32_t vbv_buffer_size;
|
||||||
|
uint32_t frame_rate_den;
|
||||||
|
uint32_t vbv_buf_lv;
|
||||||
|
uint32_t max_au_size;
|
||||||
|
uint32_t qp_initial_mode;
|
||||||
|
uint32_t target_bits_picture;
|
||||||
|
uint32_t peak_bits_picture_integer;
|
||||||
|
uint32_t peak_bits_picture_fraction;
|
||||||
|
uint32_t min_qp;
|
||||||
|
uint32_t max_qp;
|
||||||
|
uint32_t skip_frame_enable;
|
||||||
|
uint32_t fill_data_enable;
|
||||||
|
uint32_t enforce_hrd;
|
||||||
|
uint32_t b_pics_delta_qp;
|
||||||
|
uint32_t ref_b_pics_delta_qp;
|
||||||
|
uint32_t rc_reinit_disable;
|
||||||
|
uint32_t enc_lcvbr_init_qp_flag;
|
||||||
|
uint32_t lcvbrsatd_based_nonlinear_bit_budget_flag;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct rvce_motion_estimation {
|
||||||
|
uint32_t enc_ime_decimation_search;
|
||||||
|
uint32_t motion_est_half_pixel;
|
||||||
|
uint32_t motion_est_quarter_pixel;
|
||||||
|
uint32_t disable_favor_pmv_point;
|
||||||
|
uint32_t force_zero_point_center;
|
||||||
|
uint32_t lsmvert;
|
||||||
|
uint32_t enc_search_range_x;
|
||||||
|
uint32_t enc_search_range_y;
|
||||||
|
uint32_t enc_search1_range_x;
|
||||||
|
uint32_t enc_search1_range_y;
|
||||||
|
uint32_t disable_16x16_frame1;
|
||||||
|
uint32_t disable_satd;
|
||||||
|
uint32_t enable_amd;
|
||||||
|
uint32_t enc_disable_sub_mode;
|
||||||
|
uint32_t enc_ime_skip_x;
|
||||||
|
uint32_t enc_ime_skip_y;
|
||||||
|
uint32_t enc_en_ime_overw_dis_subm;
|
||||||
|
uint32_t enc_ime_overw_dis_subm_no;
|
||||||
|
uint32_t enc_ime2_search_range_x;
|
||||||
|
uint32_t enc_ime2_search_range_y;
|
||||||
|
uint32_t parallel_mode_speedup_enable;
|
||||||
|
uint32_t fme0_enc_disable_sub_mode;
|
||||||
|
uint32_t fme1_enc_disable_sub_mode;
|
||||||
|
uint32_t ime_sw_speedup_enable;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct rvce_pic_control {
|
||||||
|
uint32_t enc_use_constrained_intra_pred;
|
||||||
|
uint32_t enc_cabac_enable;
|
||||||
|
uint32_t enc_cabac_idc;
|
||||||
|
uint32_t enc_loop_filter_disable;
|
||||||
|
int32_t enc_lf_beta_offset;
|
||||||
|
int32_t enc_lf_alpha_c0_offset;
|
||||||
|
uint32_t enc_crop_left_offset;
|
||||||
|
uint32_t enc_crop_right_offset;
|
||||||
|
uint32_t enc_crop_top_offset;
|
||||||
|
uint32_t enc_crop_bottom_offset;
|
||||||
|
uint32_t enc_num_mbs_per_slice;
|
||||||
|
uint32_t enc_intra_refresh_num_mbs_per_slot;
|
||||||
|
uint32_t enc_force_intra_refresh;
|
||||||
|
uint32_t enc_force_imb_period;
|
||||||
|
uint32_t enc_pic_order_cnt_type;
|
||||||
|
uint32_t log2_max_pic_order_cnt_lsb_minus4;
|
||||||
|
uint32_t enc_sps_id;
|
||||||
|
uint32_t enc_pps_id;
|
||||||
|
uint32_t enc_constraint_set_flags;
|
||||||
|
uint32_t enc_b_pic_pattern;
|
||||||
|
uint32_t weight_pred_mode_b_picture;
|
||||||
|
uint32_t enc_number_of_reference_frames;
|
||||||
|
uint32_t enc_max_num_ref_frames;
|
||||||
|
uint32_t enc_num_default_active_ref_l0;
|
||||||
|
uint32_t enc_num_default_active_ref_l1;
|
||||||
|
uint32_t enc_slice_mode;
|
||||||
|
uint32_t enc_max_slice_size;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct rvce_task_info {
|
||||||
|
uint32_t offset_of_next_task_info;
|
||||||
|
uint32_t task_operation;
|
||||||
|
uint32_t reference_picture_dependency;
|
||||||
|
uint32_t collocate_flag_dependency;
|
||||||
|
uint32_t feedback_index;
|
||||||
|
uint32_t video_bitstream_ring_index;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct rvce_feedback_buf_pkg {
|
||||||
|
uint32_t feedback_ring_address_hi;
|
||||||
|
uint32_t feedback_ring_address_lo;
|
||||||
|
uint32_t feedback_ring_size;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct rvce_rdo {
|
||||||
|
uint32_t enc_disable_tbe_pred_i_frame;
|
||||||
|
uint32_t enc_disable_tbe_pred_p_frame;
|
||||||
|
uint32_t use_fme_interpol_y;
|
||||||
|
uint32_t use_fme_interpol_uv;
|
||||||
|
uint32_t use_fme_intrapol_y;
|
||||||
|
uint32_t use_fme_intrapol_uv;
|
||||||
|
uint32_t use_fme_interpol_y_1;
|
||||||
|
uint32_t use_fme_interpol_uv_1;
|
||||||
|
uint32_t use_fme_intrapol_y_1;
|
||||||
|
uint32_t use_fme_intrapol_uv_1;
|
||||||
|
uint32_t enc_16x16_cost_adj;
|
||||||
|
uint32_t enc_skip_cost_adj;
|
||||||
|
uint32_t enc_force_16x16_skip;
|
||||||
|
uint32_t enc_disable_threshold_calc_a;
|
||||||
|
uint32_t enc_luma_coeff_cost;
|
||||||
|
uint32_t enc_luma_mb_coeff_cost;
|
||||||
|
uint32_t enc_chroma_coeff_cost;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct rvce_vui {
|
||||||
|
uint32_t aspect_ratio_info_present_flag;
|
||||||
|
uint32_t aspect_ratio_idc;
|
||||||
|
uint32_t sar_width;
|
||||||
|
uint32_t sar_height;
|
||||||
|
uint32_t overscan_info_present_flag;
|
||||||
|
uint32_t overscan_Approp_flag;
|
||||||
|
uint32_t video_signal_type_present_flag;
|
||||||
|
uint32_t video_format;
|
||||||
|
uint32_t video_full_range_flag;
|
||||||
|
uint32_t color_description_present_flag;
|
||||||
|
uint32_t color_prim;
|
||||||
|
uint32_t transfer_char;
|
||||||
|
uint32_t matrix_coef;
|
||||||
|
uint32_t chroma_loc_info_present_flag;
|
||||||
|
uint32_t chroma_loc_top;
|
||||||
|
uint32_t chroma_loc_bottom;
|
||||||
|
uint32_t timing_info_present_flag;
|
||||||
|
uint32_t num_units_in_tick;
|
||||||
|
uint32_t time_scale;
|
||||||
|
uint32_t fixed_frame_rate_flag;
|
||||||
|
uint32_t nal_hrd_parameters_present_flag;
|
||||||
|
uint32_t cpb_cnt_minus1;
|
||||||
|
uint32_t bit_rate_scale;
|
||||||
|
uint32_t cpb_size_scale;
|
||||||
|
uint32_t bit_rate_value_minus;
|
||||||
|
uint32_t cpb_size_value_minus;
|
||||||
|
uint32_t cbr_flag;
|
||||||
|
uint32_t initial_cpb_removal_delay_length_minus1;
|
||||||
|
uint32_t cpb_removal_delay_length_minus1;
|
||||||
|
uint32_t dpb_output_delay_length_minus1;
|
||||||
|
uint32_t time_offset_length;
|
||||||
|
uint32_t low_delay_hrd_flag;
|
||||||
|
uint32_t pic_struct_present_flag;
|
||||||
|
uint32_t bitstream_restriction_present_flag;
|
||||||
|
uint32_t motion_vectors_over_pic_boundaries_flag;
|
||||||
|
uint32_t max_bytes_per_pic_denom;
|
||||||
|
uint32_t max_bits_per_mb_denom;
|
||||||
|
uint32_t log2_max_mv_length_hori;
|
||||||
|
uint32_t log2_max_mv_length_vert;
|
||||||
|
uint32_t num_reorder_frames;
|
||||||
|
uint32_t max_dec_frame_buffering;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct rvce_enc_operation {
|
||||||
|
uint32_t insert_headers;
|
||||||
|
uint32_t picture_structure;
|
||||||
|
uint32_t allowed_max_bitstream_size;
|
||||||
|
uint32_t force_refresh_map;
|
||||||
|
uint32_t insert_aud;
|
||||||
|
uint32_t end_of_sequence;
|
||||||
|
uint32_t end_of_stream;
|
||||||
|
uint32_t input_picture_luma_address_hi;
|
||||||
|
uint32_t input_picture_luma_address_lo;
|
||||||
|
uint32_t input_picture_chroma_address_hi;
|
||||||
|
uint32_t input_picture_chroma_address_lo;
|
||||||
|
uint32_t enc_input_frame_y_pitch;
|
||||||
|
uint32_t enc_input_pic_luma_pitch;
|
||||||
|
uint32_t enc_input_pic_chroma_pitch;;
|
||||||
|
uint32_t enc_input_pic_addr_array;
|
||||||
|
uint32_t enc_input_pic_addr_array_disable2pipe_disablemboffload;
|
||||||
|
uint32_t enc_input_pic_tile_config;
|
||||||
|
uint32_t enc_pic_type;
|
||||||
|
uint32_t enc_idr_flag;
|
||||||
|
uint32_t enc_idr_pic_id;
|
||||||
|
uint32_t enc_mgs_key_pic;
|
||||||
|
uint32_t enc_reference_flag;
|
||||||
|
uint32_t enc_temporal_layer_index;
|
||||||
|
uint32_t num_ref_idx_active_override_flag;
|
||||||
|
uint32_t num_ref_idx_l0_active_minus1;
|
||||||
|
uint32_t num_ref_idx_l1_active_minus1;
|
||||||
|
uint32_t enc_ref_list_modification_op;
|
||||||
|
uint32_t enc_ref_list_modification_num;
|
||||||
|
uint32_t enc_decoded_picture_marking_op;
|
||||||
|
uint32_t enc_decoded_picture_marking_num;
|
||||||
|
uint32_t enc_decoded_picture_marking_idx;
|
||||||
|
uint32_t enc_decoded_ref_base_picture_marking_op;
|
||||||
|
uint32_t enc_decoded_ref_base_picture_marking_num;
|
||||||
|
uint32_t l0_picture_structure;
|
||||||
|
uint32_t l0_enc_pic_type;
|
||||||
|
uint32_t l0_frame_number;
|
||||||
|
uint32_t l0_picture_order_count;
|
||||||
|
uint32_t l0_luma_offset;
|
||||||
|
uint32_t l0_chroma_offset;
|
||||||
|
uint32_t l1_picture_structure;
|
||||||
|
uint32_t l1_enc_pic_type;
|
||||||
|
uint32_t l1_frame_number;
|
||||||
|
uint32_t l1_picture_order_count;
|
||||||
|
uint32_t l1_luma_offset;
|
||||||
|
uint32_t l1_chroma_offset;
|
||||||
|
uint32_t enc_reconstructed_luma_offset;
|
||||||
|
uint32_t enc_reconstructed_chroma_offset;;
|
||||||
|
uint32_t enc_coloc_buffer_offset;
|
||||||
|
uint32_t enc_reconstructed_ref_base_picture_luma_offset;
|
||||||
|
uint32_t enc_reconstructed_ref_base_picture_chroma_offset;
|
||||||
|
uint32_t enc_reference_ref_base_picture_luma_offset;
|
||||||
|
uint32_t enc_reference_ref_base_picture_chroma_offset;
|
||||||
|
uint32_t picture_count;
|
||||||
|
uint32_t frame_number;
|
||||||
|
uint32_t picture_order_count;
|
||||||
|
uint32_t num_i_pic_remain_in_rcgop;
|
||||||
|
uint32_t num_p_pic_remain_in_rcgop;
|
||||||
|
uint32_t num_b_pic_remain_in_rcgop;
|
||||||
|
uint32_t num_ir_pic_remain_in_rcgop;
|
||||||
|
uint32_t enable_intra_refresh;
|
||||||
|
uint32_t aq_variance_en;
|
||||||
|
uint32_t aq_block_size;
|
||||||
|
uint32_t aq_mb_variance_sel;
|
||||||
|
uint32_t aq_frame_variance_sel;
|
||||||
|
uint32_t aq_param_a;
|
||||||
|
uint32_t aq_param_b;
|
||||||
|
uint32_t aq_param_c;
|
||||||
|
uint32_t aq_param_d;
|
||||||
|
uint32_t aq_param_e;
|
||||||
|
uint32_t context_in_sfb;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct rvce_enc_create {
|
||||||
|
uint32_t enc_use_circular_buffer;
|
||||||
|
uint32_t enc_profile;
|
||||||
|
uint32_t enc_level;
|
||||||
|
uint32_t enc_pic_struct_restriction;
|
||||||
|
uint32_t enc_image_width;
|
||||||
|
uint32_t enc_image_height;
|
||||||
|
uint32_t enc_ref_pic_luma_pitch;
|
||||||
|
uint32_t enc_ref_pic_chroma_pitch;
|
||||||
|
uint32_t enc_ref_y_height_in_qw;
|
||||||
|
uint32_t enc_ref_pic_addr_array_enc_pic_struct_restriction_disable_rdo;
|
||||||
|
uint32_t enc_pre_encode_context_buffer_offset;
|
||||||
|
uint32_t enc_pre_encode_input_luma_buffer_offset;
|
||||||
|
uint32_t enc_pre_encode_input_chroma_buffer_offset;
|
||||||
|
uint32_t enc_pre_encode_mode_chromaflag_vbaqmode_scenechangesensitivity;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct rvce_config_ext {
|
||||||
|
uint32_t enc_enable_perf_logging;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct rvce_h264_enc_pic {
|
||||||
|
struct rvce_rate_control rc;
|
||||||
|
struct rvce_motion_estimation me;
|
||||||
|
struct rvce_pic_control pc;
|
||||||
|
struct rvce_task_info ti;
|
||||||
|
struct rvce_feedback_buf_pkg fb;
|
||||||
|
struct rvce_rdo rdo;
|
||||||
|
struct rvce_vui vui;
|
||||||
|
struct rvce_enc_operation eo;
|
||||||
|
struct rvce_enc_create ec;
|
||||||
|
struct rvce_config_ext ce;
|
||||||
|
|
||||||
|
unsigned quant_i_frames;
|
||||||
|
unsigned quant_p_frames;
|
||||||
|
unsigned quant_b_frames;
|
||||||
|
|
||||||
|
enum pipe_h264_enc_picture_type picture_type;
|
||||||
|
unsigned frame_num;
|
||||||
|
unsigned frame_num_cnt;
|
||||||
|
unsigned p_remain;
|
||||||
|
unsigned i_remain;
|
||||||
|
unsigned idr_pic_id;
|
||||||
|
unsigned gop_cnt;
|
||||||
|
unsigned gop_size;
|
||||||
|
unsigned pic_order_cnt;
|
||||||
|
unsigned ref_idx_l0;
|
||||||
|
unsigned ref_idx_l1;
|
||||||
|
unsigned addrmode_arraymode_disrdo_distwoinstants;
|
||||||
|
|
||||||
|
bool not_referenced;
|
||||||
|
bool is_idr;
|
||||||
|
bool has_ref_pic_list;
|
||||||
|
bool enable_vui;
|
||||||
|
unsigned int ref_pic_list_0[32];
|
||||||
|
unsigned int ref_pic_list_1[32];
|
||||||
|
unsigned int frame_idx[32];
|
||||||
|
};
|
||||||
|
|
||||||
|
/* VCE encoder representation */
|
||||||
|
struct rvce_encoder {
|
||||||
|
struct pipe_video_codec base;
|
||||||
|
|
||||||
|
/* version specific packets */
|
||||||
|
void (*session)(struct rvce_encoder *enc);
|
||||||
|
void (*create)(struct rvce_encoder *enc);
|
||||||
|
void (*feedback)(struct rvce_encoder *enc);
|
||||||
|
void (*rate_control)(struct rvce_encoder *enc);
|
||||||
|
void (*config_extension)(struct rvce_encoder *enc);
|
||||||
|
void (*pic_control)(struct rvce_encoder *enc);
|
||||||
|
void (*motion_estimation)(struct rvce_encoder *enc);
|
||||||
|
void (*rdo)(struct rvce_encoder *enc);
|
||||||
|
void (*vui)(struct rvce_encoder *enc);
|
||||||
|
void (*config)(struct rvce_encoder *enc);
|
||||||
|
void (*encode)(struct rvce_encoder *enc);
|
||||||
|
void (*destroy)(struct rvce_encoder *enc);
|
||||||
|
void (*task_info)(struct rvce_encoder *enc, uint32_t op,
|
||||||
|
uint32_t dep, uint32_t fb_idx,
|
||||||
|
uint32_t ring_idx);
|
||||||
|
|
||||||
|
unsigned stream_handle;
|
||||||
|
|
||||||
|
struct pipe_screen *screen;
|
||||||
|
struct radeon_winsys* ws;
|
||||||
|
struct radeon_winsys_cs* cs;
|
||||||
|
|
||||||
|
rvce_get_buffer get_buffer;
|
||||||
|
|
||||||
|
struct pb_buffer* handle;
|
||||||
|
struct radeon_surf* luma;
|
||||||
|
struct radeon_surf* chroma;
|
||||||
|
|
||||||
|
struct pb_buffer* bs_handle;
|
||||||
|
unsigned bs_size;
|
||||||
|
|
||||||
|
struct rvce_cpb_slot *cpb_array;
|
||||||
|
struct list_head cpb_slots;
|
||||||
|
unsigned cpb_num;
|
||||||
|
|
||||||
|
struct rvid_buffer *fb;
|
||||||
|
struct rvid_buffer cpb;
|
||||||
|
struct pipe_h264_enc_picture_desc pic;
|
||||||
|
struct rvce_h264_enc_pic enc_pic;
|
||||||
|
|
||||||
|
unsigned task_info_idx;
|
||||||
|
unsigned bs_idx;
|
||||||
|
|
||||||
|
bool use_vm;
|
||||||
|
bool use_vui;
|
||||||
|
bool dual_pipe;
|
||||||
|
bool dual_inst;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* CPB handling functions */
|
||||||
|
struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc);
|
||||||
|
struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc);
|
||||||
|
struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc);
|
||||||
|
void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
|
||||||
|
signed *luma_offset, signed *chroma_offset);
|
||||||
|
|
||||||
|
struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
|
||||||
|
const struct pipe_video_codec *templat,
|
||||||
|
struct radeon_winsys* ws,
|
||||||
|
rvce_get_buffer get_buffer);
|
||||||
|
|
||||||
|
bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen);
|
||||||
|
|
||||||
|
void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
|
||||||
|
enum radeon_bo_usage usage, enum radeon_bo_domain domain,
|
||||||
|
signed offset);
|
||||||
|
|
||||||
|
/* init vce fw 40.2.2 specific callbacks */
|
||||||
|
void radeon_vce_40_2_2_init(struct rvce_encoder *enc);
|
||||||
|
|
||||||
|
/* init vce fw 50 specific callbacks */
|
||||||
|
void radeon_vce_50_init(struct rvce_encoder *enc);
|
||||||
|
|
||||||
|
/* init vce fw 52 specific callbacks */
|
||||||
|
void radeon_vce_52_init(struct rvce_encoder *enc);
|
||||||
|
|
||||||
|
/* version specific function for getting parameters */
|
||||||
|
void (*get_pic_param)(struct rvce_encoder *enc,
|
||||||
|
struct pipe_h264_enc_picture_desc *pic);
|
||||||
|
|
||||||
|
/* get parameters for vce 40.2.2 */
|
||||||
|
void radeon_vce_40_2_2_get_param(struct rvce_encoder *enc,
|
||||||
|
struct pipe_h264_enc_picture_desc *pic);
|
||||||
|
|
||||||
|
/* get parameters for vce 50 */
|
||||||
|
void radeon_vce_50_get_param(struct rvce_encoder *enc,
|
||||||
|
struct pipe_h264_enc_picture_desc *pic);
|
||||||
|
|
||||||
|
/* get parameters for vce 52 */
|
||||||
|
void radeon_vce_52_get_param(struct rvce_encoder *enc,
|
||||||
|
struct pipe_h264_enc_picture_desc *pic);
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,372 @@
|
||||||
|
/**************************************************************************
|
||||||
|
*
|
||||||
|
* Copyright 2013 Advanced Micro Devices, Inc.
|
||||||
|
* All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the
|
||||||
|
* "Software"), to deal in the Software without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sub license, and/or sell copies of the Software, and to
|
||||||
|
* permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the
|
||||||
|
* next paragraph) shall be included in all copies or substantial portions
|
||||||
|
* of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||||
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
|
||||||
|
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
**************************************************************************/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Authors:
|
||||||
|
* Christian König <christian.koenig@amd.com>
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include "util/u_memory.h"
|
||||||
|
#include "util/u_video.h"
|
||||||
|
|
||||||
|
#include "vl/vl_defines.h"
|
||||||
|
#include "vl/vl_video_buffer.h"
|
||||||
|
|
||||||
|
#include "r600_pipe_common.h"
|
||||||
|
#include "radeon_video.h"
|
||||||
|
#include "radeon_vce.h"
|
||||||
|
|
||||||
|
#define UVD_FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8))
|
||||||
|
|
||||||
|
/* generate an stream handle */
|
||||||
|
unsigned rvid_alloc_stream_handle()
|
||||||
|
{
|
||||||
|
static unsigned counter = 0;
|
||||||
|
unsigned stream_handle = 0;
|
||||||
|
unsigned pid = getpid();
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < 32; ++i)
|
||||||
|
stream_handle |= ((pid >> i) & 1) << (31 - i);
|
||||||
|
|
||||||
|
stream_handle ^= ++counter;
|
||||||
|
return stream_handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* create a buffer in the winsys */
|
||||||
|
bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
|
||||||
|
unsigned size, unsigned usage)
|
||||||
|
{
|
||||||
|
memset(buffer, 0, sizeof(*buffer));
|
||||||
|
buffer->usage = usage;
|
||||||
|
|
||||||
|
/* Hardware buffer placement restrictions require the kernel to be
|
||||||
|
* able to move buffers around individually, so request a
|
||||||
|
* non-sub-allocated buffer.
|
||||||
|
*/
|
||||||
|
buffer->res = (struct r600_resource *)
|
||||||
|
pipe_buffer_create(screen, PIPE_BIND_SHARED,
|
||||||
|
usage, size);
|
||||||
|
|
||||||
|
return buffer->res != NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* destroy a buffer */
|
||||||
|
void rvid_destroy_buffer(struct rvid_buffer *buffer)
|
||||||
|
{
|
||||||
|
r600_resource_reference(&buffer->res, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* reallocate a buffer, preserving its content */
|
||||||
|
bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
|
||||||
|
struct rvid_buffer *new_buf, unsigned new_size)
|
||||||
|
{
|
||||||
|
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
|
||||||
|
struct radeon_winsys* ws = rscreen->ws;
|
||||||
|
unsigned bytes = MIN2(new_buf->res->buf->size, new_size);
|
||||||
|
struct rvid_buffer old_buf = *new_buf;
|
||||||
|
void *src = NULL, *dst = NULL;
|
||||||
|
|
||||||
|
if (!rvid_create_buffer(screen, new_buf, new_size, new_buf->usage))
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ);
|
||||||
|
if (!src)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
dst = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE);
|
||||||
|
if (!dst)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
memcpy(dst, src, bytes);
|
||||||
|
if (new_size > bytes) {
|
||||||
|
new_size -= bytes;
|
||||||
|
dst += bytes;
|
||||||
|
memset(dst, 0, new_size);
|
||||||
|
}
|
||||||
|
ws->buffer_unmap(new_buf->res->buf);
|
||||||
|
ws->buffer_unmap(old_buf.res->buf);
|
||||||
|
rvid_destroy_buffer(&old_buf);
|
||||||
|
return true;
|
||||||
|
|
||||||
|
error:
|
||||||
|
if (src)
|
||||||
|
ws->buffer_unmap(old_buf.res->buf);
|
||||||
|
rvid_destroy_buffer(new_buf);
|
||||||
|
*new_buf = old_buf;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* clear the buffer with zeros */
|
||||||
|
void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
|
||||||
|
{
|
||||||
|
struct r600_common_context *rctx = (struct r600_common_context*)context;
|
||||||
|
|
||||||
|
rctx->dma_clear_buffer(context, &buffer->res->b.b, 0,
|
||||||
|
buffer->res->buf->size, 0);
|
||||||
|
context->flush(context, NULL, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* join surfaces into the same buffer with identical tiling params
|
||||||
|
* sumup their sizes and replace the backend buffers with a single bo
|
||||||
|
*/
|
||||||
|
void rvid_join_surfaces(struct r600_common_context *rctx,
|
||||||
|
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
|
||||||
|
struct radeon_surf *surfaces[VL_NUM_COMPONENTS])
|
||||||
|
{
|
||||||
|
struct radeon_winsys* ws;
|
||||||
|
unsigned best_tiling, best_wh, off;
|
||||||
|
unsigned size, alignment;
|
||||||
|
struct pb_buffer *pb;
|
||||||
|
unsigned i, j;
|
||||||
|
|
||||||
|
ws = rctx->ws;
|
||||||
|
|
||||||
|
for (i = 0, best_tiling = 0, best_wh = ~0; i < VL_NUM_COMPONENTS; ++i) {
|
||||||
|
unsigned wh;
|
||||||
|
|
||||||
|
if (!surfaces[i])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (rctx->chip_class < GFX9) {
|
||||||
|
/* choose the smallest bank w/h for now */
|
||||||
|
wh = surfaces[i]->u.legacy.bankw * surfaces[i]->u.legacy.bankh;
|
||||||
|
if (wh < best_wh) {
|
||||||
|
best_wh = wh;
|
||||||
|
best_tiling = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0, off = 0; i < VL_NUM_COMPONENTS; ++i) {
|
||||||
|
if (!surfaces[i])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* adjust the texture layer offsets */
|
||||||
|
off = align(off, surfaces[i]->surf_alignment);
|
||||||
|
|
||||||
|
if (rctx->chip_class < GFX9) {
|
||||||
|
/* copy the tiling parameters */
|
||||||
|
surfaces[i]->u.legacy.bankw = surfaces[best_tiling]->u.legacy.bankw;
|
||||||
|
surfaces[i]->u.legacy.bankh = surfaces[best_tiling]->u.legacy.bankh;
|
||||||
|
surfaces[i]->u.legacy.mtilea = surfaces[best_tiling]->u.legacy.mtilea;
|
||||||
|
surfaces[i]->u.legacy.tile_split = surfaces[best_tiling]->u.legacy.tile_split;
|
||||||
|
|
||||||
|
for (j = 0; j < ARRAY_SIZE(surfaces[i]->u.legacy.level); ++j)
|
||||||
|
surfaces[i]->u.legacy.level[j].offset += off;
|
||||||
|
} else
|
||||||
|
surfaces[i]->u.gfx9.surf_offset += off;
|
||||||
|
|
||||||
|
off += surfaces[i]->surf_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0, size = 0, alignment = 0; i < VL_NUM_COMPONENTS; ++i) {
|
||||||
|
if (!buffers[i] || !*buffers[i])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
size = align(size, (*buffers[i])->alignment);
|
||||||
|
size += (*buffers[i])->size;
|
||||||
|
alignment = MAX2(alignment, (*buffers[i])->alignment * 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!size)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* TODO: 2D tiling workaround */
|
||||||
|
alignment *= 2;
|
||||||
|
|
||||||
|
pb = ws->buffer_create(ws, size, alignment, RADEON_DOMAIN_VRAM,
|
||||||
|
RADEON_FLAG_GTT_WC);
|
||||||
|
if (!pb)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
|
||||||
|
if (!buffers[i] || !*buffers[i])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
pb_reference(buffers[i], pb);
|
||||||
|
}
|
||||||
|
|
||||||
|
pb_reference(&pb, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
int rvid_get_video_param(struct pipe_screen *screen,
|
||||||
|
enum pipe_video_profile profile,
|
||||||
|
enum pipe_video_entrypoint entrypoint,
|
||||||
|
enum pipe_video_cap param)
|
||||||
|
{
|
||||||
|
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
|
||||||
|
enum pipe_video_format codec = u_reduce_video_profile(profile);
|
||||||
|
struct radeon_info info;
|
||||||
|
|
||||||
|
rscreen->ws->query_info(rscreen->ws, &info);
|
||||||
|
|
||||||
|
if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
|
||||||
|
switch (param) {
|
||||||
|
case PIPE_VIDEO_CAP_SUPPORTED:
|
||||||
|
return codec == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
|
||||||
|
rvce_is_fw_version_supported(rscreen);
|
||||||
|
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
|
||||||
|
return 1;
|
||||||
|
case PIPE_VIDEO_CAP_MAX_WIDTH:
|
||||||
|
return (rscreen->family < CHIP_TONGA) ? 2048 : 4096;
|
||||||
|
case PIPE_VIDEO_CAP_MAX_HEIGHT:
|
||||||
|
return (rscreen->family < CHIP_TONGA) ? 1152 : 2304;
|
||||||
|
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
|
||||||
|
return PIPE_FORMAT_NV12;
|
||||||
|
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
|
||||||
|
return false;
|
||||||
|
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
|
||||||
|
return false;
|
||||||
|
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
|
||||||
|
return true;
|
||||||
|
case PIPE_VIDEO_CAP_STACKED_FRAMES:
|
||||||
|
return (rscreen->family < CHIP_TONGA) ? 1 : 2;
|
||||||
|
default:
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (param) {
|
||||||
|
case PIPE_VIDEO_CAP_SUPPORTED:
|
||||||
|
switch (codec) {
|
||||||
|
case PIPE_VIDEO_FORMAT_MPEG12:
|
||||||
|
return profile != PIPE_VIDEO_PROFILE_MPEG1;
|
||||||
|
case PIPE_VIDEO_FORMAT_MPEG4:
|
||||||
|
/* no support for MPEG4 on older hw */
|
||||||
|
return rscreen->family >= CHIP_PALM;
|
||||||
|
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
|
||||||
|
if ((rscreen->family == CHIP_POLARIS10 ||
|
||||||
|
rscreen->family == CHIP_POLARIS11) &&
|
||||||
|
info.uvd_fw_version < UVD_FW_1_66_16 ) {
|
||||||
|
RVID_ERR("POLARIS10/11 firmware version need to be updated.\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
case PIPE_VIDEO_FORMAT_VC1:
|
||||||
|
return true;
|
||||||
|
case PIPE_VIDEO_FORMAT_HEVC:
|
||||||
|
/* Carrizo only supports HEVC Main */
|
||||||
|
if (rscreen->family >= CHIP_STONEY)
|
||||||
|
return (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN ||
|
||||||
|
profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10);
|
||||||
|
else if (rscreen->family >= CHIP_CARRIZO)
|
||||||
|
return profile == PIPE_VIDEO_PROFILE_HEVC_MAIN;
|
||||||
|
return false;
|
||||||
|
case PIPE_VIDEO_FORMAT_JPEG:
|
||||||
|
if (rscreen->family < CHIP_CARRIZO || rscreen->family >= CHIP_VEGA10)
|
||||||
|
return false;
|
||||||
|
if (!(rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 19)) {
|
||||||
|
RVID_ERR("No MJPEG support for the kernel version\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
|
||||||
|
return 1;
|
||||||
|
case PIPE_VIDEO_CAP_MAX_WIDTH:
|
||||||
|
return (rscreen->family < CHIP_TONGA) ? 2048 : 4096;
|
||||||
|
case PIPE_VIDEO_CAP_MAX_HEIGHT:
|
||||||
|
return (rscreen->family < CHIP_TONGA) ? 1152 : 4096;
|
||||||
|
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
|
||||||
|
if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
|
||||||
|
return PIPE_FORMAT_P016;
|
||||||
|
else
|
||||||
|
return PIPE_FORMAT_NV12;
|
||||||
|
|
||||||
|
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
|
||||||
|
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
|
||||||
|
if (rscreen->family < CHIP_PALM) {
|
||||||
|
/* MPEG2 only with shaders and no support for
|
||||||
|
interlacing on R6xx style UVD */
|
||||||
|
return codec != PIPE_VIDEO_FORMAT_MPEG12 &&
|
||||||
|
rscreen->family > CHIP_RV770;
|
||||||
|
} else {
|
||||||
|
enum pipe_video_format format = u_reduce_video_profile(profile);
|
||||||
|
|
||||||
|
if (format == PIPE_VIDEO_FORMAT_HEVC)
|
||||||
|
return false; //The firmware doesn't support interlaced HEVC.
|
||||||
|
else if (format == PIPE_VIDEO_FORMAT_JPEG)
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
|
||||||
|
return true;
|
||||||
|
case PIPE_VIDEO_CAP_MAX_LEVEL:
|
||||||
|
switch (profile) {
|
||||||
|
case PIPE_VIDEO_PROFILE_MPEG1:
|
||||||
|
return 0;
|
||||||
|
case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
|
||||||
|
case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
|
||||||
|
return 3;
|
||||||
|
case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE:
|
||||||
|
return 3;
|
||||||
|
case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE:
|
||||||
|
return 5;
|
||||||
|
case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
|
||||||
|
return 1;
|
||||||
|
case PIPE_VIDEO_PROFILE_VC1_MAIN:
|
||||||
|
return 2;
|
||||||
|
case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
|
||||||
|
return 4;
|
||||||
|
case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
|
||||||
|
case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
|
||||||
|
case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
|
||||||
|
return (rscreen->family < CHIP_TONGA) ? 41 : 52;
|
||||||
|
case PIPE_VIDEO_PROFILE_HEVC_MAIN:
|
||||||
|
case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
|
||||||
|
return 186;
|
||||||
|
default:
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean rvid_is_format_supported(struct pipe_screen *screen,
|
||||||
|
enum pipe_format format,
|
||||||
|
enum pipe_video_profile profile,
|
||||||
|
enum pipe_video_entrypoint entrypoint)
|
||||||
|
{
|
||||||
|
/* HEVC 10 bit decoding should use P016 instead of NV12 if possible */
|
||||||
|
if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
|
||||||
|
return (format == PIPE_FORMAT_NV12) ||
|
||||||
|
(format == PIPE_FORMAT_P016);
|
||||||
|
|
||||||
|
/* we can only handle this one with UVD */
|
||||||
|
if (profile != PIPE_VIDEO_PROFILE_UNKNOWN)
|
||||||
|
return format == PIPE_FORMAT_NV12;
|
||||||
|
|
||||||
|
return vl_video_buffer_is_format_supported(screen, format, profile, entrypoint);
|
||||||
|
}
|
|
@ -0,0 +1,85 @@
|
||||||
|
/**************************************************************************
|
||||||
|
*
|
||||||
|
* Copyright 2013 Advanced Micro Devices, Inc.
|
||||||
|
* All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the
|
||||||
|
* "Software"), to deal in the Software without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sub license, and/or sell copies of the Software, and to
|
||||||
|
* permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the
|
||||||
|
* next paragraph) shall be included in all copies or substantial portions
|
||||||
|
* of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||||
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
|
||||||
|
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
**************************************************************************/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Authors:
|
||||||
|
* Christian König <christian.koenig@amd.com>
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef RADEON_VIDEO_H
|
||||||
|
#define RADEON_VIDEO_H
|
||||||
|
|
||||||
|
#include "radeon/radeon_winsys.h"
|
||||||
|
#include "vl/vl_video_buffer.h"
|
||||||
|
|
||||||
|
#define RVID_ERR(fmt, args...) \
|
||||||
|
fprintf(stderr, "EE %s:%d %s UVD - "fmt, __FILE__, __LINE__, __func__, ##args)
|
||||||
|
|
||||||
|
/* video buffer representation */
|
||||||
|
struct rvid_buffer
|
||||||
|
{
|
||||||
|
unsigned usage;
|
||||||
|
struct r600_resource *res;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* generate an stream handle */
|
||||||
|
unsigned rvid_alloc_stream_handle(void);
|
||||||
|
|
||||||
|
/* create a buffer in the winsys */
|
||||||
|
bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
|
||||||
|
unsigned size, unsigned usage);
|
||||||
|
|
||||||
|
/* destroy a buffer */
|
||||||
|
void rvid_destroy_buffer(struct rvid_buffer *buffer);
|
||||||
|
|
||||||
|
/* reallocate a buffer, preserving its content */
|
||||||
|
bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
|
||||||
|
struct rvid_buffer *new_buf, unsigned new_size);
|
||||||
|
|
||||||
|
/* clear the buffer with zeros */
|
||||||
|
void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer);
|
||||||
|
|
||||||
|
/* join surfaces into the same buffer with identical tiling params
|
||||||
|
sumup their sizes and replace the backend buffers with a single bo */
|
||||||
|
void rvid_join_surfaces(struct r600_common_context *rctx,
|
||||||
|
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
|
||||||
|
struct radeon_surf *surfaces[VL_NUM_COMPONENTS]);
|
||||||
|
|
||||||
|
/* returns supported codecs and other parameters */
|
||||||
|
int rvid_get_video_param(struct pipe_screen *screen,
|
||||||
|
enum pipe_video_profile profile,
|
||||||
|
enum pipe_video_entrypoint entrypoint,
|
||||||
|
enum pipe_video_cap param);
|
||||||
|
|
||||||
|
/* the hardware only supports NV12 */
|
||||||
|
boolean rvid_is_format_supported(struct pipe_screen *screen,
|
||||||
|
enum pipe_format format,
|
||||||
|
enum pipe_video_profile profile,
|
||||||
|
enum pipe_video_entrypoint entrypoint);
|
||||||
|
|
||||||
|
#endif // RADEON_VIDEO_H
|
|
@ -28,22 +28,22 @@
|
||||||
|
|
||||||
/* 2xMSAA
|
/* 2xMSAA
|
||||||
* There are two locations (4, 4), (-4, -4). */
|
* There are two locations (4, 4), (-4, -4). */
|
||||||
const uint32_t eg_sample_locs_2x[4] = {
|
static const uint32_t eg_sample_locs_2x[4] = {
|
||||||
FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
|
FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
|
||||||
FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
|
FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
|
||||||
FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
|
FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
|
||||||
FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
|
FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
|
||||||
};
|
};
|
||||||
const unsigned eg_max_dist_2x = 4;
|
static const unsigned eg_max_dist_2x = 4;
|
||||||
/* 4xMSAA
|
/* 4xMSAA
|
||||||
* There are 4 locations: (-2, 6), (6, -2), (-6, 2), (2, 6). */
|
* There are 4 locations: (-2, 6), (6, -2), (-6, 2), (2, 6). */
|
||||||
const uint32_t eg_sample_locs_4x[4] = {
|
static const uint32_t eg_sample_locs_4x[4] = {
|
||||||
FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
|
FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
|
||||||
FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
|
FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
|
||||||
FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
|
FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
|
||||||
FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
|
FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
|
||||||
};
|
};
|
||||||
const unsigned eg_max_dist_4x = 6;
|
static const unsigned eg_max_dist_4x = 6;
|
||||||
|
|
||||||
/* Cayman 8xMSAA */
|
/* Cayman 8xMSAA */
|
||||||
static const uint32_t cm_sample_locs_8x[] = {
|
static const uint32_t cm_sample_locs_8x[] = {
|
||||||
|
@ -78,7 +78,7 @@ static const uint32_t cm_sample_locs_16x[] = {
|
||||||
};
|
};
|
||||||
static const unsigned cm_max_dist_16x = 8;
|
static const unsigned cm_max_dist_16x = 8;
|
||||||
|
|
||||||
void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
|
void si_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
|
||||||
unsigned sample_index, float *out_value)
|
unsigned sample_index, float *out_value)
|
||||||
{
|
{
|
||||||
int offset, index;
|
int offset, index;
|
||||||
|
@ -123,24 +123,24 @@ void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void cayman_init_msaa(struct pipe_context *ctx)
|
void si_init_msaa(struct pipe_context *ctx)
|
||||||
{
|
{
|
||||||
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
|
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
cayman_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]);
|
si_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]);
|
||||||
|
|
||||||
for (i = 0; i < 2; i++)
|
for (i = 0; i < 2; i++)
|
||||||
cayman_get_sample_position(ctx, 2, i, rctx->sample_locations_2x[i]);
|
si_get_sample_position(ctx, 2, i, rctx->sample_locations_2x[i]);
|
||||||
for (i = 0; i < 4; i++)
|
for (i = 0; i < 4; i++)
|
||||||
cayman_get_sample_position(ctx, 4, i, rctx->sample_locations_4x[i]);
|
si_get_sample_position(ctx, 4, i, rctx->sample_locations_4x[i]);
|
||||||
for (i = 0; i < 8; i++)
|
for (i = 0; i < 8; i++)
|
||||||
cayman_get_sample_position(ctx, 8, i, rctx->sample_locations_8x[i]);
|
si_get_sample_position(ctx, 8, i, rctx->sample_locations_8x[i]);
|
||||||
for (i = 0; i < 16; i++)
|
for (i = 0; i < 16; i++)
|
||||||
cayman_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]);
|
si_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
|
void si_common_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
|
||||||
{
|
{
|
||||||
switch (nr_samples) {
|
switch (nr_samples) {
|
||||||
default:
|
default:
|
||||||
|
@ -201,9 +201,9 @@ void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
|
void si_common_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
|
||||||
int ps_iter_samples, int overrast_samples,
|
int ps_iter_samples, int overrast_samples,
|
||||||
unsigned sc_mode_cntl_1)
|
unsigned sc_mode_cntl_1)
|
||||||
{
|
{
|
||||||
int setup_samples = nr_samples > 1 ? nr_samples :
|
int setup_samples = nr_samples > 1 ? nr_samples :
|
||||||
overrast_samples > 1 ? overrast_samples : 0;
|
overrast_samples > 1 ? overrast_samples : 0;
|
||||||
|
|
|
@ -30,9 +30,9 @@
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
|
bool si_rings_is_buffer_referenced(struct r600_common_context *ctx,
|
||||||
struct pb_buffer *buf,
|
struct pb_buffer *buf,
|
||||||
enum radeon_bo_usage usage)
|
enum radeon_bo_usage usage)
|
||||||
{
|
{
|
||||||
if (ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, buf, usage)) {
|
if (ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, buf, usage)) {
|
||||||
return true;
|
return true;
|
||||||
|
@ -44,9 +44,9 @@ bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
|
void *si_buffer_map_sync_with_rings(struct r600_common_context *ctx,
|
||||||
struct r600_resource *resource,
|
struct r600_resource *resource,
|
||||||
unsigned usage)
|
unsigned usage)
|
||||||
{
|
{
|
||||||
enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
|
enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
|
||||||
bool busy = false;
|
bool busy = false;
|
||||||
|
@ -101,9 +101,9 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
|
||||||
return ctx->ws->buffer_map(resource->buf, NULL, usage);
|
return ctx->ws->buffer_map(resource->buf, NULL, usage);
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_init_resource_fields(struct r600_common_screen *rscreen,
|
void si_init_resource_fields(struct r600_common_screen *rscreen,
|
||||||
struct r600_resource *res,
|
struct r600_resource *res,
|
||||||
uint64_t size, unsigned alignment)
|
uint64_t size, unsigned alignment)
|
||||||
{
|
{
|
||||||
struct r600_texture *rtex = (struct r600_texture*)res;
|
struct r600_texture *rtex = (struct r600_texture*)res;
|
||||||
|
|
||||||
|
@ -205,8 +205,8 @@ void r600_init_resource_fields(struct r600_common_screen *rscreen,
|
||||||
res->gart_usage = size;
|
res->gart_usage = size;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool r600_alloc_resource(struct r600_common_screen *rscreen,
|
bool si_alloc_resource(struct r600_common_screen *rscreen,
|
||||||
struct r600_resource *res)
|
struct r600_resource *res)
|
||||||
{
|
{
|
||||||
struct pb_buffer *old_buf, *new_buf;
|
struct pb_buffer *old_buf, *new_buf;
|
||||||
|
|
||||||
|
@ -274,7 +274,7 @@ r600_invalidate_buffer(struct r600_common_context *rctx,
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/* Check if mapping this buffer would cause waiting for the GPU. */
|
/* Check if mapping this buffer would cause waiting for the GPU. */
|
||||||
if (r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
|
if (si_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
|
||||||
!rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
|
!rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
|
||||||
rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
|
rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
|
||||||
} else {
|
} else {
|
||||||
|
@ -285,7 +285,7 @@ r600_invalidate_buffer(struct r600_common_context *rctx,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Replace the storage of dst with src. */
|
/* Replace the storage of dst with src. */
|
||||||
void r600_replace_buffer_storage(struct pipe_context *ctx,
|
void si_replace_buffer_storage(struct pipe_context *ctx,
|
||||||
struct pipe_resource *dst,
|
struct pipe_resource *dst,
|
||||||
struct pipe_resource *src)
|
struct pipe_resource *src)
|
||||||
{
|
{
|
||||||
|
@ -308,8 +308,8 @@ void r600_replace_buffer_storage(struct pipe_context *ctx,
|
||||||
rctx->rebind_buffer(ctx, dst, old_gpu_address);
|
rctx->rebind_buffer(ctx, dst, old_gpu_address);
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_invalidate_resource(struct pipe_context *ctx,
|
void si_invalidate_resource(struct pipe_context *ctx,
|
||||||
struct pipe_resource *resource)
|
struct pipe_resource *resource)
|
||||||
{
|
{
|
||||||
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
|
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
|
||||||
struct r600_resource *rbuffer = r600_resource(resource);
|
struct r600_resource *rbuffer = r600_resource(resource);
|
||||||
|
@ -429,7 +429,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
|
||||||
/* Check if mapping this buffer would cause waiting for the GPU.
|
/* Check if mapping this buffer would cause waiting for the GPU.
|
||||||
*/
|
*/
|
||||||
if (rbuffer->flags & RADEON_FLAG_SPARSE ||
|
if (rbuffer->flags & RADEON_FLAG_SPARSE ||
|
||||||
r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
|
si_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
|
||||||
!rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
|
!rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
|
||||||
/* Do a wait-free write-only transfer using a temporary buffer. */
|
/* Do a wait-free write-only transfer using a temporary buffer. */
|
||||||
unsigned offset;
|
unsigned offset;
|
||||||
|
@ -472,7 +472,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
|
||||||
box->x % R600_MAP_BUFFER_ALIGNMENT,
|
box->x % R600_MAP_BUFFER_ALIGNMENT,
|
||||||
0, 0, resource, 0, box);
|
0, 0, resource, 0, box);
|
||||||
|
|
||||||
data = r600_buffer_map_sync_with_rings(rctx, staging,
|
data = si_buffer_map_sync_with_rings(rctx, staging,
|
||||||
usage & ~PIPE_TRANSFER_UNSYNCHRONIZED);
|
usage & ~PIPE_TRANSFER_UNSYNCHRONIZED);
|
||||||
if (!data) {
|
if (!data) {
|
||||||
r600_resource_reference(&staging, NULL);
|
r600_resource_reference(&staging, NULL);
|
||||||
|
@ -487,7 +487,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage);
|
data = si_buffer_map_sync_with_rings(rctx, rbuffer, usage);
|
||||||
if (!data) {
|
if (!data) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -557,10 +557,10 @@ static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
|
||||||
slab_free(&rctx->pool_transfers, transfer);
|
slab_free(&rctx->pool_transfers, transfer);
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_buffer_subdata(struct pipe_context *ctx,
|
void si_buffer_subdata(struct pipe_context *ctx,
|
||||||
struct pipe_resource *buffer,
|
struct pipe_resource *buffer,
|
||||||
unsigned usage, unsigned offset,
|
unsigned usage, unsigned offset,
|
||||||
unsigned size, const void *data)
|
unsigned size, const void *data)
|
||||||
{
|
{
|
||||||
struct pipe_transfer *transfer = NULL;
|
struct pipe_transfer *transfer = NULL;
|
||||||
struct pipe_box box;
|
struct pipe_box box;
|
||||||
|
@ -611,30 +611,30 @@ r600_alloc_buffer_struct(struct pipe_screen *screen,
|
||||||
return rbuffer;
|
return rbuffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
|
struct pipe_resource *si_buffer_create(struct pipe_screen *screen,
|
||||||
const struct pipe_resource *templ,
|
const struct pipe_resource *templ,
|
||||||
unsigned alignment)
|
unsigned alignment)
|
||||||
{
|
{
|
||||||
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
|
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
|
||||||
struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
|
struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
|
||||||
|
|
||||||
r600_init_resource_fields(rscreen, rbuffer, templ->width0, alignment);
|
si_init_resource_fields(rscreen, rbuffer, templ->width0, alignment);
|
||||||
|
|
||||||
if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE)
|
if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE)
|
||||||
rbuffer->flags |= RADEON_FLAG_SPARSE;
|
rbuffer->flags |= RADEON_FLAG_SPARSE;
|
||||||
|
|
||||||
if (!r600_alloc_resource(rscreen, rbuffer)) {
|
if (!si_alloc_resource(rscreen, rbuffer)) {
|
||||||
FREE(rbuffer);
|
FREE(rbuffer);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
return &rbuffer->b.b;
|
return &rbuffer->b.b;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct pipe_resource *r600_aligned_buffer_create(struct pipe_screen *screen,
|
struct pipe_resource *si_aligned_buffer_create(struct pipe_screen *screen,
|
||||||
unsigned flags,
|
unsigned flags,
|
||||||
unsigned usage,
|
unsigned usage,
|
||||||
unsigned size,
|
unsigned size,
|
||||||
unsigned alignment)
|
unsigned alignment)
|
||||||
{
|
{
|
||||||
struct pipe_resource buffer;
|
struct pipe_resource buffer;
|
||||||
|
|
||||||
|
@ -648,13 +648,13 @@ struct pipe_resource *r600_aligned_buffer_create(struct pipe_screen *screen,
|
||||||
buffer.height0 = 1;
|
buffer.height0 = 1;
|
||||||
buffer.depth0 = 1;
|
buffer.depth0 = 1;
|
||||||
buffer.array_size = 1;
|
buffer.array_size = 1;
|
||||||
return r600_buffer_create(screen, &buffer, alignment);
|
return si_buffer_create(screen, &buffer, alignment);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct pipe_resource *
|
struct pipe_resource *
|
||||||
r600_buffer_from_user_memory(struct pipe_screen *screen,
|
si_buffer_from_user_memory(struct pipe_screen *screen,
|
||||||
const struct pipe_resource *templ,
|
const struct pipe_resource *templ,
|
||||||
void *user_memory)
|
void *user_memory)
|
||||||
{
|
{
|
||||||
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
|
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
|
||||||
struct radeon_winsys *ws = rscreen->ws;
|
struct radeon_winsys *ws = rscreen->ws;
|
||||||
|
|
|
@ -162,7 +162,7 @@ r600_gpu_load_thread(void *param)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen)
|
void si_gpu_load_kill_thread(struct r600_common_screen *rscreen)
|
||||||
{
|
{
|
||||||
if (!rscreen->gpu_load_thread)
|
if (!rscreen->gpu_load_thread)
|
||||||
return;
|
return;
|
||||||
|
@ -269,14 +269,14 @@ static unsigned busy_index_from_type(struct r600_common_screen *rscreen,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type)
|
uint64_t si_begin_counter(struct r600_common_screen *rscreen, unsigned type)
|
||||||
{
|
{
|
||||||
unsigned busy_index = busy_index_from_type(rscreen, type);
|
unsigned busy_index = busy_index_from_type(rscreen, type);
|
||||||
return r600_read_mmio_counter(rscreen, busy_index);
|
return r600_read_mmio_counter(rscreen, busy_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,
|
unsigned si_end_counter(struct r600_common_screen *rscreen, unsigned type,
|
||||||
uint64_t begin)
|
uint64_t begin)
|
||||||
{
|
{
|
||||||
unsigned busy_index = busy_index_from_type(rscreen, type);
|
unsigned busy_index = busy_index_from_type(rscreen, type);
|
||||||
return r600_end_mmio_counter(rscreen, begin, busy_index);
|
return r600_end_mmio_counter(rscreen, begin, busy_index);
|
||||||
|
|
|
@ -112,7 +112,7 @@ static void r600_pc_query_destroy(struct r600_common_screen *rscreen,
|
||||||
|
|
||||||
FREE(query->counters);
|
FREE(query->counters);
|
||||||
|
|
||||||
r600_query_hw_destroy(rscreen, rquery);
|
si_query_hw_destroy(rscreen, rquery);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool r600_pc_query_prepare_buffer(struct r600_common_screen *screen,
|
static bool r600_pc_query_prepare_buffer(struct r600_common_screen *screen,
|
||||||
|
@ -217,9 +217,9 @@ static void r600_pc_query_add_result(struct r600_common_screen *rscreen,
|
||||||
|
|
||||||
static struct r600_query_ops batch_query_ops = {
|
static struct r600_query_ops batch_query_ops = {
|
||||||
.destroy = r600_pc_query_destroy,
|
.destroy = r600_pc_query_destroy,
|
||||||
.begin = r600_query_hw_begin,
|
.begin = si_query_hw_begin,
|
||||||
.end = r600_query_hw_end,
|
.end = si_query_hw_end,
|
||||||
.get_result = r600_query_hw_get_result
|
.get_result = si_query_hw_get_result
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct r600_query_hw_ops batch_query_hw_ops = {
|
static struct r600_query_hw_ops batch_query_hw_ops = {
|
||||||
|
@ -297,9 +297,9 @@ static struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
|
||||||
return group;
|
return group;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
|
struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
|
||||||
unsigned num_queries,
|
unsigned num_queries,
|
||||||
unsigned *query_types)
|
unsigned *query_types)
|
||||||
{
|
{
|
||||||
struct r600_common_screen *screen =
|
struct r600_common_screen *screen =
|
||||||
(struct r600_common_screen *)ctx->screen;
|
(struct r600_common_screen *)ctx->screen;
|
||||||
|
@ -417,7 +417,7 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
|
||||||
counter->qwords *= block->num_instances;
|
counter->qwords *= block->num_instances;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!r600_query_hw_init(screen, &query->b))
|
if (!si_query_hw_init(screen, &query->b))
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
return (struct pipe_query *)query;
|
return (struct pipe_query *)query;
|
||||||
|
@ -511,9 +511,9 @@ static bool r600_init_block_names(struct r600_common_screen *screen,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
int r600_get_perfcounter_info(struct r600_common_screen *screen,
|
int si_get_perfcounter_info(struct r600_common_screen *screen,
|
||||||
unsigned index,
|
unsigned index,
|
||||||
struct pipe_driver_query_info *info)
|
struct pipe_driver_query_info *info)
|
||||||
{
|
{
|
||||||
struct r600_perfcounters *pc = screen->perfcounters;
|
struct r600_perfcounters *pc = screen->perfcounters;
|
||||||
struct r600_perfcounter_block *block;
|
struct r600_perfcounter_block *block;
|
||||||
|
@ -553,9 +553,9 @@ int r600_get_perfcounter_info(struct r600_common_screen *screen,
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int r600_get_perfcounter_group_info(struct r600_common_screen *screen,
|
int si_get_perfcounter_group_info(struct r600_common_screen *screen,
|
||||||
unsigned index,
|
unsigned index,
|
||||||
struct pipe_driver_query_group_info *info)
|
struct pipe_driver_query_group_info *info)
|
||||||
{
|
{
|
||||||
struct r600_perfcounters *pc = screen->perfcounters;
|
struct r600_perfcounters *pc = screen->perfcounters;
|
||||||
struct r600_perfcounter_block *block;
|
struct r600_perfcounter_block *block;
|
||||||
|
@ -580,13 +580,13 @@ int r600_get_perfcounter_group_info(struct r600_common_screen *screen,
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_perfcounters_destroy(struct r600_common_screen *rscreen)
|
void si_perfcounters_destroy(struct r600_common_screen *rscreen)
|
||||||
{
|
{
|
||||||
if (rscreen->perfcounters)
|
if (rscreen->perfcounters)
|
||||||
rscreen->perfcounters->cleanup(rscreen);
|
rscreen->perfcounters->cleanup(rscreen);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool r600_perfcounters_init(struct r600_perfcounters *pc,
|
bool si_perfcounters_init(struct r600_perfcounters *pc,
|
||||||
unsigned num_blocks)
|
unsigned num_blocks)
|
||||||
{
|
{
|
||||||
pc->blocks = CALLOC(num_blocks, sizeof(struct r600_perfcounter_block));
|
pc->blocks = CALLOC(num_blocks, sizeof(struct r600_perfcounter_block));
|
||||||
|
@ -599,11 +599,11 @@ bool r600_perfcounters_init(struct r600_perfcounters *pc,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_perfcounters_add_block(struct r600_common_screen *rscreen,
|
void si_perfcounters_add_block(struct r600_common_screen *rscreen,
|
||||||
struct r600_perfcounters *pc,
|
struct r600_perfcounters *pc,
|
||||||
const char *name, unsigned flags,
|
const char *name, unsigned flags,
|
||||||
unsigned counters, unsigned selectors,
|
unsigned counters, unsigned selectors,
|
||||||
unsigned instances, void *data)
|
unsigned instances, void *data)
|
||||||
{
|
{
|
||||||
struct r600_perfcounter_block *block = &pc->blocks[pc->num_blocks];
|
struct r600_perfcounter_block *block = &pc->blocks[pc->num_blocks];
|
||||||
|
|
||||||
|
@ -636,7 +636,7 @@ void r600_perfcounters_add_block(struct r600_common_screen *rscreen,
|
||||||
pc->num_groups += block->num_groups;
|
pc->num_groups += block->num_groups;
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_perfcounters_do_destroy(struct r600_perfcounters *pc)
|
void si_perfcounters_do_destroy(struct r600_perfcounters *pc)
|
||||||
{
|
{
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
|
|
|
@ -39,17 +39,8 @@
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
#include <sys/utsname.h>
|
#include <sys/utsname.h>
|
||||||
|
|
||||||
#ifndef HAVE_LLVM
|
|
||||||
#define HAVE_LLVM 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if HAVE_LLVM
|
|
||||||
#include <llvm-c/TargetMachine.h>
|
#include <llvm-c/TargetMachine.h>
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef MESA_LLVM_VERSION_PATCH
|
|
||||||
#define MESA_LLVM_VERSION_PATCH 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
struct r600_multi_fence {
|
struct r600_multi_fence {
|
||||||
struct pipe_reference reference;
|
struct pipe_reference reference;
|
||||||
|
@ -66,12 +57,12 @@ struct r600_multi_fence {
|
||||||
/*
|
/*
|
||||||
* shader binary helpers.
|
* shader binary helpers.
|
||||||
*/
|
*/
|
||||||
void radeon_shader_binary_init(struct ac_shader_binary *b)
|
void si_radeon_shader_binary_init(struct ac_shader_binary *b)
|
||||||
{
|
{
|
||||||
memset(b, 0, sizeof(*b));
|
memset(b, 0, sizeof(*b));
|
||||||
}
|
}
|
||||||
|
|
||||||
void radeon_shader_binary_clean(struct ac_shader_binary *b)
|
void si_radeon_shader_binary_clean(struct ac_shader_binary *b)
|
||||||
{
|
{
|
||||||
if (!b)
|
if (!b)
|
||||||
return;
|
return;
|
||||||
|
@ -99,11 +90,11 @@ void radeon_shader_binary_clean(struct ac_shader_binary *b)
|
||||||
* \param old_value Previous fence value (for a bug workaround)
|
* \param old_value Previous fence value (for a bug workaround)
|
||||||
* \param new_value Fence value to write for this event.
|
* \param new_value Fence value to write for this event.
|
||||||
*/
|
*/
|
||||||
void r600_gfx_write_event_eop(struct r600_common_context *ctx,
|
void si_gfx_write_event_eop(struct r600_common_context *ctx,
|
||||||
unsigned event, unsigned event_flags,
|
unsigned event, unsigned event_flags,
|
||||||
unsigned data_sel,
|
unsigned data_sel,
|
||||||
struct r600_resource *buf, uint64_t va,
|
struct r600_resource *buf, uint64_t va,
|
||||||
uint32_t new_fence, unsigned query_type)
|
uint32_t new_fence, unsigned query_type)
|
||||||
{
|
{
|
||||||
struct radeon_winsys_cs *cs = ctx->gfx.cs;
|
struct radeon_winsys_cs *cs = ctx->gfx.cs;
|
||||||
unsigned op = EVENT_TYPE(event) |
|
unsigned op = EVENT_TYPE(event) |
|
||||||
|
@ -183,7 +174,7 @@ void r600_gfx_write_event_eop(struct r600_common_context *ctx,
|
||||||
RADEON_PRIO_QUERY);
|
RADEON_PRIO_QUERY);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen)
|
unsigned si_gfx_write_fence_dwords(struct r600_common_screen *screen)
|
||||||
{
|
{
|
||||||
unsigned dwords = 6;
|
unsigned dwords = 6;
|
||||||
|
|
||||||
|
@ -197,8 +188,8 @@ unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen)
|
||||||
return dwords;
|
return dwords;
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_gfx_wait_fence(struct r600_common_context *ctx,
|
void si_gfx_wait_fence(struct r600_common_context *ctx,
|
||||||
uint64_t va, uint32_t ref, uint32_t mask)
|
uint64_t va, uint32_t ref, uint32_t mask)
|
||||||
{
|
{
|
||||||
struct radeon_winsys_cs *cs = ctx->gfx.cs;
|
struct radeon_winsys_cs *cs = ctx->gfx.cs;
|
||||||
|
|
||||||
|
@ -211,11 +202,11 @@ void r600_gfx_wait_fence(struct r600_common_context *ctx,
|
||||||
radeon_emit(cs, 4); /* poll interval */
|
radeon_emit(cs, 4); /* poll interval */
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_draw_rectangle(struct blitter_context *blitter,
|
void si_draw_rectangle(struct blitter_context *blitter,
|
||||||
int x1, int y1, int x2, int y2,
|
int x1, int y1, int x2, int y2,
|
||||||
float depth, unsigned num_instances,
|
float depth, unsigned num_instances,
|
||||||
enum blitter_attrib_type type,
|
enum blitter_attrib_type type,
|
||||||
const union blitter_attrib *attrib)
|
const union blitter_attrib *attrib)
|
||||||
{
|
{
|
||||||
struct r600_common_context *rctx =
|
struct r600_common_context *rctx =
|
||||||
(struct r600_common_context*)util_blitter_get_pipe(blitter);
|
(struct r600_common_context*)util_blitter_get_pipe(blitter);
|
||||||
|
@ -309,8 +300,8 @@ static void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
|
void si_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
|
||||||
struct r600_resource *dst, struct r600_resource *src)
|
struct r600_resource *dst, struct r600_resource *src)
|
||||||
{
|
{
|
||||||
uint64_t vram = ctx->dma.cs->used_vram;
|
uint64_t vram = ctx->dma.cs->used_vram;
|
||||||
uint64_t gtt = ctx->dma.cs->used_gart;
|
uint64_t gtt = ctx->dma.cs->used_gart;
|
||||||
|
@ -387,29 +378,29 @@ static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_preflush_suspend_features(struct r600_common_context *ctx)
|
void si_preflush_suspend_features(struct r600_common_context *ctx)
|
||||||
{
|
{
|
||||||
/* suspend queries */
|
/* suspend queries */
|
||||||
if (!LIST_IS_EMPTY(&ctx->active_queries))
|
if (!LIST_IS_EMPTY(&ctx->active_queries))
|
||||||
r600_suspend_queries(ctx);
|
si_suspend_queries(ctx);
|
||||||
|
|
||||||
ctx->streamout.suspended = false;
|
ctx->streamout.suspended = false;
|
||||||
if (ctx->streamout.begin_emitted) {
|
if (ctx->streamout.begin_emitted) {
|
||||||
r600_emit_streamout_end(ctx);
|
si_emit_streamout_end(ctx);
|
||||||
ctx->streamout.suspended = true;
|
ctx->streamout.suspended = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_postflush_resume_features(struct r600_common_context *ctx)
|
void si_postflush_resume_features(struct r600_common_context *ctx)
|
||||||
{
|
{
|
||||||
if (ctx->streamout.suspended) {
|
if (ctx->streamout.suspended) {
|
||||||
ctx->streamout.append_bitmask = ctx->streamout.enabled_mask;
|
ctx->streamout.append_bitmask = ctx->streamout.enabled_mask;
|
||||||
r600_streamout_buffers_dirty(ctx);
|
si_streamout_buffers_dirty(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* resume queries */
|
/* resume queries */
|
||||||
if (!LIST_IS_EMPTY(&ctx->active_queries))
|
if (!LIST_IS_EMPTY(&ctx->active_queries))
|
||||||
r600_resume_queries(ctx);
|
si_resume_queries(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void r600_add_fence_dependency(struct r600_common_context *rctx,
|
static void r600_add_fence_dependency(struct r600_common_context *rctx,
|
||||||
|
@ -542,7 +533,7 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (check_vm)
|
if (check_vm)
|
||||||
radeon_save_cs(rctx->ws, cs, &saved, true);
|
si_save_cs(rctx->ws, cs, &saved, true);
|
||||||
|
|
||||||
rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence);
|
rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence);
|
||||||
if (fence)
|
if (fence)
|
||||||
|
@ -555,7 +546,7 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags,
|
||||||
rctx->ws->fence_wait(rctx->ws, rctx->last_sdma_fence, 800*1000*1000);
|
rctx->ws->fence_wait(rctx->ws, rctx->last_sdma_fence, 800*1000*1000);
|
||||||
|
|
||||||
rctx->check_vm_faults(rctx, &saved, RING_DMA);
|
rctx->check_vm_faults(rctx, &saved, RING_DMA);
|
||||||
radeon_clear_saved_cs(&saved);
|
si_clear_saved_cs(&saved);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -563,8 +554,8 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags,
|
||||||
* Store a linearized copy of all chunks of \p cs together with the buffer
|
* Store a linearized copy of all chunks of \p cs together with the buffer
|
||||||
* list in \p saved.
|
* list in \p saved.
|
||||||
*/
|
*/
|
||||||
void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
|
void si_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
|
||||||
struct radeon_saved_cs *saved, bool get_buffer_list)
|
struct radeon_saved_cs *saved, bool get_buffer_list)
|
||||||
{
|
{
|
||||||
uint32_t *buf;
|
uint32_t *buf;
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
@ -602,7 +593,7 @@ oom:
|
||||||
memset(saved, 0, sizeof(*saved));
|
memset(saved, 0, sizeof(*saved));
|
||||||
}
|
}
|
||||||
|
|
||||||
void radeon_clear_saved_cs(struct radeon_saved_cs *saved)
|
void si_clear_saved_cs(struct radeon_saved_cs *saved)
|
||||||
{
|
{
|
||||||
FREE(saved->ib);
|
FREE(saved->ib);
|
||||||
FREE(saved->bo_list);
|
FREE(saved->bo_list);
|
||||||
|
@ -646,7 +637,7 @@ static void r600_set_device_reset_callback(struct pipe_context *ctx,
|
||||||
sizeof(rctx->device_reset_callback));
|
sizeof(rctx->device_reset_callback));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool r600_check_device_reset(struct r600_common_context *rctx)
|
bool si_check_device_reset(struct r600_common_context *rctx)
|
||||||
{
|
{
|
||||||
enum pipe_reset_status status;
|
enum pipe_reset_status status;
|
||||||
|
|
||||||
|
@ -708,9 +699,9 @@ static bool r600_resource_commit(struct pipe_context *pctx,
|
||||||
return ctx->ws->buffer_commit(res->buf, box->x, box->width, commit);
|
return ctx->ws->buffer_commit(res->buf, box->x, box->width, commit);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool r600_common_context_init(struct r600_common_context *rctx,
|
bool si_common_context_init(struct r600_common_context *rctx,
|
||||||
struct r600_common_screen *rscreen,
|
struct r600_common_screen *rscreen,
|
||||||
unsigned context_flags)
|
unsigned context_flags)
|
||||||
{
|
{
|
||||||
slab_create_child(&rctx->pool_transfers, &rscreen->pool_transfers);
|
slab_create_child(&rctx->pool_transfers, &rscreen->pool_transfers);
|
||||||
slab_create_child(&rctx->pool_transfers_unsync, &rscreen->pool_transfers);
|
slab_create_child(&rctx->pool_transfers_unsync, &rscreen->pool_transfers);
|
||||||
|
@ -720,7 +711,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,
|
||||||
rctx->family = rscreen->family;
|
rctx->family = rscreen->family;
|
||||||
rctx->chip_class = rscreen->chip_class;
|
rctx->chip_class = rscreen->chip_class;
|
||||||
|
|
||||||
rctx->b.invalidate_resource = r600_invalidate_resource;
|
rctx->b.invalidate_resource = si_invalidate_resource;
|
||||||
rctx->b.resource_commit = r600_resource_commit;
|
rctx->b.resource_commit = r600_resource_commit;
|
||||||
rctx->b.transfer_map = u_transfer_map_vtbl;
|
rctx->b.transfer_map = u_transfer_map_vtbl;
|
||||||
rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl;
|
rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl;
|
||||||
|
@ -731,15 +722,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,
|
||||||
rctx->b.set_debug_callback = r600_set_debug_callback;
|
rctx->b.set_debug_callback = r600_set_debug_callback;
|
||||||
rctx->b.fence_server_sync = r600_fence_server_sync;
|
rctx->b.fence_server_sync = r600_fence_server_sync;
|
||||||
rctx->dma_clear_buffer = r600_dma_clear_buffer_fallback;
|
rctx->dma_clear_buffer = r600_dma_clear_buffer_fallback;
|
||||||
|
rctx->b.buffer_subdata = si_buffer_subdata;
|
||||||
/* evergreen_compute.c has a special codepath for global buffers.
|
|
||||||
* Everything else can use the direct path.
|
|
||||||
*/
|
|
||||||
if ((rscreen->chip_class == EVERGREEN || rscreen->chip_class == CAYMAN) &&
|
|
||||||
(context_flags & PIPE_CONTEXT_COMPUTE_ONLY))
|
|
||||||
rctx->b.buffer_subdata = u_default_buffer_subdata;
|
|
||||||
else
|
|
||||||
rctx->b.buffer_subdata = r600_buffer_subdata;
|
|
||||||
|
|
||||||
if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) {
|
if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) {
|
||||||
rctx->b.get_device_reset_status = r600_get_reset_status;
|
rctx->b.get_device_reset_status = r600_get_reset_status;
|
||||||
|
@ -750,11 +733,11 @@ bool r600_common_context_init(struct r600_common_context *rctx,
|
||||||
|
|
||||||
rctx->b.set_device_reset_callback = r600_set_device_reset_callback;
|
rctx->b.set_device_reset_callback = r600_set_device_reset_callback;
|
||||||
|
|
||||||
r600_init_context_texture_functions(rctx);
|
si_init_context_texture_functions(rctx);
|
||||||
r600_init_viewport_functions(rctx);
|
si_init_viewport_functions(rctx);
|
||||||
r600_streamout_init(rctx);
|
si_streamout_init(rctx);
|
||||||
r600_query_init(rctx);
|
si_init_query_functions(rctx);
|
||||||
cayman_init_msaa(&rctx->b);
|
si_init_msaa(&rctx->b);
|
||||||
|
|
||||||
if (rctx->chip_class == CIK ||
|
if (rctx->chip_class == CIK ||
|
||||||
rctx->chip_class == VI ||
|
rctx->chip_class == VI ||
|
||||||
|
@ -796,7 +779,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_common_context_cleanup(struct r600_common_context *rctx)
|
void si_common_context_cleanup(struct r600_common_context *rctx)
|
||||||
{
|
{
|
||||||
unsigned i,j;
|
unsigned i,j;
|
||||||
|
|
||||||
|
@ -976,19 +959,14 @@ static void r600_disk_cache_create(struct r600_common_screen *rscreen)
|
||||||
&mesa_timestamp)) {
|
&mesa_timestamp)) {
|
||||||
char *timestamp_str;
|
char *timestamp_str;
|
||||||
int res = -1;
|
int res = -1;
|
||||||
if (rscreen->chip_class < SI) {
|
uint32_t llvm_timestamp;
|
||||||
res = asprintf(×tamp_str, "%u",mesa_timestamp);
|
|
||||||
|
if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo,
|
||||||
|
&llvm_timestamp)) {
|
||||||
|
res = asprintf(×tamp_str, "%u_%u",
|
||||||
|
mesa_timestamp, llvm_timestamp);
|
||||||
}
|
}
|
||||||
#if HAVE_LLVM
|
|
||||||
else {
|
|
||||||
uint32_t llvm_timestamp;
|
|
||||||
if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo,
|
|
||||||
&llvm_timestamp)) {
|
|
||||||
res = asprintf(×tamp_str, "%u_%u",
|
|
||||||
mesa_timestamp, llvm_timestamp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
if (res != -1) {
|
if (res != -1) {
|
||||||
/* These flags affect shader compilation. */
|
/* These flags affect shader compilation. */
|
||||||
uint64_t shader_debug_flags =
|
uint64_t shader_debug_flags =
|
||||||
|
@ -1074,7 +1052,7 @@ static int r600_get_video_param(struct pipe_screen *screen,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *r600_get_llvm_processor_name(enum radeon_family family)
|
const char *si_get_llvm_processor_name(enum radeon_family family)
|
||||||
{
|
{
|
||||||
switch (family) {
|
switch (family) {
|
||||||
case CHIP_R600:
|
case CHIP_R600:
|
||||||
|
@ -1161,10 +1139,7 @@ static unsigned get_max_threads_per_block(struct r600_common_screen *screen,
|
||||||
/* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice
|
/* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice
|
||||||
* round number.
|
* round number.
|
||||||
*/
|
*/
|
||||||
if (screen->chip_class >= SI)
|
return 2048;
|
||||||
return 2048;
|
|
||||||
|
|
||||||
return 256;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int r600_get_compute_param(struct pipe_screen *screen,
|
static int r600_get_compute_param(struct pipe_screen *screen,
|
||||||
|
@ -1193,7 +1168,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
|
||||||
* GPUs, so we need to use the name of a similar GPU.
|
* GPUs, so we need to use the name of a similar GPU.
|
||||||
*/
|
*/
|
||||||
default:
|
default:
|
||||||
gpu = r600_get_llvm_processor_name(rscreen->family);
|
gpu = si_get_llvm_processor_name(rscreen->family);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (ret) {
|
if (ret) {
|
||||||
|
@ -1237,9 +1212,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
|
||||||
case PIPE_COMPUTE_CAP_ADDRESS_BITS:
|
case PIPE_COMPUTE_CAP_ADDRESS_BITS:
|
||||||
if (ret) {
|
if (ret) {
|
||||||
uint32_t *address_bits = ret;
|
uint32_t *address_bits = ret;
|
||||||
address_bits[0] = 32;
|
address_bits[0] = 64;
|
||||||
if (rscreen->chip_class >= SI)
|
|
||||||
address_bits[0] = 64;
|
|
||||||
}
|
}
|
||||||
return 1 * sizeof(uint32_t);
|
return 1 * sizeof(uint32_t);
|
||||||
|
|
||||||
|
@ -1319,8 +1292,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
|
||||||
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
|
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
|
||||||
if (ret) {
|
if (ret) {
|
||||||
uint64_t *max_variable_threads_per_block = ret;
|
uint64_t *max_variable_threads_per_block = ret;
|
||||||
if (rscreen->chip_class >= SI &&
|
if (ir_type == PIPE_SHADER_IR_TGSI)
|
||||||
ir_type == PIPE_SHADER_IR_TGSI)
|
|
||||||
*max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
|
*max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
|
||||||
else
|
else
|
||||||
*max_variable_threads_per_block = 0;
|
*max_variable_threads_per_block = 0;
|
||||||
|
@ -1444,18 +1416,18 @@ static void r600_query_memory_info(struct pipe_screen *screen,
|
||||||
info->nr_device_memory_evictions = info->device_memory_evicted / 64;
|
info->nr_device_memory_evictions = info->device_memory_evicted / 64;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
|
struct pipe_resource *si_resource_create_common(struct pipe_screen *screen,
|
||||||
const struct pipe_resource *templ)
|
const struct pipe_resource *templ)
|
||||||
{
|
{
|
||||||
if (templ->target == PIPE_BUFFER) {
|
if (templ->target == PIPE_BUFFER) {
|
||||||
return r600_buffer_create(screen, templ, 256);
|
return si_buffer_create(screen, templ, 256);
|
||||||
} else {
|
} else {
|
||||||
return r600_texture_create(screen, templ);
|
return si_texture_create(screen, templ);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool r600_common_screen_init(struct r600_common_screen *rscreen,
|
bool si_common_screen_init(struct r600_common_screen *rscreen,
|
||||||
struct radeon_winsys *ws)
|
struct radeon_winsys *ws)
|
||||||
{
|
{
|
||||||
char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {};
|
char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {};
|
||||||
struct utsname uname_data;
|
struct utsname uname_data;
|
||||||
|
@ -1496,19 +1468,19 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
|
||||||
rscreen->b.fence_finish = r600_fence_finish;
|
rscreen->b.fence_finish = r600_fence_finish;
|
||||||
rscreen->b.fence_reference = r600_fence_reference;
|
rscreen->b.fence_reference = r600_fence_reference;
|
||||||
rscreen->b.resource_destroy = u_resource_destroy_vtbl;
|
rscreen->b.resource_destroy = u_resource_destroy_vtbl;
|
||||||
rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory;
|
rscreen->b.resource_from_user_memory = si_buffer_from_user_memory;
|
||||||
rscreen->b.query_memory_info = r600_query_memory_info;
|
rscreen->b.query_memory_info = r600_query_memory_info;
|
||||||
|
|
||||||
if (rscreen->info.has_hw_decode) {
|
if (rscreen->info.has_hw_decode) {
|
||||||
rscreen->b.get_video_param = rvid_get_video_param;
|
rscreen->b.get_video_param = si_vid_get_video_param;
|
||||||
rscreen->b.is_video_format_supported = rvid_is_format_supported;
|
rscreen->b.is_video_format_supported = si_vid_is_format_supported;
|
||||||
} else {
|
} else {
|
||||||
rscreen->b.get_video_param = r600_get_video_param;
|
rscreen->b.get_video_param = r600_get_video_param;
|
||||||
rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
|
rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
|
||||||
}
|
}
|
||||||
|
|
||||||
r600_init_screen_texture_functions(rscreen);
|
si_init_screen_texture_functions(rscreen);
|
||||||
r600_init_screen_query_functions(rscreen);
|
si_init_screen_query_functions(rscreen);
|
||||||
|
|
||||||
rscreen->family = rscreen->info.family;
|
rscreen->family = rscreen->info.family;
|
||||||
rscreen->chip_class = rscreen->info.chip_class;
|
rscreen->chip_class = rscreen->info.chip_class;
|
||||||
|
@ -1587,10 +1559,10 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_destroy_common_screen(struct r600_common_screen *rscreen)
|
void si_destroy_common_screen(struct r600_common_screen *rscreen)
|
||||||
{
|
{
|
||||||
r600_perfcounters_destroy(rscreen);
|
si_perfcounters_destroy(rscreen);
|
||||||
r600_gpu_load_kill_thread(rscreen);
|
si_gpu_load_kill_thread(rscreen);
|
||||||
|
|
||||||
mtx_destroy(&rscreen->gpu_load_mutex);
|
mtx_destroy(&rscreen->gpu_load_mutex);
|
||||||
mtx_destroy(&rscreen->aux_context_lock);
|
mtx_destroy(&rscreen->aux_context_lock);
|
||||||
|
@ -1603,20 +1575,20 @@ void r600_destroy_common_screen(struct r600_common_screen *rscreen)
|
||||||
FREE(rscreen);
|
FREE(rscreen);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool r600_can_dump_shader(struct r600_common_screen *rscreen,
|
bool si_can_dump_shader(struct r600_common_screen *rscreen,
|
||||||
unsigned processor)
|
unsigned processor)
|
||||||
{
|
{
|
||||||
return rscreen->debug_flags & (1 << processor);
|
return rscreen->debug_flags & (1 << processor);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool r600_extra_shader_checks(struct r600_common_screen *rscreen, unsigned processor)
|
bool si_extra_shader_checks(struct r600_common_screen *rscreen, unsigned processor)
|
||||||
{
|
{
|
||||||
return (rscreen->debug_flags & DBG_CHECK_IR) ||
|
return (rscreen->debug_flags & DBG_CHECK_IR) ||
|
||||||
r600_can_dump_shader(rscreen, processor);
|
si_can_dump_shader(rscreen, processor);
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
|
void si_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
|
||||||
uint64_t offset, uint64_t size, unsigned value)
|
uint64_t offset, uint64_t size, unsigned value)
|
||||||
{
|
{
|
||||||
struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
|
struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
|
||||||
|
|
||||||
|
|
|
@ -141,8 +141,8 @@ struct r600_perfcounters;
|
||||||
struct tgsi_shader_info;
|
struct tgsi_shader_info;
|
||||||
struct r600_qbo_state;
|
struct r600_qbo_state;
|
||||||
|
|
||||||
void radeon_shader_binary_init(struct ac_shader_binary *b);
|
void si_radeon_shader_binary_init(struct ac_shader_binary *b);
|
||||||
void radeon_shader_binary_clean(struct ac_shader_binary *b);
|
void si_radeon_shader_binary_clean(struct ac_shader_binary *b);
|
||||||
|
|
||||||
/* Only 32-bit buffer allocations are supported, gallium doesn't support more
|
/* Only 32-bit buffer allocations are supported, gallium doesn't support more
|
||||||
* at the moment.
|
* at the moment.
|
||||||
|
@ -723,130 +723,125 @@ struct r600_common_context {
|
||||||
};
|
};
|
||||||
|
|
||||||
/* r600_buffer_common.c */
|
/* r600_buffer_common.c */
|
||||||
bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
|
bool si_rings_is_buffer_referenced(struct r600_common_context *ctx,
|
||||||
struct pb_buffer *buf,
|
struct pb_buffer *buf,
|
||||||
enum radeon_bo_usage usage);
|
enum radeon_bo_usage usage);
|
||||||
void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
|
void *si_buffer_map_sync_with_rings(struct r600_common_context *ctx,
|
||||||
struct r600_resource *resource,
|
struct r600_resource *resource,
|
||||||
unsigned usage);
|
unsigned usage);
|
||||||
void r600_buffer_subdata(struct pipe_context *ctx,
|
void si_buffer_subdata(struct pipe_context *ctx,
|
||||||
struct pipe_resource *buffer,
|
struct pipe_resource *buffer,
|
||||||
unsigned usage, unsigned offset,
|
unsigned usage, unsigned offset,
|
||||||
unsigned size, const void *data);
|
unsigned size, const void *data);
|
||||||
void r600_init_resource_fields(struct r600_common_screen *rscreen,
|
void si_init_resource_fields(struct r600_common_screen *rscreen,
|
||||||
struct r600_resource *res,
|
struct r600_resource *res,
|
||||||
uint64_t size, unsigned alignment);
|
uint64_t size, unsigned alignment);
|
||||||
bool r600_alloc_resource(struct r600_common_screen *rscreen,
|
bool si_alloc_resource(struct r600_common_screen *rscreen,
|
||||||
struct r600_resource *res);
|
struct r600_resource *res);
|
||||||
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
|
struct pipe_resource *si_buffer_create(struct pipe_screen *screen,
|
||||||
const struct pipe_resource *templ,
|
const struct pipe_resource *templ,
|
||||||
unsigned alignment);
|
unsigned alignment);
|
||||||
struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen,
|
struct pipe_resource *si_aligned_buffer_create(struct pipe_screen *screen,
|
||||||
unsigned flags,
|
unsigned flags,
|
||||||
unsigned usage,
|
unsigned usage,
|
||||||
unsigned size,
|
unsigned size,
|
||||||
unsigned alignment);
|
unsigned alignment);
|
||||||
struct pipe_resource *
|
struct pipe_resource *
|
||||||
r600_buffer_from_user_memory(struct pipe_screen *screen,
|
si_buffer_from_user_memory(struct pipe_screen *screen,
|
||||||
const struct pipe_resource *templ,
|
const struct pipe_resource *templ,
|
||||||
void *user_memory);
|
void *user_memory);
|
||||||
void
|
void si_invalidate_resource(struct pipe_context *ctx,
|
||||||
r600_invalidate_resource(struct pipe_context *ctx,
|
struct pipe_resource *resource);
|
||||||
struct pipe_resource *resource);
|
void si_replace_buffer_storage(struct pipe_context *ctx,
|
||||||
void r600_replace_buffer_storage(struct pipe_context *ctx,
|
struct pipe_resource *dst,
|
||||||
struct pipe_resource *dst,
|
struct pipe_resource *src);
|
||||||
struct pipe_resource *src);
|
|
||||||
|
|
||||||
/* r600_common_pipe.c */
|
/* r600_common_pipe.c */
|
||||||
void r600_gfx_write_event_eop(struct r600_common_context *ctx,
|
void si_gfx_write_event_eop(struct r600_common_context *ctx,
|
||||||
unsigned event, unsigned event_flags,
|
unsigned event, unsigned event_flags,
|
||||||
unsigned data_sel,
|
unsigned data_sel,
|
||||||
struct r600_resource *buf, uint64_t va,
|
struct r600_resource *buf, uint64_t va,
|
||||||
uint32_t new_fence, unsigned query_type);
|
uint32_t new_fence, unsigned query_type);
|
||||||
unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen);
|
unsigned si_gfx_write_fence_dwords(struct r600_common_screen *screen);
|
||||||
void r600_gfx_wait_fence(struct r600_common_context *ctx,
|
void si_gfx_wait_fence(struct r600_common_context *ctx,
|
||||||
uint64_t va, uint32_t ref, uint32_t mask);
|
uint64_t va, uint32_t ref, uint32_t mask);
|
||||||
void r600_draw_rectangle(struct blitter_context *blitter,
|
void si_draw_rectangle(struct blitter_context *blitter,
|
||||||
int x1, int y1, int x2, int y2,
|
int x1, int y1, int x2, int y2,
|
||||||
float depth, unsigned num_instances,
|
float depth, unsigned num_instances,
|
||||||
enum blitter_attrib_type type,
|
enum blitter_attrib_type type,
|
||||||
const union blitter_attrib *attrib);
|
const union blitter_attrib *attrib);
|
||||||
bool r600_common_screen_init(struct r600_common_screen *rscreen,
|
bool si_common_screen_init(struct r600_common_screen *rscreen,
|
||||||
struct radeon_winsys *ws);
|
struct radeon_winsys *ws);
|
||||||
void r600_destroy_common_screen(struct r600_common_screen *rscreen);
|
void si_destroy_common_screen(struct r600_common_screen *rscreen);
|
||||||
void r600_preflush_suspend_features(struct r600_common_context *ctx);
|
void si_preflush_suspend_features(struct r600_common_context *ctx);
|
||||||
void r600_postflush_resume_features(struct r600_common_context *ctx);
|
void si_postflush_resume_features(struct r600_common_context *ctx);
|
||||||
bool r600_common_context_init(struct r600_common_context *rctx,
|
bool si_common_context_init(struct r600_common_context *rctx,
|
||||||
struct r600_common_screen *rscreen,
|
struct r600_common_screen *rscreen,
|
||||||
unsigned context_flags);
|
unsigned context_flags);
|
||||||
void r600_common_context_cleanup(struct r600_common_context *rctx);
|
void si_common_context_cleanup(struct r600_common_context *rctx);
|
||||||
bool r600_can_dump_shader(struct r600_common_screen *rscreen,
|
bool si_can_dump_shader(struct r600_common_screen *rscreen,
|
||||||
unsigned processor);
|
unsigned processor);
|
||||||
bool r600_extra_shader_checks(struct r600_common_screen *rscreen,
|
bool si_extra_shader_checks(struct r600_common_screen *rscreen,
|
||||||
unsigned processor);
|
unsigned processor);
|
||||||
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
|
void si_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
|
||||||
uint64_t offset, uint64_t size, unsigned value);
|
uint64_t offset, uint64_t size, unsigned value);
|
||||||
struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
|
struct pipe_resource *si_resource_create_common(struct pipe_screen *screen,
|
||||||
const struct pipe_resource *templ);
|
const struct pipe_resource *templ);
|
||||||
const char *r600_get_llvm_processor_name(enum radeon_family family);
|
const char *si_get_llvm_processor_name(enum radeon_family family);
|
||||||
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
|
void si_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
|
||||||
struct r600_resource *dst, struct r600_resource *src);
|
struct r600_resource *dst, struct r600_resource *src);
|
||||||
void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
|
void si_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
|
||||||
struct radeon_saved_cs *saved, bool get_buffer_list);
|
struct radeon_saved_cs *saved, bool get_buffer_list);
|
||||||
void radeon_clear_saved_cs(struct radeon_saved_cs *saved);
|
void si_clear_saved_cs(struct radeon_saved_cs *saved);
|
||||||
bool r600_check_device_reset(struct r600_common_context *rctx);
|
bool si_check_device_reset(struct r600_common_context *rctx);
|
||||||
|
|
||||||
/* r600_gpu_load.c */
|
/* r600_gpu_load.c */
|
||||||
void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen);
|
void si_gpu_load_kill_thread(struct r600_common_screen *rscreen);
|
||||||
uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type);
|
uint64_t si_begin_counter(struct r600_common_screen *rscreen, unsigned type);
|
||||||
unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,
|
unsigned si_end_counter(struct r600_common_screen *rscreen, unsigned type,
|
||||||
uint64_t begin);
|
uint64_t begin);
|
||||||
|
|
||||||
/* r600_perfcounters.c */
|
/* r600_perfcounters.c */
|
||||||
void r600_perfcounters_destroy(struct r600_common_screen *rscreen);
|
void si_perfcounters_destroy(struct r600_common_screen *rscreen);
|
||||||
|
|
||||||
/* r600_query.c */
|
/* r600_query.c */
|
||||||
void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
|
void si_init_screen_query_functions(struct r600_common_screen *rscreen);
|
||||||
void r600_query_init(struct r600_common_context *rctx);
|
void si_init_query_functions(struct r600_common_context *rctx);
|
||||||
void r600_suspend_queries(struct r600_common_context *ctx);
|
void si_suspend_queries(struct r600_common_context *ctx);
|
||||||
void r600_resume_queries(struct r600_common_context *ctx);
|
void si_resume_queries(struct r600_common_context *ctx);
|
||||||
void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen);
|
|
||||||
|
|
||||||
/* r600_streamout.c */
|
/* r600_streamout.c */
|
||||||
void r600_streamout_buffers_dirty(struct r600_common_context *rctx);
|
void si_streamout_buffers_dirty(struct r600_common_context *rctx);
|
||||||
void r600_set_streamout_targets(struct pipe_context *ctx,
|
void si_common_set_streamout_targets(struct pipe_context *ctx,
|
||||||
unsigned num_targets,
|
unsigned num_targets,
|
||||||
struct pipe_stream_output_target **targets,
|
struct pipe_stream_output_target **targets,
|
||||||
const unsigned *offset);
|
const unsigned *offset);
|
||||||
void r600_emit_streamout_end(struct r600_common_context *rctx);
|
void si_emit_streamout_end(struct r600_common_context *rctx);
|
||||||
void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
|
void si_update_prims_generated_query_state(struct r600_common_context *rctx,
|
||||||
unsigned type, int diff);
|
unsigned type, int diff);
|
||||||
void r600_streamout_init(struct r600_common_context *rctx);
|
void si_streamout_init(struct r600_common_context *rctx);
|
||||||
|
|
||||||
/* r600_test_dma.c */
|
/* r600_test_dma.c */
|
||||||
void r600_test_dma(struct r600_common_screen *rscreen);
|
void si_test_dma(struct r600_common_screen *rscreen);
|
||||||
|
|
||||||
/* r600_texture.c */
|
/* r600_texture.c */
|
||||||
bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
|
bool si_prepare_for_dma_blit(struct r600_common_context *rctx,
|
||||||
struct r600_texture *rdst,
|
struct r600_texture *rdst,
|
||||||
unsigned dst_level, unsigned dstx,
|
unsigned dst_level, unsigned dstx,
|
||||||
unsigned dsty, unsigned dstz,
|
unsigned dsty, unsigned dstz,
|
||||||
struct r600_texture *rsrc,
|
struct r600_texture *rsrc,
|
||||||
unsigned src_level,
|
unsigned src_level,
|
||||||
const struct pipe_box *src_box);
|
const struct pipe_box *src_box);
|
||||||
void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
|
void si_texture_get_fmask_info(struct r600_common_screen *rscreen,
|
||||||
struct r600_texture *rtex,
|
struct r600_texture *rtex,
|
||||||
unsigned nr_samples,
|
unsigned nr_samples,
|
||||||
struct r600_fmask_info *out);
|
struct r600_fmask_info *out);
|
||||||
void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
|
bool si_init_flushed_depth_texture(struct pipe_context *ctx,
|
||||||
struct r600_texture *rtex,
|
struct pipe_resource *texture,
|
||||||
struct r600_cmask_info *out);
|
struct r600_texture **staging);
|
||||||
bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
|
void si_print_texture_info(struct r600_common_screen *rscreen,
|
||||||
struct pipe_resource *texture,
|
struct r600_texture *rtex, struct u_log_context *log);
|
||||||
struct r600_texture **staging);
|
struct pipe_resource *si_texture_create(struct pipe_screen *screen,
|
||||||
void r600_print_texture_info(struct r600_common_screen *rscreen,
|
|
||||||
struct r600_texture *rtex, struct u_log_context *log);
|
|
||||||
struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
|
|
||||||
const struct pipe_resource *templ);
|
const struct pipe_resource *templ);
|
||||||
bool vi_dcc_formats_compatible(enum pipe_format format1,
|
bool vi_dcc_formats_compatible(enum pipe_format format1,
|
||||||
enum pipe_format format2);
|
enum pipe_format format2);
|
||||||
|
@ -857,12 +852,12 @@ void vi_disable_dcc_if_incompatible_format(struct r600_common_context *rctx,
|
||||||
struct pipe_resource *tex,
|
struct pipe_resource *tex,
|
||||||
unsigned level,
|
unsigned level,
|
||||||
enum pipe_format view_format);
|
enum pipe_format view_format);
|
||||||
struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
|
struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe,
|
||||||
struct pipe_resource *texture,
|
struct pipe_resource *texture,
|
||||||
const struct pipe_surface *templ,
|
const struct pipe_surface *templ,
|
||||||
unsigned width0, unsigned height0,
|
unsigned width0, unsigned height0,
|
||||||
unsigned width, unsigned height);
|
unsigned width, unsigned height);
|
||||||
unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap);
|
unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap);
|
||||||
void vi_separate_dcc_start_query(struct pipe_context *ctx,
|
void vi_separate_dcc_start_query(struct pipe_context *ctx,
|
||||||
struct r600_texture *tex);
|
struct r600_texture *tex);
|
||||||
void vi_separate_dcc_stop_query(struct pipe_context *ctx,
|
void vi_separate_dcc_stop_query(struct pipe_context *ctx,
|
||||||
|
@ -872,37 +867,33 @@ void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
|
||||||
void vi_dcc_clear_level(struct r600_common_context *rctx,
|
void vi_dcc_clear_level(struct r600_common_context *rctx,
|
||||||
struct r600_texture *rtex,
|
struct r600_texture *rtex,
|
||||||
unsigned level, unsigned clear_value);
|
unsigned level, unsigned clear_value);
|
||||||
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
|
void si_do_fast_color_clear(struct r600_common_context *rctx,
|
||||||
struct pipe_framebuffer_state *fb,
|
struct pipe_framebuffer_state *fb,
|
||||||
struct r600_atom *fb_state,
|
struct r600_atom *fb_state,
|
||||||
unsigned *buffers, ubyte *dirty_cbufs,
|
unsigned *buffers, ubyte *dirty_cbufs,
|
||||||
const union pipe_color_union *color);
|
const union pipe_color_union *color);
|
||||||
bool r600_texture_disable_dcc(struct r600_common_context *rctx,
|
bool si_texture_disable_dcc(struct r600_common_context *rctx,
|
||||||
struct r600_texture *rtex);
|
struct r600_texture *rtex);
|
||||||
void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
|
void si_init_screen_texture_functions(struct r600_common_screen *rscreen);
|
||||||
void r600_init_context_texture_functions(struct r600_common_context *rctx);
|
void si_init_context_texture_functions(struct r600_common_context *rctx);
|
||||||
|
|
||||||
/* r600_viewport.c */
|
/* r600_viewport.c */
|
||||||
void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
|
void si_apply_scissor_bug_workaround(struct r600_common_context *rctx,
|
||||||
struct pipe_scissor_state *scissor);
|
struct pipe_scissor_state *scissor);
|
||||||
void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
|
void si_viewport_set_rast_deps(struct r600_common_context *rctx,
|
||||||
bool scissor_enable, bool clip_halfz);
|
bool scissor_enable, bool clip_halfz);
|
||||||
void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
|
void si_update_vs_writes_viewport_index(struct r600_common_context *rctx,
|
||||||
struct tgsi_shader_info *info);
|
struct tgsi_shader_info *info);
|
||||||
void r600_init_viewport_functions(struct r600_common_context *rctx);
|
void si_init_viewport_functions(struct r600_common_context *rctx);
|
||||||
|
|
||||||
/* cayman_msaa.c */
|
/* cayman_msaa.c */
|
||||||
extern const uint32_t eg_sample_locs_2x[4];
|
void si_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
|
||||||
extern const unsigned eg_max_dist_2x;
|
unsigned sample_index, float *out_value);
|
||||||
extern const uint32_t eg_sample_locs_4x[4];
|
void si_init_msaa(struct pipe_context *ctx);
|
||||||
extern const unsigned eg_max_dist_4x;
|
void si_common_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples);
|
||||||
void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
|
void si_common_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
|
||||||
unsigned sample_index, float *out_value);
|
int ps_iter_samples, int overrast_samples,
|
||||||
void cayman_init_msaa(struct pipe_context *ctx);
|
unsigned sc_mode_cntl_1);
|
||||||
void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples);
|
|
||||||
void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
|
|
||||||
int ps_iter_samples, int overrast_samples,
|
|
||||||
unsigned sc_mode_cntl_1);
|
|
||||||
|
|
||||||
|
|
||||||
/* Inline helpers. */
|
/* Inline helpers. */
|
||||||
|
|
|
@ -219,7 +219,7 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
|
||||||
case R600_QUERY_GPU_SURF_SYNC_BUSY:
|
case R600_QUERY_GPU_SURF_SYNC_BUSY:
|
||||||
case R600_QUERY_GPU_CP_DMA_BUSY:
|
case R600_QUERY_GPU_CP_DMA_BUSY:
|
||||||
case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
|
case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
|
||||||
query->begin_result = r600_begin_counter(rctx->screen,
|
query->begin_result = si_begin_counter(rctx->screen,
|
||||||
query->b.type);
|
query->b.type);
|
||||||
break;
|
break;
|
||||||
case R600_QUERY_NUM_COMPILATIONS:
|
case R600_QUERY_NUM_COMPILATIONS:
|
||||||
|
@ -375,7 +375,7 @@ static bool r600_query_sw_end(struct r600_common_context *rctx,
|
||||||
case R600_QUERY_GPU_SURF_SYNC_BUSY:
|
case R600_QUERY_GPU_SURF_SYNC_BUSY:
|
||||||
case R600_QUERY_GPU_CP_DMA_BUSY:
|
case R600_QUERY_GPU_CP_DMA_BUSY:
|
||||||
case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
|
case R600_QUERY_GPU_SCRATCH_RAM_BUSY:
|
||||||
query->end_result = r600_end_counter(rctx->screen,
|
query->end_result = si_end_counter(rctx->screen,
|
||||||
query->b.type,
|
query->b.type,
|
||||||
query->begin_result);
|
query->begin_result);
|
||||||
query->begin_result = 0;
|
query->begin_result = 0;
|
||||||
|
@ -494,8 +494,8 @@ static struct pipe_query *r600_query_sw_create(unsigned query_type)
|
||||||
return (struct pipe_query *)query;
|
return (struct pipe_query *)query;
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_query_hw_destroy(struct r600_common_screen *rscreen,
|
void si_query_hw_destroy(struct r600_common_screen *rscreen,
|
||||||
struct r600_query *rquery)
|
struct r600_query *rquery)
|
||||||
{
|
{
|
||||||
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
|
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
|
||||||
struct r600_query_buffer *prev = query->buffer.previous;
|
struct r600_query_buffer *prev = query->buffer.previous;
|
||||||
|
@ -583,10 +583,10 @@ static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
|
||||||
unsigned offset);
|
unsigned offset);
|
||||||
|
|
||||||
static struct r600_query_ops query_hw_ops = {
|
static struct r600_query_ops query_hw_ops = {
|
||||||
.destroy = r600_query_hw_destroy,
|
.destroy = si_query_hw_destroy,
|
||||||
.begin = r600_query_hw_begin,
|
.begin = si_query_hw_begin,
|
||||||
.end = r600_query_hw_end,
|
.end = si_query_hw_end,
|
||||||
.get_result = r600_query_hw_get_result,
|
.get_result = si_query_hw_get_result,
|
||||||
.get_result_resource = r600_query_hw_get_result_resource,
|
.get_result_resource = r600_query_hw_get_result_resource,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -612,8 +612,8 @@ static struct r600_query_hw_ops query_hw_default_hw_ops = {
|
||||||
.add_result = r600_query_hw_add_result,
|
.add_result = r600_query_hw_add_result,
|
||||||
};
|
};
|
||||||
|
|
||||||
bool r600_query_hw_init(struct r600_common_screen *rscreen,
|
bool si_query_hw_init(struct r600_common_screen *rscreen,
|
||||||
struct r600_query_hw *query)
|
struct r600_query_hw *query)
|
||||||
{
|
{
|
||||||
query->buffer.buf = r600_new_query_buffer(rscreen, query);
|
query->buffer.buf = r600_new_query_buffer(rscreen, query);
|
||||||
if (!query->buffer.buf)
|
if (!query->buffer.buf)
|
||||||
|
@ -641,16 +641,16 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscree
|
||||||
query->result_size = 16 * rscreen->info.num_render_backends;
|
query->result_size = 16 * rscreen->info.num_render_backends;
|
||||||
query->result_size += 16; /* for the fence + alignment */
|
query->result_size += 16; /* for the fence + alignment */
|
||||||
query->num_cs_dw_begin = 6;
|
query->num_cs_dw_begin = 6;
|
||||||
query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rscreen);
|
query->num_cs_dw_end = 6 + si_gfx_write_fence_dwords(rscreen);
|
||||||
break;
|
break;
|
||||||
case PIPE_QUERY_TIME_ELAPSED:
|
case PIPE_QUERY_TIME_ELAPSED:
|
||||||
query->result_size = 24;
|
query->result_size = 24;
|
||||||
query->num_cs_dw_begin = 8;
|
query->num_cs_dw_begin = 8;
|
||||||
query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rscreen);
|
query->num_cs_dw_end = 8 + si_gfx_write_fence_dwords(rscreen);
|
||||||
break;
|
break;
|
||||||
case PIPE_QUERY_TIMESTAMP:
|
case PIPE_QUERY_TIMESTAMP:
|
||||||
query->result_size = 16;
|
query->result_size = 16;
|
||||||
query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rscreen);
|
query->num_cs_dw_end = 8 + si_gfx_write_fence_dwords(rscreen);
|
||||||
query->flags = R600_QUERY_HW_FLAG_NO_START;
|
query->flags = R600_QUERY_HW_FLAG_NO_START;
|
||||||
break;
|
break;
|
||||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||||
|
@ -670,11 +670,11 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscree
|
||||||
query->num_cs_dw_end = 6 * R600_MAX_STREAMS;
|
query->num_cs_dw_end = 6 * R600_MAX_STREAMS;
|
||||||
break;
|
break;
|
||||||
case PIPE_QUERY_PIPELINE_STATISTICS:
|
case PIPE_QUERY_PIPELINE_STATISTICS:
|
||||||
/* 11 values on EG, 8 on R600. */
|
/* 11 values on GCN. */
|
||||||
query->result_size = (rscreen->chip_class >= EVERGREEN ? 11 : 8) * 16;
|
query->result_size = 11 * 16;
|
||||||
query->result_size += 8; /* for the fence + alignment */
|
query->result_size += 8; /* for the fence + alignment */
|
||||||
query->num_cs_dw_begin = 6;
|
query->num_cs_dw_begin = 6;
|
||||||
query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rscreen);
|
query->num_cs_dw_end = 6 + si_gfx_write_fence_dwords(rscreen);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(0);
|
assert(0);
|
||||||
|
@ -682,7 +682,7 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscree
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!r600_query_hw_init(rscreen, query)) {
|
if (!si_query_hw_init(rscreen, query)) {
|
||||||
FREE(query);
|
FREE(query);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -782,7 +782,7 @@ static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
|
||||||
/* Write the timestamp after the last draw is done.
|
/* Write the timestamp after the last draw is done.
|
||||||
* (bottom-of-pipe)
|
* (bottom-of-pipe)
|
||||||
*/
|
*/
|
||||||
r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
|
si_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
|
||||||
0, EOP_DATA_SEL_TIMESTAMP,
|
0, EOP_DATA_SEL_TIMESTAMP,
|
||||||
NULL, va, 0, query->b.type);
|
NULL, va, 0, query->b.type);
|
||||||
}
|
}
|
||||||
|
@ -809,7 +809,7 @@ static void r600_query_hw_emit_start(struct r600_common_context *ctx,
|
||||||
return; // previous buffer allocation failure
|
return; // previous buffer allocation failure
|
||||||
|
|
||||||
r600_update_occlusion_query_state(ctx, query->b.type, 1);
|
r600_update_occlusion_query_state(ctx, query->b.type, 1);
|
||||||
r600_update_prims_generated_query_state(ctx, query->b.type, 1);
|
si_update_prims_generated_query_state(ctx, query->b.type, 1);
|
||||||
|
|
||||||
ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_begin + query->num_cs_dw_end,
|
ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_begin + query->num_cs_dw_end,
|
||||||
true);
|
true);
|
||||||
|
@ -869,7 +869,7 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
|
||||||
va += 8;
|
va += 8;
|
||||||
/* fall through */
|
/* fall through */
|
||||||
case PIPE_QUERY_TIMESTAMP:
|
case PIPE_QUERY_TIMESTAMP:
|
||||||
r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
|
si_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
|
||||||
0, EOP_DATA_SEL_TIMESTAMP, NULL, va,
|
0, EOP_DATA_SEL_TIMESTAMP, NULL, va,
|
||||||
0, query->b.type);
|
0, query->b.type);
|
||||||
fence_va = va + 8;
|
fence_va = va + 8;
|
||||||
|
@ -893,7 +893,7 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
|
||||||
RADEON_PRIO_QUERY);
|
RADEON_PRIO_QUERY);
|
||||||
|
|
||||||
if (fence_va)
|
if (fence_va)
|
||||||
r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0,
|
si_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0,
|
||||||
EOP_DATA_SEL_VALUE_32BIT,
|
EOP_DATA_SEL_VALUE_32BIT,
|
||||||
query->buffer.buf, fence_va, 0x80000000,
|
query->buffer.buf, fence_va, 0x80000000,
|
||||||
query->b.type);
|
query->b.type);
|
||||||
|
@ -923,7 +923,7 @@ static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
|
||||||
ctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end;
|
ctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end;
|
||||||
|
|
||||||
r600_update_occlusion_query_state(ctx, query->b.type, -1);
|
r600_update_occlusion_query_state(ctx, query->b.type, -1);
|
||||||
r600_update_prims_generated_query_state(ctx, query->b.type, -1);
|
si_update_prims_generated_query_state(ctx, query->b.type, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void emit_set_predicate(struct r600_common_context *ctx,
|
static void emit_set_predicate(struct r600_common_context *ctx,
|
||||||
|
@ -1057,8 +1057,8 @@ static boolean r600_begin_query(struct pipe_context *ctx,
|
||||||
return rquery->ops->begin(rctx, rquery);
|
return rquery->ops->begin(rctx, rquery);
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
|
void si_query_hw_reset_buffers(struct r600_common_context *rctx,
|
||||||
struct r600_query_hw *query)
|
struct r600_query_hw *query)
|
||||||
{
|
{
|
||||||
struct r600_query_buffer *prev = query->buffer.previous;
|
struct r600_query_buffer *prev = query->buffer.previous;
|
||||||
|
|
||||||
|
@ -1074,7 +1074,7 @@ void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
|
||||||
query->buffer.previous = NULL;
|
query->buffer.previous = NULL;
|
||||||
|
|
||||||
/* Obtain a new buffer if the current one can't be mapped without a stall. */
|
/* Obtain a new buffer if the current one can't be mapped without a stall. */
|
||||||
if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) ||
|
if (si_rings_is_buffer_referenced(rctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) ||
|
||||||
!rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
|
!rctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
|
||||||
r600_resource_reference(&query->buffer.buf, NULL);
|
r600_resource_reference(&query->buffer.buf, NULL);
|
||||||
query->buffer.buf = r600_new_query_buffer(rctx->screen, query);
|
query->buffer.buf = r600_new_query_buffer(rctx->screen, query);
|
||||||
|
@ -1084,8 +1084,8 @@ void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool r600_query_hw_begin(struct r600_common_context *rctx,
|
bool si_query_hw_begin(struct r600_common_context *rctx,
|
||||||
struct r600_query *rquery)
|
struct r600_query *rquery)
|
||||||
{
|
{
|
||||||
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
|
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
|
||||||
|
|
||||||
|
@ -1095,7 +1095,7 @@ bool r600_query_hw_begin(struct r600_common_context *rctx,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(query->flags & R600_QUERY_HW_FLAG_BEGIN_RESUMES))
|
if (!(query->flags & R600_QUERY_HW_FLAG_BEGIN_RESUMES))
|
||||||
r600_query_hw_reset_buffers(rctx, query);
|
si_query_hw_reset_buffers(rctx, query);
|
||||||
|
|
||||||
r600_resource_reference(&query->workaround_buf, NULL);
|
r600_resource_reference(&query->workaround_buf, NULL);
|
||||||
|
|
||||||
|
@ -1115,13 +1115,13 @@ static bool r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
|
||||||
return rquery->ops->end(rctx, rquery);
|
return rquery->ops->end(rctx, rquery);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool r600_query_hw_end(struct r600_common_context *rctx,
|
bool si_query_hw_end(struct r600_common_context *rctx,
|
||||||
struct r600_query *rquery)
|
struct r600_query *rquery)
|
||||||
{
|
{
|
||||||
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
|
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
|
||||||
|
|
||||||
if (query->flags & R600_QUERY_HW_FLAG_NO_START)
|
if (query->flags & R600_QUERY_HW_FLAG_NO_START)
|
||||||
r600_query_hw_reset_buffers(rctx, query);
|
si_query_hw_reset_buffers(rctx, query);
|
||||||
|
|
||||||
r600_query_hw_emit_stop(rctx, query);
|
r600_query_hw_emit_stop(rctx, query);
|
||||||
|
|
||||||
|
@ -1287,47 +1287,28 @@ static void r600_query_hw_add_result(struct r600_common_screen *rscreen,
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case PIPE_QUERY_PIPELINE_STATISTICS:
|
case PIPE_QUERY_PIPELINE_STATISTICS:
|
||||||
if (rscreen->chip_class >= EVERGREEN) {
|
result->pipeline_statistics.ps_invocations +=
|
||||||
result->pipeline_statistics.ps_invocations +=
|
r600_query_read_result(buffer, 0, 22, false);
|
||||||
r600_query_read_result(buffer, 0, 22, false);
|
result->pipeline_statistics.c_primitives +=
|
||||||
result->pipeline_statistics.c_primitives +=
|
r600_query_read_result(buffer, 2, 24, false);
|
||||||
r600_query_read_result(buffer, 2, 24, false);
|
result->pipeline_statistics.c_invocations +=
|
||||||
result->pipeline_statistics.c_invocations +=
|
r600_query_read_result(buffer, 4, 26, false);
|
||||||
r600_query_read_result(buffer, 4, 26, false);
|
result->pipeline_statistics.vs_invocations +=
|
||||||
result->pipeline_statistics.vs_invocations +=
|
r600_query_read_result(buffer, 6, 28, false);
|
||||||
r600_query_read_result(buffer, 6, 28, false);
|
result->pipeline_statistics.gs_invocations +=
|
||||||
result->pipeline_statistics.gs_invocations +=
|
r600_query_read_result(buffer, 8, 30, false);
|
||||||
r600_query_read_result(buffer, 8, 30, false);
|
result->pipeline_statistics.gs_primitives +=
|
||||||
result->pipeline_statistics.gs_primitives +=
|
r600_query_read_result(buffer, 10, 32, false);
|
||||||
r600_query_read_result(buffer, 10, 32, false);
|
result->pipeline_statistics.ia_primitives +=
|
||||||
result->pipeline_statistics.ia_primitives +=
|
r600_query_read_result(buffer, 12, 34, false);
|
||||||
r600_query_read_result(buffer, 12, 34, false);
|
result->pipeline_statistics.ia_vertices +=
|
||||||
result->pipeline_statistics.ia_vertices +=
|
r600_query_read_result(buffer, 14, 36, false);
|
||||||
r600_query_read_result(buffer, 14, 36, false);
|
result->pipeline_statistics.hs_invocations +=
|
||||||
result->pipeline_statistics.hs_invocations +=
|
r600_query_read_result(buffer, 16, 38, false);
|
||||||
r600_query_read_result(buffer, 16, 38, false);
|
result->pipeline_statistics.ds_invocations +=
|
||||||
result->pipeline_statistics.ds_invocations +=
|
r600_query_read_result(buffer, 18, 40, false);
|
||||||
r600_query_read_result(buffer, 18, 40, false);
|
result->pipeline_statistics.cs_invocations +=
|
||||||
result->pipeline_statistics.cs_invocations +=
|
r600_query_read_result(buffer, 20, 42, false);
|
||||||
r600_query_read_result(buffer, 20, 42, false);
|
|
||||||
} else {
|
|
||||||
result->pipeline_statistics.ps_invocations +=
|
|
||||||
r600_query_read_result(buffer, 0, 16, false);
|
|
||||||
result->pipeline_statistics.c_primitives +=
|
|
||||||
r600_query_read_result(buffer, 2, 18, false);
|
|
||||||
result->pipeline_statistics.c_invocations +=
|
|
||||||
r600_query_read_result(buffer, 4, 20, false);
|
|
||||||
result->pipeline_statistics.vs_invocations +=
|
|
||||||
r600_query_read_result(buffer, 6, 22, false);
|
|
||||||
result->pipeline_statistics.gs_invocations +=
|
|
||||||
r600_query_read_result(buffer, 8, 24, false);
|
|
||||||
result->pipeline_statistics.gs_primitives +=
|
|
||||||
r600_query_read_result(buffer, 10, 26, false);
|
|
||||||
result->pipeline_statistics.ia_primitives +=
|
|
||||||
r600_query_read_result(buffer, 12, 28, false);
|
|
||||||
result->pipeline_statistics.ia_vertices +=
|
|
||||||
r600_query_read_result(buffer, 14, 30, false);
|
|
||||||
}
|
|
||||||
#if 0 /* for testing */
|
#if 0 /* for testing */
|
||||||
printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
|
printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
|
||||||
"DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, "
|
"DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, "
|
||||||
|
@ -1381,9 +1362,9 @@ static void r600_query_hw_clear_result(struct r600_query_hw *query,
|
||||||
util_query_clear_result(result, query->b.type);
|
util_query_clear_result(result, query->b.type);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool r600_query_hw_get_result(struct r600_common_context *rctx,
|
bool si_query_hw_get_result(struct r600_common_context *rctx,
|
||||||
struct r600_query *rquery,
|
struct r600_query *rquery,
|
||||||
bool wait, union pipe_query_result *result)
|
bool wait, union pipe_query_result *result)
|
||||||
{
|
{
|
||||||
struct r600_common_screen *rscreen = rctx->screen;
|
struct r600_common_screen *rscreen = rctx->screen;
|
||||||
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
|
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
|
||||||
|
@ -1400,7 +1381,7 @@ bool r600_query_hw_get_result(struct r600_common_context *rctx,
|
||||||
if (rquery->b.flushed)
|
if (rquery->b.flushed)
|
||||||
map = rctx->ws->buffer_map(qbuf->buf->buf, NULL, usage);
|
map = rctx->ws->buffer_map(qbuf->buf->buf, NULL, usage);
|
||||||
else
|
else
|
||||||
map = r600_buffer_map_sync_with_rings(rctx, qbuf->buf, usage);
|
map = si_buffer_map_sync_with_rings(rctx, qbuf->buf, usage);
|
||||||
|
|
||||||
if (!map)
|
if (!map)
|
||||||
return false;
|
return false;
|
||||||
|
@ -1787,7 +1768,7 @@ static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
|
||||||
va = qbuf->buf->gpu_address + qbuf->results_end - query->result_size;
|
va = qbuf->buf->gpu_address + qbuf->results_end - query->result_size;
|
||||||
va += params.fence_offset;
|
va += params.fence_offset;
|
||||||
|
|
||||||
r600_gfx_wait_fence(rctx, va, 0x80000000, 0x80000000);
|
si_gfx_wait_fence(rctx, va, 0x80000000, 0x80000000);
|
||||||
}
|
}
|
||||||
|
|
||||||
rctx->b.launch_grid(&rctx->b, &grid);
|
rctx->b.launch_grid(&rctx->b, &grid);
|
||||||
|
@ -1871,7 +1852,7 @@ static void r600_render_condition(struct pipe_context *ctx,
|
||||||
rctx->set_atom_dirty(rctx, atom, query != NULL);
|
rctx->set_atom_dirty(rctx, atom, query != NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_suspend_queries(struct r600_common_context *ctx)
|
void si_suspend_queries(struct r600_common_context *ctx)
|
||||||
{
|
{
|
||||||
struct r600_query_hw *query;
|
struct r600_query_hw *query;
|
||||||
|
|
||||||
|
@ -1906,7 +1887,7 @@ static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *
|
||||||
return num_dw;
|
return num_dw;
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_resume_queries(struct r600_common_context *ctx)
|
void si_resume_queries(struct r600_common_context *ctx)
|
||||||
{
|
{
|
||||||
struct r600_query_hw *query;
|
struct r600_query_hw *query;
|
||||||
unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, &ctx->active_queries);
|
unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, &ctx->active_queries);
|
||||||
|
@ -1921,84 +1902,6 @@ void r600_resume_queries(struct r600_common_context *ctx)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Fix radeon_info::enabled_rb_mask for R600, R700, EVERGREEN, NI. */
|
|
||||||
void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen)
|
|
||||||
{
|
|
||||||
struct r600_common_context *ctx =
|
|
||||||
(struct r600_common_context*)rscreen->aux_context;
|
|
||||||
struct radeon_winsys_cs *cs = ctx->gfx.cs;
|
|
||||||
struct r600_resource *buffer;
|
|
||||||
uint32_t *results;
|
|
||||||
unsigned i, mask = 0;
|
|
||||||
unsigned max_rbs = ctx->screen->info.num_render_backends;
|
|
||||||
|
|
||||||
assert(rscreen->chip_class <= CAYMAN);
|
|
||||||
|
|
||||||
/* if backend_map query is supported by the kernel */
|
|
||||||
if (rscreen->info.r600_gb_backend_map_valid) {
|
|
||||||
unsigned num_tile_pipes = rscreen->info.num_tile_pipes;
|
|
||||||
unsigned backend_map = rscreen->info.r600_gb_backend_map;
|
|
||||||
unsigned item_width, item_mask;
|
|
||||||
|
|
||||||
if (ctx->chip_class >= EVERGREEN) {
|
|
||||||
item_width = 4;
|
|
||||||
item_mask = 0x7;
|
|
||||||
} else {
|
|
||||||
item_width = 2;
|
|
||||||
item_mask = 0x3;
|
|
||||||
}
|
|
||||||
|
|
||||||
while (num_tile_pipes--) {
|
|
||||||
i = backend_map & item_mask;
|
|
||||||
mask |= (1<<i);
|
|
||||||
backend_map >>= item_width;
|
|
||||||
}
|
|
||||||
if (mask != 0) {
|
|
||||||
rscreen->info.enabled_rb_mask = mask;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* otherwise backup path for older kernels */
|
|
||||||
|
|
||||||
/* create buffer for event data */
|
|
||||||
buffer = (struct r600_resource*)
|
|
||||||
pipe_buffer_create(ctx->b.screen, 0,
|
|
||||||
PIPE_USAGE_STAGING, max_rbs * 16);
|
|
||||||
if (!buffer)
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* initialize buffer with zeroes */
|
|
||||||
results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE);
|
|
||||||
if (results) {
|
|
||||||
memset(results, 0, max_rbs * 4 * 4);
|
|
||||||
|
|
||||||
/* emit EVENT_WRITE for ZPASS_DONE */
|
|
||||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
|
|
||||||
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
|
|
||||||
radeon_emit(cs, buffer->gpu_address);
|
|
||||||
radeon_emit(cs, buffer->gpu_address >> 32);
|
|
||||||
|
|
||||||
r600_emit_reloc(ctx, &ctx->gfx, buffer,
|
|
||||||
RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
|
|
||||||
|
|
||||||
/* analyze results */
|
|
||||||
results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
|
|
||||||
if (results) {
|
|
||||||
for(i = 0; i < max_rbs; i++) {
|
|
||||||
/* at least highest bit will be set if backend is used */
|
|
||||||
if (results[i*4 + 1])
|
|
||||||
mask |= (1<<i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
r600_resource_reference(&buffer, NULL);
|
|
||||||
|
|
||||||
if (mask)
|
|
||||||
rscreen->info.enabled_rb_mask = mask;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define XFULL(name_, query_type_, type_, result_type_, group_id_) \
|
#define XFULL(name_, query_type_, type_, result_type_, group_id_) \
|
||||||
{ \
|
{ \
|
||||||
.name = name_, \
|
.name = name_, \
|
||||||
|
@ -2124,13 +2027,13 @@ static int r600_get_driver_query_info(struct pipe_screen *screen,
|
||||||
|
|
||||||
if (!info) {
|
if (!info) {
|
||||||
unsigned num_perfcounters =
|
unsigned num_perfcounters =
|
||||||
r600_get_perfcounter_info(rscreen, 0, NULL);
|
si_get_perfcounter_info(rscreen, 0, NULL);
|
||||||
|
|
||||||
return num_queries + num_perfcounters;
|
return num_queries + num_perfcounters;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (index >= num_queries)
|
if (index >= num_queries)
|
||||||
return r600_get_perfcounter_info(rscreen, index - num_queries, info);
|
return si_get_perfcounter_info(rscreen, index - num_queries, info);
|
||||||
|
|
||||||
*info = r600_driver_query_list[index];
|
*info = r600_driver_query_list[index];
|
||||||
|
|
||||||
|
@ -2177,7 +2080,7 @@ static int r600_get_driver_query_group_info(struct pipe_screen *screen,
|
||||||
return num_pc_groups + R600_NUM_SW_QUERY_GROUPS;
|
return num_pc_groups + R600_NUM_SW_QUERY_GROUPS;
|
||||||
|
|
||||||
if (index < num_pc_groups)
|
if (index < num_pc_groups)
|
||||||
return r600_get_perfcounter_group_info(rscreen, index, info);
|
return si_get_perfcounter_group_info(rscreen, index, info);
|
||||||
|
|
||||||
index -= num_pc_groups;
|
index -= num_pc_groups;
|
||||||
if (index >= R600_NUM_SW_QUERY_GROUPS)
|
if (index >= R600_NUM_SW_QUERY_GROUPS)
|
||||||
|
@ -2189,10 +2092,10 @@ static int r600_get_driver_query_group_info(struct pipe_screen *screen,
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_query_init(struct r600_common_context *rctx)
|
void si_init_query_functions(struct r600_common_context *rctx)
|
||||||
{
|
{
|
||||||
rctx->b.create_query = r600_create_query;
|
rctx->b.create_query = r600_create_query;
|
||||||
rctx->b.create_batch_query = r600_create_batch_query;
|
rctx->b.create_batch_query = si_create_batch_query;
|
||||||
rctx->b.destroy_query = r600_destroy_query;
|
rctx->b.destroy_query = r600_destroy_query;
|
||||||
rctx->b.begin_query = r600_begin_query;
|
rctx->b.begin_query = r600_begin_query;
|
||||||
rctx->b.end_query = r600_end_query;
|
rctx->b.end_query = r600_end_query;
|
||||||
|
@ -2206,7 +2109,7 @@ void r600_query_init(struct r600_common_context *rctx)
|
||||||
LIST_INITHEAD(&rctx->active_queries);
|
LIST_INITHEAD(&rctx->active_queries);
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_init_screen_query_functions(struct r600_common_screen *rscreen)
|
void si_init_screen_query_functions(struct r600_common_screen *rscreen)
|
||||||
{
|
{
|
||||||
rscreen->b.get_driver_query_info = r600_get_driver_query_info;
|
rscreen->b.get_driver_query_info = r600_get_driver_query_info;
|
||||||
rscreen->b.get_driver_query_group_info = r600_get_driver_query_group_info;
|
rscreen->b.get_driver_query_group_info = r600_get_driver_query_group_info;
|
||||||
|
|
|
@ -200,18 +200,18 @@ struct r600_query_hw {
|
||||||
unsigned workaround_offset;
|
unsigned workaround_offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
bool r600_query_hw_init(struct r600_common_screen *rscreen,
|
bool si_query_hw_init(struct r600_common_screen *rscreen,
|
||||||
struct r600_query_hw *query);
|
struct r600_query_hw *query);
|
||||||
void r600_query_hw_destroy(struct r600_common_screen *rscreen,
|
void si_query_hw_destroy(struct r600_common_screen *rscreen,
|
||||||
struct r600_query *rquery);
|
|
||||||
bool r600_query_hw_begin(struct r600_common_context *rctx,
|
|
||||||
struct r600_query *rquery);
|
struct r600_query *rquery);
|
||||||
bool r600_query_hw_end(struct r600_common_context *rctx,
|
bool si_query_hw_begin(struct r600_common_context *rctx,
|
||||||
struct r600_query *rquery);
|
struct r600_query *rquery);
|
||||||
bool r600_query_hw_get_result(struct r600_common_context *rctx,
|
bool si_query_hw_end(struct r600_common_context *rctx,
|
||||||
struct r600_query *rquery,
|
struct r600_query *rquery);
|
||||||
bool wait,
|
bool si_query_hw_get_result(struct r600_common_context *rctx,
|
||||||
union pipe_query_result *result);
|
struct r600_query *rquery,
|
||||||
|
bool wait,
|
||||||
|
union pipe_query_result *result);
|
||||||
|
|
||||||
/* Performance counters */
|
/* Performance counters */
|
||||||
enum {
|
enum {
|
||||||
|
@ -297,26 +297,26 @@ struct r600_perfcounters {
|
||||||
bool separate_instance;
|
bool separate_instance;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
|
struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
|
||||||
unsigned num_queries,
|
unsigned num_queries,
|
||||||
unsigned *query_types);
|
unsigned *query_types);
|
||||||
|
|
||||||
int r600_get_perfcounter_info(struct r600_common_screen *,
|
int si_get_perfcounter_info(struct r600_common_screen *,
|
||||||
unsigned index,
|
unsigned index,
|
||||||
struct pipe_driver_query_info *info);
|
struct pipe_driver_query_info *info);
|
||||||
int r600_get_perfcounter_group_info(struct r600_common_screen *,
|
int si_get_perfcounter_group_info(struct r600_common_screen *,
|
||||||
unsigned index,
|
unsigned index,
|
||||||
struct pipe_driver_query_group_info *info);
|
struct pipe_driver_query_group_info *info);
|
||||||
|
|
||||||
bool r600_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks);
|
bool si_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks);
|
||||||
void r600_perfcounters_add_block(struct r600_common_screen *,
|
void si_perfcounters_add_block(struct r600_common_screen *,
|
||||||
struct r600_perfcounters *,
|
struct r600_perfcounters *,
|
||||||
const char *name, unsigned flags,
|
const char *name, unsigned flags,
|
||||||
unsigned counters, unsigned selectors,
|
unsigned counters, unsigned selectors,
|
||||||
unsigned instances, void *data);
|
unsigned instances, void *data);
|
||||||
void r600_perfcounters_do_destroy(struct r600_perfcounters *);
|
void si_perfcounters_do_destroy(struct r600_perfcounters *);
|
||||||
void r600_query_hw_reset_buffers(struct r600_common_context *rctx,
|
void si_query_hw_reset_buffers(struct r600_common_context *rctx,
|
||||||
struct r600_query_hw *query);
|
struct r600_query_hw *query);
|
||||||
|
|
||||||
struct r600_qbo_state {
|
struct r600_qbo_state {
|
||||||
void *saved_compute;
|
void *saved_compute;
|
||||||
|
|
|
@ -74,7 +74,7 @@ static void r600_so_target_destroy(struct pipe_context *ctx,
|
||||||
FREE(t);
|
FREE(t);
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_streamout_buffers_dirty(struct r600_common_context *rctx)
|
void si_streamout_buffers_dirty(struct r600_common_context *rctx)
|
||||||
{
|
{
|
||||||
struct r600_atom *begin = &rctx->streamout.begin_atom;
|
struct r600_atom *begin = &rctx->streamout.begin_atom;
|
||||||
unsigned num_bufs = util_bitcount(rctx->streamout.enabled_mask);
|
unsigned num_bufs = util_bitcount(rctx->streamout.enabled_mask);
|
||||||
|
@ -109,10 +109,10 @@ void r600_streamout_buffers_dirty(struct r600_common_context *rctx)
|
||||||
r600_set_streamout_enable(rctx, true);
|
r600_set_streamout_enable(rctx, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_set_streamout_targets(struct pipe_context *ctx,
|
void si_common_set_streamout_targets(struct pipe_context *ctx,
|
||||||
unsigned num_targets,
|
unsigned num_targets,
|
||||||
struct pipe_stream_output_target **targets,
|
struct pipe_stream_output_target **targets,
|
||||||
const unsigned *offsets)
|
const unsigned *offsets)
|
||||||
{
|
{
|
||||||
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
|
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
@ -120,7 +120,7 @@ void r600_set_streamout_targets(struct pipe_context *ctx,
|
||||||
|
|
||||||
/* Stop streamout. */
|
/* Stop streamout. */
|
||||||
if (rctx->streamout.num_targets && rctx->streamout.begin_emitted) {
|
if (rctx->streamout.num_targets && rctx->streamout.begin_emitted) {
|
||||||
r600_emit_streamout_end(rctx);
|
si_emit_streamout_end(rctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set the new targets. */
|
/* Set the new targets. */
|
||||||
|
@ -144,7 +144,7 @@ void r600_set_streamout_targets(struct pipe_context *ctx,
|
||||||
rctx->streamout.append_bitmask = append_bitmask;
|
rctx->streamout.append_bitmask = append_bitmask;
|
||||||
|
|
||||||
if (num_targets) {
|
if (num_targets) {
|
||||||
r600_streamout_buffers_dirty(rctx);
|
si_streamout_buffers_dirty(rctx);
|
||||||
} else {
|
} else {
|
||||||
rctx->set_atom_dirty(rctx, &rctx->streamout.begin_atom, false);
|
rctx->set_atom_dirty(rctx, &rctx->streamout.begin_atom, false);
|
||||||
r600_set_streamout_enable(rctx, false);
|
r600_set_streamout_enable(rctx, false);
|
||||||
|
@ -266,7 +266,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
|
||||||
rctx->streamout.begin_emitted = true;
|
rctx->streamout.begin_emitted = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_emit_streamout_end(struct r600_common_context *rctx)
|
void si_emit_streamout_end(struct r600_common_context *rctx)
|
||||||
{
|
{
|
||||||
struct radeon_winsys_cs *cs = rctx->gfx.cs;
|
struct radeon_winsys_cs *cs = rctx->gfx.cs;
|
||||||
struct r600_so_target **t = rctx->streamout.targets;
|
struct r600_so_target **t = rctx->streamout.targets;
|
||||||
|
@ -353,8 +353,8 @@ static void r600_set_streamout_enable(struct r600_common_context *rctx, bool ena
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
|
void si_update_prims_generated_query_state(struct r600_common_context *rctx,
|
||||||
unsigned type, int diff)
|
unsigned type, int diff)
|
||||||
{
|
{
|
||||||
if (type == PIPE_QUERY_PRIMITIVES_GENERATED) {
|
if (type == PIPE_QUERY_PRIMITIVES_GENERATED) {
|
||||||
bool old_strmout_en = r600_get_strmout_en(rctx);
|
bool old_strmout_en = r600_get_strmout_en(rctx);
|
||||||
|
@ -371,7 +371,7 @@ void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_streamout_init(struct r600_common_context *rctx)
|
void si_streamout_init(struct r600_common_context *rctx)
|
||||||
{
|
{
|
||||||
rctx->b.create_stream_output_target = r600_create_so_target;
|
rctx->b.create_stream_output_target = r600_create_so_target;
|
||||||
rctx->b.stream_output_target_destroy = r600_so_target_destroy;
|
rctx->b.stream_output_target_destroy = r600_so_target_destroy;
|
||||||
|
|
|
@ -171,7 +171,7 @@ static unsigned generate_max_tex_side(unsigned max_tex_side)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_test_dma(struct r600_common_screen *rscreen)
|
void si_test_dma(struct r600_common_screen *rscreen)
|
||||||
{
|
{
|
||||||
struct pipe_screen *screen = &rscreen->b;
|
struct pipe_screen *screen = &rscreen->b;
|
||||||
struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
|
struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
|
||||||
|
|
|
@ -44,13 +44,13 @@ r600_choose_tiling(struct r600_common_screen *rscreen,
|
||||||
const struct pipe_resource *templ);
|
const struct pipe_resource *templ);
|
||||||
|
|
||||||
|
|
||||||
bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
|
bool si_prepare_for_dma_blit(struct r600_common_context *rctx,
|
||||||
struct r600_texture *rdst,
|
struct r600_texture *rdst,
|
||||||
unsigned dst_level, unsigned dstx,
|
unsigned dst_level, unsigned dstx,
|
||||||
unsigned dsty, unsigned dstz,
|
unsigned dsty, unsigned dstz,
|
||||||
struct r600_texture *rsrc,
|
struct r600_texture *rsrc,
|
||||||
unsigned src_level,
|
unsigned src_level,
|
||||||
const struct pipe_box *src_box)
|
const struct pipe_box *src_box)
|
||||||
{
|
{
|
||||||
if (!rctx->dma.cs)
|
if (!rctx->dma.cs)
|
||||||
return false;
|
return false;
|
||||||
|
@ -237,7 +237,7 @@ static int r600_init_surface(struct r600_common_screen *rscreen,
|
||||||
is_depth = util_format_has_depth(desc);
|
is_depth = util_format_has_depth(desc);
|
||||||
is_stencil = util_format_has_stencil(desc);
|
is_stencil = util_format_has_stencil(desc);
|
||||||
|
|
||||||
if (rscreen->chip_class >= EVERGREEN && !is_flushed_depth &&
|
if (!is_flushed_depth &&
|
||||||
ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
|
ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
|
||||||
bpe = 4; /* stencil is allocated separately on evergreen */
|
bpe = 4; /* stencil is allocated separately on evergreen */
|
||||||
} else {
|
} else {
|
||||||
|
@ -408,10 +408,7 @@ static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
|
||||||
rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8;
|
rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8;
|
||||||
rtex->dirty_level_mask = 0;
|
rtex->dirty_level_mask = 0;
|
||||||
|
|
||||||
if (rscreen->chip_class >= SI)
|
rtex->cb_color_info &= ~SI_S_028C70_FAST_CLEAR(1);
|
||||||
rtex->cb_color_info &= ~SI_S_028C70_FAST_CLEAR(1);
|
|
||||||
else
|
|
||||||
rtex->cb_color_info &= ~EG_S_028C70_FAST_CLEAR(1);
|
|
||||||
|
|
||||||
if (rtex->cmask_buffer != &rtex->resource)
|
if (rtex->cmask_buffer != &rtex->resource)
|
||||||
r600_resource_reference(&rtex->cmask_buffer, NULL);
|
r600_resource_reference(&rtex->cmask_buffer, NULL);
|
||||||
|
@ -466,8 +463,8 @@ static bool r600_texture_discard_dcc(struct r600_common_screen *rscreen,
|
||||||
* \param rctx the current context if you have one, or rscreen->aux_context
|
* \param rctx the current context if you have one, or rscreen->aux_context
|
||||||
* if you don't.
|
* if you don't.
|
||||||
*/
|
*/
|
||||||
bool r600_texture_disable_dcc(struct r600_common_context *rctx,
|
bool si_texture_disable_dcc(struct r600_common_context *rctx,
|
||||||
struct r600_texture *rtex)
|
struct r600_texture *rtex)
|
||||||
{
|
{
|
||||||
struct r600_common_screen *rscreen = rctx->screen;
|
struct r600_common_screen *rscreen = rctx->screen;
|
||||||
|
|
||||||
|
@ -624,7 +621,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
|
||||||
* access.
|
* access.
|
||||||
*/
|
*/
|
||||||
if (usage & PIPE_HANDLE_USAGE_WRITE && rtex->dcc_offset) {
|
if (usage & PIPE_HANDLE_USAGE_WRITE && rtex->dcc_offset) {
|
||||||
if (r600_texture_disable_dcc(rctx, rtex))
|
if (si_texture_disable_dcc(rctx, rtex))
|
||||||
update_metadata = true;
|
update_metadata = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -681,7 +678,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
|
||||||
rctx->b.resource_copy_region(&rctx->b, newb, 0, 0, 0, 0,
|
rctx->b.resource_copy_region(&rctx->b, newb, 0, 0, 0, 0,
|
||||||
&res->b.b, 0, &box);
|
&res->b.b, 0, &box);
|
||||||
/* Move the new buffer storage to the old pipe_resource. */
|
/* Move the new buffer storage to the old pipe_resource. */
|
||||||
r600_replace_buffer_storage(&rctx->b, &res->b.b, newb);
|
si_replace_buffer_storage(&rctx->b, &res->b.b, newb);
|
||||||
pipe_resource_reference(&newb, NULL);
|
pipe_resource_reference(&newb, NULL);
|
||||||
|
|
||||||
assert(res->b.b.bind & PIPE_BIND_SHARED);
|
assert(res->b.b.bind & PIPE_BIND_SHARED);
|
||||||
|
@ -730,10 +727,10 @@ static void r600_texture_destroy(struct pipe_screen *screen,
|
||||||
static const struct u_resource_vtbl r600_texture_vtbl;
|
static const struct u_resource_vtbl r600_texture_vtbl;
|
||||||
|
|
||||||
/* The number of samples can be specified independently of the texture. */
|
/* The number of samples can be specified independently of the texture. */
|
||||||
void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
|
void si_texture_get_fmask_info(struct r600_common_screen *rscreen,
|
||||||
struct r600_texture *rtex,
|
struct r600_texture *rtex,
|
||||||
unsigned nr_samples,
|
unsigned nr_samples,
|
||||||
struct r600_fmask_info *out)
|
struct r600_fmask_info *out)
|
||||||
{
|
{
|
||||||
/* FMASK is allocated like an ordinary texture. */
|
/* FMASK is allocated like an ordinary texture. */
|
||||||
struct pipe_resource templ = rtex->resource.b.b;
|
struct pipe_resource templ = rtex->resource.b.b;
|
||||||
|
@ -751,17 +748,6 @@ void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
|
||||||
templ.nr_samples = 1;
|
templ.nr_samples = 1;
|
||||||
flags = rtex->surface.flags | RADEON_SURF_FMASK;
|
flags = rtex->surface.flags | RADEON_SURF_FMASK;
|
||||||
|
|
||||||
if (rscreen->chip_class <= CAYMAN) {
|
|
||||||
/* Use the same parameters and tile mode. */
|
|
||||||
fmask.u.legacy.bankw = rtex->surface.u.legacy.bankw;
|
|
||||||
fmask.u.legacy.bankh = rtex->surface.u.legacy.bankh;
|
|
||||||
fmask.u.legacy.mtilea = rtex->surface.u.legacy.mtilea;
|
|
||||||
fmask.u.legacy.tile_split = rtex->surface.u.legacy.tile_split;
|
|
||||||
|
|
||||||
if (nr_samples <= 4)
|
|
||||||
fmask.u.legacy.bankh = 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (nr_samples) {
|
switch (nr_samples) {
|
||||||
case 2:
|
case 2:
|
||||||
case 4:
|
case 4:
|
||||||
|
@ -775,13 +761,6 @@ void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Overallocate FMASK on R600-R700 to fix colorbuffer corruption.
|
|
||||||
* This can be fixed by writing a separate FMASK allocator specifically
|
|
||||||
* for R600-R700 asics. */
|
|
||||||
if (rscreen->chip_class <= R700) {
|
|
||||||
bpe *= 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rscreen->ws->surface_init(rscreen->ws, &templ, flags, bpe,
|
if (rscreen->ws->surface_init(rscreen->ws, &templ, flags, bpe,
|
||||||
RADEON_SURF_MODE_2D, &fmask)) {
|
RADEON_SURF_MODE_2D, &fmask)) {
|
||||||
R600_ERR("Got error in surface_init while allocating FMASK.\n");
|
R600_ERR("Got error in surface_init while allocating FMASK.\n");
|
||||||
|
@ -805,47 +784,13 @@ void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
|
||||||
static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen,
|
static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen,
|
||||||
struct r600_texture *rtex)
|
struct r600_texture *rtex)
|
||||||
{
|
{
|
||||||
r600_texture_get_fmask_info(rscreen, rtex,
|
si_texture_get_fmask_info(rscreen, rtex,
|
||||||
rtex->resource.b.b.nr_samples, &rtex->fmask);
|
rtex->resource.b.b.nr_samples, &rtex->fmask);
|
||||||
|
|
||||||
rtex->fmask.offset = align64(rtex->size, rtex->fmask.alignment);
|
rtex->fmask.offset = align64(rtex->size, rtex->fmask.alignment);
|
||||||
rtex->size = rtex->fmask.offset + rtex->fmask.size;
|
rtex->size = rtex->fmask.offset + rtex->fmask.size;
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
|
|
||||||
struct r600_texture *rtex,
|
|
||||||
struct r600_cmask_info *out)
|
|
||||||
{
|
|
||||||
unsigned cmask_tile_width = 8;
|
|
||||||
unsigned cmask_tile_height = 8;
|
|
||||||
unsigned cmask_tile_elements = cmask_tile_width * cmask_tile_height;
|
|
||||||
unsigned element_bits = 4;
|
|
||||||
unsigned cmask_cache_bits = 1024;
|
|
||||||
unsigned num_pipes = rscreen->info.num_tile_pipes;
|
|
||||||
unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
|
|
||||||
|
|
||||||
unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes;
|
|
||||||
unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements;
|
|
||||||
unsigned sqrt_pixels_per_macro_tile = sqrt(pixels_per_macro_tile);
|
|
||||||
unsigned macro_tile_width = util_next_power_of_two(sqrt_pixels_per_macro_tile);
|
|
||||||
unsigned macro_tile_height = pixels_per_macro_tile / macro_tile_width;
|
|
||||||
|
|
||||||
unsigned pitch_elements = align(rtex->resource.b.b.width0, macro_tile_width);
|
|
||||||
unsigned height = align(rtex->resource.b.b.height0, macro_tile_height);
|
|
||||||
|
|
||||||
unsigned base_align = num_pipes * pipe_interleave_bytes;
|
|
||||||
unsigned slice_bytes =
|
|
||||||
((pitch_elements * height * element_bits + 7) / 8) / cmask_tile_elements;
|
|
||||||
|
|
||||||
assert(macro_tile_width % 128 == 0);
|
|
||||||
assert(macro_tile_height % 128 == 0);
|
|
||||||
|
|
||||||
out->slice_tile_max = ((pitch_elements * height) / (128*128)) - 1;
|
|
||||||
out->alignment = MAX2(256, base_align);
|
|
||||||
out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
|
|
||||||
align(slice_bytes, base_align);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void si_texture_get_cmask_info(struct r600_common_screen *rscreen,
|
static void si_texture_get_cmask_info(struct r600_common_screen *rscreen,
|
||||||
struct r600_texture *rtex,
|
struct r600_texture *rtex,
|
||||||
struct r600_cmask_info *out)
|
struct r600_cmask_info *out)
|
||||||
|
@ -903,19 +848,12 @@ static void si_texture_get_cmask_info(struct r600_common_screen *rscreen,
|
||||||
static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen,
|
static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen,
|
||||||
struct r600_texture *rtex)
|
struct r600_texture *rtex)
|
||||||
{
|
{
|
||||||
if (rscreen->chip_class >= SI) {
|
si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
|
||||||
si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
|
|
||||||
} else {
|
|
||||||
r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
|
|
||||||
}
|
|
||||||
|
|
||||||
rtex->cmask.offset = align64(rtex->size, rtex->cmask.alignment);
|
rtex->cmask.offset = align64(rtex->size, rtex->cmask.alignment);
|
||||||
rtex->size = rtex->cmask.offset + rtex->cmask.size;
|
rtex->size = rtex->cmask.offset + rtex->cmask.size;
|
||||||
|
|
||||||
if (rscreen->chip_class >= SI)
|
rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
|
||||||
rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
|
|
||||||
else
|
|
||||||
rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen,
|
static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen,
|
||||||
|
@ -926,14 +864,10 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen
|
||||||
|
|
||||||
assert(rtex->cmask.size == 0);
|
assert(rtex->cmask.size == 0);
|
||||||
|
|
||||||
if (rscreen->chip_class >= SI) {
|
si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
|
||||||
si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
|
|
||||||
} else {
|
|
||||||
r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
|
|
||||||
}
|
|
||||||
|
|
||||||
rtex->cmask_buffer = (struct r600_resource *)
|
rtex->cmask_buffer = (struct r600_resource *)
|
||||||
r600_aligned_buffer_create(&rscreen->b,
|
si_aligned_buffer_create(&rscreen->b,
|
||||||
R600_RESOURCE_FLAG_UNMAPPABLE,
|
R600_RESOURCE_FLAG_UNMAPPABLE,
|
||||||
PIPE_USAGE_DEFAULT,
|
PIPE_USAGE_DEFAULT,
|
||||||
rtex->cmask.size,
|
rtex->cmask.size,
|
||||||
|
@ -946,10 +880,7 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen
|
||||||
/* update colorbuffer state bits */
|
/* update colorbuffer state bits */
|
||||||
rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
|
rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
|
||||||
|
|
||||||
if (rscreen->chip_class >= SI)
|
rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
|
||||||
rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
|
|
||||||
else
|
|
||||||
rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
|
|
||||||
|
|
||||||
p_atomic_inc(&rscreen->compressed_colortex_counter);
|
p_atomic_inc(&rscreen->compressed_colortex_counter);
|
||||||
}
|
}
|
||||||
|
@ -965,16 +896,6 @@ static void r600_texture_get_htile_size(struct r600_common_screen *rscreen,
|
||||||
|
|
||||||
rtex->surface.htile_size = 0;
|
rtex->surface.htile_size = 0;
|
||||||
|
|
||||||
if (rscreen->chip_class <= EVERGREEN &&
|
|
||||||
rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 26)
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* HW bug on R6xx. */
|
|
||||||
if (rscreen->chip_class == R600 &&
|
|
||||||
(rtex->resource.b.b.width0 > 7680 ||
|
|
||||||
rtex->resource.b.b.height0 > 7680))
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* HTILE is broken with 1D tiling on old kernels and CIK. */
|
/* HTILE is broken with 1D tiling on old kernels and CIK. */
|
||||||
if (rscreen->chip_class >= CIK &&
|
if (rscreen->chip_class >= CIK &&
|
||||||
rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
|
rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
|
||||||
|
@ -1045,8 +966,8 @@ static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
|
||||||
rtex->size = rtex->htile_offset + rtex->surface.htile_size;
|
rtex->size = rtex->htile_offset + rtex->surface.htile_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_print_texture_info(struct r600_common_screen *rscreen,
|
void si_print_texture_info(struct r600_common_screen *rscreen,
|
||||||
struct r600_texture *rtex, struct u_log_context *log)
|
struct r600_texture *rtex, struct u_log_context *log)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
@ -1252,21 +1173,12 @@ r600_texture_create_object(struct pipe_screen *screen,
|
||||||
rtex->ps_draw_ratio = 0;
|
rtex->ps_draw_ratio = 0;
|
||||||
|
|
||||||
if (rtex->is_depth) {
|
if (rtex->is_depth) {
|
||||||
if (base->flags & (R600_RESOURCE_FLAG_TRANSFER |
|
if (rscreen->chip_class >= GFX9) {
|
||||||
R600_RESOURCE_FLAG_FLUSHED_DEPTH) ||
|
rtex->can_sample_z = true;
|
||||||
rscreen->chip_class >= EVERGREEN) {
|
rtex->can_sample_s = true;
|
||||||
if (rscreen->chip_class >= GFX9) {
|
|
||||||
rtex->can_sample_z = true;
|
|
||||||
rtex->can_sample_s = true;
|
|
||||||
} else {
|
|
||||||
rtex->can_sample_z = !rtex->surface.u.legacy.depth_adjusted;
|
|
||||||
rtex->can_sample_s = !rtex->surface.u.legacy.stencil_adjusted;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
if (rtex->resource.b.b.nr_samples <= 1 &&
|
rtex->can_sample_z = !rtex->surface.u.legacy.depth_adjusted;
|
||||||
(rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM ||
|
rtex->can_sample_s = !rtex->surface.u.legacy.stencil_adjusted;
|
||||||
rtex->resource.b.b.format == PIPE_FORMAT_Z32_FLOAT))
|
|
||||||
rtex->can_sample_z = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
|
if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
|
||||||
|
@ -1304,14 +1216,14 @@ r600_texture_create_object(struct pipe_screen *screen,
|
||||||
|
|
||||||
/* Now create the backing buffer. */
|
/* Now create the backing buffer. */
|
||||||
if (!buf) {
|
if (!buf) {
|
||||||
r600_init_resource_fields(rscreen, resource, rtex->size,
|
si_init_resource_fields(rscreen, resource, rtex->size,
|
||||||
rtex->surface.surf_alignment);
|
rtex->surface.surf_alignment);
|
||||||
|
|
||||||
/* Displayable surfaces are not suballocated. */
|
/* Displayable surfaces are not suballocated. */
|
||||||
if (resource->b.b.bind & PIPE_BIND_SCANOUT)
|
if (resource->b.b.bind & PIPE_BIND_SCANOUT)
|
||||||
resource->flags |= RADEON_FLAG_NO_SUBALLOC;
|
resource->flags |= RADEON_FLAG_NO_SUBALLOC;
|
||||||
|
|
||||||
if (!r600_alloc_resource(rscreen, resource)) {
|
if (!si_alloc_resource(rscreen, resource)) {
|
||||||
FREE(rtex);
|
FREE(rtex);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -1329,7 +1241,7 @@ r600_texture_create_object(struct pipe_screen *screen,
|
||||||
|
|
||||||
if (rtex->cmask.size) {
|
if (rtex->cmask.size) {
|
||||||
/* Initialize the cmask to 0xCC (= compressed state). */
|
/* Initialize the cmask to 0xCC (= compressed state). */
|
||||||
r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
|
si_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
|
||||||
rtex->cmask.offset, rtex->cmask.size,
|
rtex->cmask.offset, rtex->cmask.size,
|
||||||
0xCCCCCCCC);
|
0xCCCCCCCC);
|
||||||
}
|
}
|
||||||
|
@ -1339,7 +1251,7 @@ r600_texture_create_object(struct pipe_screen *screen,
|
||||||
if (rscreen->chip_class >= GFX9 || rtex->tc_compatible_htile)
|
if (rscreen->chip_class >= GFX9 || rtex->tc_compatible_htile)
|
||||||
clear_value = 0x0000030F;
|
clear_value = 0x0000030F;
|
||||||
|
|
||||||
r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
|
si_screen_clear_buffer(rscreen, &rtex->resource.b.b,
|
||||||
rtex->htile_offset,
|
rtex->htile_offset,
|
||||||
rtex->surface.htile_size,
|
rtex->surface.htile_size,
|
||||||
clear_value);
|
clear_value);
|
||||||
|
@ -1347,7 +1259,7 @@ r600_texture_create_object(struct pipe_screen *screen,
|
||||||
|
|
||||||
/* Initialize DCC only if the texture is not being imported. */
|
/* Initialize DCC only if the texture is not being imported. */
|
||||||
if (!buf && rtex->dcc_offset) {
|
if (!buf && rtex->dcc_offset) {
|
||||||
r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
|
si_screen_clear_buffer(rscreen, &rtex->resource.b.b,
|
||||||
rtex->dcc_offset,
|
rtex->dcc_offset,
|
||||||
rtex->surface.dcc_size,
|
rtex->surface.dcc_size,
|
||||||
0xFFFFFFFF);
|
0xFFFFFFFF);
|
||||||
|
@ -1369,7 +1281,7 @@ r600_texture_create_object(struct pipe_screen *screen,
|
||||||
puts("Texture:");
|
puts("Texture:");
|
||||||
struct u_log_context log;
|
struct u_log_context log;
|
||||||
u_log_context_init(&log);
|
u_log_context_init(&log);
|
||||||
r600_print_texture_info(rscreen, rtex, &log);
|
si_print_texture_info(rscreen, rtex, &log);
|
||||||
u_log_new_page_print(&log, stdout);
|
u_log_new_page_print(&log, stdout);
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
u_log_context_destroy(&log);
|
u_log_context_destroy(&log);
|
||||||
|
@ -1403,13 +1315,6 @@ r600_choose_tiling(struct r600_common_screen *rscreen,
|
||||||
(templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY))
|
(templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY))
|
||||||
return RADEON_SURF_MODE_2D;
|
return RADEON_SURF_MODE_2D;
|
||||||
|
|
||||||
/* r600g: force tiling on TEXTURE_2D and TEXTURE_3D compute resources. */
|
|
||||||
if (rscreen->chip_class >= R600 && rscreen->chip_class <= CAYMAN &&
|
|
||||||
(templ->bind & PIPE_BIND_COMPUTE_RESOURCE) &&
|
|
||||||
(templ->target == PIPE_TEXTURE_2D ||
|
|
||||||
templ->target == PIPE_TEXTURE_3D))
|
|
||||||
force_tiling = true;
|
|
||||||
|
|
||||||
/* Handle common candidates for the linear mode.
|
/* Handle common candidates for the linear mode.
|
||||||
* Compressed textures and DB surfaces must always be tiled.
|
* Compressed textures and DB surfaces must always be tiled.
|
||||||
*/
|
*/
|
||||||
|
@ -1425,8 +1330,7 @@ r600_choose_tiling(struct r600_common_screen *rscreen,
|
||||||
|
|
||||||
/* Cursors are linear on SI.
|
/* Cursors are linear on SI.
|
||||||
* (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */
|
* (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */
|
||||||
if (rscreen->chip_class >= SI &&
|
if (templ->bind & PIPE_BIND_CURSOR)
|
||||||
(templ->bind & PIPE_BIND_CURSOR))
|
|
||||||
return RADEON_SURF_MODE_LINEAR_ALIGNED;
|
return RADEON_SURF_MODE_LINEAR_ALIGNED;
|
||||||
|
|
||||||
if (templ->bind & PIPE_BIND_LINEAR)
|
if (templ->bind & PIPE_BIND_LINEAR)
|
||||||
|
@ -1455,8 +1359,8 @@ r600_choose_tiling(struct r600_common_screen *rscreen,
|
||||||
return RADEON_SURF_MODE_2D;
|
return RADEON_SURF_MODE_2D;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
|
struct pipe_resource *si_texture_create(struct pipe_screen *screen,
|
||||||
const struct pipe_resource *templ)
|
const struct pipe_resource *templ)
|
||||||
{
|
{
|
||||||
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
|
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
|
||||||
struct radeon_surf surface = {0};
|
struct radeon_surf surface = {0};
|
||||||
|
@ -1531,9 +1435,9 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen
|
||||||
return &rtex->resource.b.b;
|
return &rtex->resource.b.b;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
|
bool si_init_flushed_depth_texture(struct pipe_context *ctx,
|
||||||
struct pipe_resource *texture,
|
struct pipe_resource *texture,
|
||||||
struct r600_texture **staging)
|
struct r600_texture **staging)
|
||||||
{
|
{
|
||||||
struct r600_texture *rtex = (struct r600_texture*)texture;
|
struct r600_texture *rtex = (struct r600_texture*)texture;
|
||||||
struct pipe_resource resource;
|
struct pipe_resource resource;
|
||||||
|
@ -1633,9 +1537,7 @@ static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen,
|
||||||
unsigned transfer_usage,
|
unsigned transfer_usage,
|
||||||
const struct pipe_box *box)
|
const struct pipe_box *box)
|
||||||
{
|
{
|
||||||
/* r600g doesn't react to dirty_tex_descriptor_counter */
|
return !rtex->resource.b.is_shared &&
|
||||||
return rscreen->chip_class >= SI &&
|
|
||||||
!rtex->resource.b.is_shared &&
|
|
||||||
!(transfer_usage & PIPE_TRANSFER_READ) &&
|
!(transfer_usage & PIPE_TRANSFER_READ) &&
|
||||||
rtex->resource.b.b.last_level == 0 &&
|
rtex->resource.b.b.last_level == 0 &&
|
||||||
util_texrange_covers_whole_level(&rtex->resource.b.b, 0,
|
util_texrange_covers_whole_level(&rtex->resource.b.b, 0,
|
||||||
|
@ -1654,7 +1556,7 @@ static void r600_texture_invalidate_storage(struct r600_common_context *rctx,
|
||||||
assert(rtex->surface.is_linear);
|
assert(rtex->surface.is_linear);
|
||||||
|
|
||||||
/* Reallocate the buffer in the same pipe_resource. */
|
/* Reallocate the buffer in the same pipe_resource. */
|
||||||
r600_alloc_resource(rscreen, &rtex->resource);
|
si_alloc_resource(rscreen, &rtex->resource);
|
||||||
|
|
||||||
/* Initialize the CMASK base address (needed even without CMASK). */
|
/* Initialize the CMASK base address (needed even without CMASK). */
|
||||||
rtex->cmask.base_address_reg =
|
rtex->cmask.base_address_reg =
|
||||||
|
@ -1718,7 +1620,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
|
||||||
rtex->resource.domains & RADEON_DOMAIN_VRAM ||
|
rtex->resource.domains & RADEON_DOMAIN_VRAM ||
|
||||||
rtex->resource.flags & RADEON_FLAG_GTT_WC;
|
rtex->resource.flags & RADEON_FLAG_GTT_WC;
|
||||||
/* Write & linear only: */
|
/* Write & linear only: */
|
||||||
else if (r600_rings_is_buffer_referenced(rctx, rtex->resource.buf,
|
else if (si_rings_is_buffer_referenced(rctx, rtex->resource.buf,
|
||||||
RADEON_USAGE_READWRITE) ||
|
RADEON_USAGE_READWRITE) ||
|
||||||
!rctx->ws->buffer_wait(rtex->resource.buf, 0,
|
!rctx->ws->buffer_wait(rtex->resource.buf, 0,
|
||||||
RADEON_USAGE_READWRITE)) {
|
RADEON_USAGE_READWRITE)) {
|
||||||
|
@ -1757,7 +1659,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
|
||||||
|
|
||||||
r600_init_temp_resource_from_box(&resource, texture, box, level, 0);
|
r600_init_temp_resource_from_box(&resource, texture, box, level, 0);
|
||||||
|
|
||||||
if (!r600_init_flushed_depth_texture(ctx, &resource, &staging_depth)) {
|
if (!si_init_flushed_depth_texture(ctx, &resource, &staging_depth)) {
|
||||||
R600_ERR("failed to create temporary texture to hold untiled copy\n");
|
R600_ERR("failed to create temporary texture to hold untiled copy\n");
|
||||||
FREE(trans);
|
FREE(trans);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -1784,7 +1686,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
|
||||||
} else {
|
} else {
|
||||||
/* XXX: only readback the rectangle which is being mapped? */
|
/* XXX: only readback the rectangle which is being mapped? */
|
||||||
/* XXX: when discard is true, no need to read back from depth texture */
|
/* XXX: when discard is true, no need to read back from depth texture */
|
||||||
if (!r600_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
|
if (!si_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
|
||||||
R600_ERR("failed to create temporary texture to hold untiled copy\n");
|
R600_ERR("failed to create temporary texture to hold untiled copy\n");
|
||||||
FREE(trans);
|
FREE(trans);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -1840,7 +1742,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
|
||||||
buf = &rtex->resource;
|
buf = &rtex->resource;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(map = r600_buffer_map_sync_with_rings(rctx, buf, usage))) {
|
if (!(map = si_buffer_map_sync_with_rings(rctx, buf, usage))) {
|
||||||
r600_resource_reference(&trans->staging, NULL);
|
r600_resource_reference(&trans->staging, NULL);
|
||||||
FREE(trans);
|
FREE(trans);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -2010,15 +1912,15 @@ void vi_disable_dcc_if_incompatible_format(struct r600_common_context *rctx,
|
||||||
|
|
||||||
if (vi_dcc_enabled(rtex, level) &&
|
if (vi_dcc_enabled(rtex, level) &&
|
||||||
!vi_dcc_formats_compatible(tex->format, view_format))
|
!vi_dcc_formats_compatible(tex->format, view_format))
|
||||||
if (!r600_texture_disable_dcc(rctx, (struct r600_texture*)tex))
|
if (!si_texture_disable_dcc(rctx, (struct r600_texture*)tex))
|
||||||
rctx->decompress_dcc(&rctx->b, rtex);
|
rctx->decompress_dcc(&rctx->b, rtex);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
|
struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe,
|
||||||
struct pipe_resource *texture,
|
struct pipe_resource *texture,
|
||||||
const struct pipe_surface *templ,
|
const struct pipe_surface *templ,
|
||||||
unsigned width0, unsigned height0,
|
unsigned width0, unsigned height0,
|
||||||
unsigned width, unsigned height)
|
unsigned width, unsigned height)
|
||||||
{
|
{
|
||||||
struct r600_surface *surface = CALLOC_STRUCT(r600_surface);
|
struct r600_surface *surface = CALLOC_STRUCT(r600_surface);
|
||||||
|
|
||||||
|
@ -2079,7 +1981,7 @@ static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return r600_create_surface_custom(pipe, tex, templ,
|
return si_create_surface_custom(pipe, tex, templ,
|
||||||
width0, height0,
|
width0, height0,
|
||||||
width, height);
|
width, height);
|
||||||
}
|
}
|
||||||
|
@ -2159,7 +2061,7 @@ static void r600_clear_texture(struct pipe_context *pipe,
|
||||||
pipe_surface_reference(&sf, NULL);
|
pipe_surface_reference(&sf, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap)
|
unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap)
|
||||||
{
|
{
|
||||||
const struct util_format_description *desc = util_format_description(format);
|
const struct util_format_description *desc = util_format_description(format);
|
||||||
|
|
||||||
|
@ -2380,7 +2282,7 @@ static void vi_separate_dcc_try_enable(struct r600_common_context *rctx,
|
||||||
tex->last_dcc_separate_buffer = NULL;
|
tex->last_dcc_separate_buffer = NULL;
|
||||||
} else {
|
} else {
|
||||||
tex->dcc_separate_buffer = (struct r600_resource*)
|
tex->dcc_separate_buffer = (struct r600_resource*)
|
||||||
r600_aligned_buffer_create(rctx->b.screen,
|
si_aligned_buffer_create(rctx->b.screen,
|
||||||
R600_RESOURCE_FLAG_UNMAPPABLE,
|
R600_RESOURCE_FLAG_UNMAPPABLE,
|
||||||
PIPE_USAGE_DEFAULT,
|
PIPE_USAGE_DEFAULT,
|
||||||
tex->surface.dcc_size,
|
tex->surface.dcc_size,
|
||||||
|
@ -2416,7 +2318,7 @@ void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
|
||||||
/* Read the results. */
|
/* Read the results. */
|
||||||
ctx->get_query_result(ctx, rctx->dcc_stats[i].ps_stats[2],
|
ctx->get_query_result(ctx, rctx->dcc_stats[i].ps_stats[2],
|
||||||
true, &result);
|
true, &result);
|
||||||
r600_query_hw_reset_buffers(rctx,
|
si_query_hw_reset_buffers(rctx,
|
||||||
(struct r600_query_hw*)
|
(struct r600_query_hw*)
|
||||||
rctx->dcc_stats[i].ps_stats[2]);
|
rctx->dcc_stats[i].ps_stats[2]);
|
||||||
|
|
||||||
|
@ -2527,7 +2429,7 @@ static bool vi_get_fast_clear_parameters(enum pipe_format surface_format,
|
||||||
util_format_is_alpha(surface_format)) {
|
util_format_is_alpha(surface_format)) {
|
||||||
extra_channel = -1;
|
extra_channel = -1;
|
||||||
} else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
|
} else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
|
||||||
if(r600_translate_colorswap(surface_format, false) <= 1)
|
if(si_translate_colorswap(surface_format, false) <= 1)
|
||||||
extra_channel = desc->nr_channels - 1;
|
extra_channel = desc->nr_channels - 1;
|
||||||
else
|
else
|
||||||
extra_channel = 0;
|
extra_channel = 0;
|
||||||
|
@ -2725,7 +2627,7 @@ static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen,
|
||||||
p_atomic_inc(&rscreen->dirty_tex_counter);
|
p_atomic_inc(&rscreen->dirty_tex_counter);
|
||||||
}
|
}
|
||||||
|
|
||||||
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
|
void si_do_fast_color_clear(struct r600_common_context *rctx,
|
||||||
struct pipe_framebuffer_state *fb,
|
struct pipe_framebuffer_state *fb,
|
||||||
struct r600_atom *fb_state,
|
struct r600_atom *fb_state,
|
||||||
unsigned *buffers, ubyte *dirty_cbufs,
|
unsigned *buffers, ubyte *dirty_cbufs,
|
||||||
|
@ -2858,8 +2760,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* We can change the micro tile mode before a full clear. */
|
/* We can change the micro tile mode before a full clear. */
|
||||||
if (rctx->screen->chip_class >= SI)
|
si_set_optimal_micro_tile_mode(rctx->screen, tex);
|
||||||
si_set_optimal_micro_tile_mode(rctx->screen, tex);
|
|
||||||
|
|
||||||
evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
|
evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
|
||||||
|
|
||||||
|
@ -2982,7 +2883,7 @@ r600_texture_from_memobj(struct pipe_screen *screen,
|
||||||
return &rtex->resource.b.b;
|
return &rtex->resource.b.b;
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_init_screen_texture_functions(struct r600_common_screen *rscreen)
|
void si_init_screen_texture_functions(struct r600_common_screen *rscreen)
|
||||||
{
|
{
|
||||||
rscreen->b.resource_from_handle = r600_texture_from_handle;
|
rscreen->b.resource_from_handle = r600_texture_from_handle;
|
||||||
rscreen->b.resource_get_handle = r600_texture_get_handle;
|
rscreen->b.resource_get_handle = r600_texture_get_handle;
|
||||||
|
@ -2991,7 +2892,7 @@ void r600_init_screen_texture_functions(struct r600_common_screen *rscreen)
|
||||||
rscreen->b.memobj_destroy = r600_memobj_destroy;
|
rscreen->b.memobj_destroy = r600_memobj_destroy;
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_init_context_texture_functions(struct r600_common_context *rctx)
|
void si_init_context_texture_functions(struct r600_common_context *rctx)
|
||||||
{
|
{
|
||||||
rctx->b.create_surface = r600_create_surface;
|
rctx->b.create_surface = r600_create_surface;
|
||||||
rctx->b.surface_destroy = r600_surface_destroy;
|
rctx->b.surface_destroy = r600_surface_destroy;
|
||||||
|
|
|
@ -115,8 +115,8 @@ static void r600_scissor_make_union(struct r600_signed_scissor *out,
|
||||||
out->maxy = MAX2(out->maxy, in->maxy);
|
out->maxy = MAX2(out->maxy, in->maxy);
|
||||||
}
|
}
|
||||||
|
|
||||||
void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
|
void si_apply_scissor_bug_workaround(struct r600_common_context *rctx,
|
||||||
struct pipe_scissor_state *scissor)
|
struct pipe_scissor_state *scissor)
|
||||||
{
|
{
|
||||||
if (rctx->chip_class == EVERGREEN || rctx->chip_class == CAYMAN) {
|
if (rctx->chip_class == EVERGREEN || rctx->chip_class == CAYMAN) {
|
||||||
if (scissor->maxx == 0)
|
if (scissor->maxx == 0)
|
||||||
|
@ -147,7 +147,7 @@ static void r600_emit_one_scissor(struct r600_common_context *rctx,
|
||||||
if (scissor)
|
if (scissor)
|
||||||
r600_clip_scissor(&final, scissor);
|
r600_clip_scissor(&final, scissor);
|
||||||
|
|
||||||
evergreen_apply_scissor_bug_workaround(rctx, &final);
|
si_apply_scissor_bug_workaround(rctx, &final);
|
||||||
|
|
||||||
radeon_emit(cs, S_028250_TL_X(final.minx) |
|
radeon_emit(cs, S_028250_TL_X(final.minx) |
|
||||||
S_028250_TL_Y(final.miny) |
|
S_028250_TL_Y(final.miny) |
|
||||||
|
@ -368,8 +368,8 @@ static void r600_emit_viewport_states(struct r600_common_context *rctx,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set viewport dependencies on pipe_rasterizer_state. */
|
/* Set viewport dependencies on pipe_rasterizer_state. */
|
||||||
void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
|
void si_viewport_set_rast_deps(struct r600_common_context *rctx,
|
||||||
bool scissor_enable, bool clip_halfz)
|
bool scissor_enable, bool clip_halfz)
|
||||||
{
|
{
|
||||||
if (rctx->scissor_enabled != scissor_enable) {
|
if (rctx->scissor_enabled != scissor_enable) {
|
||||||
rctx->scissor_enabled = scissor_enable;
|
rctx->scissor_enabled = scissor_enable;
|
||||||
|
@ -389,8 +389,8 @@ void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
|
||||||
* is delayed. When a shader with VIEWPORT_INDEX appears, this should be
|
* is delayed. When a shader with VIEWPORT_INDEX appears, this should be
|
||||||
* called to emit the rest.
|
* called to emit the rest.
|
||||||
*/
|
*/
|
||||||
void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
|
void si_update_vs_writes_viewport_index(struct r600_common_context *rctx,
|
||||||
struct tgsi_shader_info *info)
|
struct tgsi_shader_info *info)
|
||||||
{
|
{
|
||||||
bool vs_window_space;
|
bool vs_window_space;
|
||||||
|
|
||||||
|
@ -420,7 +420,7 @@ void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
|
||||||
rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
|
rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
void r600_init_viewport_functions(struct r600_common_context *rctx)
|
void si_init_viewport_functions(struct r600_common_context *rctx)
|
||||||
{
|
{
|
||||||
rctx->scissors.atom.emit = r600_emit_scissors;
|
rctx->scissors.atom.emit = r600_emit_scissors;
|
||||||
rctx->viewports.atom.emit = r600_emit_viewport_states;
|
rctx->viewports.atom.emit = r600_emit_viewport_states;
|
||||||
|
|
|
@ -1101,13 +1101,13 @@ static void ruvd_destroy(struct pipe_video_codec *decoder)
|
||||||
dec->ws->cs_destroy(dec->cs);
|
dec->ws->cs_destroy(dec->cs);
|
||||||
|
|
||||||
for (i = 0; i < NUM_BUFFERS; ++i) {
|
for (i = 0; i < NUM_BUFFERS; ++i) {
|
||||||
rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
|
si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
|
||||||
rvid_destroy_buffer(&dec->bs_buffers[i]);
|
si_vid_destroy_buffer(&dec->bs_buffers[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
rvid_destroy_buffer(&dec->dpb);
|
si_vid_destroy_buffer(&dec->dpb);
|
||||||
rvid_destroy_buffer(&dec->ctx);
|
si_vid_destroy_buffer(&dec->ctx);
|
||||||
rvid_destroy_buffer(&dec->sessionctx);
|
si_vid_destroy_buffer(&dec->sessionctx);
|
||||||
|
|
||||||
FREE(dec);
|
FREE(dec);
|
||||||
}
|
}
|
||||||
|
@ -1178,7 +1178,7 @@ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,
|
||||||
|
|
||||||
if (new_size > buf->res->buf->size) {
|
if (new_size > buf->res->buf->size) {
|
||||||
dec->ws->buffer_unmap(buf->res->buf);
|
dec->ws->buffer_unmap(buf->res->buf);
|
||||||
if (!rvid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
|
if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
|
||||||
RVID_ERR("Can't resize bitstream buffer!");
|
RVID_ERR("Can't resize bitstream buffer!");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -1271,10 +1271,10 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
|
||||||
ctx_size = calc_ctx_size_h265_main10(dec, (struct pipe_h265_picture_desc*)picture);
|
ctx_size = calc_ctx_size_h265_main10(dec, (struct pipe_h265_picture_desc*)picture);
|
||||||
else
|
else
|
||||||
ctx_size = calc_ctx_size_h265_main(dec);
|
ctx_size = calc_ctx_size_h265_main(dec);
|
||||||
if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
|
if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
|
||||||
RVID_ERR("Can't allocated context buffer.\n");
|
RVID_ERR("Can't allocated context buffer.\n");
|
||||||
}
|
}
|
||||||
rvid_clear_buffer(decoder->context, &dec->ctx);
|
si_vid_clear_buffer(decoder->context, &dec->ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dec->ctx.res)
|
if (dec->ctx.res)
|
||||||
|
@ -1341,9 +1341,9 @@ static void ruvd_flush(struct pipe_video_codec *decoder)
|
||||||
/**
|
/**
|
||||||
* create and UVD decoder
|
* create and UVD decoder
|
||||||
*/
|
*/
|
||||||
struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
|
struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *context,
|
||||||
const struct pipe_video_codec *templ,
|
const struct pipe_video_codec *templ,
|
||||||
ruvd_set_dtb set_dtb)
|
ruvd_set_dtb set_dtb)
|
||||||
{
|
{
|
||||||
struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws;
|
struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws;
|
||||||
struct r600_common_context *rctx = (struct r600_common_context*)context;
|
struct r600_common_context *rctx = (struct r600_common_context*)context;
|
||||||
|
@ -1398,7 +1398,7 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
|
||||||
|
|
||||||
dec->stream_type = profile2stream_type(dec, info.family);
|
dec->stream_type = profile2stream_type(dec, info.family);
|
||||||
dec->set_dtb = set_dtb;
|
dec->set_dtb = set_dtb;
|
||||||
dec->stream_handle = rvid_alloc_stream_handle();
|
dec->stream_handle = si_vid_alloc_stream_handle();
|
||||||
dec->screen = context->screen;
|
dec->screen = context->screen;
|
||||||
dec->ws = ws;
|
dec->ws = ws;
|
||||||
dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL);
|
dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL);
|
||||||
|
@ -1415,48 +1415,48 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
|
||||||
STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET);
|
STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET);
|
||||||
if (have_it(dec))
|
if (have_it(dec))
|
||||||
msg_fb_it_size += IT_SCALING_TABLE_SIZE;
|
msg_fb_it_size += IT_SCALING_TABLE_SIZE;
|
||||||
if (!rvid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
|
if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
|
||||||
msg_fb_it_size, PIPE_USAGE_STAGING)) {
|
msg_fb_it_size, PIPE_USAGE_STAGING)) {
|
||||||
RVID_ERR("Can't allocated message buffers.\n");
|
RVID_ERR("Can't allocated message buffers.\n");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!rvid_create_buffer(dec->screen, &dec->bs_buffers[i],
|
if (!si_vid_create_buffer(dec->screen, &dec->bs_buffers[i],
|
||||||
bs_buf_size, PIPE_USAGE_STAGING)) {
|
bs_buf_size, PIPE_USAGE_STAGING)) {
|
||||||
RVID_ERR("Can't allocated bitstream buffers.\n");
|
RVID_ERR("Can't allocated bitstream buffers.\n");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
rvid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
|
si_vid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
|
||||||
rvid_clear_buffer(context, &dec->bs_buffers[i]);
|
si_vid_clear_buffer(context, &dec->bs_buffers[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
dpb_size = calc_dpb_size(dec);
|
dpb_size = calc_dpb_size(dec);
|
||||||
if (dpb_size) {
|
if (dpb_size) {
|
||||||
if (!rvid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
|
if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
|
||||||
RVID_ERR("Can't allocated dpb.\n");
|
RVID_ERR("Can't allocated dpb.\n");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
rvid_clear_buffer(context, &dec->dpb);
|
si_vid_clear_buffer(context, &dec->dpb);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dec->stream_type == RUVD_CODEC_H264_PERF && info.family >= CHIP_POLARIS10) {
|
if (dec->stream_type == RUVD_CODEC_H264_PERF && info.family >= CHIP_POLARIS10) {
|
||||||
unsigned ctx_size = calc_ctx_size_h264_perf(dec);
|
unsigned ctx_size = calc_ctx_size_h264_perf(dec);
|
||||||
if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
|
if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
|
||||||
RVID_ERR("Can't allocated context buffer.\n");
|
RVID_ERR("Can't allocated context buffer.\n");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
rvid_clear_buffer(context, &dec->ctx);
|
si_vid_clear_buffer(context, &dec->ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (info.family >= CHIP_POLARIS10 && info.drm_minor >= 3) {
|
if (info.family >= CHIP_POLARIS10 && info.drm_minor >= 3) {
|
||||||
if (!rvid_create_buffer(dec->screen, &dec->sessionctx,
|
if (!si_vid_create_buffer(dec->screen, &dec->sessionctx,
|
||||||
UVD_SESSION_CONTEXT_SIZE,
|
UVD_SESSION_CONTEXT_SIZE,
|
||||||
PIPE_USAGE_DEFAULT)) {
|
PIPE_USAGE_DEFAULT)) {
|
||||||
RVID_ERR("Can't allocated session ctx.\n");
|
RVID_ERR("Can't allocated session ctx.\n");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
rvid_clear_buffer(context, &dec->sessionctx);
|
si_vid_clear_buffer(context, &dec->sessionctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (info.family >= CHIP_VEGA10) {
|
if (info.family >= CHIP_VEGA10) {
|
||||||
|
@ -1492,13 +1492,13 @@ error:
|
||||||
if (dec->cs) dec->ws->cs_destroy(dec->cs);
|
if (dec->cs) dec->ws->cs_destroy(dec->cs);
|
||||||
|
|
||||||
for (i = 0; i < NUM_BUFFERS; ++i) {
|
for (i = 0; i < NUM_BUFFERS; ++i) {
|
||||||
rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
|
si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
|
||||||
rvid_destroy_buffer(&dec->bs_buffers[i]);
|
si_vid_destroy_buffer(&dec->bs_buffers[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
rvid_destroy_buffer(&dec->dpb);
|
si_vid_destroy_buffer(&dec->dpb);
|
||||||
rvid_destroy_buffer(&dec->ctx);
|
si_vid_destroy_buffer(&dec->ctx);
|
||||||
rvid_destroy_buffer(&dec->sessionctx);
|
si_vid_destroy_buffer(&dec->sessionctx);
|
||||||
|
|
||||||
FREE(dec);
|
FREE(dec);
|
||||||
|
|
||||||
|
@ -1551,8 +1551,8 @@ static unsigned bank_wh(unsigned bankwh)
|
||||||
/**
|
/**
|
||||||
* fill decoding target field from the luma and chroma surfaces
|
* fill decoding target field from the luma and chroma surfaces
|
||||||
*/
|
*/
|
||||||
void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
|
void si_uvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
|
||||||
struct radeon_surf *chroma, enum ruvd_surface_type type)
|
struct radeon_surf *chroma, enum ruvd_surface_type type)
|
||||||
{
|
{
|
||||||
switch (type) {
|
switch (type) {
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -437,11 +437,11 @@ typedef struct pb_buffer* (*ruvd_set_dtb)
|
||||||
(struct ruvd_msg* msg, struct vl_video_buffer *vb);
|
(struct ruvd_msg* msg, struct vl_video_buffer *vb);
|
||||||
|
|
||||||
/* create an UVD decode */
|
/* create an UVD decode */
|
||||||
struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
|
struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *context,
|
||||||
const struct pipe_video_codec *templat,
|
const struct pipe_video_codec *templat,
|
||||||
ruvd_set_dtb set_dtb);
|
ruvd_set_dtb set_dtb);
|
||||||
|
|
||||||
/* fill decoding target field from the luma and chroma surfaces */
|
/* fill decoding target field from the luma and chroma surfaces */
|
||||||
void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
|
void si_uvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
|
||||||
struct radeon_surf *chroma, enum ruvd_surface_type type);
|
struct radeon_surf *chroma, enum ruvd_surface_type type);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -198,7 +198,7 @@ static unsigned get_cpb_num(struct rvce_encoder *enc)
|
||||||
/**
|
/**
|
||||||
* Get the slot for the currently encoded frame
|
* Get the slot for the currently encoded frame
|
||||||
*/
|
*/
|
||||||
struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc)
|
struct rvce_cpb_slot *si_current_slot(struct rvce_encoder *enc)
|
||||||
{
|
{
|
||||||
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list);
|
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list);
|
||||||
}
|
}
|
||||||
|
@ -206,7 +206,7 @@ struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc)
|
||||||
/**
|
/**
|
||||||
* Get the slot for L0
|
* Get the slot for L0
|
||||||
*/
|
*/
|
||||||
struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc)
|
struct rvce_cpb_slot *si_l0_slot(struct rvce_encoder *enc)
|
||||||
{
|
{
|
||||||
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list);
|
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list);
|
||||||
}
|
}
|
||||||
|
@ -214,7 +214,7 @@ struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc)
|
||||||
/**
|
/**
|
||||||
* Get the slot for L1
|
* Get the slot for L1
|
||||||
*/
|
*/
|
||||||
struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc)
|
struct rvce_cpb_slot *si_l1_slot(struct rvce_encoder *enc)
|
||||||
{
|
{
|
||||||
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list);
|
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list);
|
||||||
}
|
}
|
||||||
|
@ -222,8 +222,8 @@ struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc)
|
||||||
/**
|
/**
|
||||||
* Calculate the offsets into the CPB
|
* Calculate the offsets into the CPB
|
||||||
*/
|
*/
|
||||||
void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
|
void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
|
||||||
signed *luma_offset, signed *chroma_offset)
|
signed *luma_offset, signed *chroma_offset)
|
||||||
{
|
{
|
||||||
struct r600_common_screen *rscreen = (struct r600_common_screen *)enc->screen;
|
struct r600_common_screen *rscreen = (struct r600_common_screen *)enc->screen;
|
||||||
unsigned pitch, vpitch, fsize;
|
unsigned pitch, vpitch, fsize;
|
||||||
|
@ -249,15 +249,15 @@ static void rvce_destroy(struct pipe_video_codec *encoder)
|
||||||
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
|
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
|
||||||
if (enc->stream_handle) {
|
if (enc->stream_handle) {
|
||||||
struct rvid_buffer fb;
|
struct rvid_buffer fb;
|
||||||
rvid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
|
si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
|
||||||
enc->fb = &fb;
|
enc->fb = &fb;
|
||||||
enc->session(enc);
|
enc->session(enc);
|
||||||
enc->feedback(enc);
|
enc->feedback(enc);
|
||||||
enc->destroy(enc);
|
enc->destroy(enc);
|
||||||
flush(enc);
|
flush(enc);
|
||||||
rvid_destroy_buffer(&fb);
|
si_vid_destroy_buffer(&fb);
|
||||||
}
|
}
|
||||||
rvid_destroy_buffer(&enc->cpb);
|
si_vid_destroy_buffer(&enc->cpb);
|
||||||
enc->ws->cs_destroy(enc->cs);
|
enc->ws->cs_destroy(enc->cs);
|
||||||
FREE(enc->cpb_array);
|
FREE(enc->cpb_array);
|
||||||
FREE(enc);
|
FREE(enc);
|
||||||
|
@ -278,7 +278,7 @@ static void rvce_begin_frame(struct pipe_video_codec *encoder,
|
||||||
enc->pic.quant_b_frames != pic->quant_b_frames;
|
enc->pic.quant_b_frames != pic->quant_b_frames;
|
||||||
|
|
||||||
enc->pic = *pic;
|
enc->pic = *pic;
|
||||||
get_pic_param(enc, pic);
|
si_get_pic_param(enc, pic);
|
||||||
|
|
||||||
enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
|
enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
|
||||||
enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
|
enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
|
||||||
|
@ -291,8 +291,8 @@ static void rvce_begin_frame(struct pipe_video_codec *encoder,
|
||||||
|
|
||||||
if (!enc->stream_handle) {
|
if (!enc->stream_handle) {
|
||||||
struct rvid_buffer fb;
|
struct rvid_buffer fb;
|
||||||
enc->stream_handle = rvid_alloc_stream_handle();
|
enc->stream_handle = si_vid_alloc_stream_handle();
|
||||||
rvid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
|
si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
|
||||||
enc->fb = &fb;
|
enc->fb = &fb;
|
||||||
enc->session(enc);
|
enc->session(enc);
|
||||||
enc->create(enc);
|
enc->create(enc);
|
||||||
|
@ -300,7 +300,7 @@ static void rvce_begin_frame(struct pipe_video_codec *encoder,
|
||||||
enc->feedback(enc);
|
enc->feedback(enc);
|
||||||
flush(enc);
|
flush(enc);
|
||||||
//dump_feedback(enc, &fb);
|
//dump_feedback(enc, &fb);
|
||||||
rvid_destroy_buffer(&fb);
|
si_vid_destroy_buffer(&fb);
|
||||||
need_rate_control = false;
|
need_rate_control = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -321,7 +321,7 @@ static void rvce_encode_bitstream(struct pipe_video_codec *encoder,
|
||||||
enc->bs_size = destination->width0;
|
enc->bs_size = destination->width0;
|
||||||
|
|
||||||
*fb = enc->fb = CALLOC_STRUCT(rvid_buffer);
|
*fb = enc->fb = CALLOC_STRUCT(rvid_buffer);
|
||||||
if (!rvid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) {
|
if (!si_vid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) {
|
||||||
RVID_ERR("Can't create feedback buffer.\n");
|
RVID_ERR("Can't create feedback buffer.\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -370,7 +370,7 @@ static void rvce_get_feedback(struct pipe_video_codec *encoder,
|
||||||
enc->ws->buffer_unmap(fb->res->buf);
|
enc->ws->buffer_unmap(fb->res->buf);
|
||||||
}
|
}
|
||||||
//dump_feedback(enc, fb);
|
//dump_feedback(enc, fb);
|
||||||
rvid_destroy_buffer(fb);
|
si_vid_destroy_buffer(fb);
|
||||||
FREE(fb);
|
FREE(fb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -390,10 +390,10 @@ static void rvce_cs_flush(void *ctx, unsigned flags,
|
||||||
// just ignored
|
// just ignored
|
||||||
}
|
}
|
||||||
|
|
||||||
struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
|
struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context,
|
||||||
const struct pipe_video_codec *templ,
|
const struct pipe_video_codec *templ,
|
||||||
struct radeon_winsys* ws,
|
struct radeon_winsys* ws,
|
||||||
rvce_get_buffer get_buffer)
|
rvce_get_buffer get_buffer)
|
||||||
{
|
{
|
||||||
struct r600_common_screen *rscreen = (struct r600_common_screen *)context->screen;
|
struct r600_common_screen *rscreen = (struct r600_common_screen *)context->screen;
|
||||||
struct r600_common_context *rctx = (struct r600_common_context*)context;
|
struct r600_common_context *rctx = (struct r600_common_context*)context;
|
||||||
|
@ -406,7 +406,7 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
|
||||||
RVID_ERR("Kernel doesn't supports VCE!\n");
|
RVID_ERR("Kernel doesn't supports VCE!\n");
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
} else if (!rvce_is_fw_version_supported(rscreen)) {
|
} else if (!si_vce_is_fw_version_supported(rscreen)) {
|
||||||
RVID_ERR("Unsupported VCE fw version loaded!\n");
|
RVID_ERR("Unsupported VCE fw version loaded!\n");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -479,7 +479,7 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
|
||||||
cpb_size += RVCE_MAX_AUX_BUFFER_NUM *
|
cpb_size += RVCE_MAX_AUX_BUFFER_NUM *
|
||||||
RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
|
RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
|
||||||
tmp_buf->destroy(tmp_buf);
|
tmp_buf->destroy(tmp_buf);
|
||||||
if (!rvid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
|
if (!si_vid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
|
||||||
RVID_ERR("Can't create CPB buffer.\n");
|
RVID_ERR("Can't create CPB buffer.\n");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
@ -492,29 +492,29 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
|
||||||
|
|
||||||
switch (rscreen->info.vce_fw_version) {
|
switch (rscreen->info.vce_fw_version) {
|
||||||
case FW_40_2_2:
|
case FW_40_2_2:
|
||||||
radeon_vce_40_2_2_init(enc);
|
si_vce_40_2_2_init(enc);
|
||||||
get_pic_param = radeon_vce_40_2_2_get_param;
|
si_get_pic_param = si_vce_40_2_2_get_param;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case FW_50_0_1:
|
case FW_50_0_1:
|
||||||
case FW_50_1_2:
|
case FW_50_1_2:
|
||||||
case FW_50_10_2:
|
case FW_50_10_2:
|
||||||
case FW_50_17_3:
|
case FW_50_17_3:
|
||||||
radeon_vce_50_init(enc);
|
si_vce_50_init(enc);
|
||||||
get_pic_param = radeon_vce_50_get_param;
|
si_get_pic_param = si_vce_50_get_param;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case FW_52_0_3:
|
case FW_52_0_3:
|
||||||
case FW_52_4_3:
|
case FW_52_4_3:
|
||||||
case FW_52_8_3:
|
case FW_52_8_3:
|
||||||
radeon_vce_52_init(enc);
|
si_vce_52_init(enc);
|
||||||
get_pic_param = radeon_vce_52_get_param;
|
si_get_pic_param = si_vce_52_get_param;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
if ((rscreen->info.vce_fw_version & (0xff << 24)) == FW_53) {
|
if ((rscreen->info.vce_fw_version & (0xff << 24)) == FW_53) {
|
||||||
radeon_vce_52_init(enc);
|
si_vce_52_init(enc);
|
||||||
get_pic_param = radeon_vce_52_get_param;
|
si_get_pic_param = si_vce_52_get_param;
|
||||||
} else
|
} else
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
@ -525,7 +525,7 @@ error:
|
||||||
if (enc->cs)
|
if (enc->cs)
|
||||||
enc->ws->cs_destroy(enc->cs);
|
enc->ws->cs_destroy(enc->cs);
|
||||||
|
|
||||||
rvid_destroy_buffer(&enc->cpb);
|
si_vid_destroy_buffer(&enc->cpb);
|
||||||
|
|
||||||
FREE(enc->cpb_array);
|
FREE(enc->cpb_array);
|
||||||
FREE(enc);
|
FREE(enc);
|
||||||
|
@ -535,7 +535,7 @@ error:
|
||||||
/**
|
/**
|
||||||
* check if kernel has the right fw version loaded
|
* check if kernel has the right fw version loaded
|
||||||
*/
|
*/
|
||||||
bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
|
bool si_vce_is_fw_version_supported(struct r600_common_screen *rscreen)
|
||||||
{
|
{
|
||||||
switch (rscreen->info.vce_fw_version) {
|
switch (rscreen->info.vce_fw_version) {
|
||||||
case FW_40_2_2:
|
case FW_40_2_2:
|
||||||
|
@ -558,9 +558,9 @@ bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
|
||||||
/**
|
/**
|
||||||
* Add the buffer as relocation to the current command submission
|
* Add the buffer as relocation to the current command submission
|
||||||
*/
|
*/
|
||||||
void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
|
void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
|
||||||
enum radeon_bo_usage usage, enum radeon_bo_domain domain,
|
enum radeon_bo_usage usage, enum radeon_bo_domain domain,
|
||||||
signed offset)
|
signed offset)
|
||||||
{
|
{
|
||||||
int reloc_idx;
|
int reloc_idx;
|
||||||
|
|
||||||
|
|
|
@ -40,9 +40,9 @@
|
||||||
#define RVCE_BEGIN(cmd) { \
|
#define RVCE_BEGIN(cmd) { \
|
||||||
uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \
|
uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \
|
||||||
RVCE_CS(cmd)
|
RVCE_CS(cmd)
|
||||||
#define RVCE_READ(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
|
#define RVCE_READ(buf, domain, off) si_vce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
|
||||||
#define RVCE_WRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
|
#define RVCE_WRITE(buf, domain, off) si_vce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
|
||||||
#define RVCE_READWRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
|
#define RVCE_READWRITE(buf, domain, off) si_vce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
|
||||||
#define RVCE_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; }
|
#define RVCE_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; }
|
||||||
|
|
||||||
#define RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE (4096 * 16 * 2.5)
|
#define RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE (4096 * 16 * 2.5)
|
||||||
|
@ -417,46 +417,46 @@ struct rvce_encoder {
|
||||||
};
|
};
|
||||||
|
|
||||||
/* CPB handling functions */
|
/* CPB handling functions */
|
||||||
struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc);
|
struct rvce_cpb_slot *si_current_slot(struct rvce_encoder *enc);
|
||||||
struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc);
|
struct rvce_cpb_slot *si_l0_slot(struct rvce_encoder *enc);
|
||||||
struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc);
|
struct rvce_cpb_slot *si_l1_slot(struct rvce_encoder *enc);
|
||||||
void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
|
void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
|
||||||
signed *luma_offset, signed *chroma_offset);
|
signed *luma_offset, signed *chroma_offset);
|
||||||
|
|
||||||
struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
|
struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context,
|
||||||
const struct pipe_video_codec *templat,
|
const struct pipe_video_codec *templat,
|
||||||
struct radeon_winsys* ws,
|
struct radeon_winsys* ws,
|
||||||
rvce_get_buffer get_buffer);
|
rvce_get_buffer get_buffer);
|
||||||
|
|
||||||
bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen);
|
bool si_vce_is_fw_version_supported(struct r600_common_screen *rscreen);
|
||||||
|
|
||||||
void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
|
void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
|
||||||
enum radeon_bo_usage usage, enum radeon_bo_domain domain,
|
enum radeon_bo_usage usage, enum radeon_bo_domain domain,
|
||||||
signed offset);
|
signed offset);
|
||||||
|
|
||||||
/* init vce fw 40.2.2 specific callbacks */
|
/* init vce fw 40.2.2 specific callbacks */
|
||||||
void radeon_vce_40_2_2_init(struct rvce_encoder *enc);
|
void si_vce_40_2_2_init(struct rvce_encoder *enc);
|
||||||
|
|
||||||
/* init vce fw 50 specific callbacks */
|
/* init vce fw 50 specific callbacks */
|
||||||
void radeon_vce_50_init(struct rvce_encoder *enc);
|
void si_vce_50_init(struct rvce_encoder *enc);
|
||||||
|
|
||||||
/* init vce fw 52 specific callbacks */
|
/* init vce fw 52 specific callbacks */
|
||||||
void radeon_vce_52_init(struct rvce_encoder *enc);
|
void si_vce_52_init(struct rvce_encoder *enc);
|
||||||
|
|
||||||
/* version specific function for getting parameters */
|
/* version specific function for getting parameters */
|
||||||
void (*get_pic_param)(struct rvce_encoder *enc,
|
void (*si_get_pic_param)(struct rvce_encoder *enc,
|
||||||
struct pipe_h264_enc_picture_desc *pic);
|
struct pipe_h264_enc_picture_desc *pic);
|
||||||
|
|
||||||
/* get parameters for vce 40.2.2 */
|
/* get parameters for vce 40.2.2 */
|
||||||
void radeon_vce_40_2_2_get_param(struct rvce_encoder *enc,
|
void si_vce_40_2_2_get_param(struct rvce_encoder *enc,
|
||||||
struct pipe_h264_enc_picture_desc *pic);
|
struct pipe_h264_enc_picture_desc *pic);
|
||||||
|
|
||||||
/* get parameters for vce 50 */
|
/* get parameters for vce 50 */
|
||||||
void radeon_vce_50_get_param(struct rvce_encoder *enc,
|
void si_vce_50_get_param(struct rvce_encoder *enc,
|
||||||
struct pipe_h264_enc_picture_desc *pic);
|
struct pipe_h264_enc_picture_desc *pic);
|
||||||
|
|
||||||
/* get parameters for vce 52 */
|
/* get parameters for vce 52 */
|
||||||
void radeon_vce_52_get_param(struct rvce_encoder *enc,
|
void si_vce_52_get_param(struct rvce_encoder *enc,
|
||||||
struct pipe_h264_enc_picture_desc *pic);
|
struct pipe_h264_enc_picture_desc *pic);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -363,8 +363,8 @@ static void encode(struct rvce_encoder *enc)
|
||||||
RVCE_CS(0x00000000); // pictureStructure
|
RVCE_CS(0x00000000); // pictureStructure
|
||||||
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
|
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
|
||||||
enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
|
enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
|
||||||
struct rvce_cpb_slot *l0 = l0_slot(enc);
|
struct rvce_cpb_slot *l0 = si_l0_slot(enc);
|
||||||
rvce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
|
si_vce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
|
||||||
RVCE_CS(l0->picture_type); // encPicType
|
RVCE_CS(l0->picture_type); // encPicType
|
||||||
RVCE_CS(l0->frame_num); // frameNumber
|
RVCE_CS(l0->frame_num); // frameNumber
|
||||||
RVCE_CS(l0->pic_order_cnt); // pictureOrderCount
|
RVCE_CS(l0->pic_order_cnt); // pictureOrderCount
|
||||||
|
@ -389,8 +389,8 @@ static void encode(struct rvce_encoder *enc)
|
||||||
// encReferencePictureL1[0]
|
// encReferencePictureL1[0]
|
||||||
RVCE_CS(0x00000000); // pictureStructure
|
RVCE_CS(0x00000000); // pictureStructure
|
||||||
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
|
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
|
||||||
struct rvce_cpb_slot *l1 = l1_slot(enc);
|
struct rvce_cpb_slot *l1 = si_l1_slot(enc);
|
||||||
rvce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
|
si_vce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
|
||||||
RVCE_CS(l1->picture_type); // encPicType
|
RVCE_CS(l1->picture_type); // encPicType
|
||||||
RVCE_CS(l1->frame_num); // frameNumber
|
RVCE_CS(l1->frame_num); // frameNumber
|
||||||
RVCE_CS(l1->pic_order_cnt); // pictureOrderCount
|
RVCE_CS(l1->pic_order_cnt); // pictureOrderCount
|
||||||
|
@ -404,7 +404,7 @@ static void encode(struct rvce_encoder *enc)
|
||||||
RVCE_CS(0xffffffff); // chromaOffset
|
RVCE_CS(0xffffffff); // chromaOffset
|
||||||
}
|
}
|
||||||
|
|
||||||
rvce_frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset);
|
si_vce_frame_offset(enc, si_current_slot(enc), &luma_offset, &chroma_offset);
|
||||||
RVCE_CS(luma_offset); // encReconstructedLumaOffset
|
RVCE_CS(luma_offset); // encReconstructedLumaOffset
|
||||||
RVCE_CS(chroma_offset); // encReconstructedChromaOffset
|
RVCE_CS(chroma_offset); // encReconstructedChromaOffset
|
||||||
RVCE_CS(0x00000000); // encColocBufferOffset
|
RVCE_CS(0x00000000); // encColocBufferOffset
|
||||||
|
@ -431,11 +431,11 @@ static void destroy(struct rvce_encoder *enc)
|
||||||
RVCE_END();
|
RVCE_END();
|
||||||
}
|
}
|
||||||
|
|
||||||
void radeon_vce_40_2_2_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic)
|
void si_vce_40_2_2_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
void radeon_vce_40_2_2_init(struct rvce_encoder *enc)
|
void si_vce_40_2_2_init(struct rvce_encoder *enc)
|
||||||
{
|
{
|
||||||
enc->session = session;
|
enc->session = session;
|
||||||
enc->task_info = task_info;
|
enc->task_info = task_info;
|
||||||
|
|
|
@ -173,8 +173,8 @@ static void encode(struct rvce_encoder *enc)
|
||||||
RVCE_CS(0x00000000); // pictureStructure
|
RVCE_CS(0x00000000); // pictureStructure
|
||||||
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
|
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
|
||||||
enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
|
enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
|
||||||
struct rvce_cpb_slot *l0 = l0_slot(enc);
|
struct rvce_cpb_slot *l0 = si_l0_slot(enc);
|
||||||
rvce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
|
si_vce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
|
||||||
RVCE_CS(l0->picture_type); // encPicType
|
RVCE_CS(l0->picture_type); // encPicType
|
||||||
RVCE_CS(l0->frame_num); // frameNumber
|
RVCE_CS(l0->frame_num); // frameNumber
|
||||||
RVCE_CS(l0->pic_order_cnt); // pictureOrderCount
|
RVCE_CS(l0->pic_order_cnt); // pictureOrderCount
|
||||||
|
@ -199,8 +199,8 @@ static void encode(struct rvce_encoder *enc)
|
||||||
// encReferencePictureL1[0]
|
// encReferencePictureL1[0]
|
||||||
RVCE_CS(0x00000000); // pictureStructure
|
RVCE_CS(0x00000000); // pictureStructure
|
||||||
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
|
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
|
||||||
struct rvce_cpb_slot *l1 = l1_slot(enc);
|
struct rvce_cpb_slot *l1 = si_l1_slot(enc);
|
||||||
rvce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
|
si_vce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
|
||||||
RVCE_CS(l1->picture_type); // encPicType
|
RVCE_CS(l1->picture_type); // encPicType
|
||||||
RVCE_CS(l1->frame_num); // frameNumber
|
RVCE_CS(l1->frame_num); // frameNumber
|
||||||
RVCE_CS(l1->pic_order_cnt); // pictureOrderCount
|
RVCE_CS(l1->pic_order_cnt); // pictureOrderCount
|
||||||
|
@ -214,7 +214,7 @@ static void encode(struct rvce_encoder *enc)
|
||||||
RVCE_CS(0xffffffff); // chromaOffset
|
RVCE_CS(0xffffffff); // chromaOffset
|
||||||
}
|
}
|
||||||
|
|
||||||
rvce_frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset);
|
si_vce_frame_offset(enc, si_current_slot(enc), &luma_offset, &chroma_offset);
|
||||||
RVCE_CS(luma_offset); // encReconstructedLumaOffset
|
RVCE_CS(luma_offset); // encReconstructedLumaOffset
|
||||||
RVCE_CS(chroma_offset); // encReconstructedChromaOffset
|
RVCE_CS(chroma_offset); // encReconstructedChromaOffset
|
||||||
RVCE_CS(0x00000000); // encColocBufferOffset
|
RVCE_CS(0x00000000); // encColocBufferOffset
|
||||||
|
@ -233,13 +233,13 @@ static void encode(struct rvce_encoder *enc)
|
||||||
RVCE_END();
|
RVCE_END();
|
||||||
}
|
}
|
||||||
|
|
||||||
void radeon_vce_50_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic)
|
void si_vce_50_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
void radeon_vce_50_init(struct rvce_encoder *enc)
|
void si_vce_50_init(struct rvce_encoder *enc)
|
||||||
{
|
{
|
||||||
radeon_vce_40_2_2_init(enc);
|
si_vce_40_2_2_init(enc);
|
||||||
|
|
||||||
/* only the two below are different */
|
/* only the two below are different */
|
||||||
enc->rate_control = rate_control;
|
enc->rate_control = rate_control;
|
||||||
|
|
|
@ -138,7 +138,7 @@ static void get_vui_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture
|
||||||
enc->enc_pic.vui.max_dec_frame_buffering = 0x00000003;
|
enc->enc_pic.vui.max_dec_frame_buffering = 0x00000003;
|
||||||
}
|
}
|
||||||
|
|
||||||
void radeon_vce_52_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic)
|
void si_vce_52_get_param(struct rvce_encoder *enc, struct pipe_h264_enc_picture_desc *pic)
|
||||||
{
|
{
|
||||||
get_rate_control_param(enc, pic);
|
get_rate_control_param(enc, pic);
|
||||||
get_motion_estimation_param(enc, pic);
|
get_motion_estimation_param(enc, pic);
|
||||||
|
@ -319,8 +319,8 @@ static void encode(struct rvce_encoder *enc)
|
||||||
RVCE_CS(0x00000000); // pictureStructure
|
RVCE_CS(0x00000000); // pictureStructure
|
||||||
if(enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
|
if(enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
|
||||||
enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
|
enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
|
||||||
struct rvce_cpb_slot *l0 = l0_slot(enc);
|
struct rvce_cpb_slot *l0 = si_l0_slot(enc);
|
||||||
rvce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
|
si_vce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
|
||||||
RVCE_CS(l0->picture_type);
|
RVCE_CS(l0->picture_type);
|
||||||
RVCE_CS(l0->frame_num);
|
RVCE_CS(l0->frame_num);
|
||||||
RVCE_CS(l0->pic_order_cnt);
|
RVCE_CS(l0->pic_order_cnt);
|
||||||
|
@ -356,8 +356,8 @@ static void encode(struct rvce_encoder *enc)
|
||||||
// encReferencePictureL1[0]
|
// encReferencePictureL1[0]
|
||||||
RVCE_CS(0x00000000); // pictureStructure
|
RVCE_CS(0x00000000); // pictureStructure
|
||||||
if(enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
|
if(enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
|
||||||
struct rvce_cpb_slot *l1 = l1_slot(enc);
|
struct rvce_cpb_slot *l1 = si_l1_slot(enc);
|
||||||
rvce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
|
si_vce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
|
||||||
RVCE_CS(l1->picture_type);
|
RVCE_CS(l1->picture_type);
|
||||||
RVCE_CS(l1->frame_num);
|
RVCE_CS(l1->frame_num);
|
||||||
RVCE_CS(l1->pic_order_cnt);
|
RVCE_CS(l1->pic_order_cnt);
|
||||||
|
@ -376,7 +376,7 @@ static void encode(struct rvce_encoder *enc)
|
||||||
RVCE_CS(enc->enc_pic.eo.l1_chroma_offset);
|
RVCE_CS(enc->enc_pic.eo.l1_chroma_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
rvce_frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset);
|
si_vce_frame_offset(enc, si_current_slot(enc), &luma_offset, &chroma_offset);
|
||||||
RVCE_CS(luma_offset);
|
RVCE_CS(luma_offset);
|
||||||
RVCE_CS(chroma_offset);
|
RVCE_CS(chroma_offset);
|
||||||
RVCE_CS(enc->enc_pic.eo.enc_coloc_buffer_offset);
|
RVCE_CS(enc->enc_pic.eo.enc_coloc_buffer_offset);
|
||||||
|
@ -646,7 +646,7 @@ static void vui(struct rvce_encoder *enc)
|
||||||
RVCE_END();
|
RVCE_END();
|
||||||
}
|
}
|
||||||
|
|
||||||
void radeon_vce_52_init(struct rvce_encoder *enc)
|
void si_vce_52_init(struct rvce_encoder *enc)
|
||||||
{
|
{
|
||||||
enc->session = session;
|
enc->session = session;
|
||||||
enc->task_info = task_info;
|
enc->task_info = task_info;
|
||||||
|
|
|
@ -678,9 +678,9 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec,
|
||||||
(struct pipe_h265_picture_desc*)picture);
|
(struct pipe_h265_picture_desc*)picture);
|
||||||
else
|
else
|
||||||
ctx_size = calc_ctx_size_h265_main(dec);
|
ctx_size = calc_ctx_size_h265_main(dec);
|
||||||
if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT))
|
if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT))
|
||||||
RVID_ERR("Can't allocated context buffer.\n");
|
RVID_ERR("Can't allocated context buffer.\n");
|
||||||
rvid_clear_buffer(dec->base.context, &dec->ctx);
|
si_vid_clear_buffer(dec->base.context, &dec->ctx);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1026,13 +1026,13 @@ static void radeon_dec_destroy(struct pipe_video_codec *decoder)
|
||||||
dec->ws->cs_destroy(dec->cs);
|
dec->ws->cs_destroy(dec->cs);
|
||||||
|
|
||||||
for (i = 0; i < NUM_BUFFERS; ++i) {
|
for (i = 0; i < NUM_BUFFERS; ++i) {
|
||||||
rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
|
si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
|
||||||
rvid_destroy_buffer(&dec->bs_buffers[i]);
|
si_vid_destroy_buffer(&dec->bs_buffers[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
rvid_destroy_buffer(&dec->dpb);
|
si_vid_destroy_buffer(&dec->dpb);
|
||||||
rvid_destroy_buffer(&dec->ctx);
|
si_vid_destroy_buffer(&dec->ctx);
|
||||||
rvid_destroy_buffer(&dec->sessionctx);
|
si_vid_destroy_buffer(&dec->sessionctx);
|
||||||
|
|
||||||
FREE(dec);
|
FREE(dec);
|
||||||
}
|
}
|
||||||
|
@ -1096,7 +1096,7 @@ static void radeon_dec_decode_bitstream(struct pipe_video_codec *decoder,
|
||||||
|
|
||||||
if (new_size > buf->res->buf->size) {
|
if (new_size > buf->res->buf->size) {
|
||||||
dec->ws->buffer_unmap(buf->res->buf);
|
dec->ws->buffer_unmap(buf->res->buf);
|
||||||
if (!rvid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
|
if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
|
||||||
RVID_ERR("Can't resize bitstream buffer!");
|
RVID_ERR("Can't resize bitstream buffer!");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -1227,7 +1227,7 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
|
||||||
dec->base.flush = radeon_dec_flush;
|
dec->base.flush = radeon_dec_flush;
|
||||||
|
|
||||||
dec->stream_type = stream_type;
|
dec->stream_type = stream_type;
|
||||||
dec->stream_handle = rvid_alloc_stream_handle();
|
dec->stream_handle = si_vid_alloc_stream_handle();
|
||||||
dec->screen = context->screen;
|
dec->screen = context->screen;
|
||||||
dec->ws = ws;
|
dec->ws = ws;
|
||||||
dec->cs = ws->cs_create(rctx->ctx, RING_VCN_DEC, NULL, NULL);
|
dec->cs = ws->cs_create(rctx->ctx, RING_VCN_DEC, NULL, NULL);
|
||||||
|
@ -1242,47 +1242,47 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
|
||||||
if (have_it(dec))
|
if (have_it(dec))
|
||||||
msg_fb_it_size += IT_SCALING_TABLE_SIZE;
|
msg_fb_it_size += IT_SCALING_TABLE_SIZE;
|
||||||
/* use vram to improve performance, workaround an unknown bug */
|
/* use vram to improve performance, workaround an unknown bug */
|
||||||
if (!rvid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
|
if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
|
||||||
msg_fb_it_size, PIPE_USAGE_DEFAULT)) {
|
msg_fb_it_size, PIPE_USAGE_DEFAULT)) {
|
||||||
RVID_ERR("Can't allocated message buffers.\n");
|
RVID_ERR("Can't allocated message buffers.\n");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!rvid_create_buffer(dec->screen, &dec->bs_buffers[i],
|
if (!si_vid_create_buffer(dec->screen, &dec->bs_buffers[i],
|
||||||
bs_buf_size, PIPE_USAGE_STAGING)) {
|
bs_buf_size, PIPE_USAGE_STAGING)) {
|
||||||
RVID_ERR("Can't allocated bitstream buffers.\n");
|
RVID_ERR("Can't allocated bitstream buffers.\n");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
rvid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
|
si_vid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
|
||||||
rvid_clear_buffer(context, &dec->bs_buffers[i]);
|
si_vid_clear_buffer(context, &dec->bs_buffers[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
dpb_size = calc_dpb_size(dec);
|
dpb_size = calc_dpb_size(dec);
|
||||||
|
|
||||||
if (!rvid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
|
if (!si_vid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
|
||||||
RVID_ERR("Can't allocated dpb.\n");
|
RVID_ERR("Can't allocated dpb.\n");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
rvid_clear_buffer(context, &dec->dpb);
|
si_vid_clear_buffer(context, &dec->dpb);
|
||||||
|
|
||||||
if (dec->stream_type == RDECODE_CODEC_H264_PERF) {
|
if (dec->stream_type == RDECODE_CODEC_H264_PERF) {
|
||||||
unsigned ctx_size = calc_ctx_size_h264_perf(dec);
|
unsigned ctx_size = calc_ctx_size_h264_perf(dec);
|
||||||
if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
|
if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
|
||||||
RVID_ERR("Can't allocated context buffer.\n");
|
RVID_ERR("Can't allocated context buffer.\n");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
rvid_clear_buffer(context, &dec->ctx);
|
si_vid_clear_buffer(context, &dec->ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!rvid_create_buffer(dec->screen, &dec->sessionctx,
|
if (!si_vid_create_buffer(dec->screen, &dec->sessionctx,
|
||||||
RDECODE_SESSION_CONTEXT_SIZE,
|
RDECODE_SESSION_CONTEXT_SIZE,
|
||||||
PIPE_USAGE_DEFAULT)) {
|
PIPE_USAGE_DEFAULT)) {
|
||||||
RVID_ERR("Can't allocated session ctx.\n");
|
RVID_ERR("Can't allocated session ctx.\n");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
rvid_clear_buffer(context, &dec->sessionctx);
|
si_vid_clear_buffer(context, &dec->sessionctx);
|
||||||
|
|
||||||
map_msg_fb_it_buf(dec);
|
map_msg_fb_it_buf(dec);
|
||||||
rvcn_dec_message_create(dec);
|
rvcn_dec_message_create(dec);
|
||||||
|
@ -1299,13 +1299,13 @@ error:
|
||||||
if (dec->cs) dec->ws->cs_destroy(dec->cs);
|
if (dec->cs) dec->ws->cs_destroy(dec->cs);
|
||||||
|
|
||||||
for (i = 0; i < NUM_BUFFERS; ++i) {
|
for (i = 0; i < NUM_BUFFERS; ++i) {
|
||||||
rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
|
si_vid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
|
||||||
rvid_destroy_buffer(&dec->bs_buffers[i]);
|
si_vid_destroy_buffer(&dec->bs_buffers[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
rvid_destroy_buffer(&dec->dpb);
|
si_vid_destroy_buffer(&dec->dpb);
|
||||||
rvid_destroy_buffer(&dec->ctx);
|
si_vid_destroy_buffer(&dec->ctx);
|
||||||
rvid_destroy_buffer(&dec->sessionctx);
|
si_vid_destroy_buffer(&dec->sessionctx);
|
||||||
|
|
||||||
FREE(dec);
|
FREE(dec);
|
||||||
|
|
||||||
|
|
|
@ -46,7 +46,7 @@
|
||||||
#define UVD_FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8))
|
#define UVD_FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8))
|
||||||
|
|
||||||
/* generate an stream handle */
|
/* generate an stream handle */
|
||||||
unsigned rvid_alloc_stream_handle()
|
unsigned si_vid_alloc_stream_handle()
|
||||||
{
|
{
|
||||||
static unsigned counter = 0;
|
static unsigned counter = 0;
|
||||||
unsigned stream_handle = 0;
|
unsigned stream_handle = 0;
|
||||||
|
@ -61,8 +61,8 @@ unsigned rvid_alloc_stream_handle()
|
||||||
}
|
}
|
||||||
|
|
||||||
/* create a buffer in the winsys */
|
/* create a buffer in the winsys */
|
||||||
bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
|
bool si_vid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
|
||||||
unsigned size, unsigned usage)
|
unsigned size, unsigned usage)
|
||||||
{
|
{
|
||||||
memset(buffer, 0, sizeof(*buffer));
|
memset(buffer, 0, sizeof(*buffer));
|
||||||
buffer->usage = usage;
|
buffer->usage = usage;
|
||||||
|
@ -79,14 +79,14 @@ bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* destroy a buffer */
|
/* destroy a buffer */
|
||||||
void rvid_destroy_buffer(struct rvid_buffer *buffer)
|
void si_vid_destroy_buffer(struct rvid_buffer *buffer)
|
||||||
{
|
{
|
||||||
r600_resource_reference(&buffer->res, NULL);
|
r600_resource_reference(&buffer->res, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* reallocate a buffer, preserving its content */
|
/* reallocate a buffer, preserving its content */
|
||||||
bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
|
bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
|
||||||
struct rvid_buffer *new_buf, unsigned new_size)
|
struct rvid_buffer *new_buf, unsigned new_size)
|
||||||
{
|
{
|
||||||
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
|
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
|
||||||
struct radeon_winsys* ws = rscreen->ws;
|
struct radeon_winsys* ws = rscreen->ws;
|
||||||
|
@ -94,7 +94,7 @@ bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
|
||||||
struct rvid_buffer old_buf = *new_buf;
|
struct rvid_buffer old_buf = *new_buf;
|
||||||
void *src = NULL, *dst = NULL;
|
void *src = NULL, *dst = NULL;
|
||||||
|
|
||||||
if (!rvid_create_buffer(screen, new_buf, new_size, new_buf->usage))
|
if (!si_vid_create_buffer(screen, new_buf, new_size, new_buf->usage))
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ);
|
src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ);
|
||||||
|
@ -113,19 +113,19 @@ bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
|
||||||
}
|
}
|
||||||
ws->buffer_unmap(new_buf->res->buf);
|
ws->buffer_unmap(new_buf->res->buf);
|
||||||
ws->buffer_unmap(old_buf.res->buf);
|
ws->buffer_unmap(old_buf.res->buf);
|
||||||
rvid_destroy_buffer(&old_buf);
|
si_vid_destroy_buffer(&old_buf);
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
error:
|
error:
|
||||||
if (src)
|
if (src)
|
||||||
ws->buffer_unmap(old_buf.res->buf);
|
ws->buffer_unmap(old_buf.res->buf);
|
||||||
rvid_destroy_buffer(new_buf);
|
si_vid_destroy_buffer(new_buf);
|
||||||
*new_buf = old_buf;
|
*new_buf = old_buf;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* clear the buffer with zeros */
|
/* clear the buffer with zeros */
|
||||||
void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
|
void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
|
||||||
{
|
{
|
||||||
struct r600_common_context *rctx = (struct r600_common_context*)context;
|
struct r600_common_context *rctx = (struct r600_common_context*)context;
|
||||||
|
|
||||||
|
@ -138,9 +138,9 @@ void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
|
||||||
* join surfaces into the same buffer with identical tiling params
|
* join surfaces into the same buffer with identical tiling params
|
||||||
* sumup their sizes and replace the backend buffers with a single bo
|
* sumup their sizes and replace the backend buffers with a single bo
|
||||||
*/
|
*/
|
||||||
void rvid_join_surfaces(struct r600_common_context *rctx,
|
void si_vid_join_surfaces(struct r600_common_context *rctx,
|
||||||
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
|
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
|
||||||
struct radeon_surf *surfaces[VL_NUM_COMPONENTS])
|
struct radeon_surf *surfaces[VL_NUM_COMPONENTS])
|
||||||
{
|
{
|
||||||
struct radeon_winsys* ws;
|
struct radeon_winsys* ws;
|
||||||
unsigned best_tiling, best_wh, off;
|
unsigned best_tiling, best_wh, off;
|
||||||
|
@ -218,10 +218,10 @@ void rvid_join_surfaces(struct r600_common_context *rctx,
|
||||||
pb_reference(&pb, NULL);
|
pb_reference(&pb, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
int rvid_get_video_param(struct pipe_screen *screen,
|
int si_vid_get_video_param(struct pipe_screen *screen,
|
||||||
enum pipe_video_profile profile,
|
enum pipe_video_profile profile,
|
||||||
enum pipe_video_entrypoint entrypoint,
|
enum pipe_video_entrypoint entrypoint,
|
||||||
enum pipe_video_cap param)
|
enum pipe_video_cap param)
|
||||||
{
|
{
|
||||||
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
|
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
|
||||||
enum pipe_video_format codec = u_reduce_video_profile(profile);
|
enum pipe_video_format codec = u_reduce_video_profile(profile);
|
||||||
|
@ -233,7 +233,7 @@ int rvid_get_video_param(struct pipe_screen *screen,
|
||||||
switch (param) {
|
switch (param) {
|
||||||
case PIPE_VIDEO_CAP_SUPPORTED:
|
case PIPE_VIDEO_CAP_SUPPORTED:
|
||||||
return codec == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
|
return codec == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
|
||||||
rvce_is_fw_version_supported(rscreen);
|
si_vce_is_fw_version_supported(rscreen);
|
||||||
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
|
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
|
||||||
return 1;
|
return 1;
|
||||||
case PIPE_VIDEO_CAP_MAX_WIDTH:
|
case PIPE_VIDEO_CAP_MAX_WIDTH:
|
||||||
|
@ -354,10 +354,10 @@ int rvid_get_video_param(struct pipe_screen *screen,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean rvid_is_format_supported(struct pipe_screen *screen,
|
boolean si_vid_is_format_supported(struct pipe_screen *screen,
|
||||||
enum pipe_format format,
|
enum pipe_format format,
|
||||||
enum pipe_video_profile profile,
|
enum pipe_video_profile profile,
|
||||||
enum pipe_video_entrypoint entrypoint)
|
enum pipe_video_entrypoint entrypoint)
|
||||||
{
|
{
|
||||||
/* HEVC 10 bit decoding should use P016 instead of NV12 if possible */
|
/* HEVC 10 bit decoding should use P016 instead of NV12 if possible */
|
||||||
if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
|
if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
|
||||||
|
|
|
@ -48,38 +48,38 @@ struct rvid_buffer
|
||||||
};
|
};
|
||||||
|
|
||||||
/* generate an stream handle */
|
/* generate an stream handle */
|
||||||
unsigned rvid_alloc_stream_handle(void);
|
unsigned si_vid_alloc_stream_handle(void);
|
||||||
|
|
||||||
/* create a buffer in the winsys */
|
/* create a buffer in the winsys */
|
||||||
bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
|
bool si_vid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
|
||||||
unsigned size, unsigned usage);
|
unsigned size, unsigned usage);
|
||||||
|
|
||||||
/* destroy a buffer */
|
/* destroy a buffer */
|
||||||
void rvid_destroy_buffer(struct rvid_buffer *buffer);
|
void si_vid_destroy_buffer(struct rvid_buffer *buffer);
|
||||||
|
|
||||||
/* reallocate a buffer, preserving its content */
|
/* reallocate a buffer, preserving its content */
|
||||||
bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
|
bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
|
||||||
struct rvid_buffer *new_buf, unsigned new_size);
|
struct rvid_buffer *new_buf, unsigned new_size);
|
||||||
|
|
||||||
/* clear the buffer with zeros */
|
/* clear the buffer with zeros */
|
||||||
void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer);
|
void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer);
|
||||||
|
|
||||||
/* join surfaces into the same buffer with identical tiling params
|
/* join surfaces into the same buffer with identical tiling params
|
||||||
sumup their sizes and replace the backend buffers with a single bo */
|
sumup their sizes and replace the backend buffers with a single bo */
|
||||||
void rvid_join_surfaces(struct r600_common_context *rctx,
|
void si_vid_join_surfaces(struct r600_common_context *rctx,
|
||||||
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
|
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
|
||||||
struct radeon_surf *surfaces[VL_NUM_COMPONENTS]);
|
struct radeon_surf *surfaces[VL_NUM_COMPONENTS]);
|
||||||
|
|
||||||
/* returns supported codecs and other parameters */
|
/* returns supported codecs and other parameters */
|
||||||
int rvid_get_video_param(struct pipe_screen *screen,
|
int si_vid_get_video_param(struct pipe_screen *screen,
|
||||||
enum pipe_video_profile profile,
|
enum pipe_video_profile profile,
|
||||||
enum pipe_video_entrypoint entrypoint,
|
enum pipe_video_entrypoint entrypoint,
|
||||||
enum pipe_video_cap param);
|
enum pipe_video_cap param);
|
||||||
|
|
||||||
/* the hardware only supports NV12 */
|
/* the hardware only supports NV12 */
|
||||||
boolean rvid_is_format_supported(struct pipe_screen *screen,
|
boolean si_vid_is_format_supported(struct pipe_screen *screen,
|
||||||
enum pipe_format format,
|
enum pipe_format format,
|
||||||
enum pipe_video_profile profile,
|
enum pipe_video_profile profile,
|
||||||
enum pipe_video_entrypoint entrypoint);
|
enum pipe_video_entrypoint entrypoint);
|
||||||
|
|
||||||
#endif // RADEON_VIDEO_H
|
#endif // RADEON_VIDEO_H
|
||||||
|
|
|
@ -50,7 +50,7 @@ static void cik_sdma_copy_buffer(struct si_context *ctx,
|
||||||
src_offset += rsrc->gpu_address;
|
src_offset += rsrc->gpu_address;
|
||||||
|
|
||||||
ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
|
ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
|
||||||
r600_need_dma_space(&ctx->b, ncopy * 7, rdst, rsrc);
|
si_need_dma_space(&ctx->b, ncopy * 7, rdst, rsrc);
|
||||||
|
|
||||||
for (i = 0; i < ncopy; i++) {
|
for (i = 0; i < ncopy; i++) {
|
||||||
csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE);
|
csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE);
|
||||||
|
@ -95,7 +95,7 @@ static void cik_sdma_clear_buffer(struct pipe_context *ctx,
|
||||||
|
|
||||||
/* the same maximum size as for copying */
|
/* the same maximum size as for copying */
|
||||||
ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
|
ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
|
||||||
r600_need_dma_space(&sctx->b, ncopy * 5, rdst, NULL);
|
si_need_dma_space(&sctx->b, ncopy * 5, rdst, NULL);
|
||||||
|
|
||||||
for (i = 0; i < ncopy; i++) {
|
for (i = 0; i < ncopy; i++) {
|
||||||
csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE);
|
csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE);
|
||||||
|
@ -194,7 +194,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
|
||||||
src_slice_pitch * bpp * (srcz + src_box->depth) <=
|
src_slice_pitch * bpp * (srcz + src_box->depth) <=
|
||||||
rsrc->resource.buf->size);
|
rsrc->resource.buf->size);
|
||||||
|
|
||||||
if (!r600_prepare_for_dma_blit(&sctx->b, rdst, dst_level, dstx, dsty,
|
if (!si_prepare_for_dma_blit(&sctx->b, rdst, dst_level, dstx, dsty,
|
||||||
dstz, rsrc, src_level, src_box))
|
dstz, rsrc, src_level, src_box))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -235,7 +235,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
|
||||||
srcy + copy_height != (1 << 14)))) {
|
srcy + copy_height != (1 << 14)))) {
|
||||||
struct radeon_winsys_cs *cs = sctx->b.dma.cs;
|
struct radeon_winsys_cs *cs = sctx->b.dma.cs;
|
||||||
|
|
||||||
r600_need_dma_space(&sctx->b, 13, &rdst->resource, &rsrc->resource);
|
si_need_dma_space(&sctx->b, 13, &rdst->resource, &rsrc->resource);
|
||||||
|
|
||||||
radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
|
radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
|
||||||
CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW, 0) |
|
CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW, 0) |
|
||||||
|
@ -398,7 +398,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
|
||||||
struct radeon_winsys_cs *cs = sctx->b.dma.cs;
|
struct radeon_winsys_cs *cs = sctx->b.dma.cs;
|
||||||
uint32_t direction = linear == rdst ? 1u << 31 : 0;
|
uint32_t direction = linear == rdst ? 1u << 31 : 0;
|
||||||
|
|
||||||
r600_need_dma_space(&sctx->b, 14, &rdst->resource, &rsrc->resource);
|
si_need_dma_space(&sctx->b, 14, &rdst->resource, &rsrc->resource);
|
||||||
|
|
||||||
radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
|
radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
|
||||||
CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW, 0) |
|
CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW, 0) |
|
||||||
|
@ -492,7 +492,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
|
||||||
dstx + copy_width != (1 << 14)))) {
|
dstx + copy_width != (1 << 14)))) {
|
||||||
struct radeon_winsys_cs *cs = sctx->b.dma.cs;
|
struct radeon_winsys_cs *cs = sctx->b.dma.cs;
|
||||||
|
|
||||||
r600_need_dma_space(&sctx->b, 15, &rdst->resource, &rsrc->resource);
|
si_need_dma_space(&sctx->b, 15, &rdst->resource, &rsrc->resource);
|
||||||
|
|
||||||
radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
|
radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
|
||||||
CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW, 0));
|
CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW, 0));
|
||||||
|
|
|
@ -350,7 +350,7 @@ si_decompress_depth(struct si_context *sctx,
|
||||||
*/
|
*/
|
||||||
if (copy_planes &&
|
if (copy_planes &&
|
||||||
(tex->flushed_depth_texture ||
|
(tex->flushed_depth_texture ||
|
||||||
r600_init_flushed_depth_texture(&sctx->b.b, &tex->resource.b.b, NULL))) {
|
si_init_flushed_depth_texture(&sctx->b.b, &tex->resource.b.b, NULL))) {
|
||||||
struct r600_texture *dst = tex->flushed_depth_texture;
|
struct r600_texture *dst = tex->flushed_depth_texture;
|
||||||
unsigned fully_copied_levels;
|
unsigned fully_copied_levels;
|
||||||
unsigned levels = 0;
|
unsigned levels = 0;
|
||||||
|
@ -621,7 +621,7 @@ static void si_check_render_feedback_texture(struct si_context *sctx,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (render_feedback)
|
if (render_feedback)
|
||||||
r600_texture_disable_dcc(&sctx->b, tex);
|
si_texture_disable_dcc(&sctx->b, tex);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void si_check_render_feedback_textures(struct si_context *sctx,
|
static void si_check_render_feedback_textures(struct si_context *sctx,
|
||||||
|
@ -835,7 +835,7 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
|
||||||
zsbuf ? (struct r600_texture*)zsbuf->texture : NULL;
|
zsbuf ? (struct r600_texture*)zsbuf->texture : NULL;
|
||||||
|
|
||||||
if (buffers & PIPE_CLEAR_COLOR) {
|
if (buffers & PIPE_CLEAR_COLOR) {
|
||||||
evergreen_do_fast_color_clear(&sctx->b, fb,
|
si_do_fast_color_clear(&sctx->b, fb,
|
||||||
&sctx->framebuffer.atom, &buffers,
|
&sctx->framebuffer.atom, &buffers,
|
||||||
&sctx->framebuffer.dirty_cbufs,
|
&sctx->framebuffer.dirty_cbufs,
|
||||||
color);
|
color);
|
||||||
|
@ -1175,7 +1175,7 @@ void si_resource_copy_region(struct pipe_context *ctx,
|
||||||
src_templ.format);
|
src_templ.format);
|
||||||
|
|
||||||
/* Initialize the surface. */
|
/* Initialize the surface. */
|
||||||
dst_view = r600_create_surface_custom(ctx, dst, &dst_templ,
|
dst_view = si_create_surface_custom(ctx, dst, &dst_templ,
|
||||||
dst_width0, dst_height0,
|
dst_width0, dst_height0,
|
||||||
dst_width, dst_height);
|
dst_width, dst_height);
|
||||||
|
|
||||||
|
|
|
@ -175,7 +175,7 @@ static void *si_create_compute_state(
|
||||||
|
|
||||||
if ((sctx->b.debug.debug_message && !sctx->b.debug.async) ||
|
if ((sctx->b.debug.debug_message && !sctx->b.debug.async) ||
|
||||||
sctx->is_debug ||
|
sctx->is_debug ||
|
||||||
r600_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE))
|
si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE))
|
||||||
si_create_compute_state_async(program, -1);
|
si_create_compute_state_async(program, -1);
|
||||||
else
|
else
|
||||||
util_queue_add_job(&sscreen->shader_compiler_queue,
|
util_queue_add_job(&sscreen->shader_compiler_queue,
|
||||||
|
@ -328,7 +328,7 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx,
|
||||||
r600_resource_reference(&sctx->compute_scratch_buffer, NULL);
|
r600_resource_reference(&sctx->compute_scratch_buffer, NULL);
|
||||||
|
|
||||||
sctx->compute_scratch_buffer = (struct r600_resource*)
|
sctx->compute_scratch_buffer = (struct r600_resource*)
|
||||||
r600_aligned_buffer_create(&sctx->screen->b.b,
|
si_aligned_buffer_create(&sctx->screen->b.b,
|
||||||
R600_RESOURCE_FLAG_UNMAPPABLE,
|
R600_RESOURCE_FLAG_UNMAPPABLE,
|
||||||
PIPE_USAGE_DEFAULT,
|
PIPE_USAGE_DEFAULT,
|
||||||
scratch_needed, 256);
|
scratch_needed, 256);
|
||||||
|
|
|
@ -309,7 +309,7 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size,
|
||||||
sctx->scratch_buffer->b.b.width0 < scratch_size) {
|
sctx->scratch_buffer->b.b.width0 < scratch_size) {
|
||||||
r600_resource_reference(&sctx->scratch_buffer, NULL);
|
r600_resource_reference(&sctx->scratch_buffer, NULL);
|
||||||
sctx->scratch_buffer = (struct r600_resource*)
|
sctx->scratch_buffer = (struct r600_resource*)
|
||||||
r600_aligned_buffer_create(&sctx->screen->b.b,
|
si_aligned_buffer_create(&sctx->screen->b.b,
|
||||||
R600_RESOURCE_FLAG_UNMAPPABLE,
|
R600_RESOURCE_FLAG_UNMAPPABLE,
|
||||||
PIPE_USAGE_DEFAULT,
|
PIPE_USAGE_DEFAULT,
|
||||||
scratch_size, 256);
|
scratch_size, 256);
|
||||||
|
|
|
@ -540,14 +540,14 @@ static void si_dump_framebuffer(struct si_context *sctx, struct u_log_context *l
|
||||||
|
|
||||||
rtex = (struct r600_texture*)state->cbufs[i]->texture;
|
rtex = (struct r600_texture*)state->cbufs[i]->texture;
|
||||||
u_log_printf(log, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i);
|
u_log_printf(log, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i);
|
||||||
r600_print_texture_info(sctx->b.screen, rtex, log);
|
si_print_texture_info(sctx->b.screen, rtex, log);
|
||||||
u_log_printf(log, "\n");
|
u_log_printf(log, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (state->zsbuf) {
|
if (state->zsbuf) {
|
||||||
rtex = (struct r600_texture*)state->zsbuf->texture;
|
rtex = (struct r600_texture*)state->zsbuf->texture;
|
||||||
u_log_printf(log, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
|
u_log_printf(log, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
|
||||||
r600_print_texture_info(sctx->b.screen, rtex, log);
|
si_print_texture_info(sctx->b.screen, rtex, log);
|
||||||
u_log_printf(log, "\n");
|
u_log_printf(log, "\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -390,7 +390,7 @@ static void si_set_sampler_view_desc(struct si_context *sctx,
|
||||||
|
|
||||||
if (unlikely(!is_buffer && sview->dcc_incompatible)) {
|
if (unlikely(!is_buffer && sview->dcc_incompatible)) {
|
||||||
if (vi_dcc_enabled(rtex, view->u.tex.first_level))
|
if (vi_dcc_enabled(rtex, view->u.tex.first_level))
|
||||||
if (!r600_texture_disable_dcc(&sctx->b, rtex))
|
if (!si_texture_disable_dcc(&sctx->b, rtex))
|
||||||
sctx->b.decompress_dcc(&sctx->b.b, rtex);
|
sctx->b.decompress_dcc(&sctx->b.b, rtex);
|
||||||
|
|
||||||
sview->dcc_incompatible = false;
|
sview->dcc_incompatible = false;
|
||||||
|
@ -674,7 +674,7 @@ static void si_set_shader_image_desc(struct si_context *ctx,
|
||||||
* The decompression is relatively cheap if the surface
|
* The decompression is relatively cheap if the surface
|
||||||
* has been decompressed already.
|
* has been decompressed already.
|
||||||
*/
|
*/
|
||||||
if (!r600_texture_disable_dcc(&ctx->b, tex))
|
if (!si_texture_disable_dcc(&ctx->b, tex))
|
||||||
ctx->b.decompress_dcc(&ctx->b.b, tex);
|
ctx->b.decompress_dcc(&ctx->b.b, tex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1404,7 +1404,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* Set the VGT regs. */
|
/* Set the VGT regs. */
|
||||||
r600_set_streamout_targets(ctx, num_targets, targets, offsets);
|
si_common_set_streamout_targets(ctx, num_targets, targets, offsets);
|
||||||
|
|
||||||
/* Set the shader resources.*/
|
/* Set the shader resources.*/
|
||||||
for (i = 0; i < num_targets; i++) {
|
for (i = 0; i < num_targets; i++) {
|
||||||
|
@ -1636,10 +1636,10 @@ static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf
|
||||||
|
|
||||||
/* Update the streamout state. */
|
/* Update the streamout state. */
|
||||||
if (sctx->b.streamout.begin_emitted)
|
if (sctx->b.streamout.begin_emitted)
|
||||||
r600_emit_streamout_end(&sctx->b);
|
si_emit_streamout_end(&sctx->b);
|
||||||
sctx->b.streamout.append_bitmask =
|
sctx->b.streamout.append_bitmask =
|
||||||
sctx->b.streamout.enabled_mask;
|
sctx->b.streamout.enabled_mask;
|
||||||
r600_streamout_buffers_dirty(&sctx->b);
|
si_streamout_buffers_dirty(&sctx->b);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1795,7 +1795,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
|
||||||
uint64_t old_va = rbuffer->gpu_address;
|
uint64_t old_va = rbuffer->gpu_address;
|
||||||
|
|
||||||
/* Reallocate the buffer in the same pipe_resource. */
|
/* Reallocate the buffer in the same pipe_resource. */
|
||||||
r600_alloc_resource(&sctx->screen->b, rbuffer);
|
si_alloc_resource(&sctx->screen->b, rbuffer);
|
||||||
|
|
||||||
si_rebind_buffer(ctx, buf, old_va);
|
si_rebind_buffer(ctx, buf, old_va);
|
||||||
}
|
}
|
||||||
|
|
|
@ -62,7 +62,7 @@ static void si_dma_copy_buffer(struct si_context *ctx,
|
||||||
}
|
}
|
||||||
|
|
||||||
ncopy = DIV_ROUND_UP(size, max_size);
|
ncopy = DIV_ROUND_UP(size, max_size);
|
||||||
r600_need_dma_space(&ctx->b, ncopy * 5, rdst, rsrc);
|
si_need_dma_space(&ctx->b, ncopy * 5, rdst, rsrc);
|
||||||
|
|
||||||
for (i = 0; i < ncopy; i++) {
|
for (i = 0; i < ncopy; i++) {
|
||||||
count = MIN2(size, max_size);
|
count = MIN2(size, max_size);
|
||||||
|
@ -104,7 +104,7 @@ static void si_dma_clear_buffer(struct pipe_context *ctx,
|
||||||
|
|
||||||
/* the same maximum size as for copying */
|
/* the same maximum size as for copying */
|
||||||
ncopy = DIV_ROUND_UP(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
|
ncopy = DIV_ROUND_UP(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
|
||||||
r600_need_dma_space(&sctx->b, ncopy * 4, rdst, NULL);
|
si_need_dma_space(&sctx->b, ncopy * 4, rdst, NULL);
|
||||||
|
|
||||||
for (i = 0; i < ncopy; i++) {
|
for (i = 0; i < ncopy; i++) {
|
||||||
csize = MIN2(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
|
csize = MIN2(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
|
||||||
|
@ -193,7 +193,7 @@ static void si_dma_copy_tile(struct si_context *ctx,
|
||||||
mt = G_009910_MICRO_TILE_MODE(tile_mode);
|
mt = G_009910_MICRO_TILE_MODE(tile_mode);
|
||||||
size = copy_height * pitch;
|
size = copy_height * pitch;
|
||||||
ncopy = DIV_ROUND_UP(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
|
ncopy = DIV_ROUND_UP(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
|
||||||
r600_need_dma_space(&ctx->b, ncopy * 9, &rdst->resource, &rsrc->resource);
|
si_need_dma_space(&ctx->b, ncopy * 9, &rdst->resource, &rsrc->resource);
|
||||||
|
|
||||||
for (i = 0; i < ncopy; i++) {
|
for (i = 0; i < ncopy; i++) {
|
||||||
cheight = copy_height;
|
cheight = copy_height;
|
||||||
|
@ -261,7 +261,7 @@ static void si_dma_copy(struct pipe_context *ctx,
|
||||||
goto fallback;
|
goto fallback;
|
||||||
|
|
||||||
if (src_box->depth > 1 ||
|
if (src_box->depth > 1 ||
|
||||||
!r600_prepare_for_dma_blit(&sctx->b, rdst, dst_level, dstx, dsty,
|
!si_prepare_for_dma_blit(&sctx->b, rdst, dst_level, dstx, dsty,
|
||||||
dstz, rsrc, src_level, src_box))
|
dstz, rsrc, src_level, src_box))
|
||||||
goto fallback;
|
goto fallback;
|
||||||
|
|
||||||
|
|
|
@ -29,7 +29,7 @@
|
||||||
|
|
||||||
void si_destroy_saved_cs(struct si_saved_cs *scs)
|
void si_destroy_saved_cs(struct si_saved_cs *scs)
|
||||||
{
|
{
|
||||||
radeon_clear_saved_cs(&scs->gfx);
|
si_clear_saved_cs(&scs->gfx);
|
||||||
r600_resource_reference(&scs->trace_buf, NULL);
|
r600_resource_reference(&scs->trace_buf, NULL);
|
||||||
free(scs);
|
free(scs);
|
||||||
}
|
}
|
||||||
|
@ -80,7 +80,7 @@ void si_context_gfx_flush(void *context, unsigned flags,
|
||||||
if (!radeon_emitted(cs, ctx->b.initial_gfx_cs_size))
|
if (!radeon_emitted(cs, ctx->b.initial_gfx_cs_size))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (r600_check_device_reset(&ctx->b))
|
if (si_check_device_reset(&ctx->b))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (ctx->screen->b.debug_flags & DBG_CHECK_VM)
|
if (ctx->screen->b.debug_flags & DBG_CHECK_VM)
|
||||||
|
@ -98,7 +98,7 @@ void si_context_gfx_flush(void *context, unsigned flags,
|
||||||
|
|
||||||
ctx->gfx_flush_in_progress = true;
|
ctx->gfx_flush_in_progress = true;
|
||||||
|
|
||||||
r600_preflush_suspend_features(&ctx->b);
|
si_preflush_suspend_features(&ctx->b);
|
||||||
|
|
||||||
ctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
|
ctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
|
||||||
SI_CONTEXT_PS_PARTIAL_FLUSH;
|
SI_CONTEXT_PS_PARTIAL_FLUSH;
|
||||||
|
@ -115,7 +115,7 @@ void si_context_gfx_flush(void *context, unsigned flags,
|
||||||
si_log_hw_flush(ctx);
|
si_log_hw_flush(ctx);
|
||||||
|
|
||||||
/* Save the IB for debug contexts. */
|
/* Save the IB for debug contexts. */
|
||||||
radeon_save_cs(ws, cs, &ctx->current_saved_cs->gfx, true);
|
si_save_cs(ws, cs, &ctx->current_saved_cs->gfx, true);
|
||||||
ctx->current_saved_cs->flushed = true;
|
ctx->current_saved_cs->flushed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -260,7 +260,7 @@ void si_begin_new_cs(struct si_context *ctx)
|
||||||
&ctx->scratch_buffer->b.b);
|
&ctx->scratch_buffer->b.b);
|
||||||
}
|
}
|
||||||
|
|
||||||
r600_postflush_resume_features(&ctx->b);
|
si_postflush_resume_features(&ctx->b);
|
||||||
|
|
||||||
assert(!ctx->b.gfx.cs->prev_dw);
|
assert(!ctx->b.gfx.cs->prev_dw);
|
||||||
ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->current.cdw;
|
ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->current.cdw;
|
||||||
|
|
|
@ -614,10 +614,10 @@ static void si_pc_emit_stop(struct r600_common_context *ctx,
|
||||||
{
|
{
|
||||||
struct radeon_winsys_cs *cs = ctx->gfx.cs;
|
struct radeon_winsys_cs *cs = ctx->gfx.cs;
|
||||||
|
|
||||||
r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0,
|
si_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0,
|
||||||
EOP_DATA_SEL_VALUE_32BIT,
|
EOP_DATA_SEL_VALUE_32BIT,
|
||||||
buffer, va, 0, R600_NOT_QUERY);
|
buffer, va, 0, R600_NOT_QUERY);
|
||||||
r600_gfx_wait_fence(ctx, va, 0, 0xffffffff);
|
si_gfx_wait_fence(ctx, va, 0, 0xffffffff);
|
||||||
|
|
||||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
|
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
|
||||||
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
|
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
|
||||||
|
@ -676,7 +676,7 @@ static void si_pc_emit_read(struct r600_common_context *ctx,
|
||||||
|
|
||||||
static void si_pc_cleanup(struct r600_common_screen *rscreen)
|
static void si_pc_cleanup(struct r600_common_screen *rscreen)
|
||||||
{
|
{
|
||||||
r600_perfcounters_do_destroy(rscreen->perfcounters);
|
si_perfcounters_do_destroy(rscreen->perfcounters);
|
||||||
rscreen->perfcounters = NULL;
|
rscreen->perfcounters = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -717,7 +717,7 @@ void si_init_perfcounters(struct si_screen *screen)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
pc->num_start_cs_dwords = 14;
|
pc->num_start_cs_dwords = 14;
|
||||||
pc->num_stop_cs_dwords = 14 + r600_gfx_write_fence_dwords(&screen->b);
|
pc->num_stop_cs_dwords = 14 + si_gfx_write_fence_dwords(&screen->b);
|
||||||
pc->num_instance_cs_dwords = 3;
|
pc->num_instance_cs_dwords = 3;
|
||||||
pc->num_shaders_cs_dwords = 4;
|
pc->num_shaders_cs_dwords = 4;
|
||||||
|
|
||||||
|
@ -734,7 +734,7 @@ void si_init_perfcounters(struct si_screen *screen)
|
||||||
pc->emit_read = si_pc_emit_read;
|
pc->emit_read = si_pc_emit_read;
|
||||||
pc->cleanup = si_pc_cleanup;
|
pc->cleanup = si_pc_cleanup;
|
||||||
|
|
||||||
if (!r600_perfcounters_init(pc, num_blocks))
|
if (!si_perfcounters_init(pc, num_blocks))
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
for (i = 0; i < num_blocks; ++i) {
|
for (i = 0; i < num_blocks; ++i) {
|
||||||
|
@ -746,7 +746,7 @@ void si_init_perfcounters(struct si_screen *screen)
|
||||||
instances = 2;
|
instances = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
r600_perfcounters_add_block(&screen->b, pc,
|
si_perfcounters_add_block(&screen->b, pc,
|
||||||
block->b->name,
|
block->b->name,
|
||||||
block->b->flags,
|
block->b->flags,
|
||||||
block->b->num_counters,
|
block->b->num_counters,
|
||||||
|
@ -759,5 +759,5 @@ void si_init_perfcounters(struct si_screen *screen)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
error:
|
error:
|
||||||
r600_perfcounters_do_destroy(pc);
|
si_perfcounters_do_destroy(pc);
|
||||||
}
|
}
|
||||||
|
|
|
@ -88,7 +88,7 @@ static void si_destroy_context(struct pipe_context *context)
|
||||||
if (sctx->blitter)
|
if (sctx->blitter)
|
||||||
util_blitter_destroy(sctx->blitter);
|
util_blitter_destroy(sctx->blitter);
|
||||||
|
|
||||||
r600_common_context_cleanup(&sctx->b);
|
si_common_context_cleanup(&sctx->b);
|
||||||
|
|
||||||
LLVMDisposeTargetMachine(sctx->tm);
|
LLVMDisposeTargetMachine(sctx->tm);
|
||||||
|
|
||||||
|
@ -145,7 +145,7 @@ si_create_llvm_target_machine(struct si_screen *sscreen)
|
||||||
sscreen->b.debug_flags & DBG_SI_SCHED ? ",+si-scheduler" : "");
|
sscreen->b.debug_flags & DBG_SI_SCHED ? ",+si-scheduler" : "");
|
||||||
|
|
||||||
return LLVMCreateTargetMachine(ac_get_llvm_target(triple), triple,
|
return LLVMCreateTargetMachine(ac_get_llvm_target(triple), triple,
|
||||||
r600_get_llvm_processor_name(sscreen->b.family),
|
si_get_llvm_processor_name(sscreen->b.family),
|
||||||
features,
|
features,
|
||||||
LLVMCodeGenLevelDefault,
|
LLVMCodeGenLevelDefault,
|
||||||
LLVMRelocDefault,
|
LLVMRelocDefault,
|
||||||
|
@ -185,7 +185,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
|
||||||
sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
|
sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
|
||||||
sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
|
sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
|
||||||
|
|
||||||
if (!r600_common_context_init(&sctx->b, &sscreen->b, flags))
|
if (!si_common_context_init(&sctx->b, &sscreen->b, flags))
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
if (sscreen->b.info.drm_major == 3)
|
if (sscreen->b.info.drm_major == 3)
|
||||||
|
@ -243,7 +243,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
|
||||||
sctx->blitter = util_blitter_create(&sctx->b.b);
|
sctx->blitter = util_blitter_create(&sctx->b.b);
|
||||||
if (sctx->blitter == NULL)
|
if (sctx->blitter == NULL)
|
||||||
goto fail;
|
goto fail;
|
||||||
sctx->blitter->draw_rectangle = r600_draw_rectangle;
|
sctx->blitter->draw_rectangle = si_draw_rectangle;
|
||||||
|
|
||||||
sctx->sample_mask.sample_mask = 0xffff;
|
sctx->sample_mask.sample_mask = 0xffff;
|
||||||
|
|
||||||
|
@ -271,7 +271,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
|
||||||
* if NUM_RECORDS == 0). We need to use a dummy buffer instead. */
|
* if NUM_RECORDS == 0). We need to use a dummy buffer instead. */
|
||||||
if (sctx->b.chip_class == CIK) {
|
if (sctx->b.chip_class == CIK) {
|
||||||
sctx->null_const_buf.buffer =
|
sctx->null_const_buf.buffer =
|
||||||
r600_aligned_buffer_create(screen,
|
si_aligned_buffer_create(screen,
|
||||||
R600_RESOURCE_FLAG_UNMAPPABLE,
|
R600_RESOURCE_FLAG_UNMAPPABLE,
|
||||||
PIPE_USAGE_DEFAULT, 16,
|
PIPE_USAGE_DEFAULT, 16,
|
||||||
sctx->screen->b.info.tcc_cache_line_size);
|
sctx->screen->b.info.tcc_cache_line_size);
|
||||||
|
@ -375,7 +375,7 @@ static struct pipe_context *si_pipe_create_context(struct pipe_screen *screen,
|
||||||
return ctx;
|
return ctx;
|
||||||
|
|
||||||
return threaded_context_create(ctx, &sscreen->b.pool_transfers,
|
return threaded_context_create(ctx, &sscreen->b.pool_transfers,
|
||||||
r600_replace_buffer_storage,
|
si_replace_buffer_storage,
|
||||||
&((struct si_context*)ctx)->b.tc);
|
&((struct si_context*)ctx)->b.tc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -835,13 +835,13 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
|
||||||
struct si_shader_part *part = parts[i];
|
struct si_shader_part *part = parts[i];
|
||||||
|
|
||||||
parts[i] = part->next;
|
parts[i] = part->next;
|
||||||
radeon_shader_binary_clean(&part->binary);
|
si_radeon_shader_binary_clean(&part->binary);
|
||||||
FREE(part);
|
FREE(part);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
mtx_destroy(&sscreen->shader_parts_mutex);
|
mtx_destroy(&sscreen->shader_parts_mutex);
|
||||||
si_destroy_shader_cache(sscreen);
|
si_destroy_shader_cache(sscreen);
|
||||||
r600_destroy_common_screen(&sscreen->b);
|
si_destroy_common_screen(&sscreen->b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool si_init_gs_info(struct si_screen *sscreen)
|
static bool si_init_gs_info(struct si_screen *sscreen)
|
||||||
|
@ -885,7 +885,7 @@ static void si_handle_env_var_force_family(struct si_screen *sscreen)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
|
for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
|
||||||
if (!strcmp(family, r600_get_llvm_processor_name(i))) {
|
if (!strcmp(family, si_get_llvm_processor_name(i))) {
|
||||||
/* Override family and chip_class. */
|
/* Override family and chip_class. */
|
||||||
sscreen->b.family = sscreen->b.info.family = i;
|
sscreen->b.family = sscreen->b.info.family = i;
|
||||||
|
|
||||||
|
@ -969,7 +969,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
|
||||||
sscreen->b.b.get_compiler_options = si_get_compiler_options;
|
sscreen->b.b.get_compiler_options = si_get_compiler_options;
|
||||||
sscreen->b.b.get_device_uuid = radeonsi_get_device_uuid;
|
sscreen->b.b.get_device_uuid = radeonsi_get_device_uuid;
|
||||||
sscreen->b.b.get_driver_uuid = radeonsi_get_driver_uuid;
|
sscreen->b.b.get_driver_uuid = radeonsi_get_driver_uuid;
|
||||||
sscreen->b.b.resource_create = r600_resource_create_common;
|
sscreen->b.b.resource_create = si_resource_create_common;
|
||||||
|
|
||||||
si_init_screen_state_functions(sscreen);
|
si_init_screen_state_functions(sscreen);
|
||||||
|
|
||||||
|
@ -982,7 +982,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
|
||||||
if (driQueryOptionb(config->options, "radeonsi_enable_sisched"))
|
if (driQueryOptionb(config->options, "radeonsi_enable_sisched"))
|
||||||
sscreen->b.debug_flags |= DBG_SI_SCHED;
|
sscreen->b.debug_flags |= DBG_SI_SCHED;
|
||||||
|
|
||||||
if (!r600_common_screen_init(&sscreen->b, ws) ||
|
if (!si_common_screen_init(&sscreen->b, ws) ||
|
||||||
!si_init_gs_info(sscreen) ||
|
!si_init_gs_info(sscreen) ||
|
||||||
!si_init_shader_cache(sscreen)) {
|
!si_init_shader_cache(sscreen)) {
|
||||||
FREE(sscreen);
|
FREE(sscreen);
|
||||||
|
@ -1110,7 +1110,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
|
||||||
sscreen->b.aux_context = si_create_context(&sscreen->b.b, 0);
|
sscreen->b.aux_context = si_create_context(&sscreen->b.b, 0);
|
||||||
|
|
||||||
if (sscreen->b.debug_flags & DBG_TEST_DMA)
|
if (sscreen->b.debug_flags & DBG_TEST_DMA)
|
||||||
r600_test_dma(&sscreen->b);
|
si_test_dma(&sscreen->b);
|
||||||
|
|
||||||
if (sscreen->b.debug_flags & (DBG_TEST_VMFAULT_CP |
|
if (sscreen->b.debug_flags & (DBG_TEST_VMFAULT_CP |
|
||||||
DBG_TEST_VMFAULT_SDMA |
|
DBG_TEST_VMFAULT_SDMA |
|
||||||
|
|
|
@ -5109,7 +5109,7 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
|
||||||
max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
|
max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
|
||||||
|
|
||||||
if (!check_debug_option ||
|
if (!check_debug_option ||
|
||||||
r600_can_dump_shader(&sscreen->b, processor)) {
|
si_can_dump_shader(&sscreen->b, processor)) {
|
||||||
if (processor == PIPE_SHADER_FRAGMENT) {
|
if (processor == PIPE_SHADER_FRAGMENT) {
|
||||||
fprintf(file, "*** SHADER CONFIG ***\n"
|
fprintf(file, "*** SHADER CONFIG ***\n"
|
||||||
"SPI_PS_INPUT_ADDR = 0x%04x\n"
|
"SPI_PS_INPUT_ADDR = 0x%04x\n"
|
||||||
|
@ -5181,7 +5181,7 @@ void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
|
||||||
FILE *file, bool check_debug_option)
|
FILE *file, bool check_debug_option)
|
||||||
{
|
{
|
||||||
if (!check_debug_option ||
|
if (!check_debug_option ||
|
||||||
r600_can_dump_shader(&sscreen->b, processor))
|
si_can_dump_shader(&sscreen->b, processor))
|
||||||
si_dump_shader_key(processor, shader, file);
|
si_dump_shader_key(processor, shader, file);
|
||||||
|
|
||||||
if (!check_debug_option && shader->binary.llvm_ir_string) {
|
if (!check_debug_option && shader->binary.llvm_ir_string) {
|
||||||
|
@ -5198,7 +5198,7 @@ void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!check_debug_option ||
|
if (!check_debug_option ||
|
||||||
(r600_can_dump_shader(&sscreen->b, processor) &&
|
(si_can_dump_shader(&sscreen->b, processor) &&
|
||||||
!(sscreen->b.debug_flags & DBG_NO_ASM))) {
|
!(sscreen->b.debug_flags & DBG_NO_ASM))) {
|
||||||
fprintf(file, "\n%s:\n", si_get_shader_name(shader, processor));
|
fprintf(file, "\n%s:\n", si_get_shader_name(shader, processor));
|
||||||
|
|
||||||
|
@ -5236,7 +5236,7 @@ static int si_compile_llvm(struct si_screen *sscreen,
|
||||||
int r = 0;
|
int r = 0;
|
||||||
unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations);
|
unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations);
|
||||||
|
|
||||||
if (r600_can_dump_shader(&sscreen->b, processor)) {
|
if (si_can_dump_shader(&sscreen->b, processor)) {
|
||||||
fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
|
fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
|
||||||
|
|
||||||
if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR))) {
|
if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR))) {
|
||||||
|
@ -5434,7 +5434,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
|
||||||
debug, PIPE_SHADER_GEOMETRY,
|
debug, PIPE_SHADER_GEOMETRY,
|
||||||
"GS Copy Shader");
|
"GS Copy Shader");
|
||||||
if (!r) {
|
if (!r) {
|
||||||
if (r600_can_dump_shader(&sscreen->b, PIPE_SHADER_GEOMETRY))
|
if (si_can_dump_shader(&sscreen->b, PIPE_SHADER_GEOMETRY))
|
||||||
fprintf(stderr, "GS Copy Shader:\n");
|
fprintf(stderr, "GS Copy Shader:\n");
|
||||||
si_shader_dump(sscreen, ctx.shader, debug,
|
si_shader_dump(sscreen, ctx.shader, debug,
|
||||||
PIPE_SHADER_GEOMETRY, stderr, true);
|
PIPE_SHADER_GEOMETRY, stderr, true);
|
||||||
|
@ -6352,7 +6352,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
|
||||||
|
|
||||||
/* Dump TGSI code before doing TGSI->LLVM conversion in case the
|
/* Dump TGSI code before doing TGSI->LLVM conversion in case the
|
||||||
* conversion fails. */
|
* conversion fails. */
|
||||||
if (r600_can_dump_shader(&sscreen->b, sel->info.processor) &&
|
if (si_can_dump_shader(&sscreen->b, sel->info.processor) &&
|
||||||
!(sscreen->b.debug_flags & DBG_NO_TGSI)) {
|
!(sscreen->b.debug_flags & DBG_NO_TGSI)) {
|
||||||
if (sel->tokens)
|
if (sel->tokens)
|
||||||
tgsi_dump(sel->tokens, 0);
|
tgsi_dump(sel->tokens, 0);
|
||||||
|
@ -6561,7 +6561,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
|
||||||
si_optimize_vs_outputs(&ctx);
|
si_optimize_vs_outputs(&ctx);
|
||||||
|
|
||||||
if ((debug && debug->debug_message) ||
|
if ((debug && debug->debug_message) ||
|
||||||
r600_can_dump_shader(&sscreen->b, ctx.type))
|
si_can_dump_shader(&sscreen->b, ctx.type))
|
||||||
si_count_scratch_private_memory(&ctx);
|
si_count_scratch_private_memory(&ctx);
|
||||||
|
|
||||||
/* Compile to bytecode. */
|
/* Compile to bytecode. */
|
||||||
|
@ -7750,7 +7750,7 @@ void si_shader_destroy(struct si_shader *shader)
|
||||||
r600_resource_reference(&shader->bo, NULL);
|
r600_resource_reference(&shader->bo, NULL);
|
||||||
|
|
||||||
if (!shader->is_binary_shared)
|
if (!shader->is_binary_shared)
|
||||||
radeon_shader_binary_clean(&shader->binary);
|
si_radeon_shader_binary_clean(&shader->binary);
|
||||||
|
|
||||||
free(shader->shader_log);
|
free(shader->shader_log);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1388,7 +1388,7 @@ void si_llvm_optimize_module(struct si_shader_context *ctx)
|
||||||
|
|
||||||
/* Dump LLVM IR before any optimization passes */
|
/* Dump LLVM IR before any optimization passes */
|
||||||
if (ctx->screen->b.debug_flags & DBG_PREOPT_IR &&
|
if (ctx->screen->b.debug_flags & DBG_PREOPT_IR &&
|
||||||
r600_can_dump_shader(&ctx->screen->b, ctx->type))
|
si_can_dump_shader(&ctx->screen->b, ctx->type))
|
||||||
LLVMDumpModule(ctx->gallivm.module);
|
LLVMDumpModule(ctx->gallivm.module);
|
||||||
|
|
||||||
/* Create the pass manager */
|
/* Create the pass manager */
|
||||||
|
@ -1397,7 +1397,7 @@ void si_llvm_optimize_module(struct si_shader_context *ctx)
|
||||||
target_library_info = gallivm_create_target_library_info(triple);
|
target_library_info = gallivm_create_target_library_info(triple);
|
||||||
LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
|
LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
|
||||||
|
|
||||||
if (r600_extra_shader_checks(&ctx->screen->b, ctx->type))
|
if (si_extra_shader_checks(&ctx->screen->b, ctx->type))
|
||||||
LLVMAddVerifierPass(gallivm->passmgr);
|
LLVMAddVerifierPass(gallivm->passmgr);
|
||||||
|
|
||||||
LLVMAddAlwaysInlinerPass(gallivm->passmgr);
|
LLVMAddAlwaysInlinerPass(gallivm->passmgr);
|
||||||
|
|
|
@ -1003,7 +1003,7 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
|
||||||
sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR;
|
sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR;
|
||||||
sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color);
|
sctx->current_vs_state |= S_VS_STATE_CLAMP_VERTEX_COLOR(rs->clamp_vertex_color);
|
||||||
|
|
||||||
r600_viewport_set_rast_deps(&sctx->b, rs->scissor_enable, rs->clip_halfz);
|
si_viewport_set_rast_deps(&sctx->b, rs->scissor_enable, rs->clip_halfz);
|
||||||
|
|
||||||
si_pm4_bind_state(sctx, rasterizer, rs);
|
si_pm4_bind_state(sctx, rasterizer, rs);
|
||||||
si_update_poly_offset_state(sctx);
|
si_update_poly_offset_state(sctx);
|
||||||
|
@ -2093,7 +2093,7 @@ static unsigned si_is_vertex_format_supported(struct pipe_screen *screen,
|
||||||
static bool si_is_colorbuffer_format_supported(enum pipe_format format)
|
static bool si_is_colorbuffer_format_supported(enum pipe_format format)
|
||||||
{
|
{
|
||||||
return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
|
return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
|
||||||
r600_translate_colorswap(format, false) != ~0U;
|
si_translate_colorswap(format, false) != ~0U;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool si_is_zs_format_supported(enum pipe_format format)
|
static bool si_is_zs_format_supported(enum pipe_format format)
|
||||||
|
@ -2354,7 +2354,7 @@ static void si_initialize_color_surface(struct si_context *sctx,
|
||||||
R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
|
R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
|
||||||
}
|
}
|
||||||
assert(format != V_028C70_COLOR_INVALID);
|
assert(format != V_028C70_COLOR_INVALID);
|
||||||
swap = r600_translate_colorswap(surf->base.format, false);
|
swap = si_translate_colorswap(surf->base.format, false);
|
||||||
endian = si_colorformat_endian_swap(format);
|
endian = si_colorformat_endian_swap(format);
|
||||||
|
|
||||||
/* blend clamp should be set for all NORM/SRGB types */
|
/* blend clamp should be set for all NORM/SRGB types */
|
||||||
|
@ -2719,7 +2719,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vi_dcc_enabled(rtex, surf->base.u.tex.level))
|
if (vi_dcc_enabled(rtex, surf->base.u.tex.level))
|
||||||
if (!r600_texture_disable_dcc(&sctx->b, rtex))
|
if (!si_texture_disable_dcc(&sctx->b, rtex))
|
||||||
sctx->b.decompress_dcc(ctx, rtex);
|
sctx->b.decompress_dcc(ctx, rtex);
|
||||||
|
|
||||||
surf->dcc_incompatible = false;
|
surf->dcc_incompatible = false;
|
||||||
|
@ -3184,7 +3184,7 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx,
|
||||||
|
|
||||||
if (nr_samples != sctx->msaa_sample_locs.nr_samples) {
|
if (nr_samples != sctx->msaa_sample_locs.nr_samples) {
|
||||||
sctx->msaa_sample_locs.nr_samples = nr_samples;
|
sctx->msaa_sample_locs.nr_samples = nr_samples;
|
||||||
cayman_emit_msaa_sample_locs(cs, nr_samples);
|
si_common_emit_msaa_sample_locs(cs, nr_samples);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sctx->b.family >= CHIP_POLARIS10) {
|
if (sctx->b.family >= CHIP_POLARIS10) {
|
||||||
|
@ -3296,7 +3296,7 @@ static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
|
||||||
S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
|
S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
|
||||||
S_028A4C_FORCE_EOV_REZ_ENABLE(1);
|
S_028A4C_FORCE_EOV_REZ_ENABLE(1);
|
||||||
|
|
||||||
cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
|
si_common_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
|
||||||
sctx->ps_iter_samples,
|
sctx->ps_iter_samples,
|
||||||
sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0,
|
sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0,
|
||||||
sc_mode_cntl_1);
|
sc_mode_cntl_1);
|
||||||
|
@ -3629,7 +3629,7 @@ si_make_texture_descriptor(struct si_screen *screen,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tex->dcc_offset) {
|
if (tex->dcc_offset) {
|
||||||
unsigned swap = r600_translate_colorswap(pipe_format, false);
|
unsigned swap = si_translate_colorswap(pipe_format, false);
|
||||||
|
|
||||||
state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
|
state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
|
||||||
} else {
|
} else {
|
||||||
|
@ -3805,7 +3805,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
|
||||||
/* Depth/stencil texturing sometimes needs separate texture. */
|
/* Depth/stencil texturing sometimes needs separate texture. */
|
||||||
if (tmp->is_depth && !r600_can_sample_zs(tmp, view->is_stencil_sampler)) {
|
if (tmp->is_depth && !r600_can_sample_zs(tmp, view->is_stencil_sampler)) {
|
||||||
if (!tmp->flushed_depth_texture &&
|
if (!tmp->flushed_depth_texture &&
|
||||||
!r600_init_flushed_depth_texture(ctx, texture, NULL)) {
|
!si_init_flushed_depth_texture(ctx, texture, NULL)) {
|
||||||
pipe_resource_reference(&view->base.texture, NULL);
|
pipe_resource_reference(&view->base.texture, NULL);
|
||||||
FREE(view);
|
FREE(view);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -4413,7 +4413,7 @@ void si_init_state_functions(struct si_context *sctx)
|
||||||
sctx->b.b.set_stencil_ref = si_set_stencil_ref;
|
sctx->b.b.set_stencil_ref = si_set_stencil_ref;
|
||||||
|
|
||||||
sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
|
sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
|
||||||
sctx->b.b.get_sample_position = cayman_get_sample_position;
|
sctx->b.b.get_sample_position = si_get_sample_position;
|
||||||
|
|
||||||
sctx->b.b.create_sampler_state = si_create_sampler_state;
|
sctx->b.b.create_sampler_state = si_create_sampler_state;
|
||||||
sctx->b.b.delete_sampler_state = si_delete_sampler_state;
|
sctx->b.b.delete_sampler_state = si_delete_sampler_state;
|
||||||
|
|
|
@ -893,7 +893,7 @@ void si_emit_cache_flush(struct si_context *sctx)
|
||||||
|
|
||||||
/* Necessary for DCC */
|
/* Necessary for DCC */
|
||||||
if (rctx->chip_class == VI)
|
if (rctx->chip_class == VI)
|
||||||
r600_gfx_write_event_eop(rctx, V_028A90_FLUSH_AND_INV_CB_DATA_TS,
|
si_gfx_write_event_eop(rctx, V_028A90_FLUSH_AND_INV_CB_DATA_TS,
|
||||||
0, EOP_DATA_SEL_DISCARD, NULL,
|
0, EOP_DATA_SEL_DISCARD, NULL,
|
||||||
0, 0, R600_NOT_QUERY);
|
0, 0, R600_NOT_QUERY);
|
||||||
}
|
}
|
||||||
|
@ -1008,11 +1008,11 @@ void si_emit_cache_flush(struct si_context *sctx)
|
||||||
va = sctx->wait_mem_scratch->gpu_address;
|
va = sctx->wait_mem_scratch->gpu_address;
|
||||||
sctx->wait_mem_number++;
|
sctx->wait_mem_number++;
|
||||||
|
|
||||||
r600_gfx_write_event_eop(rctx, cb_db_event, tc_flags,
|
si_gfx_write_event_eop(rctx, cb_db_event, tc_flags,
|
||||||
EOP_DATA_SEL_VALUE_32BIT,
|
EOP_DATA_SEL_VALUE_32BIT,
|
||||||
sctx->wait_mem_scratch, va,
|
sctx->wait_mem_scratch, va,
|
||||||
sctx->wait_mem_number, R600_NOT_QUERY);
|
sctx->wait_mem_number, R600_NOT_QUERY);
|
||||||
r600_gfx_wait_fence(rctx, va, sctx->wait_mem_number, 0xffffffff);
|
si_gfx_wait_fence(rctx, va, sctx->wait_mem_number, 0xffffffff);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Make sure ME is idle (it executes most packets) before continuing.
|
/* Make sure ME is idle (it executes most packets) before continuing.
|
||||||
|
|
|
@ -2226,7 +2226,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
||||||
|
|
||||||
if ((sctx->b.debug.debug_message && !sctx->b.debug.async) ||
|
if ((sctx->b.debug.debug_message && !sctx->b.debug.async) ||
|
||||||
sctx->is_debug ||
|
sctx->is_debug ||
|
||||||
r600_can_dump_shader(&sscreen->b, sel->info.processor))
|
si_can_dump_shader(&sscreen->b, sel->info.processor))
|
||||||
si_init_shader_selector_async(sel, -1);
|
si_init_shader_selector_async(sel, -1);
|
||||||
else
|
else
|
||||||
util_queue_add_job(&sscreen->shader_compiler_queue, sel,
|
util_queue_add_job(&sscreen->shader_compiler_queue, sel,
|
||||||
|
@ -2299,7 +2299,7 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
|
||||||
sctx->vs_shader.current = sel ? sel->first_variant : NULL;
|
sctx->vs_shader.current = sel ? sel->first_variant : NULL;
|
||||||
|
|
||||||
si_update_common_shader_state(sctx);
|
si_update_common_shader_state(sctx);
|
||||||
r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
|
si_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
|
||||||
si_set_active_descriptors_for_shader(sctx, sel);
|
si_set_active_descriptors_for_shader(sctx, sel);
|
||||||
si_update_streamout_state(sctx);
|
si_update_streamout_state(sctx);
|
||||||
si_update_clip_regs(sctx, old_hw_vs, old_hw_vs_variant,
|
si_update_clip_regs(sctx, old_hw_vs, old_hw_vs_variant,
|
||||||
|
@ -2342,7 +2342,7 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
|
||||||
if (sctx->ia_multi_vgt_param_key.u.uses_tess)
|
if (sctx->ia_multi_vgt_param_key.u.uses_tess)
|
||||||
si_update_tess_uses_prim_id(sctx);
|
si_update_tess_uses_prim_id(sctx);
|
||||||
}
|
}
|
||||||
r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
|
si_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
|
||||||
si_set_active_descriptors_for_shader(sctx, sel);
|
si_set_active_descriptors_for_shader(sctx, sel);
|
||||||
si_update_streamout_state(sctx);
|
si_update_streamout_state(sctx);
|
||||||
si_update_clip_regs(sctx, old_hw_vs, old_hw_vs_variant,
|
si_update_clip_regs(sctx, old_hw_vs, old_hw_vs_variant,
|
||||||
|
@ -2393,7 +2393,7 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
|
||||||
si_shader_change_notify(sctx);
|
si_shader_change_notify(sctx);
|
||||||
sctx->last_tes_sh_base = -1; /* invalidate derived tess state */
|
sctx->last_tes_sh_base = -1; /* invalidate derived tess state */
|
||||||
}
|
}
|
||||||
r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
|
si_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
|
||||||
si_set_active_descriptors_for_shader(sctx, sel);
|
si_set_active_descriptors_for_shader(sctx, sel);
|
||||||
si_update_streamout_state(sctx);
|
si_update_streamout_state(sctx);
|
||||||
si_update_clip_regs(sctx, old_hw_vs, old_hw_vs_variant,
|
si_update_clip_regs(sctx, old_hw_vs, old_hw_vs_variant,
|
||||||
|
@ -2710,7 +2710,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
|
||||||
if (update_esgs) {
|
if (update_esgs) {
|
||||||
pipe_resource_reference(&sctx->esgs_ring, NULL);
|
pipe_resource_reference(&sctx->esgs_ring, NULL);
|
||||||
sctx->esgs_ring =
|
sctx->esgs_ring =
|
||||||
r600_aligned_buffer_create(sctx->b.b.screen,
|
si_aligned_buffer_create(sctx->b.b.screen,
|
||||||
R600_RESOURCE_FLAG_UNMAPPABLE,
|
R600_RESOURCE_FLAG_UNMAPPABLE,
|
||||||
PIPE_USAGE_DEFAULT,
|
PIPE_USAGE_DEFAULT,
|
||||||
esgs_ring_size, alignment);
|
esgs_ring_size, alignment);
|
||||||
|
@ -2721,7 +2721,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
|
||||||
if (update_gsvs) {
|
if (update_gsvs) {
|
||||||
pipe_resource_reference(&sctx->gsvs_ring, NULL);
|
pipe_resource_reference(&sctx->gsvs_ring, NULL);
|
||||||
sctx->gsvs_ring =
|
sctx->gsvs_ring =
|
||||||
r600_aligned_buffer_create(sctx->b.b.screen,
|
si_aligned_buffer_create(sctx->b.b.screen,
|
||||||
R600_RESOURCE_FLAG_UNMAPPABLE,
|
R600_RESOURCE_FLAG_UNMAPPABLE,
|
||||||
PIPE_USAGE_DEFAULT,
|
PIPE_USAGE_DEFAULT,
|
||||||
gsvs_ring_size, alignment);
|
gsvs_ring_size, alignment);
|
||||||
|
@ -2963,7 +2963,7 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
|
||||||
r600_resource_reference(&sctx->scratch_buffer, NULL);
|
r600_resource_reference(&sctx->scratch_buffer, NULL);
|
||||||
|
|
||||||
sctx->scratch_buffer = (struct r600_resource*)
|
sctx->scratch_buffer = (struct r600_resource*)
|
||||||
r600_aligned_buffer_create(&sctx->screen->b.b,
|
si_aligned_buffer_create(&sctx->screen->b.b,
|
||||||
R600_RESOURCE_FLAG_UNMAPPABLE,
|
R600_RESOURCE_FLAG_UNMAPPABLE,
|
||||||
PIPE_USAGE_DEFAULT,
|
PIPE_USAGE_DEFAULT,
|
||||||
scratch_needed_size, 256);
|
scratch_needed_size, 256);
|
||||||
|
@ -3021,7 +3021,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
|
||||||
/* Use 64K alignment for both rings, so that we can pass the address
|
/* Use 64K alignment for both rings, so that we can pass the address
|
||||||
* to shaders as one SGPR containing bits [16:47].
|
* to shaders as one SGPR containing bits [16:47].
|
||||||
*/
|
*/
|
||||||
sctx->tf_ring = r600_aligned_buffer_create(sctx->b.b.screen,
|
sctx->tf_ring = si_aligned_buffer_create(sctx->b.b.screen,
|
||||||
R600_RESOURCE_FLAG_UNMAPPABLE,
|
R600_RESOURCE_FLAG_UNMAPPABLE,
|
||||||
PIPE_USAGE_DEFAULT,
|
PIPE_USAGE_DEFAULT,
|
||||||
32768 * sctx->screen->b.info.max_se,
|
32768 * sctx->screen->b.info.max_se,
|
||||||
|
@ -3032,7 +3032,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
|
||||||
assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
|
assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
|
||||||
|
|
||||||
sctx->tess_offchip_ring =
|
sctx->tess_offchip_ring =
|
||||||
r600_aligned_buffer_create(sctx->b.b.screen,
|
si_aligned_buffer_create(sctx->b.b.screen,
|
||||||
R600_RESOURCE_FLAG_UNMAPPABLE,
|
R600_RESOURCE_FLAG_UNMAPPABLE,
|
||||||
PIPE_USAGE_DEFAULT,
|
PIPE_USAGE_DEFAULT,
|
||||||
max_offchip_buffers *
|
max_offchip_buffers *
|
||||||
|
|
|
@ -98,7 +98,7 @@ struct pipe_video_buffer *si_video_buffer_create(struct pipe_context *pipe,
|
||||||
pbs[i] = &resources[i]->resource.buf;
|
pbs[i] = &resources[i]->resource.buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
rvid_join_surfaces(&ctx->b, pbs, surfaces);
|
si_vid_join_surfaces(&ctx->b, pbs, surfaces);
|
||||||
|
|
||||||
for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
|
for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
|
||||||
if (!resources[i])
|
if (!resources[i])
|
||||||
|
@ -131,7 +131,7 @@ static struct pb_buffer* si_uvd_set_dtb(struct ruvd_msg *msg, struct vl_video_bu
|
||||||
|
|
||||||
msg->body.decode.dt_field_mode = buf->base.interlaced;
|
msg->body.decode.dt_field_mode = buf->base.interlaced;
|
||||||
|
|
||||||
ruvd_set_dt_surfaces(msg, &luma->surface, (chroma) ? &chroma->surface : NULL, type);
|
si_uvd_set_dt_surfaces(msg, &luma->surface, (chroma) ? &chroma->surface : NULL, type);
|
||||||
|
|
||||||
return luma->resource.buf;
|
return luma->resource.buf;
|
||||||
}
|
}
|
||||||
|
@ -160,8 +160,8 @@ struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context *context,
|
||||||
bool vcn = (ctx->b.family == CHIP_RAVEN) ? true : false;
|
bool vcn = (ctx->b.family == CHIP_RAVEN) ? true : false;
|
||||||
|
|
||||||
if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE)
|
if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE)
|
||||||
return rvce_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer);
|
return si_vce_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer);
|
||||||
|
|
||||||
return (vcn) ? radeon_create_decoder(context, templ) :
|
return (vcn) ? radeon_create_decoder(context, templ) :
|
||||||
ruvd_create_decoder(context, templ, si_uvd_set_dtb);
|
si_common_uvd_create_decoder(context, templ, si_uvd_set_dtb);
|
||||||
}
|
}
|
||||||
|
|
|
@ -128,7 +128,6 @@ nodist_EXTRA_pipe_r600_la_SOURCES = dummy.cpp
|
||||||
pipe_r600_la_LIBADD = \
|
pipe_r600_la_LIBADD = \
|
||||||
$(PIPE_LIBS) \
|
$(PIPE_LIBS) \
|
||||||
$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la \
|
$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la \
|
||||||
$(top_builddir)/src/gallium/drivers/radeon/libradeon.la \
|
|
||||||
$(top_builddir)/src/gallium/drivers/r600/libr600.la \
|
$(top_builddir)/src/gallium/drivers/r600/libr600.la \
|
||||||
$(LIBDRM_LIBS) \
|
$(LIBDRM_LIBS) \
|
||||||
$(RADEON_LIBS) \
|
$(RADEON_LIBS) \
|
||||||
|
|
Loading…
Reference in New Issue