nvc0: add support for accelerated video decoding through the dedicated engines

Currently the use of external firmware is required, with kernel and
userspace firmware needed for all Fermi cards except nvd9. Kepler and nvd9
should only require kernel firmware.
This commit is contained in:
Maarten Lankhorst 2012-12-02 12:07:35 +01:00 committed by Maarten Lankhorst
parent 6eb0d3d863
commit 9ba7eac535
7 changed files with 1834 additions and 4 deletions

View File

@ -33,7 +33,7 @@ LIBDRM_REQUIRED=2.4.24
LIBDRM_RADEON_REQUIRED=2.4.40
LIBDRM_INTEL_REQUIRED=2.4.38
LIBDRM_NVVIEUX_REQUIRED=2.4.33
LIBDRM_NOUVEAU_REQUIRED=2.4.33
LIBDRM_NOUVEAU_REQUIRED="2.4.33 libdrm >= 2.4.41"
DRI2PROTO_REQUIRED=2.6
GLPROTO_REQUIRED=1.4.14
LIBDRM_XORG_REQUIRED=2.4.24

View File

@ -14,4 +14,7 @@ C_SOURCES := \
nvc0_program.c \
nvc0_shader_state.c \
nvc0_query.c \
nvc0_video.c
nvc0_video.c \
nvc0_video_bsp.c \
nvc0_video_vp.c \
nvc0_video_ppp.c

View File

@ -1,5 +1,5 @@
/*
* Copyright 2011 Maarten Lankhorst
* Copyright 2011-2013 Maarten Lankhorst
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -54,6 +54,116 @@ nvc0_screen_get_video_param(struct pipe_screen *pscreen,
}
}
/**
 * pipe_video_decoder::decode_bitstream hook.
 *
 * Allocates the next sequence number and runs the three decoder stages
 * (BSP, then VP, then PPP) for one picture, in that order.
 *
 * \param decoder       the decoder, actually a struct nvc0_decoder
 * \param video_target  destination buffer, actually a struct
 *                      nvc0_video_buffer; must be NV12
 * \param picture       codec specific picture description
 * \param num_buffers   number of entries in \p data and \p num_bytes
 * \param data          bitstream chunks
 * \param num_bytes     size in bytes of each chunk
 */
static void
nvc0_decoder_decode_bitstream(struct pipe_video_decoder *decoder,
                              struct pipe_video_buffer *video_target,
                              struct pipe_picture_desc *picture,
                              unsigned num_buffers,
                              const void *const *data,
                              const unsigned *num_bytes)
{
   struct nvc0_decoder *dec = (struct nvc0_decoder *)decoder;
   struct nvc0_video_buffer *target = (struct nvc0_video_buffer *)video_target;
   uint32_t comm_seq = ++dec->fence_seq;
   union pipe_desc desc;

   unsigned vp_caps, is_ref, ret;
   struct nvc0_video_buffer *refs[16] = {};

   desc.base = picture;

   assert(target->base.buffer_format == PIPE_FORMAT_NV12);

   ret = nvc0_decoder_bsp(dec, desc, target, comm_seq,
                          num_buffers, data, num_bytes,
                          &vp_caps, &is_ref, refs);

   /* did we decode bitstream correctly? */
   assert(ret == 2);

   /* The assert above disappears when NDEBUG is defined.  nvc0_decoder_bsp()
    * can fail before it has written vp_caps/is_ref, so never hand those
    * (then uninitialised) values to the later stages.
    */
   if (ret != 2) {
      debug_printf("BSP stage failed (%u), dropping picture\n", ret);
      return;
   }

   nvc0_decoder_vp(dec, desc, target, comm_seq, vp_caps, is_ref, refs);
   nvc0_decoder_ppp(dec, desc, target, comm_seq);
}
/**
 * pipe_video_decoder::flush hook.
 *
 * Intentionally a no-op: the decoder argument is accepted and ignored.
 */
static void
nvc0_decoder_flush(struct pipe_video_decoder *decoder)
{
   (void)decoder;
}
/**
 * pipe_video_decoder::begin_frame hook.
 *
 * Does nothing; all arguments are ignored.
 */
static void
nvc0_decoder_begin_frame(struct pipe_video_decoder *decoder,
                         struct pipe_video_buffer *target,
                         struct pipe_picture_desc *picture)
{
   (void)decoder;
   (void)target;
   (void)picture;
}
/**
 * pipe_video_decoder::end_frame hook.
 *
 * Does nothing; all arguments are ignored.
 */
static void
nvc0_decoder_end_frame(struct pipe_video_decoder *decoder,
                       struct pipe_video_buffer *target,
                       struct pipe_picture_desc *picture)
{
   (void)decoder;
   (void)target;
   (void)picture;
}
/**
 * pipe_video_decoder::destroy hook.
 *
 * Drops every buffer object reference held by the decoder, deletes the
 * three engine objects, then the pushbuf/channel pairs, and finally frees
 * the decoder itself.  Also used on the error paths of decoder creation,
 * so any of the members may still be NULL.
 */
static void
nvc0_decoder_destroy(struct pipe_video_decoder *decoder)
{
   struct nvc0_decoder *dec = (struct nvc0_decoder *)decoder;
   int slot, num_channels;

   nouveau_bo_ref(NULL, &dec->ref_bo);
   nouveau_bo_ref(NULL, &dec->bitplane_bo);
   for (slot = 0; slot < 2; ++slot)
      nouveau_bo_ref(NULL, &dec->inter_bo[slot]);
#ifdef NVC0_DEBUG_FENCE
   nouveau_bo_ref(NULL, &dec->fence_bo);
#endif
   nouveau_bo_ref(NULL, &dec->fw_bo);

   for (slot = 0; slot < NVC0_VIDEO_QDEPTH; ++slot)
      nouveau_bo_ref(NULL, &dec->bsp_bo[slot]);

   nouveau_object_del(&dec->bsp);
   nouveau_object_del(&dec->vp);
   nouveau_object_del(&dec->ppp);

   /* When slots 0 and 1 hold the same channel pointer, only slot 0 is
    * deleted; otherwise each of the three slots is deleted on its own.
    */
   num_channels = (dec->channel[0] != dec->channel[1]) ? 3 : 1;
   for (slot = 0; slot < num_channels; ++slot) {
      nouveau_pushbuf_del(&dec->pushbuf[slot]);
      nouveau_object_del(&dec->channel[slot]);
   }

   FREE(dec);
}
/**
 * Write the path of the firmware file for \p profile into \p path.
 *
 * The file is selected by codec; for VC-1 the file name additionally
 * carries the profile's distance from PIPE_VIDEO_PROFILE_VC1_SIMPLE.
 * \p path is written with sprintf() and must be large enough for the
 * result.  An unhandled codec trips an assert and leaves \p path untouched.
 */
static void nvc0_video_getpath(enum pipe_video_profile profile, char *path)
{
   switch (u_reduce_video_profile(profile)) {
   case PIPE_VIDEO_CODEC_MPEG12:
      sprintf(path, "/lib/firmware/nouveau/vuc-mpeg12-0");
      return;
   case PIPE_VIDEO_CODEC_MPEG4:
      sprintf(path, "/lib/firmware/nouveau/vuc-mpeg4-0");
      return;
   case PIPE_VIDEO_CODEC_VC1:
      sprintf(path, "/lib/firmware/nouveau/vuc-vc1-%u", profile - PIPE_VIDEO_PROFILE_VC1_SIMPLE);
      return;
   case PIPE_VIDEO_CODEC_MPEG4_AVC:
      sprintf(path, "/lib/firmware/nouveau/vuc-h264-0");
      return;
   default:
      assert(0);
   }
}
struct pipe_video_decoder *
nvc0_create_decoder(struct pipe_context *context,
enum pipe_video_profile profile,
@ -62,6 +172,20 @@ nvc0_create_decoder(struct pipe_context *context,
unsigned width, unsigned height, unsigned max_references,
bool chunked_decode)
{
struct nouveau_screen *screen = &((struct nvc0_context *)context)->screen->base;
struct nvc0_decoder *dec;
struct nouveau_pushbuf **push;
union nouveau_bo_config cfg;
bool kepler = screen->device->chipset >= 0xe0;
cfg.nvc0.tile_mode = 0x10;
cfg.nvc0.memtype = 0xfe;
int ret, i;
uint32_t codec = 1, ppp_codec = 3;
uint32_t timeout;
u32 tmp_size = 0;
if (getenv("XVMC_VL"))
return vl_create_decoder(context, profile, entrypoint,
chroma_format, width, height,
@ -72,6 +196,307 @@ nvc0_create_decoder(struct pipe_context *context,
return NULL;
}
dec = CALLOC_STRUCT(nvc0_decoder);
if (!dec)
return NULL;
dec->client = screen->client;
if (!kepler) {
dec->bsp_idx = 5;
dec->vp_idx = 6;
dec->ppp_idx = 7;
} else {
dec->bsp_idx = 2;
dec->vp_idx = 2;
dec->ppp_idx = 2;
}
for (i = 0; i < 3; ++i)
if (i && !kepler) {
dec->channel[i] = dec->channel[0];
dec->pushbuf[i] = dec->pushbuf[0];
} else {
void *data;
u32 size;
struct nvc0_fifo nvc0_args = {};
struct nve0_fifo nve0_args = {};
if (!kepler) {
size = sizeof(nvc0_args);
data = &nvc0_args;
} else {
unsigned engine[] = {
NVE0_FIFO_ENGINE_BSP,
NVE0_FIFO_ENGINE_VP,
NVE0_FIFO_ENGINE_PPP
};
nve0_args.engine = engine[i];
size = sizeof(nve0_args);
data = &nve0_args;
}
ret = nouveau_object_new(&screen->device->object, 0,
NOUVEAU_FIFO_CHANNEL_CLASS,
data, size, &dec->channel[i]);
if (!ret)
ret = nouveau_pushbuf_new(screen->client, dec->channel[i], 4,
32 * 1024, true, &dec->pushbuf[i]);
if (ret)
break;
}
push = dec->pushbuf;
if (!kepler) {
if (!ret)
ret = nouveau_object_new(dec->channel[0], 0x390b1, 0x90b1, NULL, 0, &dec->bsp);
if (!ret)
ret = nouveau_object_new(dec->channel[1], 0x190b2, 0x90b2, NULL, 0, &dec->vp);
if (!ret)
ret = nouveau_object_new(dec->channel[2], 0x290b3, 0x90b3, NULL, 0, &dec->ppp);
} else {
if (!ret)
ret = nouveau_object_new(dec->channel[0], 0x95b1, 0x95b1, NULL, 0, &dec->bsp);
if (!ret)
ret = nouveau_object_new(dec->channel[1], 0x95b2, 0x95b2, NULL, 0, &dec->vp);
if (!ret)
ret = nouveau_object_new(dec->channel[2], 0x90b3, 0x90b3, NULL, 0, &dec->ppp);
}
if (ret)
goto fail;
BEGIN_NVC0(push[0], SUBC_BSP(NV01_SUBCHAN_OBJECT), 1);
PUSH_DATA (push[0], dec->bsp->handle);
BEGIN_NVC0(push[1], SUBC_VP(NV01_SUBCHAN_OBJECT), 1);
PUSH_DATA (push[1], dec->vp->handle);
BEGIN_NVC0(push[2], SUBC_PPP(NV01_SUBCHAN_OBJECT), 1);
PUSH_DATA (push[2], dec->ppp->handle);
dec->base.context = context;
dec->base.profile = profile;
dec->base.entrypoint = entrypoint;
dec->base.chroma_format = chroma_format;
dec->base.width = width;
dec->base.height = height;
dec->base.max_references = max_references;
dec->base.destroy = nvc0_decoder_destroy;
dec->base.flush = nvc0_decoder_flush;
dec->base.decode_bitstream = nvc0_decoder_decode_bitstream;
dec->base.begin_frame = nvc0_decoder_begin_frame;
dec->base.end_frame = nvc0_decoder_end_frame;
for (i = 0; i < NVC0_VIDEO_QDEPTH && !ret; ++i)
ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
0, 1 << 20, &cfg, &dec->bsp_bo[i]);
if (!ret)
ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
0x100, 4 << 20, &cfg, &dec->inter_bo[0]);
if (!ret) {
if (!kepler)
nouveau_bo_ref(dec->inter_bo[0], &dec->inter_bo[1]);
else
ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
0x100, dec->inter_bo[0]->size, &cfg,
&dec->inter_bo[1]);
}
if (ret)
goto fail;
switch (u_reduce_video_profile(profile)) {
case PIPE_VIDEO_CODEC_MPEG12: {
codec = 1;
assert(max_references <= 2);
break;
}
case PIPE_VIDEO_CODEC_MPEG4: {
codec = 4;
tmp_size = mb(height)*16 * mb(width)*16;
assert(max_references <= 2);
break;
}
case PIPE_VIDEO_CODEC_VC1: {
ppp_codec = codec = 2;
tmp_size = mb(height)*16 * mb(width)*16;
assert(max_references <= 2);
break;
}
case PIPE_VIDEO_CODEC_MPEG4_AVC: {
codec = 3;
dec->tmp_stride = 16 * mb_half(width) * nvc0_video_align(height) * 3 / 2;
tmp_size = dec->tmp_stride * (max_references + 1);
assert(max_references <= 16);
break;
}
default:
fprintf(stderr, "invalid codec\n");
goto fail;
}
if (screen->device->chipset < 0xd0) {
   /* Chipsets below 0xd0 get a firmware image loaded from disk: allocate
    * and map a 0x4000 byte buffer, read the file selected by
    * nvc0_video_getpath() into it, trim the trailing run of identical
    * words and record the resulting sizes in dec->fw_sizes.
    */
   int fd;
   char path[PATH_MAX];
   ssize_t r;
   uint32_t *end, endval;

   ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0,
                        0x4000, &cfg, &dec->fw_bo);
   if (!ret)
      ret = nouveau_bo_map(dec->fw_bo, NOUVEAU_BO_WR, dec->client);
   if (ret)
      goto fail;

   nvc0_video_getpath(profile, path);

   fd = open(path, O_RDONLY | O_CLOEXEC);
   if (fd < 0) {
      fprintf(stderr, "opening firmware file %s failed: %m\n", path);
      goto fw_fail;
   }
   r = read(fd, dec->fw_bo->map, 0x4000);
   if (r < 0) {
      /* Report first: close() may overwrite the errno that %m prints. */
      fprintf(stderr, "reading firmware file %s failed: %m\n", path);
      close(fd);
      goto fw_fail;
   }
   /* The descriptor is not needed any more; close it on every path so it
    * is not leaked when the load succeeds.
    */
   close(fd);

   /* A completely filled buffer means the file did not fit. */
   if (r == 0x4000) {
      fprintf(stderr, "firmware file %s too large!\n", path);
      goto fw_fail;
   }

   /* The size must be a non-zero multiple of 0x100.  Rejecting an empty
    * file also keeps the scan below from starting in front of the mapping.
    */
   if (r == 0 || (r & 0xff)) {
      fprintf(stderr, "firmware file %s wrong size!\n", path);
      goto fw_fail;
   }

   /* Walk back over the trailing words that all carry the same value as
    * the last word of the file, without ever leaving the mapping.
    */
   end = (uint32_t *)((char *)dec->fw_bo->map + r - 4);
   endval = *end;
   while (end > (uint32_t *)dec->fw_bo->map && endval == *end)
      end--;
   if (endval == *end) {
      /* Every word of the file is identical: nothing usable is left. */
      fprintf(stderr, "firmware file %s wrong size!\n", path);
      goto fw_fail;
   }

   r = (intptr_t)end - (intptr_t)dec->fw_bo->map + 4;

   /* fw_sizes: a per-codec constant in the upper 16 bits, the remainder
    * of the trimmed size in the lower bits.
    */
   switch (u_reduce_video_profile(profile)) {
   case PIPE_VIDEO_CODEC_MPEG12: {
      assert((r & 0xff) == 0xe0);
      dec->fw_sizes = (0x2e0<<16) | (r - 0x2e0);
      break;
   }
   case PIPE_VIDEO_CODEC_MPEG4: {
      assert((r & 0xff) == 0xe0);
      dec->fw_sizes = (0x2e0<<16) | (r - 0x2e0);
      break;
   }
   case PIPE_VIDEO_CODEC_VC1: {
      assert((r & 0xff) == 0xac);
      dec->fw_sizes = (0x3ac<<16) | (r - 0x3ac);
      break;
   }
   case PIPE_VIDEO_CODEC_MPEG4_AVC: {
      assert((r & 0xff) == 0x70);
      dec->fw_sizes = (0x370<<16) | (r - 0x370);
      break;
   }
   default:
      goto fw_fail;
   }
   /* Drop the CPU mapping by hand and forget the stale pointer. */
   munmap(dec->fw_bo->map, dec->fw_bo->size);
   dec->fw_bo->map = NULL;
}
if (codec != 3) {
ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0,
0x400, &cfg, &dec->bitplane_bo);
if (ret)
goto fail;
}
dec->ref_stride = mb(width)*16 * (mb_half(height)*32 + nvc0_video_align(height)/2);
ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0,
dec->ref_stride * (max_references+2) + tmp_size,
&cfg, &dec->ref_bo);
if (ret)
goto fail;
timeout = 0;
BEGIN_NVC0(push[0], SUBC_BSP(0x200), 2);
PUSH_DATA (push[0], codec);
PUSH_DATA (push[0], timeout);
BEGIN_NVC0(push[1], SUBC_VP(0x200), 2);
PUSH_DATA (push[1], codec);
PUSH_DATA (push[1], timeout);
BEGIN_NVC0(push[2], SUBC_PPP(0x200), 2);
PUSH_DATA (push[2], ppp_codec);
PUSH_DATA (push[2], timeout);
++dec->fence_seq;
#if NVC0_DEBUG_FENCE
ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART|NOUVEAU_BO_MAP,
0, 0x1000, &cfg, &dec->fence_bo);
if (ret)
goto fail;
nouveau_bo_map(dec->fence_bo, NOUVEAU_BO_RDWR, screen->client);
dec->fence_map = dec->fence_bo->map;
dec->fence_map[0] = dec->fence_map[4] = dec->fence_map[8] = 0;
dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map)));
/* So lets test if the fence is working? */
BEGIN_NVC0(push[0], SUBC_BSP(0x240), 3);
PUSH_DATAh(push[0], dec->fence_bo->offset);
PUSH_DATA (push[0], dec->fence_bo->offset);
PUSH_DATA (push[0], dec->fence_seq);
BEGIN_NVC0(push[0], SUBC_BSP(0x304), 1);
PUSH_DATA (push[0], 1);
PUSH_KICK (push[0]);
BEGIN_NVC0(push[1], SUBC_VP(0x240), 3);
PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10));
PUSH_DATA (push[1], (dec->fence_bo->offset + 0x10));
PUSH_DATA (push[1], dec->fence_seq);
BEGIN_NVC0(push[1], SUBC_VP(0x304), 1);
PUSH_DATA (push[1], 1);
PUSH_KICK (push[1]);
BEGIN_NVC0(push[2], SUBC_PPP(0x240), 3);
PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20));
PUSH_DATA (push[2], (dec->fence_bo->offset + 0x20));
PUSH_DATA (push[2], dec->fence_seq);
BEGIN_NVC0(push[2], SUBC_PPP(0x304), 1);
PUSH_DATA (push[2], 1);
PUSH_KICK (push[2]);
usleep(100);
while (dec->fence_seq > dec->fence_map[0] &&
dec->fence_seq > dec->fence_map[4] &&
dec->fence_seq > dec->fence_map[8]) {
debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]);
usleep(100);
}
debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]);
#endif
return &dec->base;
fw_fail:
debug_printf("Cannot create decoder without firmware..\n");
nvc0_decoder_destroy(&dec->base);
return NULL;
fail:
debug_printf("Creation failed: %s (%i)\n", strerror(-ret), ret);
nvc0_decoder_destroy(&dec->base);
return NULL;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright 2011 Maarten Lankhorst
* Copyright 2011-2013 Maarten Lankhorst
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -29,6 +29,30 @@
#include "util/u_video.h"
/* Multiplied by the slice count in nvc0_decoder_inter_sizes(). */
#define SLICE_SIZE 0x200
/* NOTE(review): presumably the byte offset of the VP picture parameters
 * inside a bsp_bo -- confirm against the VP code.
 */
#define VP_OFFSET 0x200
/* Byte offset of struct comm inside the buffer that carries it. */
#define COMM_OFFSET 0x500
/* Uncomment to enable the fence debugging code.  Some users test this macro
 * with #if rather than #ifdef, so it has to be defined to a non-zero value.
 */
//#define NVC0_DEBUG_FENCE 1
/* Number of bsp_bo buffers the decoder cycles through. */
#ifdef NVC0_DEBUG_FENCE
# define NVC0_VIDEO_QDEPTH 1
#else
# define NVC0_VIDEO_QDEPTH 2
#endif
/* Each of these expands to TWO macro arguments, the engine's subchannel
 * index and the method, and relies on a variable named 'dec' being in scope
 * at the point of use.
 */
#define SUBC_BSP(m) dec->bsp_idx, (m)
#define SUBC_VP(m) dec->vp_idx, (m)
#define SUBC_PPP(m) dec->ppp_idx, (m)
/* One picture description pointer, viewable as the generic base type or as
 * any of the codec specific types.  Callers store the pointer through 'base'
 * and read it back through the member matching the decoder's codec.
 */
union pipe_desc {
struct pipe_picture_desc *base;
struct pipe_mpeg12_picture_desc *mpeg12;
struct pipe_mpeg4_picture_desc *mpeg4;
struct pipe_vc1_picture_desc *vc1;
struct pipe_h264_picture_desc *h264;
};
struct nvc0_video_buffer {
struct pipe_video_buffer base;
unsigned num_planes, valid_ref;
@ -38,6 +62,79 @@ struct nvc0_video_buffer {
struct pipe_surface *surfaces[VL_NUM_COMPONENTS * 2];
};
/* Driver private decoder state.  'base' must stay the first member: the
 * pipe_video_decoder hooks cast the base pointer back to this type.
 */
struct nvc0_decoder {
struct pipe_video_decoder base;
struct nouveau_client *client;
/* Index 0/1/2 of channel[] and pushbuf[] is used for BSP/VP/PPP.  The
 * entries may all refer to one shared channel/pushbuf pair.
 */
struct nouveau_object *channel[3], *bsp, *vp, *ppp;
struct nouveau_pushbuf *pushbuf[3];
#ifdef NVC0_DEBUG_FENCE
/* dump fence and comm, as needed.. */
unsigned *fence_map;
struct comm *comm;
struct nouveau_bo *fence_bo;
#endif
struct nouveau_bo *fw_bo, *bitplane_bo;
// array size max_references + 2, contains unpostprocessed images
// added at the end of ref_bo is a tmp array
// tmp is an array for h264, with each member being used for a ref frame or current
// target.. size = (((mb(w)*((mb(h)+1)&~1))+3)>>2)<<8 * (max_references+1)
// for other codecs, it simply seems that size = w*h is enough
// unsure what it's supposed to contain..
struct nouveau_bo *ref_bo;
struct nouveau_bo *inter_bo[2];
struct nouveau_bo *bsp_bo[NVC0_VIDEO_QDEPTH];
// bo's used by each cycle:
// bsp_bo: contains raw bitstream data and parameters for BSP and VP.
// inter_bo: contains data shared between BSP and VP
// ref_bo: reference image data, used by PPP and VP
// bitplane_bo: contain bitplane data (similar to ref_bo), used by BSP only
// fw_bo: used by VP only.
// Needed amount of copies in optimal case:
// 2 copies of inter_bo, VP would process the last inter_bo, while BSP is
// writing out a new set.
// NVC0_VIDEO_QDEPTH copies of bsp_bo. We don't want to block the pipeline ever,
// and give shaders a chance to run as well.
// NOTE(review): refs[] is presumably the reference frame bookkeeping used by
// the VP code (not visible here) -- confirm before relying on it.
struct {
struct nvc0_video_buffer *vidbuf;
unsigned last_used;
unsigned field_pic_flag : 1;
unsigned decoded_top : 1;
unsigned decoded_bottom : 1;
} refs[17];
// fence_seq: incremented once per decoded picture, passed on as comm_seq.
// fw_sizes: derived from the firmware file when one is loaded.
// ref_stride: bytes per image slot in ref_bo; tmp_stride: set for h264 only.
unsigned fence_seq, fw_sizes, last_frame_num, tmp_stride, ref_stride;
// subchannel indices consumed by the SUBC_BSP/SUBC_VP/SUBC_PPP macros
unsigned bsp_idx, vp_idx, ppp_idx;
};
/* Layout of the communication area that lives COMM_OFFSET bytes into its
 * buffer.  The trailing comment on each member is its byte offset in hex;
 * member order and sizes must not be changed.
 * NOTE(review): the area is presumably written by the engines' firmware --
 * the member meanings below come from their names only.
 */
struct comm {
uint32_t bsp_cur_index; // 000
uint32_t byte_ofs; // 004
uint32_t status[0x10]; // 008
uint32_t pos[0x10]; // 048
uint8_t pad[0x100 - 0x88]; // 0a0 bool comm_encrypted
uint32_t pvp_cur_index; // 100
uint32_t acked_byte_ofs; // 104
uint32_t status_vp[0x10]; // 108
uint16_t mb_y[0x10]; //148
uint32_t pvp_stage; // 168 0xeeXX
uint16_t parse_endpos_index; // 16c
uint16_t irq_index; // 16e
uint8_t irq_470[0x10]; // 170
uint32_t irq_pos[0x10]; // 180
uint32_t parse_endpos[0x10]; // 1c0
};
static INLINE uint32_t nvc0_video_align(uint32_t h)
{
return ((h+0x3f)&~0x3f);
@ -52,3 +149,73 @@ static INLINE uint32_t mb_half(uint32_t coord)
{
return (coord + 0x1f)>>5;
}
/**
 * Address of an image slot inside dec->ref_bo.
 *
 * \param dec     the decoder
 * \param target  video buffer whose slot (target->valid_ref) is wanted, or
 *                NULL to select slot max_references + 1
 * \return ref_bo's offset plus slot * ref_stride
 */
static INLINE uint64_t
nvc0_video_addr(struct nvc0_decoder *dec, struct nvc0_video_buffer *target)
{
   uint64_t slot;

   if (target)
      slot = target->valid_ref;
   else
      slot = dec->base.max_references + 1;

   /* Multiply in 64 bits: the result is a 64-bit address, so the byte
    * offset must not be truncated to 32 bits before it is added.
    */
   return dec->ref_bo->offset + (uint64_t)dec->ref_stride * slot;
}
/**
 * Compute three offsets inside one image slot of ref_bo.
 *
 * All three results are in units of 0x100 bytes and are derived from the
 * decoder's width and height only.  If the layout would not fit into
 * dec->ref_stride, a message is printed, the three outputs are zeroed and
 * an assert fires.
 *
 * \param dec    the decoder
 * \param y2     out: second luma offset
 * \param cbcr   out: first chroma offset
 * \param cbcr2  out: second chroma offset
 */
static INLINE void
nvc0_decoder_ycbcr_offsets(struct nvc0_decoder *dec, uint32_t *y2,
                           uint32_t *cbcr, uint32_t *cbcr2)
{
   const uint32_t width_mbs = mb(dec->base.width);
   const uint32_t second_y = mb_half(dec->base.height) * width_mbs;
   const uint32_t chroma = second_y * 2;
   const uint32_t chroma_span = width_mbs * (nvc0_video_align(dec->base.height) >> 6);
   uint32_t needed;

   *y2 = second_y;
   *cbcr = chroma;
   *cbcr2 = chroma + chroma_span;

   /* The check here should never fail because it means a bug
    * in the code rather than a bug in hardware..
    */
   needed = (2 * chroma_span + chroma) << 8;
   if (needed <= dec->ref_stride)
      return;

   debug_printf("Overshot ref_stride (%u) with size %u and ofs (%u,%u,%u)\n",
                dec->ref_stride, needed, *y2<<8, *cbcr<<8, *cbcr2<<8);
   *y2 = *cbcr = *cbcr2 = 0;
   assert(needed <= dec->ref_stride);
}
/**
 * Split inter_bo[0] into its three regions.
 *
 * All outputs are in units of 0x100 bytes.
 *
 * \param dec          the decoder
 * \param slice_count  number of slices to reserve room for
 * \param slice_size   out: SLICE_SIZE * slice_count
 * \param bucket_size  out: 0 for MPEG1/2, otherwise 3 * mb(width)
 * \param ring_size    out: whatever is left of inter_bo[0] after the
 *                     other two regions
 */
static INLINE void
nvc0_decoder_inter_sizes(struct nvc0_decoder *dec, uint32_t slice_count,
                         uint32_t *slice_size, uint32_t *bucket_size,
                         uint32_t *ring_size)
{
   const int is_mpeg12 =
      u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_CODEC_MPEG12;

   *slice_size = (SLICE_SIZE * slice_count) >> 8;
   *bucket_size = is_mpeg12 ? 0 : mb(dec->base.width) * 3;
   *ring_size = (dec->inter_bo[0]->size >> 8) - *bucket_size - *slice_size;
}
/* Per-picture stage entry points.  decode_bitstream calls bsp, vp and ppp in
 * that order, all with the same comm_seq.
 */

/* BSP stage: fills the bsp_bo selected by comm_seq with the picture
 * parameters and the bitstream chunks in data[]/num_bytes[], then submits
 * the work.  Outputs vp_caps, is_ref and refs[] are produced through
 * nvc0_decoder_vp_caps().  The caller expects a return value of 2.
 */
extern unsigned
nvc0_decoder_bsp(struct nvc0_decoder *dec, union pipe_desc desc,
struct nvc0_video_buffer *target,
unsigned comm_seq, unsigned num_buffers,
const void *const *data, const unsigned *num_bytes,
unsigned *vp_caps, unsigned *is_ref,
struct nvc0_video_buffer *refs[16]);
/* Called from nvc0_decoder_bsp() to fill *caps, *is_ref and refs[].
 * NOTE(review): implementation not visible here -- presumably also writes
 * the VP picture parameters; confirm in the VP source.
 */
extern void nvc0_decoder_vp_caps(struct nvc0_decoder *dec,
union pipe_desc desc,
struct nvc0_video_buffer *target,
unsigned comm_seq,
unsigned *caps, unsigned *is_ref,
struct nvc0_video_buffer *refs[16]);
/* VP stage: consumes the caps, is_ref and refs[] values produced during the
 * BSP stage of the same picture.
 */
extern void
nvc0_decoder_vp(struct nvc0_decoder *dec, union pipe_desc desc,
struct nvc0_video_buffer *target, unsigned comm_seq,
unsigned caps, unsigned is_ref,
struct nvc0_video_buffer *refs[16]);
/* PPP stage: last stage run for a picture. */
extern void
nvc0_decoder_ppp(struct nvc0_decoder *dec, union pipe_desc desc,
struct nvc0_video_buffer *target, unsigned comm_seq);

View File

@ -0,0 +1,423 @@
/*
* Copyright 2011-2013 Maarten Lankhorst
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "nvc0_video.h"
/* Stream parameter block, placed 0x100 bytes into the bsp_bo.
 * nvc0_decoder_bsp() zeroes it, sets w0[0] to 16 plus the total number of
 * bitstream bytes and w1[0] to 1.  Member order and sizes are part of the
 * layout and must not change.
 */
struct strparm_bsp {
uint32_t w0[4]; // bits 0-23 length, bits 24-31 addr_hi
uint32_t w1[4]; // bit 8-24 addr_lo
uint32_t unk20; // should be idx * 0x8000000, bitstream offset
uint32_t do_crypto_crap; // set to 0
};
/* MPEG1/2 picture parameters, written at the start of the bsp_bo by
 * nvc0_decoder_fill_picparm_mpeg12_bsp().  Member order and sizes are part
 * of the layout and must not change.
 */
struct mpeg12_picparm_bsp {
uint16_t width;
uint16_t height;
uint8_t picture_structure;
uint8_t picture_coding_type;
uint8_t intra_dc_precision;
uint8_t frame_pred_frame_dct;
uint8_t concealment_motion_vectors;
uint8_t intra_vlc_format;
uint16_t pad;
/* stored as the picture description's f_code value plus one */
uint8_t f_code[2][2];
};
/* MPEG4 picture parameters, written at the start of the bsp_bo by
 * nvc0_decoder_fill_picparm_mpeg4_bsp().  Member order and sizes are part
 * of the layout and must not change.
 */
struct mpeg4_picparm_bsp {
uint16_t width;
uint16_t height;
/* bits needed to hold vop_time_increment_resolution - 1, at least 1 */
uint8_t vop_time_increment_size;
uint8_t interlaced;
uint8_t resync_marker_disable;
};
/* VC-1 picture parameters, written at the start of the bsp_bo by
 * nvc0_decoder_fill_picparm_vc1_bsp().  The trailing comments give each
 * member's byte offset in hex; order and sizes must not change.
 */
struct vc1_picparm_bsp {
uint16_t width;
uint16_t height;
uint8_t profile; // 04 0 simple, 1 main, 2 advanced
uint8_t postprocflag; // 05
uint8_t pulldown; // 06
uint8_t interlaced; // 07
uint8_t tfcntrflag; // 08
uint8_t finterpflag; // 09
uint8_t psf; // 0a
uint8_t pad; // 0b
uint8_t multires; // 0c
uint8_t syncmarker; // 0d
uint8_t rangered; // 0e
uint8_t maxbframes; // 0f
uint8_t dquant; // 10
uint8_t panscan_flag; // 11
uint8_t refdist_flag; // 12
uint8_t quantizer; // 13
uint8_t extended_mv; // 14
uint8_t extended_dmv; // 15
uint8_t overlap; // 16
uint8_t vstransform; // 17
};
/* H.264 picture parameters, written at the start of the bsp_bo by
 * nvc0_decoder_fill_picparm_h264_bsp().  The trailing comments give byte
 * offsets in hex; those of the second group are relative to 0x24, where
 * that group starts.  The fill function asserts that bottom_field_flag
 * sits at 0x24 + 0x39, so order and sizes must not change.
 */
struct h264_picparm_bsp {
// 00
uint32_t unk00;
// 04
uint32_t log2_max_frame_num_minus4; // 04 checked
uint32_t pic_order_cnt_type; // 08 checked
uint32_t log2_max_pic_order_cnt_lsb_minus4; // 0c checked
uint32_t delta_pic_order_always_zero_flag; // 10, or unknown
uint32_t frame_mbs_only_flag; // 14, always 1?
uint32_t direct_8x8_inference_flag; // 18, always 1?
uint32_t width_mb; // 1c checked
uint32_t height_mb; // 20 checked
// 24
//struct picparm2
uint32_t entropy_coding_mode_flag; // 00, checked
uint32_t pic_order_present_flag; // 04 checked
uint32_t unk; // 08 seems to be 0?
uint32_t pad1; // 0c seems to be 0?
uint32_t pad2; // 10 always 0 ?
uint32_t num_ref_idx_l0_active_minus1; // 14 always 0?
uint32_t num_ref_idx_l1_active_minus1; // 18 always 0?
uint32_t weighted_pred_flag; // 1c checked
uint32_t weighted_bipred_idc; // 20 checked
uint32_t pic_init_qp_minus26; // 24 checked
uint32_t deblocking_filter_control_present_flag; // 28 always 1?
uint32_t redundant_pic_cnt_present_flag; // 2c always 0?
uint32_t transform_8x8_mode_flag; // 30 checked
uint32_t mb_adaptive_frame_field_flag; // 34 checked-ish
uint8_t field_pic_flag; // 38 checked
uint8_t bottom_field_flag; // 39 checked
uint8_t real_pad[0x1b]; // XX why?
};
/**
 * Write the MPEG1/2 picture parameters to \p map.
 *
 * \param dec   the decoder, source of width, height and profile
 * \param desc  picture description to copy from
 * \param map   destination, interpreted as struct mpeg12_picparm_bsp
 * \return the slice count shifted left by 4, with bit 0 set for every
 *         profile other than PIPE_VIDEO_PROFILE_MPEG1
 */
static uint32_t
nvc0_decoder_fill_picparm_mpeg12_bsp(struct nvc0_decoder *dec,
                                     struct pipe_mpeg12_picture_desc *desc,
                                     char *map)
{
   struct mpeg12_picparm_bsp *out = (struct mpeg12_picparm_bsp *)map;
   int dir, comp;

   out->width = dec->base.width;
   out->height = dec->base.height;
   out->picture_structure = desc->picture_structure;
   out->picture_coding_type = desc->picture_coding_type;
   out->intra_dc_precision = desc->intra_dc_precision;
   out->frame_pred_frame_dct = desc->frame_pred_frame_dct;
   out->concealment_motion_vectors = desc->concealment_motion_vectors;
   out->intra_vlc_format = desc->intra_vlc_format;
   out->pad = 0;

   /* Every f_code entry is stored one higher than described. */
   for (dir = 0; dir < 2; ++dir) {
      for (comp = 0; comp < 2; ++comp)
         out->f_code[dir][comp] = desc->f_code[dir][comp] + 1; // FU
   }

   return (desc->num_slices << 4) | (dec->base.profile != PIPE_VIDEO_PROFILE_MPEG1);
}
/**
 * Write the MPEG4 picture parameters to \p map.
 *
 * \param dec   the decoder, source of width and height
 * \param desc  picture description to copy from;
 *              vop_time_increment_resolution must be non-zero
 * \param map   destination, interpreted as struct mpeg4_picparm_bsp
 * \return always 4
 */
static uint32_t
nvc0_decoder_fill_picparm_mpeg4_bsp(struct nvc0_decoder *dec,
                                    struct pipe_mpeg4_picture_desc *desc,
                                    char *map)
{
   struct mpeg4_picparm_bsp *out = (struct mpeg4_picparm_bsp *)map;
   uint32_t remaining, nbits = 0;

   out->width = dec->base.width;
   out->height = dec->base.height;
   assert(desc->vop_time_increment_resolution > 0);

   /* Count the bits needed to represent (resolution - 1), minimum one. */
   for (remaining = desc->vop_time_increment_resolution - 1; remaining; remaining /= 2)
      nbits++;
   if (nbits == 0)
      nbits = 1;

   out->vop_time_increment_size = nbits;
   out->interlaced = desc->interlaced;
   out->resync_marker_disable = desc->resync_marker_disable;
   return 4;
}
/**
 * Write the VC-1 picture parameters to \p map.
 *
 * \param dec  the decoder, source of width, height and profile
 * \param d    picture description to copy from
 * \param map  destination, interpreted as struct vc1_picparm_bsp
 * \return the slice count in bits 4..15, OR'ed with 2
 */
static uint32_t
nvc0_decoder_fill_picparm_vc1_bsp(struct nvc0_decoder *dec,
                                  struct pipe_vc1_picture_desc *d,
                                  char *map)
{
   struct vc1_picparm_bsp *out = (struct vc1_picparm_bsp *)map;
   uint32_t result = (d->slice_count << 4)&0xfff0;

   /* 00..03: dimensions */
   out->width = dec->base.width;
   out->height = dec->base.height;

   /* 04..07 */
   out->profile = dec->base.profile - PIPE_VIDEO_PROFILE_VC1_SIMPLE;
   out->postprocflag = d->postprocflag;
   out->pulldown = d->pulldown;
   out->interlaced = d->interlace;

   /* 08..0b */
   out->tfcntrflag = d->tfcntrflag;
   out->finterpflag = d->finterpflag;
   out->psf = d->psf;
   out->pad = 0;

   /* 0c..0f */
   out->multires = d->multires;
   out->syncmarker = d->syncmarker;
   out->rangered = d->rangered;
   out->maxbframes = d->maxbframes;

   /* 10..13 */
   out->dquant = d->dquant;
   out->panscan_flag = d->panscan_flag;
   out->refdist_flag = d->refdist_flag;
   out->quantizer = d->quantizer;

   /* 14..17 */
   out->extended_mv = d->extended_mv;
   out->extended_dmv = d->extended_dmv;
   out->overlap = d->overlap;
   out->vstransform = d->vstransform;

   result |= 2;
   return result;
}
static uint32_t
nvc0_decoder_fill_picparm_h264_bsp(struct nvc0_decoder *dec,
struct pipe_h264_picture_desc *d,
char *map)
{
struct h264_picparm_bsp stub_h = {}, *h = &stub_h;
uint32_t caps = (d->slice_count << 4)&0xfff0;
assert(!(d->slice_count & ~0xfff));
if (d->slice_count & 0x1000)
caps |= 1 << 20;
assert(offsetof(struct h264_picparm_bsp, bottom_field_flag) == (0x39 + 0x24));
h->unk00 = 1;
h->pad1 = h->pad2 = 0;
h->unk = 0;
h->log2_max_frame_num_minus4 = d->log2_max_frame_num_minus4;
h->frame_mbs_only_flag = d->frame_mbs_only_flag;
h->direct_8x8_inference_flag = d->direct_8x8_inference_flag;
h->width_mb = mb(dec->base.width);
h->height_mb = mb(dec->base.height);
h->entropy_coding_mode_flag = d->entropy_coding_mode_flag;
h->pic_order_present_flag = d->pic_order_present_flag;
h->pic_order_cnt_type = d->pic_order_cnt_type;
h->log2_max_pic_order_cnt_lsb_minus4 = d->log2_max_pic_order_cnt_lsb_minus4;
h->delta_pic_order_always_zero_flag = d->delta_pic_order_always_zero_flag;
h->num_ref_idx_l0_active_minus1 = d->num_ref_idx_l0_active_minus1;
h->num_ref_idx_l1_active_minus1 = d->num_ref_idx_l1_active_minus1;
h->weighted_pred_flag = d->weighted_pred_flag;
h->weighted_bipred_idc = d->weighted_bipred_idc;
h->pic_init_qp_minus26 = d->pic_init_qp_minus26;
h->deblocking_filter_control_present_flag = d->deblocking_filter_control_present_flag;
h->redundant_pic_cnt_present_flag = d->redundant_pic_cnt_present_flag;
h->transform_8x8_mode_flag = d->transform_8x8_mode_flag;
h->mb_adaptive_frame_field_flag = d->mb_adaptive_frame_field_flag;
h->field_pic_flag = d->field_pic_flag;
h->bottom_field_flag = d->bottom_field_flag;
memset(h->real_pad, 0, sizeof(h->real_pad));
*(struct h264_picparm_bsp *)map = *h;
return caps | 3;
}
#if NVC0_DEBUG_FENCE
/**
 * Debug helper: print the BSP related members of \p comm.
 *
 * The status/pos entry shown is the one selected by the low four bits of
 * comm->bsp_cur_index.
 */
static void dump_comm_bsp(struct comm *comm)
{
   const unsigned slot = comm->bsp_cur_index & 0xf;
   const uint32_t status = comm->status[slot];
   const uint32_t pos = comm->pos[slot];

   debug_printf("Cur seq: %x, bsp byte ofs: %x\n", comm->bsp_cur_index, comm->byte_ofs);
   debug_printf("Status: %08x, pos: %08x\n", status, pos);
}
#endif
/**
 * BSP stage: upload one picture's parameters and bitstream and submit it.
 *
 * The bsp_bo selected by comm_seq is filled as laid out in the comment in
 * the body, the codec specific picture parameters are written, and the
 * commands are pushed and kicked on pushbuf 0.
 *
 * \param dec          the decoder
 * \param desc         picture description, read through the member that
 *                     matches the decoder's codec
 * \param target       destination video buffer, forwarded to
 *                     nvc0_decoder_vp_caps()
 * \param comm_seq     sequence number of this picture; selects the bsp_bo
 *                     and inter_bo to use
 * \param num_buffers  number of entries in \p data and \p num_bytes
 * \param data         bitstream chunks, copied back to back
 * \param num_bytes    size in bytes of each chunk
 * \param vp_caps      out, filled by nvc0_decoder_vp_caps()
 * \param is_ref       out, filled by nvc0_decoder_vp_caps()
 * \param refs         out, filled by nvc0_decoder_vp_caps()
 * \return 2 once the work has been submitted (with NVC0_DEBUG_FENCE: the
 *         status word read back from the comm area); (unsigned)-1 if the
 *         bitstream does not fit into the buffer, the buffer cannot be
 *         mapped or the codec is unknown.  On those early failures the
 *         out parameters are NOT written.
 */
unsigned
nvc0_decoder_bsp(struct nvc0_decoder *dec, union pipe_desc desc,
                 struct nvc0_video_buffer *target,
                 unsigned comm_seq, unsigned num_buffers,
                 const void *const *data, const unsigned *num_bytes,
                 unsigned *vp_caps, unsigned *is_ref,
                 struct nvc0_video_buffer *refs[16])
{
   struct nouveau_pushbuf *push = dec->pushbuf[0];
   enum pipe_video_codec codec = u_reduce_video_profile(dec->base.profile);
   char *bsp;
   uint32_t bsp_addr, comm_addr, inter_addr;
   uint32_t slice_size, bucket_size, ring_size;
   uint32_t endmarker, caps;
   struct strparm_bsp *str_bsp;
   int ret;
   unsigned i;
   uint64_t bsp_needed;
   struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NVC0_VIDEO_QDEPTH];
   struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
   unsigned fence_extra = 0;
   struct nouveau_pushbuf_refn bo_refs[] = {
      { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
      { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
#ifdef NVC0_DEBUG_FENCE
      { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
#endif
      /* Must stay last: dropped below when there is no bitplane_bo. */
      { dec->bitplane_bo, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
   };
   int num_refs = sizeof(bo_refs)/sizeof(*bo_refs);

#ifdef NVC0_DEBUG_FENCE
   fence_extra = 4;
#endif

   /* The bitstream starts 0x700 bytes into the buffer and is followed by a
    * 16 byte end sequence.  Refuse input that would run past the end of
    * the buffer instead of overflowing it in the memcpy() below.
    */
   bsp_needed = 0x700 + 16;
   for (i = 0; i < num_buffers; ++i)
      bsp_needed += num_bytes[i];
   if (bsp_needed > bsp_bo->size) {
      debug_printf("bitstream too large: need %llu bytes, have %llu\n",
                   (unsigned long long)bsp_needed,
                   (unsigned long long)bsp_bo->size);
      return -1;
   }

   ret = nouveau_bo_map(bsp_bo, NOUVEAU_BO_WR, dec->client);
   if (ret) {
      debug_printf("map failed: %i %s\n", ret, strerror(-ret));
      return -1;
   }
   bsp = bsp_bo->map;
   /*
    * 0x000..0x100: picparm_bsp
    * 0x200..0x500: picparm_vp
    * 0x500..0x700: comm
    * 0x700..onward: raw bitstream
    */

   switch (codec){
   case PIPE_VIDEO_CODEC_MPEG12:
      endmarker = 0xb7010000;
      caps = nvc0_decoder_fill_picparm_mpeg12_bsp(dec, desc.mpeg12, bsp);
      break;
   case PIPE_VIDEO_CODEC_MPEG4:
      endmarker = 0xb1010000;
      caps = nvc0_decoder_fill_picparm_mpeg4_bsp(dec, desc.mpeg4, bsp);
      break;
   case PIPE_VIDEO_CODEC_VC1: {
      endmarker = 0x0a010000;
      caps = nvc0_decoder_fill_picparm_vc1_bsp(dec, desc.vc1, bsp);
      break;
   }
   case PIPE_VIDEO_CODEC_MPEG4_AVC: {
      endmarker = 0x0b010000;
      caps = nvc0_decoder_fill_picparm_h264_bsp(dec, desc.h264, bsp);
      break;
   }
   default: assert(0); return -1;
   }

   nvc0_decoder_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs);

   PUSH_SPACE(push, 6 + (codec == PIPE_VIDEO_CODEC_MPEG4_AVC ? 9 : 7) + fence_extra + 2);
   if (!dec->bitplane_bo)
      num_refs--;
   nouveau_pushbuf_refn(push, bo_refs, num_refs);

   caps |= 0 << 16; // reset struct comm if flag is set
   caps |= 1 << 17; // enable watchdog
   caps |= 0 << 18; // do not report error to VP, so it can continue decoding what we have
   caps |= 0 << 19; // if enabled, use crypto crap?
   bsp += 0x100;

   str_bsp = (struct strparm_bsp *)bsp;
   memset(str_bsp, 0, 0x80);
   str_bsp->w0[0] = 16;
   str_bsp->w1[0] = 0x1;
   bsp += 0x100;
   /* Reserved for picparm_vp */
   bsp += 0x300;
   /* Reserved for comm */
#if !NVC0_DEBUG_FENCE
   memset(bsp, 0, 0x200);
#endif
   bsp += 0x200;
   /* Size was validated against the buffer at the top of the function. */
   for (i = 0; i < num_buffers; ++i) {
      memcpy(bsp, data[i], num_bytes[i]);
      bsp += num_bytes[i];
      str_bsp->w0[0] += num_bytes[i];
   }

   /* Append end sequence */
   *(uint32_t *)bsp = endmarker;
   bsp += 4;
   *(uint32_t *)bsp = 0x00000000;
   bsp += 4;
   *(uint32_t *)bsp = endmarker;
   bsp += 4;
   *(uint32_t *)bsp = 0x00000000;

   /* All addresses handed to the engine are in units of 0x100 bytes. */
   bsp_addr = bsp_bo->offset >> 8;
   inter_addr = inter_bo->offset >> 8;

#if NVC0_DEBUG_FENCE
   memset(dec->comm, 0, 0x200);
   comm_addr = (dec->fence_bo->offset + COMM_OFFSET) >> 8;
#else
   comm_addr = bsp_addr + (COMM_OFFSET>>8);
#endif

   BEGIN_NVC0(push, SUBC_BSP(0x700), 5);
   PUSH_DATA (push, caps); // 700 cmd
   PUSH_DATA (push, bsp_addr + 1); // 704 strparm_bsp
   PUSH_DATA (push, bsp_addr + 7); // 708 str addr
   PUSH_DATA (push, comm_addr); // 70c comm
   PUSH_DATA (push, comm_seq); // 710 seq

   if (codec != PIPE_VIDEO_CODEC_MPEG4_AVC) {
      u32 bitplane_addr;

      bitplane_addr = dec->bitplane_bo->offset >> 8;

      nvc0_decoder_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size);
      BEGIN_NVC0(push, SUBC_BSP(0x400), 6);
      PUSH_DATA (push, bsp_addr); // 400 picparm addr
      PUSH_DATA (push, inter_addr); // 404 interparm addr
      PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 408 interdata addr
      PUSH_DATA (push, ring_size << 8); // 40c interdata_size
      PUSH_DATA (push, bitplane_addr); // 410 BITPLANE_DATA
      PUSH_DATA (push, 0x400); // 414 BITPLANE_DATA_SIZE
   } else {
      nvc0_decoder_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
      BEGIN_NVC0(push, SUBC_BSP(0x400), 8);
      PUSH_DATA (push, bsp_addr); // 400 picparm addr
      PUSH_DATA (push, inter_addr); // 404 interparm addr
      PUSH_DATA (push, slice_size << 8); // 408 interparm size?
      PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 40c interdata addr
      PUSH_DATA (push, ring_size << 8); // 410 interdata size
      PUSH_DATA (push, inter_addr + slice_size); // 414 bucket?
      PUSH_DATA (push, bucket_size << 8); // 418 bucket size? unshifted..
      PUSH_DATA (push, 0); // 41c targets
      // TODO: Double check 414 / 418 with nvidia trace
   }

#if NVC0_DEBUG_FENCE
   BEGIN_NVC0(push, SUBC_BSP(0x240), 3);
   PUSH_DATAh(push, dec->fence_bo->offset);
   PUSH_DATA (push, dec->fence_bo->offset);
   PUSH_DATA (push, dec->fence_seq);

   BEGIN_NVC0(push, SUBC_BSP(0x300), 1);
   PUSH_DATA (push, 1);
   PUSH_KICK (push);

   /* Poll until the fence value has caught up with fence_seq. */
   {
      unsigned spin = 0;
      do {
         usleep(100);
         if ((spin++ & 0xff) == 0xff) {
            debug_printf("%u: %u\n", dec->fence_seq, dec->fence_map[0]);
            dump_comm_bsp(dec->comm);
         }
      } while (dec->fence_seq > dec->fence_map[0]);
   }

   dump_comm_bsp(dec->comm);
   return dec->comm->status[comm_seq & 0xf];
#else
   BEGIN_NVC0(push, SUBC_BSP(0x300), 1);
   PUSH_DATA (push, 0);
   PUSH_KICK (push);
   return 2;
#endif
}

View File

@ -0,0 +1,145 @@
/*
* Copyright 2011-2013 Maarten Lankhorst
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "nvc0_video.h"
/**
 * Emit the PPP setup methods starting at 0x700 that every codec shares.
 *
 * The two resources of \p target and the decoder's reference buffer are added
 * to the pushbuf relocation list, then ten data words are pushed: two packed
 * stride/size words, four input addresses (base, y2, cbcr, cbcr2) and two
 * output addresses for each of the two target resources.
 *
 * \param dec     decoder; commands are written to dec->pushbuf[2]
 * \param target  output video buffer; resources[0] and resources[1] are
 *                referenced for writing and flagged as GPU-written
 * \param low700  codec-specific low bits OR'ed into the word for method 0x700
 *
 * No PUSH_SPACE is done here; nvc0_decoder_ppp() reserves the space before
 * calling in.
 */
static void
nvc0_decoder_setup_ppp(struct nvc0_decoder *dec, struct nvc0_video_buffer *target, uint32_t low700) {
   struct nouveau_pushbuf *push = dec->pushbuf[2];

   uint32_t stride_in = mb(dec->base.width);
   uint32_t stride_out = mb(target->resources[0]->width0);
   uint32_t dec_h = mb(dec->base.height);
   uint32_t dec_w = mb(dec->base.width);
   uint64_t in_addr;
   uint32_t y2, cbcr, cbcr2, i;
   struct nouveau_pushbuf_refn bo_refs[] = {
      /* Slots 0 and 1 are filled in below with the target's buffers. */
      { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
      { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
      { dec->ref_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
      /*
       * Use #if, like every other NVC0_DEBUG_FENCE test in this file, so a
       * definition of 0 does not pull the fence buffer into the list.
       */
#if NVC0_DEBUG_FENCE
      { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
#endif
   };
   unsigned num_refs = sizeof(bo_refs)/sizeof(*bo_refs);

   for (i = 0; i < 2; ++i) {
      struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i];
      bo_refs[i].bo = mt->base.bo;
   }

   nouveau_pushbuf_refn(push, bo_refs, num_refs);
   nvc0_decoder_ycbcr_offsets(dec, &y2, &cbcr, &cbcr2);

   BEGIN_NVC0(push, SUBC_PPP(0x700), 10);
   in_addr = nvc0_video_addr(dec, target) >> 8;

   PUSH_DATA (push, (stride_out << 24) | (stride_out << 16) | low700); // 700
   PUSH_DATA (push, (stride_in << 24) | (stride_in << 16) | (dec_h << 8) | dec_w); // 704
   assert(dec_w == stride_in);

   /* Input: */
   PUSH_DATA (push, in_addr); // 708
   PUSH_DATA (push, in_addr + y2); // 70c
   PUSH_DATA (push, in_addr + cbcr); // 710
   PUSH_DATA (push, in_addr + cbcr2); // 714

   assert(target->resources[0]->width0 >= 16 * dec_w);
   assert(target->resources[0]->height0 >= dec->base.height/2);

   /* Output: two addresses per target resource (words 718..724). */
   for (i = 0; i < 2; ++i) {
      struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i];

      PUSH_DATA (push, mt->base.address >> 8);
      PUSH_DATA (push, (mt->base.address + mt->total_size/2/mt->base.base.array_size) >> 8);
      mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
   }
}
/**
 * VC-1 flavour of the PPP setup: runs the common setup with low bits 0x1412
 * and additionally programs method 0x400 with the picture quantizer.
 *
 * \return the caps word (0x10) that the caller writes after the sequence
 *         number at method 0x734.
 */
static uint32_t
nvc0_decoder_vc1_ppp(struct nvc0_decoder *dec, struct pipe_vc1_picture_desc *desc, struct nvc0_video_buffer *target) {
   struct nouveau_pushbuf *pb = dec->pushbuf[2];
   const uint32_t vc1_ppp_caps = 0x10;

   nvc0_decoder_setup_ppp(dec, target, 0x1412);

   /* Deblocking and non macroblock-aligned sizes are not handled here. */
   assert(!desc->deblockEnable);
   assert((dec->base.width & 0xf) == 0);
   assert((dec->base.height & 0xf) == 0);

   BEGIN_NVC0(pb, SUBC_PPP(0x400), 1);
   PUSH_DATA (pb, desc->pquant << 11);

   /* Method 0x728: meaning unknown, intentionally not emitted. */
   return vc1_ppp_caps;
}
/**
 * Queue the post-processing (PPP) pass for one decoded picture and kick the
 * PPP pushbuf (dec->pushbuf[2]).
 *
 * The codec derived from dec->base.profile selects the setup variant; after
 * that \p comm_seq and the caps word are written at method 0x734. With
 * NVC0_DEBUG_FENCE enabled a semaphore write is added and the function spins
 * until dec->fence_map[8] has caught up with dec->fence_seq.
 */
void
nvc0_decoder_ppp(struct nvc0_decoder *dec, union pipe_desc desc, struct nvc0_video_buffer *target, unsigned comm_seq) {
   enum pipe_video_codec codec = u_reduce_video_profile(dec->base.profile);
   struct nouveau_pushbuf *push = dec->pushbuf[2];
   const unsigned vc1_extra = (codec == PIPE_VIDEO_CODEC_VC1) ? 2 : 0;
   unsigned ppp_caps = 0x10;
#if NVC0_DEBUG_FENCE
   const unsigned fence_extra = 4;
#else
   const unsigned fence_extra = 0;
#endif

   /* setup (11) + optional VC-1 method (2) + 0x734 (3) + fence + 0x300 (2) */
   PUSH_SPACE(push, 11 + vc1_extra + 3 + fence_extra + 2);

   if (codec == PIPE_VIDEO_CODEC_MPEG12) {
      /* Bit 0 distinguishes MPEG-2 (1) from MPEG-1 (0). */
      unsigned mpeg2 = dec->base.profile != PIPE_VIDEO_PROFILE_MPEG1;

      nvc0_decoder_setup_ppp(dec, target, 0x1410 | mpeg2);
   } else if (codec == PIPE_VIDEO_CODEC_MPEG4) {
      nvc0_decoder_setup_ppp(dec, target, 0x1414);
   } else if (codec == PIPE_VIDEO_CODEC_VC1) {
      ppp_caps = nvc0_decoder_vc1_ppp(dec, desc.vc1, target);
   } else if (codec == PIPE_VIDEO_CODEC_MPEG4_AVC) {
      nvc0_decoder_setup_ppp(dec, target, 0x1413);
   } else {
      assert(0);
   }

   BEGIN_NVC0(push, SUBC_PPP(0x734), 2);
   PUSH_DATA (push, comm_seq);
   PUSH_DATA (push, ppp_caps);

#if NVC0_DEBUG_FENCE
   BEGIN_NVC0(push, SUBC_PPP(0x240), 3);
   PUSH_DATAh(push, (dec->fence_bo->offset + 0x20));
   PUSH_DATA (push, (dec->fence_bo->offset + 0x20));
   PUSH_DATA (push, dec->fence_seq);

   BEGIN_NVC0(push, SUBC_PPP(0x300), 1);
   PUSH_DATA (push, 1);
   PUSH_KICK (push);

   {
      unsigned polls = 0;

      /* Poll the fence word, logging every 256th iteration. */
      for (;;) {
         usleep(100);
         if ((polls++ & 0xff) == 0xff)
            debug_printf("ppp%u: %u\n", dec->fence_seq, dec->fence_map[8]);
         if (!(dec->fence_seq > dec->fence_map[8]))
            break;
      }
   }
#else
   BEGIN_NVC0(push, SUBC_PPP(0x300), 1);
   PUSH_DATA (push, 0);
   PUSH_KICK (push);
#endif
}

View File

@ -0,0 +1,667 @@
/*
* Copyright 2011-2013 Maarten Lankhorst
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "nvc0_video.h"
#include <sys/mman.h>
/*
 * MPEG-1/2 picture parameter block. It is filled in by
 * nvc0_decoder_fill_picparm_mpeg12_vp() and copied verbatim into the VP
 * parameter area, so field order and sizes must not change.
 * The hex number opening each trailing comment is the field's byte offset.
 */
struct mpeg12_picparm_vp {
uint16_t width; // 00 in mb units
uint16_t height; // 02 in mb units
uint32_t unk04; // 04 stride for Y?
uint32_t unk08; // 08 stride for CbCr?
uint32_t ofs[6]; // 0c..20 ofs
uint32_t bucket_size; // 24
uint32_t inter_ring_data_size; // 28
uint16_t unk2c; // 2c
uint16_t alternate_scan; // 2e
uint16_t unk30; // 30 not seen set yet
uint16_t picture_structure; // 32
uint16_t pad2[3]; // 34..38, written as zero
uint16_t unk3a; // 3a set on I frame?
uint32_t f_code[4]; // 3c
uint32_t picture_coding_type; // 4c
uint32_t intra_dc_precision; // 50
uint32_t q_scale_type; // 54
uint32_t top_field_first; // 58
uint32_t full_pel_forward_vector; // 5c
uint32_t full_pel_backward_vector; // 60
uint8_t intra_quantizer_matrix[0x40]; // 64
uint8_t non_intra_quantizer_matrix[0x40]; // a4
};
/*
 * MPEG-4 part 2 picture parameter block. It is filled in by
 * nvc0_decoder_fill_picparm_mpeg4_vp() and copied verbatim into the VP
 * parameter area, so field order and sizes must not change.
 * Trailing comments give the byte offset where known.
 */
struct mpeg4_picparm_vp {
uint32_t width; // 00 in normal units
uint32_t height; // 04 in normal units
uint32_t unk08; // stride 1
uint32_t unk0c; // stride 2
uint32_t ofs[6]; // 10..24 ofs
uint32_t bucket_size; // 28
uint32_t pad1; // 2c, pad
uint32_t pad2; // 30
uint32_t inter_ring_data_size; // 34
uint32_t trd[2]; // 38, 3c
uint32_t trb[2]; // 40, 44
uint32_t u48; // XXX codec selection? Should test with different values of VdpDecoderProfile
uint16_t f_code_fw; // 4c
uint16_t f_code_bw; // 4e
uint8_t interlaced; // 50
uint8_t quant_type; // bool, written to 528
uint8_t quarter_sample; // bool, written to 548
uint8_t short_video_header; // bool, negated written to 528 shifted by 1
uint8_t u54; // bool, written to 0x740
uint8_t vop_coding_type; // 55
uint8_t rounding_control; // 56
uint8_t alternate_vertical_scan_flag; // 57 bool
uint8_t top_field_first; // bool, written to vuc
uint8_t pad4[3]; // 59, 5a, 5b, contains garbage on blob
uint32_t pad5[0x10]; // 5c...9c non-inclusive, but WHY?
uint32_t intra[0x10]; // 9c
// NOTE(review): with the sizes declared here non_intra starts at 0xdc
// (0x9c + 0x40) and the struct ends at 0x11c; the inherited "bc" and
// "udc..uff" annotations do not match that - confirm against a trace.
uint32_t non_intra[0x10]; // bc
// udc..uff pad?
};
// Full version, with data pumped from BSP
/*
 * VC-1 picture parameter block. nvc0_decoder_fill_picparm_vc1_vp() writes
 * every field directly into the mapped VP parameter area through a pointer of
 * this type, so field order and sizes must not change.
 * The hex number opening each trailing comment is the field's byte offset.
 */
struct vc1_picparm_vp {
uint32_t bucket_size; // 00
uint32_t pad; // 04
uint32_t inter_ring_data_size; // 08
uint32_t unk0c; // stride 1
uint32_t unk10; // stride 2
uint32_t ofs[6]; // 14..28 ofs
uint16_t width; // 2c
uint16_t height; // 2e
uint8_t profile; // 30 0 = simple, 1 = main, 2 = advanced
uint8_t loopfilter; // 31 written into vuc
uint8_t fastuvmc; // 32, written into vuc
uint8_t dquant; // 33
uint8_t overlap; // 34
uint8_t quantizer; // 35
uint8_t u36; // 36, bool
uint8_t pad2; // 37, to align to 0x38
};
/*
 * H.264 picture parameter block. It is filled in by
 * nvc0_decoder_fill_picparm_h264_vp() (which asserts that u224 sits at byte
 * offset 0x224) and patched by nvc0_decoder_fill_picparm_h264_vp_refs().
 * Field order, sizes and bitfield widths must not change.
 * For bitfields the trailing comment gives "byte/word offset  bit range".
 */
struct h264_picparm_vp { // 700..a00
uint16_t width, height;
uint32_t stride1, stride2; // 04 08
uint32_t ofs[6]; // 0c..24 in-image offset
uint32_t u24; // nfi ac8 ?
uint32_t bucket_size; // 28 bucket size
uint32_t inter_ring_data_size; // 2c
unsigned f0 : 1; // 0 0x01: into 640 shifted by 3, 540 shifted by 5, half size something?
unsigned f1 : 1; // 1 0x02: into vuc ofs 56
unsigned weighted_pred_flag : 1; // 2 0x04
unsigned f3 : 1; // 3 0x08: into vuc ofs 68
unsigned is_reference : 1; // 4
unsigned interlace : 1; // 5 field_pic_flag
unsigned bottom_field_flag : 1; // 6
unsigned f7 : 1; // 7 0x80: nfi yet
signed log2_max_frame_num_minus4 : 4; // 31 0..3
unsigned u31_45 : 2; // 31 4..5
unsigned pic_order_cnt_type : 2; // 31 6..7
signed pic_init_qp_minus26 : 6; // 32 0..5
signed chroma_qp_index_offset : 5; // 32 6..10
signed second_chroma_qp_index_offset : 5; // 32 11..15
unsigned weighted_bipred_idc : 2; // 34 0..1
unsigned fifo_dec_index : 7; // 34 2..8
unsigned tmp_idx : 5; // 34 9..13
unsigned frame_number : 16; // 34 14..29
unsigned u34_3030 : 1; // 34 30..30 pp.u34[30:30]
unsigned u34_3131 : 1; // 34 31..31 pad?
uint32_t field_order_cnt[2]; // 38, 3c
// One 16-byte entry per reference frame; offsets below are relative to the entry.
struct { // 40
// 0x00223102
// nfi (needs: top_is_reference, bottom_is_reference, is_long_term, maybe some other state that was saved..
unsigned fifo_idx : 7; // 00 0..6
unsigned tmp_idx : 5; // 00 7..11
unsigned unk12 : 1; // 00 12 not seen yet, but set, maybe top_is_reference
unsigned unk13 : 1; // 00 13 not seen yet, but set, maybe bottom_is_reference?
unsigned unk14 : 1; // 00 14 skipped?
unsigned notseenyet : 1; // 00 15 pad?
unsigned unk16 : 1; // 00 16
unsigned unk17 : 4; // 00 17..20
unsigned unk21 : 4; // 00 21..24
unsigned pad : 7; // 00 25..31
uint32_t field_order_cnt[2]; // 04,08
uint32_t frame_idx; // 0c
} refs[0x10];
// m4x4 and m8x8 are filled by one memcpy that runs across both arrays,
// so they must stay adjacent and in this order.
uint8_t m4x4[6][16]; // 140
uint8_t m8x8[2][64]; // 1a0
uint32_t u220; // 220 number of extra reorder_list to append?
uint8_t u224[0x20]; // 224..244 reorder_list append ?
uint8_t nfi244[0xb0]; // add some pad to make sure nulls are read
};
/*
 * Update the decoder's reference slot table (dec->refs[]) for the picture
 * being decoded.
 *
 * Pass 1 walks refs[0..max_references): every reference that really owns the
 * slot named by its valid_ref gets that slot's last_used stamped with seq.
 * If target itself appears in the list (asserted to be H.264 only, with one
 * field of that slot still undecoded) no new slot is needed.
 *
 * Pass 2 otherwise picks a slot for target among the max_references + 1
 * slots not used by this picture: a slot without a vidbuf wins immediately,
 * else the one with the smallest last_used. The chosen slot is reset and
 * recorded in target->valid_ref.
 *
 * empty_spot doubles as a state variable: max_references + 1 means "nothing
 * chosen yet", 0 after pass 1 means "target is already referenced".
 */
static void
nvc0_decoder_handle_references(struct nvc0_decoder *dec, struct nvc0_video_buffer *refs[16], unsigned seq, struct nvc0_video_buffer *target)
{
unsigned h264 = u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_CODEC_MPEG4_AVC;
unsigned i, idx, empty_spot = dec->base.max_references + 1;
for (i = 0; i < dec->base.max_references; ++i) {
if (!refs[i])
continue;
idx = refs[i]->valid_ref;
//debug_printf("ref[%i] %p in slot %i\n", i, refs[i], idx);
// target may only reference itself for H.264, at most once, and only
// while one of its two fields has not been decoded yet.
assert(target != refs[i] ||
(h264 && empty_spot &&
(!dec->refs[idx].decoded_bottom || !dec->refs[idx].decoded_top)));
if (target == refs[i])
empty_spot = 0;
// For H.264 every listed reference must have been used by the previous picture.
assert(!h264 ||
dec->refs[idx].last_used == seq - 1);
// The slot was handed to another buffer since: this is not a usable reference.
if (dec->refs[idx].vidbuf != refs[i]) {
debug_printf("%p is not a real ref\n", refs[i]);
// FIXME: Maybe do m2mf copy here if a application really depends on it?
continue;
}
assert(dec->refs[idx].vidbuf == refs[i]);
dec->refs[idx].last_used = seq;
}
// target already owns a slot (see pass 1), nothing to allocate.
if (!empty_spot)
return;
/* Try to find a real empty spot first, there should be one..
*/
for (i = 0; i < dec->base.max_references + 1; ++i) {
// Only slots not stamped by this picture are candidates.
if (dec->refs[i].last_used < seq) {
if (!dec->refs[i].vidbuf) {
empty_spot = i;
break;
}
// Keep the current candidate when it is strictly older than slot i.
if (empty_spot < dec->base.max_references+1 &&
dec->refs[empty_spot].last_used < dec->refs[i].last_used)
continue;
empty_spot = i;
}
}
assert(empty_spot < dec->base.max_references+1);
dec->refs[empty_spot].last_used = seq;
// debug_printf("Kicked %p to add %p to slot %i\n", dec->refs[empty_spot].vidbuf, target, i);
dec->refs[empty_spot].vidbuf = target;
dec->refs[empty_spot].decoded_bottom = dec->refs[empty_spot].decoded_top = 0;
target->valid_ref = empty_spot;
}
/**
 * Release the reference slot recorded in target->valid_ref: the slot loses
 * its buffer and its last_used stamp is reset to 0.
 */
static void
nvc0_decoder_kick_ref(struct nvc0_decoder *dec, struct nvc0_video_buffer *target)
{
   const unsigned slot = target->valid_ref;

   dec->refs[slot].vidbuf = NULL;
   dec->refs[slot].last_used = 0;
}
static uint32_t
nvc0_decoder_fill_picparm_mpeg12_vp(struct nvc0_decoder *dec,
struct pipe_mpeg12_picture_desc *desc,
struct nvc0_video_buffer *refs[16],
unsigned *is_ref,
char *map)
{
struct mpeg12_picparm_vp pic_vp_stub = {}, *pic_vp = &pic_vp_stub;
uint32_t i, ret = 0x01010, ring; // !async_shutdown << 16 | watchdog << 12 | irq_record << 4 | unk;
assert(!(dec->base.width & 0xf));
*is_ref = desc->picture_coding_type <= 2;
if (dec->base.profile == PIPE_VIDEO_PROFILE_MPEG1)
pic_vp->picture_structure = 3;
else
pic_vp->picture_structure = desc->picture_structure;
assert(desc->picture_structure != 4);
if (desc->picture_structure == 4) // Untested, but should work
ret |= 0x100;
pic_vp->width = mb(dec->base.width);
pic_vp->height = mb(dec->base.height);
pic_vp->unk08 = pic_vp->unk04 = (dec->base.width+0xf)&~0xf; // Stride
nvc0_decoder_ycbcr_offsets(dec, &pic_vp->ofs[1], &pic_vp->ofs[3], &pic_vp->ofs[4]);
pic_vp->ofs[5] = pic_vp->ofs[3];
pic_vp->ofs[0] = pic_vp->ofs[2] = 0;
nvc0_decoder_inter_sizes(dec, 1, &ring, &pic_vp->bucket_size, &pic_vp->inter_ring_data_size);
pic_vp->alternate_scan = desc->alternate_scan;
pic_vp->pad2[0] = pic_vp->pad2[1] = pic_vp->pad2[2] = 0;
pic_vp->unk30 = desc->picture_structure < 3 && (desc->picture_structure == 2 - desc->top_field_first);
pic_vp->unk3a = (desc->picture_coding_type == 1);
for (i = 0; i < 4; ++i)
pic_vp->f_code[i] = desc->f_code[i/2][i%2] + 1; // FU
pic_vp->picture_coding_type = desc->picture_coding_type;
pic_vp->intra_dc_precision = desc->intra_dc_precision;
pic_vp->q_scale_type = desc->q_scale_type;
pic_vp->top_field_first = desc->top_field_first;
pic_vp->full_pel_forward_vector = desc->full_pel_forward_vector;
pic_vp->full_pel_backward_vector = desc->full_pel_backward_vector;
memcpy(pic_vp->intra_quantizer_matrix, desc->intra_matrix, 0x40);
memcpy(pic_vp->non_intra_quantizer_matrix, desc->non_intra_matrix, 0x40);
memcpy(map, pic_vp, sizeof(*pic_vp));
refs[0] = (struct nvc0_video_buffer *)desc->ref[0];
refs[!!refs[0]] = (struct nvc0_video_buffer *)desc->ref[1];
return ret | (dec->base.profile != PIPE_VIDEO_PROFILE_MPEG1);
}
static uint32_t
nvc0_decoder_fill_picparm_mpeg4_vp(struct nvc0_decoder *dec,
struct pipe_mpeg4_picture_desc *desc,
struct nvc0_video_buffer *refs[16],
unsigned *is_ref,
char *map)
{
struct mpeg4_picparm_vp pic_vp_stub = {}, *pic_vp = &pic_vp_stub;
uint32_t ring, ret = 0x01014; // !async_shutdown << 16 | watchdog << 12 | irq_record << 4 | unk;
assert(!(dec->base.width & 0xf));
*is_ref = desc->vop_coding_type <= 1;
pic_vp->width = dec->base.width;
pic_vp->height = mb(dec->base.height)<<4;
pic_vp->unk0c = pic_vp->unk08 = mb(dec->base.width)<<4; // Stride
nvc0_decoder_ycbcr_offsets(dec, &pic_vp->ofs[1], &pic_vp->ofs[3], &pic_vp->ofs[4]);
pic_vp->ofs[5] = pic_vp->ofs[3];
pic_vp->ofs[0] = pic_vp->ofs[2] = 0;
pic_vp->pad1 = pic_vp->pad2 = 0;
nvc0_decoder_inter_sizes(dec, 1, &ring, &pic_vp->bucket_size, &pic_vp->inter_ring_data_size);
pic_vp->trd[0] = desc->trd[0];
pic_vp->trd[1] = desc->trd[1];
pic_vp->trb[0] = desc->trb[0];
pic_vp->trb[1] = desc->trb[1];
pic_vp->u48 = 0; // Codec?
pic_vp->pad1 = pic_vp->pad2 = 0;
pic_vp->f_code_fw = desc->vop_fcode_forward;
pic_vp->f_code_bw = desc->vop_fcode_backward;
pic_vp->interlaced = desc->interlaced;
pic_vp->quant_type = desc->quant_type;
pic_vp->quarter_sample = desc->quarter_sample;
pic_vp->short_video_header = desc->short_video_header;
pic_vp->u54 = 0;
pic_vp->vop_coding_type = desc->vop_coding_type;
pic_vp->rounding_control = desc->rounding_control;
pic_vp->alternate_vertical_scan_flag = desc->alternate_vertical_scan_flag;
pic_vp->top_field_first = desc->top_field_first;
memcpy(pic_vp->intra, desc->intra_matrix, 0x40);
memcpy(pic_vp->non_intra, desc->non_intra_matrix, 0x40);
memcpy(map, pic_vp, sizeof(*pic_vp));
refs[0] = (struct nvc0_video_buffer *)desc->ref[0];
refs[!!refs[0]] = (struct nvc0_video_buffer *)desc->ref[1];
return ret;
}
/**
 * Build the H.264 picture parameter block from \p d and copy it to \p map.
 *
 * References are taken from d->ref[] up to the first NULL entry; each one
 * fills refs[j] and h->refs[j]. tmp_idx of the picture itself is left at 0
 * and patched afterwards by nvc0_decoder_fill_picparm_h264_vp_refs().
 *
 * \param refs    out: the non-NULL leading entries of d->ref[]
 * \param is_ref  out: d->is_reference
 * \return the caps word for VP method 0x700 (always 0x1113)
 *
 * Side effect: dec->last_frame_num is updated to d->frame_num.
 */
static uint32_t
nvc0_decoder_fill_picparm_h264_vp(struct nvc0_decoder *dec,
                                  const struct pipe_h264_picture_desc *d,
                                  struct nvc0_video_buffer *refs[16],
                                  unsigned *is_ref,
                                  char *map)
{
   struct h264_picparm_vp stub_h = {}, *h = &stub_h;
   unsigned ring, i, j = 0;

   assert(offsetof(struct h264_picparm_vp, u224) == 0x224);
   *is_ref = d->is_reference;
   assert(!d->frame_num || dec->last_frame_num + 1 == d->frame_num || dec->last_frame_num == d->frame_num);
   dec->last_frame_num = d->frame_num;

   h->width = mb(dec->base.width);
   h->height = mb(dec->base.height);
   h->stride1 = h->stride2 = mb(dec->base.width)*16;
   nvc0_decoder_ycbcr_offsets(dec, &h->ofs[1], &h->ofs[3], &h->ofs[4]);
   h->ofs[5] = h->ofs[3];
   h->ofs[0] = h->ofs[2] = 0;
   h->u24 = dec->tmp_stride >> 8;
   assert(h->u24);
   nvc0_decoder_inter_sizes(dec, 1, &ring, &h->bucket_size, &h->inter_ring_data_size);

   h->u220 = 0;
   h->f0 = d->mb_adaptive_frame_field_flag;
   h->f1 = d->direct_8x8_inference_flag;
   h->weighted_pred_flag = d->weighted_pred_flag;
   h->f3 = d->constrained_intra_pred_flag;
   h->is_reference = d->is_reference;
   h->interlace = d->field_pic_flag;
   h->bottom_field_flag = d->bottom_field_flag;
   h->f7 = 0; // TODO: figure out when set..
   h->log2_max_frame_num_minus4 = d->log2_max_frame_num_minus4;
   h->u31_45 = 1;

   h->pic_order_cnt_type = d->pic_order_cnt_type;
   h->pic_init_qp_minus26 = d->pic_init_qp_minus26;
   h->chroma_qp_index_offset = d->chroma_qp_index_offset;
   h->second_chroma_qp_index_offset = d->second_chroma_qp_index_offset;
   h->weighted_bipred_idc = d->weighted_bipred_idc;
   h->tmp_idx = 0; // set in h264_vp_refs below
   h->fifo_dec_index = 0; // always set to 0 to be fifo compatible with other codecs
   h->frame_number = d->frame_num;
   h->u34_3030 = h->u34_3131 = 0;
   h->field_order_cnt[0] = d->field_order_cnt[0];
   h->field_order_cnt[1] = d->field_order_cnt[1];
   memset(h->refs, 0, sizeof(h->refs));
   /*
    * One copy fills both m4x4 and m8x8: it deliberately runs past m4x4 and
    * relies on the 8x8 lists directly following the 4x4 lists in both the
    * source descriptor and this struct.
    */
   memcpy(h->m4x4, d->scaling_lists_4x4, sizeof(h->m4x4) + sizeof(h->m8x8));

   /* Check the bound before num_ref_frames is used to index ref[] / refs[]. */
   assert(d->num_ref_frames <= dec->base.max_references);

   for (i = 0; i < d->num_ref_frames; ++i) {
      if (!d->ref[i])
         break;

      refs[j] = (struct nvc0_video_buffer *)d->ref[i];
      h->refs[j].fifo_idx = j + 1;
      h->refs[j].tmp_idx = refs[j]->valid_ref;
      h->refs[j].field_order_cnt[0] = d->field_order_cnt_list[i][0];
      h->refs[j].field_order_cnt[1] = d->field_order_cnt_list[i][1];
      h->refs[j].frame_idx = d->frame_num_list[i];
      if (!dec->refs[refs[j]->valid_ref].field_pic_flag) {
         h->refs[j].unk12 = d->top_is_reference[i];
         h->refs[j].unk13 = d->bottom_is_reference[i];
      }
      h->refs[j].unk14 = 0;
      h->refs[j].notseenyet = 0;
      h->refs[j].unk16 = dec->refs[refs[j]->valid_ref].field_pic_flag;
      h->refs[j].unk17 = dec->refs[refs[j]->valid_ref].decoded_top &&
                         d->top_is_reference[i];
      h->refs[j].unk21 = dec->refs[refs[j]->valid_ref].decoded_bottom &&
                         d->bottom_is_reference[i];
      h->refs[j].pad = 0;
      assert(!d->is_long_term[i]);
      j++;
   }

   /* Everything after the first NULL reference must be NULL as well. */
   for (; i < 16; ++i)
      assert(!d->ref[i]);

   /*
    * NOTE(review): i is always 16 here, so this loop never runs for any
    * num_ref_frames <= 16. Kept as-is because the intended behaviour is
    * unclear - confirm against a trace before changing or removing it.
    */
   for (; i < d->num_ref_frames; ++i)
      h->refs[j].unk16 = d->field_pic_flag;

   /* Copy out the same way the MPEG-1/2 and MPEG-4 variants do. */
   memcpy(map, h, sizeof(*h));

   return 0x1113;
}
/**
 * Second stage of the H.264 picparm setup, run once target has a reference
 * slot: stores that slot index in the already written block at \p map and
 * records in the slot which field(s) this picture decodes.
 *
 * A frame picture marks both fields; a field picture marks only the bottom
 * or only the top field, depending on d->bottom_field_flag.
 */
static void
nvc0_decoder_fill_picparm_h264_vp_refs(struct nvc0_decoder *dec,
                                       struct pipe_h264_picture_desc *d,
                                       struct nvc0_video_buffer *refs[16],
                                       struct nvc0_video_buffer *target,
                                       char *map)
{
   struct h264_picparm_vp *picparm = (struct h264_picparm_vp *)map;
   const unsigned slot = target->valid_ref;

   assert(dec->refs[slot].vidbuf == target);

   picparm->tmp_idx = slot;
   dec->refs[slot].field_pic_flag = d->field_pic_flag;

   if (!d->field_pic_flag) {
      dec->refs[slot].decoded_bottom = 1;
      dec->refs[slot].decoded_top = 1;
   } else if (d->bottom_field_flag) {
      dec->refs[slot].decoded_bottom = 1;
   } else {
      dec->refs[slot].decoded_top = 1;
   }
}
/**
 * Fill the VC-1 picture parameter block in place at \p map.
 *
 * Unlike the MPEG variants nothing is staged on the stack: every field of
 * struct vc1_picparm_vp is written directly through \p map.
 *
 * \param refs    out: d->ref[0] and d->ref[1], packed so that a NULL ref[0]
 *                is replaced by ref[1]
 * \param is_ref  out: non-zero when picture_type <= 1
 * \return the caps word for VP method 0x700 (always 0x12)
 */
static uint32_t
nvc0_decoder_fill_picparm_vc1_vp(struct nvc0_decoder *dec,
                                 struct pipe_vc1_picture_desc *d,
                                 struct nvc0_video_buffer *refs[16],
                                 unsigned *is_ref,
                                 char *map)
{
   struct vc1_picparm_vp *pp = (struct vc1_picparm_vp *)map;
   unsigned ring;

   /* The simple profile is rejected here. */
   assert(dec->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE);
   *is_ref = d->picture_type <= 1;

   nvc0_decoder_ycbcr_offsets(dec, &pp->ofs[1], &pp->ofs[3], &pp->ofs[4]);
   pp->ofs[0] = 0;
   pp->ofs[2] = 0;
   pp->ofs[5] = pp->ofs[3];

   pp->width = dec->base.width;
   pp->height = mb(dec->base.height)<<4;
   pp->unk10 = mb(dec->base.width)<<4; // Stride
   pp->unk0c = pp->unk10;
   pp->pad2 = 0;
   pp->pad = 0;

   nvc0_decoder_inter_sizes(dec, 1, &ring, &pp->bucket_size, &pp->inter_ring_data_size);

   pp->profile = dec->base.profile - PIPE_VIDEO_PROFILE_VC1_SIMPLE;
   pp->loopfilter = d->loopfilter;
   pp->fastuvmc = d->fastuvmc;
   pp->dquant = d->dquant;
   pp->overlap = d->overlap;
   pp->quantizer = d->quantizer;
   pp->u36 = 0; // ? No idea what this one is..

   /* ref[1] goes to refs[1] when ref[0] is present, else it takes refs[0]. */
   refs[0] = (struct nvc0_video_buffer *)d->ref[0];
   if (refs[0])
      refs[1] = (struct nvc0_video_buffer *)d->ref[1];
   else
      refs[0] = (struct nvc0_video_buffer *)d->ref[1];

   return 0x12;
}
#if NVC0_DEBUG_FENCE
/**
 * Debug helper: print the VP status/stage words from \p comm and, when the
 * stage byte is not 0xff, hex-dump the start of \p inter_bo.
 *
 * Returns early when the status entry for the current index is 1; otherwise
 * it finishes by asserting that the stage byte is 0xff.
 *
 * \param slice_size  number of bytes to dump in addition to comm->byte_ofs
 */
static void dump_comm_vp(struct nvc0_decoder *dec, struct comm *comm, u32 comm_seq,
                         struct nouveau_bo *inter_bo, unsigned slice_size)
{
   unsigned i, idx = comm->pvp_cur_index & 0xf;
   debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage);
#if 0
   debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs);

   debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index);

   for (i = 0; i != comm->irq_index; ++i)
      debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]);
   for (i = 0; i != comm->parse_endpos_index; ++i)
      debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]);
#endif
   debug_printf("mb_y = %u\n", comm->mb_y[idx]);
   if (comm->status_vp[idx] == 1)
      return;

   if ((comm->pvp_stage & 0xff) != 0xff) {
      unsigned *map;
      /*
       * Map outside of assert(): with asserts compiled out the call would
       * vanish and inter_bo->map would be dereferenced unmapped.
       */
      int ret = nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client);

      assert(ret >= 0);
      if (ret >= 0) {
         map = inter_bo->map;
         for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) {
            debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]);
         }
         /* Drop the CPU mapping again so the buffer is left unmapped. */
         munmap(inter_bo->map, inter_bo->size);
         inter_bo->map = NULL;
      }
   }
   assert((comm->pvp_stage & 0xff) == 0xff);
}
#endif
/**
 * Fill the codec-specific picture parameters for the VP engine and update
 * the reference slot table.
 *
 * The parameter block lives VP_OFFSET bytes into the mapping of the BSP
 * buffer selected by \p comm_seq. After the codec-specific fill, reference
 * handling is run with dec->fence_seq as the sequence number; H.264 then
 * patches the block with the slot chosen for \p target.
 *
 * \param caps    out: caps word returned by the codec-specific fill
 * \param is_ref  out: whether this picture is a reference
 * \param refs    out: reference buffers picked from the picture description
 *
 * An unknown codec asserts and returns without touching the slot table.
 */
void nvc0_decoder_vp_caps(struct nvc0_decoder *dec, union pipe_desc desc,
                          struct nvc0_video_buffer *target, unsigned comm_seq,
                          unsigned *caps, unsigned *is_ref,
                          struct nvc0_video_buffer *refs[16])
{
   struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NVC0_VIDEO_QDEPTH];
   enum pipe_video_codec codec = u_reduce_video_profile(dec->base.profile);
   char *picparm = bsp_bo->map + VP_OFFSET;

   if (codec == PIPE_VIDEO_CODEC_MPEG12) {
      *caps = nvc0_decoder_fill_picparm_mpeg12_vp(dec, desc.mpeg12, refs, is_ref, picparm);
   } else if (codec == PIPE_VIDEO_CODEC_MPEG4) {
      *caps = nvc0_decoder_fill_picparm_mpeg4_vp(dec, desc.mpeg4, refs, is_ref, picparm);
   } else if (codec == PIPE_VIDEO_CODEC_VC1) {
      *caps = nvc0_decoder_fill_picparm_vc1_vp(dec, desc.vc1, refs, is_ref, picparm);
   } else if (codec == PIPE_VIDEO_CODEC_MPEG4_AVC) {
      *caps = nvc0_decoder_fill_picparm_h264_vp(dec, desc.h264, refs, is_ref, picparm);
   } else {
      assert(0);
      return;
   }

   nvc0_decoder_handle_references(dec, refs, dec->fence_seq, target);

   /* Only H.264 needs the slot index written back into the picparm. */
   if (codec == PIPE_VIDEO_CODEC_MPEG4_AVC)
      nvc0_decoder_fill_picparm_h264_vp_refs(dec, desc.h264, refs, target, picparm);
}
/**
 * Queue the VP (video processor) pass for one picture on dec->pushbuf[1] and
 * kick it.
 *
 * \param desc      picture description; only desc.h264->slice_count is read
 *                  here, and only for H.264
 * \param target    buffer being decoded into
 * \param comm_seq  sequence number; also selects the BSP and inter buffers
 * \param caps      caps word for method 0x700 (from nvc0_decoder_vp_caps())
 * \param is_ref    when zero, target's reference slot is released
 * \param refs      reference buffers; entries that no longer own their slot
 *                  are replaced by the null frame address
 *
 * With NVC0_DEBUG_FENCE enabled a semaphore write is added and the function
 * spins until dec->fence_map[4] has caught up with dec->fence_seq.
 */
void
nvc0_decoder_vp(struct nvc0_decoder *dec, union pipe_desc desc,
                struct nvc0_video_buffer *target, unsigned comm_seq,
                unsigned caps, unsigned is_ref,
                struct nvc0_video_buffer *refs[16])
{
   struct nouveau_pushbuf *push = dec->pushbuf[1];
   uint32_t bsp_addr, comm_addr, inter_addr, ucode_addr, pic_addr[17], last_addr, null_addr;
   uint32_t slice_size, bucket_size, ring_size, i;
   enum pipe_video_codec codec = u_reduce_video_profile(dec->base.profile);
   struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NVC0_VIDEO_QDEPTH];
   struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
   u32 fence_extra = 0, codec_extra = 0;
   struct nouveau_pushbuf_refn bo_refs[] = {
      { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
      { dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
      { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
      /*
       * Use #if, like every other NVC0_DEBUG_FENCE test in this function, so
       * a definition of 0 does not pull the fence buffer into the list.
       */
#if NVC0_DEBUG_FENCE
      { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
#endif
      /* Must stay last: it is cut off below when there is no firmware bo. */
      { dec->fw_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
   };
   int num_refs = sizeof(bo_refs)/sizeof(*bo_refs) - !dec->fw_bo;

#if NVC0_DEBUG_FENCE
   fence_extra = 4;
#endif

   if (codec == PIPE_VIDEO_CODEC_MPEG4_AVC) {
      nvc0_decoder_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
      codec_extra += 2;
   } else
      nvc0_decoder_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size);

   if (dec->base.max_references > 2)
      codec_extra += 1 + (dec->base.max_references - 2);

   /* pic_addr[0..15] are the references, pic_addr[16] is the target. */
   assert(dec->base.max_references <= 16);
   pic_addr[16] = nvc0_video_addr(dec, target) >> 8;
   last_addr = null_addr = nvc0_video_addr(dec, NULL) >> 8;

   /*
    * pic_addr[0] and pic_addr[1] are pushed unconditionally below, so give
    * every reference entry a defined value even when max_references < 2.
    */
   for (i = 0; i < 16; ++i)
      pic_addr[i] = null_addr;

   for (i = 0; i < dec->base.max_references; ++i) {
      if (!refs[i])
         pic_addr[i] = last_addr;   /* repeat the last valid reference */
      else if (dec->refs[refs[i]->valid_ref].vidbuf == refs[i])
         last_addr = pic_addr[i] = nvc0_video_addr(dec, refs[i]) >> 8;
      else
         pic_addr[i] = null_addr;   /* slot was reused, not a real ref */
   }
   if (!is_ref)
      nvc0_decoder_kick_ref(dec, target);

   PUSH_SPACE(push, 8 + 3 * (codec != PIPE_VIDEO_CODEC_MPEG12) +
              6 + codec_extra + fence_extra + 2);

   nouveau_pushbuf_refn(push, bo_refs, num_refs);

   bsp_addr = bsp_bo->offset >> 8;
#if NVC0_DEBUG_FENCE
   comm_addr = (dec->fence_bo->offset + COMM_OFFSET)>>8;
#else
   comm_addr = bsp_addr + (COMM_OFFSET>>8);
#endif
   inter_addr = inter_bo->offset >> 8;
   if (dec->fw_bo)
      ucode_addr = dec->fw_bo->offset >> 8;
   else
      ucode_addr = 0;

   BEGIN_NVC0(push, SUBC_VP(0x700), 7);
   PUSH_DATA (push, caps); // 700
   PUSH_DATA (push, comm_seq); // 704
   PUSH_DATA (push, 0); // 708 fuc targets, ignored for nvc0
   PUSH_DATA (push, dec->fw_sizes); // 70c
   PUSH_DATA (push, bsp_addr+(VP_OFFSET>>8)); // 710 picparm_addr
   PUSH_DATA (push, inter_addr); // 714 inter_parm
   PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 718 inter_data_ofs

   if (bucket_size) {
      uint64_t tmpimg_addr = dec->ref_bo->offset + dec->ref_stride * (dec->base.max_references+2);

      BEGIN_NVC0(push, SUBC_VP(0x71c), 2);
      PUSH_DATA (push, tmpimg_addr >> 8); // 71c
      PUSH_DATA (push, inter_addr + slice_size); // 720 bucket_ofs
   }

   /*
    * NOTE(review): five consecutive words starting at 0x724 land on
    * 0x724..0x734 in push order; the inherited per-line annotations on the
    * last three words do not follow that order - confirm against a trace.
    */
   BEGIN_NVC0(push, SUBC_VP(0x724), 5);
   PUSH_DATA (push, comm_addr); // 724
   PUSH_DATA (push, ucode_addr); // 728
   PUSH_DATA (push, pic_addr[16]); // 734
   PUSH_DATA (push, pic_addr[0]); // 72c
   PUSH_DATA (push, pic_addr[1]); // 730

   /* References beyond the first two go to methods 0x400 onwards. */
   if (dec->base.max_references > 2) {
      BEGIN_NVC0(push, SUBC_VP(0x400), dec->base.max_references - 2);
      for (i = 2; i < dec->base.max_references; ++i) {
         assert(0x400 + (i - 2) * 4 < 0x438);
         PUSH_DATA (push, pic_addr[i]);
      }
   }

   if (codec == PIPE_VIDEO_CODEC_MPEG4_AVC) {
      BEGIN_NVC0(push, SUBC_VP(0x438), 1);
      PUSH_DATA (push, desc.h264->slice_count);
   }

   //debug_printf("Decoding %08lx with %08lx and %08lx\n", pic_addr[16], pic_addr[0], pic_addr[1]);

#if NVC0_DEBUG_FENCE
   BEGIN_NVC0(push, SUBC_VP(0x240), 3);
   PUSH_DATAh(push, (dec->fence_bo->offset + 0x10));
   PUSH_DATA (push, (dec->fence_bo->offset + 0x10));
   PUSH_DATA (push, dec->fence_seq);

   BEGIN_NVC0(push, SUBC_VP(0x300), 1);
   PUSH_DATA (push, 1);
   PUSH_KICK(push);

   {
      unsigned spin = 0;
      /* Poll the fence word, dumping state every 256th iteration. */
      do {
         usleep(100);
         if ((spin++ & 0xff) == 0xff) {
            debug_printf("vp%u: %u\n", dec->fence_seq, dec->fence_map[4]);
            dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8);
         }
      } while (dec->fence_seq > dec->fence_map[4]);
   }
   dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8);
#else
   BEGIN_NVC0(push, SUBC_VP(0x300), 1);
   PUSH_DATA (push, 0);
   PUSH_KICK (push);
#endif
}