mesa/st: support lowering multi-planar YUV

Support multi-planar YUV for external EGLImages (currently just in the
dma-buf import path) by lowering to multiple texture fetches, one for
each plane, and doing the CSC (color space conversion) in the shader.

There was some discussion of alternative approaches for tracking the
additional UV or U/V planes:

  https://lists.freedesktop.org/archives/mesa-dev/2016-September/127832.html

They all seemed worse than pipe_resource::next

Signed-off-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
Rob Clark 2016-08-31 17:44:01 -04:00
parent e0ec1c3134
commit ecd6fce261
18 changed files with 359 additions and 27 deletions

View File

@ -136,8 +136,10 @@ pipe_resource_reference(struct pipe_resource **ptr, struct pipe_resource *tex)
struct pipe_resource *old_tex = *ptr;
if (pipe_reference_described(&(*ptr)->reference, &tex->reference,
(debug_reference_descriptor)debug_describe_resource))
(debug_reference_descriptor)debug_describe_resource)) {
pipe_resource_reference(&old_tex->next, NULL);
old_tex->screen->resource_destroy(old_tex->screen, old_tex);
}
*ptr = tex;
}

View File

@ -498,6 +498,12 @@ struct pipe_resource
unsigned bind; /**< bitmask of PIPE_BIND_x */
unsigned flags; /**< bitmask of PIPE_RESOURCE_FLAG_x */
/**
* For planar images, ie. YUV EGLImage external, etc, pointer to the
* next plane.
*/
struct pipe_resource *next;
};

View File

@ -200,6 +200,9 @@ struct st_egl_image
/* this is owned by the caller */
struct pipe_resource *texture;
/* format only differs from texture->format for multi-planar (YUV): */
enum pipe_format format;
unsigned level;
unsigned layer;
};

View File

@ -83,6 +83,21 @@ static int convert_fourcc(int format, int *dri_components_p)
format = __DRI_IMAGE_FORMAT_GR88;
dri_components = __DRI_IMAGE_COMPONENTS_RG;
break;
/*
* For multi-planar YUV formats, we return the format of the first
* plane only. Since there is only one caller which supports multi-
* planar YUV it gets to figure out the remaining planes on its
* own.
*/
case __DRI_IMAGE_FOURCC_YUV420:
case __DRI_IMAGE_FOURCC_YVU420:
format = __DRI_IMAGE_FORMAT_R8;
dri_components = __DRI_IMAGE_COMPONENTS_Y_U_V;
break;
case __DRI_IMAGE_FOURCC_NV12:
format = __DRI_IMAGE_FORMAT_R8;
dri_components = __DRI_IMAGE_COMPONENTS_Y_UV;
break;
default:
return -1;
}
@ -90,6 +105,11 @@ static int convert_fourcc(int format, int *dri_components_p)
return format;
}
/* NOTE this probably isn't going to do the right thing for YUV images
* (but I think the same can be said for intel_query_image()). I think
* it is only needed for exporting dmabufs, so I think I won't lose much
* sleep over it.
*/
static int convert_to_fourcc(int format)
{
switch(format) {
@ -762,14 +782,16 @@ dri2_lookup_egl_image(struct dri_screen *screen, void *handle)
static __DRIimage *
dri2_create_image_from_winsys(__DRIscreen *_screen,
int width, int height, int format,
struct winsys_handle *whandle,
int num_handles, struct winsys_handle *whandle,
void *loaderPrivate)
{
struct dri_screen *screen = dri_screen(_screen);
struct pipe_screen *pscreen = screen->base.screen;
__DRIimage *img;
struct pipe_resource templ;
unsigned tex_usage;
enum pipe_format pf;
int i;
tex_usage = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
@ -783,19 +805,47 @@ dri2_create_image_from_winsys(__DRIscreen *_screen,
memset(&templ, 0, sizeof(templ));
templ.bind = tex_usage;
templ.format = pf;
templ.target = screen->target;
templ.last_level = 0;
templ.width0 = width;
templ.height0 = height;
templ.depth0 = 1;
templ.array_size = 1;
img->texture = screen->base.screen->resource_from_handle(screen->base.screen,
&templ, whandle, PIPE_HANDLE_USAGE_READ_WRITE);
if (!img->texture) {
FREE(img);
return NULL;
for (i = num_handles - 1; i >= 0; i--) {
struct pipe_resource *tex;
/* TODO: something a lot less ugly */
switch (i) {
case 0:
templ.width0 = width;
templ.height0 = height;
templ.format = pf;
break;
case 1:
templ.width0 = width / 2;
templ.height0 = height / 2;
templ.format = (num_handles == 2) ?
PIPE_FORMAT_RG88_UNORM : /* NV12, etc */
PIPE_FORMAT_R8_UNORM; /* I420, etc */
break;
case 2:
templ.width0 = width / 2;
templ.height0 = height / 2;
templ.format = PIPE_FORMAT_R8_UNORM;
break;
default:
unreachable("too many planes!");
}
tex = pscreen->resource_from_handle(pscreen,
&templ, &whandle[i], PIPE_HANDLE_USAGE_READ_WRITE);
if (!tex) {
pipe_resource_reference(&img->texture, NULL);
FREE(img);
return NULL;
}
tex->next = img->texture;
img->texture = tex;
}
img->level = 0;
@ -826,7 +876,7 @@ dri2_create_image_from_name(__DRIscreen *_screen,
whandle.stride = pitch * util_format_get_blocksize(pf);
return dri2_create_image_from_winsys(_screen, width, height, format,
&whandle, loaderPrivate);
1, &whandle, loaderPrivate);
}
static __DRIimage *
@ -836,12 +886,26 @@ dri2_create_image_from_fd(__DRIscreen *_screen,
int *offsets, unsigned *error,
int *dri_components, void *loaderPrivate)
{
struct winsys_handle whandle;
struct winsys_handle whandles[3];
int format;
__DRIimage *img = NULL;
unsigned err = __DRI_IMAGE_ERROR_SUCCESS;
int expected_num_fds, i;
if (num_fds != 1) {
switch (fourcc) {
case __DRI_IMAGE_FOURCC_YUV420:
case __DRI_IMAGE_FOURCC_YVU420:
expected_num_fds = 3;
break;
case __DRI_IMAGE_FOURCC_NV12:
expected_num_fds = 2;
break;
default:
expected_num_fds = 1;
break;
}
if (num_fds != expected_num_fds) {
err = __DRI_IMAGE_ERROR_BAD_MATCH;
goto exit;
}
@ -852,19 +916,30 @@ dri2_create_image_from_fd(__DRIscreen *_screen,
goto exit;
}
if (fds[0] < 0) {
err = __DRI_IMAGE_ERROR_BAD_ALLOC;
goto exit;
memset(whandles, 0, sizeof(whandles));
for (i = 0; i < num_fds; i++) {
if (fds[i] < 0) {
err = __DRI_IMAGE_ERROR_BAD_ALLOC;
goto exit;
}
whandles[i].type = DRM_API_HANDLE_TYPE_FD;
whandles[i].handle = (unsigned)fds[i];
whandles[i].stride = (unsigned)strides[i];
whandles[i].offset = (unsigned)offsets[i];
}
memset(&whandle, 0, sizeof(whandle));
whandle.type = DRM_API_HANDLE_TYPE_FD;
whandle.handle = (unsigned)fds[0];
whandle.stride = (unsigned)strides[0];
whandle.offset = (unsigned)offsets[0];
if (fourcc == __DRI_IMAGE_FOURCC_YVU420) {
/* convert to YUV420 by swapping 2nd and 3rd planes: */
struct winsys_handle tmp = whandles[1];
whandles[1] = whandles[2];
whandles[2] = tmp;
fourcc = __DRI_IMAGE_FOURCC_YUV420;
}
img = dri2_create_image_from_winsys(_screen, width, height, format,
&whandle, loaderPrivate);
num_fds, whandles, loaderPrivate);
if(img == NULL)
err = __DRI_IMAGE_ERROR_BAD_ALLOC;
@ -1067,7 +1142,7 @@ dri2_from_names(__DRIscreen *screen, int width, int height, int format,
whandle.offset = offsets[0];
img = dri2_create_image_from_winsys(screen, width, height, format,
&whandle, loaderPrivate);
1, &whandle, loaderPrivate);
if (img == NULL)
return NULL;

View File

@ -334,6 +334,17 @@ dri_get_egl_image(struct st_manager *smapi,
stimg->texture = NULL;
pipe_resource_reference(&stimg->texture, img->texture);
switch (img->dri_components) {
case __DRI_IMAGE_COMPONENTS_Y_U_V:
stimg->format = PIPE_FORMAT_IYUV;
break;
case __DRI_IMAGE_COMPONENTS_Y_UV:
stimg->format = PIPE_FORMAT_NV12;
break;
default:
stimg->format = img->texture->format;
break;
}
stimg->level = img->level;
stimg->layer = img->layer;

View File

@ -45,6 +45,7 @@
#include "compiler/shader_enums.h"
#include "main/formats.h" /* MESA_FORMAT_COUNT */
#include "compiler/glsl/list.h"
#include "util/bitscan.h"
#ifdef __cplusplus
@ -1929,6 +1930,7 @@ struct gl_program
GLbitfield TexturesUsed[MAX_COMBINED_TEXTURE_IMAGE_UNITS]; /**< TEXTURE_x_BIT bitmask */
GLbitfield SamplersUsed; /**< Bitfield of which samplers are used */
GLbitfield ShadowSamplers; /**< Texture units used for shadow sampling. */
GLbitfield ExternalSamplersUsed; /**< Texture units used for samplerExternalOES */
GLboolean UsesGather; /**< Does this program use gather4 at all? */
@ -2460,6 +2462,20 @@ struct gl_linked_shader
struct gl_shader_info info;
};
/**
 * Build a bitmask of the samplers in \p shader that target external textures.
 *
 * Walks every bit set in shader->active_samplers and sets the same bit in
 * the result when shader->SamplerTargets[] for that index is
 * TEXTURE_EXTERNAL_INDEX.
 *
 * \param shader  linked shader whose active samplers are inspected
 * \return bitmask with one bit set per external sampler index
 */
static inline GLbitfield gl_external_samplers(struct gl_linked_shader *shader)
{
   GLbitfield external_samplers = 0;
   GLbitfield mask = shader->active_samplers;

   while (mask) {
      int idx = u_bit_scan(&mask);

      if (shader->SamplerTargets[idx] == TEXTURE_EXTERNAL_INDEX) {
         /* Unsigned constant keeps the shift well-defined for the top bit;
          * a signed "1 << 31" would be undefined behaviour.
          */
         external_samplers |= (1u << idx);
      }
   }

   return external_samplers;
}
/**
* A GLSL shader object.
*/

View File

@ -2921,6 +2921,7 @@ get_mesa_program(struct gl_context *ctx,
prog->SamplersUsed = shader->active_samplers;
prog->ShadowSamplers = shader->shadow_samplers;
prog->ExternalSamplersUsed = gl_external_samplers(shader);
_mesa_update_shader_textures_used(shader_program, prog);
/* Set the gl_FragDepth layout. */

View File

@ -243,13 +243,13 @@ update_shader_samplers(struct st_context *st,
struct pipe_sampler_state *samplers,
unsigned *num_samplers)
{
GLbitfield samplers_used = prog->SamplersUsed;
GLbitfield free_slots = ~prog->SamplersUsed;
GLbitfield external_samplers_used = prog->ExternalSamplersUsed;
GLuint unit;
GLbitfield samplers_used;
const GLuint old_max = *num_samplers;
const struct pipe_sampler_state *states[PIPE_MAX_SAMPLERS];
samplers_used = prog->SamplersUsed;
if (*num_samplers == 0 && samplers_used == 0x0)
return;
@ -275,6 +275,41 @@ update_shader_samplers(struct st_context *st,
}
}
/* For any external samplers with multi-planar YUV, stuff the additional
* sampler states we need at the end.
*
* Just re-use the existing sampler-state from the primary slot.
*/
while (unlikely(external_samplers_used)) {
GLuint unit = u_bit_scan(&external_samplers_used);
GLuint extra = 0;
struct st_texture_object *stObj =
st_get_texture_object(st->ctx, prog, unit);
struct pipe_sampler_state *sampler = samplers + unit;
if (!stObj)
continue;
switch (st_get_view_format(stObj)) {
case PIPE_FORMAT_NV12:
/* we need one additional sampler: */
extra = u_bit_scan(&free_slots);
states[extra] = sampler;
break;
case PIPE_FORMAT_IYUV:
/* we need two additional samplers: */
extra = u_bit_scan(&free_slots);
states[extra] = sampler;
extra = u_bit_scan(&free_slots);
states[extra] = sampler;
break;
default:
break;
}
*num_samplers = MAX2(*num_samplers, extra + 1);
}
cso_set_samplers(st->cso_context, shader_stage, *num_samplers, states);
}

View File

@ -51,6 +51,7 @@
#include "st_context.h"
#include "st_atom.h"
#include "st_program.h"
#include "st_texture.h"
/** Compress the fog function enums into a 2-bit value */
@ -142,6 +143,8 @@ update_fp( struct st_context *st )
}
}
key.external = st_get_external_sampler_key(st, &stfp->Base.Base);
st->fp_variant = st_get_fp_variant(st, stfp, &key);
st_reference_fragprog(st, &st->fp, stfp);

View File

@ -408,6 +408,15 @@ update_single_texture(struct st_context *st,
}
}
switch (view_format) {
case PIPE_FORMAT_NV12:
case PIPE_FORMAT_IYUV:
view_format = PIPE_FORMAT_R8_UNORM;
break;
default:
break;
}
*sampler_view =
st_get_texture_sampler_view_from_stobj(st, stObj, view_format,
glsl_version);
@ -426,6 +435,8 @@ update_textures(struct st_context *st,
{
const GLuint old_max = *num_textures;
GLbitfield samplers_used = prog->SamplersUsed;
GLbitfield free_slots = ~prog->SamplersUsed;
GLbitfield external_samplers_used = prog->ExternalSamplersUsed;
GLuint unit;
struct gl_shader_program *shader =
st->ctx->_Shader->CurrentProgram[mesa_shader];
@ -460,6 +471,53 @@ update_textures(struct st_context *st,
pipe_sampler_view_reference(&(sampler_views[unit]), sampler_view);
}
/* For any external samplers with multi-planar YUV, stuff the additional
* sampler views we need at the end.
*
* Trying to cache the sampler view in the stObj looks painful, so just
* re-create the sampler view for the extra planes each time. Main use
* case is video playback (ie. fps games wouldn't be using this) so I
* guess no point to try to optimize this feature.
*/
while (unlikely(external_samplers_used)) {
GLuint unit = u_bit_scan(&external_samplers_used);
GLuint extra = 0;
struct st_texture_object *stObj =
st_get_texture_object(st->ctx, prog, unit);
struct pipe_sampler_view tmpl;
if (!stObj)
continue;
/* use original view as template: */
tmpl = *sampler_views[unit];
switch (st_get_view_format(stObj)) {
case PIPE_FORMAT_NV12:
/* we need one additional R8G8 view: */
tmpl.format = PIPE_FORMAT_RG88_UNORM;
tmpl.swizzle_g = PIPE_SWIZZLE_Y; /* tmpl from Y plane is R8 */
extra = u_bit_scan(&free_slots);
sampler_views[extra] =
st->pipe->create_sampler_view(st->pipe, stObj->pt->next, &tmpl);
break;
case PIPE_FORMAT_IYUV:
/* we need two additional R8 views: */
tmpl.format = PIPE_FORMAT_R8_UNORM;
extra = u_bit_scan(&free_slots);
sampler_views[extra] =
st->pipe->create_sampler_view(st->pipe, stObj->pt->next, &tmpl);
extra = u_bit_scan(&free_slots);
sampler_views[extra] =
st->pipe->create_sampler_view(st->pipe, stObj->pt->next->next, &tmpl);
break;
default:
break;
}
*num_textures = MAX2(*num_textures, extra + 1);
}
cso_set_sampler_views(st->cso_context,
shader_stage,
*num_textures,

View File

@ -119,6 +119,24 @@ st_bind_surface(struct gl_context *ctx, GLenum target,
texFormat = st_pipe_format_to_mesa_format(ps->format);
/* TODO RequiredTextureImageUnits should probably be reset back
* to 1 somewhere if different texture is bound??
*/
if (texFormat == MESA_FORMAT_NONE) {
switch (ps->format) {
case PIPE_FORMAT_NV12:
texFormat = MESA_FORMAT_R_UNORM8;
texObj->RequiredTextureImageUnits = 2;
break;
case PIPE_FORMAT_IYUV:
texFormat = MESA_FORMAT_R_UNORM8;
texObj->RequiredTextureImageUnits = 3;
break;
default:
unreachable("bad YUV format!");
}
}
_mesa_init_teximage_fields(ctx, texImage,
ps->width, ps->height, 1, 0, internalFormat,
texFormat);

View File

@ -255,11 +255,16 @@ void st_invalidate_state(struct gl_context * ctx, GLbitfield new_state)
st->active_states = st_get_active_states(ctx);
}
if (new_state & _NEW_TEXTURE)
if (new_state & _NEW_TEXTURE) {
st->dirty |= st->active_states &
(ST_NEW_SAMPLER_VIEWS |
ST_NEW_SAMPLERS |
ST_NEW_IMAGE_UNITS);
if (ctx->FragmentProgram._Current &&
ctx->FragmentProgram._Current->Base.ExternalSamplersUsed) {
st->dirty |= ST_NEW_FS_STATE;
}
}
if (new_state & _NEW_PROGRAM_CONSTANTS)
st->dirty |= st->active_states & ST_NEW_CONSTANTS;

View File

@ -423,6 +423,7 @@ st_nir_get_mesa_program(struct gl_context *ctx,
prog->SamplersUsed = shader->active_samplers;
prog->ShadowSamplers = shader->shadow_samplers;
prog->ExternalSamplersUsed = gl_external_samplers(shader);
_mesa_update_shader_textures_used(shader_program, prog);
_mesa_reference_program(ctx, &shader->Program, prog);

View File

@ -4347,6 +4347,10 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
}
}
}
if (inst->tex_target == TEXTURE_EXTERNAL_INDEX)
prog->ExternalSamplersUsed |= 1 << inst->sampler.index;
if (inst->buffer.file != PROGRAM_UNDEFINED && (
is_resource_instruction(inst->op) ||
inst->op == TGSI_OPCODE_STORE)) {

View File

@ -845,6 +845,7 @@ st_manager_get_egl_image_surface(struct st_context *st, void *eglimg)
return NULL;
u_surface_default_template(&surf_tmpl, stimg.texture);
surf_tmpl.format = stimg.format;
surf_tmpl.u.tex.level = stimg.level;
surf_tmpl.u.tex.first_layer = stimg.layer;
surf_tmpl.u.tex.last_layer = stimg.layer;

View File

@ -53,6 +53,7 @@
#include "st_cb_bitmap.h"
#include "st_cb_drawpixels.h"
#include "st_context.h"
#include "st_tgsi_lower_yuv.h"
#include "st_program.h"
#include "st_mesa_to_tgsi.h"
#include "st_atifs_to_tgsi.h"
@ -1024,8 +1025,23 @@ st_create_fp_variant(struct st_context *st,
NIR_PASS_V(tgsi.ir.nir, nir_lower_drawpixels, &options);
}
if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv)) {
nir_lower_tex_options options = {0};
options.lower_y_uv_external = key->external.lower_nv12;
options.lower_y_u_v_external = key->external.lower_iyuv;
NIR_PASS_V(tgsi.ir.nir, nir_lower_tex, &options);
}
st_finalize_nir(st, &stfp->Base.Base, tgsi.ir.nir);
if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv)) {
/* This pass needs to happen *after* nir_lower_sampler */
NIR_PASS_V(tgsi.ir.nir, st_nir_lower_tex_src_plane,
~stfp->Base.Base.SamplersUsed,
key->external.lower_nv12,
key->external.lower_iyuv);
}
variant->driver_shader = pipe->create_fs_state(pipe, &tgsi);
variant->key = *key;
@ -1122,6 +1138,25 @@ st_create_fp_variant(struct st_context *st,
fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
}
if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv)) {
const struct tgsi_token *tokens;
/* samplers inserted would conflict, but this should be unpossible: */
assert(!(key->bitmap || key->drawpixels));
tokens = st_tgsi_lower_yuv(tgsi.tokens,
~stfp->Base.Base.SamplersUsed,
key->external.lower_nv12,
key->external.lower_iyuv);
if (tokens) {
if (tgsi.tokens != stfp->tgsi.tokens)
tgsi_free_tokens(tgsi.tokens);
tgsi.tokens = tokens;
} else {
fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
}
}
if (ST_DEBUG & DEBUG_TGSI) {
tgsi_dump(tgsi.tokens, 0);
debug_printf("\n");

View File

@ -39,6 +39,7 @@
#include "program/program.h"
#include "pipe/p_state.h"
#include "st_context.h"
#include "st_texture.h"
#include "st_glsl_to_tgsi.h"
@ -48,6 +49,40 @@ extern "C" {
#define ST_DOUBLE_ATTRIB_PLACEHOLDER 0xffffffff
/**
 * Shader variant key bits describing which external samplers need
 * multi-planar YUV lowering. Each field is a bitmask indexed by sampler
 * number; a sampler is expected to appear in at most one of the two masks.
 */
struct st_external_sampler_key
{
GLuint lower_nv12; /**< bitmask of 2 plane YUV samplers (PIPE_FORMAT_NV12) */
GLuint lower_iyuv; /**< bitmask of 3 plane YUV samplers (PIPE_FORMAT_IYUV) */
};
/**
 * Compute the external-sampler portion of a shader variant key.
 *
 * For every sampler set in prog->ExternalSamplersUsed, the texture object
 * bound for that sampler is looked up and its view format decides which
 * mask (if any) receives the sampler's bit:
 *  - PIPE_FORMAT_NV12 -> key.lower_nv12
 *  - PIPE_FORMAT_IYUV -> key.lower_iyuv
 * Any other view format leaves both masks untouched for that sampler.
 *
 * \param st    state tracker context, used to reach the GL context
 * \param prog  program whose external samplers are examined
 * \return the key, zero-initialized apart from the bits described above
 */
static inline struct st_external_sampler_key
st_get_external_sampler_key(struct st_context *st, struct gl_program *prog)
{
   unsigned mask = prog->ExternalSamplersUsed;
   struct st_external_sampler_key key;

   /* memset (rather than field assignment) so the whole key is zeroed. */
   memset(&key, 0, sizeof(key));

   while (unlikely(mask)) {
      unsigned unit = u_bit_scan(&mask);
      struct st_texture_object *stObj =
         st_get_texture_object(st->ctx, prog, unit);

      /* Unsigned constants below: a signed "1 << 31" would be undefined
       * behaviour should the top sampler bit ever be in use.
       */
      switch (st_get_view_format(stObj)) {
      case PIPE_FORMAT_NV12:
         key.lower_nv12 |= (1u << unit);
         break;
      case PIPE_FORMAT_IYUV:
         key.lower_iyuv |= (1u << unit);
         break;
      default:
         break;
      }
   }

   return key;
}
/** Fragment program variant key */
struct st_fp_variant_key
{
@ -72,6 +107,8 @@ struct st_fp_variant_key
/** needed for ATI_fragment_shader */
char texture_targets[MAX_NUM_FRAGMENT_REGISTERS_ATI];
struct st_external_sampler_key external;
};

View File

@ -170,6 +170,27 @@ st_create_texture_sampler_view(struct pipe_context *pipe,
texture->format);
}
/**
 * Return the state-tracker texture object currently bound for a sampler.
 *
 * The sampler index is mapped to a texture unit through
 * prog->SamplerUnits[], and that unit's _Current texture object is wrapped
 * with st_texture_object().
 *
 * \param ctx   GL context holding the texture unit state
 * \param prog  program providing the sampler -> texture unit mapping
 * \param unit  sampler index within \p prog
 * \return the bound texture object, or NULL when the unit has none
 */
static inline struct st_texture_object *
st_get_texture_object(struct gl_context *ctx,
                      const struct gl_program *prog,
                      unsigned unit)
{
   struct gl_texture_object *bound =
      ctx->Texture.Unit[prog->SamplerUnits[unit]]._Current;

   return bound ? st_texture_object(bound) : NULL;
}
static inline enum pipe_format
st_get_view_format(struct st_texture_object *stObj)
{
if (!stObj)
return PIPE_FORMAT_NONE;
return stObj->surface_based ? stObj->surface_format : stObj->pt->format;
}
extern struct pipe_resource *