304 lines
9.6 KiB
C
304 lines
9.6 KiB
C
/**************************************************************************
|
|
*
|
|
* Copyright 2007-2021 VMware, Inc.
|
|
* All Rights Reserved.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the
|
|
* "Software"), to deal in the Software without restriction, including
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
* distribute, sub license, and/or sell copies of the Software, and to
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
* the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the
|
|
* next paragraph) shall be included in all copies or substantial portions
|
|
* of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
|
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
|
|
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*
|
|
**************************************************************************/
|
|
|
|
/*
|
|
* Rasterization for binned rectangles within a tile
|
|
*/
|
|
|
|
#include <limits.h>
|
|
#include "util/u_math.h"
|
|
#include "lp_debug.h"
|
|
#include "lp_perf.h"
|
|
#include "lp_rast_priv.h"
|
|
|
|
|
|
/* Our 16-pixel stamps are laid out as:
|
|
*
|
|
* 0 1 2 3
|
|
* 4 5 6 7
|
|
* 8 9 10 11
|
|
* 12 13 14 15
|
|
*
|
|
* Define bitmasks for each row and column in this layout:
|
|
*/
|
|
/* Column masks: column 0 is bits 0, 4, 8 and 12 (0x1111); every
 * following column is that same pattern moved up by one bit.
 */
#define COLUMN0 (0x1111 << 0)
#define COLUMN1 (0x1111 << 1)
#define COLUMN2 (0x1111 << 2)
#define COLUMN3 (0x1111 << 3)

/* Row masks: row 0 is bits 0..3 (0x000f); every following row is that
 * same pattern moved up by four bits.
 */
#define ROW0 (0x000f << 0)
#define ROW1 (0x000f << 4)
#define ROW2 (0x000f << 8)
#define ROW3 (0x000f << 12)

/* Number of pixels along each side of a stamp. */
#define STAMP_SIZE 4
|
|
|
|
static unsigned left_mask_tab[STAMP_SIZE] = {
|
|
COLUMN0 | COLUMN1 | COLUMN2 | COLUMN3,
|
|
COLUMN1 | COLUMN2 | COLUMN3,
|
|
COLUMN2 | COLUMN3,
|
|
COLUMN3,
|
|
};
|
|
|
|
static unsigned right_mask_tab[STAMP_SIZE] = {
|
|
COLUMN0,
|
|
COLUMN0 | COLUMN1,
|
|
COLUMN0 | COLUMN1 | COLUMN2,
|
|
COLUMN0 | COLUMN1 | COLUMN2 | COLUMN3,
|
|
};
|
|
|
|
static unsigned top_mask_tab[STAMP_SIZE] = {
|
|
ROW0 | ROW1 | ROW2 | ROW3,
|
|
ROW1 | ROW2 | ROW3,
|
|
ROW2 | ROW3,
|
|
ROW3,
|
|
};
|
|
|
|
static unsigned bottom_mask_tab[STAMP_SIZE] = {
|
|
ROW0,
|
|
ROW0 | ROW1,
|
|
ROW0 | ROW1 | ROW2,
|
|
ROW0 | ROW1 | ROW2 | ROW3,
|
|
};
|
|
|
|
|
|
/**
|
|
* Shade all pixels in a 4x4 block. The fragment code omits the
|
|
* triangle in/out tests.
|
|
* \param x, y location of 4x4 block in window coords
|
|
*/
|
|
static void
|
|
shade_quads_all( struct lp_rasterizer_task *task,
|
|
const struct lp_rast_shader_inputs *inputs,
|
|
unsigned x, unsigned y )
|
|
{
|
|
const struct lp_scene *scene = task->scene;
|
|
const struct lp_rast_state *state = task->state;
|
|
struct lp_fragment_shader_variant *variant = state->variant;
|
|
uint8_t *color = scene->cbufs[0].map;
|
|
unsigned stride = scene->cbufs[0].stride;
|
|
uint8_t *cbufs[1];
|
|
unsigned strides[1];
|
|
|
|
color += x * 4;
|
|
color += y * stride;
|
|
cbufs[0] = color;
|
|
strides[0] = stride;
|
|
|
|
assert(!variant->key.depth.enabled);
|
|
|
|
/* run shader on 4x4 block */
|
|
BEGIN_JIT_CALL(state, task);
|
|
variant->jit_function[RAST_WHOLE]( &state->jit_context,
|
|
x, y,
|
|
inputs->frontfacing,
|
|
(const float (*)[4])GET_A0(inputs),
|
|
(const float (*)[4])GET_DADX(inputs),
|
|
(const float (*)[4])GET_DADY(inputs),
|
|
cbufs,
|
|
NULL,
|
|
0xffff,
|
|
&task->thread_data,
|
|
strides, 0, 0, 0 );
|
|
END_JIT_CALL();
|
|
}
|
|
|
|
static void
|
|
shade_quads_mask(struct lp_rasterizer_task *task,
|
|
const struct lp_rast_shader_inputs *inputs,
|
|
unsigned x, unsigned y,
|
|
unsigned mask)
|
|
{
|
|
const struct lp_rast_state *state = task->state;
|
|
struct lp_fragment_shader_variant *variant = state->variant;
|
|
const struct lp_scene *scene = task->scene;
|
|
uint8_t *color = scene->cbufs[0].map;
|
|
unsigned stride = scene->cbufs[0].stride;
|
|
uint8_t *cbufs[1];
|
|
unsigned strides[1];
|
|
|
|
color += x * 4;
|
|
color += y * stride;
|
|
cbufs[0] = color;
|
|
strides[0] = stride;
|
|
|
|
assert(!variant->key.depth.enabled);
|
|
|
|
/* Propagate non-interpolated raster state */
|
|
task->thread_data.raster_state.viewport_index = inputs->viewport_index;
|
|
|
|
/* run shader on 4x4 block */
|
|
BEGIN_JIT_CALL(state, task);
|
|
variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
|
|
x, y,
|
|
inputs->frontfacing,
|
|
(const float (*)[4])GET_A0(inputs),
|
|
(const float (*)[4])GET_DADX(inputs),
|
|
(const float (*)[4])GET_DADY(inputs),
|
|
cbufs,
|
|
NULL,
|
|
mask,
|
|
&task->thread_data,
|
|
strides, 0, 0, 0);
|
|
END_JIT_CALL();
|
|
}
|
|
|
|
/* Shade a 4x4 stamp completely within the rectangle.
|
|
*/
|
|
static inline void
|
|
full(struct lp_rasterizer_task *task,
|
|
const struct lp_rast_shader_inputs *inputs,
|
|
unsigned ix, unsigned iy)
|
|
{
|
|
shade_quads_all(task,
|
|
inputs,
|
|
ix * STAMP_SIZE,
|
|
iy * STAMP_SIZE);
|
|
}
|
|
|
|
/* Shade a 4x4 stamp which may be partially outside the rectangle,
|
|
* according to the mask parameter.
|
|
*/
|
|
static inline void
|
|
partial(struct lp_rasterizer_task *task,
|
|
const struct lp_rast_shader_inputs *inputs,
|
|
unsigned ix, unsigned iy,
|
|
unsigned mask)
|
|
{
|
|
/* Unfortunately we can end up generating full blocks on this path,
|
|
* need to catch them.
|
|
*/
|
|
if (mask == 0xffff)
|
|
full(task, inputs, ix, iy);
|
|
else {
|
|
assert(mask);
|
|
shade_quads_mask(task,
|
|
inputs,
|
|
ix * STAMP_SIZE,
|
|
iy * STAMP_SIZE,
|
|
mask);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* Run the full SoA shader.
|
|
*/
|
|
void
|
|
lp_rast_linear_rect_fallback(struct lp_rasterizer_task *task,
|
|
const struct lp_rast_shader_inputs *inputs,
|
|
const struct u_rect *box)
|
|
{
|
|
unsigned ix0, ix1, iy0, iy1;
|
|
unsigned left_mask;
|
|
unsigned right_mask;
|
|
unsigned top_mask;
|
|
unsigned bottom_mask;
|
|
unsigned i,j;
|
|
|
|
/* The interior of the rectangle (if there is one) will be
|
|
* rasterized as full 4x4 stamps.
|
|
*
|
|
* At each edge of the rectangle, however, there will be a fringe
|
|
* of partial blocks where the edge lands somewhere in the middle
|
|
* of a 4x4-pixel stamp.
|
|
*
|
|
* For each edge, precalculate a mask of the pixels inside that
|
|
* edge for the first 4x4-pixel stamp.
|
|
*
|
|
* Note that at the corners, and for narrow rectangles, an
|
|
* individual stamp may have two or more edges active. We'll deal
|
|
* with that below by combining these masks as appropriate.
|
|
*/
|
|
left_mask = left_mask_tab [box->x0 & (STAMP_SIZE - 1)];
|
|
right_mask = right_mask_tab [box->x1 & (STAMP_SIZE - 1)];
|
|
top_mask = top_mask_tab [box->y0 & (STAMP_SIZE - 1)];
|
|
bottom_mask = bottom_mask_tab [box->y1 & (STAMP_SIZE - 1)];
|
|
|
|
ix0 = box->x0 / STAMP_SIZE;
|
|
ix1 = box->x1 / STAMP_SIZE;
|
|
iy0 = box->y0 / STAMP_SIZE;
|
|
iy1 = box->y1 / STAMP_SIZE;
|
|
|
|
/* Various special cases.
|
|
*/
|
|
if (ix0 == ix1 && iy0 == iy1) {
|
|
/* Rectangle is contained within a single 4x4-pixel stamp:
|
|
*/
|
|
partial(task, inputs, ix0, iy0,
|
|
(left_mask & right_mask &
|
|
top_mask & bottom_mask));
|
|
}
|
|
else if (ix0 == ix1) {
|
|
/* Left and right edges fall on the same 4-pixel-wide column:
|
|
*/
|
|
unsigned mask = left_mask & right_mask;
|
|
partial(task, inputs, ix0, iy0, mask & top_mask);
|
|
for (i = iy0 + 1; i < iy1; i++)
|
|
partial(task, inputs, ix0, i, mask);
|
|
partial(task, inputs, ix0, iy1, mask & bottom_mask);
|
|
}
|
|
else if (iy0 == iy1) {
|
|
/* Top and bottom edges fall on the same 4-pixel-wide row:
|
|
*/
|
|
unsigned mask = top_mask & bottom_mask;
|
|
partial(task, inputs, ix0, iy0, mask & left_mask);
|
|
for (i = ix0 + 1; i < ix1; i++)
|
|
partial(task, inputs, i, iy0, mask);
|
|
partial(task, inputs, ix1, iy0, mask & right_mask);
|
|
}
|
|
else {
|
|
/* Each pair of edges falls in a separate 4-pixel-wide
|
|
* row/column.
|
|
*/
|
|
partial(task, inputs, ix0, iy0, left_mask & top_mask);
|
|
partial(task, inputs, ix0, iy1, left_mask & bottom_mask);
|
|
partial(task, inputs, ix1, iy0, right_mask & top_mask);
|
|
partial(task, inputs, ix1, iy1, right_mask & bottom_mask);
|
|
|
|
for (i = ix0 + 1; i < ix1; i++)
|
|
partial(task, inputs, i, iy0, top_mask);
|
|
|
|
for (i = ix0 + 1; i < ix1; i++)
|
|
partial(task, inputs, i, iy1, bottom_mask);
|
|
|
|
for (i = iy0 + 1; i < iy1; i++)
|
|
partial(task, inputs, ix0, i, left_mask);
|
|
|
|
for (i = iy0 + 1; i < iy1; i++)
|
|
partial(task, inputs, ix1, i, right_mask);
|
|
|
|
/* Full interior blocks
|
|
*/
|
|
for (j = iy0 + 1; j < iy1; j++) {
|
|
for (i = ix0 + 1; i < ix1; i++) {
|
|
full(task, inputs, i, j);
|
|
}
|
|
}
|
|
}
|
|
}
|