2009-10-07 22:36:43 +01:00
|
|
|
/**************************************************************************
|
|
|
|
*
|
s/Tungsten Graphics/VMware/
Tungsten Graphics Inc. was acquired by VMware Inc. in 2008. Leaving the
old copyright name is creating unnecessary confusion, hence this change.
This was the sed script I used:
$ cat tg2vmw.sed
# Run as:
#
# git reset --hard HEAD && find include scons src -type f -not -name 'sed*' -print0 | xargs -0 sed -i -f tg2vmw.sed
#
# Rename copyrights
s/Tungsten Gra\(ph\|hp\)ics,\? [iI]nc\.\?\(, Cedar Park\)\?\(, Austin\)\?\(, \(Texas\|TX\)\)\?\.\?/VMware, Inc./g
/Copyright/s/Tungsten Graphics\(,\? [iI]nc\.\)\?\(, Cedar Park\)\?\(, Austin\)\?\(, \(Texas\|TX\)\)\?\.\?/VMware, Inc./
s/TUNGSTEN GRAPHICS/VMWARE/g
# Rename emails
s/alanh@tungstengraphics.com/alanh@vmware.com/
s/jens@tungstengraphics.com/jowen@vmware.com/g
s/jrfonseca-at-tungstengraphics-dot-com/jfonseca-at-vmware-dot-com/
s/jrfonseca\?@tungstengraphics.com/jfonseca@vmware.com/g
s/keithw\?@tungstengraphics.com/keithw@vmware.com/g
s/michel@tungstengraphics.com/daenzer@vmware.com/g
s/thomas-at-tungstengraphics-dot-com/thellstom-at-vmware-dot-com/
s/zack@tungstengraphics.com/zackr@vmware.com/
# Remove dead links
s@Tungsten Graphics (http://www.tungstengraphics.com)@Tungsten Graphics@g
# C string src/gallium/state_trackers/vega/api_misc.c
s/"Tungsten Graphics, Inc"/"VMware, Inc"/
Reviewed-by: Brian Paul <brianp@vmware.com>
2014-01-17 16:27:50 +00:00
|
|
|
* Copyright 2007 VMware, Inc.
|
2009-10-07 22:36:43 +01:00
|
|
|
* All Rights Reserved.
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and associated documentation files (the
|
|
|
|
* "Software"), to deal in the Software without restriction, including
|
|
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
|
|
* distribute, sub license, and/or sell copies of the Software, and to
|
|
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
|
|
* the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice (including the
|
|
|
|
* next paragraph) shall be included in all copies or substantial portions
|
|
|
|
* of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
s/Tungsten Graphics/VMware/
Tungsten Graphics Inc. was acquired by VMware Inc. in 2008. Leaving the
old copyright name is creating unnecessary confusion, hence this change.
This was the sed script I used:
$ cat tg2vmw.sed
# Run as:
#
# git reset --hard HEAD && find include scons src -type f -not -name 'sed*' -print0 | xargs -0 sed -i -f tg2vmw.sed
#
# Rename copyrights
s/Tungsten Gra\(ph\|hp\)ics,\? [iI]nc\.\?\(, Cedar Park\)\?\(, Austin\)\?\(, \(Texas\|TX\)\)\?\.\?/VMware, Inc./g
/Copyright/s/Tungsten Graphics\(,\? [iI]nc\.\)\?\(, Cedar Park\)\?\(, Austin\)\?\(, \(Texas\|TX\)\)\?\.\?/VMware, Inc./
s/TUNGSTEN GRAPHICS/VMWARE/g
# Rename emails
s/alanh@tungstengraphics.com/alanh@vmware.com/
s/jens@tungstengraphics.com/jowen@vmware.com/g
s/jrfonseca-at-tungstengraphics-dot-com/jfonseca-at-vmware-dot-com/
s/jrfonseca\?@tungstengraphics.com/jfonseca@vmware.com/g
s/keithw\?@tungstengraphics.com/keithw@vmware.com/g
s/michel@tungstengraphics.com/daenzer@vmware.com/g
s/thomas-at-tungstengraphics-dot-com/thellstom-at-vmware-dot-com/
s/zack@tungstengraphics.com/zackr@vmware.com/
# Remove dead links
s@Tungsten Graphics (http://www.tungstengraphics.com)@Tungsten Graphics@g
# C string src/gallium/state_trackers/vega/api_misc.c
s/"Tungsten Graphics, Inc"/"VMware, Inc"/
Reviewed-by: Brian Paul <brianp@vmware.com>
2014-01-17 16:27:50 +00:00
|
|
|
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
|
2009-10-07 22:36:43 +01:00
|
|
|
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
|
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
*
|
|
|
|
**************************************************************************/
|
|
|
|
|
|
|
|
/*
|
2009-10-08 12:15:12 +01:00
|
|
|
* Binning code for triangles
|
2009-10-07 22:36:43 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include "util/u_math.h"
|
|
|
|
#include "util/u_memory.h"
|
2010-08-24 20:04:08 +01:00
|
|
|
#include "util/u_rect.h"
|
2010-10-12 18:59:15 +01:00
|
|
|
#include "util/u_sse.h"
|
2010-01-21 21:59:01 +00:00
|
|
|
#include "lp_perf.h"
|
|
|
|
#include "lp_setup_context.h"
|
|
|
|
#include "lp_rast.h"
|
2010-05-26 15:11:17 +01:00
|
|
|
#include "lp_state_fs.h"
|
2010-09-05 13:17:43 +01:00
|
|
|
#include "lp_state_setup.h"
|
2013-06-19 22:38:39 +01:00
|
|
|
#include "lp_context.h"
|
2009-10-07 22:36:43 +01:00
|
|
|
|
2013-10-25 03:05:22 +01:00
|
|
|
#include <inttypes.h>
|
|
|
|
|
2009-10-07 22:36:43 +01:00
|
|
|
|
2010-10-12 18:59:15 +01:00
|
|
|
#if defined(PIPE_ARCH_SSE)
|
|
|
|
#include <emmintrin.h>
|
2018-11-10 00:23:08 +00:00
|
|
|
#elif defined(_ARCH_PWR8) && UTIL_ARCH_LITTLE_ENDIAN
|
2015-12-13 15:49:32 +00:00
|
|
|
#include <altivec.h>
|
|
|
|
#include "util/u_pwr8.h"
|
2010-10-12 18:59:15 +01:00
|
|
|
#endif
|
2013-10-25 03:05:22 +01:00
|
|
|
|
2017-04-11 13:30:42 +01:00
|
|
|
#if !defined(PIPE_ARCH_SSE)
|
|
|
|
|
2015-07-21 00:58:43 +01:00
|
|
|
static inline int
|
2010-06-17 21:19:09 +01:00
|
|
|
subpixel_snap(float a)
|
|
|
|
{
|
|
|
|
return util_iround(FIXED_ONE * a);
|
|
|
|
}
|
|
|
|
|
2017-04-11 13:30:42 +01:00
|
|
|
#endif
|
2010-06-17 21:19:09 +01:00
|
|
|
|
2012-05-14 16:00:06 +01:00
|
|
|
/*
 * Triangle vertex positions in fixed-point coordinates, plus edge deltas.
 *
 * The bounding-box code shifts x[]/y[] right by FIXED_ORDER to obtain
 * pixel coordinates.  Only elements 0..2 are read there; the fourth
 * element is presumably padding for vector loads -- TODO confirm.
 */
struct fixed_position {
   int32_t x[4];   /* fixed-point x of each vertex */
   int32_t y[4];   /* fixed-point y of each vertex */
   int32_t dx01;   /* presumably x delta between vertices 0 and 1 -- confirm sign at the writer */
   int32_t dy01;   /* presumably y delta between vertices 0 and 1 -- confirm sign at the writer */
   int32_t dx20;   /* presumably x delta between vertices 2 and 0 -- confirm sign at the writer */
   int32_t dy20;   /* presumably y delta between vertices 2 and 0 -- confirm sign at the writer */
};
|
2009-10-07 22:36:43 +01:00
|
|
|
|
2010-01-22 02:04:53 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Alloc space for a new triangle plus the input.a0/dadx/dady arrays
|
|
|
|
* immediately after it.
|
|
|
|
* The memory is allocated from the per-scene pool, not per-tile.
|
|
|
|
* \param tri_size returns number of bytes allocated
|
2010-09-05 13:17:43 +01:00
|
|
|
* \param num_inputs number of fragment shader inputs
|
2010-01-22 02:04:53 +00:00
|
|
|
* \return pointer to triangle space
|
|
|
|
*/
|
2010-07-19 15:23:09 +01:00
|
|
|
struct lp_rast_triangle *
|
|
|
|
lp_setup_alloc_triangle(struct lp_scene *scene,
|
|
|
|
unsigned nr_inputs,
|
|
|
|
unsigned nr_planes,
|
|
|
|
unsigned *tri_size)
|
2010-01-22 02:04:53 +00:00
|
|
|
{
|
2022-05-27 15:05:19 +01:00
|
|
|
// add 1 for XYZW position
|
|
|
|
unsigned input_array_sz = (nr_inputs + 1) * sizeof(float[4]);
|
2010-10-15 12:23:22 +01:00
|
|
|
unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane);
|
2010-01-22 02:04:53 +00:00
|
|
|
|
2016-03-15 15:39:55 +00:00
|
|
|
STATIC_ASSERT(sizeof(struct lp_rast_plane) % 8 == 0);
|
|
|
|
|
2010-10-15 12:23:22 +01:00
|
|
|
*tri_size = (sizeof(struct lp_rast_triangle) +
|
2022-05-27 15:05:19 +01:00
|
|
|
3 * input_array_sz + // 3 = da + dadx + dady
|
2010-10-15 12:23:22 +01:00
|
|
|
plane_sz);
|
2010-01-22 02:04:53 +00:00
|
|
|
|
2022-05-27 15:05:19 +01:00
|
|
|
struct lp_rast_triangle *tri = lp_scene_alloc_aligned(scene, *tri_size, 16);
|
2015-12-04 11:08:22 +00:00
|
|
|
if (!tri)
|
2010-10-18 02:48:11 +01:00
|
|
|
return NULL;
|
2010-01-22 02:04:53 +00:00
|
|
|
|
2010-10-18 02:48:11 +01:00
|
|
|
tri->inputs.stride = input_array_sz;
|
2010-01-22 02:04:53 +00:00
|
|
|
|
2010-10-15 12:23:22 +01:00
|
|
|
{
|
2021-01-11 19:31:32 +00:00
|
|
|
ASSERTED char *a = (char *)tri;
|
|
|
|
ASSERTED char *b = (char *)&GET_PLANES(tri)[nr_planes];
|
|
|
|
|
2010-10-15 12:23:22 +01:00
|
|
|
assert(b - a == *tri_size);
|
2010-05-28 19:49:49 +01:00
|
|
|
}
|
2010-01-22 02:04:53 +00:00
|
|
|
|
|
|
|
return tri;
|
|
|
|
}
|
|
|
|
|
2010-08-24 19:58:54 +01:00
|
|
|
void
|
|
|
|
lp_setup_print_vertex(struct lp_setup_context *setup,
|
|
|
|
const char *name,
|
|
|
|
const float (*v)[4])
|
|
|
|
{
|
2010-09-05 13:17:43 +01:00
|
|
|
const struct lp_setup_variant_key *key = &setup->setup.variant->key;
|
2010-08-24 19:58:54 +01:00
|
|
|
|
|
|
|
debug_printf(" wpos (%s[0]) xyzw %f %f %f %f\n",
|
|
|
|
name,
|
|
|
|
v[0][0], v[0][1], v[0][2], v[0][3]);
|
|
|
|
|
2022-05-27 15:05:19 +01:00
|
|
|
for (int i = 0; i < key->num_inputs; i++) {
|
2010-09-05 13:17:43 +01:00
|
|
|
const float *in = v[key->inputs[i].src_index];
|
2010-08-24 19:58:54 +01:00
|
|
|
|
|
|
|
debug_printf(" in[%d] (%s[%d]) %s%s%s%s ",
|
2022-05-27 15:05:19 +01:00
|
|
|
i,
|
2010-09-05 13:17:43 +01:00
|
|
|
name, key->inputs[i].src_index,
|
|
|
|
(key->inputs[i].usage_mask & 0x1) ? "x" : " ",
|
|
|
|
(key->inputs[i].usage_mask & 0x2) ? "y" : " ",
|
|
|
|
(key->inputs[i].usage_mask & 0x4) ? "z" : " ",
|
|
|
|
(key->inputs[i].usage_mask & 0x8) ? "w" : " ");
|
2010-08-24 19:58:54 +01:00
|
|
|
|
2022-05-27 15:05:19 +01:00
|
|
|
for (int j = 0; j < 4; j++)
|
2010-09-05 13:17:43 +01:00
|
|
|
if (key->inputs[i].usage_mask & (1<<j))
|
2010-08-24 19:58:54 +01:00
|
|
|
debug_printf("%.5f ", in[j]);
|
|
|
|
|
|
|
|
debug_printf("\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-01-22 02:04:53 +00:00
|
|
|
|
2010-04-15 23:24:31 +01:00
|
|
|
/**
 * Print triangle winding and vertex attribs (for debug).
 *
 * The winding is taken from the sign of the z component of the cross
 * product of the edges (v0 - v2) and (v1 - v2), using only the x/y of
 * attribute 0: negative prints "ccw", positive prints "cw", anything
 * else prints "zero area".  Each vertex is then dumped with
 * lp_setup_print_vertex().
 */
void
lp_setup_print_triangle(struct lp_setup_context *setup,
                        const float (*v0)[4],
                        const float (*v1)[4],
                        const float (*v2)[4])
{
   debug_printf("triangle\n");

   /* Edge vectors relative to v2. */
   const float edge0_x = v0[0][0] - v2[0][0];
   const float edge0_y = v0[0][1] - v2[0][1];
   const float edge1_x = v1[0][0] - v2[0][0];
   const float edge1_y = v1[0][1] - v2[0][1];

   /* cross(edge0, edge1).z */
   const float cross_z = edge0_x * edge1_y - edge0_y * edge1_x;

   if (cross_z > 0.0f) {
      debug_printf(" - cw\n");
   } else if (cross_z < 0.0f) {
      debug_printf(" - ccw\n");
   } else {
      debug_printf(" - zero area\n");
   }

   lp_setup_print_vertex(setup, "v0", v0);
   lp_setup_print_vertex(setup, "v1", v1);
   lp_setup_print_vertex(setup, "v2", v2);
}
|
|
|
|
|
2010-01-22 02:04:53 +00:00
|
|
|
|
2010-09-24 11:18:38 +01:00
|
|
|
#define MAX_PLANES 8
|
2010-09-07 14:02:15 +01:00
|
|
|
static unsigned
|
2010-09-24 11:18:38 +01:00
|
|
|
lp_rast_tri_tab[MAX_PLANES+1] = {
|
2010-09-07 14:02:15 +01:00
|
|
|
0, /* should be impossible */
|
|
|
|
LP_RAST_OP_TRIANGLE_1,
|
|
|
|
LP_RAST_OP_TRIANGLE_2,
|
|
|
|
LP_RAST_OP_TRIANGLE_3,
|
|
|
|
LP_RAST_OP_TRIANGLE_4,
|
|
|
|
LP_RAST_OP_TRIANGLE_5,
|
|
|
|
LP_RAST_OP_TRIANGLE_6,
|
|
|
|
LP_RAST_OP_TRIANGLE_7,
|
|
|
|
LP_RAST_OP_TRIANGLE_8
|
2010-06-17 21:19:09 +01:00
|
|
|
};
|
|
|
|
|
2013-10-25 03:05:22 +01:00
|
|
|
static unsigned
|
|
|
|
lp_rast_32_tri_tab[MAX_PLANES+1] = {
|
|
|
|
0, /* should be impossible */
|
|
|
|
LP_RAST_OP_TRIANGLE_32_1,
|
|
|
|
LP_RAST_OP_TRIANGLE_32_2,
|
|
|
|
LP_RAST_OP_TRIANGLE_32_3,
|
|
|
|
LP_RAST_OP_TRIANGLE_32_4,
|
|
|
|
LP_RAST_OP_TRIANGLE_32_5,
|
|
|
|
LP_RAST_OP_TRIANGLE_32_6,
|
|
|
|
LP_RAST_OP_TRIANGLE_32_7,
|
|
|
|
LP_RAST_OP_TRIANGLE_32_8
|
|
|
|
};
|
|
|
|
|
llvmpipe: Add a linear rasterizer optimized for 2D rendering.
This change adds:
- an alternative rasterizer, which rasterizes bins in a left->right &
top->bottom linear fashion;
- triangle -> rectangle detection;
- 1:1 blit detection;
- a special TGSI -> LLVM IR code generation that uses 8-bit SSE integers
in AoS fashion (as opposed to 32bits floats.)
Altogether these changes yield a 2x to 3x performance improvement for 2D
workloads. It was designed to render Windows 7 Aero and other Windows
built-in 3D applications (like Windows Media Player, Internet Explorer
11, UWP applications) with minimum CPU utilization, but it should be
generally applicable to other 2D-on-3D applications, like desktop
compositors, HTML browsers, 3D based UI toolkits, etc.
This was mostly the brainchild of Keith Whitwell back in 2010. I wrote
TGSI -> AoS translation. And many others added bug-fixes and
enhancements over the years: Roland Scheidegger, Brian Paul, and James
Benton.
Known issues:
- piglit spec@!opengl 1.1@quad-invariance will warn that "left and right
half should match" due to rounding error difference
- These optimized paths to kick in is that depth-buffer must not be
used, so some applications which want to benefit from these improvements
might need to be modified to ensure they use painter's algorithm instead
of depth-buffers.
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Keith Whitwell <keithw@vmware.com>
v2: Incorporate Dave Airlie feedback: cleanup LP_DEBUG_xx; shrink 3+
empty lines.
v3: silence unused var warning, adapt to new upstream code (point setup)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11969>
2021-05-07 13:49:07 +01:00
|
|
|
|
2020-03-20 21:34:53 +00:00
|
|
|
static unsigned
|
|
|
|
lp_rast_ms_tri_tab[MAX_PLANES+1] = {
|
|
|
|
0, /* should be impossible */
|
|
|
|
LP_RAST_OP_MS_TRIANGLE_1,
|
|
|
|
LP_RAST_OP_MS_TRIANGLE_2,
|
|
|
|
LP_RAST_OP_MS_TRIANGLE_3,
|
|
|
|
LP_RAST_OP_MS_TRIANGLE_4,
|
|
|
|
LP_RAST_OP_MS_TRIANGLE_5,
|
|
|
|
LP_RAST_OP_MS_TRIANGLE_6,
|
|
|
|
LP_RAST_OP_MS_TRIANGLE_7,
|
|
|
|
LP_RAST_OP_MS_TRIANGLE_8
|
|
|
|
};
|
2010-08-27 17:51:21 +01:00
|
|
|
|
2022-05-27 15:05:19 +01:00
|
|
|
|
llvmpipe: Add a linear rasterizer optimized for 2D rendering.
This change adds:
- an alternative rasterizer, which rasterizes bins in a left->right &
top->bottom linear fashion;
- triangle -> rectangle detection;
- 1:1 blit detection;
- a special TGSI -> LLVM IR code generation that uses 8-bit SSE integers
in AoS fashion (as opposed to 32bits floats.)
Altogether these changes yield a 2x to 3x performance improvement for 2D
workloads. It was designed to render Windows 7 Aero and other Windows
built-in 3D applications (like Windows Media Player, Internet Explorer
11, UWP applications) with minimum CPU utilization, but it should be
generally applicable to other 2D-on-3D applications, like desktop
compositors, HTML browsers, 3D based UI toolkits, etc.
This was mostly the brainchild of Keith Whitwell back in 2010. I wrote
TGSI -> AoS translation. And many others added bug-fixes and
enhancements over the years: Roland Scheidegger, Brian Paul, and James
Benton.
Known issues:
- piglit spec@!opengl 1.1@quad-invariance will warn that "left and right
half should match" due to rounding error difference
- These optimized paths to kick in is that depth-buffer must not be
used, so some applications which want to benefit from these improvements
might need to be modified to ensure they use painter's algorithm instead
of depth-buffers.
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Keith Whitwell <keithw@vmware.com>
v2: Incorporate Dave Airlie feedback: cleanup LP_DEBUG_xx; shrink 3+
empty lines.
v3: silence unused var warning, adapt to new upstream code (point setup)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11969>
2021-05-07 13:49:07 +01:00
|
|
|
/*
|
|
|
|
* Detect big primitives drawn with an alpha == 1.0.
|
2010-08-27 17:51:21 +01:00
|
|
|
*
|
llvmpipe: Add a linear rasterizer optimized for 2D rendering.
This change adds:
- an alternative rasterizer, which rasterizes bins in a left->right &
top->bottom linear fashion;
- triangle -> rectangle detection;
- 1:1 blit detection;
- a special TGSI -> LLVM IR code generation that uses 8-bit SSE integers
in AoS fashion (as opposed to 32bits floats.)
Altogether these changes yield a 2x to 3x performance improvement for 2D
workloads. It was designed to render Windows 7 Aero and other Windows
built-in 3D applications (like Windows Media Player, Internet Explorer
11, UWP applications) with minimum CPU utilization, but it should be
generally applicable to other 2D-on-3D applications, like desktop
compositors, HTML browsers, 3D based UI toolkits, etc.
This was mostly the brainchild of Keith Whitwell back in 2010. I wrote
TGSI -> AoS translation. And many others added bug-fixes and
enhancements over the years: Roland Scheidegger, Brian Paul, and James
Benton.
Known issues:
- piglit spec@!opengl 1.1@quad-invariance will warn that "left and right
half should match" due to rounding error difference
- These optimized paths to kick in is that depth-buffer must not be
used, so some applications which want to benefit from these improvements
might need to be modified to ensure they use painter's algorithm instead
of depth-buffers.
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Keith Whitwell <keithw@vmware.com>
v2: Incorporate Dave Airlie feedback: cleanup LP_DEBUG_xx; shrink 3+
empty lines.
v3: silence unused var warning, adapt to new upstream code (point setup)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11969>
2021-05-07 13:49:07 +01:00
|
|
|
* This is used when simulating anti-aliasing primitives in shaders, e.g.,
|
|
|
|
* when drawing the windows client area in Aero's flip-3d effect.
|
2010-08-27 17:51:21 +01:00
|
|
|
*/
|
2010-08-27 17:49:40 +01:00
|
|
|
static boolean
|
2022-05-27 15:05:19 +01:00
|
|
|
check_opaque(const struct lp_setup_context *setup,
|
llvmpipe: Add a linear rasterizer optimized for 2D rendering.
This change adds:
- an alternative rasterizer, which rasterizes bins in a left->right &
top->bottom linear fashion;
- triangle -> rectangle detection;
- 1:1 blit detection;
- a special TGSI -> LLVM IR code generation that uses 8-bit SSE integers
in AoS fashion (as opposed to 32bits floats.)
Altogether these changes yield a 2x to 3x performance improvement for 2D
workloads. It was designed to render Windows 7 Aero and other Windows
built-in 3D applications (like Windows Media Player, Internet Explorer
11, UWP applications) with minimum CPU utilization, but it should be
generally applicable to other 2D-on-3D applications, like desktop
compositors, HTML browsers, 3D based UI toolkits, etc.
This was mostly the brainchild of Keith Whitwell back in 2010. I wrote
TGSI -> AoS translation. And many others added bug-fixes and
enhancements over the years: Roland Scheidegger, Brian Paul, and James
Benton.
Known issues:
- piglit spec@!opengl 1.1@quad-invariance will warn that "left and right
half should match" due to rounding error difference
- These optimized paths to kick in is that depth-buffer must not be
used, so some applications which want to benefit from these improvements
might need to be modified to ensure they use painter's algorithm instead
of depth-buffers.
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Keith Whitwell <keithw@vmware.com>
v2: Incorporate Dave Airlie feedback: cleanup LP_DEBUG_xx; shrink 3+
empty lines.
v3: silence unused var warning, adapt to new upstream code (point setup)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11969>
2021-05-07 13:49:07 +01:00
|
|
|
const float (*v1)[4],
|
|
|
|
const float (*v2)[4],
|
|
|
|
const float (*v3)[4])
|
2010-08-27 17:51:21 +01:00
|
|
|
{
|
llvmpipe: Add a linear rasterizer optimized for 2D rendering.
This change adds:
- an alternative rasterizer, which rasterizes bins in a left->right &
top->bottom linear fashion;
- triangle -> rectangle detection;
- 1:1 blit detection;
- a special TGSI -> LLVM IR code generation that uses 8-bit SSE integers
in AoS fashion (as opposed to 32bits floats.)
Altogether these changes yield a 2x to 3x performance improvement for 2D
workloads. It was designed to render Windows 7 Aero and other Windows
built-in 3D applications (like Windows Media Player, Internet Explorer
11, UWP applications) with minimum CPU utilization, but it should be
generally applicable to other 2D-on-3D applications, like desktop
compositors, HTML browsers, 3D based UI toolkits, etc.
This was mostly the brainchild of Keith Whitwell back in 2010. I wrote
TGSI -> AoS translation. And many others added bug-fixes and
enhancements over the years: Roland Scheidegger, Brian Paul, and James
Benton.
Known issues:
- piglit spec@!opengl 1.1@quad-invariance will warn that "left and right
half should match" due to rounding error difference
- These optimized paths to kick in is that depth-buffer must not be
used, so some applications which want to benefit from these improvements
might need to be modified to ensure they use painter's algorithm instead
of depth-buffers.
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Keith Whitwell <keithw@vmware.com>
v2: Incorporate Dave Airlie feedback: cleanup LP_DEBUG_xx; shrink 3+
empty lines.
v3: silence unused var warning, adapt to new upstream code (point setup)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11969>
2021-05-07 13:49:07 +01:00
|
|
|
const struct lp_fragment_shader_variant *variant =
|
|
|
|
setup->fs.current.variant;
|
2010-08-27 17:51:21 +01:00
|
|
|
|
llvmpipe: Add a linear rasterizer optimized for 2D rendering.
This change adds:
- an alternative rasterizer, which rasterizes bins in a left->right &
top->bottom linear fashion;
- triangle -> rectangle detection;
- 1:1 blit detection;
- a special TGSI -> LLVM IR code generation that uses 8-bit SSE integers
in AoS fashion (as opposed to 32bits floats.)
Altogether these changes yield a 2x to 3x performance improvement for 2D
workloads. It was designed to render Windows 7 Aero and other Windows
built-in 3D applications (like Windows Media Player, Internet Explorer
11, UWP applications) with minimum CPU utilization, but it should be
generally applicable to other 2D-on-3D applications, like desktop
compositors, HTML browsers, 3D based UI toolkits, etc.
This was mostly the brainchild of Keith Whitwell back in 2010. I wrote
TGSI -> AoS translation. And many others added bug-fixes and
enhancements over the years: Roland Scheidegger, Brian Paul, and James
Benton.
Known issues:
- piglit spec@!opengl 1.1@quad-invariance will warn that "left and right
half should match" due to rounding error difference
- These optimized paths to kick in is that depth-buffer must not be
used, so some applications which want to benefit from these improvements
might need to be modified to ensure they use painter's algorithm instead
of depth-buffers.
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Keith Whitwell <keithw@vmware.com>
v2: Incorporate Dave Airlie feedback: cleanup LP_DEBUG_xx; shrink 3+
empty lines.
v3: silence unused var warning, adapt to new upstream code (point setup)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11969>
2021-05-07 13:49:07 +01:00
|
|
|
if (variant->opaque)
|
|
|
|
return TRUE;
|
2022-05-27 15:05:19 +01:00
|
|
|
|
llvmpipe: Add a linear rasterizer optimized for 2D rendering.
This change adds:
- an alternative rasterizer, which rasterizes bins in a left->right &
top->bottom linear fashion;
- triangle -> rectangle detection;
- 1:1 blit detection;
- a special TGSI -> LLVM IR code generation that uses 8-bit SSE integers
in AoS fashion (as opposed to 32bits floats.)
Altogether these changes yield a 2x to 3x performance improvement for 2D
workloads. It was designed to render Windows 7 Aero and other Windows
built-in 3D applications (like Windows Media Player, Internet Explorer
11, UWP applications) with minimum CPU utilization, but it should be
generally applicable to other 2D-on-3D applications, like desktop
compositors, HTML browsers, 3D based UI toolkits, etc.
This was mostly the brainchild of Keith Whitwell back in 2010. I wrote
TGSI -> AoS translation. And many others added bug-fixes and
enhancements over the years: Roland Scheidegger, Brian Paul, and James
Benton.
Known issues:
- piglit spec@!opengl 1.1@quad-invariance will warn that "left and right
half should match" due to rounding error difference
- These optimized paths to kick in is that depth-buffer must not be
used, so some applications which want to benefit from these improvements
might need to be modified to ensure they use painter's algorithm instead
of depth-buffers.
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Keith Whitwell <keithw@vmware.com>
v2: Incorporate Dave Airlie feedback: cleanup LP_DEBUG_xx; shrink 3+
empty lines.
v3: silence unused var warning, adapt to new upstream code (point setup)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11969>
2021-05-07 13:49:07 +01:00
|
|
|
if (!variant->potentially_opaque)
|
|
|
|
return FALSE;
|
|
|
|
|
2022-02-03 02:58:45 +00:00
|
|
|
const struct lp_tgsi_channel_info *alpha_info = &variant->shader->info.cbuf[0][3];
|
llvmpipe: Add a linear rasterizer optimized for 2D rendering.
This change adds:
- an alternative rasterizer, which rasterizes bins in a left->right &
top->bottom linear fashion;
- triangle -> rectangle detection;
- 1:1 blit detection;
- a special TGSI -> LLVM IR code generation that uses 8-bit SSE integers
in AoS fashion (as opposed to 32bits floats.)
Altogether these changes yield a 2x to 3x performance improvement for 2D
workloads. It was designed to render Windows 7 Aero and other Windows
built-in 3D applications (like Windows Media Player, Internet Explorer
11, UWP applications) with minimum CPU utilization, but it should be
generally applicable to other 2D-on-3D applications, like desktop
compositors, HTML browsers, 3D based UI toolkits, etc.
This was mostly the brainchild of Keith Whitwell back in 2010. I wrote
TGSI -> AoS translation. And many others added bug-fixes and
enhancements over the years: Roland Scheidegger, Brian Paul, and James
Benton.
Known issues:
- piglit spec@!opengl 1.1@quad-invariance will warn that "left and right
half should match" due to rounding error difference
- These optimized paths to kick in is that depth-buffer must not be
used, so some applications which want to benefit from these improvements
might need to be modified to ensure they use painter's algorithm instead
of depth-buffers.
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Keith Whitwell <keithw@vmware.com>
v2: Incorporate Dave Airlie feedback: cleanup LP_DEBUG_xx; shrink 3+
empty lines.
v3: silence unused var warning, adapt to new upstream code (point setup)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11969>
2021-05-07 13:49:07 +01:00
|
|
|
if (alpha_info->file == TGSI_FILE_CONSTANT) {
|
|
|
|
const float *constants = setup->fs.current.jit_context.constants[0];
|
|
|
|
float alpha = constants[alpha_info->u.index*4 +
|
|
|
|
alpha_info->swizzle];
|
|
|
|
return alpha == 1.0f;
|
|
|
|
}
|
2010-08-27 17:51:21 +01:00
|
|
|
|
llvmpipe: Add a linear rasterizer optimized for 2D rendering.
This change adds:
- an alternative rasterizer, which rasterizes bins in a left->right &
top->bottom linear fashion;
- triangle -> rectangle detection;
- 1:1 blit detection;
- a special TGSI -> LLVM IR code generation that uses 8-bit SSE integers
in AoS fashion (as opposed to 32bits floats.)
Altogether these changes yield a 2x to 3x performance improvement for 2D
workloads. It was designed to render Windows 7 Aero and other Windows
built-in 3D applications (like Windows Media Player, Internet Explorer
11, UWP applications) with minimum CPU utilization, but it should be
generally applicable to other 2D-on-3D applications, like desktop
compositors, HTML browsers, 3D based UI toolkits, etc.
This was mostly the brainchild of Keith Whitwell back in 2010. I wrote
TGSI -> AoS translation. And many others added bug-fixes and
enhancements over the years: Roland Scheidegger, Brian Paul, and James
Benton.
Known issues:
- piglit spec@!opengl 1.1@quad-invariance will warn that "left and right
half should match" due to rounding error difference
- These optimized paths to kick in is that depth-buffer must not be
used, so some applications which want to benefit from these improvements
might need to be modified to ensure they use painter's algorithm instead
of depth-buffers.
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Keith Whitwell <keithw@vmware.com>
v2: Incorporate Dave Airlie feedback: cleanup LP_DEBUG_xx; shrink 3+
empty lines.
v3: silence unused var warning, adapt to new upstream code (point setup)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11969>
2021-05-07 13:49:07 +01:00
|
|
|
if (alpha_info->file == TGSI_FILE_INPUT) {
|
|
|
|
return (v1[1 + alpha_info->u.index][alpha_info->swizzle] == 1.0f &&
|
|
|
|
v2[1 + alpha_info->u.index][alpha_info->swizzle] == 1.0f &&
|
|
|
|
v3[1 + alpha_info->u.index][alpha_info->swizzle] == 1.0f);
|
2010-08-27 17:51:21 +01:00
|
|
|
}
|
llvmpipe: Add a linear rasterizer optimized for 2D rendering.
This change adds:
- an alternative rasterizer, which rasterizes bins in a left->right &
top->bottom linear fashion;
- triangle -> rectangle detection;
- 1:1 blit detection;
- a special TGSI -> LLVM IR code generation that uses 8-bit SSE integers
in AoS fashion (as opposed to 32bits floats.)
Altogether these changes yield a 2x to 3x performance improvement for 2D
workloads. It was designed to render Windows 7 Aero and other Windows
built-in 3D applications (like Windows Media Player, Internet Explorer
11, UWP applications) with minimum CPU utilization, but it should be
generally applicable to other 2D-on-3D applications, like desktop
compositors, HTML browsers, 3D based UI toolkits, etc.
This was mostly the brainchild of Keith Whitwell back in 2010. I wrote
TGSI -> AoS translation. And many others added bug-fixes and
enhancements over the years: Roland Scheidegger, Brian Paul, and James
Benton.
Known issues:
- piglit spec@!opengl 1.1@quad-invariance will warn that "left and right
half should match" due to rounding error difference
- These optimized paths to kick in is that depth-buffer must not be
used, so some applications which want to benefit from these improvements
might need to be modified to ensure they use painter's algorithm instead
of depth-buffers.
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Keith Whitwell <keithw@vmware.com>
v2: Incorporate Dave Airlie feedback: cleanup LP_DEBUG_xx; shrink 3+
empty lines.
v3: silence unused var warning, adapt to new upstream code (point setup)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11969>
2021-05-07 13:49:07 +01:00
|
|
|
|
|
|
|
return FALSE;
|
2010-08-27 17:51:21 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-11-30 21:02:01 +00:00
|
|
|
/**
|
|
|
|
* Do basic setup for triangle rasterization and determine which
|
2009-12-13 18:17:25 +00:00
|
|
|
* framebuffer tiles are touched. Put the triangle in the scene's
|
|
|
|
* bins for the tiles which we overlap.
|
2009-11-30 21:02:01 +00:00
|
|
|
*/
|
2010-08-27 17:49:40 +01:00
|
|
|
static boolean
|
2010-03-13 10:45:52 +00:00
|
|
|
do_triangle_ccw(struct lp_setup_context *setup,
|
2022-05-27 15:05:19 +01:00
|
|
|
struct fixed_position *position,
|
2012-05-14 16:00:06 +01:00
|
|
|
const float (*v0)[4],
|
|
|
|
const float (*v1)[4],
|
|
|
|
const float (*v2)[4],
|
2022-05-27 15:05:19 +01:00
|
|
|
boolean frontfacing)
|
2009-10-07 22:36:43 +01:00
|
|
|
{
|
2010-09-07 14:02:15 +01:00
|
|
|
struct lp_scene *scene = setup->scene;
|
|
|
|
|
2010-04-15 23:24:31 +01:00
|
|
|
if (0)
|
2010-08-22 10:57:12 +01:00
|
|
|
lp_setup_print_triangle(setup, v0, v1, v2);
|
2010-04-15 23:24:31 +01:00
|
|
|
|
2022-05-27 15:05:19 +01:00
|
|
|
const float (*pv)[4];
|
2015-12-03 00:18:14 +00:00
|
|
|
if (setup->flatshade_first) {
|
|
|
|
pv = v0;
|
2022-05-27 15:05:19 +01:00
|
|
|
} else {
|
2015-12-03 00:18:14 +00:00
|
|
|
pv = v2;
|
|
|
|
}
|
2022-05-27 15:05:19 +01:00
|
|
|
|
|
|
|
unsigned viewport_index = 0;
|
2015-12-03 00:18:14 +00:00
|
|
|
if (setup->viewport_index_slot > 0) {
|
|
|
|
unsigned *udata = (unsigned*)pv[setup->viewport_index_slot];
|
|
|
|
viewport_index = lp_clamp_viewport_idx(*udata);
|
2010-06-17 21:19:09 +01:00
|
|
|
}
|
2022-05-27 15:05:19 +01:00
|
|
|
|
|
|
|
unsigned layer = 0;
|
2013-06-07 20:03:40 +01:00
|
|
|
if (setup->layer_slot > 0) {
|
2015-12-03 00:18:14 +00:00
|
|
|
layer = *(unsigned*)pv[setup->layer_slot];
|
2013-06-07 20:03:40 +01:00
|
|
|
layer = MIN2(layer, scene->fb_max_layer);
|
|
|
|
}
|
2010-06-17 21:19:09 +01:00
|
|
|
|
2010-08-24 20:04:08 +01:00
|
|
|
/* Bounding rectangle (in pixels) */
|
2022-05-27 15:05:19 +01:00
|
|
|
struct u_rect bbox;
|
2010-08-24 20:04:08 +01:00
|
|
|
{
|
|
|
|
/* Yes this is necessary to accurately calculate bounding boxes
|
|
|
|
* with the two fill-conventions we support. GL (normally) ends
|
|
|
|
* up needing a bottom-left fill convention, which requires
|
|
|
|
* slightly different rounding.
|
|
|
|
*/
|
2014-01-07 17:52:21 +00:00
|
|
|
int adj = (setup->bottom_edge_rule != 0) ? 1 : 0;
|
2010-08-24 20:04:08 +01:00
|
|
|
|
2012-05-10 17:15:28 +01:00
|
|
|
/* Inclusive x0, exclusive x1 */
|
2012-05-14 16:00:06 +01:00
|
|
|
bbox.x0 = MIN3(position->x[0], position->x[1], position->x[2]) >> FIXED_ORDER;
|
|
|
|
bbox.x1 = (MAX3(position->x[0], position->x[1], position->x[2]) - 1) >> FIXED_ORDER;
|
2010-08-24 20:04:08 +01:00
|
|
|
|
2012-05-10 17:15:28 +01:00
|
|
|
/* Inclusive / exclusive depending upon adj (bottom-left or top-right) */
|
2012-05-14 16:00:06 +01:00
|
|
|
bbox.y0 = (MIN3(position->y[0], position->y[1], position->y[2]) + adj) >> FIXED_ORDER;
|
|
|
|
bbox.y1 = (MAX3(position->y[0], position->y[1], position->y[2]) - 1 + adj) >> FIXED_ORDER;
|
2010-08-24 20:04:08 +01:00
|
|
|
}
|
|
|
|
|
2013-11-26 18:50:27 +00:00
|
|
|
if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) {
|
llvmpipe: Add a linear rasterizer optimized for 2D rendering.
This change adds:
- an alternative rasterizer, which rasterizes bins in a left->right &
top->bottom linear fashion;
- triangle -> rectangle detection;
- 1:1 blit detection;
- a special TGSI -> LLVM IR code generation that uses 8-bit SSE integers
in AoS fashion (as opposed to 32bits floats.)
Altogether these changes yield a 2x to 3x performance improvement for 2D
workloads. It was designed to render Windows 7 Aero and other Windows
built-in 3D applications (like Windows Media Player, Internet Explorer
11, UWP applications) with minimum CPU utilization, but it should be
generally applicable to other 2D-on-3D applications, like desktop
compositors, HTML browsers, 3D based UI toolkits, etc.
This was mostly the brainchild of Keith Whitwell back in 2010. I wrote
TGSI -> AoS translation. And many others added bug-fixes and
enhancements over the years: Roland Scheidegger, Brian Paul, and James
Benton.
Known issues:
- piglit spec@!opengl 1.1@quad-invariance will warn that "left and right
half should match" due to rounding error difference
- These optimized paths to kick in is that depth-buffer must not be
used, so some applications which want to benefit from these improvements
might need to be modified to ensure they use painter's algorithm instead
of depth-buffers.
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Keith Whitwell <keithw@vmware.com>
v2: Incorporate Dave Airlie feedback: cleanup LP_DEBUG_xx; shrink 3+
empty lines.
v3: silence unused var warning, adapt to new upstream code (point setup)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11969>
2021-05-07 13:49:07 +01:00
|
|
|
if (0) debug_printf("no intersection\n");
|
2010-08-24 20:04:08 +01:00
|
|
|
LP_COUNT(nr_culled_tris);
|
2010-09-07 14:02:15 +01:00
|
|
|
return TRUE;
|
2010-08-24 20:04:08 +01:00
|
|
|
}
|
|
|
|
|
2022-02-02 07:39:38 +00:00
|
|
|
int max_szorig = ((bbox.x1 - (bbox.x0 & ~3)) |
|
|
|
|
(bbox.y1 - (bbox.y0 & ~3)));
|
|
|
|
boolean use_32bits = max_szorig <= MAX_FIXED_LENGTH32;
|
|
|
|
#if defined(_ARCH_PWR8) && UTIL_ARCH_LITTLE_ENDIAN
|
|
|
|
boolean pwr8_limit_check = (bbox.x1 - bbox.x0) <= MAX_FIXED_LENGTH32 &&
|
|
|
|
(bbox.y1 - bbox.y0) <= MAX_FIXED_LENGTH32;
|
|
|
|
#endif
|
2017-06-23 18:35:50 +01:00
|
|
|
|
2010-11-02 14:20:20 +00:00
|
|
|
/* Can safely discard negative regions, but need to keep hold of
|
|
|
|
* information about when the triangle extends past screen
|
|
|
|
* boundaries. See trimmed_box in lp_setup_bin_triangle().
|
|
|
|
*/
|
2022-02-02 07:39:38 +00:00
|
|
|
bbox.x0 = MAX2(bbox.x0, 0);
|
|
|
|
bbox.y0 = MAX2(bbox.y0, 0);
|
2010-06-17 21:19:09 +01:00
|
|
|
|
2022-05-27 15:05:19 +01:00
|
|
|
int nr_planes = 3;
|
|
|
|
|
2016-02-02 02:14:12 +00:00
|
|
|
/*
|
|
|
|
* Determine how many scissor planes we need, that is drop scissor
|
|
|
|
* edges if the bounding box of the tri is fully inside that edge.
|
|
|
|
*/
|
2022-05-27 15:05:19 +01:00
|
|
|
const struct u_rect *scissor = &setup->draw_regions[viewport_index];
|
|
|
|
boolean s_planes[4];
|
2022-02-02 07:39:38 +00:00
|
|
|
scissor_planes_needed(s_planes, &bbox, scissor);
|
llvmpipe: always use draw_regions intersection
This was still used in the linear branch, since it works all a little
differently there (in particular, when using guard band we have to
intersect the draw regions with the viewport, since draw won't clip
for us). However, we should always intersect with draw_regions
(regardless if that includes the intersection with vp or not), since
the viewport can be larger than the fb size, and we don't want to
draw outside the fb (usually harmless, but important for occlusion
queries and shader image/buffer writes).
This fixes various dEQP-GLES31.functional.fbo.no_attachments failures
(which uses oversized viewport with occlusion queries).
The other ci changes aren't really bugs (the humus/Portals image
looks the same, we cannot expect bit-identical results, and
for the piglit quad-invariance test, I think we merely passed it
by accident since our interpolation may give different results
depending on where on the screen a tri is regardless of linear
rasterizer).
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11969>
2021-07-19 16:32:17 +01:00
|
|
|
nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3];
|
2016-02-02 02:14:12 +00:00
|
|
|
|
2022-05-27 15:05:19 +01:00
|
|
|
unsigned tri_bytes;
|
|
|
|
const struct lp_setup_variant_key *key = &setup->setup.variant->key;
|
|
|
|
struct lp_rast_triangle *tri =
|
|
|
|
lp_setup_alloc_triangle(scene, key->num_inputs, nr_planes, &tri_bytes);
|
2010-05-28 19:49:49 +01:00
|
|
|
if (!tri)
|
2010-08-27 17:49:40 +01:00
|
|
|
return FALSE;
|
2009-10-09 15:52:18 +01:00
|
|
|
|
2017-06-23 03:57:57 +01:00
|
|
|
#ifdef DEBUG
|
2010-08-22 10:57:12 +01:00
|
|
|
tri->v[0][0] = v0[0][0];
|
|
|
|
tri->v[1][0] = v1[0][0];
|
|
|
|
tri->v[2][0] = v2[0][0];
|
|
|
|
tri->v[0][1] = v0[0][1];
|
|
|
|
tri->v[1][1] = v1[0][1];
|
|
|
|
tri->v[2][1] = v2[0][1];
|
2010-03-03 19:55:31 +00:00
|
|
|
#endif
|
|
|
|
|
2010-01-21 21:59:01 +00:00
|
|
|
LP_COUNT(nr_tris);
|
|
|
|
|
llvmpipe: Add a linear rasterizer optimized for 2D rendering.
This change adds:
- an alternative rasterizer, which rasterizes bins in a left->right &
top->bottom linear fashion;
- triangle -> rectangle detection;
- 1:1 blit detection;
- a special TGSI -> LLVM IR code generation that uses 8-bit SSE integers
in AoS fashion (as opposed to 32bits floats.)
Altogether these changes yield a 2x to 3x performance improvement for 2D
workloads. It was designed to render Windows 7 Aero and other Windows
built-in 3D applications (like Windows Media Player, Internet Explorer
11, UWP applications) with minimum CPU utilization, but it should be
generally applicable to other 2D-on-3D applications, like desktop
compositors, HTML browsers, 3D based UI toolkits, etc.
This was mostly the brainchild of Keith Whitwell back in 2010. I wrote
TGSI -> AoS translation. And many others added bug-fixes and
enhancements over the years: Roland Scheidegger, Brian Paul, and James
Benton.
Known issues:
- piglit spec@!opengl 1.1@quad-invariance will warn that "left and right
half should match" due to rounding error difference
- These optimized paths to kick in is that depth-buffer must not be
used, so some applications which want to benefit from these improvements
might need to be modified to ensure they use painter's algorithm instead
of depth-buffers.
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Keith Whitwell <keithw@vmware.com>
v2: Incorporate Dave Airlie feedback: cleanup LP_DEBUG_xx; shrink 3+
empty lines.
v3: silence unused var warning, adapt to new upstream code (point setup)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11969>
2021-05-07 13:49:07 +01:00
|
|
|
/*
|
|
|
|
* Rotate the tri such that v0 is closest to the fb origin.
|
|
|
|
* This can give more accurate a0 value (which is at fb origin)
|
|
|
|
* when calculating the interpolants.
|
|
|
|
* It can't work when there's flat shading for instance in one
|
|
|
|
* of the attributes, hence restrict this to just a single attribute
|
|
|
|
* which is what causes some test failures.
|
|
|
|
* (This does not address the problem that interpolation may be
|
|
|
|
* inaccurate if gradients are relatively steep in small tris far
|
|
|
|
* away from the origin. It does however fix the (silly) wgf11rasterizer
|
|
|
|
* Interpolator test.)
|
|
|
|
* XXX This causes problems with mipgen -EmuTexture for not yet really
|
|
|
|
* understood reasons (if the vertices would be submitted in a different
|
|
|
|
* order, we'd also generate the same "wrong" results here without
|
|
|
|
* rotation). In any case, that we generate different values if a prim
|
|
|
|
* has the vertices rotated but is otherwise the same (which is due to
|
|
|
|
* numerical issues) is not a nice property. An additional problem by
|
|
|
|
* swapping the vertices here (which is possibly worse) is that
|
|
|
|
* the same primitive coming in twice might generate different values
|
|
|
|
* (in particular for z) due to the swapping potentially not happening
|
|
|
|
* both times, if the attributes to be interpolated are different. For now,
|
|
|
|
* just restrict this to not get used with dx9 (by checking pixel offset),
|
|
|
|
* could also restrict it further to only trigger with wgf11Interpolator
|
|
|
|
* Rasterizer test (the only place which needs it, with always the same
|
|
|
|
* vertices even).
|
|
|
|
*/
|
|
|
|
if ((LP_DEBUG & DEBUG_ACCURATE_A0) &&
|
|
|
|
setup->pixel_offset == 0.5f &&
|
|
|
|
key->num_inputs == 1 &&
|
|
|
|
(key->inputs[0].interp == LP_INTERP_LINEAR ||
|
|
|
|
key->inputs[0].interp == LP_INTERP_PERSPECTIVE)) {
|
|
|
|
float dist0 = v0[0][0] * v0[0][0] + v0[0][1] * v0[0][1];
|
|
|
|
float dist1 = v1[0][0] * v1[0][0] + v1[0][1] * v1[0][1];
|
|
|
|
float dist2 = v2[0][0] * v2[0][0] + v2[0][1] * v2[0][1];
|
|
|
|
if (dist0 > dist1 && dist1 < dist2) {
|
|
|
|
const float (*vt)[4];
|
|
|
|
int x, y;
|
|
|
|
vt = v0;
|
|
|
|
v0 = v1;
|
|
|
|
v1 = v2;
|
|
|
|
v2 = vt;
|
|
|
|
x = position->x[0];
|
|
|
|
y = position->y[0];
|
|
|
|
position->x[0] = position->x[1];
|
|
|
|
position->y[0] = position->y[1];
|
|
|
|
position->x[1] = position->x[2];
|
|
|
|
position->y[1] = position->y[2];
|
|
|
|
position->x[2] = x;
|
|
|
|
position->y[2] = y;
|
|
|
|
|
|
|
|
position->dx20 = position->dx01;
|
|
|
|
position->dy20 = position->dy01;
|
|
|
|
position->dx01 = position->x[0] - position->x[1];
|
|
|
|
position->dy01 = position->y[0] - position->y[1];
|
2022-05-27 15:05:19 +01:00
|
|
|
} else if (dist0 > dist2) {
|
llvmpipe: Add a linear rasterizer optimized for 2D rendering.
This change adds:
- an alternative rasterizer, which rasterizes bins in a left->right &
top->bottom linear fashion;
- triangle -> rectangle detection;
- 1:1 blit detection;
- a special TGSI -> LLVM IR code generation that uses 8-bit SSE integers
in AoS fashion (as opposed to 32bits floats.)
Altogether these changes yield a 2x to 3x performance improvement for 2D
workloads. It was designed to render Windows 7 Aero and other Windows
built-in 3D applications (like Windows Media Player, Internet Explorer
11, UWP applications) with minimum CPU utilization, but it should be
generally applicable to other 2D-on-3D applications, like desktop
compositors, HTML browsers, 3D based UI toolkits, etc.
This was mostly the brainchild of Keith Whitwell back in 2010. I wrote
TGSI -> AoS translation. And many others added bug-fixes and
enhancements over the years: Roland Scheidegger, Brian Paul, and James
Benton.
Known issues:
- piglit spec@!opengl 1.1@quad-invariance will warn that "left and right
half should match" due to rounding error difference
- These optimized paths to kick in is that depth-buffer must not be
used, so some applications which want to benefit from these improvements
might need to be modified to ensure they use painter's algorithm instead
of depth-buffers.
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Keith Whitwell <keithw@vmware.com>
v2: Incorporate Dave Airlie feedback: cleanup LP_DEBUG_xx; shrink 3+
empty lines.
v3: silence unused var warning, adapt to new upstream code (point setup)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11969>
2021-05-07 13:49:07 +01:00
|
|
|
const float (*vt)[4];
|
|
|
|
int x, y;
|
|
|
|
vt = v0;
|
|
|
|
v0 = v2;
|
|
|
|
v2 = v1;
|
|
|
|
v1 = vt;
|
|
|
|
x = position->x[0];
|
|
|
|
y = position->y[0];
|
|
|
|
position->x[0] = position->x[2];
|
|
|
|
position->y[0] = position->y[2];
|
|
|
|
position->x[2] = position->x[1];
|
|
|
|
position->y[2] = position->y[1];
|
|
|
|
position->x[1] = x;
|
|
|
|
position->y[1] = y;
|
|
|
|
|
|
|
|
position->dx01 = position->dx20;
|
|
|
|
position->dy01 = position->dy20;
|
|
|
|
position->dx20 = position->x[2] - position->x[0];
|
|
|
|
position->dy20 = position->y[2] - position->y[0];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-10-07 22:36:43 +01:00
|
|
|
/* Setup parameter interpolants:
|
|
|
|
*/
|
2016-02-02 02:14:12 +00:00
|
|
|
setup->setup.variant->jit_function(v0, v1, v2,
|
|
|
|
frontfacing,
|
|
|
|
GET_A0(&tri->inputs),
|
|
|
|
GET_DADX(&tri->inputs),
|
llvmpipe: Add a linear rasterizer optimized for 2D rendering.
This change adds:
- an alternative rasterizer, which rasterizes bins in a left->right &
top->bottom linear fashion;
- triangle -> rectangle detection;
- 1:1 blit detection;
- a special TGSI -> LLVM IR code generation that uses 8-bit SSE integers
in AoS fashion (as opposed to 32bits floats.)
Altogether these changes yield a 2x to 3x performance improvement for 2D
workloads. It was designed to render Windows 7 Aero and other Windows
built-in 3D applications (like Windows Media Player, Internet Explorer
11, UWP applications) with minimum CPU utilization, but it should be
generally applicable to other 2D-on-3D applications, like desktop
compositors, HTML browsers, 3D based UI toolkits, etc.
This was mostly the brainchild of Keith Whitwell back in 2010. I wrote
TGSI -> AoS translation. And many others added bug-fixes and
enhancements over the years: Roland Scheidegger, Brian Paul, and James
Benton.
Known issues:
- piglit spec@!opengl 1.1@quad-invariance will warn that "left and right
half should match" due to rounding error difference
- These optimized paths to kick in is that depth-buffer must not be
used, so some applications which want to benefit from these improvements
might need to be modified to ensure they use painter's algorithm instead
of depth-buffers.
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Keith Whitwell <keithw@vmware.com>
v2: Incorporate Dave Airlie feedback: cleanup LP_DEBUG_xx; shrink 3+
empty lines.
v3: silence unused var warning, adapt to new upstream code (point setup)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11969>
2021-05-07 13:49:07 +01:00
|
|
|
GET_DADY(&tri->inputs),
|
|
|
|
&setup->setup.variant->key);
|
2009-10-07 22:36:43 +01:00
|
|
|
|
2010-10-15 00:12:19 +01:00
|
|
|
tri->inputs.frontfacing = frontfacing;
|
2010-08-27 17:49:40 +01:00
|
|
|
tri->inputs.disable = FALSE;
|
llvmpipe: Add a linear rasterizer optimized for 2D rendering.
This change adds:
- an alternative rasterizer, which rasterizes bins in a left->right &
top->bottom linear fashion;
- triangle -> rectangle detection;
- 1:1 blit detection;
- a special TGSI -> LLVM IR code generation that uses 8-bit SSE integers
in AoS fashion (as opposed to 32bits floats.)
Altogether these changes yield a 2x to 3x performance improvement for 2D
workloads. It was designed to render Windows 7 Aero and other Windows
built-in 3D applications (like Windows Media Player, Internet Explorer
11, UWP applications) with minimum CPU utilization, but it should be
generally applicable to other 2D-on-3D applications, like desktop
compositors, HTML browsers, 3D based UI toolkits, etc.
This was mostly the brainchild of Keith Whitwell back in 2010. I wrote
TGSI -> AoS translation. And many others added bug-fixes and
enhancements over the years: Roland Scheidegger, Brian Paul, and James
Benton.
Known issues:
- piglit spec@!opengl 1.1@quad-invariance will warn that "left and right
half should match" due to rounding error difference
- These optimized paths to kick in is that depth-buffer must not be
used, so some applications which want to benefit from these improvements
might need to be modified to ensure they use painter's algorithm instead
of depth-buffers.
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Keith Whitwell <keithw@vmware.com>
v2: Incorporate Dave Airlie feedback: cleanup LP_DEBUG_xx; shrink 3+
empty lines.
v3: silence unused var warning, adapt to new upstream code (point setup)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11969>
2021-05-07 13:49:07 +01:00
|
|
|
tri->inputs.is_blit = FALSE;
|
2013-06-07 20:03:40 +01:00
|
|
|
tri->inputs.layer = layer;
|
2013-11-26 18:50:27 +00:00
|
|
|
tri->inputs.viewport_index = viewport_index;
|
2021-03-04 03:59:28 +00:00
|
|
|
tri->inputs.view_index = setup->view_index;
|
2010-03-18 19:02:53 +00:00
|
|
|
|
2010-09-05 13:17:43 +01:00
|
|
|
if (0)
|
|
|
|
lp_dump_setup_coef(&setup->setup.variant->key,
|
2022-06-22 19:41:59 +01:00
|
|
|
GET_A0(&tri->inputs),
|
|
|
|
GET_DADX(&tri->inputs),
|
|
|
|
GET_DADY(&tri->inputs));
|
2010-10-18 03:03:42 +01:00
|
|
|
|
2022-05-27 15:05:19 +01:00
|
|
|
struct lp_rast_plane *plane = GET_PLANES(tri);
|
2010-10-12 18:59:15 +01:00
|
|
|
|
|
|
|
#if defined(PIPE_ARCH_SSE)
|
2016-01-02 03:59:09 +00:00
|
|
|
if (1) {
|
2010-10-12 18:59:15 +01:00
|
|
|
__m128i vertx, verty;
|
|
|
|
__m128i shufx, shufy;
|
2016-01-02 03:59:09 +00:00
|
|
|
__m128i dcdx, dcdy;
|
|
|
|
__m128i cdx02, cdx13, cdy02, cdy13, c02, c13;
|
|
|
|
__m128i c01, c23, unused;
|
2010-10-12 18:59:15 +01:00
|
|
|
__m128i dcdx_neg_mask;
|
|
|
|
__m128i dcdy_neg_mask;
|
|
|
|
__m128i dcdx_zero_mask;
|
2016-01-02 03:59:09 +00:00
|
|
|
__m128i top_left_flag, c_dec;
|
2010-10-12 18:59:15 +01:00
|
|
|
__m128i eo, p0, p1, p2;
|
|
|
|
__m128i zero = _mm_setzero_si128();
|
|
|
|
|
2015-12-31 02:20:38 +00:00
|
|
|
vertx = _mm_load_si128((__m128i *)position->x); /* vertex x coords */
|
|
|
|
verty = _mm_load_si128((__m128i *)position->y); /* vertex y coords */
|
2010-10-12 18:59:15 +01:00
|
|
|
|
|
|
|
shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1));
|
|
|
|
shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1));
|
|
|
|
|
|
|
|
dcdx = _mm_sub_epi32(verty, shufy);
|
|
|
|
dcdy = _mm_sub_epi32(vertx, shufx);
|
|
|
|
|
|
|
|
dcdx_neg_mask = _mm_srai_epi32(dcdx, 31);
|
|
|
|
dcdx_zero_mask = _mm_cmpeq_epi32(dcdx, zero);
|
|
|
|
dcdy_neg_mask = _mm_srai_epi32(dcdy, 31);
|
|
|
|
|
2013-04-23 19:40:05 +01:00
|
|
|
top_left_flag = _mm_set1_epi32((setup->bottom_edge_rule == 0) ? ~0 : 0);
|
2010-10-12 18:59:15 +01:00
|
|
|
|
2016-01-02 03:59:09 +00:00
|
|
|
c_dec = _mm_or_si128(dcdx_neg_mask,
|
|
|
|
_mm_and_si128(dcdx_zero_mask,
|
|
|
|
_mm_xor_si128(dcdy_neg_mask,
|
|
|
|
top_left_flag)));
|
2010-10-12 18:59:15 +01:00
|
|
|
|
2016-01-02 03:59:09 +00:00
|
|
|
/*
|
|
|
|
* 64 bit arithmetic.
|
|
|
|
* Note we need _signed_ mul (_mm_mul_epi32) which we emulate.
|
|
|
|
*/
|
|
|
|
cdx02 = mm_mullohi_epi32(dcdx, vertx, &cdx13);
|
|
|
|
cdy02 = mm_mullohi_epi32(dcdy, verty, &cdy13);
|
|
|
|
c02 = _mm_sub_epi64(cdx02, cdy02);
|
|
|
|
c13 = _mm_sub_epi64(cdx13, cdy13);
|
|
|
|
c02 = _mm_sub_epi64(c02, _mm_shuffle_epi32(c_dec,
|
|
|
|
_MM_SHUFFLE(2,2,0,0)));
|
|
|
|
c13 = _mm_sub_epi64(c13, _mm_shuffle_epi32(c_dec,
|
|
|
|
_MM_SHUFFLE(3,3,1,1)));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Useful for very small fbs/tris (or fewer subpixel bits) only:
|
|
|
|
* c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx),
|
|
|
|
* mm_mullo_epi32(dcdy, verty));
|
|
|
|
*
|
|
|
|
* c = _mm_sub_epi32(c, c_dec);
|
|
|
|
*/
|
2010-10-12 18:59:15 +01:00
|
|
|
|
|
|
|
/* Scale up to match c:
|
2010-06-17 21:19:09 +01:00
|
|
|
*/
|
2010-10-12 18:59:15 +01:00
|
|
|
dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER);
|
|
|
|
dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER);
|
|
|
|
|
2016-01-02 03:59:09 +00:00
|
|
|
/*
|
|
|
|
* Calculate trivial reject values:
|
|
|
|
* Note eo cannot overflow even if dcdx/dcdy would already have
|
|
|
|
* 31 bits (which they shouldn't have). This is because eo
|
|
|
|
* is never negative (albeit if we rely on that need to be careful...)
|
2010-10-12 18:59:15 +01:00
|
|
|
*/
|
|
|
|
eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy),
|
|
|
|
_mm_and_si128(dcdx_neg_mask, dcdx));
|
|
|
|
|
|
|
|
/* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
|
|
|
|
|
2016-01-02 03:59:09 +00:00
|
|
|
/*
|
|
|
|
* Pointless transpose which gets undone immediately in
|
|
|
|
* rasterization.
|
|
|
|
* It is actually difficult to do away with it - would essentially
|
|
|
|
* need GET_PLANES_DX, GET_PLANES_DY etc., but the calculations
|
|
|
|
* for this then would need to depend on the number of planes.
|
|
|
|
* The transpose is quite special here due to c being 64bit...
|
|
|
|
* The store has to be unaligned (unless we'd make the plane size
|
|
|
|
* a multiple of 128), and of course storing eo separately...
|
2010-10-12 18:59:15 +01:00
|
|
|
*/
|
2016-01-02 03:59:09 +00:00
|
|
|
c01 = _mm_unpacklo_epi64(c02, c13);
|
|
|
|
c23 = _mm_unpackhi_epi64(c02, c13);
|
|
|
|
transpose2_64_2_32(&c01, &c23, &dcdx, &dcdy,
|
|
|
|
&p0, &p1, &p2, &unused);
|
|
|
|
_mm_storeu_si128((__m128i *)&plane[0], p0);
|
|
|
|
plane[0].eo = (uint32_t)_mm_cvtsi128_si32(eo);
|
|
|
|
_mm_storeu_si128((__m128i *)&plane[1], p1);
|
|
|
|
eo = _mm_shuffle_epi32(eo, _MM_SHUFFLE(3,2,0,1));
|
|
|
|
plane[1].eo = (uint32_t)_mm_cvtsi128_si32(eo);
|
|
|
|
_mm_storeu_si128((__m128i *)&plane[2], p2);
|
|
|
|
eo = _mm_shuffle_epi32(eo, _MM_SHUFFLE(0,0,0,2));
|
|
|
|
plane[2].eo = (uint32_t)_mm_cvtsi128_si32(eo);
|
2013-10-25 03:05:22 +01:00
|
|
|
} else
|
2018-11-10 00:23:08 +00:00
|
|
|
#elif defined(_ARCH_PWR8) && UTIL_ARCH_LITTLE_ENDIAN
|
2015-12-13 15:49:32 +00:00
|
|
|
/*
|
|
|
|
* XXX this code is effectively disabled for all practical purposes,
|
|
|
|
* as the allowed fb size is tiny if FIXED_ORDER is 8.
|
|
|
|
*/
|
|
|
|
if (setup->fb.width <= MAX_FIXED_LENGTH32 &&
|
|
|
|
setup->fb.height <= MAX_FIXED_LENGTH32 &&
|
2022-02-02 07:39:38 +00:00
|
|
|
pwr8_limit_check) {
|
2015-12-13 15:49:32 +00:00
|
|
|
unsigned int bottom_edge;
|
|
|
|
__m128i vertx, verty;
|
|
|
|
__m128i shufx, shufy;
|
|
|
|
__m128i dcdx, dcdy, c;
|
|
|
|
__m128i unused;
|
|
|
|
__m128i dcdx_neg_mask;
|
|
|
|
__m128i dcdy_neg_mask;
|
|
|
|
__m128i dcdx_zero_mask;
|
|
|
|
__m128i top_left_flag;
|
|
|
|
__m128i c_inc_mask, c_inc;
|
|
|
|
__m128i eo, p0, p1, p2;
|
|
|
|
__m128i_union vshuf_mask;
|
|
|
|
__m128i zero = vec_splats((unsigned char) 0);
|
2022-06-08 10:00:54 +01:00
|
|
|
alignas(16) int32_t temp_vec[4];
|
2015-12-13 15:49:32 +00:00
|
|
|
|
2018-11-10 00:23:08 +00:00
|
|
|
#if UTIL_ARCH_LITTLE_ENDIAN
|
2015-12-13 15:49:32 +00:00
|
|
|
vshuf_mask.i[0] = 0x07060504;
|
|
|
|
vshuf_mask.i[1] = 0x0B0A0908;
|
|
|
|
vshuf_mask.i[2] = 0x03020100;
|
|
|
|
vshuf_mask.i[3] = 0x0F0E0D0C;
|
|
|
|
#else
|
|
|
|
vshuf_mask.i[0] = 0x00010203;
|
|
|
|
vshuf_mask.i[1] = 0x0C0D0E0F;
|
|
|
|
vshuf_mask.i[2] = 0x04050607;
|
|
|
|
vshuf_mask.i[3] = 0x08090A0B;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* vertex x coords */
|
|
|
|
vertx = vec_load_si128((const uint32_t *) position->x);
|
|
|
|
/* vertex y coords */
|
|
|
|
verty = vec_load_si128((const uint32_t *) position->y);
|
|
|
|
|
|
|
|
shufx = vec_perm (vertx, vertx, vshuf_mask.m128i);
|
|
|
|
shufy = vec_perm (verty, verty, vshuf_mask.m128i);
|
|
|
|
|
|
|
|
dcdx = vec_sub_epi32(verty, shufy);
|
|
|
|
dcdy = vec_sub_epi32(vertx, shufx);
|
|
|
|
|
|
|
|
dcdx_neg_mask = vec_srai_epi32(dcdx, 31);
|
|
|
|
dcdx_zero_mask = vec_cmpeq_epi32(dcdx, zero);
|
|
|
|
dcdy_neg_mask = vec_srai_epi32(dcdy, 31);
|
|
|
|
|
|
|
|
bottom_edge = (setup->bottom_edge_rule == 0) ? ~0 : 0;
|
|
|
|
top_left_flag = (__m128i) vec_splats(bottom_edge);
|
|
|
|
|
|
|
|
c_inc_mask = vec_or(dcdx_neg_mask,
|
|
|
|
vec_and(dcdx_zero_mask,
|
|
|
|
vec_xor(dcdy_neg_mask,
|
|
|
|
top_left_flag)));
|
|
|
|
|
|
|
|
c_inc = vec_srli_epi32(c_inc_mask, 31);
|
|
|
|
|
|
|
|
c = vec_sub_epi32(vec_mullo_epi32(dcdx, vertx),
|
|
|
|
vec_mullo_epi32(dcdy, verty));
|
|
|
|
|
|
|
|
c = vec_add_epi32(c, c_inc);
|
|
|
|
|
|
|
|
/* Scale up to match c:
|
|
|
|
*/
|
|
|
|
dcdx = vec_slli_epi32(dcdx, FIXED_ORDER);
|
|
|
|
dcdy = vec_slli_epi32(dcdy, FIXED_ORDER);
|
|
|
|
|
|
|
|
/* Calculate trivial reject values:
|
|
|
|
*/
|
2016-01-17 12:25:32 +00:00
|
|
|
eo = vec_sub_epi32(vec_andnot_si128(dcdy_neg_mask, dcdy),
|
2015-12-13 15:49:32 +00:00
|
|
|
vec_and(dcdx_neg_mask, dcdx));
|
|
|
|
|
|
|
|
/* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
|
|
|
|
|
|
|
|
/* Pointless transpose which gets undone immediately in
|
|
|
|
* rasterization:
|
|
|
|
*/
|
|
|
|
transpose4_epi32(&c, &dcdx, &dcdy, &eo,
|
|
|
|
&p0, &p1, &p2, &unused);
|
|
|
|
|
|
|
|
#define STORE_PLANE(plane, vec) do { \
|
|
|
|
vec_store_si128((uint32_t *)&temp_vec, vec); \
|
|
|
|
plane.c = (int64_t)temp_vec[0]; \
|
|
|
|
plane.dcdx = temp_vec[1]; \
|
|
|
|
plane.dcdy = temp_vec[2]; \
|
|
|
|
plane.eo = temp_vec[3]; \
|
|
|
|
} while(0)
|
|
|
|
|
|
|
|
STORE_PLANE(plane[0], p0);
|
|
|
|
STORE_PLANE(plane[1], p1);
|
|
|
|
STORE_PLANE(plane[2], p2);
|
|
|
|
#undef STORE_PLANE
|
|
|
|
} else
|
2013-10-25 03:05:22 +01:00
|
|
|
#endif
|
2010-10-12 18:59:15 +01:00
|
|
|
{
|
2012-05-14 16:00:06 +01:00
|
|
|
plane[0].dcdy = position->dx01;
|
|
|
|
plane[1].dcdy = position->x[1] - position->x[2];
|
|
|
|
plane[2].dcdy = position->dx20;
|
|
|
|
plane[0].dcdx = position->dy01;
|
|
|
|
plane[1].dcdx = position->y[1] - position->y[2];
|
|
|
|
plane[2].dcdx = position->dy20;
|
2022-05-27 15:05:19 +01:00
|
|
|
|
|
|
|
for (int i = 0; i < 3; i++) {
|
2016-01-02 03:59:09 +00:00
|
|
|
/* half-edge constants, will be iterated over the whole render
|
2010-10-12 18:59:15 +01:00
|
|
|
* target.
|
|
|
|
*/
|
2013-10-25 03:05:22 +01:00
|
|
|
plane[i].c = IMUL64(plane[i].dcdx, position->x[i]) -
|
2016-01-02 03:59:09 +00:00
|
|
|
IMUL64(plane[i].dcdy, position->y[i]);
|
2010-10-12 18:59:15 +01:00
|
|
|
|
2013-04-23 19:40:05 +01:00
|
|
|
/* correct for top-left vs. bottom-left fill convention.
|
2016-01-02 03:59:09 +00:00
|
|
|
*/
|
2010-10-12 18:59:15 +01:00
|
|
|
if (plane[i].dcdx < 0) {
|
|
|
|
/* both fill conventions want this - adjust for left edges */
|
2016-01-02 03:59:09 +00:00
|
|
|
plane[i].c++;
|
2010-06-17 21:19:09 +01:00
|
|
|
}
|
2010-10-12 18:59:15 +01:00
|
|
|
else if (plane[i].dcdx == 0) {
|
2022-05-27 15:05:19 +01:00
|
|
|
if (setup->bottom_edge_rule == 0) {
|
2010-10-12 18:59:15 +01:00
|
|
|
/* correct for top-left fill convention:
|
|
|
|
*/
|
2022-05-27 15:05:19 +01:00
|
|
|
if (plane[i].dcdy > 0)
|
|
|
|
plane[i].c++;
|
|
|
|
} else {
|
2010-10-12 18:59:15 +01:00
|
|
|
/* correct for bottom-left fill convention:
|
|
|
|
*/
|
2022-05-27 15:05:19 +01:00
|
|
|
if (plane[i].dcdy < 0)
|
|
|
|
plane[i].c++;
|
2010-10-12 18:59:15 +01:00
|
|
|
}
|
2010-06-17 21:19:09 +01:00
|
|
|
}
|
2009-10-07 22:36:43 +01:00
|
|
|
|
2013-10-25 03:05:22 +01:00
|
|
|
/* Scale up to match c:
|
|
|
|
*/
|
|
|
|
assert((plane[i].dcdx << FIXED_ORDER) >> FIXED_ORDER == plane[i].dcdx);
|
|
|
|
assert((plane[i].dcdy << FIXED_ORDER) >> FIXED_ORDER == plane[i].dcdy);
|
|
|
|
plane[i].dcdx <<= FIXED_ORDER;
|
|
|
|
plane[i].dcdy <<= FIXED_ORDER;
|
2009-10-07 22:36:43 +01:00
|
|
|
|
2010-10-12 18:59:15 +01:00
|
|
|
/* find trivial reject offsets for each edge for a single-pixel
|
|
|
|
* sized block. These will be scaled up at each recursive level to
|
|
|
|
* match the active blocksize. Scaling in this way works best if
|
|
|
|
* the blocks are square.
|
|
|
|
*/
|
|
|
|
plane[i].eo = 0;
|
|
|
|
if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx;
|
|
|
|
if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy;
|
|
|
|
}
|
|
|
|
}
|
2009-10-07 22:36:43 +01:00
|
|
|
|
2010-10-12 18:59:15 +01:00
|
|
|
if (0) {
|
2016-01-02 03:58:37 +00:00
|
|
|
debug_printf("p0: %"PRIx64"/%08x/%08x/%08x\n",
|
2010-10-12 18:59:15 +01:00
|
|
|
plane[0].c,
|
|
|
|
plane[0].dcdx,
|
|
|
|
plane[0].dcdy,
|
|
|
|
plane[0].eo);
|
2016-01-02 03:58:37 +00:00
|
|
|
|
|
|
|
debug_printf("p1: %"PRIx64"/%08x/%08x/%08x\n",
|
2010-10-12 18:59:15 +01:00
|
|
|
plane[1].c,
|
|
|
|
plane[1].dcdx,
|
|
|
|
plane[1].dcdy,
|
|
|
|
plane[1].eo);
|
2016-01-02 03:58:37 +00:00
|
|
|
|
|
|
|
debug_printf("p2: %"PRIx64"/%08x/%08x/%08x\n",
|
2010-10-12 18:59:15 +01:00
|
|
|
plane[2].c,
|
|
|
|
plane[2].dcdx,
|
|
|
|
plane[2].dcdy,
|
|
|
|
plane[2].eo);
|
2009-10-20 02:46:00 +01:00
|
|
|
}
|
|
|
|
|
2016-02-02 02:14:12 +00:00
|
|
|
if (nr_planes > 3) {
|
2021-08-30 05:44:47 +01:00
|
|
|
lp_setup_add_scissor_planes(scissor, &plane[3], s_planes, setup->multisample);
|
2010-06-17 21:19:09 +01:00
|
|
|
}
|
|
|
|
|
2022-02-03 02:46:35 +00:00
|
|
|
return lp_setup_bin_triangle(setup, tri, use_32bits,
|
|
|
|
check_opaque(setup, v0, v1, v2),
|
|
|
|
&bbox, nr_planes, viewport_index);
|
2010-08-26 20:09:22 +01:00
|
|
|
}
|
|
|
|
|
2010-09-07 23:10:11 +01:00
|
|
|
/*
 * Round down to the nearest power of two less than or equal to the input.
 *
 * Returns 0 when n is 0 on both code paths, so callers do not need to
 * special-case an input with no bit set.
 */
static inline uint32_t
floor_pot(uint32_t n)
{
#if defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
   /* BSR leaves its destination undefined for a zero source operand. */
   if (n == 0)
      return 0;

   __asm__("bsr %1,%0"
           : "=r" (n)
           : "rm" (n)
           : "cc");

   /* n is now the index of the highest set bit (0..31).  Shift an
    * unsigned one: "1 << 31" on a signed int is undefined behaviour.
    */
   return UINT32_C(1) << n;
#else
   /* Smear the highest set bit into every lower bit position... */
   n |= (n >> 1);
   n |= (n >> 2);
   n |= (n >> 4);
   n |= (n >> 8);
   n |= (n >> 16);

   /* ...then clear everything below the top bit (0 stays 0). */
   return n - (n >> 1);
#endif
}
|
2010-09-07 23:10:11 +01:00
|
|
|
|
2010-08-26 20:09:22 +01:00
|
|
|
|
2010-08-27 17:49:40 +01:00
|
|
|
boolean
|
2017-06-23 18:35:50 +01:00
|
|
|
lp_setup_bin_triangle(struct lp_setup_context *setup,
|
|
|
|
struct lp_rast_triangle *tri,
|
2022-02-02 07:39:38 +00:00
|
|
|
boolean use_32bits,
|
2022-02-03 02:46:35 +00:00
|
|
|
boolean opaque,
|
2017-06-23 18:35:50 +01:00
|
|
|
const struct u_rect *bbox,
|
|
|
|
int nr_planes,
|
|
|
|
unsigned viewport_index)
|
2010-08-26 20:09:22 +01:00
|
|
|
{
|
|
|
|
struct lp_scene *scene = setup->scene;
|
2020-03-20 21:34:53 +00:00
|
|
|
unsigned cmd;
|
|
|
|
|
2010-09-07 23:10:11 +01:00
|
|
|
/* What is the largest power-of-two boundary this triangle crosses:
|
2009-12-04 00:27:10 +00:00
|
|
|
*/
|
2022-05-27 15:05:19 +01:00
|
|
|
const int dx = floor_pot((bbox->x0 ^ bbox->x1) |
|
|
|
|
(bbox->y0 ^ bbox->y1));
|
2009-12-04 00:27:10 +00:00
|
|
|
|
2010-09-07 23:10:11 +01:00
|
|
|
/* The largest dimension of the rasterized area of the triangle
|
2010-09-13 12:03:07 +01:00
|
|
|
* (aligned to a 4x4 grid), rounded down to the nearest power of two:
|
2009-11-30 21:02:01 +00:00
|
|
|
*/
|
2022-05-27 15:05:19 +01:00
|
|
|
const int max_sz = ((bbox->x1 - (bbox->x0 & ~3)) |
|
|
|
|
(bbox->y1 - (bbox->y0 & ~3)));
|
|
|
|
const int sz = floor_pot(max_sz);
|
2017-06-23 18:35:50 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* NOTE: It is important to use the original bounding box
|
|
|
|
* which might contain negative values here, because if the
|
|
|
|
* plane math may overflow or not with the 32bit rasterization
|
|
|
|
* functions depends on the original extent of the triangle.
|
|
|
|
*/
|
2010-09-07 23:10:11 +01:00
|
|
|
|
2010-11-02 14:20:20 +00:00
|
|
|
/* Now apply scissor, etc to the bounding box. Could do this
|
|
|
|
* earlier, but it confuses the logic for tri-16 and would force
|
|
|
|
* the rasterizer to also respect scissor, etc, just for the rare
|
|
|
|
* cases where a small triangle extends beyond the scissor.
|
|
|
|
*/
|
2022-05-27 15:05:19 +01:00
|
|
|
struct u_rect trimmed_box = *bbox;
|
2013-11-26 18:50:27 +00:00
|
|
|
u_rect_find_intersection(&setup->draw_regions[viewport_index],
|
2013-05-24 21:28:19 +01:00
|
|
|
&trimmed_box);
|
2010-11-02 14:20:20 +00:00
|
|
|
|
2009-11-30 21:02:01 +00:00
|
|
|
/* Determine which tile(s) intersect the triangle's bounding box
|
2009-10-09 16:05:26 +01:00
|
|
|
*/
|
2022-05-27 15:05:19 +01:00
|
|
|
if (dx < TILE_SIZE) {
|
|
|
|
const int ix0 = bbox->x0 / TILE_SIZE;
|
|
|
|
const int iy0 = bbox->y0 / TILE_SIZE;
|
2011-10-05 11:31:15 +01:00
|
|
|
unsigned px = bbox->x0 & 63 & ~3;
|
|
|
|
unsigned py = bbox->y0 & 63 & ~3;
|
2010-09-07 23:10:11 +01:00
|
|
|
|
|
|
|
assert(iy0 == bbox->y1 / TILE_SIZE &&
|
2022-05-27 15:05:19 +01:00
|
|
|
ix0 == bbox->x1 / TILE_SIZE);
|
2010-09-07 23:10:11 +01:00
|
|
|
|
2010-09-23 19:56:48 +01:00
|
|
|
if (nr_planes == 3) {
|
2022-05-27 15:05:19 +01:00
|
|
|
if (sz < 4) {
|
2010-09-23 19:56:48 +01:00
|
|
|
/* Triangle is contained in a single 4x4 stamp:
|
|
|
|
*/
|
2011-10-05 11:31:15 +01:00
|
|
|
assert(px + 4 <= TILE_SIZE);
|
|
|
|
assert(py + 4 <= TILE_SIZE);
|
2020-03-20 21:34:53 +00:00
|
|
|
if (setup->multisample)
|
|
|
|
cmd = LP_RAST_OP_MS_TRIANGLE_3_4;
|
|
|
|
else
|
|
|
|
cmd = use_32bits ? LP_RAST_OP_TRIANGLE_32_3_4 : LP_RAST_OP_TRIANGLE_3_4;
|
2022-05-27 15:05:19 +01:00
|
|
|
return lp_scene_bin_cmd_with_state(scene, ix0, iy0,
|
|
|
|
setup->fs.stored, cmd,
|
|
|
|
lp_rast_arg_triangle_contained(tri, px, py));
|
2010-09-23 19:56:48 +01:00
|
|
|
}
|
|
|
|
|
2022-05-27 15:05:19 +01:00
|
|
|
if (sz < 16) {
|
2010-09-23 19:56:48 +01:00
|
|
|
/* Triangle is contained in a single 16x16 block:
|
|
|
|
*/
|
2011-10-05 13:27:08 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The 16x16 block is only 4x4 aligned, and can exceed the tile
|
|
|
|
* dimensions if the triangle is 16 pixels in one dimension but 4
|
|
|
|
* in the other. So budge the 16x16 back inside the tile.
|
|
|
|
*/
|
|
|
|
px = MIN2(px, TILE_SIZE - 16);
|
|
|
|
py = MIN2(py, TILE_SIZE - 16);
|
|
|
|
|
2011-10-05 11:31:15 +01:00
|
|
|
assert(px + 16 <= TILE_SIZE);
|
|
|
|
assert(py + 16 <= TILE_SIZE);
|
2011-10-05 13:27:08 +01:00
|
|
|
|
2020-03-20 21:34:53 +00:00
|
|
|
if (setup->multisample)
|
|
|
|
cmd = LP_RAST_OP_MS_TRIANGLE_3_16;
|
|
|
|
else
|
|
|
|
cmd = use_32bits ? LP_RAST_OP_TRIANGLE_32_3_16 : LP_RAST_OP_TRIANGLE_3_16;
|
2022-05-27 15:05:19 +01:00
|
|
|
return lp_scene_bin_cmd_with_state(scene, ix0, iy0,
|
|
|
|
setup->fs.stored, cmd,
|
|
|
|
lp_rast_arg_triangle_contained(tri, px, py));
|
2010-09-23 19:56:48 +01:00
|
|
|
}
|
2022-05-27 15:05:19 +01:00
|
|
|
} else if (nr_planes == 4 && sz < 16) {
|
2011-10-05 13:27:08 +01:00
|
|
|
px = MIN2(px, TILE_SIZE - 16);
|
|
|
|
py = MIN2(py, TILE_SIZE - 16);
|
|
|
|
|
2011-10-05 11:31:15 +01:00
|
|
|
assert(px + 16 <= TILE_SIZE);
|
|
|
|
assert(py + 16 <= TILE_SIZE);
|
2011-10-05 13:27:08 +01:00
|
|
|
|
2020-03-20 21:34:53 +00:00
|
|
|
if (setup->multisample)
|
|
|
|
cmd = LP_RAST_OP_MS_TRIANGLE_4_16;
|
|
|
|
else
|
|
|
|
cmd = use_32bits ? LP_RAST_OP_TRIANGLE_32_4_16 : LP_RAST_OP_TRIANGLE_4_16;
|
2010-10-14 23:28:10 +01:00
|
|
|
return lp_scene_bin_cmd_with_state(scene, ix0, iy0,
|
2020-03-20 21:34:53 +00:00
|
|
|
setup->fs.stored, cmd,
|
2011-10-05 13:27:08 +01:00
|
|
|
lp_rast_arg_triangle_contained(tri, px, py));
|
2010-10-08 17:21:03 +01:00
|
|
|
}
|
2010-09-23 19:56:48 +01:00
|
|
|
|
2009-10-08 12:15:12 +01:00
|
|
|
/* Triangle is contained in a single tile:
|
2009-10-07 22:36:43 +01:00
|
|
|
*/
|
2020-03-20 21:34:53 +00:00
|
|
|
if (setup->multisample)
|
|
|
|
cmd = lp_rast_ms_tri_tab[nr_planes];
|
|
|
|
else
|
|
|
|
cmd = use_32bits ? lp_rast_32_tri_tab[nr_planes] : lp_rast_tri_tab[nr_planes];
|
2022-05-27 15:05:19 +01:00
|
|
|
return lp_scene_bin_cmd_with_state(scene, ix0, iy0, setup->fs.stored, cmd,
|
|
|
|
lp_rast_arg_triangle(tri, (1<<nr_planes)-1));
|
|
|
|
} else {
|
2010-10-15 12:23:22 +01:00
|
|
|
struct lp_rast_plane *plane = GET_PLANES(tri);
|
2013-10-25 03:05:22 +01:00
|
|
|
int64_t c[MAX_PLANES];
|
|
|
|
int64_t ei[MAX_PLANES];
|
2010-11-02 14:20:20 +00:00
|
|
|
|
2013-10-25 03:05:22 +01:00
|
|
|
int64_t eo[MAX_PLANES];
|
|
|
|
int64_t xstep[MAX_PLANES];
|
|
|
|
int64_t ystep[MAX_PLANES];
|
2009-10-07 22:36:43 +01:00
|
|
|
int x, y;
|
2010-09-07 23:10:11 +01:00
|
|
|
|
2022-05-27 15:05:19 +01:00
|
|
|
const int ix0 = trimmed_box.x0 / TILE_SIZE;
|
|
|
|
const int iy0 = trimmed_box.y0 / TILE_SIZE;
|
|
|
|
const int ix1 = trimmed_box.x1 / TILE_SIZE;
|
|
|
|
const int iy1 = trimmed_box.y1 / TILE_SIZE;
|
|
|
|
|
|
|
|
for (int i = 0; i < nr_planes; i++) {
|
|
|
|
c[i] = (plane[i].c +
|
2013-10-25 03:05:22 +01:00
|
|
|
IMUL64(plane[i].dcdy, iy0) * TILE_SIZE -
|
|
|
|
IMUL64(plane[i].dcdx, ix0) * TILE_SIZE);
|
2010-10-15 12:23:22 +01:00
|
|
|
|
2022-05-27 15:05:19 +01:00
|
|
|
ei[i] = (plane[i].dcdy -
|
|
|
|
plane[i].dcdx -
|
2016-01-02 03:58:37 +00:00
|
|
|
(int64_t)plane[i].eo) << TILE_ORDER;
|
2010-10-15 13:04:19 +01:00
|
|
|
|
2016-01-02 03:58:37 +00:00
|
|
|
eo[i] = (int64_t)plane[i].eo << TILE_ORDER;
|
2013-10-25 03:05:22 +01:00
|
|
|
xstep[i] = -(((int64_t)plane[i].dcdx) << TILE_ORDER);
|
|
|
|
ystep[i] = ((int64_t)plane[i].dcdy) << TILE_ORDER;
|
2010-06-17 21:19:09 +01:00
|
|
|
}
|
|
|
|
|
llvmpipe: Add a linear rasterizer optimized for 2D rendering.
This change adds:
- an alternative rasterizer, which rasterizes bins in a left->right &
top->bottom linear fashion;
- triangle -> rectangle detection;
- 1:1 blit detection;
- a special TGSI -> LLVM IR code generation that uses 8-bit SSE integers
in AoS fashion (as opposed to 32bits floats.)
Altogether these changes yield a 2x to 3x performance improvement for 2D
workloads. It was designed to render Windows 7 Aero and other Windows
built-in 3D applications (like Windows Media Player, Internet Explorer
11, UWP applications) with minimum CPU utilization, but it should be
generally applicable to other 2D-on-3D applications, like desktop
compositors, HTML browsers, 3D based UI toolkits, etc.
This was mostly the brainchild of Keith Whitwell back in 2010. I wrote
TGSI -> AoS translation. And many others added bug-fixes and
enhancements over the years: Roland Scheidegger, Brian Paul, and James
Benton.
Known issues:
- piglit spec@!opengl 1.1@quad-invariance will warn that "left and right
half should match" due to rounding error difference
- These optimized paths to kick in is that depth-buffer must not be
used, so some applications which want to benefit from these improvements
might need to be modified to ensure they use painter's algorithm instead
of depth-buffers.
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Keith Whitwell <keithw@vmware.com>
v2: Incorporate Dave Airlie feedback: cleanup LP_DEBUG_xx; shrink 3+
empty lines.
v3: silence unused var warning, adapt to new upstream code (point setup)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11969>
2021-05-07 13:49:07 +01:00
|
|
|
tri->inputs.is_blit = lp_setup_is_blit(setup, &tri->inputs);
|
2022-05-27 15:05:19 +01:00
|
|
|
|
2010-01-19 16:30:13 +00:00
|
|
|
/* Test tile-sized blocks against the triangle.
|
|
|
|
* Discard blocks fully outside the tri. If the block is fully
|
|
|
|
* contained inside the tri, bin an lp_rast_shade_tile command.
|
|
|
|
* Else, bin a lp_rast_triangle command.
|
2009-10-07 22:36:43 +01:00
|
|
|
*/
|
2022-05-27 15:05:19 +01:00
|
|
|
for (y = iy0; y <= iy1; y++) {
|
2013-10-25 03:05:22 +01:00
|
|
|
boolean in = FALSE; /* are we inside the triangle? */
|
|
|
|
int64_t cx[MAX_PLANES];
|
2010-06-17 21:19:09 +01:00
|
|
|
|
2022-05-27 15:05:19 +01:00
|
|
|
for (int i = 0; i < nr_planes; i++)
|
2010-06-17 21:19:09 +01:00
|
|
|
cx[i] = c[i];
|
2009-10-07 22:36:43 +01:00
|
|
|
|
2022-05-27 15:05:19 +01:00
|
|
|
for (x = ix0; x <= ix1; x++) {
|
2010-06-17 21:19:09 +01:00
|
|
|
int out = 0;
|
|
|
|
int partial = 0;
|
|
|
|
|
2022-05-27 15:05:19 +01:00
|
|
|
for (int i = 0; i < nr_planes; i++) {
|
2013-10-25 03:05:22 +01:00
|
|
|
int64_t planeout = cx[i] + eo[i];
|
|
|
|
int64_t planepartial = cx[i] + ei[i] - 1;
|
2014-11-25 23:03:02 +00:00
|
|
|
out |= (int) (planeout >> 63);
|
|
|
|
partial |= ((int) (planepartial >> 63)) & (1<<i);
|
2010-06-17 21:19:09 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (out) {
|
|
|
|
/* do nothing */
|
|
|
|
if (in)
|
|
|
|
break; /* exiting triangle, all done with this row */
|
2010-01-21 21:59:01 +00:00
|
|
|
LP_COUNT(nr_empty_64);
|
2022-05-27 15:05:19 +01:00
|
|
|
} else if (partial) {
|
2013-10-25 03:05:22 +01:00
|
|
|
/* Not trivially accepted by at least one plane -
|
2010-06-17 21:19:09 +01:00
|
|
|
* rasterize/shade partial tile
|
|
|
|
*/
|
|
|
|
int count = util_bitcount(partial);
|
|
|
|
in = TRUE;
|
2020-03-20 21:34:53 +00:00
|
|
|
|
|
|
|
if (setup->multisample)
|
|
|
|
cmd = lp_rast_ms_tri_tab[count];
|
|
|
|
else
|
|
|
|
cmd = use_32bits ? lp_rast_32_tri_tab[count] : lp_rast_tri_tab[count];
|
2022-05-27 15:05:19 +01:00
|
|
|
if (!lp_scene_bin_cmd_with_state(scene, x, y,
|
|
|
|
setup->fs.stored, cmd,
|
|
|
|
lp_rast_arg_triangle(tri, partial)))
|
2010-08-27 17:49:40 +01:00
|
|
|
goto fail;
|
2010-06-17 21:19:09 +01:00
|
|
|
|
|
|
|
LP_COUNT(nr_partially_covered_64);
|
2022-05-27 15:05:19 +01:00
|
|
|
} else {
|
2009-11-30 21:02:01 +00:00
|
|
|
/* triangle covers the whole tile- shade whole tile */
|
2010-01-21 21:59:01 +00:00
|
|
|
LP_COUNT(nr_fully_covered_64);
|
2010-06-17 21:19:09 +01:00
|
|
|
in = TRUE;
|
2022-02-03 02:46:35 +00:00
|
|
|
if (!lp_setup_whole_tile(setup, &tri->inputs, x, y, opaque))
|
2010-08-27 17:49:40 +01:00
|
|
|
goto fail;
|
2010-06-17 21:19:09 +01:00
|
|
|
}
|
2009-10-07 22:36:43 +01:00
|
|
|
|
2013-10-25 03:05:22 +01:00
|
|
|
/* Iterate cx values across the region: */
|
2022-05-27 15:05:19 +01:00
|
|
|
for (int i = 0; i < nr_planes; i++)
|
2010-06-17 21:19:09 +01:00
|
|
|
cx[i] += xstep[i];
|
2013-10-25 03:05:22 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Iterate c values down the region: */
|
2022-05-27 15:05:19 +01:00
|
|
|
for (int i = 0; i < nr_planes; i++)
|
2010-06-17 21:19:09 +01:00
|
|
|
c[i] += ystep[i];
|
2009-10-07 22:36:43 +01:00
|
|
|
}
|
|
|
|
}
|
2010-08-27 17:49:40 +01:00
|
|
|
|
|
|
|
return TRUE;
|
|
|
|
|
|
|
|
fail:
|
|
|
|
/* Need to disable any partially binned triangle. This is easier
|
|
|
|
* than trying to locate all the triangle, shade-tile, etc,
|
|
|
|
* commands which may have been binned.
|
|
|
|
*/
|
|
|
|
tri->inputs.disable = TRUE;
|
|
|
|
return FALSE;
|
2009-10-07 22:36:43 +01:00
|
|
|
}
|
|
|
|
|
2010-01-14 01:01:45 +00:00
|
|
|
|
2010-04-17 19:48:26 +01:00
|
|
|
/**
|
2010-10-08 17:06:05 +01:00
|
|
|
* Try to draw the triangle, restart the scene on failure.
|
2010-04-17 19:48:26 +01:00
|
|
|
*/
|
2022-05-27 15:05:19 +01:00
|
|
|
static inline void
|
|
|
|
retry_triangle_ccw(struct lp_setup_context *setup,
|
|
|
|
struct fixed_position *position,
|
|
|
|
const float (*v0)[4],
|
|
|
|
const float (*v1)[4],
|
|
|
|
const float (*v2)[4],
|
|
|
|
boolean front)
|
2009-10-07 22:36:43 +01:00
|
|
|
{
|
2022-05-27 15:05:19 +01:00
|
|
|
if (!do_triangle_ccw(setup, position, v0, v1, v2, front)) {
|
2010-10-08 17:01:16 +01:00
|
|
|
if (!lp_setup_flush_and_restart(setup))
|
|
|
|
return;
|
2010-08-27 17:49:40 +01:00
|
|
|
|
2022-05-27 15:05:19 +01:00
|
|
|
if (!do_triangle_ccw(setup, position, v0, v1, v2, front))
|
2010-10-08 17:01:16 +01:00
|
|
|
return;
|
2010-08-27 17:49:40 +01:00
|
|
|
}
|
2009-10-07 22:36:43 +01:00
|
|
|
}
|
|
|
|
|
2022-05-27 15:05:19 +01:00
|
|
|
|
2012-05-14 16:00:06 +01:00
|
|
|
/**
|
|
|
|
* Calculate fixed position data for a triangle
|
2016-01-02 03:59:16 +00:00
|
|
|
* It is unfortunate we need to do that here (as we need area
|
|
|
|
* calculated in fixed point), as there's quite some code duplication
|
|
|
|
* to what is done in the jit setup prog.
|
2012-05-14 16:00:06 +01:00
|
|
|
*/
|
2022-02-03 04:18:40 +00:00
|
|
|
static inline int8_t
|
2016-01-02 03:59:16 +00:00
|
|
|
calc_fixed_position(struct lp_setup_context *setup,
|
|
|
|
struct fixed_position* position,
|
|
|
|
const float (*v0)[4],
|
|
|
|
const float (*v1)[4],
|
|
|
|
const float (*v2)[4])
|
2010-10-08 17:06:05 +01:00
|
|
|
{
|
2020-03-20 21:36:35 +00:00
|
|
|
float pixel_offset = setup->multisample ? 0.0 : setup->pixel_offset;
|
2016-01-02 03:59:16 +00:00
|
|
|
/*
|
|
|
|
* The rounding may not be quite the same with PIPE_ARCH_SSE
|
|
|
|
* (util_iround right now only does nearest/even on x87,
|
|
|
|
* otherwise nearest/away-from-zero).
|
|
|
|
* Both should be acceptable, I think.
|
|
|
|
*/
|
|
|
|
#if defined(PIPE_ARCH_SSE)
|
2016-01-31 00:27:09 +00:00
|
|
|
__m128 v0r, v1r;
|
2016-01-02 03:59:16 +00:00
|
|
|
__m128 vxy0xy2, vxy1xy0;
|
|
|
|
__m128i vxy0xy2i, vxy1xy0i;
|
|
|
|
__m128i dxdy0120, x0x2y0y2, x1x0y1y0, x0120, y0120;
|
2020-03-20 21:36:35 +00:00
|
|
|
__m128 pix_offset = _mm_set1_ps(pixel_offset);
|
2016-01-02 03:59:16 +00:00
|
|
|
__m128 fixed_one = _mm_set1_ps((float)FIXED_ONE);
|
2016-01-31 00:27:09 +00:00
|
|
|
v0r = _mm_castpd_ps(_mm_load_sd((double *)v0[0]));
|
|
|
|
vxy0xy2 = _mm_loadh_pi(v0r, (__m64 *)v2[0]);
|
|
|
|
v1r = _mm_castpd_ps(_mm_load_sd((double *)v1[0]));
|
|
|
|
vxy1xy0 = _mm_movelh_ps(v1r, vxy0xy2);
|
2016-01-02 03:59:16 +00:00
|
|
|
vxy0xy2 = _mm_sub_ps(vxy0xy2, pix_offset);
|
|
|
|
vxy1xy0 = _mm_sub_ps(vxy1xy0, pix_offset);
|
|
|
|
vxy0xy2 = _mm_mul_ps(vxy0xy2, fixed_one);
|
|
|
|
vxy1xy0 = _mm_mul_ps(vxy1xy0, fixed_one);
|
|
|
|
vxy0xy2i = _mm_cvtps_epi32(vxy0xy2);
|
|
|
|
vxy1xy0i = _mm_cvtps_epi32(vxy1xy0);
|
|
|
|
dxdy0120 = _mm_sub_epi32(vxy0xy2i, vxy1xy0i);
|
|
|
|
_mm_store_si128((__m128i *)&position->dx01, dxdy0120);
|
|
|
|
/*
|
|
|
|
* For the mul, would need some more shuffles, plus emulation
|
|
|
|
* for the signed mul (without sse41), so don't bother.
|
|
|
|
*/
|
|
|
|
x0x2y0y2 = _mm_shuffle_epi32(vxy0xy2i, _MM_SHUFFLE(3,1,2,0));
|
|
|
|
x1x0y1y0 = _mm_shuffle_epi32(vxy1xy0i, _MM_SHUFFLE(3,1,2,0));
|
|
|
|
x0120 = _mm_unpacklo_epi32(x0x2y0y2, x1x0y1y0);
|
|
|
|
y0120 = _mm_unpackhi_epi32(x0x2y0y2, x1x0y1y0);
|
|
|
|
_mm_store_si128((__m128i *)&position->x[0], x0120);
|
|
|
|
_mm_store_si128((__m128i *)&position->y[0], y0120);
|
|
|
|
|
|
|
|
#else
|
2020-03-20 21:36:35 +00:00
|
|
|
position->x[0] = subpixel_snap(v0[0][0] - pixel_offset);
|
|
|
|
position->x[1] = subpixel_snap(v1[0][0] - pixel_offset);
|
|
|
|
position->x[2] = subpixel_snap(v2[0][0] - pixel_offset);
|
2016-01-02 03:59:16 +00:00
|
|
|
position->x[3] = 0; // should be unused
|
2012-05-14 16:00:06 +01:00
|
|
|
|
2020-03-20 21:36:35 +00:00
|
|
|
position->y[0] = subpixel_snap(v0[0][1] - pixel_offset);
|
|
|
|
position->y[1] = subpixel_snap(v1[0][1] - pixel_offset);
|
|
|
|
position->y[2] = subpixel_snap(v2[0][1] - pixel_offset);
|
2016-01-02 03:59:16 +00:00
|
|
|
position->y[3] = 0; // should be unused
|
2012-05-14 16:00:06 +01:00
|
|
|
|
|
|
|
position->dx01 = position->x[0] - position->x[1];
|
|
|
|
position->dy01 = position->y[0] - position->y[1];
|
|
|
|
|
|
|
|
position->dx20 = position->x[2] - position->x[0];
|
|
|
|
position->dy20 = position->y[2] - position->y[0];
|
2016-01-02 03:59:16 +00:00
|
|
|
#endif
|
2012-05-14 16:00:06 +01:00
|
|
|
|
2022-02-03 04:18:40 +00:00
|
|
|
uint64_t area = IMUL64(position->dx01, position->dy20) -
|
|
|
|
IMUL64(position->dx20, position->dy01);
|
|
|
|
return area == 0 ? 0 : (area & (1ULL << 63)) ? -1 : 1;
|
2012-05-14 16:00:06 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2012-05-17 08:32:31 +01:00
|
|
|
/**
|
|
|
|
* Rotate a triangle, flipping its clockwise direction,
|
|
|
|
* Swaps values for xy[0] and xy[1]
|
|
|
|
*/
|
2015-07-21 00:58:43 +01:00
|
|
|
static inline void
|
2022-05-27 15:05:19 +01:00
|
|
|
rotate_fixed_position_01(struct fixed_position* position)
|
2012-05-17 08:32:31 +01:00
|
|
|
{
|
2022-05-27 15:05:19 +01:00
|
|
|
int x = position->x[1];
|
|
|
|
int y = position->y[1];
|
2012-05-17 08:32:31 +01:00
|
|
|
|
|
|
|
position->x[1] = position->x[0];
|
|
|
|
position->y[1] = position->y[0];
|
|
|
|
position->x[0] = x;
|
|
|
|
position->y[0] = y;
|
|
|
|
|
|
|
|
position->dx01 = -position->dx01;
|
|
|
|
position->dy01 = -position->dy01;
|
|
|
|
position->dx20 = position->x[2] - position->x[0];
|
|
|
|
position->dy20 = position->y[2] - position->y[0];
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2012-05-14 16:00:06 +01:00
|
|
|
/**
|
|
|
|
* Rotate a triangle, flipping its clockwise direction,
|
|
|
|
* Swaps values for xy[1] and xy[2]
|
|
|
|
*/
|
2015-07-21 00:58:43 +01:00
|
|
|
static inline void
|
2022-05-27 15:05:19 +01:00
|
|
|
rotate_fixed_position_12(struct fixed_position* position)
|
2012-05-14 16:00:06 +01:00
|
|
|
{
|
2022-05-27 15:05:19 +01:00
|
|
|
int x = position->x[2];
|
|
|
|
int y = position->y[2];
|
2012-05-14 16:00:06 +01:00
|
|
|
|
|
|
|
position->x[2] = position->x[1];
|
|
|
|
position->y[2] = position->y[1];
|
|
|
|
position->x[1] = x;
|
|
|
|
position->y[1] = y;
|
|
|
|
|
|
|
|
x = position->dx01;
|
|
|
|
y = position->dy01;
|
|
|
|
position->dx01 = -position->dx20;
|
|
|
|
position->dy01 = -position->dy20;
|
|
|
|
position->dx20 = -x;
|
|
|
|
position->dy20 = -y;
|
2010-10-08 17:06:05 +01:00
|
|
|
}
|
|
|
|
|
2010-01-14 01:01:45 +00:00
|
|
|
|
2010-04-17 19:48:26 +01:00
|
|
|
/**
|
2010-10-08 17:01:16 +01:00
|
|
|
* Draw triangle if it's CW, cull otherwise.
|
2010-04-17 19:48:26 +01:00
|
|
|
*/
|
2022-05-27 15:05:19 +01:00
|
|
|
static void
|
|
|
|
triangle_cw(struct lp_setup_context *setup,
|
|
|
|
const float (*v0)[4],
|
|
|
|
const float (*v1)[4],
|
|
|
|
const float (*v2)[4])
|
2009-10-07 22:36:43 +01:00
|
|
|
{
|
2022-06-08 10:00:54 +01:00
|
|
|
alignas(16) struct fixed_position position;
|
2018-05-22 01:12:38 +01:00
|
|
|
struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
|
|
|
|
|
|
|
|
if (lp_context->active_statistics_queries) {
|
|
|
|
lp_context->pipeline_statistics.c_primitives++;
|
|
|
|
}
|
2013-03-26 04:02:47 +00:00
|
|
|
|
2022-02-03 04:18:40 +00:00
|
|
|
int8_t area_sign = calc_fixed_position(setup, &position, v0, v1, v2);
|
2010-10-08 17:01:16 +01:00
|
|
|
|
2022-02-03 04:18:40 +00:00
|
|
|
if (area_sign < 0) {
|
2012-05-17 08:32:31 +01:00
|
|
|
if (setup->flatshade_first) {
|
|
|
|
rotate_fixed_position_12(&position);
|
|
|
|
retry_triangle_ccw(setup, &position, v0, v2, v1, !setup->ccw_is_frontface);
|
|
|
|
} else {
|
|
|
|
rotate_fixed_position_01(&position);
|
|
|
|
retry_triangle_ccw(setup, &position, v1, v0, v2, !setup->ccw_is_frontface);
|
|
|
|
}
|
2012-05-14 16:00:06 +01:00
|
|
|
}
|
2009-10-07 22:36:43 +01:00
|
|
|
}
|
|
|
|
|
2010-01-14 01:01:45 +00:00
|
|
|
|
2022-05-27 15:05:19 +01:00
|
|
|
static void
|
|
|
|
triangle_ccw(struct lp_setup_context *setup,
|
|
|
|
const float (*v0)[4],
|
|
|
|
const float (*v1)[4],
|
|
|
|
const float (*v2)[4])
|
2010-10-08 17:06:05 +01:00
|
|
|
{
|
2022-06-08 10:00:54 +01:00
|
|
|
alignas(16) struct fixed_position position;
|
2018-05-22 01:12:38 +01:00
|
|
|
struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
|
|
|
|
|
|
|
|
if (lp_context->active_statistics_queries) {
|
|
|
|
lp_context->pipeline_statistics.c_primitives++;
|
|
|
|
}
|
2013-03-26 04:02:47 +00:00
|
|
|
|
2022-02-03 04:18:40 +00:00
|
|
|
int8_t area_sign = calc_fixed_position(setup, &position, v0, v1, v2);
|
2010-10-08 17:06:05 +01:00
|
|
|
|
2022-02-03 04:18:40 +00:00
|
|
|
if (area_sign > 0)
|
2012-05-14 16:00:06 +01:00
|
|
|
retry_triangle_ccw(setup, &position, v0, v1, v2, setup->ccw_is_frontface);
|
2010-10-08 17:06:05 +01:00
|
|
|
}
|
2010-04-17 19:48:26 +01:00
|
|
|
|
2022-05-27 15:05:19 +01:00
|
|
|
|
2010-04-17 19:48:26 +01:00
|
|
|
/**
|
|
|
|
* Draw triangle whether it's CW or CCW.
|
|
|
|
*/
|
2022-05-27 15:05:19 +01:00
|
|
|
static void
|
|
|
|
triangle_both(struct lp_setup_context *setup,
|
|
|
|
const float (*v0)[4],
|
|
|
|
const float (*v1)[4],
|
|
|
|
const float (*v2)[4])
|
2009-10-07 22:36:43 +01:00
|
|
|
{
|
2022-06-08 10:00:54 +01:00
|
|
|
alignas(16) struct fixed_position position;
|
2013-09-19 18:37:03 +01:00
|
|
|
struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
|
2013-03-26 04:02:47 +00:00
|
|
|
|
2018-05-22 01:12:38 +01:00
|
|
|
if (lp_context->active_statistics_queries) {
|
2013-09-19 18:37:03 +01:00
|
|
|
lp_context->pipeline_statistics.c_primitives++;
|
|
|
|
}
|
|
|
|
|
2022-02-03 04:18:40 +00:00
|
|
|
int8_t area_sign = calc_fixed_position(setup, &position, v0, v1, v2);
|
2010-10-08 17:06:05 +01:00
|
|
|
|
2010-11-04 23:52:49 +00:00
|
|
|
if (0) {
|
|
|
|
assert(!util_is_inf_or_nan(v0[0][0]));
|
|
|
|
assert(!util_is_inf_or_nan(v0[0][1]));
|
|
|
|
assert(!util_is_inf_or_nan(v1[0][0]));
|
|
|
|
assert(!util_is_inf_or_nan(v1[0][1]));
|
|
|
|
assert(!util_is_inf_or_nan(v2[0][0]));
|
|
|
|
assert(!util_is_inf_or_nan(v2[0][1]));
|
|
|
|
}
|
|
|
|
|
2022-05-27 15:05:19 +01:00
|
|
|
if (area_sign > 0) {
|
|
|
|
retry_triangle_ccw(setup, &position, v0, v1, v2, setup->ccw_is_frontface);
|
|
|
|
} else if (area_sign < 0) {
|
2012-05-17 08:32:31 +01:00
|
|
|
if (setup->flatshade_first) {
|
2022-05-27 15:05:19 +01:00
|
|
|
rotate_fixed_position_12(&position);
|
|
|
|
retry_triangle_ccw(setup, &position, v0, v2, v1, !setup->ccw_is_frontface);
|
2012-05-17 08:32:31 +01:00
|
|
|
} else {
|
2022-05-27 15:05:19 +01:00
|
|
|
rotate_fixed_position_01(&position);
|
|
|
|
retry_triangle_ccw(setup, &position, v1, v0, v2, !setup->ccw_is_frontface);
|
2012-05-17 08:32:31 +01:00
|
|
|
}
|
2012-05-14 16:00:06 +01:00
|
|
|
}
|
2009-10-07 22:36:43 +01:00
|
|
|
}
|
|
|
|
|
2010-01-14 01:01:45 +00:00
|
|
|
|
2022-05-27 15:05:19 +01:00
|
|
|
/**
 * Triangle callback that draws nothing.
 *
 * Installed by lp_setup_choose_triangle() when rasterization is
 * discarded or the cull mode matches none of the handled cases.
 */
static void
triangle_noop(struct lp_setup_context *setup,
              const float (*v0)[4],
              const float (*v1)[4],
              const float (*v2)[4])
{
   /* Intentionally empty; the arguments are unused. */
   (void) setup;
   (void) v0;
   (void) v1;
   (void) v2;
}
|
|
|
|
|
|
|
|
|
2022-05-27 15:05:19 +01:00
|
|
|
void
|
2018-05-22 01:12:38 +01:00
|
|
|
lp_setup_choose_triangle(struct lp_setup_context *setup)
|
2009-10-09 10:44:07 +01:00
|
|
|
{
|
2018-05-22 01:12:38 +01:00
|
|
|
if (setup->rasterizer_discard) {
|
|
|
|
setup->triangle = triangle_noop;
|
|
|
|
return;
|
|
|
|
}
|
2009-10-09 11:29:01 +01:00
|
|
|
switch (setup->cullmode) {
|
2010-05-14 13:04:42 +01:00
|
|
|
case PIPE_FACE_NONE:
|
2009-10-08 12:15:12 +01:00
|
|
|
setup->triangle = triangle_both;
|
2009-10-07 22:36:43 +01:00
|
|
|
break;
|
2010-05-17 18:43:43 +01:00
|
|
|
case PIPE_FACE_BACK:
|
2010-05-14 13:04:42 +01:00
|
|
|
setup->triangle = setup->ccw_is_frontface ? triangle_ccw : triangle_cw;
|
2009-10-07 22:36:43 +01:00
|
|
|
break;
|
2010-05-17 18:43:43 +01:00
|
|
|
case PIPE_FACE_FRONT:
|
2010-05-14 13:04:42 +01:00
|
|
|
setup->triangle = setup->ccw_is_frontface ? triangle_cw : triangle_ccw;
|
2009-10-07 22:36:43 +01:00
|
|
|
break;
|
|
|
|
default:
|
2018-05-22 01:12:38 +01:00
|
|
|
setup->triangle = triangle_noop;
|
2009-10-07 22:36:43 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|