Merge branch 'master' into r300-compiler
There were additional non-textual conflicts. Conflicts: src/gallium/drivers/r300/r300_tgsi_to_rc.c src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c src/mesa/drivers/dri/r300/compiler/radeon_program.c src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
This commit is contained in:
commit
81c7561d9d
3
Makefile
3
Makefile
|
@ -182,7 +182,7 @@ ultrix-gcc:
|
|||
|
||||
# Rules for making release tarballs
|
||||
|
||||
VERSION=7.6-devel
|
||||
VERSION=7.7-devel
|
||||
DIRECTORY = Mesa-$(VERSION)
|
||||
LIB_NAME = MesaLib-$(VERSION)
|
||||
DEMO_NAME = MesaDemos-$(VERSION)
|
||||
|
@ -325,6 +325,7 @@ GALLIUM_FILES = \
|
|||
$(DIRECTORY)/src/gallium/*/*/SConscript \
|
||||
$(DIRECTORY)/src/gallium/*/*/*.[ch] \
|
||||
$(DIRECTORY)/src/gallium/*/*/*.py \
|
||||
$(DIRECTORY)/src/gallium/*/*/*.csv \
|
||||
$(DIRECTORY)/src/gallium/*/*/*/Makefile \
|
||||
$(DIRECTORY)/src/gallium/*/*/*/SConscript \
|
||||
$(DIRECTORY)/src/gallium/*/*/*/*.[ch] \
|
||||
|
|
|
@ -33,9 +33,9 @@ else:
|
|||
default_machine = _platform.machine()
|
||||
default_machine = _machine_map.get(default_machine, 'generic')
|
||||
|
||||
if default_platform in ('linux', 'freebsd', 'darwin'):
|
||||
if default_platform in ('linux', 'freebsd'):
|
||||
default_dri = 'yes'
|
||||
elif default_platform in ('winddk', 'windows', 'wince'):
|
||||
elif default_platform in ('winddk', 'windows', 'wince', 'darwin'):
|
||||
default_dri = 'no'
|
||||
else:
|
||||
default_dri = 'no'
|
||||
|
@ -59,7 +59,7 @@ def AddOptions(opts):
|
|||
opts.Add(EnumOption('machine', 'use machine-specific assembly code', default_machine,
|
||||
allowed_values=('generic', 'ppc', 'x86', 'x86_64')))
|
||||
opts.Add(EnumOption('platform', 'target platform', default_platform,
|
||||
allowed_values=('linux', 'cell', 'windows', 'winddk', 'wince')))
|
||||
allowed_values=('linux', 'cell', 'windows', 'winddk', 'wince', 'darwin')))
|
||||
opts.Add(EnumOption('toolchain', 'compiler toolchain', 'default',
|
||||
allowed_values=('default', 'crossmingw', 'winsdk', 'winddk')))
|
||||
opts.Add(BoolOption('llvm', 'use LLVM', 'no'))
|
||||
|
|
|
@ -9,7 +9,7 @@ CONFIG_NAME = default
|
|||
|
||||
# Version info
|
||||
MESA_MAJOR=7
|
||||
MESA_MINOR=6
|
||||
MESA_MINOR=7
|
||||
MESA_TINY=0
|
||||
MESA_VERSION = $(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY)
|
||||
|
||||
|
@ -23,6 +23,7 @@ HOST_CC = $(CC)
|
|||
CFLAGS = -O
|
||||
CXXFLAGS = -O
|
||||
LDFLAGS =
|
||||
HOST_CFLAGS = $(CFLAGS)
|
||||
GLU_CFLAGS =
|
||||
|
||||
# Compiler for building demos/tests/etc
|
||||
|
|
|
@ -1148,6 +1148,11 @@ yes)
|
|||
if test "$tracker" = egl && test "x$enable_egl" != xyes; then
|
||||
AC_MSG_ERROR([cannot build egl state tracker without EGL library])
|
||||
fi
|
||||
if test "$tracker" = xorg; then
|
||||
PKG_CHECK_MODULES(XEXT, [xextproto >= 7.0.99.1],
|
||||
HAVE_XEXTPROTO_71="yes"; DEFINES="$DEFINES -DHAVE_XEXTPROTO_71"
|
||||
HAVE_XEXTPROTO_71="no")
|
||||
fi
|
||||
done
|
||||
GALLIUM_STATE_TRACKERS_DIRS="$state_trackers"
|
||||
;;
|
||||
|
@ -1202,7 +1207,7 @@ if test "x$enable_gallium_radeon" = xyes; then
|
|||
fi
|
||||
|
||||
dnl
|
||||
dnl Gallium Radeon configuration
|
||||
dnl Gallium Nouveau configuration
|
||||
dnl
|
||||
AC_ARG_ENABLE([gallium-nouveau],
|
||||
[AS_HELP_STRING([--enable-gallium-nouveau],
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
<HTML>
|
||||
|
||||
<TITLE>Mesa Release Notes</TITLE>
|
||||
|
||||
<head><link rel="stylesheet" type="text/css" href="mesa.css"></head>
|
||||
|
||||
<BODY>
|
||||
|
||||
<body bgcolor="#eeeeee">
|
||||
|
||||
<H1>Mesa 7.5.2 Release Notes, (date tbd)</H1>
|
||||
|
||||
<p>
|
||||
Mesa 7.5.2 is a bug-fix release fixing issues found since the 7.5.1 release.
|
||||
</p>
|
||||
<p>
|
||||
The main new feature of Mesa 7.5.x is the
|
||||
<a href="http://wiki.freedesktop.org/wiki/Software/gallium"
|
||||
target="_parent">Gallium3D</a> infrastructure.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 7.5.2 implements the OpenGL 2.1 API, but the version reported by
|
||||
glGetString(GL_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 2.1.
|
||||
</p>
|
||||
<p>
|
||||
See the <a href="install.html">Compiling/Installing page</a> for prerequisites
|
||||
for DRI hardware acceleration.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>MD5 checksums</h2>
|
||||
<pre>
|
||||
tbd
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<ul>
|
||||
<li>Detect B43 chipset in Intel driver
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
<ul>
|
||||
<li>Assorted bug fixes for i965/i945 drivers
|
||||
<li>Fixed Gallium glDrawPixels(GL_STENCIL_INDEX) failure.
|
||||
<li>Fixed GLSL linker/preprocessor version directive issue seen in Wine
|
||||
(such as bug 23946)
|
||||
<li>glUseProgram() is now compiled into display lists (bug 23746).
|
||||
<li>glUniform functions are now compiled into display lists
|
||||
</ul>
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -50,6 +50,8 @@ This was written by Zack Rusin at Tungsten Graphics.
|
|||
<li>Rewritten radeon/r200/r300 driver using a buffer manager
|
||||
<li>radeon/r200/r300 GL_EXT_framebuffer_object support when used with
|
||||
kernel memory manager
|
||||
<li>radeon/r200/r300 support for GL_ARB_occlusion_query</li>
|
||||
<li>r300 driver supports OpenGL 1.5</li>
|
||||
<li>r300 driver support for GL_EXT_vertex_array_bgra, GL_EXT_texture_sRGB
|
||||
<li>i915/945 driver support for GL_ARB_point_sprite, GL_EXT_stencil_two_side
|
||||
and GL_ATI_separate_stencil extensions
|
||||
|
@ -57,6 +59,10 @@ This was written by Zack Rusin at Tungsten Graphics.
|
|||
GL_ARB_fragment_program.</li>
|
||||
<li>Added configure --with-max-width=W, --with-max-height=H options to specify
|
||||
max framebuffer, viewport size.
|
||||
<li>Initial version of Gallium llvmpipe driver. This is a new driver based
|
||||
on LLVM which makes extensive use of run-time code generation. This is
|
||||
an "alpha" stage driver. See the src/gallium/drivers/llvmpipe/README
|
||||
file for more information.
|
||||
</ul>
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
<HTML>
|
||||
|
||||
<TITLE>Mesa Release Notes</TITLE>
|
||||
|
||||
<head><link rel="stylesheet" type="text/css" href="mesa.css"></head>
|
||||
|
||||
<BODY>
|
||||
|
||||
<body bgcolor="#eeeeee">
|
||||
|
||||
<H1>Mesa 7.7 Release Notes / date TBD</H1>
|
||||
|
||||
<p>
|
||||
Mesa 7.7 is a new development release.
|
||||
People who are concerned with stability and reliability should stick
|
||||
with a previous release or wait for Mesa 7.7.1.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 7.7 implements the OpenGL 2.1 API, but the version reported by
|
||||
glGetString(GL_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 2.1.
|
||||
</p>
|
||||
<p>
|
||||
See the <a href="install.html">Compiling/Installing page</a> for prerequisites
|
||||
for DRI hardware acceleration.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>MD5 checksums</h2>
|
||||
<pre>
|
||||
tbd
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<ul>
|
||||
<li>GL_ARB_draw_elements_base_vertex (supported in Intel i965 and software drivers)</li>
|
||||
<li>GL_ARB_depth_clamp (supported in Intel i965 DRI and software drivers)</li>
|
||||
<li>GL_NV_depth_clamp (supported in Intel i965 DRI and software drivers)</li>
|
||||
<li>GL_ARB_provoking_vertex (same as GL_EXT_provoking_vertex)</li>
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
<ul>
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
<ul>
|
||||
</ul>
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -13,7 +13,9 @@ The release notes summarize what's new or changed in each Mesa release.
|
|||
</p>
|
||||
|
||||
<UL>
|
||||
<LI><A HREF="relnotes-7.7.html">7.7 release notes</A>
|
||||
<LI><A HREF="relnotes-7.6.html">7.6 release notes</A>
|
||||
<LI><A HREF="relnotes-7.5.2.html">7.5.2 release notes</A>
|
||||
<LI><A HREF="relnotes-7.5.1.html">7.5.1 release notes</A>
|
||||
<LI><A HREF="relnotes-7.5.html">7.5 release notes</A>
|
||||
<LI><A HREF="relnotes-7.4.4.html">7.4.4 release notes</A>
|
||||
|
|
|
@ -69,7 +69,8 @@ typedef HWND NativeWindowType;
|
|||
typedef HBITMAP NativePixmapType;
|
||||
/** END Added for Windows **/
|
||||
|
||||
#elif defined(__gnu_linux__) || defined(__FreeBSD__) || defined(__sun)
|
||||
#elif defined(__gnu_linux__) || defined(__FreeBSD__) || defined(__sun) || defined(__APPLE__)
|
||||
|
||||
|
||||
/** BEGIN Added for X (Mesa) **/
|
||||
#ifndef EGLAPI
|
||||
|
|
|
@ -1740,6 +1740,9 @@ GLAPI void GLAPIENTRY glSeparableFilter2D( GLenum target,
|
|||
GLAPI void GLAPIENTRY glGetSeparableFilter( GLenum target, GLenum format,
|
||||
GLenum type, GLvoid *row, GLvoid *column, GLvoid *span );
|
||||
|
||||
typedef void (APIENTRYP PFNGLBLENDCOLORPROC) (GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha);
|
||||
typedef void (APIENTRYP PFNGLBLENDEQUATIONPROC) (GLenum mode);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
|
@ -1945,6 +1948,18 @@ GLAPI void GLAPIENTRY glMultTransposeMatrixf( const GLfloat m[16] );
|
|||
GLAPI void GLAPIENTRY glSampleCoverage( GLclampf value, GLboolean invert );
|
||||
|
||||
|
||||
typedef void (APIENTRYP PFNGLACTIVETEXTUREPROC) (GLenum texture);
|
||||
typedef void (APIENTRYP PFNGLSAMPLECOVERAGEPROC) (GLclampf value, GLboolean invert);
|
||||
typedef void (APIENTRYP PFNGLCOMPRESSEDTEXIMAGE3DPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const GLvoid *data);
|
||||
typedef void (APIENTRYP PFNGLCOMPRESSEDTEXIMAGE2DPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const GLvoid *data);
|
||||
typedef void (APIENTRYP PFNGLCOMPRESSEDTEXIMAGE1DPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLint border, GLsizei imageSize, const GLvoid *data);
|
||||
typedef void (APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const GLvoid *data);
|
||||
typedef void (APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const GLvoid *data);
|
||||
typedef void (APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC) (GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const GLvoid *data);
|
||||
typedef void (APIENTRYP PFNGLGETCOMPRESSEDTEXIMAGEPROC) (GLenum target, GLint level, GLvoid *img);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* GL_ARB_multitexture (ARB extension 1 and OpenGL 1.2.1)
|
||||
*/
|
||||
|
|
|
@ -10,4 +10,5 @@ SConscript([
|
|||
'vpglsl/SConscript',
|
||||
'fp/SConscript',
|
||||
'wgl/SConscript',
|
||||
'perf/SConscript',
|
||||
])
|
||||
|
|
|
@ -26,6 +26,7 @@ static int Scissor = 0;
|
|||
static float Xzoom, Yzoom;
|
||||
static GLboolean DrawFront = GL_FALSE;
|
||||
static GLboolean Dither = GL_TRUE;
|
||||
static GLboolean Invert = GL_FALSE;
|
||||
|
||||
|
||||
static void Reset( void )
|
||||
|
@ -59,6 +60,15 @@ static void Display( void )
|
|||
if (Scissor)
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
|
||||
if (Invert) {
|
||||
glPixelTransferf(GL_RED_SCALE, -1.0);
|
||||
glPixelTransferf(GL_GREEN_SCALE, -1.0);
|
||||
glPixelTransferf(GL_BLUE_SCALE, -1.0);
|
||||
glPixelTransferf(GL_RED_BIAS, 1.0);
|
||||
glPixelTransferf(GL_GREEN_BIAS, 1.0);
|
||||
glPixelTransferf(GL_BLUE_BIAS, 1.0);
|
||||
}
|
||||
|
||||
/* draw copy */
|
||||
glPixelZoom(Xzoom, Yzoom);
|
||||
glWindowPos2iARB(Xpos, Ypos);
|
||||
|
@ -67,6 +77,15 @@ static void Display( void )
|
|||
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
|
||||
if (Invert) {
|
||||
glPixelTransferf(GL_RED_SCALE, 1.0);
|
||||
glPixelTransferf(GL_GREEN_SCALE, 1.0);
|
||||
glPixelTransferf(GL_BLUE_SCALE, 1.0);
|
||||
glPixelTransferf(GL_RED_BIAS, 0.0);
|
||||
glPixelTransferf(GL_GREEN_BIAS, 0.0);
|
||||
glPixelTransferf(GL_BLUE_BIAS, 0.0);
|
||||
}
|
||||
|
||||
if (DrawFront)
|
||||
glFinish();
|
||||
else
|
||||
|
@ -105,6 +124,9 @@ static void Key( unsigned char key, int x, int y )
|
|||
else
|
||||
glDisable(GL_DITHER);
|
||||
break;
|
||||
case 'i':
|
||||
Invert = !Invert;
|
||||
break;
|
||||
case 's':
|
||||
Scissor = !Scissor;
|
||||
break;
|
||||
|
|
|
@ -58,6 +58,7 @@ static GLint ClampIndex = 0;
|
|||
static GLboolean supportFBO = GL_FALSE;
|
||||
static GLboolean supportSeamless = GL_FALSE;
|
||||
static GLboolean seamless = GL_FALSE;
|
||||
static GLuint TexObj = 0;
|
||||
|
||||
|
||||
static struct {
|
||||
|
@ -543,6 +544,10 @@ static void init( GLboolean useImageFiles )
|
|||
|
||||
printf("GL_RENDERER: %s\n", (char *) glGetString(GL_RENDERER));
|
||||
|
||||
|
||||
glGenTextures(1, &TexObj);
|
||||
glBindTexture(GL_TEXTURE_CUBE_MAP_ARB, TexObj);
|
||||
|
||||
if (useImageFiles) {
|
||||
load_envmaps();
|
||||
}
|
||||
|
|
|
@ -43,6 +43,7 @@ static GLboolean Anim = GL_TRUE;
|
|||
static GLint Bias = 0, BiasStepSign = +1; /* ints avoid fp precision problem */
|
||||
static GLint BiasMin = -400, BiasMax = 400;
|
||||
static int win = 0;
|
||||
static GLuint TexObj = 0;
|
||||
|
||||
|
||||
static void
|
||||
|
@ -214,6 +215,9 @@ static void Init( void )
|
|||
|
||||
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
|
||||
|
||||
glGenTextures(1, &TexObj);
|
||||
glBindTexture(GL_TEXTURE_2D, TexObj);
|
||||
|
||||
if (glutExtensionSupported("GL_SGIS_generate_mipmap")) {
|
||||
/* test auto mipmap generation */
|
||||
GLint width, height, i;
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
!!ARBfp1.0
|
||||
TEMP R0;
|
||||
MOV R0, fragment.color;
|
||||
ADD_SAT R0, R0, R0;
|
||||
MUL result.color, {0.5}.x, R0;
|
||||
END
|
|
@ -0,0 +1,6 @@
|
|||
!!ARBfp1.0
|
||||
TEMP R0;
|
||||
MOV R0, fragment.color;
|
||||
MOV R0, R0.zyxw;
|
||||
MOV result.color, R0;
|
||||
END
|
|
@ -0,0 +1,6 @@
|
|||
!!ARBfp1.0
|
||||
TEMP R0;
|
||||
MOV R0, fragment.color;
|
||||
MUL R0, R0.zyxw, fragment.color;
|
||||
MOV result.color, R0;
|
||||
END
|
|
@ -10,16 +10,15 @@ LIB_DEP = \
|
|||
$(TOP)/$(LIB_DIR)/$(GLU_LIB_NAME) \
|
||||
$(TOP)/$(LIB_DIR)/$(GLUT_LIB_NAME)
|
||||
|
||||
LIBS = -L$(TOP)/$(LIB_DIR) -l$(GLUT_LIB) -l$(GLEW_LIB) -l$(GLU_LIB) -l$(GL_LIB) $(APP_LIB_DEPS)
|
||||
|
||||
INCLUDE_DIRS = -I$(TOP)/progs/util
|
||||
LIBS = -L$(TOP)/$(LIB_DIR) -l$(GLUT_LIB) -l$(GLEW_LIB) -l$(GLU_LIB) \
|
||||
-l$(GL_LIB) $(APP_LIB_DEPS)
|
||||
|
||||
# using : to avoid APP_CC pointing to CC loop
|
||||
CC:=$(APP_CC)
|
||||
CC := $(APP_CC)
|
||||
CFLAGS += -I$(INCDIR)
|
||||
LDLIBS=$(LIBS)
|
||||
LDLIBS = $(LIBS)
|
||||
|
||||
DEMO_SOURCES = \
|
||||
PROG_SOURCES = \
|
||||
array.c \
|
||||
bitmap.c \
|
||||
brick.c \
|
||||
|
@ -59,8 +58,8 @@ UTIL_SOURCES = \
|
|||
readtex.c
|
||||
|
||||
UTIL_OBJS = $(UTIL_SOURCES:.c=.o)
|
||||
PROG_OBJS = $(DEMO_SOURCES:.c=.o)
|
||||
PROGS = $(DEMO_SOURCES:%.c=%)
|
||||
PROG_OBJS = $(PROG_SOURCES:.c=.o)
|
||||
PROGS = $(PROG_SOURCES:%.c=%)
|
||||
|
||||
##### TARGETS #####
|
||||
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
# progs/demos/Makefile
|
||||
|
||||
TOP = ../..
|
||||
include $(TOP)/configs/current
|
||||
|
||||
INCDIR = $(TOP)/include
|
||||
|
||||
LIBS = -L$(TOP)/$(LIB_DIR) -l$(GLUT_LIB) -l$(GLEW_LIB) \
|
||||
-l$(GLU_LIB) -l$(GL_LIB) $(APP_LIB_DEPS)
|
||||
|
||||
# using : to avoid APP_CC pointing to CC loop
|
||||
CC := $(APP_CC)
|
||||
CFLAGS += -I$(INCDIR)
|
||||
LDLIBS = $(LIBS)
|
||||
|
||||
PROG_SOURCES = \
|
||||
drawoverhead.c \
|
||||
teximage.c \
|
||||
vbo.c \
|
||||
vertexrate.c \
|
||||
|
||||
PROG_OBJS = $(PROG_SOURCES:.c=.o)
|
||||
|
||||
PROGS = $(PROG_SOURCES:%.c=%)
|
||||
|
||||
|
||||
UTIL_SOURCES = \
|
||||
common.c \
|
||||
glmain.c
|
||||
|
||||
UTIL_HEADERS = \
|
||||
common.h \
|
||||
glmain.h
|
||||
|
||||
UTIL_OBJS = $(UTIL_SOURCES:.c=.o)
|
||||
|
||||
|
||||
|
||||
default: $(PROGS)
|
||||
|
||||
$(PROG_OBJS): $(UTIL_HEADERS)
|
||||
|
||||
$(PROGS): $(UTIL_OBJS)
|
||||
|
||||
|
||||
|
||||
clean:
|
||||
-rm -f $(PROGS)
|
||||
-rm -f *.o *~
|
|
@ -0,0 +1,26 @@
|
|||
Import('env')
|
||||
|
||||
if not env['GLUT']:
|
||||
Return()
|
||||
|
||||
env = env.Clone()
|
||||
|
||||
env.Prepend(LIBS = ['$GLUT_LIB'])
|
||||
|
||||
progs = [
|
||||
'drawoverhead',
|
||||
'teximage',
|
||||
'vbo',
|
||||
'vertexrate',
|
||||
]
|
||||
|
||||
for prog in progs:
|
||||
env.Program(
|
||||
target = prog,
|
||||
source = [
|
||||
prog + '.c',
|
||||
'common.c',
|
||||
'glmain.c',
|
||||
]
|
||||
)
|
||||
|
|
@ -0,0 +1,115 @@
|
|||
/*
|
||||
* Copyright (C) 2009 VMware, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Common perf code. This should be re-usable with other APIs.
|
||||
*/
|
||||
|
||||
#include "common.h"
|
||||
#include "glmain.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
|
||||
/**
 * printf-style output to stdout that is flushed immediately.
 *
 * stdout is flushed both before and after the formatted text is written.
 * The flush is needed on a Windows console when built with mingw,
 * otherwise nothing shows up until the program exits.
 * Logging could be hooked in here later.
 *
 * \param format  printf-style format string, followed by its arguments
 */
void
perf_printf(const char *format, ...)
{
   va_list args;

   fflush(stdout);

   va_start(args, format);
   vprintf(format, args);
   va_end(args);

   fflush(stdout);
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Run function 'f' for enough iterations to reach a steady state.
|
||||
* Return the rate (iterations/second).
|
||||
*/
|
||||
double
|
||||
PerfMeasureRate(PerfRateFunc f)
|
||||
{
|
||||
const double minDuration = 1.0;
|
||||
double rate = 0.0, prevRate = 0.0;
|
||||
unsigned subiters;
|
||||
|
||||
/* Compute initial number of iterations to try.
|
||||
* If the test function is pretty slow this helps to avoid
|
||||
* extraordarily long run times.
|
||||
*/
|
||||
subiters = 2;
|
||||
{
|
||||
const double t0 = PerfGetTime();
|
||||
double t1;
|
||||
do {
|
||||
f(subiters); /* call the rendering function */
|
||||
t1 = PerfGetTime();
|
||||
subiters *= 2;
|
||||
} while (t1 - t0 < 0.1 * minDuration);
|
||||
}
|
||||
/*perf_printf("initial subIters = %u\n", subiters);*/
|
||||
|
||||
while (1) {
|
||||
const double t0 = PerfGetTime();
|
||||
unsigned iters = 0;
|
||||
double t1;
|
||||
|
||||
do {
|
||||
f(subiters); /* call the rendering function */
|
||||
t1 = PerfGetTime();
|
||||
iters += subiters;
|
||||
} while (t1 - t0 < minDuration);
|
||||
|
||||
rate = iters / (t1 - t0);
|
||||
|
||||
if (0)
|
||||
perf_printf("prevRate %f rate %f ratio %f iters %u\n",
|
||||
prevRate, rate, rate/prevRate, iters);
|
||||
|
||||
/* Try and speed the search up by skipping a few steps:
|
||||
*/
|
||||
if (rate > prevRate * 1.6)
|
||||
subiters *= 8;
|
||||
else if (rate > prevRate * 1.2)
|
||||
subiters *= 4;
|
||||
else if (rate > prevRate * 1.05)
|
||||
subiters *= 2;
|
||||
else
|
||||
break;
|
||||
|
||||
prevRate = rate;
|
||||
}
|
||||
|
||||
if (0)
|
||||
perf_printf("%s returning iters %u rate %f\n", __FUNCTION__, subiters, rate);
|
||||
return rate;
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Copyright (C) 2009 VMware, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef COMMON_H
|
||||
#define COMMON_H
|
||||
|
||||
|
||||
typedef void (*PerfRateFunc)(unsigned count);
|
||||
|
||||
|
||||
extern double
|
||||
PerfMeasureRate(PerfRateFunc f);
|
||||
|
||||
|
||||
extern void
|
||||
perf_printf(const char *format, ...);
|
||||
|
||||
|
||||
#endif /* COMMON_H */
|
||||
|
|
@ -0,0 +1,134 @@
|
|||
/*
|
||||
* Copyright (C) 2009 VMware, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Measure drawing overhead
|
||||
*
|
||||
* This is the first in a series of simple performance benchmarks.
|
||||
* The code in this file should be as simple as possible to make it
|
||||
* easily portable to other APIs.
|
||||
*
|
||||
* All the window-system stuff should be contained in glmain.c (or TBDmain.c).
|
||||
* All the re-usable, generic code should be in common.c (XXX not done yet).
|
||||
*
|
||||
* Brian Paul
|
||||
* 15 Sep 2009
|
||||
*/
|
||||
|
||||
#include "glmain.h"
|
||||
#include "common.h"
|
||||
|
||||
|
||||
int WinWidth = 100, WinHeight = 100;
|
||||
|
||||
static GLuint VBO;
|
||||
|
||||
struct vertex
|
||||
{
|
||||
GLfloat x, y;
|
||||
};
|
||||
|
||||
static const struct vertex vertices[4] = {
|
||||
{ -1.0, -1.0 },
|
||||
{ 1.0, -1.0 },
|
||||
{ 1.0, 1.0 },
|
||||
{ -1.0, 1.0 }
|
||||
};
|
||||
|
||||
|
||||
/** Called from test harness/main */
|
||||
void
|
||||
PerfInit(void)
|
||||
{
|
||||
/* setup VBO w/ vertex data */
|
||||
glGenBuffersARB(1, &VBO);
|
||||
glBindBufferARB(GL_ARRAY_BUFFER_ARB, VBO);
|
||||
glBufferDataARB(GL_ARRAY_BUFFER_ARB,
|
||||
sizeof(vertices), vertices, GL_STATIC_DRAW_ARB);
|
||||
glVertexPointer(2, GL_FLOAT, sizeof(struct vertex), (void *) 0);
|
||||
glEnableClientState(GL_VERTEX_ARRAY);
|
||||
|
||||
/* misc GL state */
|
||||
glAlphaFunc(GL_ALWAYS, 0.0);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
DrawNoStateChange(unsigned count)
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < count; i++) {
|
||||
glDrawArrays(GL_POINTS, 0, 4);
|
||||
}
|
||||
glFinish();
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
DrawNopStateChange(unsigned count)
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < count; i++) {
|
||||
glDisable(GL_ALPHA_TEST);
|
||||
glDrawArrays(GL_POINTS, 0, 4);
|
||||
}
|
||||
glFinish();
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
DrawStateChange(unsigned count)
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < count; i++) {
|
||||
if (i & 1)
|
||||
glEnable(GL_TEXTURE_GEN_S);
|
||||
else
|
||||
glDisable(GL_TEXTURE_GEN_S);
|
||||
glDrawArrays(GL_POINTS, 0, 4);
|
||||
}
|
||||
glFinish();
|
||||
}
|
||||
|
||||
|
||||
/** Called from test harness/main */
|
||||
void
|
||||
PerfDraw(void)
|
||||
{
|
||||
double rate0, rate1, rate2, overhead;
|
||||
|
||||
rate0 = PerfMeasureRate(DrawNoStateChange);
|
||||
perf_printf(" Draw only: %.1f draws/second\n", rate0);
|
||||
|
||||
|
||||
rate1 = PerfMeasureRate(DrawNopStateChange);
|
||||
overhead = 1000.0 * (1.0 / rate1 - 1.0 / rate0);
|
||||
perf_printf(" Draw w/ nop state change: %.1f draws/sec (overhead: %f ms/draw)\n",
|
||||
rate1, overhead);
|
||||
|
||||
rate2 = PerfMeasureRate(DrawStateChange);
|
||||
overhead = 1000.0 * (1.0 / rate2 - 1.0 / rate0);
|
||||
perf_printf(" Draw w/ state change: %.1f draws/sec (overhead: %f ms/draw)\n",
|
||||
rate2, overhead);
|
||||
|
||||
exit(0);
|
||||
}
|
||||
|
|
@ -0,0 +1,156 @@
|
|||
/*
|
||||
* Copyright (C) 2009 VMware, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* OpenGL/GLUT common code for perf programs.
|
||||
* Brian Paul
|
||||
* 15 Sep 2009
|
||||
*/
|
||||
|
||||
|
||||
#include "glmain.h"
|
||||
#include <GL/glut.h>
|
||||
|
||||
|
||||
static int Win;
|
||||
static GLfloat Xrot = 0, Yrot = 0, Zrot = 0;
|
||||
static GLboolean Anim = GL_FALSE;
|
||||
|
||||
|
||||
/** Return time in seconds */
|
||||
double
|
||||
PerfGetTime(void)
|
||||
{
|
||||
return glutGet(GLUT_ELAPSED_TIME) * 0.001;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Swap the window's buffers; a thin wrapper over glutSwapBuffers()
 * that test programs can call.
 */
void
PerfSwapBuffers(void)
{
   glutSwapBuffers();
}
|
||||
|
||||
|
||||
static void
|
||||
Idle(void)
|
||||
{
|
||||
Xrot += 3.0;
|
||||
Yrot += 4.0;
|
||||
Zrot += 2.0;
|
||||
glutPostRedisplay();
|
||||
}
|
||||
|
||||
|
||||
/**
 * GLUT display callback: hand control to the test program's PerfDraw(),
 * then swap buffers.
 */
static void
Draw(void)
{
   PerfDraw();

   glutSwapBuffers();
}
|
||||
|
||||
|
||||
static void
|
||||
Reshape(int width, int height)
|
||||
{
|
||||
WinWidth = width;
|
||||
WinHeight = height;
|
||||
glViewport(0, 0, width, height);
|
||||
glMatrixMode(GL_PROJECTION);
|
||||
glLoadIdentity();
|
||||
glFrustum(-1.0, 1.0, -1.0, 1.0, 5.0, 25.0);
|
||||
glMatrixMode(GL_MODELVIEW);
|
||||
glLoadIdentity();
|
||||
glTranslatef(0.0, 0.0, -15.0);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
Key(unsigned char key, int x, int y)
|
||||
{
|
||||
const GLfloat step = 3.0;
|
||||
(void) x;
|
||||
(void) y;
|
||||
switch (key) {
|
||||
case 'a':
|
||||
Anim = !Anim;
|
||||
if (Anim)
|
||||
glutIdleFunc(Idle);
|
||||
else
|
||||
glutIdleFunc(NULL);
|
||||
break;
|
||||
case 'z':
|
||||
Zrot -= step;
|
||||
break;
|
||||
case 'Z':
|
||||
Zrot += step;
|
||||
break;
|
||||
case 27:
|
||||
glutDestroyWindow(Win);
|
||||
exit(0);
|
||||
break;
|
||||
}
|
||||
glutPostRedisplay();
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
SpecialKey(int key, int x, int y)
|
||||
{
|
||||
const GLfloat step = 3.0;
|
||||
(void) x;
|
||||
(void) y;
|
||||
switch (key) {
|
||||
case GLUT_KEY_UP:
|
||||
Xrot -= step;
|
||||
break;
|
||||
case GLUT_KEY_DOWN:
|
||||
Xrot += step;
|
||||
break;
|
||||
case GLUT_KEY_LEFT:
|
||||
Yrot -= step;
|
||||
break;
|
||||
case GLUT_KEY_RIGHT:
|
||||
Yrot += step;
|
||||
break;
|
||||
}
|
||||
glutPostRedisplay();
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
glutInit(&argc, argv);
|
||||
glutInitWindowSize(WinWidth, WinHeight);
|
||||
glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH);
|
||||
Win = glutCreateWindow(argv[0]);
|
||||
glewInit();
|
||||
glutReshapeFunc(Reshape);
|
||||
glutKeyboardFunc(Key);
|
||||
glutSpecialFunc(SpecialKey);
|
||||
glutDisplayFunc(Draw);
|
||||
if (Anim)
|
||||
glutIdleFunc(Idle);
|
||||
PerfInit();
|
||||
glutMainLoop();
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* Copyright (C) 2009 VMware, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef GLMAIN_H
|
||||
#define GLMAIN_H
|
||||
|
||||
|
||||
#define GL_GLEXT_PROTOTYPES
|
||||
#include <GL/glew.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
|
||||
|
||||
/** Test programs can use these vars/functions */
|
||||
|
||||
extern int WinWidth, WinHeight;
|
||||
|
||||
extern double
|
||||
PerfGetTime(void);
|
||||
|
||||
extern void
|
||||
PerfSwapBuffers(void);
|
||||
|
||||
|
||||
/** Test programs must implement these functions **/
|
||||
|
||||
extern void
|
||||
PerfInit(void);
|
||||
|
||||
extern void
|
||||
PerfDraw(void);
|
||||
|
||||
|
||||
#endif /* GLMAIN_H */
|
|
@ -0,0 +1,213 @@
|
|||
/*
|
||||
* Copyright (C) 2009 VMware, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Measure glTexSubImage2D rate
|
||||
*
|
||||
* Brian Paul
|
||||
* 16 Sep 2009
|
||||
*/
|
||||
|
||||
#include "glmain.h"
|
||||
#include "common.h"
|
||||
|
||||
|
||||
int WinWidth = 100, WinHeight = 100;
|
||||
|
||||
static GLuint VBO;
|
||||
static GLuint TexObj = 0;
|
||||
static GLubyte *TexImage = NULL;
|
||||
static GLsizei TexSize;
|
||||
static GLenum TexSrcFormat, TexSrcType;
|
||||
|
||||
static const GLboolean DrawPoint = GL_TRUE;
|
||||
static const GLboolean TexSubImage4 = GL_TRUE;
|
||||
|
||||
struct vertex
|
||||
{
|
||||
GLfloat x, y, s, t;
|
||||
};
|
||||
|
||||
static const struct vertex vertices[1] = {
|
||||
{ 0.0, 0.0, 0.5, 0.5 },
|
||||
};
|
||||
|
||||
#if 0
|
||||
#define VOFFSET(F) ((void *) offsetof(struct vertex, F))
|
||||
#else
|
||||
#define VOFFSET(F) ((void *) &((struct vertex *)NULL)->F)
|
||||
#endif
|
||||
|
||||
/** Called from test harness/main */
|
||||
void
|
||||
PerfInit(void)
|
||||
{
|
||||
/* setup VBO w/ vertex data */
|
||||
glGenBuffersARB(1, &VBO);
|
||||
glBindBufferARB(GL_ARRAY_BUFFER_ARB, VBO);
|
||||
glBufferDataARB(GL_ARRAY_BUFFER_ARB,
|
||||
sizeof(vertices), vertices, GL_STATIC_DRAW_ARB);
|
||||
glVertexPointer(2, GL_FLOAT, sizeof(struct vertex), VOFFSET(x));
|
||||
glTexCoordPointer(2, GL_FLOAT, sizeof(struct vertex), VOFFSET(s));
|
||||
glEnableClientState(GL_VERTEX_ARRAY);
|
||||
glEnableClientState(GL_TEXTURE_COORD_ARRAY);
|
||||
|
||||
/* texture */
|
||||
glGenTextures(1, &TexObj);
|
||||
glBindTexture(GL_TEXTURE_2D, TexObj);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glEnable(GL_TEXTURE_2D);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
UploadTexImage2D(unsigned count)
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < count; i++) {
|
||||
/* XXX is this equivalent to a glTexSubImage call since we're
|
||||
* always specifying the same image size? That case isn't optimized
|
||||
* in Mesa but may be optimized in other drivers. Note sure how
|
||||
* much difference that might make.
|
||||
*/
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA,
|
||||
TexSize, TexSize, 0,
|
||||
TexSrcFormat, TexSrcType, TexImage);
|
||||
if (DrawPoint)
|
||||
glDrawArrays(GL_POINTS, 0, 1);
|
||||
}
|
||||
glFinish();
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
UploadTexSubImage2D(unsigned count)
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < count; i++) {
|
||||
if (TexSubImage4) {
|
||||
GLsizei halfSize = (TexSize == 1) ? 1 : TexSize / 2;
|
||||
GLsizei halfPos = TexSize - halfSize;
|
||||
/* do glTexSubImage2D in four pieces */
|
||||
/* lower-left */
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, TexSize);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0,
|
||||
0, 0, halfSize, halfSize,
|
||||
TexSrcFormat, TexSrcType, TexImage);
|
||||
/* lower-right */
|
||||
glPixelStorei(GL_UNPACK_SKIP_PIXELS, halfPos);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0,
|
||||
halfPos, 0, halfSize, halfSize,
|
||||
TexSrcFormat, TexSrcType, TexImage);
|
||||
/* upper-left */
|
||||
glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0);
|
||||
glPixelStorei(GL_UNPACK_SKIP_ROWS, halfPos);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0,
|
||||
0, halfPos, halfSize, halfSize,
|
||||
TexSrcFormat, TexSrcType, TexImage);
|
||||
/* upper-right */
|
||||
glPixelStorei(GL_UNPACK_SKIP_PIXELS, halfPos);
|
||||
glPixelStorei(GL_UNPACK_SKIP_ROWS, halfPos);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0,
|
||||
halfPos, halfPos, halfSize, halfSize,
|
||||
TexSrcFormat, TexSrcType, TexImage);
|
||||
/* reset the unpacking state */
|
||||
glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0);
|
||||
glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0);
|
||||
glPixelStorei(GL_UNPACK_SKIP_ROWS, 0);
|
||||
}
|
||||
else {
|
||||
/* replace whole texture image at once */
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0,
|
||||
0, 0, TexSize, TexSize,
|
||||
TexSrcFormat, TexSrcType, TexImage);
|
||||
}
|
||||
if (DrawPoint)
|
||||
glDrawArrays(GL_POINTS, 0, 1);
|
||||
}
|
||||
glFinish();
|
||||
}
|
||||
|
||||
|
||||
/* XXX any other formats to measure? */
|
||||
static const struct {
|
||||
GLenum format, type;
|
||||
const char *name;
|
||||
} SrcFormats[] = {
|
||||
{ GL_RGBA, GL_UNSIGNED_BYTE, "GL_RGBA/GLubyte" },
|
||||
{ GL_BGRA, GL_UNSIGNED_BYTE, "GL_BGRA/GLubyte" },
|
||||
{ 0, 0, NULL }
|
||||
};
|
||||
|
||||
|
||||
|
||||
/** Called from test harness/main */
|
||||
void
|
||||
PerfDraw(void)
|
||||
{
|
||||
GLint maxSize;
|
||||
double rate;
|
||||
GLint fmt, subImage;
|
||||
|
||||
glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxSize);
|
||||
|
||||
/* loop over source data formats */
|
||||
for (fmt = 0; SrcFormats[fmt].format; fmt++) {
|
||||
TexSrcFormat = SrcFormats[fmt].format;
|
||||
TexSrcType = SrcFormats[fmt].type;
|
||||
|
||||
/* loop over glTexImage, glTexSubImage */
|
||||
for (subImage = 0; subImage < 2; subImage++) {
|
||||
|
||||
/* loop over texture sizes */
|
||||
for (TexSize = 16; TexSize <= maxSize; TexSize *= 2) {
|
||||
GLint bytesPerImage;
|
||||
double mbPerSec;
|
||||
|
||||
bytesPerImage = TexSize * TexSize * 4;
|
||||
TexImage = malloc(bytesPerImage);
|
||||
|
||||
if (subImage) {
|
||||
/* create initial, empty texture */
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA,
|
||||
TexSize, TexSize, 0,
|
||||
TexSrcFormat, TexSrcType, NULL);
|
||||
rate = PerfMeasureRate(UploadTexSubImage2D);
|
||||
}
|
||||
else {
|
||||
rate = PerfMeasureRate(UploadTexImage2D);
|
||||
}
|
||||
|
||||
mbPerSec = rate * bytesPerImage / (1024.0 * 1024.0);
|
||||
|
||||
perf_printf(" glTex%sImage2D(%s %d x %d): "
|
||||
"%.1f images/sec, %.1f MB/sec\n",
|
||||
(subImage ? "Sub" : ""),
|
||||
SrcFormats[fmt].name, TexSize, TexSize, rate, mbPerSec);
|
||||
|
||||
free(TexImage);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
exit(0);
|
||||
}
|
|
@ -0,0 +1,138 @@
|
|||
/*
|
||||
* Copyright (C) 2009 VMware, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Measure VBO upload speed.
|
||||
* That is, measure glBufferDataARB() and glBufferSubDataARB().
|
||||
*
|
||||
* Brian Paul
|
||||
* 16 Sep 2009
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "glmain.h"
|
||||
#include "common.h"
|
||||
|
||||
|
||||
int WinWidth = 100, WinHeight = 100;
|
||||
|
||||
static GLuint VBO;
|
||||
|
||||
static GLsizei VBOSize = 0;
|
||||
static GLubyte *VBOData = NULL;
|
||||
|
||||
static const GLboolean DrawPoint = GL_TRUE;
|
||||
static const GLboolean BufferSubDataInHalves = GL_TRUE;
|
||||
|
||||
static const GLfloat Vertex0[2] = { 0.0, 0.0 };
|
||||
|
||||
|
||||
/** Called from test harness/main */
|
||||
void
|
||||
PerfInit(void)
|
||||
{
|
||||
/* setup VBO */
|
||||
glGenBuffersARB(1, &VBO);
|
||||
glBindBufferARB(GL_ARRAY_BUFFER_ARB, VBO);
|
||||
glVertexPointer(2, GL_FLOAT, sizeof(Vertex0), (void *) 0);
|
||||
glEnableClientState(GL_VERTEX_ARRAY);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
UploadVBO(unsigned count)
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < count; i++) {
|
||||
glBufferDataARB(GL_ARRAY_BUFFER, VBOSize, VBOData, GL_STREAM_DRAW_ARB);
|
||||
|
||||
if (DrawPoint)
|
||||
glDrawArrays(GL_POINTS, 0, 1);
|
||||
}
|
||||
glFinish();
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
UploadSubVBO(unsigned count)
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < count; i++) {
|
||||
if (BufferSubDataInHalves) {
|
||||
GLsizei half = VBOSize / 2;
|
||||
glBufferSubDataARB(GL_ARRAY_BUFFER, 0, half, VBOData);
|
||||
glBufferSubDataARB(GL_ARRAY_BUFFER, half, half, VBOData + half);
|
||||
}
|
||||
else {
|
||||
glBufferSubDataARB(GL_ARRAY_BUFFER, 0, VBOSize, VBOData);
|
||||
}
|
||||
|
||||
if (DrawPoint)
|
||||
glDrawArrays(GL_POINTS, 0, 1);
|
||||
}
|
||||
glFinish();
|
||||
}
|
||||
|
||||
|
||||
/* Buffer sizes (in bytes) to measure; the list ends with a 0 entry. */
static const GLsizei Sizes[] = {
   64,
   1024,
   16 * 1024,
   256 * 1024,
   1024 * 1024,
   16 * 1024 * 1024,
   0 /* end of list */
};
|
||||
|
||||
|
||||
/** Called from test harness/main */
|
||||
void
|
||||
PerfDraw(void)
|
||||
{
|
||||
double rate, mbPerSec;
|
||||
int sub, sz;
|
||||
|
||||
/* loop over whole/sub buffer upload */
|
||||
for (sub = 0; sub < 2; sub++) {
|
||||
|
||||
/* loop over VBO sizes */
|
||||
for (sz = 0; Sizes[sz]; sz++) {
|
||||
VBOSize = Sizes[sz];
|
||||
|
||||
VBOData = malloc(VBOSize);
|
||||
memcpy(VBOData, Vertex0, sizeof(Vertex0));
|
||||
|
||||
if (sub)
|
||||
rate = PerfMeasureRate(UploadSubVBO);
|
||||
else
|
||||
rate = PerfMeasureRate(UploadVBO);
|
||||
|
||||
mbPerSec = rate * VBOSize / (1024.0 * 1024.0);
|
||||
|
||||
perf_printf(" glBuffer%sDataARB(size = %d): %.1f MB/sec\n",
|
||||
(sub ? "Sub" : ""), VBOSize, mbPerSec);
|
||||
|
||||
free(VBOData);
|
||||
}
|
||||
}
|
||||
|
||||
exit(0);
|
||||
}
|
|
@ -0,0 +1,271 @@
|
|||
/*
|
||||
* Copyright (C) 2009 VMware, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Measure simple vertex processing rate via:
|
||||
* - immediate mode
|
||||
* - vertex arrays
|
||||
* - VBO vertex arrays
|
||||
* - glDrawElements
|
||||
* - VBO glDrawElements
|
||||
* - glDrawRangeElements
|
||||
* - VBO glDrawRangeElements
|
||||
*
|
||||
* Brian Paul
|
||||
* 16 Sep 2009
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include "glmain.h"
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#define MAX_VERTS (100 * 100)
|
||||
|
||||
/** glVertex2/3/4 size */
|
||||
#define VERT_SIZE 4
|
||||
|
||||
int WinWidth = 500, WinHeight = 500;
|
||||
|
||||
static GLuint VertexBO, ElementBO;
|
||||
|
||||
static unsigned NumVerts = MAX_VERTS;
|
||||
static unsigned VertBytes = VERT_SIZE * sizeof(float);
|
||||
static float *VertexData = NULL;
|
||||
|
||||
static unsigned NumElements = MAX_VERTS;
|
||||
static GLuint *Elements = NULL;
|
||||
|
||||
|
||||
/**
|
||||
* Load VertexData buffer with a 2-D grid of points in the range [-1,1]^2.
|
||||
*/
|
||||
static void
|
||||
InitializeVertexData(void)
|
||||
{
|
||||
unsigned i;
|
||||
float x = -1.0, y = -1.0;
|
||||
float dx = 2.0 / 100;
|
||||
float dy = 2.0 / 100;
|
||||
|
||||
VertexData = (float *) malloc(NumVerts * VertBytes);
|
||||
|
||||
for (i = 0; i < NumVerts; i++) {
|
||||
VertexData[i * VERT_SIZE + 0] = x;
|
||||
VertexData[i * VERT_SIZE + 1] = y;
|
||||
VertexData[i * VERT_SIZE + 2] = 0.0;
|
||||
VertexData[i * VERT_SIZE + 3] = 1.0;
|
||||
x += dx;
|
||||
if (x > 1.0) {
|
||||
x = -1.0;
|
||||
y += dy;
|
||||
}
|
||||
}
|
||||
|
||||
Elements = (GLuint *) malloc(NumVerts * sizeof(GLuint));
|
||||
|
||||
for (i = 0; i < NumVerts; i++) {
|
||||
Elements[i] = NumVerts - i - 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Called from test harness/main */
|
||||
void
|
||||
PerfInit(void)
|
||||
{
|
||||
InitializeVertexData();
|
||||
|
||||
/* setup VertexBO */
|
||||
glGenBuffersARB(1, &VertexBO);
|
||||
glBindBufferARB(GL_ARRAY_BUFFER_ARB, VertexBO);
|
||||
glBufferDataARB(GL_ARRAY_BUFFER_ARB,
|
||||
NumVerts * VertBytes, VertexData, GL_STATIC_DRAW_ARB);
|
||||
glEnableClientState(GL_VERTEX_ARRAY);
|
||||
|
||||
/* setup ElementBO */
|
||||
glGenBuffersARB(1, &ElementBO);
|
||||
glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, ElementBO);
|
||||
glBufferDataARB(GL_ELEMENT_ARRAY_BUFFER_ARB,
|
||||
NumElements * sizeof(GLuint), Elements, GL_STATIC_DRAW_ARB);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
DrawImmediate(unsigned count)
|
||||
{
|
||||
unsigned i;
|
||||
glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER, 0);
|
||||
glBindBufferARB(GL_ARRAY_BUFFER, 0);
|
||||
for (i = 0; i < count; i++) {
|
||||
unsigned j;
|
||||
glBegin(GL_POINTS);
|
||||
for (j = 0; j < NumVerts; j++) {
|
||||
#if VERT_SIZE == 4
|
||||
glVertex4fv(VertexData + j * 4);
|
||||
#elif VERT_SIZE == 3
|
||||
glVertex3fv(VertexData + j * 3);
|
||||
#elif VERT_SIZE == 2
|
||||
glVertex2fv(VertexData + j * 2);
|
||||
#else
|
||||
abort();
|
||||
#endif
|
||||
}
|
||||
glEnd();
|
||||
}
|
||||
glFinish();
|
||||
PerfSwapBuffers();
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
DrawArraysMem(unsigned count)
|
||||
{
|
||||
unsigned i;
|
||||
glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER, 0);
|
||||
glBindBufferARB(GL_ARRAY_BUFFER, 0);
|
||||
glVertexPointer(VERT_SIZE, GL_FLOAT, VertBytes, VertexData);
|
||||
for (i = 0; i < count; i++) {
|
||||
glDrawArrays(GL_POINTS, 0, NumVerts);
|
||||
}
|
||||
glFinish();
|
||||
PerfSwapBuffers();
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
DrawArraysVBO(unsigned count)
|
||||
{
|
||||
unsigned i;
|
||||
glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER, 0);
|
||||
glBindBufferARB(GL_ARRAY_BUFFER, VertexBO);
|
||||
glVertexPointer(VERT_SIZE, GL_FLOAT, VertBytes, (void *) 0);
|
||||
for (i = 0; i < count; i++) {
|
||||
glDrawArrays(GL_POINTS, 0, NumVerts);
|
||||
}
|
||||
glFinish();
|
||||
PerfSwapBuffers();
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
DrawElementsMem(unsigned count)
|
||||
{
|
||||
unsigned i;
|
||||
glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER, 0);
|
||||
glBindBufferARB(GL_ARRAY_BUFFER, 0);
|
||||
glVertexPointer(VERT_SIZE, GL_FLOAT, VertBytes, VertexData);
|
||||
for (i = 0; i < count; i++) {
|
||||
glDrawElements(GL_POINTS, NumVerts, GL_UNSIGNED_INT, Elements);
|
||||
}
|
||||
glFinish();
|
||||
PerfSwapBuffers();
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
DrawElementsBO(unsigned count)
|
||||
{
|
||||
unsigned i;
|
||||
glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER, ElementBO);
|
||||
glBindBufferARB(GL_ARRAY_BUFFER, VertexBO);
|
||||
glVertexPointer(VERT_SIZE, GL_FLOAT, VertBytes, (void *) 0);
|
||||
for (i = 0; i < count; i++) {
|
||||
glDrawElements(GL_POINTS, NumVerts, GL_UNSIGNED_INT, (void *) 0);
|
||||
}
|
||||
glFinish();
|
||||
PerfSwapBuffers();
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
DrawRangeElementsMem(unsigned count)
|
||||
{
|
||||
unsigned i;
|
||||
glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER, 0);
|
||||
glBindBufferARB(GL_ARRAY_BUFFER, 0);
|
||||
glVertexPointer(VERT_SIZE, GL_FLOAT, VertBytes, VertexData);
|
||||
for (i = 0; i < count; i++) {
|
||||
glDrawRangeElements(GL_POINTS, 0, NumVerts - 1,
|
||||
NumVerts, GL_UNSIGNED_INT, Elements);
|
||||
}
|
||||
glFinish();
|
||||
PerfSwapBuffers();
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
DrawRangeElementsBO(unsigned count)
|
||||
{
|
||||
unsigned i;
|
||||
glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER, ElementBO);
|
||||
glBindBufferARB(GL_ARRAY_BUFFER, VertexBO);
|
||||
glVertexPointer(VERT_SIZE, GL_FLOAT, VertBytes, (void *) 0);
|
||||
for (i = 0; i < count; i++) {
|
||||
glDrawRangeElements(GL_POINTS, 0, NumVerts - 1,
|
||||
NumVerts, GL_UNSIGNED_INT, (void *) 0);
|
||||
}
|
||||
glFinish();
|
||||
PerfSwapBuffers();
|
||||
}
|
||||
|
||||
|
||||
/** Called from test harness/main */
|
||||
void
|
||||
PerfDraw(void)
|
||||
{
|
||||
double rate;
|
||||
|
||||
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
|
||||
|
||||
perf_printf("Vertex rate (%d x Vertex%df)\n", NumVerts, VERT_SIZE);
|
||||
|
||||
rate = PerfMeasureRate(DrawImmediate);
|
||||
rate *= NumVerts;
|
||||
perf_printf(" Immediate mode: %.1f verts/sec\n", rate);
|
||||
|
||||
rate = PerfMeasureRate(DrawArraysMem);
|
||||
rate *= NumVerts;
|
||||
perf_printf(" glDrawArrays: %.1f verts/sec\n", rate);
|
||||
|
||||
rate = PerfMeasureRate(DrawArraysVBO);
|
||||
rate *= NumVerts;
|
||||
perf_printf(" VBO glDrawArrays: %.1f verts/sec\n", rate);
|
||||
|
||||
rate = PerfMeasureRate(DrawElementsMem);
|
||||
rate *= NumVerts;
|
||||
perf_printf(" glDrawElements: %.1f verts/sec\n", rate);
|
||||
|
||||
rate = PerfMeasureRate(DrawElementsBO);
|
||||
rate *= NumVerts;
|
||||
perf_printf(" VBO glDrawElements: %.1f verts/sec\n", rate);
|
||||
|
||||
rate = PerfMeasureRate(DrawRangeElementsMem);
|
||||
rate *= NumVerts;
|
||||
perf_printf(" glDrawRangeElements: %.1f verts/sec\n", rate);
|
||||
|
||||
rate = PerfMeasureRate(DrawRangeElementsBO);
|
||||
rate *= NumVerts;
|
||||
perf_printf(" VBO glDrawRangeElements: %.1f verts/sec\n", rate);
|
||||
|
||||
exit(0);
|
||||
}
|
|
@ -12,14 +12,17 @@
|
|||
#include <GL/glut.h>
|
||||
|
||||
static GLint WinWidth = 500, WinHeight = 500;
|
||||
static GLboolean Invert = GL_FALSE;
|
||||
static GLboolean TestPacking = GL_FALSE;
|
||||
static GLboolean TestList = GL_FALSE;
|
||||
|
||||
|
||||
static void Display(void)
|
||||
{
|
||||
GLfloat depth[100 * 100];
|
||||
GLfloat depth2[400 * 400];
|
||||
GLfloat min, max;
|
||||
int i;
|
||||
GLfloat depth[100 * 100 * 2];
|
||||
GLfloat depth2[400 * 400]; /* *2 to test pixelstore stuff */
|
||||
GLuint list;
|
||||
GLenum depthType = GL_FLOAT;
|
||||
|
||||
glClearColor(0.5, 0.5, 0.5, 1.0);
|
||||
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
|
||||
|
@ -35,22 +38,61 @@ static void Display(void)
|
|||
glLoadIdentity();
|
||||
glutSolidSphere(1.0, 20, 10);
|
||||
|
||||
/* read the depth image */
|
||||
glReadPixels(0, 0, 100, 100, GL_DEPTH_COMPONENT, GL_FLOAT, depth);
|
||||
min = max = depth[0];
|
||||
for (i = 1; i < 100 * 100; i++) {
|
||||
if (depth[i] < min)
|
||||
min = depth[i];
|
||||
if (depth[i] > max)
|
||||
max = depth[i];
|
||||
if (TestPacking) {
|
||||
glPixelStorei(GL_PACK_ROW_LENGTH, 120);
|
||||
glPixelStorei(GL_PACK_SKIP_PIXELS, 5);
|
||||
}
|
||||
|
||||
/* read the depth image */
|
||||
glReadPixels(0, 0, 100, 100, GL_DEPTH_COMPONENT, depthType, depth);
|
||||
if (depthType == GL_FLOAT) {
|
||||
GLfloat min, max;
|
||||
int i;
|
||||
min = max = depth[0];
|
||||
for (i = 1; i < 100 * 100; i++) {
|
||||
if (depth[i] < min)
|
||||
min = depth[i];
|
||||
if (depth[i] > max)
|
||||
max = depth[i];
|
||||
}
|
||||
printf("Depth value range: [%f, %f]\n", min, max);
|
||||
}
|
||||
|
||||
if (TestPacking) {
|
||||
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
||||
glPixelStorei(GL_PACK_SKIP_PIXELS, 0);
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, 120);
|
||||
glPixelStorei(GL_UNPACK_SKIP_PIXELS, 5);
|
||||
}
|
||||
printf("Depth value range: [%f, %f]\n", min, max);
|
||||
|
||||
/* draw depth image with scaling (into z buffer) */
|
||||
glPixelZoom(4.0, 4.0);
|
||||
glColor4f(1, 0, 0, 0);
|
||||
glWindowPos2i(100, 0);
|
||||
glDrawPixels(100, 100, GL_DEPTH_COMPONENT, GL_FLOAT, depth);
|
||||
if (Invert) {
|
||||
glPixelTransferf(GL_DEPTH_SCALE, -1.0);
|
||||
glPixelTransferf(GL_DEPTH_BIAS, 1.0);
|
||||
}
|
||||
if (TestList) {
|
||||
list = glGenLists(1);
|
||||
glNewList(list, GL_COMPILE);
|
||||
glDrawPixels(100, 100, GL_DEPTH_COMPONENT, depthType, depth);
|
||||
glEndList();
|
||||
glCallList(list);
|
||||
glDeleteLists(list, 1);
|
||||
}
|
||||
else {
|
||||
glDrawPixels(100, 100, GL_DEPTH_COMPONENT, depthType, depth);
|
||||
}
|
||||
if (Invert) {
|
||||
glPixelTransferf(GL_DEPTH_SCALE, 1.0);
|
||||
glPixelTransferf(GL_DEPTH_BIAS, 0.0);
|
||||
}
|
||||
|
||||
if (TestPacking) {
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
|
||||
glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0);
|
||||
}
|
||||
|
||||
glDisable(GL_DEPTH_TEST);
|
||||
|
||||
|
@ -77,6 +119,17 @@ static void Key(unsigned char key, int x, int y)
|
|||
(void) x;
|
||||
(void) y;
|
||||
switch (key) {
|
||||
case 'i':
|
||||
Invert = !Invert;
|
||||
break;
|
||||
case 'p':
|
||||
TestPacking = !TestPacking;
|
||||
printf("Test pixel pack/unpack: %d\n", TestPacking);
|
||||
break;
|
||||
case 'l':
|
||||
TestList = !TestList;
|
||||
printf("Test dlist: %d\n", TestList);
|
||||
break;
|
||||
case 27:
|
||||
exit(0);
|
||||
break;
|
||||
|
|
|
@ -119,6 +119,12 @@ static void Init( void )
|
|||
glBindProgramARB(GL_VERTEX_PROGRAM_ARB, prognum);
|
||||
glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
|
||||
sz, (const GLubyte *) buf);
|
||||
if (glGetError()) {
|
||||
printf("Program failed to compile:\n%s\n", buf);
|
||||
printf("Error: %s\n",
|
||||
(char *) glGetString(GL_PROGRAM_ERROR_STRING_ARB));
|
||||
exit(1);
|
||||
}
|
||||
assert(glIsProgramARB(prognum));
|
||||
}
|
||||
|
||||
|
|
|
@ -52,11 +52,20 @@ def generate(env):
|
|||
target_cpu = 'x64'
|
||||
else:
|
||||
raise SCons.Errors.InternalError, "Unsupported target machine"
|
||||
include_dir = 'Include'
|
||||
|
||||
include_dir = os.path.join(dxsdk_root, 'Include')
|
||||
lib_dir = os.path.join(dxsdk_root, 'Lib', target_cpu)
|
||||
|
||||
env.Append(CPPDEFINES = [('HAVE_DXSDK', '1')])
|
||||
env.Prepend(CPPPATH = [os.path.join(dxsdk_root, 'Include')])
|
||||
env.Prepend(LIBPATH = [os.path.join(dxsdk_root, 'Lib', target_cpu)])
|
||||
|
||||
gcc = 'gcc' in os.path.basename(env['CC']).split('-')
|
||||
if gcc:
|
||||
# Make GCC more forgiving towards Microsoft's headers
|
||||
env.Prepend(CPPFLAGS = ['-isystem', include_dir])
|
||||
else:
|
||||
env.Prepend(CPPPATH = [include_dir])
|
||||
|
||||
env.Prepend(LIBPATH = [lib_dir])
|
||||
|
||||
def exists(env):
|
||||
return get_dxsdk_root(env) is not None
|
||||
|
|
|
@ -334,12 +334,17 @@ def generate(env):
|
|||
else:
|
||||
ccflags += ['-O3', '-g3']
|
||||
if env['profile']:
|
||||
ccflags += ['-pg']
|
||||
# See http://code.google.com/p/jrfonseca/wiki/Gprof2Dot#Which_options_should_I_pass_to_gcc_when_compiling_for_profiling?
|
||||
ccflags += [
|
||||
'-fno-omit-frame-pointer',
|
||||
'-fno-optimize-sibling-calls',
|
||||
]
|
||||
if env['machine'] == 'x86':
|
||||
ccflags += [
|
||||
'-m32',
|
||||
#'-march=pentium4',
|
||||
'-mmmx', '-msse', '-msse2', # enable SIMD intrinsics
|
||||
'-mstackrealign', # ensure stack is aligned -- do not enabled -msse without it!
|
||||
#'-mfpmath=sse',
|
||||
]
|
||||
if env['machine'] == 'x86_64':
|
||||
|
|
|
@ -51,20 +51,26 @@ def generate(env):
|
|||
|
||||
llvm_bin_dir = os.path.join(llvm_dir, llvm_subdir, 'bin')
|
||||
if not os.path.isdir(llvm_bin_dir):
|
||||
raise SCons.Errors.InternalError, "LLVM build directory not found"
|
||||
llvm_bin_dir = os.path.join(llvm_dir, 'bin')
|
||||
if not os.path.isdir(llvm_bin_dir):
|
||||
raise SCons.Errors.InternalError, "LLVM binary directory not found"
|
||||
|
||||
env.PrependENVPath('PATH', llvm_bin_dir)
|
||||
|
||||
if env.Detect('llvm-config'):
|
||||
try:
|
||||
env['LLVM_VERSION'] = env.backtick('llvm-config --version')
|
||||
except AttributeError:
|
||||
env['LLVM_VERSION'] = 'X.X'
|
||||
version = env.backtick('llvm-config --version').rstrip()
|
||||
|
||||
env.ParseConfig('llvm-config --cppflags')
|
||||
env.ParseConfig('llvm-config --libs jit interpreter nativecodegen bitwriter')
|
||||
env.ParseConfig('llvm-config --ldflags')
|
||||
env['LINK'] = env['CXX']
|
||||
try:
|
||||
env.ParseConfig('llvm-config --cppflags')
|
||||
env.ParseConfig('llvm-config --libs jit interpreter nativecodegen bitwriter')
|
||||
env.ParseConfig('llvm-config --ldflags')
|
||||
except OSError:
|
||||
print 'llvm-config version %s failed' % version
|
||||
else:
|
||||
if env['platform'] == 'windows':
|
||||
env.Append(LIBS = ['imagehlp', 'psapi'])
|
||||
env['LINK'] = env['CXX']
|
||||
env['LLVM_VERSION'] = version
|
||||
|
||||
def exists(env):
|
||||
return True
|
||||
|
|
|
@ -85,8 +85,6 @@ def get_winddk_paths(env, version, root):
|
|||
else:
|
||||
# TODO: take in consideration the host cpu
|
||||
bin_dir = os.path.join(root, 'bin', 'win64', 'x86', cpu_bin(target_cpu))
|
||||
|
||||
env.PrependENVPath('PATH', [bin_dir])
|
||||
|
||||
crt_inc_dir = os.path.join(root, 'inc', 'crt')
|
||||
if version_major >= 6000:
|
||||
|
@ -98,17 +96,33 @@ def get_winddk_paths(env, version, root):
|
|||
sdk_inc_dir = os.path.join(root, 'inc', target_os)
|
||||
wdm_inc_dir = os.path.join(root, 'inc', 'ddk', 'wdm', target_os)
|
||||
|
||||
env.PrependENVPath('INCLUDE', [
|
||||
wdm_inc_dir,
|
||||
ddk_inc_dir,
|
||||
crt_inc_dir,
|
||||
sdk_inc_dir,
|
||||
])
|
||||
if env['toolchain'] == 'winddk':
|
||||
env.PrependENVPath('PATH', [bin_dir])
|
||||
env.PrependENVPath('INCLUDE', [
|
||||
wdm_inc_dir,
|
||||
ddk_inc_dir,
|
||||
crt_inc_dir,
|
||||
sdk_inc_dir,
|
||||
])
|
||||
env.PrependENVPath('LIB', [
|
||||
os.path.join(root, 'lib', 'crt', target_cpu),
|
||||
os.path.join(root, 'lib', target_os, target_cpu),
|
||||
])
|
||||
elif env['toolchain'] == 'crossmingw':
|
||||
env.Prepend(CPPFLAGS = [
|
||||
'-isystem', ddk_inc_dir,
|
||||
'-isystem', sdk_inc_dir,
|
||||
])
|
||||
else:
|
||||
env.Prepend(CPPPATH = [
|
||||
wdm_inc_dir,
|
||||
ddk_inc_dir,
|
||||
sdk_inc_dir,
|
||||
])
|
||||
env.Prepend(LIBPATH = [
|
||||
os.path.join(root, 'lib', target_os, target_cpu),
|
||||
])
|
||||
|
||||
env.PrependENVPath('LIB', [
|
||||
os.path.join(root, 'lib', 'crt', target_cpu),
|
||||
os.path.join(root, 'lib', target_os, target_cpu),
|
||||
])
|
||||
|
||||
def generate(env):
|
||||
if not env.has_key('ENV'):
|
||||
|
@ -120,9 +134,10 @@ def generate(env):
|
|||
get_winddk_paths(env, version, root)
|
||||
break
|
||||
|
||||
msvc_sa.generate(env)
|
||||
mslib_sa.generate(env)
|
||||
mslink_sa.generate(env)
|
||||
if env['toolchain'] == 'winddk':
|
||||
msvc_sa.generate(env)
|
||||
mslib_sa.generate(env)
|
||||
mslink_sa.generate(env)
|
||||
|
||||
def exists(env):
|
||||
for version in versions:
|
||||
|
|
|
@ -2756,19 +2756,32 @@ exec_instruction(
|
|||
if (mach->ExecMask) {
|
||||
/* do the call */
|
||||
|
||||
/* push the Cond, Loop, Cont stacks */
|
||||
/* First, record the depths of the execution stacks.
|
||||
* This is important for deeply nested/looped return statements.
|
||||
* We have to unwind the stacks by the correct amount. For a
|
||||
* real code generator, we could determine the number of entries
|
||||
* to pop off each stack with simple static analysis and avoid
|
||||
* implementing this data structure at run time.
|
||||
*/
|
||||
mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
|
||||
mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
|
||||
mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
|
||||
/* note that PC was already incremented above */
|
||||
mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
|
||||
|
||||
mach->CallStackTop++;
|
||||
|
||||
/* Second, push the Cond, Loop, Cont, Func stacks */
|
||||
assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
|
||||
mach->CondStack[mach->CondStackTop++] = mach->CondMask;
|
||||
assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
|
||||
mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
|
||||
assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
|
||||
mach->ContStack[mach->ContStackTop++] = mach->ContMask;
|
||||
|
||||
assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
|
||||
mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
|
||||
|
||||
/* note that PC was already incremented above */
|
||||
mach->CallStack[mach->CallStackTop++] = *pc;
|
||||
/* Finally, jump to the subroutine */
|
||||
*pc = inst->InstructionExtLabel.Label;
|
||||
}
|
||||
break;
|
||||
|
@ -2785,18 +2798,24 @@ exec_instruction(
|
|||
*pc = -1;
|
||||
return;
|
||||
}
|
||||
*pc = mach->CallStack[--mach->CallStackTop];
|
||||
|
||||
/* pop the Cond, Loop, Cont stacks */
|
||||
assert(mach->CondStackTop > 0);
|
||||
mach->CondMask = mach->CondStack[--mach->CondStackTop];
|
||||
assert(mach->LoopStackTop > 0);
|
||||
mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
|
||||
assert(mach->ContStackTop > 0);
|
||||
mach->ContMask = mach->ContStack[--mach->ContStackTop];
|
||||
assert(mach->CallStackTop > 0);
|
||||
mach->CallStackTop--;
|
||||
|
||||
mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
|
||||
mach->CondMask = mach->CondStack[mach->CondStackTop];
|
||||
|
||||
mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
|
||||
mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
|
||||
|
||||
mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
|
||||
mach->ContMask = mach->ContStack[mach->ContStackTop];
|
||||
|
||||
assert(mach->FuncStackTop > 0);
|
||||
mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
|
||||
|
||||
*pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
|
||||
|
||||
UPDATE_EXEC_MASK(mach);
|
||||
}
|
||||
break;
|
||||
|
@ -3245,7 +3264,6 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
|
|||
mach->FuncMask = 0xf;
|
||||
mach->ExecMask = 0xf;
|
||||
|
||||
mach->CondStackTop = 0; /* temporarily subvert this assertion */
|
||||
assert(mach->CondStackTop == 0);
|
||||
assert(mach->LoopStackTop == 0);
|
||||
assert(mach->ContStackTop == 0);
|
||||
|
|
|
@ -186,6 +186,17 @@ struct tgsi_exec_labels
|
|||
*/
|
||||
#define TGSI_EXEC_MAX_CONST_BUFFER 4096
|
||||
|
||||
|
||||
/**
 * Function call/activation record.
 *
 * One record is stored per active subroutine call.  When a call is
 * executed the interpreter records the current depths of the Cond, Loop
 * and Cont stacks plus the return address here; on return those depths
 * are restored, so the stacks are unwound by the right amount even when
 * the return is inside nested conditionals or loops.
 */
|
||||
struct tgsi_call_record
|
||||
{
|
||||
/* CondStackTop at the time of the call */
uint CondStackTop;
|
||||
/* LoopStackTop at the time of the call */
uint LoopStackTop;
|
||||
/* ContStackTop at the time of the call */
uint ContStackTop;
|
||||
/* program counter value to resume at after the return */
uint ReturnAddr;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Run-time virtual machine state for executing TGSI shader.
|
||||
*/
|
||||
|
@ -249,7 +260,7 @@ struct tgsi_exec_machine
|
|||
int FuncStackTop;
|
||||
|
||||
/** Function call stack for saving/restoring the program counter */
|
||||
uint CallStack[TGSI_EXEC_MAX_CALL_NESTING];
|
||||
struct tgsi_call_record CallStack[TGSI_EXEC_MAX_CALL_NESTING];
|
||||
int CallStackTop;
|
||||
|
||||
struct tgsi_full_instruction *Instructions;
|
||||
|
|
|
@ -358,7 +358,7 @@ epilog(
|
|||
|
||||
boolean
|
||||
tgsi_sanity_check(
|
||||
struct tgsi_token *tokens )
|
||||
const struct tgsi_token *tokens )
|
||||
{
|
||||
struct sanity_check_ctx ctx;
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ extern "C" {
|
|||
*/
|
||||
boolean
|
||||
tgsi_sanity_check(
|
||||
struct tgsi_token *tokens );
|
||||
const struct tgsi_token *tokens );
|
||||
|
||||
#if defined __cplusplus
|
||||
}
|
||||
|
|
|
@ -132,6 +132,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
|
|||
if (file == TGSI_FILE_INPUT) {
|
||||
info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName;
|
||||
info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex;
|
||||
info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate;
|
||||
info->num_inputs++;
|
||||
}
|
||||
else if (file == TGSI_FILE_OUTPUT) {
|
||||
|
|
|
@ -45,6 +45,7 @@ struct tgsi_shader_info
|
|||
ubyte num_outputs;
|
||||
ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */
|
||||
ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
|
||||
ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS];
|
||||
ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */
|
||||
ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
|
||||
|
||||
|
|
|
@ -39,8 +39,9 @@
|
|||
#include "tgsi/tgsi_info.h"
|
||||
#include "tgsi/tgsi_parse.h"
|
||||
#include "tgsi/tgsi_util.h"
|
||||
#include "tgsi_exec.h"
|
||||
#include "tgsi_sse2.h"
|
||||
#include "tgsi/tgsi_dump.h"
|
||||
#include "tgsi/tgsi_exec.h"
|
||||
#include "tgsi/tgsi_sse2.h"
|
||||
|
||||
#include "rtasm/rtasm_x86sse.h"
|
||||
|
||||
|
@ -1360,6 +1361,32 @@ emit_store(
|
|||
const struct tgsi_full_instruction *inst,
|
||||
unsigned chan_index )
|
||||
{
|
||||
switch( inst->Instruction.Saturate ) {
|
||||
case TGSI_SAT_NONE:
|
||||
break;
|
||||
|
||||
case TGSI_SAT_ZERO_ONE:
|
||||
sse_maxps(
|
||||
func,
|
||||
make_xmm( xmm ),
|
||||
get_temp(
|
||||
TGSI_EXEC_TEMP_00000000_I,
|
||||
TGSI_EXEC_TEMP_00000000_C ) );
|
||||
|
||||
sse_minps(
|
||||
func,
|
||||
make_xmm( xmm ),
|
||||
get_temp(
|
||||
TGSI_EXEC_TEMP_ONE_I,
|
||||
TGSI_EXEC_TEMP_ONE_C ) );
|
||||
break;
|
||||
|
||||
case TGSI_SAT_MINUS_PLUS_ONE:
|
||||
assert( 0 );
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
switch( reg->DstRegister.File ) {
|
||||
case TGSI_FILE_OUTPUT:
|
||||
emit_output(
|
||||
|
@ -1388,19 +1415,6 @@ emit_store(
|
|||
default:
|
||||
assert( 0 );
|
||||
}
|
||||
|
||||
switch( inst->Instruction.Saturate ) {
|
||||
case TGSI_SAT_NONE:
|
||||
break;
|
||||
|
||||
case TGSI_SAT_ZERO_ONE:
|
||||
/* assert( 0 ); */
|
||||
break;
|
||||
|
||||
case TGSI_SAT_MINUS_PLUS_ONE:
|
||||
assert( 0 );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#define STORE( FUNC, INST, XMM, INDEX, CHAN )\
|
||||
|
@ -1747,14 +1761,6 @@ emit_instruction(
|
|||
if (indirect_temp_reference(inst))
|
||||
return FALSE;
|
||||
|
||||
/* we don't handle saturation/clamping yet */
|
||||
if (inst->Instruction.Saturate != TGSI_SAT_NONE)
|
||||
return FALSE;
|
||||
|
||||
/* need to use extra temps to fix SOA dependencies : */
|
||||
if (tgsi_check_soa_dependencies(inst))
|
||||
return FALSE;
|
||||
|
||||
switch (inst->Instruction.Opcode) {
|
||||
case TGSI_OPCODE_ARL:
|
||||
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
|
||||
|
@ -1768,8 +1774,10 @@ emit_instruction(
|
|||
case TGSI_OPCODE_MOV:
|
||||
case TGSI_OPCODE_SWZ:
|
||||
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
|
||||
FETCH( func, *inst, 0, 0, chan_index );
|
||||
STORE( func, *inst, 0, 0, chan_index );
|
||||
FETCH( func, *inst, 4 + chan_index, 0, chan_index );
|
||||
}
|
||||
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
|
||||
STORE( func, *inst, 4 + chan_index, 0, chan_index );
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -2929,6 +2937,22 @@ tgsi_emit_sse2(
|
|||
parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
|
||||
"vertex shader" : "fragment shader");
|
||||
}
|
||||
|
||||
if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) {
|
||||
uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
|
||||
|
||||
/* XXX: we only handle src/dst aliasing in a few opcodes
|
||||
* currently. Need to use an additional temporary to hold
|
||||
* the result in the cases where the code is too opaque to
|
||||
* fix.
|
||||
*/
|
||||
if (opcode != TGSI_OPCODE_MOV &&
|
||||
opcode != TGSI_OPCODE_SWZ) {
|
||||
debug_printf("Warning: src/dst aliasing in instruction"
|
||||
" is not handled:\n");
|
||||
tgsi_dump_instruction(&parse.FullToken.FullInstruction, 1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case TGSI_TOKEN_TYPE_IMMEDIATE:
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include "tgsi/tgsi_ureg.h"
|
||||
#include "tgsi/tgsi_info.h"
|
||||
#include "tgsi/tgsi_dump.h"
|
||||
#include "tgsi/tgsi_sanity.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
|
@ -70,6 +71,7 @@ struct ureg_tokens {
|
|||
|
||||
#define UREG_MAX_INPUT PIPE_MAX_ATTRIBS
|
||||
#define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS
|
||||
#define UREG_MAX_CONSTANT_RANGE 32
|
||||
#define UREG_MAX_IMMEDIATE 32
|
||||
#define UREG_MAX_TEMP 256
|
||||
#define UREG_MAX_ADDR 2
|
||||
|
@ -86,8 +88,10 @@ struct ureg_program
|
|||
unsigned semantic_name;
|
||||
unsigned semantic_index;
|
||||
unsigned interp;
|
||||
} input[UREG_MAX_INPUT];
|
||||
unsigned nr_inputs;
|
||||
} fs_input[UREG_MAX_INPUT];
|
||||
unsigned nr_fs_inputs;
|
||||
|
||||
unsigned vs_inputs[UREG_MAX_INPUT/32];
|
||||
|
||||
struct {
|
||||
unsigned semantic_name;
|
||||
|
@ -107,9 +111,13 @@ struct ureg_program
|
|||
unsigned temps_active[UREG_MAX_TEMP / 32];
|
||||
unsigned nr_temps;
|
||||
|
||||
unsigned nr_addrs;
|
||||
struct {
|
||||
unsigned first;
|
||||
unsigned last;
|
||||
} constant_range[UREG_MAX_CONSTANT_RANGE];
|
||||
unsigned nr_constant_ranges;
|
||||
|
||||
unsigned nr_constants;
|
||||
unsigned nr_addrs;
|
||||
unsigned nr_instructions;
|
||||
|
||||
struct ureg_tokens domain[2];
|
||||
|
@ -119,6 +127,9 @@ static union tgsi_any_token error_tokens[32];
|
|||
|
||||
static void tokens_error( struct ureg_tokens *tokens )
|
||||
{
|
||||
if (tokens->tokens && tokens->tokens != error_tokens)
|
||||
FREE(tokens->tokens);
|
||||
|
||||
tokens->tokens = error_tokens;
|
||||
tokens->size = Elements(error_tokens);
|
||||
tokens->count = 0;
|
||||
|
@ -228,25 +239,25 @@ ureg_src_register( unsigned file,
|
|||
|
||||
|
||||
|
||||
static struct ureg_src
|
||||
ureg_DECL_input( struct ureg_program *ureg,
|
||||
unsigned name,
|
||||
unsigned index,
|
||||
unsigned interp_mode )
|
||||
struct ureg_src
|
||||
ureg_DECL_fs_input( struct ureg_program *ureg,
|
||||
unsigned name,
|
||||
unsigned index,
|
||||
unsigned interp_mode )
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < ureg->nr_inputs; i++) {
|
||||
if (ureg->input[i].semantic_name == name &&
|
||||
ureg->input[i].semantic_index == index)
|
||||
for (i = 0; i < ureg->nr_fs_inputs; i++) {
|
||||
if (ureg->fs_input[i].semantic_name == name &&
|
||||
ureg->fs_input[i].semantic_index == index)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (ureg->nr_inputs < UREG_MAX_INPUT) {
|
||||
ureg->input[i].semantic_name = name;
|
||||
ureg->input[i].semantic_index = index;
|
||||
ureg->input[i].interp = interp_mode;
|
||||
ureg->nr_inputs++;
|
||||
if (ureg->nr_fs_inputs < UREG_MAX_INPUT) {
|
||||
ureg->fs_input[i].semantic_name = name;
|
||||
ureg->fs_input[i].semantic_index = index;
|
||||
ureg->fs_input[i].interp = interp_mode;
|
||||
ureg->nr_fs_inputs++;
|
||||
}
|
||||
else {
|
||||
set_bad( ureg );
|
||||
|
@ -257,25 +268,14 @@ out:
|
|||
}
|
||||
|
||||
|
||||
|
||||
struct ureg_src
|
||||
ureg_DECL_fs_input( struct ureg_program *ureg,
|
||||
unsigned name,
|
||||
unsigned index,
|
||||
unsigned interp )
|
||||
{
|
||||
assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT);
|
||||
return ureg_DECL_input( ureg, name, index, interp );
|
||||
}
|
||||
|
||||
|
||||
struct ureg_src
|
||||
ureg_DECL_vs_input( struct ureg_program *ureg,
|
||||
unsigned name,
|
||||
unsigned index )
|
||||
{
|
||||
assert(ureg->processor == TGSI_PROCESSOR_VERTEX);
|
||||
return ureg_DECL_input( ureg, name, index, TGSI_INTERPOLATE_CONSTANT );
|
||||
|
||||
ureg->vs_inputs[index/32] |= 1 << (index % 32);
|
||||
return ureg_src_register( TGSI_FILE_INPUT, index );
|
||||
}
|
||||
|
||||
|
||||
|
@ -313,9 +313,57 @@ out:
|
|||
* value or manage any constant_buffer contents -- that's the
|
||||
* responsibility of the calling code.
|
||||
*/
|
||||
struct ureg_src ureg_DECL_constant(struct ureg_program *ureg )
|
||||
struct ureg_src ureg_DECL_constant(struct ureg_program *ureg,
|
||||
unsigned index )
|
||||
{
|
||||
return ureg_src_register( TGSI_FILE_CONSTANT, ureg->nr_constants++ );
|
||||
unsigned minconst = index, maxconst = index;
|
||||
unsigned i;
|
||||
|
||||
/* Inside existing range?
|
||||
*/
|
||||
for (i = 0; i < ureg->nr_constant_ranges; i++) {
|
||||
if (ureg->constant_range[i].first <= index &&
|
||||
ureg->constant_range[i].last >= index)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Extend existing range?
|
||||
*/
|
||||
for (i = 0; i < ureg->nr_constant_ranges; i++) {
|
||||
if (ureg->constant_range[i].last == index - 1) {
|
||||
ureg->constant_range[i].last = index;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (ureg->constant_range[i].first == index + 1) {
|
||||
ureg->constant_range[i].first = index;
|
||||
goto out;
|
||||
}
|
||||
|
||||
minconst = MIN2(minconst, ureg->constant_range[i].first);
|
||||
maxconst = MAX2(maxconst, ureg->constant_range[i].last);
|
||||
}
|
||||
|
||||
/* Create new range?
|
||||
*/
|
||||
if (ureg->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) {
|
||||
i = ureg->nr_constant_ranges++;
|
||||
ureg->constant_range[i].first = index;
|
||||
ureg->constant_range[i].last = index;
|
||||
}
|
||||
|
||||
/* Collapse all ranges down to one:
|
||||
*/
|
||||
i = 0;
|
||||
ureg->constant_range[0].first = minconst;
|
||||
ureg->constant_range[0].last = maxconst;
|
||||
ureg->nr_constant_ranges = 1;
|
||||
|
||||
out:
|
||||
assert(i < ureg->nr_constant_ranges);
|
||||
assert(ureg->constant_range[i].first <= index);
|
||||
assert(ureg->constant_range[i].last >= index);
|
||||
return ureg_src_register( TGSI_FILE_CONSTANT, index );
|
||||
}
|
||||
|
||||
|
||||
|
@ -566,6 +614,19 @@ ureg_emit_dst( struct ureg_program *ureg,
|
|||
}
|
||||
|
||||
|
||||
/**
 * Debug-build sanity check: the destination/source operand counts passed
 * by the caller must match the opcode's entry returned by
 * tgsi_get_opcode_info().  Compiles to an empty function unless DEBUG is
 * defined.
 */
static void validate( unsigned opcode,
                      unsigned nr_dst,
                      unsigned nr_src )
{
#ifdef DEBUG
   const struct tgsi_opcode_info *opinfo = tgsi_get_opcode_info( opcode );

   assert(opinfo);
   if (!opinfo)
      return;   /* unknown opcode: nothing to compare against */

   assert(nr_dst == opinfo->num_dst);
   assert(nr_src == opinfo->num_src);
#endif
}
|
||||
|
||||
unsigned
|
||||
ureg_emit_insn(struct ureg_program *ureg,
|
||||
|
@ -576,6 +637,8 @@ ureg_emit_insn(struct ureg_program *ureg,
|
|||
{
|
||||
union tgsi_any_token *out;
|
||||
|
||||
validate( opcode, num_dst, num_src );
|
||||
|
||||
out = get_tokens( ureg, DOMAIN_INSN, 1 );
|
||||
out[0].value = 0;
|
||||
out[0].insn.Type = TGSI_TOKEN_TYPE_INSTRUCTION;
|
||||
|
@ -678,17 +741,6 @@ ureg_insn(struct ureg_program *ureg,
|
|||
unsigned insn, i;
|
||||
boolean saturate;
|
||||
|
||||
#ifdef DEBUG
|
||||
{
|
||||
const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode );
|
||||
assert(info);
|
||||
if(info) {
|
||||
assert(nr_dst == info->num_dst);
|
||||
assert(nr_src == info->num_src);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
saturate = nr_dst ? dst[0].Saturate : FALSE;
|
||||
|
||||
insn = ureg_emit_insn( ureg, opcode, saturate, nr_dst, nr_src );
|
||||
|
@ -702,6 +754,53 @@ ureg_insn(struct ureg_program *ureg,
|
|||
ureg_fixup_insn_size( ureg, insn );
|
||||
}
|
||||
|
||||
void
|
||||
ureg_tex_insn(struct ureg_program *ureg,
|
||||
unsigned opcode,
|
||||
const struct ureg_dst *dst,
|
||||
unsigned nr_dst,
|
||||
unsigned target,
|
||||
const struct ureg_src *src,
|
||||
unsigned nr_src )
|
||||
{
|
||||
unsigned insn, i;
|
||||
boolean saturate;
|
||||
|
||||
saturate = nr_dst ? dst[0].Saturate : FALSE;
|
||||
|
||||
insn = ureg_emit_insn( ureg, opcode, saturate, nr_dst, nr_src );
|
||||
|
||||
ureg_emit_texture( ureg, insn, target ); \
|
||||
|
||||
for (i = 0; i < nr_dst; i++)
|
||||
ureg_emit_dst( ureg, dst[i] );
|
||||
|
||||
for (i = 0; i < nr_src; i++)
|
||||
ureg_emit_src( ureg, src[i] );
|
||||
|
||||
ureg_fixup_insn_size( ureg, insn );
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ureg_label_insn(struct ureg_program *ureg,
|
||||
unsigned opcode,
|
||||
const struct ureg_src *src,
|
||||
unsigned nr_src,
|
||||
unsigned *label_token )
|
||||
{
|
||||
unsigned insn, i;
|
||||
|
||||
insn = ureg_emit_insn( ureg, opcode, FALSE, 0, nr_src );
|
||||
|
||||
ureg_emit_label( ureg, insn, label_token ); \
|
||||
|
||||
for (i = 0; i < nr_src; i++)
|
||||
ureg_emit_src( ureg, src[i] );
|
||||
|
||||
ureg_fixup_insn_size( ureg, insn );
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void emit_decl( struct ureg_program *ureg,
|
||||
|
@ -777,13 +876,22 @@ static void emit_decls( struct ureg_program *ureg )
|
|||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < ureg->nr_inputs; i++) {
|
||||
emit_decl( ureg,
|
||||
TGSI_FILE_INPUT,
|
||||
i,
|
||||
ureg->input[i].semantic_name,
|
||||
ureg->input[i].semantic_index,
|
||||
ureg->input[i].interp );
|
||||
if (ureg->processor == TGSI_PROCESSOR_VERTEX) {
|
||||
for (i = 0; i < UREG_MAX_INPUT; i++) {
|
||||
if (ureg->vs_inputs[i/32] & (1 << (i%32))) {
|
||||
emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 );
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (i = 0; i < ureg->nr_fs_inputs; i++) {
|
||||
emit_decl( ureg,
|
||||
TGSI_FILE_INPUT,
|
||||
i,
|
||||
ureg->fs_input[i].semantic_name,
|
||||
ureg->fs_input[i].semantic_index,
|
||||
ureg->fs_input[i].interp );
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < ureg->nr_outputs; i++) {
|
||||
|
@ -801,10 +909,13 @@ static void emit_decls( struct ureg_program *ureg )
|
|||
ureg->sampler[i].Index, 1 );
|
||||
}
|
||||
|
||||
if (ureg->nr_constants) {
|
||||
emit_decl_range( ureg,
|
||||
TGSI_FILE_CONSTANT,
|
||||
0, ureg->nr_constants );
|
||||
if (ureg->nr_constant_ranges) {
|
||||
for (i = 0; i < ureg->nr_constant_ranges; i++)
|
||||
emit_decl_range( ureg,
|
||||
TGSI_FILE_CONSTANT,
|
||||
ureg->constant_range[i].first,
|
||||
(ureg->constant_range[i].last + 1 -
|
||||
ureg->constant_range[i].first) );
|
||||
}
|
||||
|
||||
if (ureg->nr_temps) {
|
||||
|
@ -890,6 +1001,15 @@ const struct tgsi_token *ureg_finalize( struct ureg_program *ureg )
|
|||
ureg->domain[DOMAIN_DECL].count);
|
||||
tgsi_dump( tokens, 0 );
|
||||
}
|
||||
|
||||
#if DEBUG
|
||||
if (tokens && !tgsi_sanity_check(tokens)) {
|
||||
debug_printf("tgsi_ureg.c, sanity check failed on generated tokens:\n");
|
||||
tgsi_dump(tokens, 0);
|
||||
assert(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
return tokens;
|
||||
}
|
||||
|
@ -911,6 +1031,25 @@ void *ureg_create_shader( struct ureg_program *ureg,
|
|||
}
|
||||
|
||||
|
||||
const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg,
|
||||
unsigned *nr_tokens )
|
||||
{
|
||||
const struct tgsi_token *tokens;
|
||||
|
||||
ureg_finalize(ureg);
|
||||
|
||||
tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token;
|
||||
|
||||
if (nr_tokens)
|
||||
*nr_tokens = ureg->domain[DOMAIN_DECL].size;
|
||||
|
||||
ureg->domain[DOMAIN_DECL].tokens = 0;
|
||||
ureg->domain[DOMAIN_DECL].size = 0;
|
||||
ureg->domain[DOMAIN_DECL].order = 0;
|
||||
ureg->domain[DOMAIN_DECL].count = 0;
|
||||
|
||||
return tokens;
|
||||
}
|
||||
|
||||
|
||||
struct ureg_program *ureg_create( unsigned processor )
|
||||
|
|
|
@ -82,10 +82,21 @@ ureg_create( unsigned processor );
|
|||
const struct tgsi_token *
|
||||
ureg_finalize( struct ureg_program * );
|
||||
|
||||
/* Create and return a shader:
|
||||
*/
|
||||
void *
|
||||
ureg_create_shader( struct ureg_program *,
|
||||
struct pipe_context *pipe );
|
||||
|
||||
|
||||
/* Alternately, return the built token stream and hand ownership of
|
||||
* that memory to the caller:
|
||||
*/
|
||||
const struct tgsi_token *
|
||||
ureg_get_tokens( struct ureg_program *ureg,
|
||||
unsigned *nr_tokens );
|
||||
|
||||
|
||||
void
|
||||
ureg_destroy( struct ureg_program * );
|
||||
|
||||
|
@ -116,8 +127,7 @@ ureg_DECL_fs_input( struct ureg_program *,
|
|||
|
||||
struct ureg_src
|
||||
ureg_DECL_vs_input( struct ureg_program *,
|
||||
unsigned semantic_name,
|
||||
unsigned semantic_index );
|
||||
unsigned index );
|
||||
|
||||
struct ureg_dst
|
||||
ureg_DECL_output( struct ureg_program *,
|
||||
|
@ -130,7 +140,8 @@ ureg_DECL_immediate( struct ureg_program *,
|
|||
unsigned nr );
|
||||
|
||||
struct ureg_src
|
||||
ureg_DECL_constant( struct ureg_program * );
|
||||
ureg_DECL_constant( struct ureg_program *,
|
||||
unsigned index );
|
||||
|
||||
struct ureg_dst
|
||||
ureg_DECL_temporary( struct ureg_program * );
|
||||
|
@ -233,6 +244,24 @@ ureg_insn(struct ureg_program *ureg,
|
|||
unsigned nr_src );
|
||||
|
||||
|
||||
void
|
||||
ureg_tex_insn(struct ureg_program *ureg,
|
||||
unsigned opcode,
|
||||
const struct ureg_dst *dst,
|
||||
unsigned nr_dst,
|
||||
unsigned target,
|
||||
const struct ureg_src *src,
|
||||
unsigned nr_src );
|
||||
|
||||
|
||||
void
|
||||
ureg_label_insn(struct ureg_program *ureg,
|
||||
unsigned opcode,
|
||||
const struct ureg_src *src,
|
||||
unsigned nr_src,
|
||||
unsigned *label);
|
||||
|
||||
|
||||
/***********************************************************************
|
||||
* Internal instruction helpers, don't call these directly:
|
||||
*/
|
||||
|
|
|
@ -88,6 +88,7 @@ _debug_printf(const char *format, ...)
|
|||
* - avoid outputting large strings (512 bytes is the current maximum length
|
||||
* that is guaranteed to be printed in all platforms)
|
||||
*/
|
||||
#if !defined(PIPE_OS_HAIKU)
|
||||
static INLINE void
|
||||
debug_printf(const char *format, ...)
|
||||
{
|
||||
|
@ -101,6 +102,7 @@ debug_printf(const char *format, ...)
|
|||
#endif
|
||||
}
|
||||
|
||||
#endif /* !PIPE_OS_HAIKU */
|
||||
|
||||
/*
|
||||
* ... isn't portable so we need to pass arguments in parentheses.
|
||||
|
|
|
@ -0,0 +1,94 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright © 2009 Jakob Bornecrantz
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
#ifndef U_FIFO_H
|
||||
#define U_FIFO_H
|
||||
|
||||
#include "util/u_memory.h"
|
||||
|
||||
/**
 * Header of a fixed-capacity FIFO of void pointers.
 *
 * The pointer slots are not a member: they live in the same allocation,
 * directly after this header (the accessors reach them via &fifo[1]).
 *
 *  head - slot index of the most recently added element
 *  tail - slot index of the most recently popped element
 *  num  - fill count compared against 'size' / zero by add and pop
 *  size - capacity in slots, fixed at creation
 *
 * head and tail are advanced (wrapping to 0 at 'size') before the slot
 * is accessed.
 */
struct util_fifo
|
||||
{
|
||||
size_t head;
|
||||
size_t tail;
|
||||
size_t num;
|
||||
size_t size;
|
||||
};
|
||||
|
||||
static INLINE struct util_fifo *
|
||||
u_fifo_create(size_t size)
|
||||
{
|
||||
struct util_fifo *fifo;
|
||||
fifo = MALLOC(sizeof(*fifo) + size * sizeof(void*));
|
||||
|
||||
fifo->head = 0;
|
||||
fifo->tail = 0;
|
||||
fifo->num = 0;
|
||||
fifo->size = size;
|
||||
|
||||
return fifo;
|
||||
}
|
||||
|
||||
static INLINE boolean
|
||||
u_fifo_add(struct util_fifo *fifo, void *ptr)
|
||||
{
|
||||
void **array = (void**)&fifo[1];
|
||||
if (fifo->num >= fifo->size)
|
||||
return FALSE;
|
||||
|
||||
if (++fifo->head >= fifo->size)
|
||||
fifo->head = 0;
|
||||
|
||||
array[fifo->head] = ptr;
|
||||
|
||||
++fifo->num;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static INLINE boolean
|
||||
u_fifo_pop(struct util_fifo *fifo, void **ptr)
|
||||
{
|
||||
void **array = (void**)&fifo[1];
|
||||
|
||||
if (!fifo->num)
|
||||
return FALSE;
|
||||
|
||||
if (++fifo->tail >= fifo->size)
|
||||
fifo->tail = 0;
|
||||
|
||||
*ptr = array[fifo->tail];
|
||||
|
||||
++fifo->num;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/**
 * Release a FIFO by handing it to FREE().
 *
 * Only the FIFO allocation itself is released; pointers still stored in
 * its slots are not touched.
 */
static INLINE void
|
||||
u_fifo_destroy(struct util_fifo *fifo)
|
||||
{
|
||||
FREE(fifo);
|
||||
}
|
||||
|
||||
#endif
|
|
@ -2,7 +2,7 @@ PIPE_FORMAT_A8R8G8B8_UNORM , arith , 1, 1, un8 , un8 , un8 , un8 , zyxw,
|
|||
PIPE_FORMAT_X8R8G8B8_UNORM , arith , 1, 1, un8 , un8 , un8 , un8 , zyx1, rgb
|
||||
PIPE_FORMAT_B8G8R8A8_UNORM , arith , 1, 1, un8 , un8 , un8 , un8 , yzwx, rgb
|
||||
PIPE_FORMAT_B8G8R8X8_UNORM , arith , 1, 1, un8 , un8 , un8 , un8 , yzw1, rgb
|
||||
PIPE_FORMAT_A1R5G5B5_UNORM , arith , 1, 1, un1 , un5 , un5 , un5 , zyxw, rgb
|
||||
PIPE_FORMAT_A1R5G5B5_UNORM , arith , 1, 1, un5 , un5 , un5 , un1 , zyxw, rgb
|
||||
PIPE_FORMAT_A4R4G4B4_UNORM , arith , 1, 1, un4 , un4 , un4 , un4 , zyxw, rgb
|
||||
PIPE_FORMAT_R5G6B5_UNORM , arith , 1, 1, un5 , un6 , un5 , , zyx1, rgb
|
||||
PIPE_FORMAT_A2B10G10R10_UNORM , arith , 1, 1, un10, un10, un10, un2 , xyzw, rgb
|
||||
|
@ -14,10 +14,10 @@ PIPE_FORMAT_L16_UNORM , arith , 1, 1, un16, , , , xxx1,
|
|||
PIPE_FORMAT_Z16_UNORM , array , 1, 1, un16, , , , x___, zs
|
||||
PIPE_FORMAT_Z32_UNORM , array , 1, 1, un32, , , , x___, zs
|
||||
PIPE_FORMAT_Z32_FLOAT , array , 1, 1, f32 , , , , x___, zs
|
||||
PIPE_FORMAT_S8Z24_UNORM , arith , 1, 1, un8 , un24, , , yx__, zs
|
||||
PIPE_FORMAT_Z24S8_UNORM , arith , 1, 1, un24, un8 , , , xy__, zs
|
||||
PIPE_FORMAT_X8Z24_UNORM , arith , 1, 1, un8 , un24, , , y___, zs
|
||||
PIPE_FORMAT_Z24X8_UNORM , arith , 1, 1, un24, un8 , , , x___, zs
|
||||
PIPE_FORMAT_S8Z24_UNORM , arith , 1, 1, un24, un8 , , , xy__, zs
|
||||
PIPE_FORMAT_Z24S8_UNORM , arith , 1, 1, un8 , un24, , , yx__, zs
|
||||
PIPE_FORMAT_X8Z24_UNORM , arith , 1, 1, un24, un8 , , , x___, zs
|
||||
PIPE_FORMAT_Z24X8_UNORM , arith , 1, 1, un8 , un24, , , y___, zs
|
||||
PIPE_FORMAT_S8_UNORM , array , 1, 1, un8 , , , , _x__, zs
|
||||
PIPE_FORMAT_R64_FLOAT , array , 1, 1, f64 , , , , x001, rgb
|
||||
PIPE_FORMAT_R64G64_FLOAT , array , 1, 1, f64 , f64 , , , xy01, rgb
|
||||
|
|
|
|
@ -340,11 +340,23 @@ util_is_inf_or_nan(float x)
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Test whether x is a power of two.
|
||||
*/
|
||||
static INLINE boolean
|
||||
util_is_pot(unsigned x)
|
||||
{
|
||||
return (x & (x - 1)) == 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Find first bit set in word. Least significant bit is 1.
|
||||
* Return 0 if no bits set.
|
||||
*/
|
||||
#if defined(_MSC_VER) && _MSC_VER >= 1300
|
||||
#if defined(_MSC_VER) && _MSC_VER >= 1300 && (_M_IX86 || _M_AMD64 || _M_IA64)
|
||||
unsigned char _BitScanForward(unsigned long* Index, unsigned long Mask);
|
||||
#pragma intrinsic(_BitScanForward)
|
||||
static INLINE
|
||||
unsigned long ffs( unsigned long u )
|
||||
{
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#if defined(PIPE_SUBSYSTEM_WINDOWS_USER)
|
||||
# include <winsock2.h>
|
||||
# include <windows.h>
|
||||
#elif defined(PIPE_OS_LINUX)
|
||||
#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU)
|
||||
# include <sys/socket.h>
|
||||
# include <netinet/in.h>
|
||||
# include <unistd.h>
|
||||
|
@ -54,7 +54,7 @@ u_socket_close(int s)
|
|||
if (s < 0)
|
||||
return;
|
||||
|
||||
#if defined(PIPE_OS_LINUX)
|
||||
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU)
|
||||
shutdown(s, SHUT_RDWR);
|
||||
close(s);
|
||||
#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
|
||||
|
@ -169,7 +169,7 @@ u_socket_listen_on_port(uint16_t portnum)
|
|||
void
|
||||
u_socket_block(int s, boolean block)
|
||||
{
|
||||
#if defined(PIPE_OS_LINUX)
|
||||
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU)
|
||||
int old = fcntl(s, F_GETFL, 0);
|
||||
if (old == -1)
|
||||
return;
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
#if defined(PIPE_SUBSYSTEM_WINDOWS_USER)
|
||||
# define PIPE_HAVE_SOCKETS
|
||||
#elif defined(PIPE_OS_LINUX)
|
||||
#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU)
|
||||
# define PIPE_HAVE_SOCKETS
|
||||
#endif
|
||||
|
||||
|
|
|
@ -52,8 +52,7 @@ pass_user_buffer_create(struct pipe_screen *screen,
|
|||
unsigned bytes)
|
||||
{
|
||||
struct pipe_buffer *buffer =
|
||||
screen->winsys->user_buffer_create(screen->winsys,
|
||||
ptr, bytes);
|
||||
screen->winsys->user_buffer_create(screen->winsys, ptr, bytes);
|
||||
|
||||
buffer->screen = screen;
|
||||
|
||||
|
@ -69,9 +68,8 @@ pass_surface_buffer_create(struct pipe_screen *screen,
|
|||
unsigned *stride)
|
||||
{
|
||||
struct pipe_buffer *buffer =
|
||||
screen->winsys->surface_buffer_create(screen->winsys,
|
||||
width, height,
|
||||
format, usage, tex_usage, stride);
|
||||
screen->winsys->surface_buffer_create(screen->winsys, width, height,
|
||||
format, usage, tex_usage, stride);
|
||||
|
||||
buffer->screen = screen;
|
||||
|
||||
|
@ -83,8 +81,7 @@ pass_buffer_map(struct pipe_screen *screen,
|
|||
struct pipe_buffer *buf,
|
||||
unsigned usage)
|
||||
{
|
||||
return screen->winsys->buffer_map(screen->winsys,
|
||||
buf, usage);
|
||||
return screen->winsys->buffer_map(screen->winsys, buf, usage);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -106,8 +103,7 @@ pass_flush_frontbuffer(struct pipe_screen *screen,
|
|||
struct pipe_surface *surf,
|
||||
void *context_private)
|
||||
{
|
||||
screen->winsys->flush_frontbuffer(screen->winsys,
|
||||
surf, context_private);
|
||||
screen->winsys->flush_frontbuffer(screen->winsys, surf, context_private);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -115,8 +111,7 @@ pass_fence_reference(struct pipe_screen *screen,
|
|||
struct pipe_fence_handle **ptr,
|
||||
struct pipe_fence_handle *fence)
|
||||
{
|
||||
screen->winsys->fence_reference(screen->winsys,
|
||||
ptr, fence);
|
||||
screen->winsys->fence_reference(screen->winsys, ptr, fence);
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -124,8 +119,7 @@ pass_fence_signalled(struct pipe_screen *screen,
|
|||
struct pipe_fence_handle *fence,
|
||||
unsigned flag)
|
||||
{
|
||||
return screen->winsys->fence_signalled(screen->winsys,
|
||||
fence, flag);
|
||||
return screen->winsys->fence_signalled(screen->winsys, fence, flag);
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -133,11 +127,11 @@ pass_fence_finish(struct pipe_screen *screen,
|
|||
struct pipe_fence_handle *fence,
|
||||
unsigned flag)
|
||||
{
|
||||
return screen->winsys->fence_finish(screen->winsys,
|
||||
fence, flag);
|
||||
return screen->winsys->fence_finish(screen->winsys, fence, flag);
|
||||
}
|
||||
|
||||
void u_simple_screen_init(struct pipe_screen *screen)
|
||||
void
|
||||
u_simple_screen_init(struct pipe_screen *screen)
|
||||
{
|
||||
screen->buffer_create = pass_buffer_create;
|
||||
screen->user_buffer_create = pass_user_buffer_create;
|
||||
|
@ -152,7 +146,8 @@ void u_simple_screen_init(struct pipe_screen *screen)
|
|||
screen->fence_finish = pass_fence_finish;
|
||||
}
|
||||
|
||||
const char* u_simple_screen_winsys_name(struct pipe_screen *screen)
|
||||
const char *
|
||||
u_simple_screen_winsys_name(struct pipe_screen *screen)
|
||||
{
|
||||
return screen->winsys->get_name(screen->winsys);
|
||||
}
|
||||
|
|
|
@ -34,14 +34,8 @@
|
|||
|
||||
|
||||
#include "pipe/p_context.h"
|
||||
#include "util/u_debug.h"
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_screen.h"
|
||||
#include "pipe/p_shader_tokens.h"
|
||||
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_simple_shaders.h"
|
||||
|
||||
#include "tgsi/tgsi_ureg.h"
|
||||
|
||||
|
||||
|
@ -67,9 +61,7 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe,
|
|||
struct ureg_src src;
|
||||
struct ureg_dst dst;
|
||||
|
||||
src = ureg_DECL_vs_input( ureg,
|
||||
semantic_names[i],
|
||||
semantic_indexes[i]);
|
||||
src = ureg_DECL_vs_input( ureg, i );
|
||||
|
||||
dst = ureg_DECL_output( ureg,
|
||||
semantic_names[i],
|
||||
|
@ -131,8 +123,6 @@ util_make_fragment_tex_shader(struct pipe_context *pipe )
|
|||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Make simple fragment color pass-through shader.
|
||||
*/
|
||||
|
|
|
@ -32,7 +32,7 @@
|
|||
|
||||
#include "pipe/p_config.h"
|
||||
|
||||
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_OS_SOLARIS)
|
||||
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU)
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
|
|
|
@ -170,7 +170,7 @@ x8r8g8b8_get_tile_rgba(const unsigned *src,
|
|||
pRow[0] = ubyte_to_float((pixel >> 16) & 0xff);
|
||||
pRow[1] = ubyte_to_float((pixel >> 8) & 0xff);
|
||||
pRow[2] = ubyte_to_float((pixel >> 0) & 0xff);
|
||||
pRow[3] = ubyte_to_float(0xff);
|
||||
pRow[3] = 1.0F;
|
||||
}
|
||||
p += dst_stride;
|
||||
}
|
||||
|
@ -394,6 +394,52 @@ r5g6b5_put_tile_rgba(ushort *dst,
|
|||
|
||||
|
||||
|
||||
/*** PIPE_FORMAT_R8G8B8_UNORM ***/
|
||||
|
||||
static void
|
||||
r8g8b8_get_tile_rgba(const ubyte *src,
|
||||
unsigned w, unsigned h,
|
||||
float *p,
|
||||
unsigned dst_stride)
|
||||
{
|
||||
unsigned i, j;
|
||||
|
||||
for (i = 0; i < h; i++) {
|
||||
float *pRow = p;
|
||||
for (j = 0; j < w; j++, pRow += 4) {
|
||||
pRow[0] = ubyte_to_float(src[0]);
|
||||
pRow[1] = ubyte_to_float(src[1]);
|
||||
pRow[2] = ubyte_to_float(src[2]);
|
||||
pRow[3] = 1.0f;
|
||||
src += 3;
|
||||
}
|
||||
p += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
r8g8b8_put_tile_rgba(ubyte *dst,
|
||||
unsigned w, unsigned h,
|
||||
const float *p,
|
||||
unsigned src_stride)
|
||||
{
|
||||
unsigned i, j;
|
||||
|
||||
for (i = 0; i < h; i++) {
|
||||
const float *pRow = p;
|
||||
for (j = 0; j < w; j++, pRow += 4) {
|
||||
dst[0] = float_to_ubyte(pRow[0]);
|
||||
dst[1] = float_to_ubyte(pRow[1]);
|
||||
dst[2] = float_to_ubyte(pRow[2]);
|
||||
dst += 3;
|
||||
}
|
||||
p += src_stride;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*** PIPE_FORMAT_Z16_UNORM ***/
|
||||
|
||||
/**
|
||||
|
@ -1106,6 +1152,9 @@ pipe_tile_raw_to_rgba(enum pipe_format format,
|
|||
case PIPE_FORMAT_R5G6B5_UNORM:
|
||||
r5g6b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride);
|
||||
break;
|
||||
case PIPE_FORMAT_R8G8B8_UNORM:
|
||||
r8g8b8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride);
|
||||
break;
|
||||
case PIPE_FORMAT_L8_UNORM:
|
||||
l8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride);
|
||||
break;
|
||||
|
@ -1222,6 +1271,9 @@ pipe_put_tile_rgba(struct pipe_transfer *pt,
|
|||
case PIPE_FORMAT_R5G6B5_UNORM:
|
||||
r5g6b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride);
|
||||
break;
|
||||
case PIPE_FORMAT_R8G8B8_UNORM:
|
||||
r8g8b8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);
|
||||
break;
|
||||
case PIPE_FORMAT_R8G8B8A8_UNORM:
|
||||
assert(0);
|
||||
break;
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
|
||||
#include "pipe/p_config.h"
|
||||
|
||||
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
|
||||
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU)
|
||||
#include <sys/time.h>
|
||||
#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
|
||||
#include <windows.h>
|
||||
|
@ -77,7 +77,7 @@ util_time_get_frequency(void)
|
|||
void
|
||||
util_time_get(struct util_time *t)
|
||||
{
|
||||
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
|
||||
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU)
|
||||
gettimeofday(&t->tv, NULL);
|
||||
#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
|
||||
LONGLONG temp;
|
||||
|
@ -102,7 +102,7 @@ util_time_add(const struct util_time *t1,
|
|||
int64_t usecs,
|
||||
struct util_time *t2)
|
||||
{
|
||||
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
|
||||
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU)
|
||||
t2->tv.tv_sec = t1->tv.tv_sec + usecs / 1000000;
|
||||
t2->tv.tv_usec = t1->tv.tv_usec + usecs % 1000000;
|
||||
#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)
|
||||
|
@ -124,7 +124,7 @@ int64_t
|
|||
util_time_diff(const struct util_time *t1,
|
||||
const struct util_time *t2)
|
||||
{
|
||||
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
|
||||
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU)
|
||||
return (t2->tv.tv_usec - t1->tv.tv_usec) +
|
||||
(t2->tv.tv_sec - t1->tv.tv_sec)*1000000;
|
||||
#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)
|
||||
|
@ -144,7 +144,7 @@ util_time_micros( void )
|
|||
|
||||
util_time_get(&t1);
|
||||
|
||||
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
|
||||
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU)
|
||||
return t1.tv.tv_usec + t1.tv.tv_sec*1000000LL;
|
||||
#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)
|
||||
util_time_get_frequency();
|
||||
|
@ -166,7 +166,7 @@ static INLINE int
|
|||
util_time_compare(const struct util_time *t1,
|
||||
const struct util_time *t2)
|
||||
{
|
||||
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
|
||||
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU)
|
||||
if (t1->tv.tv_sec < t2->tv.tv_sec)
|
||||
return -1;
|
||||
else if(t1->tv.tv_sec > t2->tv.tv_sec)
|
||||
|
|
|
@ -43,6 +43,11 @@
|
|||
#include <unistd.h> /* usleep */
|
||||
#endif
|
||||
|
||||
#if defined(PIPE_OS_HAIKU)
|
||||
#include <sys/time.h> /* timeval */
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "pipe/p_compiler.h"
|
||||
|
||||
|
||||
|
@ -58,7 +63,7 @@ extern "C" {
|
|||
*/
|
||||
struct util_time
|
||||
{
|
||||
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
|
||||
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU)
|
||||
struct timeval tv;
|
||||
#else
|
||||
int64_t counter;
|
||||
|
@ -89,7 +94,7 @@ util_time_timeout(const struct util_time *start,
|
|||
const struct util_time *end,
|
||||
const struct util_time *curr);
|
||||
|
||||
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
|
||||
#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_HAIKU)
|
||||
#define util_time_sleep usleep
|
||||
#else
|
||||
void
|
||||
|
|
|
@ -64,8 +64,6 @@ cell_get_param(struct pipe_screen *screen, int param)
|
|||
return 1;
|
||||
case PIPE_CAP_GLSL:
|
||||
return 1;
|
||||
case PIPE_CAP_S3TC:
|
||||
return 0;
|
||||
case PIPE_CAP_ANISOTROPIC_FILTER:
|
||||
return 0;
|
||||
case PIPE_CAP_POINT_SPRITE:
|
||||
|
|
|
@ -44,6 +44,7 @@
|
|||
#include "pipe/p_inlines.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_fifo.h"
|
||||
|
||||
#include "i915_context.h"
|
||||
#include "i915_reg.h"
|
||||
|
@ -76,8 +77,13 @@ struct i915_vbuf_render {
|
|||
size_t vbo_size;
|
||||
size_t vbo_offset;
|
||||
void *vbo_ptr;
|
||||
size_t vbo_alloc_size;
|
||||
size_t vbo_max_used;
|
||||
|
||||
/* stuff for the pool */
|
||||
struct util_fifo *pool_fifo;
|
||||
unsigned pool_used;
|
||||
unsigned pool_buffer_size;
|
||||
boolean pool_not_used;
|
||||
};
|
||||
|
||||
|
||||
|
@ -105,6 +111,55 @@ i915_vbuf_render_get_vertex_info(struct vbuf_render *render)
|
|||
return &i915->current.vertex_info;
|
||||
}
|
||||
|
||||
static boolean
|
||||
i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size)
|
||||
{
|
||||
struct i915_context *i915 = i915_render->i915;
|
||||
|
||||
if (i915_render->vbo_size < size + i915_render->vbo_offset)
|
||||
return FALSE;
|
||||
|
||||
if (i915->vbo_flushed)
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static void
|
||||
i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size)
|
||||
{
|
||||
struct i915_context *i915 = i915_render->i915;
|
||||
struct intel_winsys *iws = i915->iws;
|
||||
|
||||
if (i915_render->vbo) {
|
||||
if (i915_render->pool_not_used)
|
||||
iws->buffer_destroy(iws, i915_render->vbo);
|
||||
else
|
||||
u_fifo_add(i915_render->pool_fifo, i915_render->vbo);
|
||||
i915_render->vbo = NULL;
|
||||
}
|
||||
|
||||
i915->vbo_flushed = 0;
|
||||
|
||||
i915_render->vbo_size = MAX2(size, i915_render->pool_buffer_size);
|
||||
i915_render->vbo_offset = 0;
|
||||
|
||||
if (i915_render->vbo_size != i915_render->pool_buffer_size) {
|
||||
i915_render->pool_not_used = TRUE;
|
||||
i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64,
|
||||
INTEL_NEW_VERTEX);
|
||||
} else {
|
||||
i915_render->pool_not_used = FALSE;
|
||||
|
||||
if (i915_render->pool_used >= 2) {
|
||||
FLUSH_BATCH(NULL);
|
||||
i915->vbo_flushed = 0;
|
||||
i915_render->pool_used = 0;
|
||||
}
|
||||
u_fifo_pop(i915_render->pool_fifo, (void**)&i915_render->vbo);
|
||||
}
|
||||
}
|
||||
|
||||
static boolean
|
||||
i915_vbuf_render_allocate_vertices(struct vbuf_render *render,
|
||||
ushort vertex_size,
|
||||
|
@ -112,27 +167,17 @@ i915_vbuf_render_allocate_vertices(struct vbuf_render *render,
|
|||
{
|
||||
struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
|
||||
struct i915_context *i915 = i915_render->i915;
|
||||
struct intel_winsys *iws = i915->iws;
|
||||
size_t size = (size_t)vertex_size * (size_t)nr_vertices;
|
||||
|
||||
/* FIXME: handle failure */
|
||||
assert(!i915->vbo);
|
||||
|
||||
if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) {
|
||||
} else {
|
||||
i915->vbo_flushed = 0;
|
||||
if (i915_render->vbo) {
|
||||
iws->buffer_destroy(iws, i915_render->vbo);
|
||||
i915_render->vbo = NULL;
|
||||
}
|
||||
}
|
||||
if (!i915_vbuf_render_reserve(i915_render, size)) {
|
||||
|
||||
if (!i915_render->vbo) {
|
||||
i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size);
|
||||
i915_render->vbo_offset = 0;
|
||||
i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64,
|
||||
INTEL_NEW_VERTEX);
|
||||
if (i915->vbo_flushed)
|
||||
i915_render->pool_used = 0;
|
||||
|
||||
i915_vbuf_render_new_buf(i915_render, size);
|
||||
}
|
||||
|
||||
i915_render->vertex_size = vertex_size;
|
||||
|
@ -504,6 +549,7 @@ i915_vbuf_render_create(struct i915_context *i915)
|
|||
{
|
||||
struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render);
|
||||
struct intel_winsys *iws = i915->iws;
|
||||
int i;
|
||||
|
||||
i915_render->i915 = i915;
|
||||
|
||||
|
@ -524,14 +570,24 @@ i915_vbuf_render_create(struct i915_context *i915)
|
|||
i915_render->base.release_vertices = i915_vbuf_render_release_vertices;
|
||||
i915_render->base.destroy = i915_vbuf_render_destroy;
|
||||
|
||||
i915_render->vbo_alloc_size = 128 * 4096;
|
||||
i915_render->vbo_size = i915_render->vbo_alloc_size;
|
||||
|
||||
i915_render->vbo = NULL;
|
||||
i915_render->vbo_size = 0;
|
||||
i915_render->vbo_offset = 0;
|
||||
i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64,
|
||||
INTEL_NEW_VERTEX);
|
||||
|
||||
i915_render->pool_used = FALSE;
|
||||
i915_render->pool_buffer_size = 128 * 4096;
|
||||
i915_render->pool_fifo = u_fifo_create(6);
|
||||
for (i = 0; i < 6; i++)
|
||||
u_fifo_add(i915_render->pool_fifo,
|
||||
iws->buffer_create(iws, i915_render->pool_buffer_size, 64,
|
||||
INTEL_NEW_VERTEX));
|
||||
|
||||
#if 0
|
||||
/* TODO JB: is this realy needed? */
|
||||
i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE);
|
||||
iws->buffer_unmap(iws, i915_render->vbo);
|
||||
#endif
|
||||
|
||||
return &i915_render->base;
|
||||
}
|
||||
|
|
|
@ -101,8 +101,6 @@ i915_get_param(struct pipe_screen *screen, int param)
|
|||
return 1;
|
||||
case PIPE_CAP_GLSL:
|
||||
return 0;
|
||||
case PIPE_CAP_S3TC:
|
||||
return 0;
|
||||
case PIPE_CAP_ANISOTROPIC_FILTER:
|
||||
return 0;
|
||||
case PIPE_CAP_POINT_SPRITE:
|
||||
|
|
|
@ -150,6 +150,17 @@ struct intel_winsys {
|
|||
void (*buffer_unmap)(struct intel_winsys *iws,
|
||||
struct intel_buffer *buffer);
|
||||
|
||||
/**
|
||||
* Write to a buffer.
|
||||
*
|
||||
* Arguments follows pwrite(2)
|
||||
*/
|
||||
int (*buffer_write)(struct intel_winsys *iws,
|
||||
struct intel_buffer *dst,
|
||||
const void *src,
|
||||
size_t size,
|
||||
size_t offset);
|
||||
|
||||
void (*buffer_destroy)(struct intel_winsys *iws,
|
||||
struct intel_buffer *buffer);
|
||||
/*@}*/
|
||||
|
|
|
@ -85,8 +85,6 @@ brw_get_param(struct pipe_screen *screen, int param)
|
|||
return 1;
|
||||
case PIPE_CAP_GLSL:
|
||||
return 0;
|
||||
case PIPE_CAP_S3TC:
|
||||
return 0;
|
||||
case PIPE_CAP_ANISOTROPIC_FILTER:
|
||||
return 0;
|
||||
case PIPE_CAP_POINT_SPRITE:
|
||||
|
|
|
@ -3,6 +3,8 @@ include $(TOP)/configs/current
|
|||
|
||||
LIBNAME = llvmpipe
|
||||
|
||||
CFLAGS += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS
|
||||
|
||||
C_SOURCES = \
|
||||
lp_bld_alpha.c \
|
||||
lp_bld_arit.c \
|
||||
|
@ -15,9 +17,11 @@ C_SOURCES = \
|
|||
lp_bld_depth.c \
|
||||
lp_bld_flow.c \
|
||||
lp_bld_format_aos.c \
|
||||
lp_bld_format_soa.c \
|
||||
lp_bld_interp.c \
|
||||
lp_bld_intr.c \
|
||||
lp_bld_logic.c \
|
||||
lp_bld_sample_soa.c \
|
||||
lp_bld_swizzle.c \
|
||||
lp_bld_struct.c \
|
||||
lp_bld_tgsi_soa.c \
|
||||
|
@ -44,7 +48,8 @@ C_SOURCES = \
|
|||
lp_state_vs.c \
|
||||
lp_surface.c \
|
||||
lp_tex_cache.c \
|
||||
lp_tex_sample.c \
|
||||
lp_tex_sample_c.c \
|
||||
lp_tex_sample_llvm.c \
|
||||
lp_texture.c \
|
||||
lp_tile_cache.c \
|
||||
lp_tile_soa.c
|
||||
|
|
|
@ -8,13 +8,16 @@ Done so far is:
|
|||
|
||||
- the whole fragment pipeline is code generated in a single function
|
||||
|
||||
- input interpolation
|
||||
|
||||
- depth testing
|
||||
|
||||
- texture sampling (not all state/formats are supported)
|
||||
|
||||
- fragment shader TGSI translation
|
||||
- same level of support as the TGSI SSE2 exec machine, with the exception
|
||||
we don't fallback to TGSI interpretation when an unsupported opcode is
|
||||
found, but just ignore it
|
||||
- texture sampling via an intrinsic call
|
||||
- done in SoA layout
|
||||
- input interpolation also code generated
|
||||
|
||||
|
@ -28,16 +31,17 @@ Done so far is:
|
|||
any width and length
|
||||
- not all operations are implemented for these types yet though
|
||||
|
||||
Most mesa/progs/demos/* work. Speed is on par with Keith's softpipe-opt branch,
|
||||
which includes hand written fast implementations for common cases.
|
||||
Most mesa/progs/demos/* work.
|
||||
|
||||
To do (probably in this order):
|
||||
|
||||
- code generate stipple and stencil testing
|
||||
|
||||
- code generate texture sampling
|
||||
- translate the remaining bits of texture sampling state
|
||||
|
||||
- translate TGSI control flow instructions, and all other remaining opcodes
|
||||
|
||||
- integrate with the draw module for VS code generation
|
||||
|
||||
- code generate the triangle setup and rasterization
|
||||
|
||||
|
@ -93,7 +97,7 @@ Alternatively, you can build it with GNU make, if you prefer, by invoking it as
|
|||
|
||||
make linux-llvm
|
||||
|
||||
but the rest of these instructions assume scons is used.
|
||||
but the rest of these instructions assume that scons is used.
|
||||
|
||||
|
||||
Using
|
||||
|
@ -108,6 +112,9 @@ or
|
|||
|
||||
export LD_LIBRARY_PATH=$PWD/build/linux-x86-debug/lib:$LD_LIBRARY_PATH
|
||||
|
||||
For performance evaluation pass debug=no to scons, and use the corresponding
|
||||
lib directory without the "-debug" suffix.
|
||||
|
||||
|
||||
Unit testing
|
||||
============
|
||||
|
@ -119,7 +126,7 @@ build/linux-???-debug/gallium/drivers/llvmpipe:
|
|||
- lp_test_conv: SIMD vector conversion
|
||||
- lp_test_format: pixel unpacking/packing
|
||||
|
||||
Some of this tests can output results and benchmarks to a tab-seperated-file
|
||||
Some of these tests can output results and benchmarks to a tab-separated file
|
||||
for posterior analysis, e.g.:
|
||||
|
||||
build/linux-x86_64-debug/gallium/drivers/llvmpipe/lp_test_blend -o blend.tsv
|
||||
|
@ -133,10 +140,10 @@ Development Notes
|
|||
at the top of the lp_bld_*.c functions.
|
||||
|
||||
- All lp_bld_*.[ch] are isolated from the rest of the driver, and could/may be
|
||||
put in a standalone Gallium state -> LLVM IR translation module.
|
||||
put in a stand-alone Gallium state -> LLVM IR translation module.
|
||||
|
||||
- We use LLVM-C bindings for now. They are not documented, but follow the C++
|
||||
interfaces very closely, and appear to be complete enough for code
|
||||
generation. See
|
||||
http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html
|
||||
for a standalone example.
|
||||
for a stand-alone example.
|
||||
|
|
|
@ -3,7 +3,7 @@ Import('*')
|
|||
env = env.Clone()
|
||||
|
||||
env.Tool('llvm')
|
||||
if 'LLVM_VERSION' not in env:
|
||||
if not env.has_key('LLVM_VERSION'):
|
||||
print 'warning: LLVM not found: not building llvmpipe'
|
||||
Return()
|
||||
|
||||
|
@ -23,8 +23,10 @@ llvmpipe = env.ConvenienceLibrary(
|
|||
'lp_bld_depth.c',
|
||||
'lp_bld_flow.c',
|
||||
'lp_bld_format_aos.c',
|
||||
'lp_bld_format_soa.c',
|
||||
'lp_bld_interp.c',
|
||||
'lp_bld_intr.c',
|
||||
'lp_bld_sample_soa.c',
|
||||
'lp_bld_struct.c',
|
||||
'lp_bld_logic.c',
|
||||
'lp_bld_swizzle.c',
|
||||
|
@ -52,7 +54,8 @@ llvmpipe = env.ConvenienceLibrary(
|
|||
'lp_state_vs.c',
|
||||
'lp_surface.c',
|
||||
'lp_tex_cache.c',
|
||||
'lp_tex_sample.c',
|
||||
'lp_tex_sample_c.c',
|
||||
'lp_tex_sample_llvm.c',
|
||||
'lp_texture.c',
|
||||
'lp_tile_cache.c',
|
||||
'lp_tile_soa.c',
|
||||
|
|
|
@ -45,7 +45,7 @@
|
|||
void
|
||||
lp_build_alpha_test(LLVMBuilderRef builder,
|
||||
const struct pipe_alpha_state *state,
|
||||
union lp_type type,
|
||||
struct lp_type type,
|
||||
struct lp_build_mask_context *mask,
|
||||
LLVMValueRef alpha,
|
||||
LLVMValueRef ref)
|
||||
|
|
|
@ -38,14 +38,14 @@
|
|||
#include <llvm-c/Core.h>
|
||||
|
||||
struct pipe_alpha_state;
|
||||
union lp_type;
|
||||
struct lp_type;
|
||||
struct lp_build_mask_context;
|
||||
|
||||
|
||||
void
|
||||
lp_build_alpha_test(LLVMBuilderRef builder,
|
||||
const struct pipe_alpha_state *state,
|
||||
union lp_type type,
|
||||
struct lp_type type,
|
||||
struct lp_build_mask_context *mask,
|
||||
LLVMValueRef alpha,
|
||||
LLVMValueRef ref);
|
||||
|
|
|
@ -65,7 +65,7 @@ lp_build_min_simple(struct lp_build_context *bld,
|
|||
LLVMValueRef a,
|
||||
LLVMValueRef b)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const struct lp_type type = bld->type;
|
||||
const char *intrinsic = NULL;
|
||||
LLVMValueRef cond;
|
||||
|
||||
|
@ -113,7 +113,7 @@ lp_build_max_simple(struct lp_build_context *bld,
|
|||
LLVMValueRef a,
|
||||
LLVMValueRef b)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const struct lp_type type = bld->type;
|
||||
const char *intrinsic = NULL;
|
||||
LLVMValueRef cond;
|
||||
|
||||
|
@ -159,7 +159,7 @@ LLVMValueRef
|
|||
lp_build_comp(struct lp_build_context *bld,
|
||||
LLVMValueRef a)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const struct lp_type type = bld->type;
|
||||
|
||||
if(a == bld->one)
|
||||
return bld->zero;
|
||||
|
@ -188,7 +188,7 @@ lp_build_add(struct lp_build_context *bld,
|
|||
LLVMValueRef a,
|
||||
LLVMValueRef b)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const struct lp_type type = bld->type;
|
||||
LLVMValueRef res;
|
||||
|
||||
if(a == bld->zero)
|
||||
|
@ -241,7 +241,7 @@ lp_build_sub(struct lp_build_context *bld,
|
|||
LLVMValueRef a,
|
||||
LLVMValueRef b)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const struct lp_type type = bld->type;
|
||||
LLVMValueRef res;
|
||||
|
||||
if(b == bld->zero)
|
||||
|
@ -405,7 +405,7 @@ lp_build_mul(struct lp_build_context *bld,
|
|||
LLVMValueRef a,
|
||||
LLVMValueRef b)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const struct lp_type type = bld->type;
|
||||
|
||||
if(a == bld->zero)
|
||||
return bld->zero;
|
||||
|
@ -477,7 +477,7 @@ lp_build_div(struct lp_build_context *bld,
|
|||
LLVMValueRef a,
|
||||
LLVMValueRef b)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const struct lp_type type = bld->type;
|
||||
|
||||
if(a == bld->zero)
|
||||
return bld->zero;
|
||||
|
@ -502,6 +502,31 @@ lp_build_div(struct lp_build_context *bld,
|
|||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_lerp(struct lp_build_context *bld,
|
||||
LLVMValueRef x,
|
||||
LLVMValueRef v0,
|
||||
LLVMValueRef v1)
|
||||
{
|
||||
return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0)));
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_lerp_2d(struct lp_build_context *bld,
|
||||
LLVMValueRef x,
|
||||
LLVMValueRef y,
|
||||
LLVMValueRef v00,
|
||||
LLVMValueRef v01,
|
||||
LLVMValueRef v10,
|
||||
LLVMValueRef v11)
|
||||
{
|
||||
LLVMValueRef v0 = lp_build_lerp(bld, x, v00, v01);
|
||||
LLVMValueRef v1 = lp_build_lerp(bld, x, v10, v11);
|
||||
return lp_build_lerp(bld, y, v0, v1);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Generate min(a, b)
|
||||
* Do checks for special cases.
|
||||
|
@ -565,21 +590,32 @@ LLVMValueRef
|
|||
lp_build_abs(struct lp_build_context *bld,
|
||||
LLVMValueRef a)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const struct lp_type type = bld->type;
|
||||
LLVMTypeRef vec_type = lp_build_vec_type(type);
|
||||
|
||||
if(!type.sign)
|
||||
return a;
|
||||
|
||||
/* XXX: is this really necessary? */
|
||||
if(type.floating) {
|
||||
/* Mask out the sign bit */
|
||||
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
|
||||
LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long)1 << type.width) - 1);
|
||||
a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
|
||||
a = LLVMBuildAnd(bld->builder, a, mask, "");
|
||||
a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
|
||||
return a;
|
||||
}
|
||||
|
||||
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
|
||||
if(!type.floating && type.width*type.length == 128) {
|
||||
LLVMTypeRef vec_type = lp_build_vec_type(type);
|
||||
if(type.width == 8)
|
||||
if(type.width*type.length == 128) {
|
||||
switch(type.width) {
|
||||
case 8:
|
||||
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a);
|
||||
if(type.width == 16)
|
||||
case 16:
|
||||
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a);
|
||||
if(type.width == 32)
|
||||
case 32:
|
||||
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -587,11 +623,189 @@ lp_build_abs(struct lp_build_context *bld,
|
|||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_sgn(struct lp_build_context *bld,
|
||||
LLVMValueRef a)
|
||||
{
|
||||
const struct lp_type type = bld->type;
|
||||
LLVMTypeRef vec_type = lp_build_vec_type(type);
|
||||
LLVMValueRef cond;
|
||||
LLVMValueRef res;
|
||||
|
||||
/* Handle non-zero case */
|
||||
if(!type.sign) {
|
||||
/* if not zero then sign must be positive */
|
||||
res = bld->one;
|
||||
}
|
||||
else if(type.floating) {
|
||||
/* Take the sign bit and add it to 1 constant */
|
||||
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
|
||||
LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
|
||||
LLVMValueRef sign;
|
||||
LLVMValueRef one;
|
||||
sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
|
||||
sign = LLVMBuildAnd(bld->builder, sign, mask, "");
|
||||
one = LLVMConstBitCast(bld->one, int_vec_type);
|
||||
res = LLVMBuildOr(bld->builder, sign, one, "");
|
||||
res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
|
||||
}
|
||||
else
|
||||
{
|
||||
LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0);
|
||||
cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero);
|
||||
res = lp_build_select(bld, cond, bld->one, minus_one);
|
||||
}
|
||||
|
||||
/* Handle zero */
|
||||
cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero);
|
||||
res = lp_build_select(bld, cond, bld->zero, bld->one);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Rounding modes accepted by lp_build_round_sse41().
 *
 * The numeric value is passed unchanged as the integer operand of the
 * llvm.x86.sse41.round.* intrinsics, so the values must not be renumbered.
 */
enum lp_build_round_sse41_mode
{
   LP_BUILD_ROUND_SSE41_NEAREST  = 0,  /* round to nearest */
   LP_BUILD_ROUND_SSE41_FLOOR    = 1,  /* round down */
   LP_BUILD_ROUND_SSE41_CEIL     = 2,  /* round up */
   LP_BUILD_ROUND_SSE41_TRUNCATE = 3   /* round toward zero */
};
|
||||
|
||||
|
||||
static INLINE LLVMValueRef
|
||||
lp_build_round_sse41(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
enum lp_build_round_sse41_mode mode)
|
||||
{
|
||||
const struct lp_type type = bld->type;
|
||||
LLVMTypeRef vec_type = lp_build_vec_type(type);
|
||||
const char *intrinsic;
|
||||
|
||||
assert(type.floating);
|
||||
assert(type.width*type.length == 128);
|
||||
|
||||
switch(type.width) {
|
||||
case 32:
|
||||
intrinsic = "llvm.x86.sse41.round.ps";
|
||||
break;
|
||||
case 64:
|
||||
intrinsic = "llvm.x86.sse41.round.pd";
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
return bld->undef;
|
||||
}
|
||||
|
||||
return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a,
|
||||
LLVMConstInt(LLVMInt32Type(), mode, 0));
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_round(struct lp_build_context *bld,
|
||||
LLVMValueRef a)
|
||||
{
|
||||
const struct lp_type type = bld->type;
|
||||
|
||||
assert(type.floating);
|
||||
|
||||
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
|
||||
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
|
||||
#endif
|
||||
|
||||
/* FIXME */
|
||||
assert(0);
|
||||
return bld->undef;
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_floor(struct lp_build_context *bld,
|
||||
LLVMValueRef a)
|
||||
{
|
||||
const struct lp_type type = bld->type;
|
||||
|
||||
assert(type.floating);
|
||||
|
||||
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
|
||||
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
|
||||
#endif
|
||||
|
||||
/* FIXME */
|
||||
assert(0);
|
||||
return bld->undef;
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_ceil(struct lp_build_context *bld,
|
||||
LLVMValueRef a)
|
||||
{
|
||||
const struct lp_type type = bld->type;
|
||||
|
||||
assert(type.floating);
|
||||
|
||||
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
|
||||
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
|
||||
#endif
|
||||
|
||||
/* FIXME */
|
||||
assert(0);
|
||||
return bld->undef;
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_trunc(struct lp_build_context *bld,
|
||||
LLVMValueRef a)
|
||||
{
|
||||
const struct lp_type type = bld->type;
|
||||
|
||||
assert(type.floating);
|
||||
|
||||
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
|
||||
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE);
|
||||
#endif
|
||||
|
||||
/* FIXME */
|
||||
assert(0);
|
||||
return bld->undef;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Convert to integer, through whichever rounding method that's fastest,
|
||||
* typically truncating to zero.
|
||||
*/
|
||||
LLVMValueRef
|
||||
lp_build_int(struct lp_build_context *bld,
|
||||
LLVMValueRef a)
|
||||
{
|
||||
const struct lp_type type = bld->type;
|
||||
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
|
||||
|
||||
assert(type.floating);
|
||||
|
||||
return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_ifloor(struct lp_build_context *bld,
|
||||
LLVMValueRef a)
|
||||
{
|
||||
a = lp_build_floor(bld, a);
|
||||
a = lp_build_int(bld, a);
|
||||
return a;
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_sqrt(struct lp_build_context *bld,
|
||||
LLVMValueRef a)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const struct lp_type type = bld->type;
|
||||
LLVMTypeRef vec_type = lp_build_vec_type(type);
|
||||
char intrinsic[32];
|
||||
|
||||
|
@ -609,7 +823,7 @@ LLVMValueRef
|
|||
lp_build_rcp(struct lp_build_context *bld,
|
||||
LLVMValueRef a)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const struct lp_type type = bld->type;
|
||||
|
||||
if(a == bld->zero)
|
||||
return bld->undef;
|
||||
|
@ -640,7 +854,7 @@ LLVMValueRef
|
|||
lp_build_rsqrt(struct lp_build_context *bld,
|
||||
LLVMValueRef a)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const struct lp_type type = bld->type;
|
||||
|
||||
assert(type.floating);
|
||||
|
||||
|
@ -661,7 +875,7 @@ LLVMValueRef
|
|||
lp_build_cos(struct lp_build_context *bld,
|
||||
LLVMValueRef a)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const struct lp_type type = bld->type;
|
||||
LLVMTypeRef vec_type = lp_build_vec_type(type);
|
||||
char intrinsic[32];
|
||||
|
||||
|
@ -681,7 +895,7 @@ LLVMValueRef
|
|||
lp_build_sin(struct lp_build_context *bld,
|
||||
LLVMValueRef a)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const struct lp_type type = bld->type;
|
||||
LLVMTypeRef vec_type = lp_build_vec_type(type);
|
||||
char intrinsic[32];
|
||||
|
||||
|
@ -752,7 +966,7 @@ lp_build_polynomial(struct lp_build_context *bld,
|
|||
const double *coeffs,
|
||||
unsigned num_coeffs)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const struct lp_type type = bld->type;
|
||||
LLVMValueRef res = NULL;
|
||||
unsigned i;
|
||||
|
||||
|
@ -800,7 +1014,7 @@ lp_build_exp2_approx(struct lp_build_context *bld,
|
|||
LLVMValueRef *p_frac_part,
|
||||
LLVMValueRef *p_exp2)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const struct lp_type type = bld->type;
|
||||
LLVMTypeRef vec_type = lp_build_vec_type(type);
|
||||
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
|
||||
LLVMValueRef ipart = NULL;
|
||||
|
@ -893,7 +1107,7 @@ lp_build_log2_approx(struct lp_build_context *bld,
|
|||
LLVMValueRef *p_floor_log2,
|
||||
LLVMValueRef *p_log2)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const struct lp_type type = bld->type;
|
||||
LLVMTypeRef vec_type = lp_build_vec_type(type);
|
||||
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@
|
|||
#include <llvm-c/Core.h>
|
||||
|
||||
|
||||
union lp_type type;
|
||||
struct lp_type type;
|
||||
struct lp_build_context;
|
||||
|
||||
|
||||
|
@ -71,6 +71,26 @@ lp_build_div(struct lp_build_context *bld,
|
|||
LLVMValueRef a,
|
||||
LLVMValueRef b);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_lerp(struct lp_build_context *bld,
|
||||
LLVMValueRef x,
|
||||
LLVMValueRef v0,
|
||||
LLVMValueRef v1);
|
||||
|
||||
/**
|
||||
* Bilinear interpolation.
|
||||
*
|
||||
* Values indices are in v_{yx}.
|
||||
*/
|
||||
LLVMValueRef
|
||||
lp_build_lerp_2d(struct lp_build_context *bld,
|
||||
LLVMValueRef x,
|
||||
LLVMValueRef y,
|
||||
LLVMValueRef v00,
|
||||
LLVMValueRef v01,
|
||||
LLVMValueRef v10,
|
||||
LLVMValueRef v11);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_min(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
|
@ -85,6 +105,34 @@ LLVMValueRef
|
|||
lp_build_abs(struct lp_build_context *bld,
|
||||
LLVMValueRef a);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_sgn(struct lp_build_context *bld,
|
||||
LLVMValueRef a);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_round(struct lp_build_context *bld,
|
||||
LLVMValueRef a);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_floor(struct lp_build_context *bld,
|
||||
LLVMValueRef a);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_ceil(struct lp_build_context *bld,
|
||||
LLVMValueRef a);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_trunc(struct lp_build_context *bld,
|
||||
LLVMValueRef a);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_int(struct lp_build_context *bld,
|
||||
LLVMValueRef a);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_ifloor(struct lp_build_context *bld,
|
||||
LLVMValueRef a);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_sqrt(struct lp_build_context *bld,
|
||||
LLVMValueRef a);
|
||||
|
|
|
@ -46,7 +46,7 @@
|
|||
|
||||
|
||||
struct pipe_blend_state;
|
||||
union lp_type;
|
||||
struct lp_type;
|
||||
struct lp_build_context;
|
||||
|
||||
|
||||
|
@ -74,7 +74,7 @@ lp_build_blend_func(struct lp_build_context *bld,
|
|||
LLVMValueRef
|
||||
lp_build_blend_aos(LLVMBuilderRef builder,
|
||||
const struct pipe_blend_state *blend,
|
||||
union lp_type type,
|
||||
struct lp_type type,
|
||||
LLVMValueRef src,
|
||||
LLVMValueRef dst,
|
||||
LLVMValueRef const_,
|
||||
|
@ -84,7 +84,7 @@ lp_build_blend_aos(LLVMBuilderRef builder,
|
|||
void
|
||||
lp_build_blend_soa(LLVMBuilderRef builder,
|
||||
const struct pipe_blend_state *blend,
|
||||
union lp_type type,
|
||||
struct lp_type type,
|
||||
LLVMValueRef src[4],
|
||||
LLVMValueRef dst[4],
|
||||
LLVMValueRef const_[4],
|
||||
|
|
|
@ -303,7 +303,7 @@ lp_build_blend_func(struct lp_build_context *bld,
|
|||
LLVMValueRef
|
||||
lp_build_blend_aos(LLVMBuilderRef builder,
|
||||
const struct pipe_blend_state *blend,
|
||||
union lp_type type,
|
||||
struct lp_type type,
|
||||
LLVMValueRef src,
|
||||
LLVMValueRef dst,
|
||||
LLVMValueRef const_,
|
||||
|
|
|
@ -199,7 +199,7 @@ lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld,
|
|||
void
|
||||
lp_build_blend_soa(LLVMBuilderRef builder,
|
||||
const struct pipe_blend_state *blend,
|
||||
union lp_type type,
|
||||
struct lp_type type,
|
||||
LLVMValueRef src[4],
|
||||
LLVMValueRef dst[4],
|
||||
LLVMValueRef con[4],
|
||||
|
|
|
@ -42,7 +42,7 @@
|
|||
|
||||
|
||||
unsigned
|
||||
lp_mantissa(union lp_type type)
|
||||
lp_mantissa(struct lp_type type)
|
||||
{
|
||||
assert(type.floating);
|
||||
|
||||
|
@ -72,7 +72,7 @@ lp_mantissa(union lp_type type)
|
|||
* Same as lp_const_scale(), but in terms of shifts.
|
||||
*/
|
||||
unsigned
|
||||
lp_const_shift(union lp_type type)
|
||||
lp_const_shift(struct lp_type type)
|
||||
{
|
||||
if(type.floating)
|
||||
return 0;
|
||||
|
@ -86,7 +86,7 @@ lp_const_shift(union lp_type type)
|
|||
|
||||
|
||||
unsigned
|
||||
lp_const_offset(union lp_type type)
|
||||
lp_const_offset(struct lp_type type)
|
||||
{
|
||||
if(type.floating || type.fixed)
|
||||
return 0;
|
||||
|
@ -104,7 +104,7 @@ lp_const_offset(union lp_type type)
|
|||
* else for the fixed points types and normalized integers.
|
||||
*/
|
||||
double
|
||||
lp_const_scale(union lp_type type)
|
||||
lp_const_scale(struct lp_type type)
|
||||
{
|
||||
unsigned long long llscale;
|
||||
double dscale;
|
||||
|
@ -122,7 +122,7 @@ lp_const_scale(union lp_type type)
|
|||
* Minimum value representable by the type.
|
||||
*/
|
||||
double
|
||||
lp_const_min(union lp_type type)
|
||||
lp_const_min(struct lp_type type)
|
||||
{
|
||||
unsigned bits;
|
||||
|
||||
|
@ -158,7 +158,7 @@ lp_const_min(union lp_type type)
|
|||
* Maximum value representable by the type.
|
||||
*/
|
||||
double
|
||||
lp_const_max(union lp_type type)
|
||||
lp_const_max(struct lp_type type)
|
||||
{
|
||||
unsigned bits;
|
||||
|
||||
|
@ -190,7 +190,7 @@ lp_const_max(union lp_type type)
|
|||
|
||||
|
||||
double
|
||||
lp_const_eps(union lp_type type)
|
||||
lp_const_eps(struct lp_type type)
|
||||
{
|
||||
if (type.floating) {
|
||||
switch(type.width) {
|
||||
|
@ -211,7 +211,7 @@ lp_const_eps(union lp_type type)
|
|||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_undef(union lp_type type)
|
||||
lp_build_undef(struct lp_type type)
|
||||
{
|
||||
LLVMTypeRef vec_type = lp_build_vec_type(type);
|
||||
return LLVMGetUndef(vec_type);
|
||||
|
@ -219,7 +219,7 @@ lp_build_undef(union lp_type type)
|
|||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_zero(union lp_type type)
|
||||
lp_build_zero(struct lp_type type)
|
||||
{
|
||||
LLVMTypeRef vec_type = lp_build_vec_type(type);
|
||||
return LLVMConstNull(vec_type);
|
||||
|
@ -227,7 +227,7 @@ lp_build_zero(union lp_type type)
|
|||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_one(union lp_type type)
|
||||
lp_build_one(struct lp_type type)
|
||||
{
|
||||
LLVMTypeRef elem_type;
|
||||
LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
|
||||
|
@ -269,7 +269,7 @@ lp_build_one(union lp_type type)
|
|||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_const_scalar(union lp_type type,
|
||||
lp_build_const_scalar(struct lp_type type,
|
||||
double val)
|
||||
{
|
||||
LLVMTypeRef elem_type = lp_build_elem_type(type);
|
||||
|
@ -295,7 +295,7 @@ lp_build_const_scalar(union lp_type type,
|
|||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_int_const_scalar(union lp_type type,
|
||||
lp_build_int_const_scalar(struct lp_type type,
|
||||
long long val)
|
||||
{
|
||||
LLVMTypeRef elem_type = lp_build_int_elem_type(type);
|
||||
|
@ -312,7 +312,7 @@ lp_build_int_const_scalar(union lp_type type,
|
|||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_const_aos(union lp_type type,
|
||||
lp_build_const_aos(struct lp_type type,
|
||||
double r, double g, double b, double a,
|
||||
const unsigned char *swizzle)
|
||||
{
|
||||
|
@ -352,8 +352,8 @@ lp_build_const_aos(union lp_type type,
|
|||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_const_mask_aos(union lp_type type,
|
||||
boolean cond[4])
|
||||
lp_build_const_mask_aos(struct lp_type type,
|
||||
const boolean cond[4])
|
||||
{
|
||||
LLVMTypeRef elem_type = LLVMIntType(type.width);
|
||||
LLVMValueRef masks[LP_MAX_VECTOR_LENGTH];
|
||||
|
|
|
@ -42,67 +42,67 @@
|
|||
#include <pipe/p_compiler.h>
|
||||
|
||||
|
||||
union lp_type type;
|
||||
struct lp_type type;
|
||||
|
||||
|
||||
unsigned
|
||||
lp_mantissa(union lp_type type);
|
||||
lp_mantissa(struct lp_type type);
|
||||
|
||||
|
||||
unsigned
|
||||
lp_const_shift(union lp_type type);
|
||||
lp_const_shift(struct lp_type type);
|
||||
|
||||
|
||||
unsigned
|
||||
lp_const_offset(union lp_type type);
|
||||
lp_const_offset(struct lp_type type);
|
||||
|
||||
|
||||
double
|
||||
lp_const_scale(union lp_type type);
|
||||
lp_const_scale(struct lp_type type);
|
||||
|
||||
double
|
||||
lp_const_min(union lp_type type);
|
||||
lp_const_min(struct lp_type type);
|
||||
|
||||
|
||||
double
|
||||
lp_const_max(union lp_type type);
|
||||
lp_const_max(struct lp_type type);
|
||||
|
||||
|
||||
double
|
||||
lp_const_eps(union lp_type type);
|
||||
lp_const_eps(struct lp_type type);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_undef(union lp_type type);
|
||||
lp_build_undef(struct lp_type type);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_zero(union lp_type type);
|
||||
lp_build_zero(struct lp_type type);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_one(union lp_type type);
|
||||
lp_build_one(struct lp_type type);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_const_scalar(union lp_type type,
|
||||
lp_build_const_scalar(struct lp_type type,
|
||||
double val);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_int_const_scalar(union lp_type type,
|
||||
lp_build_int_const_scalar(struct lp_type type,
|
||||
long long val);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_const_aos(union lp_type type,
|
||||
lp_build_const_aos(struct lp_type type,
|
||||
double r, double g, double b, double a,
|
||||
const unsigned char *swizzle);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_const_mask_aos(union lp_type type,
|
||||
boolean cond[4]);
|
||||
lp_build_const_mask_aos(struct lp_type type,
|
||||
const boolean cond[4]);
|
||||
|
||||
|
||||
#endif /* !LP_BLD_CONST_H */
|
||||
|
|
|
@ -86,7 +86,7 @@
|
|||
*/
|
||||
LLVMValueRef
|
||||
lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
|
||||
union lp_type src_type,
|
||||
struct lp_type src_type,
|
||||
unsigned dst_width,
|
||||
LLVMValueRef src)
|
||||
{
|
||||
|
@ -122,7 +122,7 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
|
|||
int shift = dst_width - n;
|
||||
res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), "");
|
||||
|
||||
/* Fill in the empty lower bits for added precision? */
|
||||
/* TODO: Fill in the empty lower bits for additional precision? */
|
||||
#if 0
|
||||
{
|
||||
LLVMValueRef msb;
|
||||
|
@ -152,7 +152,7 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
|
|||
LLVMValueRef
|
||||
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
|
||||
unsigned src_width,
|
||||
union lp_type dst_type,
|
||||
struct lp_type dst_type,
|
||||
LLVMValueRef src)
|
||||
{
|
||||
LLVMTypeRef vec_type = lp_build_vec_type(dst_type);
|
||||
|
@ -244,12 +244,12 @@ lp_build_const_pack_shuffle(unsigned n)
|
|||
* Expand the bit width.
|
||||
*
|
||||
* This will only change the number of bits used to represent the values, not the
|
||||
* values themselved.
|
||||
* values themselves.
|
||||
*/
|
||||
static void
|
||||
lp_build_expand(LLVMBuilderRef builder,
|
||||
union lp_type src_type,
|
||||
union lp_type dst_type,
|
||||
struct lp_type src_type,
|
||||
struct lp_type dst_type,
|
||||
LLVMValueRef src,
|
||||
LLVMValueRef *dst, unsigned num_dsts)
|
||||
{
|
||||
|
@ -266,7 +266,7 @@ lp_build_expand(LLVMBuilderRef builder,
|
|||
dst[0] = src;
|
||||
|
||||
while(src_type.width < dst_type.width) {
|
||||
union lp_type new_type = src_type;
|
||||
struct lp_type new_type = src_type;
|
||||
LLVMTypeRef new_vec_type;
|
||||
|
||||
new_type.width *= 2;
|
||||
|
@ -314,8 +314,8 @@ lp_build_expand(LLVMBuilderRef builder,
|
|||
*/
|
||||
static LLVMValueRef
|
||||
lp_build_pack2(LLVMBuilderRef builder,
|
||||
union lp_type src_type,
|
||||
union lp_type dst_type,
|
||||
struct lp_type src_type,
|
||||
struct lp_type dst_type,
|
||||
boolean clamped,
|
||||
LLVMValueRef lo,
|
||||
LLVMValueRef hi)
|
||||
|
@ -391,11 +391,11 @@ lp_build_pack2(LLVMBuilderRef builder,
|
|||
* TODO: Handle saturation consistently.
|
||||
*/
|
||||
static LLVMValueRef
|
||||
lp_build_trunc(LLVMBuilderRef builder,
|
||||
union lp_type src_type,
|
||||
union lp_type dst_type,
|
||||
boolean clamped,
|
||||
const LLVMValueRef *src, unsigned num_srcs)
|
||||
lp_build_pack(LLVMBuilderRef builder,
|
||||
struct lp_type src_type,
|
||||
struct lp_type dst_type,
|
||||
boolean clamped,
|
||||
const LLVMValueRef *src, unsigned num_srcs)
|
||||
{
|
||||
LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
|
||||
unsigned i;
|
||||
|
@ -410,7 +410,7 @@ lp_build_trunc(LLVMBuilderRef builder,
|
|||
tmp[i] = src[i];
|
||||
|
||||
while(src_type.width > dst_type.width) {
|
||||
union lp_type new_type = src_type;
|
||||
struct lp_type new_type = src_type;
|
||||
|
||||
new_type.width /= 2;
|
||||
new_type.length *= 2;
|
||||
|
@ -442,12 +442,12 @@ lp_build_trunc(LLVMBuilderRef builder,
|
|||
*/
|
||||
void
|
||||
lp_build_conv(LLVMBuilderRef builder,
|
||||
union lp_type src_type,
|
||||
union lp_type dst_type,
|
||||
struct lp_type src_type,
|
||||
struct lp_type dst_type,
|
||||
const LLVMValueRef *src, unsigned num_srcs,
|
||||
LLVMValueRef *dst, unsigned num_dsts)
|
||||
{
|
||||
union lp_type tmp_type;
|
||||
struct lp_type tmp_type;
|
||||
LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
|
||||
unsigned num_tmps;
|
||||
unsigned i;
|
||||
|
@ -470,7 +470,7 @@ lp_build_conv(LLVMBuilderRef builder,
|
|||
* Clamp if necessary
|
||||
*/
|
||||
|
||||
if(src_type.value != dst_type.value) {
|
||||
if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) {
|
||||
struct lp_build_context bld;
|
||||
double src_min = lp_const_min(src_type);
|
||||
double dst_min = lp_const_min(dst_type);
|
||||
|
@ -565,7 +565,7 @@ lp_build_conv(LLVMBuilderRef builder,
|
|||
|
||||
if(tmp_type.width > dst_type.width) {
|
||||
assert(num_dsts == 1);
|
||||
tmp[0] = lp_build_trunc(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);
|
||||
tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);
|
||||
tmp_type.width = dst_type.width;
|
||||
tmp_type.length = dst_type.length;
|
||||
num_tmps = 1;
|
||||
|
@ -656,8 +656,8 @@ lp_build_conv(LLVMBuilderRef builder,
|
|||
*/
|
||||
void
|
||||
lp_build_conv_mask(LLVMBuilderRef builder,
|
||||
union lp_type src_type,
|
||||
union lp_type dst_type,
|
||||
struct lp_type src_type,
|
||||
struct lp_type dst_type,
|
||||
const LLVMValueRef *src, unsigned num_srcs,
|
||||
LLVMValueRef *dst, unsigned num_dsts)
|
||||
{
|
||||
|
@ -689,7 +689,7 @@ lp_build_conv_mask(LLVMBuilderRef builder,
|
|||
|
||||
if(src_type.width > dst_type.width) {
|
||||
assert(num_dsts == 1);
|
||||
dst[0] = lp_build_trunc(builder, src_type, dst_type, TRUE, src, num_srcs);
|
||||
dst[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs);
|
||||
}
|
||||
else if(src_type.width < dst_type.width) {
|
||||
assert(num_srcs == 1);
|
||||
|
|
|
@ -40,33 +40,33 @@
|
|||
#include <llvm-c/Core.h>
|
||||
|
||||
|
||||
union lp_type type;
|
||||
struct lp_type type;
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
|
||||
union lp_type src_type,
|
||||
struct lp_type src_type,
|
||||
unsigned dst_width,
|
||||
LLVMValueRef src);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
|
||||
unsigned src_width,
|
||||
union lp_type dst_type,
|
||||
struct lp_type dst_type,
|
||||
LLVMValueRef src);
|
||||
|
||||
|
||||
void
|
||||
lp_build_conv(LLVMBuilderRef builder,
|
||||
union lp_type src_type,
|
||||
union lp_type dst_type,
|
||||
struct lp_type src_type,
|
||||
struct lp_type dst_type,
|
||||
const LLVMValueRef *srcs, unsigned num_srcs,
|
||||
LLVMValueRef *dsts, unsigned num_dsts);
|
||||
|
||||
void
|
||||
lp_build_conv_mask(LLVMBuilderRef builder,
|
||||
union lp_type src_type,
|
||||
union lp_type dst_type,
|
||||
struct lp_type src_type,
|
||||
struct lp_type dst_type,
|
||||
const LLVMValueRef *src, unsigned num_srcs,
|
||||
LLVMValueRef *dst, unsigned num_dsts);
|
||||
|
||||
|
|
|
@ -30,10 +30,27 @@
|
|||
#include <udis86.h>
|
||||
#endif
|
||||
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_debug.h"
|
||||
#include "lp_bld_debug.h"
|
||||
|
||||
|
||||
/**
|
||||
* Check alignment.
|
||||
*
|
||||
* It is important that this check is not implemented as a macro or inlined
|
||||
* function, as the compiler assumptions in respect to alignment of global
|
||||
* and stack variables would often make the check a no op, defeating the
|
||||
* whole purpose of the exercise.
|
||||
*/
|
||||
boolean
|
||||
lp_check_alignment(const void *ptr, unsigned alignment)
|
||||
{
|
||||
assert(util_is_pot(alignment));
|
||||
return ((uintptr_t)ptr & (alignment - 1)) == 0;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
lp_disassemble(const void* func)
|
||||
{
|
||||
|
|
|
@ -53,6 +53,10 @@ lp_build_name(LLVMValueRef val, const char *format, ...)
|
|||
}
|
||||
|
||||
|
||||
boolean
|
||||
lp_check_alignment(const void *ptr, unsigned alignment);
|
||||
|
||||
|
||||
void
|
||||
lp_disassemble(const void* func);
|
||||
|
||||
|
|
|
@ -71,11 +71,11 @@
|
|||
/**
|
||||
* Return a type appropriate for depth/stencil testing.
|
||||
*/
|
||||
union lp_type
|
||||
struct lp_type
|
||||
lp_depth_type(const struct util_format_description *format_desc,
|
||||
unsigned length)
|
||||
{
|
||||
union lp_type type;
|
||||
struct lp_type type;
|
||||
unsigned swizzle;
|
||||
|
||||
assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
|
||||
|
@ -85,7 +85,7 @@ lp_depth_type(const struct util_format_description *format_desc,
|
|||
swizzle = format_desc->swizzle[0];
|
||||
assert(swizzle < 4);
|
||||
|
||||
type.value = 0;
|
||||
memset(&type, 0, sizeof type);
|
||||
type.width = format_desc->block.bits;
|
||||
|
||||
if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
|
||||
|
@ -114,7 +114,7 @@ lp_depth_type(const struct util_format_description *format_desc,
|
|||
void
|
||||
lp_build_depth_test(LLVMBuilderRef builder,
|
||||
const struct pipe_depth_state *state,
|
||||
union lp_type type,
|
||||
struct lp_type type,
|
||||
const struct util_format_description *format_desc,
|
||||
struct lp_build_mask_context *mask,
|
||||
LLVMValueRef src,
|
||||
|
@ -179,12 +179,13 @@ lp_build_depth_test(LLVMBuilderRef builder,
|
|||
padding_right = 0;
|
||||
for(chan = 0; chan < z_swizzle; ++chan)
|
||||
padding_right += format_desc->channel[chan].size;
|
||||
padding_left = format_desc->block.bits - format_desc->channel[z_swizzle].size;
|
||||
padding_left = format_desc->block.bits -
|
||||
(padding_right + format_desc->channel[z_swizzle].size);
|
||||
|
||||
if(padding_left || padding_right) {
|
||||
const long long mask_left = ((long long)1 << (format_desc->block.bits - padding_left)) - 1;
|
||||
const long long mask_right = ((long long)1 << (padding_right)) - 1;
|
||||
z_bitmask = lp_build_int_const_scalar(type, mask_left & mask_right);
|
||||
const unsigned long long mask_left = ((unsigned long long)1 << (format_desc->block.bits - padding_left)) - 1;
|
||||
const unsigned long long mask_right = ((unsigned long long)1 << (padding_right)) - 1;
|
||||
z_bitmask = lp_build_int_const_scalar(type, mask_left ^ mask_right);
|
||||
}
|
||||
|
||||
if(padding_left)
|
||||
|
@ -210,5 +211,6 @@ lp_build_depth_test(LLVMBuilderRef builder,
|
|||
LLVMBuildStore(builder, dst, dst_ptr);
|
||||
}
|
||||
|
||||
/* FIXME */
|
||||
assert(!state->occlusion_count);
|
||||
}
|
||||
|
|
|
@ -41,11 +41,11 @@
|
|||
|
||||
struct pipe_depth_state;
|
||||
struct util_format_description;
|
||||
union lp_type;
|
||||
struct lp_type;
|
||||
struct lp_build_mask_context;
|
||||
|
||||
|
||||
union lp_type
|
||||
struct lp_type
|
||||
lp_depth_type(const struct util_format_description *format_desc,
|
||||
unsigned length);
|
||||
|
||||
|
@ -53,7 +53,7 @@ lp_depth_type(const struct util_format_description *format_desc,
|
|||
void
|
||||
lp_build_depth_test(LLVMBuilderRef builder,
|
||||
const struct pipe_depth_state *state,
|
||||
union lp_type type,
|
||||
struct lp_type type,
|
||||
const struct util_format_description *format_desc,
|
||||
struct lp_build_mask_context *mask,
|
||||
LLVMValueRef src,
|
||||
|
|
|
@ -32,59 +32,261 @@
|
|||
*/
|
||||
|
||||
#include "util/u_debug.h"
|
||||
#include "util/u_memory.h"
|
||||
|
||||
#include "lp_bld_type.h"
|
||||
#include "lp_bld_flow.h"
|
||||
|
||||
|
||||
void
|
||||
lp_build_mask_begin(struct lp_build_mask_context *mask,
|
||||
LLVMBuilderRef builder,
|
||||
union lp_type type,
|
||||
LLVMValueRef value)
|
||||
{
|
||||
memset(mask, 0, sizeof *mask);
|
||||
#define LP_BUILD_FLOW_MAX_VARIABLES 32
|
||||
#define LP_BUILD_FLOW_MAX_DEPTH 32
|
||||
|
||||
mask->builder = builder;
|
||||
mask->reg_type = LLVMIntType(type.width * type.length);
|
||||
mask->value = value;
|
||||
|
||||
/**
|
||||
* Enumeration of all possible flow constructs.
|
||||
*/
|
||||
enum lp_build_flow_construct_kind {
|
||||
lP_BUILD_FLOW_SCOPE,
|
||||
LP_BUILD_FLOW_SKIP,
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Variable declaration scope.
|
||||
*/
|
||||
struct lp_build_flow_scope
|
||||
{
|
||||
/** Number of variables declared in this scope */
|
||||
unsigned num_variables;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Early exit. Useful to skip to the end of a function or block when
|
||||
* the execution mask becomes zero or when there is an error condition.
|
||||
*/
|
||||
struct lp_build_flow_skip
|
||||
{
|
||||
/** Block to skip to */
|
||||
LLVMBasicBlockRef block;
|
||||
|
||||
/** Number of variables declared at the beginning */
|
||||
unsigned num_variables;
|
||||
|
||||
LLVMValueRef *phi;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Union of all possible flow constructs' data
|
||||
*/
|
||||
union lp_build_flow_construct_data
|
||||
{
|
||||
struct lp_build_flow_scope scope;
|
||||
struct lp_build_flow_skip skip;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Element of the flow construct stack.
|
||||
*/
|
||||
struct lp_build_flow_construct
|
||||
{
|
||||
enum lp_build_flow_construct_kind kind;
|
||||
union lp_build_flow_construct_data data;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* All necessary data to generate LLVM control flow constructs.
|
||||
*
|
||||
* Besides keeping track of the control flow constructs themselves we also
|
||||
* need to keep track of variables in order to generate SSA Phi values.
|
||||
*/
|
||||
struct lp_build_flow_context
|
||||
{
|
||||
LLVMBuilderRef builder;
|
||||
|
||||
/**
|
||||
* Control flow stack.
|
||||
*/
|
||||
struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH];
|
||||
unsigned num_constructs;
|
||||
|
||||
/**
|
||||
* Variable stack
|
||||
*/
|
||||
LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES];
|
||||
unsigned num_variables;
|
||||
};
|
||||
|
||||
|
||||
struct lp_build_flow_context *
|
||||
lp_build_flow_create(LLVMBuilderRef builder)
|
||||
{
|
||||
struct lp_build_flow_context *flow;
|
||||
|
||||
flow = CALLOC_STRUCT(lp_build_flow_context);
|
||||
if(!flow)
|
||||
return NULL;
|
||||
|
||||
flow->builder = builder;
|
||||
|
||||
return flow;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
lp_build_mask_update(struct lp_build_mask_context *mask,
|
||||
LLVMValueRef value)
|
||||
lp_build_flow_destroy(struct lp_build_flow_context *flow)
|
||||
{
|
||||
assert(flow->num_constructs == 0);
|
||||
assert(flow->num_variables == 0);
|
||||
FREE(flow);
|
||||
}
|
||||
|
||||
LLVMValueRef cond;
|
||||
|
||||
static union lp_build_flow_construct_data *
|
||||
lp_build_flow_push(struct lp_build_flow_context *flow,
|
||||
enum lp_build_flow_construct_kind kind)
|
||||
{
|
||||
assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH);
|
||||
if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH)
|
||||
return NULL;
|
||||
|
||||
flow->constructs[flow->num_constructs].kind = kind;
|
||||
return &flow->constructs[flow->num_constructs++].data;
|
||||
}
|
||||
|
||||
|
||||
static union lp_build_flow_construct_data *
|
||||
lp_build_flow_peek(struct lp_build_flow_context *flow,
|
||||
enum lp_build_flow_construct_kind kind)
|
||||
{
|
||||
assert(flow->num_constructs);
|
||||
if(!flow->num_constructs)
|
||||
return NULL;
|
||||
|
||||
assert(flow->constructs[flow->num_constructs - 1].kind == kind);
|
||||
if(flow->constructs[flow->num_constructs - 1].kind != kind)
|
||||
return NULL;
|
||||
|
||||
return &flow->constructs[flow->num_constructs - 1].data;
|
||||
}
|
||||
|
||||
|
||||
static union lp_build_flow_construct_data *
|
||||
lp_build_flow_pop(struct lp_build_flow_context *flow,
|
||||
enum lp_build_flow_construct_kind kind)
|
||||
{
|
||||
assert(flow->num_constructs);
|
||||
if(!flow->num_constructs)
|
||||
return NULL;
|
||||
|
||||
assert(flow->constructs[flow->num_constructs - 1].kind == kind);
|
||||
if(flow->constructs[flow->num_constructs - 1].kind != kind)
|
||||
return NULL;
|
||||
|
||||
return &flow->constructs[--flow->num_constructs].data;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Begin a variable scope.
|
||||
*
|
||||
*
|
||||
*/
|
||||
void
|
||||
lp_build_flow_scope_begin(struct lp_build_flow_context *flow)
|
||||
{
|
||||
struct lp_build_flow_scope *scope;
|
||||
|
||||
scope = &lp_build_flow_push(flow, lP_BUILD_FLOW_SCOPE)->scope;
|
||||
if(!scope)
|
||||
return;
|
||||
|
||||
scope->num_variables = 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Declare a variable.
|
||||
*
|
||||
* A variable is a named entity which can have different LLVMValueRef's at
|
||||
* different points of the program. This is relevant for control flow because
|
||||
* when there are multiple branches to the same location we need to replace
|
||||
* the variable's value with a Phi function as explained in
|
||||
* http://en.wikipedia.org/wiki/Static_single_assignment_form .
|
||||
*
|
||||
* We keep track of variables by keeping around a pointer to where their
|
||||
* current value is stored.
|
||||
*
|
||||
* There are a few cautions to observe:
|
||||
*
|
||||
* - Variable's value must not be NULL. If there is no initial value then
|
||||
* LLVMGetUndef() should be used.
|
||||
*
|
||||
* - Variable's value must be kept up-to-date. If the variable is going to be
|
||||
* modified by a function then a pointer should be passed so that its value
|
||||
* is accurate. Failure to do this will cause some of the variables'
|
||||
* transient values to be lost, leading to wrong results.
|
||||
*
|
||||
* - A program should be written from top to bottom, by always appending
|
||||
* instructions to the bottom with a single LLVMBuilderRef. Inserting and/or
|
||||
* modifying existing statements will most likely lead to wrong results.
|
||||
*
|
||||
*/
|
||||
void
|
||||
lp_build_flow_scope_declare(struct lp_build_flow_context *flow,
|
||||
LLVMValueRef *variable)
|
||||
{
|
||||
struct lp_build_flow_scope *scope;
|
||||
|
||||
scope = &lp_build_flow_peek(flow, lP_BUILD_FLOW_SCOPE)->scope;
|
||||
if(!scope)
|
||||
return;
|
||||
|
||||
assert(*variable);
|
||||
if(!*variable)
|
||||
return;
|
||||
|
||||
assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES);
|
||||
if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES)
|
||||
return;
|
||||
|
||||
flow->variables[flow->num_variables++] = variable;
|
||||
++scope->num_variables;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
lp_build_flow_scope_end(struct lp_build_flow_context *flow)
|
||||
{
|
||||
struct lp_build_flow_scope *scope;
|
||||
|
||||
scope = &lp_build_flow_pop(flow, lP_BUILD_FLOW_SCOPE)->scope;
|
||||
if(!scope)
|
||||
return;
|
||||
|
||||
assert(flow->num_variables >= scope->num_variables);
|
||||
if(flow->num_variables < scope->num_variables) {
|
||||
flow->num_variables = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
flow->num_variables -= scope->num_variables;
|
||||
}
|
||||
|
||||
|
||||
static LLVMBasicBlockRef
|
||||
lp_build_flow_insert_block(struct lp_build_flow_context *flow)
|
||||
{
|
||||
LLVMBasicBlockRef current_block;
|
||||
LLVMBasicBlockRef next_block;
|
||||
LLVMBasicBlockRef new_block;
|
||||
|
||||
if(mask->value)
|
||||
mask->value = LLVMBuildAnd(mask->builder, mask->value, value, "");
|
||||
else
|
||||
mask->value = value;
|
||||
|
||||
/* FIXME: disabled until we have proper control flow helpers */
|
||||
#if 0
|
||||
cond = LLVMBuildICmp(mask->builder,
|
||||
LLVMIntEQ,
|
||||
LLVMBuildBitCast(mask->builder, mask->value, mask->reg_type, ""),
|
||||
LLVMConstNull(mask->reg_type),
|
||||
"");
|
||||
|
||||
current_block = LLVMGetInsertBlock(mask->builder);
|
||||
|
||||
if(!mask->skip_block) {
|
||||
LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
|
||||
mask->skip_block = LLVMAppendBasicBlock(function, "skip");
|
||||
|
||||
mask->phi = LLVMBuildPhi(mask->builder, LLVMTypeOf(mask->value), "");
|
||||
}
|
||||
current_block = LLVMGetInsertBlock(flow->builder);
|
||||
|
||||
next_block = LLVMGetNextBasicBlock(current_block);
|
||||
assert(next_block);
|
||||
if(next_block) {
|
||||
new_block = LLVMInsertBasicBlock(next_block, "");
|
||||
}
|
||||
|
@ -93,30 +295,148 @@ lp_build_mask_update(struct lp_build_mask_context *mask,
|
|||
new_block = LLVMAppendBasicBlock(function, "");
|
||||
}
|
||||
|
||||
LLVMAddIncoming(mask->phi, &mask->value, &current_block, 1);
|
||||
LLVMBuildCondBr(mask->builder, cond, mask->skip_block, new_block);
|
||||
return new_block;
|
||||
}
|
||||
|
||||
LLVMPositionBuilderAtEnd(mask->builder, new_block);
|
||||
#endif
|
||||
void
|
||||
lp_build_flow_skip_begin(struct lp_build_flow_context *flow)
|
||||
{
|
||||
struct lp_build_flow_skip *skip;
|
||||
LLVMBuilderRef builder;
|
||||
unsigned i;
|
||||
|
||||
skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip;
|
||||
if(!skip)
|
||||
return;
|
||||
|
||||
skip->block = lp_build_flow_insert_block(flow);
|
||||
skip->num_variables = flow->num_variables;
|
||||
if(!skip->num_variables) {
|
||||
skip->phi = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi);
|
||||
if(!skip->phi) {
|
||||
skip->num_variables = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
builder = LLVMCreateBuilder();
|
||||
LLVMPositionBuilderAtEnd(builder, skip->block);
|
||||
|
||||
for(i = 0; i < skip->num_variables; ++i)
|
||||
skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), "");
|
||||
|
||||
LLVMDisposeBuilder(builder);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow,
|
||||
LLVMValueRef cond)
|
||||
{
|
||||
struct lp_build_flow_skip *skip;
|
||||
LLVMBasicBlockRef current_block;
|
||||
LLVMBasicBlockRef new_block;
|
||||
unsigned i;
|
||||
|
||||
skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip;
|
||||
if(!skip)
|
||||
return;
|
||||
|
||||
current_block = LLVMGetInsertBlock(flow->builder);
|
||||
|
||||
new_block = lp_build_flow_insert_block(flow);
|
||||
|
||||
for(i = 0; i < skip->num_variables; ++i) {
|
||||
assert(*flow->variables[i]);
|
||||
LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1);
|
||||
}
|
||||
|
||||
LLVMBuildCondBr(flow->builder, cond, skip->block, new_block);
|
||||
|
||||
LLVMPositionBuilderAtEnd(flow->builder, new_block);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
lp_build_flow_skip_end(struct lp_build_flow_context *flow)
|
||||
{
|
||||
struct lp_build_flow_skip *skip;
|
||||
LLVMBasicBlockRef current_block;
|
||||
unsigned i;
|
||||
|
||||
skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip;
|
||||
if(!skip)
|
||||
return;
|
||||
|
||||
current_block = LLVMGetInsertBlock(flow->builder);
|
||||
|
||||
for(i = 0; i < skip->num_variables; ++i) {
|
||||
assert(*flow->variables[i]);
|
||||
LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1);
|
||||
*flow->variables[i] = skip->phi[i];
|
||||
}
|
||||
|
||||
LLVMBuildBr(flow->builder, skip->block);
|
||||
LLVMPositionBuilderAtEnd(flow->builder, skip->block);
|
||||
|
||||
FREE(skip->phi);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
lp_build_mask_check(struct lp_build_mask_context *mask)
|
||||
{
|
||||
LLVMBuilderRef builder = mask->flow->builder;
|
||||
LLVMValueRef cond;
|
||||
|
||||
cond = LLVMBuildICmp(builder,
|
||||
LLVMIntEQ,
|
||||
LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""),
|
||||
LLVMConstNull(mask->reg_type),
|
||||
"");
|
||||
|
||||
lp_build_flow_skip_cond_break(mask->flow, cond);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
lp_build_mask_begin(struct lp_build_mask_context *mask,
|
||||
struct lp_build_flow_context *flow,
|
||||
struct lp_type type,
|
||||
LLVMValueRef value)
|
||||
{
|
||||
memset(mask, 0, sizeof *mask);
|
||||
|
||||
mask->flow = flow;
|
||||
mask->reg_type = LLVMIntType(type.width * type.length);
|
||||
mask->value = value;
|
||||
|
||||
lp_build_flow_scope_begin(flow);
|
||||
lp_build_flow_scope_declare(flow, &mask->value);
|
||||
lp_build_flow_skip_begin(flow);
|
||||
|
||||
lp_build_mask_check(mask);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
lp_build_mask_update(struct lp_build_mask_context *mask,
|
||||
LLVMValueRef value)
|
||||
{
|
||||
mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, "");
|
||||
|
||||
lp_build_mask_check(mask);
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_mask_end(struct lp_build_mask_context *mask)
|
||||
{
|
||||
if(mask->skip_block) {
|
||||
LLVMBasicBlockRef current_block = LLVMGetInsertBlock(mask->builder);
|
||||
|
||||
LLVMAddIncoming(mask->phi, &mask->value, &current_block, 1);
|
||||
LLVMBuildBr(mask->builder, mask->skip_block);
|
||||
|
||||
LLVMPositionBuilderAtEnd(mask->builder, mask->skip_block);
|
||||
|
||||
mask->value = mask->phi;
|
||||
mask->phi = NULL;
|
||||
mask->skip_block = NULL;
|
||||
}
|
||||
|
||||
lp_build_flow_skip_end(mask->flow);
|
||||
lp_build_flow_scope_end(mask->flow);
|
||||
return mask->value;
|
||||
}
|
||||
|
||||
|
|
|
@ -38,27 +38,53 @@
|
|||
#include <llvm-c/Core.h>
|
||||
|
||||
|
||||
union lp_type;
|
||||
struct lp_type;
|
||||
|
||||
|
||||
struct lp_build_flow_context;
|
||||
|
||||
|
||||
struct lp_build_flow_context *
|
||||
lp_build_flow_create(LLVMBuilderRef builder);
|
||||
|
||||
void
|
||||
lp_build_flow_destroy(struct lp_build_flow_context *flow);
|
||||
|
||||
void
|
||||
lp_build_flow_scope_begin(struct lp_build_flow_context *flow);
|
||||
|
||||
void
|
||||
lp_build_flow_scope_declare(struct lp_build_flow_context *flow,
|
||||
LLVMValueRef *variable);
|
||||
|
||||
void
|
||||
lp_build_flow_scope_end(struct lp_build_flow_context *flow);
|
||||
|
||||
void
|
||||
lp_build_flow_skip_begin(struct lp_build_flow_context *flow);
|
||||
|
||||
void
|
||||
lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow,
|
||||
LLVMValueRef cond);
|
||||
|
||||
void
|
||||
lp_build_flow_skip_end(struct lp_build_flow_context *flow);
|
||||
|
||||
|
||||
struct lp_build_mask_context
|
||||
{
|
||||
LLVMBuilderRef builder;
|
||||
struct lp_build_flow_context *flow;
|
||||
|
||||
LLVMTypeRef reg_type;
|
||||
|
||||
LLVMValueRef value;
|
||||
|
||||
LLVMValueRef phi;
|
||||
|
||||
LLVMBasicBlockRef skip_block;
|
||||
};
|
||||
|
||||
|
||||
void
|
||||
lp_build_mask_begin(struct lp_build_mask_context *mask,
|
||||
LLVMBuilderRef builder,
|
||||
union lp_type type,
|
||||
struct lp_build_flow_context *flow,
|
||||
struct lp_type type,
|
||||
LLVMValueRef value);
|
||||
|
||||
/**
|
||||
|
|
|
@ -31,21 +31,15 @@
|
|||
|
||||
/**
|
||||
* @file
|
||||
* LLVM IR building helpers interfaces.
|
||||
*
|
||||
* We use LLVM-C bindings for now. They are not documented, but follow the C++
|
||||
* interfaces very closely, and appear to be complete enough for code
|
||||
* genration. See
|
||||
* http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html
|
||||
* for a standalone example.
|
||||
* Pixel format helpers.
|
||||
*/
|
||||
|
||||
#include <llvm-c/Core.h>
|
||||
|
||||
|
||||
#include "pipe/p_format.h"
|
||||
|
||||
|
||||
union lp_type;
|
||||
struct util_format_description;
|
||||
struct lp_type;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -56,9 +50,9 @@ union lp_type;
|
|||
* @return RGBA in a 4 floats vector.
|
||||
*/
|
||||
LLVMValueRef
|
||||
lp_build_unpack_rgba(LLVMBuilderRef builder,
|
||||
enum pipe_format format,
|
||||
LLVMValueRef packed);
|
||||
lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
|
||||
enum pipe_format format,
|
||||
LLVMValueRef packed);
|
||||
|
||||
|
||||
/**
|
||||
|
@ -67,9 +61,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder,
|
|||
* @param rgba 4 float vector with the unpacked components.
|
||||
*/
|
||||
LLVMValueRef
|
||||
lp_build_pack_rgba(LLVMBuilderRef builder,
|
||||
enum pipe_format format,
|
||||
LLVMValueRef rgba);
|
||||
lp_build_pack_rgba_aos(LLVMBuilderRef builder,
|
||||
enum pipe_format format,
|
||||
LLVMValueRef rgba);
|
||||
|
||||
|
||||
/**
|
||||
|
@ -81,9 +75,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder,
|
|||
* @return RGBA in a 4 floats vector.
|
||||
*/
|
||||
LLVMValueRef
|
||||
lp_build_load_rgba(LLVMBuilderRef builder,
|
||||
enum pipe_format format,
|
||||
LLVMValueRef ptr);
|
||||
lp_build_load_rgba_aos(LLVMBuilderRef builder,
|
||||
enum pipe_format format,
|
||||
LLVMValueRef ptr);
|
||||
|
||||
|
||||
/**
|
||||
|
@ -92,10 +86,34 @@ lp_build_load_rgba(LLVMBuilderRef builder,
|
|||
* @param rgba 4 float vector with the unpacked components.
|
||||
*/
|
||||
void
|
||||
lp_build_store_rgba(LLVMBuilderRef builder,
|
||||
enum pipe_format format,
|
||||
LLVMValueRef ptr,
|
||||
LLVMValueRef rgba);
|
||||
lp_build_store_rgba_aos(LLVMBuilderRef builder,
|
||||
enum pipe_format format,
|
||||
LLVMValueRef ptr,
|
||||
LLVMValueRef rgba);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_gather(LLVMBuilderRef builder,
|
||||
unsigned length,
|
||||
unsigned src_width,
|
||||
unsigned dst_width,
|
||||
LLVMValueRef base_ptr,
|
||||
LLVMValueRef offsets);
|
||||
|
||||
|
||||
void
|
||||
lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
|
||||
const struct util_format_description *format_desc,
|
||||
struct lp_type type,
|
||||
LLVMValueRef packed,
|
||||
LLVMValueRef *rgba);
|
||||
|
||||
|
||||
void
|
||||
lp_build_load_rgba_soa(LLVMBuilderRef builder,
|
||||
const struct util_format_description *format_desc,
|
||||
struct lp_type type,
|
||||
LLVMValueRef base_ptr,
|
||||
LLVMValueRef offsets,
|
||||
LLVMValueRef *rgba);
|
||||
|
||||
#endif /* !LP_BLD_H */
|
||||
|
|
|
@ -32,9 +32,9 @@
|
|||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_unpack_rgba(LLVMBuilderRef builder,
|
||||
enum pipe_format format,
|
||||
LLVMValueRef packed)
|
||||
lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
|
||||
enum pipe_format format,
|
||||
LLVMValueRef packed)
|
||||
{
|
||||
const struct util_format_description *desc;
|
||||
LLVMTypeRef type;
|
||||
|
@ -152,9 +152,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder,
|
|||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_pack_rgba(LLVMBuilderRef builder,
|
||||
enum pipe_format format,
|
||||
LLVMValueRef rgba)
|
||||
lp_build_pack_rgba_aos(LLVMBuilderRef builder,
|
||||
enum pipe_format format,
|
||||
LLVMValueRef rgba)
|
||||
{
|
||||
const struct util_format_description *desc;
|
||||
LLVMTypeRef type;
|
||||
|
@ -250,9 +250,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder,
|
|||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_load_rgba(LLVMBuilderRef builder,
|
||||
enum pipe_format format,
|
||||
LLVMValueRef ptr)
|
||||
lp_build_load_rgba_aos(LLVMBuilderRef builder,
|
||||
enum pipe_format format,
|
||||
LLVMValueRef ptr)
|
||||
{
|
||||
const struct util_format_description *desc;
|
||||
LLVMTypeRef type;
|
||||
|
@ -272,15 +272,15 @@ lp_build_load_rgba(LLVMBuilderRef builder,
|
|||
|
||||
packed = LLVMBuildLoad(builder, ptr, "");
|
||||
|
||||
return lp_build_unpack_rgba(builder, format, packed);
|
||||
return lp_build_unpack_rgba_aos(builder, format, packed);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
lp_build_store_rgba(LLVMBuilderRef builder,
|
||||
enum pipe_format format,
|
||||
LLVMValueRef ptr,
|
||||
LLVMValueRef rgba)
|
||||
lp_build_store_rgba_aos(LLVMBuilderRef builder,
|
||||
enum pipe_format format,
|
||||
LLVMValueRef ptr,
|
||||
LLVMValueRef rgba)
|
||||
{
|
||||
const struct util_format_description *desc;
|
||||
LLVMTypeRef type;
|
||||
|
@ -294,7 +294,7 @@ lp_build_store_rgba(LLVMBuilderRef builder,
|
|||
|
||||
type = LLVMIntType(desc->block.bits);
|
||||
|
||||
packed = lp_build_pack_rgba(builder, format, rgba);
|
||||
packed = lp_build_pack_rgba_aos(builder, format, rgba);
|
||||
|
||||
ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), "");
|
||||
|
||||
|
|
|
@ -0,0 +1,208 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2009 VMware, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
|
||||
#include "util/u_format.h"
|
||||
|
||||
#include "lp_bld_type.h"
|
||||
#include "lp_bld_const.h"
|
||||
#include "lp_bld_conv.h"
|
||||
#include "lp_bld_format.h"
|
||||
|
||||
|
||||
/**
|
||||
* Gather elements from scatter positions in memory into a single vector.
|
||||
*
|
||||
* @param src_width src element width
|
||||
* @param dst_width result element width (source will be expanded to fit)
|
||||
* @param length length of the offsets,
|
||||
* @param base_ptr base pointer, should be a i8 pointer type.
|
||||
* @param offsets vector with offsets
|
||||
*/
|
||||
LLVMValueRef
|
||||
lp_build_gather(LLVMBuilderRef builder,
|
||||
unsigned length,
|
||||
unsigned src_width,
|
||||
unsigned dst_width,
|
||||
LLVMValueRef base_ptr,
|
||||
LLVMValueRef offsets)
|
||||
{
|
||||
LLVMTypeRef src_type = LLVMIntType(src_width);
|
||||
LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
|
||||
LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
|
||||
LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
|
||||
LLVMValueRef res;
|
||||
unsigned i;
|
||||
|
||||
res = LLVMGetUndef(dst_vec_type);
|
||||
for(i = 0; i < length; ++i) {
|
||||
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
|
||||
LLVMValueRef elem_offset;
|
||||
LLVMValueRef elem_ptr;
|
||||
LLVMValueRef elem;
|
||||
|
||||
elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
|
||||
elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
|
||||
elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, "");
|
||||
elem = LLVMBuildLoad(builder, elem_ptr, "");
|
||||
|
||||
assert(src_width <= dst_width);
|
||||
if(src_width > dst_width)
|
||||
elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
|
||||
if(src_width < dst_width)
|
||||
elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");
|
||||
|
||||
res = LLVMBuildInsertElement(builder, res, elem, index, "");
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
static LLVMValueRef
|
||||
lp_build_format_swizzle(struct lp_type type,
|
||||
const LLVMValueRef *inputs,
|
||||
enum util_format_swizzle swizzle)
|
||||
{
|
||||
switch (swizzle) {
|
||||
case UTIL_FORMAT_SWIZZLE_X:
|
||||
case UTIL_FORMAT_SWIZZLE_Y:
|
||||
case UTIL_FORMAT_SWIZZLE_Z:
|
||||
case UTIL_FORMAT_SWIZZLE_W:
|
||||
return inputs[swizzle];
|
||||
case UTIL_FORMAT_SWIZZLE_0:
|
||||
return lp_build_zero(type);
|
||||
case UTIL_FORMAT_SWIZZLE_1:
|
||||
return lp_build_one(type);
|
||||
case UTIL_FORMAT_SWIZZLE_NONE:
|
||||
return lp_build_undef(type);
|
||||
default:
|
||||
assert(0);
|
||||
return lp_build_undef(type);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
|
||||
const struct util_format_description *format_desc,
|
||||
struct lp_type type,
|
||||
LLVMValueRef packed,
|
||||
LLVMValueRef *rgba)
|
||||
{
|
||||
LLVMValueRef inputs[4];
|
||||
unsigned start;
|
||||
unsigned chan;
|
||||
|
||||
/* FIXME: Support more formats */
|
||||
assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
|
||||
assert(format_desc->block.width == 1);
|
||||
assert(format_desc->block.height == 1);
|
||||
assert(format_desc->block.bits <= 32);
|
||||
|
||||
/* Decode the input vector components */
|
||||
start = 0;
|
||||
for (chan = 0; chan < 4; ++chan) {
|
||||
unsigned width = format_desc->channel[chan].size;
|
||||
unsigned stop = start + width;
|
||||
LLVMValueRef input;
|
||||
|
||||
input = packed;
|
||||
|
||||
switch(format_desc->channel[chan].type) {
|
||||
case UTIL_FORMAT_TYPE_VOID:
|
||||
input = NULL;
|
||||
break;
|
||||
|
||||
case UTIL_FORMAT_TYPE_UNSIGNED:
|
||||
if(type.floating) {
|
||||
if(start)
|
||||
input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(type, start), "");
|
||||
if(stop < format_desc->block.bits) {
|
||||
unsigned mask = ((unsigned long long)1 << width) - 1;
|
||||
input = LLVMBuildAnd(builder, input, lp_build_int_const_scalar(type, mask), "");
|
||||
}
|
||||
|
||||
if(format_desc->channel[chan].normalized)
|
||||
input = lp_build_unsigned_norm_to_float(builder, width, type, input);
|
||||
else
|
||||
input = LLVMBuildFPToSI(builder, input, lp_build_vec_type(type), "");
|
||||
}
|
||||
else {
|
||||
/* FIXME */
|
||||
assert(0);
|
||||
input = lp_build_undef(type);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
/* fall through */
|
||||
input = lp_build_undef(type);
|
||||
break;
|
||||
}
|
||||
|
||||
inputs[chan] = input;
|
||||
|
||||
start = stop;
|
||||
}
|
||||
|
||||
if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
|
||||
enum util_format_swizzle swizzle = format_desc->swizzle[0];
|
||||
LLVMValueRef depth = lp_build_format_swizzle(type, inputs, swizzle);
|
||||
rgba[2] = rgba[1] = rgba[0] = depth;
|
||||
rgba[3] = lp_build_one(type);
|
||||
}
|
||||
else {
|
||||
for (chan = 0; chan < 4; ++chan) {
|
||||
enum util_format_swizzle swizzle = format_desc->swizzle[chan];
|
||||
rgba[chan] = lp_build_format_swizzle(type, inputs, swizzle);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
lp_build_load_rgba_soa(LLVMBuilderRef builder,
|
||||
const struct util_format_description *format_desc,
|
||||
struct lp_type type,
|
||||
LLVMValueRef base_ptr,
|
||||
LLVMValueRef offsets,
|
||||
LLVMValueRef *rgba)
|
||||
{
|
||||
LLVMValueRef packed;
|
||||
|
||||
assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
|
||||
assert(format_desc->block.width == 1);
|
||||
assert(format_desc->block.height == 1);
|
||||
assert(format_desc->block.bits <= 32);
|
||||
|
||||
packed = lp_build_gather(builder,
|
||||
type.length, format_desc->block.bits, type.width,
|
||||
base_ptr, offsets);
|
||||
|
||||
lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba);
|
||||
}
|
|
@ -292,7 +292,7 @@ void
|
|||
lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
|
||||
const struct tgsi_token *tokens,
|
||||
LLVMBuilderRef builder,
|
||||
union lp_type type,
|
||||
struct lp_type type,
|
||||
LLVMValueRef a0_ptr,
|
||||
LLVMValueRef dadx_ptr,
|
||||
LLVMValueRef dady_ptr,
|
||||
|
|
|
@ -83,7 +83,7 @@ void
|
|||
lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
|
||||
const struct tgsi_token *tokens,
|
||||
LLVMBuilderRef builder,
|
||||
union lp_type type,
|
||||
struct lp_type type,
|
||||
LLVMValueRef a0_ptr,
|
||||
LLVMValueRef dadx_ptr,
|
||||
LLVMValueRef dady_ptr,
|
||||
|
|
|
@ -45,7 +45,7 @@ lp_build_cmp(struct lp_build_context *bld,
|
|||
LLVMValueRef a,
|
||||
LLVMValueRef b)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const struct lp_type type = bld->type;
|
||||
LLVMTypeRef vec_type = lp_build_vec_type(type);
|
||||
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
|
||||
LLVMValueRef zeros = LLVMConstNull(int_vec_type);
|
||||
|
@ -301,7 +301,7 @@ lp_build_select(struct lp_build_context *bld,
|
|||
LLVMValueRef a,
|
||||
LLVMValueRef b)
|
||||
{
|
||||
union lp_type type = bld->type;
|
||||
struct lp_type type = bld->type;
|
||||
LLVMValueRef res;
|
||||
|
||||
if(a == b)
|
||||
|
@ -313,8 +313,6 @@ lp_build_select(struct lp_build_context *bld,
|
|||
b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
|
||||
}
|
||||
|
||||
/* TODO: On SSE4 we could do this with a single instruction -- PBLENDVB */
|
||||
|
||||
a = LLVMBuildAnd(bld->builder, a, mask, "");
|
||||
|
||||
/* This often gets translated to PANDN, but sometimes the NOT is
|
||||
|
@ -339,9 +337,9 @@ LLVMValueRef
|
|||
lp_build_select_aos(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
boolean cond[4])
|
||||
const boolean cond[4])
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const struct lp_type type = bld->type;
|
||||
const unsigned n = type.length;
|
||||
unsigned i, j;
|
||||
|
||||
|
@ -376,9 +374,9 @@ lp_build_select_aos(struct lp_build_context *bld,
|
|||
|
||||
return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
|
||||
}
|
||||
else {
|
||||
#if 0
|
||||
else if(0) {
|
||||
/* FIXME: Unfortunately select of vectors do not work */
|
||||
/* XXX: Unfortunately select of vectors do not work */
|
||||
/* Use a select */
|
||||
LLVMTypeRef elem_type = LLVMInt1Type();
|
||||
LLVMValueRef cond[LP_MAX_VECTOR_LENGTH];
|
||||
|
@ -388,10 +386,9 @@ lp_build_select_aos(struct lp_build_context *bld,
|
|||
cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0);
|
||||
|
||||
return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, "");
|
||||
}
|
||||
#endif
|
||||
else {
|
||||
#else
|
||||
LLVMValueRef mask = lp_build_const_mask_aos(type, cond);
|
||||
return lp_build_select(bld, mask, a, b);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
|
|
@ -42,7 +42,7 @@
|
|||
#include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */
|
||||
|
||||
|
||||
union lp_type type;
|
||||
struct lp_type type;
|
||||
struct lp_build_context;
|
||||
|
||||
|
||||
|
@ -66,7 +66,7 @@ LLVMValueRef
|
|||
lp_build_select_aos(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
boolean cond[4]);
|
||||
const boolean cond[4]);
|
||||
|
||||
|
||||
#endif /* !LP_BLD_LOGIC_H */
|
||||
|
|
|
@ -0,0 +1,135 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2009 VMware, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Texture sampling.
|
||||
*
|
||||
* @author Jose Fonseca <jfonseca@vmware.com>
|
||||
*/
|
||||
|
||||
#ifndef LP_BLD_SAMPLE_H
|
||||
#define LP_BLD_SAMPLE_H
|
||||
|
||||
|
||||
#include <llvm-c/Core.h>
|
||||
|
||||
struct pipe_texture;
|
||||
struct pipe_sampler_state;
|
||||
struct lp_type;
|
||||
|
||||
|
||||
/**
|
||||
* Sampler static state.
|
||||
*
|
||||
* These are the bits of state from pipe_texture and pipe_sampler_state that
|
||||
* are embedded in the generated code.
|
||||
*/
|
||||
struct lp_sampler_static_state
|
||||
{
|
||||
/* pipe_texture's state */
|
||||
enum pipe_format format;
|
||||
unsigned target:2;
|
||||
unsigned pot_width:1;
|
||||
unsigned pot_height:1;
|
||||
unsigned pot_depth:1;
|
||||
|
||||
/* pipe_sampler_state's state */
|
||||
unsigned wrap_s:3;
|
||||
unsigned wrap_t:3;
|
||||
unsigned wrap_r:3;
|
||||
unsigned min_img_filter:2;
|
||||
unsigned min_mip_filter:2;
|
||||
unsigned mag_img_filter:2;
|
||||
unsigned compare_mode:1;
|
||||
unsigned compare_func:3;
|
||||
unsigned normalized_coords:1;
|
||||
unsigned prefilter:4;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Sampler dynamic state.
|
||||
*
|
||||
* These are the bits of state from pipe_texture and pipe_sampler_state that
|
||||
* are computed in runtime.
|
||||
*
|
||||
* There are obtained through callbacks, as we don't want to tie the texture
|
||||
* sampling code generation logic to any particular texture layout or pipe
|
||||
* driver.
|
||||
*/
|
||||
struct lp_sampler_dynamic_state
|
||||
{
|
||||
|
||||
/** Obtain the base texture width. */
|
||||
LLVMValueRef
|
||||
(*width)( struct lp_sampler_dynamic_state *state,
|
||||
LLVMBuilderRef builder,
|
||||
unsigned unit);
|
||||
|
||||
/** Obtain the base texture height. */
|
||||
LLVMValueRef
|
||||
(*height)( struct lp_sampler_dynamic_state *state,
|
||||
LLVMBuilderRef builder,
|
||||
unsigned unit);
|
||||
|
||||
LLVMValueRef
|
||||
(*stride)( struct lp_sampler_dynamic_state *state,
|
||||
LLVMBuilderRef builder,
|
||||
unsigned unit);
|
||||
|
||||
LLVMValueRef
|
||||
(*data_ptr)( struct lp_sampler_dynamic_state *state,
|
||||
LLVMBuilderRef builder,
|
||||
unsigned unit);
|
||||
|
||||
};
|
||||
|
||||
|
||||
/**
 * Derive the sampler static state from a texture and a sampler state.
 *
 * @param state    receives the derived state
 * @param texture  texture bound to the sampler
 * @param sampler  sampler state
 */
void
lp_sampler_static_state(
   struct lp_sampler_static_state *state,
   const struct pipe_texture *texture,
   const struct pipe_sampler_state *sampler);
|
||||
|
||||
|
||||
void
|
||||
lp_build_sample_soa(LLVMBuilderRef builder,
|
||||
const struct lp_sampler_static_state *static_state,
|
||||
struct lp_sampler_dynamic_state *dynamic_state,
|
||||
struct lp_type fp_type,
|
||||
unsigned unit,
|
||||
unsigned num_coords,
|
||||
const LLVMValueRef *coords,
|
||||
LLVMValueRef lodbias,
|
||||
LLVMValueRef *texel);
|
||||
|
||||
|
||||
|
||||
#endif /* LP_BLD_SAMPLE_H */
|
|
@ -0,0 +1,416 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2009 VMware, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Texture sampling.
|
||||
*
|
||||
* @author Jose Fonseca <jfonseca@vmware.com>
|
||||
*/
|
||||
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_state.h"
|
||||
#include "util/u_debug.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_format.h"
|
||||
#include "lp_bld_debug.h"
|
||||
#include "lp_bld_type.h"
|
||||
#include "lp_bld_const.h"
|
||||
#include "lp_bld_arit.h"
|
||||
#include "lp_bld_logic.h"
|
||||
#include "lp_bld_swizzle.h"
|
||||
#include "lp_bld_format.h"
|
||||
#include "lp_bld_sample.h"
|
||||
|
||||
|
||||
void
|
||||
lp_sampler_static_state(struct lp_sampler_static_state *state,
|
||||
const struct pipe_texture *texture,
|
||||
const struct pipe_sampler_state *sampler)
|
||||
{
|
||||
memset(state, 0, sizeof *state);
|
||||
|
||||
if(!texture)
|
||||
return;
|
||||
|
||||
if(!sampler)
|
||||
return;
|
||||
|
||||
state->format = texture->format;
|
||||
state->target = texture->target;
|
||||
state->pot_width = util_is_pot(texture->width[0]);
|
||||
state->pot_height = util_is_pot(texture->height[0]);
|
||||
state->pot_depth = util_is_pot(texture->depth[0]);
|
||||
|
||||
state->wrap_s = sampler->wrap_s;
|
||||
state->wrap_t = sampler->wrap_t;
|
||||
state->wrap_r = sampler->wrap_r;
|
||||
state->min_img_filter = sampler->min_img_filter;
|
||||
state->min_mip_filter = sampler->min_mip_filter;
|
||||
state->mag_img_filter = sampler->mag_img_filter;
|
||||
if(sampler->compare_mode) {
|
||||
state->compare_mode = sampler->compare_mode;
|
||||
state->compare_func = sampler->compare_func;
|
||||
}
|
||||
state->normalized_coords = sampler->normalized_coords;
|
||||
state->prefilter = sampler->prefilter;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Keep all information for sampling code generation in a single place.
|
||||
*/
|
||||
struct lp_build_sample_context
|
||||
{
|
||||
LLVMBuilderRef builder;
|
||||
|
||||
const struct lp_sampler_static_state *static_state;
|
||||
|
||||
struct lp_sampler_dynamic_state *dynamic_state;
|
||||
|
||||
const struct util_format_description *format_desc;
|
||||
|
||||
/** Incoming coordinates type and build context */
|
||||
struct lp_type coord_type;
|
||||
struct lp_build_context coord_bld;
|
||||
|
||||
/** Integer coordinates */
|
||||
struct lp_type int_coord_type;
|
||||
struct lp_build_context int_coord_bld;
|
||||
|
||||
/** Output texels type and build context */
|
||||
struct lp_type texel_type;
|
||||
struct lp_build_context texel_bld;
|
||||
};
|
||||
|
||||
|
||||
static void
|
||||
lp_build_sample_texel(struct lp_build_sample_context *bld,
|
||||
LLVMValueRef x,
|
||||
LLVMValueRef y,
|
||||
LLVMValueRef y_stride,
|
||||
LLVMValueRef data_ptr,
|
||||
LLVMValueRef *texel)
|
||||
{
|
||||
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
|
||||
LLVMValueRef x_stride;
|
||||
LLVMValueRef offset;
|
||||
|
||||
x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8);
|
||||
|
||||
if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
|
||||
LLVMValueRef x_lo, x_hi;
|
||||
LLVMValueRef y_lo, y_hi;
|
||||
LLVMValueRef x_stride_lo, x_stride_hi;
|
||||
LLVMValueRef y_stride_lo, y_stride_hi;
|
||||
LLVMValueRef x_offset_lo, x_offset_hi;
|
||||
LLVMValueRef y_offset_lo, y_offset_hi;
|
||||
LLVMValueRef offset_lo, offset_hi;
|
||||
|
||||
x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, "");
|
||||
y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, "");
|
||||
|
||||
x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, "");
|
||||
y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, "");
|
||||
|
||||
x_stride_lo = x_stride;
|
||||
y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8);
|
||||
|
||||
x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 4*bld->format_desc->block.bits/8);
|
||||
y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, "");
|
||||
|
||||
x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo);
|
||||
y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo);
|
||||
offset_lo = lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo);
|
||||
|
||||
x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi);
|
||||
y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi);
|
||||
offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi);
|
||||
|
||||
offset = lp_build_add(int_coord_bld, offset_hi, offset_lo);
|
||||
}
|
||||
else {
|
||||
LLVMValueRef x_offset;
|
||||
LLVMValueRef y_offset;
|
||||
|
||||
x_offset = lp_build_mul(int_coord_bld, x, x_stride);
|
||||
y_offset = lp_build_mul(int_coord_bld, y, y_stride);
|
||||
|
||||
offset = lp_build_add(int_coord_bld, x_offset, y_offset);
|
||||
}
|
||||
|
||||
lp_build_load_rgba_soa(bld->builder,
|
||||
bld->format_desc,
|
||||
bld->texel_type,
|
||||
data_ptr,
|
||||
offset,
|
||||
texel);
|
||||
}
|
||||
|
||||
|
||||
static LLVMValueRef
|
||||
lp_build_sample_wrap(struct lp_build_sample_context *bld,
|
||||
LLVMValueRef coord,
|
||||
LLVMValueRef length,
|
||||
boolean is_pot,
|
||||
unsigned wrap_mode)
|
||||
{
|
||||
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
|
||||
LLVMValueRef length_minus_one;
|
||||
|
||||
length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
|
||||
|
||||
switch(wrap_mode) {
|
||||
case PIPE_TEX_WRAP_REPEAT:
|
||||
if(is_pot)
|
||||
coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
|
||||
else
|
||||
/* Signed remainder won't give the right results for negative
|
||||
* dividends but unsigned remainder does.*/
|
||||
coord = LLVMBuildURem(bld->builder, coord, length, "");
|
||||
break;
|
||||
|
||||
case PIPE_TEX_WRAP_CLAMP:
|
||||
coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
|
||||
coord = lp_build_min(int_coord_bld, coord, length_minus_one);
|
||||
break;
|
||||
|
||||
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
|
||||
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
|
||||
case PIPE_TEX_WRAP_MIRROR_REPEAT:
|
||||
case PIPE_TEX_WRAP_MIRROR_CLAMP:
|
||||
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
|
||||
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
|
||||
/* FIXME */
|
||||
_debug_printf("warning: failed to translate texture wrap mode %u\n", wrap_mode);
|
||||
coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
|
||||
coord = lp_build_min(int_coord_bld, coord, length_minus_one);
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
return coord;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
|
||||
LLVMValueRef s,
|
||||
LLVMValueRef t,
|
||||
LLVMValueRef width,
|
||||
LLVMValueRef height,
|
||||
LLVMValueRef stride,
|
||||
LLVMValueRef data_ptr,
|
||||
LLVMValueRef *texel)
|
||||
{
|
||||
LLVMValueRef x;
|
||||
LLVMValueRef y;
|
||||
|
||||
x = lp_build_ifloor(&bld->coord_bld, s);
|
||||
y = lp_build_ifloor(&bld->coord_bld, t);
|
||||
|
||||
x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s);
|
||||
y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t);
|
||||
|
||||
lp_build_sample_texel(bld, x, y, stride, data_ptr, texel);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
|
||||
LLVMValueRef s,
|
||||
LLVMValueRef t,
|
||||
LLVMValueRef width,
|
||||
LLVMValueRef height,
|
||||
LLVMValueRef stride,
|
||||
LLVMValueRef data_ptr,
|
||||
LLVMValueRef *texel)
|
||||
{
|
||||
LLVMValueRef half;
|
||||
LLVMValueRef s_ipart;
|
||||
LLVMValueRef t_ipart;
|
||||
LLVMValueRef s_fpart;
|
||||
LLVMValueRef t_fpart;
|
||||
LLVMValueRef x0, x1;
|
||||
LLVMValueRef y0, y1;
|
||||
LLVMValueRef neighbors[2][2][4];
|
||||
unsigned chan;
|
||||
|
||||
half = lp_build_const_scalar(bld->coord_type, 0.5);
|
||||
s = lp_build_sub(&bld->coord_bld, s, half);
|
||||
t = lp_build_sub(&bld->coord_bld, t, half);
|
||||
|
||||
s_ipart = lp_build_floor(&bld->coord_bld, s);
|
||||
t_ipart = lp_build_floor(&bld->coord_bld, t);
|
||||
|
||||
s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart);
|
||||
t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart);
|
||||
|
||||
x0 = lp_build_int(&bld->coord_bld, s_ipart);
|
||||
y0 = lp_build_int(&bld->coord_bld, t_ipart);
|
||||
|
||||
x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s);
|
||||
y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t);
|
||||
|
||||
x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
|
||||
y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
|
||||
|
||||
x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s);
|
||||
y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
|
||||
|
||||
lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]);
|
||||
lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]);
|
||||
lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]);
|
||||
lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]);
|
||||
|
||||
/* TODO: Don't interpolate missing channels */
|
||||
for(chan = 0; chan < 4; ++chan) {
|
||||
texel[chan] = lp_build_lerp_2d(&bld->texel_bld,
|
||||
s_fpart, t_fpart,
|
||||
neighbors[0][0][chan],
|
||||
neighbors[0][1][chan],
|
||||
neighbors[1][0][chan],
|
||||
neighbors[1][1][chan]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
lp_build_sample_compare(struct lp_build_sample_context *bld,
|
||||
LLVMValueRef p,
|
||||
LLVMValueRef *texel)
|
||||
{
|
||||
struct lp_build_context *texel_bld = &bld->texel_bld;
|
||||
LLVMValueRef res;
|
||||
unsigned chan;
|
||||
|
||||
if(!bld->static_state->compare_mode)
|
||||
return;
|
||||
|
||||
/* TODO: Compare before swizzling, to avoid redundant computations */
|
||||
res = NULL;
|
||||
for(chan = 0; chan < 4; ++chan) {
|
||||
LLVMValueRef cmp;
|
||||
cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]);
|
||||
cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero);
|
||||
|
||||
if(res)
|
||||
res = lp_build_add(texel_bld, res, cmp);
|
||||
else
|
||||
res = cmp;
|
||||
}
|
||||
|
||||
assert(res);
|
||||
res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25));
|
||||
|
||||
/* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
|
||||
for(chan = 0; chan < 3; ++chan)
|
||||
texel[chan] = res;
|
||||
texel[3] = texel_bld->one;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
lp_build_sample_soa(LLVMBuilderRef builder,
|
||||
const struct lp_sampler_static_state *static_state,
|
||||
struct lp_sampler_dynamic_state *dynamic_state,
|
||||
struct lp_type type,
|
||||
unsigned unit,
|
||||
unsigned num_coords,
|
||||
const LLVMValueRef *coords,
|
||||
LLVMValueRef lodbias,
|
||||
LLVMValueRef *texel)
|
||||
{
|
||||
struct lp_build_sample_context bld;
|
||||
LLVMValueRef width;
|
||||
LLVMValueRef height;
|
||||
LLVMValueRef stride;
|
||||
LLVMValueRef data_ptr;
|
||||
LLVMValueRef s;
|
||||
LLVMValueRef t;
|
||||
LLVMValueRef p;
|
||||
|
||||
/* Setup our build context */
|
||||
memset(&bld, 0, sizeof bld);
|
||||
bld.builder = builder;
|
||||
bld.static_state = static_state;
|
||||
bld.dynamic_state = dynamic_state;
|
||||
bld.format_desc = util_format_description(static_state->format);
|
||||
bld.coord_type = type;
|
||||
bld.int_coord_type = lp_int_type(type);
|
||||
bld.texel_type = type;
|
||||
lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
|
||||
lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
|
||||
lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
|
||||
|
||||
/* Get the dynamic state */
|
||||
width = dynamic_state->width(dynamic_state, builder, unit);
|
||||
height = dynamic_state->height(dynamic_state, builder, unit);
|
||||
stride = dynamic_state->stride(dynamic_state, builder, unit);
|
||||
data_ptr = dynamic_state->data_ptr(dynamic_state, builder, unit);
|
||||
|
||||
s = coords[0];
|
||||
t = coords[1];
|
||||
p = coords[2];
|
||||
|
||||
width = lp_build_broadcast_scalar(&bld.int_coord_bld, width);
|
||||
height = lp_build_broadcast_scalar(&bld.int_coord_bld, height);
|
||||
stride = lp_build_broadcast_scalar(&bld.int_coord_bld, stride);
|
||||
|
||||
if(static_state->target == PIPE_TEXTURE_1D)
|
||||
t = bld.coord_bld.zero;
|
||||
|
||||
if(static_state->normalized_coords) {
|
||||
LLVMTypeRef coord_vec_type = lp_build_vec_type(bld.coord_type);
|
||||
LLVMValueRef fp_width = LLVMBuildSIToFP(builder, width, coord_vec_type, "");
|
||||
LLVMValueRef fp_height = LLVMBuildSIToFP(builder, height, coord_vec_type, "");
|
||||
s = lp_build_mul(&bld.coord_bld, s, fp_width);
|
||||
t = lp_build_mul(&bld.coord_bld, t, fp_height);
|
||||
}
|
||||
|
||||
switch (static_state->min_img_filter) {
|
||||
case PIPE_TEX_FILTER_NEAREST:
|
||||
lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel);
|
||||
break;
|
||||
case PIPE_TEX_FILTER_LINEAR:
|
||||
case PIPE_TEX_FILTER_ANISO:
|
||||
lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
/* FIXME: respect static_state->min_mip_filter */;
|
||||
/* FIXME: respect static_state->mag_img_filter */;
|
||||
/* FIXME: respect static_state->prefilter */;
|
||||
|
||||
lp_build_sample_compare(&bld, p, texel);
|
||||
}
|
|
@ -41,18 +41,31 @@
|
|||
#include "lp_bld_struct.h"
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_struct_get_ptr(LLVMBuilderRef builder,
|
||||
LLVMValueRef ptr,
|
||||
unsigned member,
|
||||
const char *name)
|
||||
{
|
||||
LLVMValueRef indices[2];
|
||||
LLVMValueRef member_ptr;
|
||||
indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
|
||||
indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0);
|
||||
member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), "");
|
||||
lp_build_name(member_ptr, "%s.%s_ptr", LLVMGetValueName(ptr), name);
|
||||
return member_ptr;
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_struct_get(LLVMBuilderRef builder,
|
||||
LLVMValueRef ptr,
|
||||
unsigned member,
|
||||
const char *name)
|
||||
{
|
||||
LLVMValueRef indices[2];
|
||||
LLVMValueRef member_ptr;
|
||||
LLVMValueRef res;
|
||||
indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
|
||||
indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0);
|
||||
member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), "");
|
||||
member_ptr = lp_build_struct_get_ptr(builder, ptr, member, name);
|
||||
res = LLVMBuildLoad(builder, member_ptr, "");
|
||||
lp_build_name(res, "%s.%s", LLVMGetValueName(ptr), name);
|
||||
return res;
|
||||
|
|
|
@ -53,6 +53,18 @@
|
|||
offsetof(_ctype, _cmember))
|
||||
|
||||
|
||||
/**
|
||||
* Get value pointer to a structure member.
|
||||
*/
|
||||
LLVMValueRef
|
||||
lp_build_struct_get_ptr(LLVMBuilderRef builder,
|
||||
LLVMValueRef ptr,
|
||||
unsigned member,
|
||||
const char *name);
|
||||
|
||||
/**
|
||||
* Get the value of a structure member.
|
||||
*/
|
||||
LLVMValueRef
|
||||
lp_build_struct_get(LLVMBuilderRef builder,
|
||||
LLVMValueRef ptr,
|
||||
|
|
|
@ -64,7 +64,7 @@ LLVMValueRef
|
|||
lp_build_broadcast_scalar(struct lp_build_context *bld,
|
||||
LLVMValueRef scalar)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const struct lp_type type = bld->type;
|
||||
LLVMValueRef res;
|
||||
unsigned i;
|
||||
|
||||
|
@ -83,7 +83,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
|
|||
LLVMValueRef a,
|
||||
unsigned channel)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const struct lp_type type = bld->type;
|
||||
const unsigned n = type.length;
|
||||
unsigned i, j;
|
||||
|
||||
|
@ -115,7 +115,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
|
|||
* YY00 YY00 .... YY00
|
||||
* YYYY YYYY .... YYYY <= output
|
||||
*/
|
||||
union lp_type type4 = type;
|
||||
struct lp_type type4 = type;
|
||||
const char shifts[4][2] = {
|
||||
{ 1, 2},
|
||||
{-1, 2},
|
||||
|
@ -161,7 +161,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
|
|||
LLVMValueRef
|
||||
lp_build_swizzle1_aos(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
unsigned char swizzle[4])
|
||||
const unsigned char swizzle[4])
|
||||
{
|
||||
const unsigned n = bld->type.length;
|
||||
unsigned i, j;
|
||||
|
@ -192,7 +192,7 @@ LLVMValueRef
|
|||
lp_build_swizzle2_aos(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
unsigned char swizzle[4])
|
||||
const unsigned char swizzle[4])
|
||||
{
|
||||
const unsigned n = bld->type.length;
|
||||
unsigned i, j;
|
||||
|
@ -201,11 +201,12 @@ lp_build_swizzle2_aos(struct lp_build_context *bld,
|
|||
return lp_build_swizzle1_aos(bld, a, swizzle);
|
||||
|
||||
if(a == b) {
|
||||
swizzle[0] %= 4;
|
||||
swizzle[1] %= 4;
|
||||
swizzle[2] %= 4;
|
||||
swizzle[3] %= 4;
|
||||
return lp_build_swizzle1_aos(bld, a, swizzle);
|
||||
unsigned char swizzle1[4];
|
||||
swizzle1[0] = swizzle[0] % 4;
|
||||
swizzle1[1] = swizzle[1] % 4;
|
||||
swizzle1[2] = swizzle[2] % 4;
|
||||
swizzle1[3] = swizzle[3] % 4;
|
||||
return lp_build_swizzle1_aos(bld, a, swizzle1);
|
||||
}
|
||||
|
||||
if(swizzle[0] % 4 == 0 &&
|
||||
|
|
|
@ -40,7 +40,7 @@
|
|||
#include <llvm-c/Core.h>
|
||||
|
||||
|
||||
union lp_type type;
|
||||
struct lp_type type;
|
||||
struct lp_build_context;
|
||||
|
||||
|
||||
|
@ -73,7 +73,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
|
|||
LLVMValueRef
|
||||
lp_build_swizzle1_aos(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
unsigned char swizzle[4]);
|
||||
const unsigned char swizzle[4]);
|
||||
|
||||
|
||||
/**
|
||||
|
@ -85,7 +85,7 @@ LLVMValueRef
|
|||
lp_build_swizzle2_aos(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
unsigned char swizzle[4]);
|
||||
const unsigned char swizzle[4]);
|
||||
|
||||
|
||||
#endif /* !LP_BLD_SWIZZLE_H */
|
||||
|
|
|
@ -39,31 +39,46 @@
|
|||
|
||||
|
||||
struct tgsi_token;
|
||||
union lp_type;
|
||||
struct lp_type;
|
||||
struct lp_build_context;
|
||||
struct lp_build_mask_context;
|
||||
|
||||
|
||||
typedef void
|
||||
(*lp_emit_fetch_texel_soa_callback)( LLVMBuilderRef builder,
|
||||
void *context,
|
||||
unsigned unit,
|
||||
unsigned num_coords,
|
||||
const LLVMValueRef *coords,
|
||||
LLVMValueRef lodbias,
|
||||
LLVMValueRef *texel);
|
||||
/**
|
||||
* Sampler code generation interface.
|
||||
*
|
||||
* Although texture sampling is a requirement for TGSI translation, it is
|
||||
* a very different problem with several different approaches to it. This
|
||||
* structure establishes an interface for texture sampling code generation, so
|
||||
* that we can easily use different texture sampling strategies.
|
||||
*/
|
||||
struct lp_build_sampler_soa
|
||||
{
|
||||
void
|
||||
(*destroy)( struct lp_build_sampler_soa *sampler );
|
||||
|
||||
void
|
||||
(*emit_fetch_texel)( struct lp_build_sampler_soa *sampler,
|
||||
LLVMBuilderRef builder,
|
||||
struct lp_type type,
|
||||
unsigned unit,
|
||||
unsigned num_coords,
|
||||
const LLVMValueRef *coords,
|
||||
LLVMValueRef lodbias,
|
||||
LLVMValueRef *texel);
|
||||
};
|
||||
|
||||
|
||||
void
|
||||
lp_build_tgsi_soa(LLVMBuilderRef builder,
|
||||
const struct tgsi_token *tokens,
|
||||
union lp_type type,
|
||||
struct lp_type type,
|
||||
struct lp_build_mask_context *mask,
|
||||
LLVMValueRef consts_ptr,
|
||||
const LLVMValueRef *pos,
|
||||
const LLVMValueRef (*inputs)[4],
|
||||
LLVMValueRef (*outputs)[4],
|
||||
lp_emit_fetch_texel_soa_callback emit_fetch_texel,
|
||||
void *emit_fetch_texel_context);
|
||||
struct lp_build_sampler_soa *sampler);
|
||||
|
||||
|
||||
#endif /* LP_BLD_TGSI_H */
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -33,7 +33,7 @@
|
|||
|
||||
|
||||
LLVMTypeRef
|
||||
lp_build_elem_type(union lp_type type)
|
||||
lp_build_elem_type(struct lp_type type)
|
||||
{
|
||||
if (type.floating) {
|
||||
switch(type.width) {
|
||||
|
@ -55,7 +55,7 @@ lp_build_elem_type(union lp_type type)
|
|||
|
||||
|
||||
LLVMTypeRef
|
||||
lp_build_vec_type(union lp_type type)
|
||||
lp_build_vec_type(struct lp_type type)
|
||||
{
|
||||
LLVMTypeRef elem_type = lp_build_elem_type(type);
|
||||
return LLVMVectorType(elem_type, type.length);
|
||||
|
@ -69,7 +69,7 @@ lp_build_vec_type(union lp_type type)
|
|||
* type and check for identity.
|
||||
*/
|
||||
boolean
|
||||
lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type)
|
||||
lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type)
|
||||
{
|
||||
LLVMTypeKind elem_kind;
|
||||
|
||||
|
@ -107,7 +107,7 @@ lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type)
|
|||
|
||||
|
||||
boolean
|
||||
lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type)
|
||||
lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type)
|
||||
{
|
||||
LLVMTypeRef elem_type;
|
||||
|
||||
|
@ -128,7 +128,7 @@ lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type)
|
|||
|
||||
|
||||
boolean
|
||||
lp_check_value(union lp_type type, LLVMValueRef val)
|
||||
lp_check_value(struct lp_type type, LLVMValueRef val)
|
||||
{
|
||||
LLVMTypeRef vec_type;
|
||||
|
||||
|
@ -143,24 +143,36 @@ lp_check_value(union lp_type type, LLVMValueRef val)
|
|||
|
||||
|
||||
LLVMTypeRef
|
||||
lp_build_int_elem_type(union lp_type type)
|
||||
lp_build_int_elem_type(struct lp_type type)
|
||||
{
|
||||
return LLVMIntType(type.width);
|
||||
}
|
||||
|
||||
|
||||
LLVMTypeRef
|
||||
lp_build_int_vec_type(union lp_type type)
|
||||
lp_build_int_vec_type(struct lp_type type)
|
||||
{
|
||||
LLVMTypeRef elem_type = lp_build_int_elem_type(type);
|
||||
return LLVMVectorType(elem_type, type.length);
|
||||
}
|
||||
|
||||
|
||||
struct lp_type
|
||||
lp_int_type(struct lp_type type)
|
||||
{
|
||||
struct lp_type int_type;
|
||||
|
||||
memset(&int_type, 0, sizeof int_type);
|
||||
int_type.width = type.width;
|
||||
int_type.length = type.length;
|
||||
return int_type;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
lp_build_context_init(struct lp_build_context *bld,
|
||||
LLVMBuilderRef builder,
|
||||
union lp_type type)
|
||||
struct lp_type type)
|
||||
{
|
||||
bld->builder = builder;
|
||||
bld->type = type;
|
||||
|
|
|
@ -56,58 +56,55 @@
|
|||
* on the types used for intermediate computations, such as signed vs unsigned,
|
||||
* normalized values, or fixed point.
|
||||
*/
|
||||
union lp_type {
|
||||
struct {
|
||||
/**
|
||||
* Floating-point. Cannot be used with fixed. Integer numbers are
|
||||
* represented by this zero.
|
||||
*/
|
||||
unsigned floating:1;
|
||||
struct lp_type {
|
||||
/**
|
||||
* Floating-point. Cannot be used with fixed. Integer numbers are
|
||||
* represented by this zero.
|
||||
*/
|
||||
unsigned floating:1;
|
||||
|
||||
/**
|
||||
* Fixed-point. Cannot be used with floating. Integer numbers are
|
||||
* represented by this zero.
|
||||
*/
|
||||
unsigned fixed:1;
|
||||
|
||||
/**
|
||||
* Whether it can represent negative values or not.
|
||||
*
|
||||
* If this is not set for floating point, it means that all values are
|
||||
* assumed to be positive.
|
||||
*/
|
||||
unsigned sign:1;
|
||||
/**
|
||||
* Fixed-point. Cannot be used with floating. Integer numbers are
|
||||
* represented by this zero.
|
||||
*/
|
||||
unsigned fixed:1;
|
||||
|
||||
/**
|
||||
* Whether values are normalized to fit [0, 1] interval, or [-1, 1]
|
||||
* interval for signed types.
|
||||
*
|
||||
* For integer types it means the representable integer range should be
|
||||
* interpreted as the interval above.
|
||||
*
|
||||
* For floating and fixed point formats it means the values should be
|
||||
* clamped to the interval above.
|
||||
*/
|
||||
unsigned norm:1;
|
||||
/**
|
||||
* Whether it can represent negative values or not.
|
||||
*
|
||||
* If this is not set for floating point, it means that all values are
|
||||
* assumed to be positive.
|
||||
*/
|
||||
unsigned sign:1;
|
||||
|
||||
/**
|
||||
* Element width.
|
||||
*
|
||||
* For fixed point values, the fixed point is assumed to be at half the
|
||||
* width.
|
||||
*/
|
||||
unsigned width:14;
|
||||
/**
|
||||
* Whether values are normalized to fit [0, 1] interval, or [-1, 1]
|
||||
* interval for signed types.
|
||||
*
|
||||
* For integer types it means the representable integer range should be
|
||||
* interpreted as the interval above.
|
||||
*
|
||||
* For floating and fixed point formats it means the values should be
|
||||
* clamped to the interval above.
|
||||
*/
|
||||
unsigned norm:1;
|
||||
|
||||
/**
|
||||
* Vector length.
|
||||
*
|
||||
* width*length should be a power of two greater or equal to eight.
|
||||
*
|
||||
* @sa LP_MAX_VECTOR_LENGTH
|
||||
*/
|
||||
unsigned length:14;
|
||||
};
|
||||
uint32_t value;
|
||||
/**
|
||||
* Element width.
|
||||
*
|
||||
* For fixed point values, the fixed point is assumed to be at half the
|
||||
* width.
|
||||
*/
|
||||
unsigned width:14;
|
||||
|
||||
/**
|
||||
* Vector length.
|
||||
*
|
||||
* width*length should be a power of two greater or equal to eight.
|
||||
*
|
||||
* @sa LP_MAX_VECTOR_LENGTH
|
||||
*/
|
||||
unsigned length:14;
|
||||
};
|
||||
|
||||
|
||||
|
@ -124,7 +121,7 @@ struct lp_build_context
|
|||
* This not only describes the input/output LLVM types, but also whether
|
||||
* to normalize/clamp the results.
|
||||
*/
|
||||
union lp_type type;
|
||||
struct lp_type type;
|
||||
|
||||
/** Same as lp_build_undef(type) */
|
||||
LLVMValueRef undef;
|
||||
|
@ -138,37 +135,41 @@ struct lp_build_context
|
|||
|
||||
|
||||
LLVMTypeRef
|
||||
lp_build_elem_type(union lp_type type);
|
||||
lp_build_elem_type(struct lp_type type);
|
||||
|
||||
|
||||
LLVMTypeRef
|
||||
lp_build_vec_type(union lp_type type);
|
||||
lp_build_vec_type(struct lp_type type);
|
||||
|
||||
|
||||
boolean
|
||||
lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type);
|
||||
lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type);
|
||||
|
||||
|
||||
boolean
|
||||
lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type);
|
||||
lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type);
|
||||
|
||||
|
||||
boolean
|
||||
lp_check_value(union lp_type type, LLVMValueRef val);
|
||||
lp_check_value(struct lp_type type, LLVMValueRef val);
|
||||
|
||||
|
||||
LLVMTypeRef
|
||||
lp_build_int_elem_type(union lp_type type);
|
||||
lp_build_int_elem_type(struct lp_type type);
|
||||
|
||||
|
||||
LLVMTypeRef
|
||||
lp_build_int_vec_type(union lp_type type);
|
||||
lp_build_int_vec_type(struct lp_type type);
|
||||
|
||||
|
||||
struct lp_type
|
||||
lp_int_type(struct lp_type type);
|
||||
|
||||
|
||||
void
|
||||
lp_build_context_init(struct lp_build_context *bld,
|
||||
LLVMBuilderRef builder,
|
||||
union lp_type type);
|
||||
struct lp_type type);
|
||||
|
||||
|
||||
#endif /* !LP_BLD_TYPE_H */
|
||||
|
|
|
@ -67,6 +67,7 @@ llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
|
|||
util_pack_color(rgba, ps->format, &cv);
|
||||
lp_tile_cache_clear(llvmpipe->cbuf_cache[i], rgba, cv);
|
||||
}
|
||||
llvmpipe->dirty_render_cache = TRUE;
|
||||
}
|
||||
|
||||
if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
|
||||
|
|
|
@ -141,8 +141,6 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe,
|
|||
return PIPE_REFERENCED_FOR_WRITE;
|
||||
}
|
||||
|
||||
/* FIXME: we also need to do the same for the texture cache */
|
||||
|
||||
return PIPE_UNREFERENCED;
|
||||
}
|
||||
|
||||
|
|
|
@ -44,15 +44,47 @@
|
|||
static void
|
||||
lp_jit_init_globals(struct llvmpipe_screen *screen)
|
||||
{
|
||||
/* struct lp_jit_context */
|
||||
LLVMTypeRef texture_type;
|
||||
|
||||
/* struct lp_jit_texture */
|
||||
{
|
||||
LLVMTypeRef elem_types[4];
|
||||
|
||||
elem_types[LP_JIT_TEXTURE_WIDTH] = LLVMInt32Type();
|
||||
elem_types[LP_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
|
||||
elem_types[LP_JIT_TEXTURE_STRIDE] = LLVMInt32Type();
|
||||
elem_types[LP_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0);
|
||||
|
||||
texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
|
||||
|
||||
LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width,
|
||||
screen->target, texture_type,
|
||||
LP_JIT_TEXTURE_WIDTH);
|
||||
LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, height,
|
||||
screen->target, texture_type,
|
||||
LP_JIT_TEXTURE_HEIGHT);
|
||||
LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, stride,
|
||||
screen->target, texture_type,
|
||||
LP_JIT_TEXTURE_STRIDE);
|
||||
LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, data,
|
||||
screen->target, texture_type,
|
||||
LP_JIT_TEXTURE_DATA);
|
||||
LP_CHECK_STRUCT_SIZE(struct lp_jit_texture,
|
||||
screen->target, texture_type);
|
||||
|
||||
LLVMAddTypeName(screen->module, "texture", texture_type);
|
||||
}
|
||||
|
||||
/* struct lp_jit_context */
|
||||
{
|
||||
LLVMTypeRef elem_types[5];
|
||||
LLVMTypeRef context_type;
|
||||
|
||||
elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */
|
||||
elem_types[1] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */
|
||||
elem_types[2] = LLVMFloatType(); /* alpha_ref_value */
|
||||
elem_types[3] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */
|
||||
elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
|
||||
|
||||
context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
|
||||
|
||||
|
@ -64,6 +96,9 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
|
|||
screen->target, context_type, 2);
|
||||
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color,
|
||||
screen->target, context_type, 3);
|
||||
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures,
|
||||
screen->target, context_type,
|
||||
LP_JIT_CONTEXT_TEXTURES_INDEX);
|
||||
LP_CHECK_STRUCT_SIZE(struct lp_jit_context,
|
||||
screen->target, context_type);
|
||||
|
||||
|
@ -117,7 +152,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
|
|||
screen->provider = LLVMCreateModuleProviderForExistingModule(screen->module);
|
||||
|
||||
if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) {
|
||||
fprintf(stderr, "%s\n", error);
|
||||
_debug_printf("%s\n", error);
|
||||
LLVMDisposeMessage(error);
|
||||
abort();
|
||||
}
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue