Shared dri driver resources

2003-08-06 18:12:22 +00:00 · 2003-08-06 18:12:22 +00:00 · 93a458840c
parent b32a036059
commit 93a458840c
14 changed files with 3757 additions and 0 deletions
--- a/src/mesa/drivers/dri/common/depthtmp.h
+++ b/src/mesa/drivers/dri/common/depthtmp.h
@ -0,0 +1,176 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/common/depthtmp.h,v 1.5 2001/03/21 16:14:20 dawes Exp $ */
+
+#ifndef DBG
+#define DBG 0	/* default: the "if ( DBG ) fprintf(...)" traces below are off */
+#endif
+
+
+#ifndef HAVE_HW_DEPTH_SPANS
+#define HAVE_HW_DEPTH_SPANS 0	/* default: span functions loop over WRITE_DEPTH/READ_DEPTH */
+#endif
+#ifndef HAVE_HW_DEPTH_PIXELS
+#define HAVE_HW_DEPTH_PIXELS 0	/* default: pixel functions loop over WRITE_DEPTH/READ_DEPTH */
+#endif
+
+/* Lock/unlock defaults: code that does not distinguish read access from
+ * write access only has to provide HW_LOCK()/HW_UNLOCK().  The write
+ * variants get the same fallback as the read variants, because the
+ * Write* functions below use HW_WRITE_LOCK()/HW_WRITE_UNLOCK().
+ */
+#ifndef HW_WRITE_LOCK
+#define HW_WRITE_LOCK()		HW_LOCK()
+#endif
+#ifndef HW_WRITE_UNLOCK
+#define HW_WRITE_UNLOCK()	HW_UNLOCK()
+#endif
+
+#ifndef HW_READ_LOCK
+#define HW_READ_LOCK()		HW_LOCK()
+#endif
+#ifndef HW_READ_UNLOCK
+#define HW_READ_UNLOCK()	HW_UNLOCK()
+#endif
+/* Write a horizontal span of 'n' depth values starting at (x, y).
+ *
+ * In the software path (HAVE_HW_DEPTH_SPANS == 0) depth[i] is written for
+ * the positions selected by CLIPSPAN; when 'mask' is non-NULL only
+ * positions with a non-zero mask[i] are written.  In the hardware path the
+ * whole job is delegated to WRITE_DEPTH_SPAN().
+ *
+ * TAG, HW_CLIPLOOP/HW_ENDCLIPLOOP, LOCAL_DEPTH_VARS, Y_FLIP, CLIPSPAN and
+ * WRITE_DEPTH / WRITE_DEPTH_SPAN are not defined in this file; the code
+ * that includes this template has to supply them, and the locking macros
+ * ultimately come from there as well.
+ */
+static void TAG(WriteDepthSpan)( GLcontext *ctx,
+                             GLuint n, GLint x, GLint y,
+				 const GLdepth *depth,
+				 const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()	/* no ';': the braced block below follows the macro directly */
+      {
+	 GLint x1;
+	 GLint n1;
+	 LOCAL_DEPTH_VARS;
+
+	 y = Y_FLIP( y );	/* y is passed through Y_FLIP before it is used */
+
+#if HAVE_HW_DEPTH_SPANS
+	 (void) x1; (void) n1;	/* not otherwise referenced in this path */
+
+	 if ( DBG ) fprintf( stderr, "WriteDepthSpan 0..%d (x1 %d)\n",
+			     (int)n, (int)x );
+
+	 WRITE_DEPTH_SPAN();
+#else
+	 HW_CLIPLOOP()
+	    {
+	       GLint i = 0;
+	       CLIPSPAN( x, y, n, x1, n1, i );	/* NOTE(review): presumably sets x1/n1/i to the visible part of the span -- confirm against the CLIPSPAN definition */
+
+	       if ( DBG ) fprintf( stderr, "WriteDepthSpan %d..%d (x1 %d)\n",
+				   (int)i, (int)n1, (int)x1 );
+
+	       if ( mask ) {
+		  for ( ; i < n1 ; i++, x1++ ) {	/* x1 advances together with i */
+		     if ( mask[i] ) WRITE_DEPTH( x1, y, depth[i] );
+		  }
+	       } else {
+		  for ( ; i < n1 ; i++, x1++ ) {	/* no mask: write every position */
+		     WRITE_DEPTH( x1, y, depth[i] );
+		  }
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+#endif
+      }
+   HW_WRITE_UNLOCK();
+}
+/* Write 'n' individual depth values; element i goes to (x[i], y[i]).
+ *
+ * In the software path (HAVE_HW_DEPTH_PIXELS == 0) a pixel is written only
+ * when mask[i] is non-zero and CLIPPIXEL accepts its flipped coordinates.
+ * 'mask' is dereferenced unconditionally there, so it must not be NULL.
+ * The hardware path delegates to WRITE_DEPTH_PIXELS().
+ */
+static void TAG(WriteDepthPixels)( GLcontext *ctx,
+				   GLuint n,
+				   const GLint x[],
+				   const GLint y[],
+				   const GLdepth depth[],
+				   const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()	/* no ';': the braced block below follows the macro directly */
+      {
+	 GLint i;
+	 LOCAL_DEPTH_VARS;
+
+	 if ( DBG ) fprintf( stderr, "WriteDepthPixels\n" );
+
+#if HAVE_HW_DEPTH_PIXELS
+	 (void) i;	/* not otherwise referenced in this path */
+
+	 WRITE_DEPTH_PIXELS();
+#else
+	 HW_CLIPLOOP()
+	    {
+	       for ( i = 0 ; i < n ; i++ ) {
+		  if ( mask[i] ) {
+		     const int fy = Y_FLIP( y[i] );	/* flipped per pixel; y[] itself is const */
+		     if ( CLIPPIXEL( x[i], fy ) )
+			WRITE_DEPTH( x[i], fy, depth[i] );
+		  }
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+#endif
+      }
+   HW_WRITE_UNLOCK();
+}
+
+
+/* Read depth spans and pixels
+ *
+ * ReadDepthSpan reads a horizontal span of up to 'n' depth values starting
+ * at (x, y) into depth[].  In the software path only the elements selected
+ * by CLIPSPAN are stored; the remaining elements of depth[] are left
+ * untouched.  The hardware path delegates to READ_DEPTH_SPAN().
+ */
+static void TAG(ReadDepthSpan)( GLcontext *ctx,
+				GLuint n, GLint x, GLint y,
+				GLdepth depth[] )
+{
+   HW_READ_LOCK()	/* no ';': the braced block below follows the macro directly */
+      {
+	 GLint x1, n1;
+	 LOCAL_DEPTH_VARS;
+
+	 y = Y_FLIP( y );	/* y is passed through Y_FLIP before it is used */
+
+	 if ( DBG ) fprintf( stderr, "ReadDepthSpan\n" );
+
+#if HAVE_HW_DEPTH_SPANS
+	 (void) x1; (void) n1;	/* not otherwise referenced in this path */
+
+	 READ_DEPTH_SPAN();
+#else
+	 HW_CLIPLOOP()
+	    {
+	       GLint i = 0;
+	       CLIPSPAN( x, y, n, x1, n1, i );
+	       for ( ; i < n1 ; i++ )
+		  READ_DEPTH( depth[i], (x1+i), y );	/* NOTE(review): WriteDepthSpan advances x1 alongside i, this loop addresses x1+i; if CLIPSPAN can leave i non-zero the two disagree -- confirm */
+	    }
+	 HW_ENDCLIPLOOP();
+#endif
+      }
+   HW_READ_UNLOCK();
+}
+/* Read 'n' individual depth values; depth[i] receives the value at
+ * (x[i], y[i]).
+ *
+ * In the software path (HAVE_HW_DEPTH_PIXELS == 0) depth[i] is stored only
+ * when CLIPPIXEL accepts the flipped coordinates; rejected elements are
+ * left untouched.  The hardware path delegates to READ_DEPTH_PIXELS().
+ */
+static void TAG(ReadDepthPixels)( GLcontext *ctx, GLuint n,
+				  const GLint x[], const GLint y[],
+				  GLdepth depth[] )
+{
+   HW_READ_LOCK()	/* no ';': the braced block below follows the macro directly */
+      {
+	 GLint i;
+	 LOCAL_DEPTH_VARS;
+
+	 if ( DBG ) fprintf( stderr, "ReadDepthPixels\n" );
+
+#if HAVE_HW_DEPTH_PIXELS
+	 (void) i;	/* not otherwise referenced in this path */
+
+	 READ_DEPTH_PIXELS();
+#else
+	 HW_CLIPLOOP()
+	    {
+	       for ( i = 0 ; i < n ;i++ ) {
+		  int fy = Y_FLIP( y[i] );	/* flipped per pixel; y[] itself is const */
+		  if ( CLIPPIXEL( x[i], fy ) )
+		     READ_DEPTH( depth[i], x[i], fy );
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+#endif
+      }
+   HW_READ_UNLOCK();
+}
+
+
+#if HAVE_HW_DEPTH_SPANS	/* NOTE(review): keyed on SPANS only, although the *_PIXELS macros are selected by HAVE_HW_DEPTH_PIXELS in the functions above */
+#undef WRITE_DEPTH_SPAN
+#undef WRITE_DEPTH_PIXELS
+#undef READ_DEPTH_SPAN
+#undef READ_DEPTH_PIXELS
+#else
+#undef WRITE_DEPTH
+#undef READ_DEPTH
+#endif
+#undef TAG	/* TAG has to be defined again before this template is included another time */
--- a/src/mesa/drivers/dri/common/hwlog.c
+++ b/src/mesa/drivers/dri/common/hwlog.c
@ -0,0 +1,142 @@
+/*
+ * GLX Hardware Device Driver common code 
+ * 
+ * Based on the original MGA G200 driver (c) 1999 Wittawat Yamwong
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * WITTAWAT YAMWONG, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ *    Wittawat Yamwong <Wittawat.Yamwong@stud.uni-hannover.de>
+ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/common/hwlog.c,v 1.3 2001/08/18 02:51:03 dawes Exp $ */
+ 
+#include "hwlog.h"
+hwlog_t hwlog = { 0,0,0, "[???] "};	/* positional: file, level, timeTemp, prefix */
+
+
+/* Should be shared, but is this a good place for it?
+ */
+#include <sys/time.h>
+#include <stdarg.h>
+
+
+/*
+ * Return a microsecond timestamp derived from gettimeofday().
+ *
+ * Only the low 11 bits of the seconds count are kept, so the value wraps
+ * every 2048 seconds; the largest result is 2047 * 1000000 + 999999.
+ * It is meant for taking the difference between two nearby calls.
+ */
+int usec( void )
+{
+   struct timeval now;
+
+   /* POSIX leaves the behavior unspecified for a non-NULL timezone
+    * argument, and the timezone was never read here, so pass NULL.
+    */
+   gettimeofday( &now, NULL );
+
+   return (now.tv_sec & 2047) * 1000000 + now.tv_usec;
+}
+
+
+#ifdef HW_LOG_ENABLED
+/*
+ * Close any log that is currently open, reset the log level to 0, install
+ * 'prefix' as the message prefix and open 'filename' for writing.
+ *
+ * Returns 0 when the file was opened, -1 when 'filename' is NULL or
+ * fopen() fails.  The prefix pointer is stored, not copied.
+ */
+int hwOpenLog(const char *filename, char *prefix)
+{
+  hwCloseLog();
+  hwSetLogLevel(0);
+  hwlog.prefix = prefix;
+
+  if (filename == NULL) {
+    return -1;
+  }
+
+  hwlog.file = fopen(filename, "w");
+  return (hwlog.file != NULL) ? 0 : -1;
+}
+
+/*
+ * Close the log file if one is open and mark the log as closed.
+ * Does nothing when no log file is open.
+ */
+void hwCloseLog(void)
+{
+  if (hwlog.file == NULL) {
+    return;
+  }
+
+  fclose(hwlog.file);
+  hwlog.file = NULL;
+}
+
+/* Return 1 when a log file is currently open, 0 otherwise. */
+int hwIsLogReady(void)
+{
+  return (hwlog.file != NULL);
+}
+
+/*
+ * Set the current log level.  hwLogv() and hwMsg() emit a message only
+ * when its level is less than or equal to this value.
+ */
+void hwSetLogLevel(int level)
+{
+  hwlog.level = level;
+}
+
+/* Return the current log level as last set by hwSetLogLevel(). */
+int hwGetLogLevel(void)
+{
+  return hwlog.level;
+}
+
+/*
+ * printf-style front end to hwLogv(): collects the variable arguments and
+ * forwards them together with 'level' and 'format'.
+ */
+void hwLog(int level, const char *format, ...)
+{
+  va_list args;
+
+  va_start(args, format);
+  hwLogv(level, format, args);
+  va_end(args);
+}
+
+/*
+ * Write a formatted message to the log file and flush it.
+ * Nothing is written when no log file is open or when level 'l' is
+ * greater than the current log level.
+ */
+void hwLogv(int l, const char *format, va_list ap)
+{
+  if (hwlog.file == NULL || l > hwlog.level) {
+    return;
+  }
+
+  vfprintf(hwlog.file, format, ap);
+  fflush(hwlog.file);
+}
+
+/*
+ * Emit a debug message of level 'l'.
+ *
+ * Messages whose level is above the current log level are dropped.  When a
+ * log file is open the message goes there, preceded by the number of
+ * microseconds since the previous logged message; otherwise it goes to
+ * stderr, preceded by the configured prefix.
+ */
+void hwMsg(int l, const char *format, ...)
+{
+  va_list ap;
+
+  if (l > hwlog.level) {
+    return;
+  }
+
+  va_start(ap, format);
+
+  if (hwIsLogReady()) {
+    int t = usec();
+
+    /* timeTemp is unsigned; convert the difference so it matches "%6i" */
+    hwLog(l, "%6i:", (int)(t - hwlog.timeTemp));
+    hwlog.timeTemp = t;
+    hwLogv(l, format, ap);
+  } else {
+    /* The prefix is data, not a format string: print it verbatim. */
+    if (hwlog.prefix != NULL) {
+      fputs(hwlog.prefix, stderr);
+    }
+    vfprintf(stderr, format, ap);
+  }
+
+  va_end(ap);
+}
+
+#else /* ifdef HW_LOG_ENABLED */
+
+/* Placeholder compiled only when HW_LOG_ENABLED is not defined; always
+ * returns 0.  NOTE(review): no caller is visible here -- confirm whether
+ * it is still needed.
+ */
+int hwlogdummy(void)
+{
+  return 0;
+}
+
+#endif
+
+/*
+ * Report an error: the prefix and the formatted message are always
+ * written to stderr, and the message is additionally passed to hwLogv()
+ * at level 0.
+ */
+void hwError(const char *format, ...)
+{
+  va_list ap;
+
+  /* The prefix is data, not a format string: print it verbatim. */
+  if (hwlog.prefix != NULL) {
+    fputs(hwlog.prefix, stderr);
+  }
+
+  va_start(ap, format);
+  vfprintf(stderr, format, ap);
+  va_end(ap);
+
+  /* A va_list is indeterminate once vfprintf() has consumed it, so the
+   * arguments are walked a second time with a fresh va_start().
+   */
+  va_start(ap, format);
+  hwLogv(0, format, ap);
+  va_end(ap);
+}
--- a/src/mesa/drivers/dri/common/hwlog.h
+++ b/src/mesa/drivers/dri/common/hwlog.h
@ -0,0 +1,101 @@
+/*
+ * GLX Hardware Device Driver common code
+ *
+ * Based on the original MGA G200 driver (c) 1999 Wittawat Yamwong
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * WITTAWAT YAMWONG, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ *    Wittawat Yamwong <Wittawat.Yamwong@stud.uni-hannover.de>
+ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/common/hwlog.h,v 1.5 2001/10/31 23:22:57 tsi Exp $ */
+ 
+/* Usage:
+ * - use hwError for error messages. It always writes to stderr and passes
+ *   the message on to the log file.
+ * - use hwMsg for debugging. Can be disabled by undefining HW_LOG_ENABLED.
+ */
+ 
+#ifndef HWLOG_INC
+#define HWLOG_INC
+
+
+#include <stdarg.h>
+#include <stdio.h>
+/* NOTE(review): these look like message levels meant for the 'level' argument of hwMsg()/hwSetLogLevel(); nothing in this header uses them -- confirm. */
+#define DBG_LEVEL_BASE          1
+#define DBG_LEVEL_VERBOSE       10
+#define DBG_LEVEL_ENTEREXIT     20
+/* State of the logger; a single instance named 'hwlog' is declared below. */
+typedef struct
+{
+  FILE *file;	/* log file stream */
+  int   level;	/* current log level */
+  unsigned int timeTemp;	/* NOTE(review): presumably a timestamp kept between messages -- confirm in hwlog.c */
+  char *prefix;	/* set by hwOpenLog() */
+} hwlog_t;
+
+extern hwlog_t hwlog;
+
+
+#ifdef HW_LOG_ENABLED
+
+/* open and close log file. */
+int  hwOpenLog(const char *filename, char *prefix);
+void hwCloseLog(void);
+
+/* return 1 if the log file was successfully opened */
+int  hwIsLogReady(void);
+
+/* set current log level to 'level'. Messages with level less than or equal
+   the current log level will be written to the log file. */
+void hwSetLogLevel(int level);
+int  hwGetLogLevel(void);
+
+/* hwLog and hwLogv write a message to the log file.	*/
+/* do not call these directly, use hwMsg() instead	*/
+void hwLog(int level, const char *format, ...);
+void hwLogv(int level, const char *format, va_list ap);
+
+int usec( void );
+
+/* hwMsg writes a message to the log file or to the standard X error file. */
+void hwMsg(int level, const char *format, ...);
+
+
+#else
+
+
+/* Logging compiled out: the API collapses to no-ops.  hwOpenLog() still
+ * stores the prefix and reports failure with -1.
+ */
+static __inline__ int hwOpenLog(const char *f, char *prefix) { hwlog.prefix=prefix; return -1; }
+#define hwIsLogReady() (0)
+#define hwGetLogLevel() (-1)
+#define hwLogLevel(n) (0)
+/* hwLog() and hwMsg() take (level, format, ...) in the enabled build, so
+ * the stubs have to accept and discard any argument list.
+ */
+#define hwLog(...)
+#define hwMsg(...)
+
+#define hwCloseLog()
+#define hwSetLogLevel(x)
+#define hwLogv(l,f,a)
+
+
+#endif
+
+void hwError(const char *format, ...);
+
+
+#endif
--- a/src/mesa/drivers/dri/common/mm.c
+++ b/src/mesa/drivers/dri/common/mm.c
@ -0,0 +1,197 @@
+/*
+ * GLX Hardware Device Driver common code
+ * Copyright (C) 1999 Wittawat Yamwong
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * WITTAWAT YAMWONG, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+/* $XFree86: xc/lib/GL/mesa/src/drv/common/mm.c,v 1.4 2002/10/30 12:51:27 alanh Exp $ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "mm.h"
+#include "hwlog.h"
+
+
+/*
+ * Print every block of 'heap' to stderr: offset and size in hex, followed
+ * by '.' for a free block or 'U' for a used one, and 'R' when the block is
+ * reserved.  A NULL heap is reported as such.
+ */
+void mmDumpMemInfo( memHeap_t *heap )
+{
+   TMemBlock *p;
+
+   /* %p requires a void pointer argument */
+   fprintf(stderr, "Memory heap %p:\n", (void *)heap);
+   if (heap == NULL) {
+      fprintf(stderr, "  heap == 0\n");
+   } else {
+      for (p = (TMemBlock *)heap; p != NULL; p = p->next) {
+	 /* %x requires unsigned arguments */
+	 fprintf(stderr, "  Offset:%08x, Size:%08x, %c%c\n",
+		 (unsigned int)p->ofs, (unsigned int)p->size,
+		 p->free ? '.':'U',
+		 p->reserved ? 'R':'.');
+      }
+   }
+   fprintf(stderr, "End of memory blocks\n");
+}
+
+/*
+ * Create a heap that consists of one free block covering 'size' bytes
+ * starting at offset 'ofs'.
+ * Returns the heap, or 0 when 'size' is not positive or the allocation
+ * of the block descriptor fails.
+ */
+memHeap_t *mmInit(int ofs,
+		  int size)
+{
+   TMemBlock *first;
+
+   if (size <= 0)
+      return 0;
+
+   first = (TMemBlock *) calloc(1, sizeof(TMemBlock));
+   if (!first)
+      return 0;
+
+   first->ofs = ofs;
+   first->size = size;
+   first->free = 1;
+   return (memHeap_t *)first;
+}
+
+
+/*
+ * Carve the range [startofs, startofs + size) out of free block 'p'.
+ *
+ * Space in front of 'startofs' stays in 'p' as a free block, space behind
+ * the range becomes a new free block, and the block covering the range is
+ * marked used, tagged with 'reserved' and 'alignment', and returned.
+ * Returns NULL when a descriptor cannot be allocated; a split that was
+ * already made at that point is left in place.
+ */
+static TMemBlock* SliceBlock(TMemBlock *p, 
+			     int startofs, int size, 
+			     int reserved, int alignment)
+{
+   /* split off the part in front of the requested range */
+   if (startofs > p->ofs) {
+      TMemBlock *middle = (TMemBlock*) calloc(1,sizeof(TMemBlock));
+
+      if (middle == NULL)
+	 return NULL;
+      middle->ofs = startofs;
+      middle->size = p->size - (startofs - p->ofs);
+      middle->free = 1;
+      middle->next = p->next;
+      p->size -= middle->size;
+      p->next = middle;
+      p = middle;
+   }
+
+   /* split off the part behind the requested range */
+   if (size < p->size) {
+      TMemBlock *tail = (TMemBlock*) calloc(1,sizeof(TMemBlock));
+
+      if (tail == NULL)
+	 return NULL;
+      tail->ofs = startofs + size;
+      tail->size = p->size - size;
+      tail->free = 1;
+      tail->next = p->next;
+      p->size = size;
+      p->next = tail;
+   }
+
+   /* p now covers exactly the requested range */
+   p->align = alignment;
+   p->free = 0;
+   p->reserved = reserved;
+   return p;
+}
+
+/*
+ * Allocate 'size' bytes from 'heap' with 2^align2 byte alignment, using
+ * the first free block that can hold the request.
+ *
+ * In each free block the candidate start is the block offset rounded up
+ * to the alignment, or 'startSearch' when that is larger.  Note that
+ * 'startSearch' is used as given, so the result is only aligned in that
+ * case if 'startSearch' itself is.
+ *
+ * Returns the allocated block, or NULL when the arguments are invalid, no
+ * free block fits, or a block descriptor cannot be allocated.
+ */
+PMemBlock mmAllocMem( memHeap_t *heap, int size, int align2, int startSearch)
+{
+   int mask,startofs,endofs;
+   TMemBlock *p;
+
+   if (!heap || align2 < 0 || size <= 0)
+      return NULL;
+   /* 1 << align2 has to be representable in an int (8 bits per byte assumed) */
+   if (align2 >= (int)(sizeof(int) * 8) - 1)
+      return NULL;
+   mask = (1 << align2)-1;
+   startofs = 0;
+   p = (TMemBlock *)heap;
+   while (p) {
+      if ((p)->free) {
+	 startofs = (p->ofs + mask) & ~mask;
+	 if ( startofs < startSearch ) {
+	    startofs = startSearch;
+	 }
+	 endofs = startofs+size;
+	 if (endofs <= (p->ofs+p->size))
+	    break;
+      }
+      p = p->next;
+   }
+   if (!p)
+      return NULL;
+   p = SliceBlock(p,startofs,size,0,mask+1);
+   /* SliceBlock() returns NULL when it cannot allocate a descriptor */
+   if (!p)
+      return NULL;
+   p->heap = heap;
+   return p;
+}
+
+/*
+ * Merge 'p' with the block that follows it when both are free.
+ * The follower's size is added to 'p' and its descriptor is released.
+ * Returns 1 when a merge happened, 0 otherwise.
+ */
+static __inline__ int Join2Blocks(TMemBlock *p)
+{
+   TMemBlock *succ;
+
+   if (!p->free)
+      return 0;
+
+   succ = p->next;
+   if (!succ || !succ->free)
+      return 0;
+
+   p->size += succ->size;
+   p->next = succ->next;
+   free(succ);
+   return 1;
+}
+
+/*
+ * Return block 'b' to its heap and merge it with free neighbours.
+ *
+ * Returns 0 on success or when 'b' is NULL.  Returns -1, after printing a
+ * diagnostic to stderr, when the block has no heap, is not found in its
+ * heap, is already free, or is reserved.
+ */
+int mmFreeMem(PMemBlock b)
+{
+   TMemBlock *cur;
+   TMemBlock *before = NULL;
+
+   if (!b)
+      return 0;
+   if (!b->heap) {
+      fprintf(stderr, "no heap\n");
+      return -1;
+   }
+
+   /* walk the list to find 'b' and remember the block in front of it */
+   for (cur = b->heap; cur && cur != b; cur = cur->next)
+      before = cur;
+
+   if (!cur) {
+      fprintf(stderr, "block not found in heap\n");
+      return -1;
+   }
+   if (cur->free) {
+      fprintf(stderr, "block already free\n");
+      return -1;
+   }
+   if (cur->reserved) {
+      fprintf(stderr, "block is reserved\n");
+      return -1;
+   }
+
+   cur->free = 1;
+   /* merge forward first, then let the predecessor absorb the result */
+   Join2Blocks(cur);
+   if (before)
+      Join2Blocks(before);
+   return 0;
+}
+
+
+/*
+ * Release every block descriptor of 'heap'.  A NULL heap is ignored.
+ */
+void mmDestroy(memHeap_t *heap)
+{
+   TMemBlock *cur = (TMemBlock *)heap;
+
+   while (cur != NULL) {
+      TMemBlock *following = cur->next;	/* read before cur is released */
+
+      free(cur);
+      cur = following;
+   }
+}
--- a/src/mesa/drivers/dri/common/mm.h
+++ b/src/mesa/drivers/dri/common/mm.h
@ -0,0 +1,82 @@
+/*
+ * GLX Hardware Device Driver common code
+ * Copyright (C) 1999 Keith Whitwell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * KEITH WHITWELL, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef MM_INC
+#define MM_INC
+
+/* One node of the singly linked list of blocks that makes up a heap. */
+struct mem_block_t {
+  struct mem_block_t *next;	/* following block in the list */
+  struct mem_block_t *heap;	/* heap the block belongs to */
+  int ofs,size;			/* see mmOffset() and mmBlockSize() */
+  int align;
+  /* The flags are unsigned: a signed one-bit field can only hold 0 and -1,
+   * and whether a plain 'int' bit-field is signed is implementation-defined.
+   */
+  unsigned int free:1;
+  unsigned int reserved:1;
+};
+typedef struct mem_block_t TMemBlock;
+typedef struct mem_block_t *PMemBlock;
+
+/* a heap is just the first block in a chain */
+typedef struct mem_block_t memHeap_t;
+
+/* Return the size recorded in block 'b'. */
+static __inline__ int mmBlockSize(PMemBlock b)
+{
+  return b->size;
+}
+
+/* Return the start offset recorded in block 'b'. */
+static __inline__ int mmOffset(PMemBlock b)
+{
+  return b->ofs;
+}
+
+/*
+ * Create a heap.
+ * input:	ofs = offset of the start of the managed range
+ *       	size = total size in bytes
+ * return: a heap pointer if OK, NULL if error
+ */
+memHeap_t *mmInit( int ofs, int size );
+
+/*
+ * Allocate 'size' bytes with 2^align2 bytes alignment,
+ * restrict the search to free memory after 'startSearch'
+ * depth and back buffers should be in different 4mb banks
+ * to get better page hits if possible
+ * input:	size = size of block
+ *       	align2 = 2^align2 bytes alignment
+ *		startSearch = linear offset from start of heap to begin search
+ * return: pointer to the allocated block, 0 if error
+ */
+PMemBlock  mmAllocMem( memHeap_t *heap, int size, int align2, 
+		       int startSearch );
+
+/*
+ * Free a block that was returned by mmAllocMem
+ * input: pointer to a block
+ * return: 0 if OK, -1 if error
+ */
+int  mmFreeMem( PMemBlock b );
+
+/*
+ * destroy MM
+ */
+void mmDestroy( memHeap_t *mmInit );
+
+/* For debugging purposes. */
+void mmDumpMemInfo( memHeap_t *mmInit );
+
+#endif
--- a/src/mesa/drivers/dri/common/mmx.h
+++ b/src/mesa/drivers/dri/common/mmx.h
@ -0,0 +1,560 @@
+/*	mmx.h
+
+	MultiMedia eXtensions GCC interface library for IA32.
+
+	To use this library, simply include this header file
+	and compile with GCC.  You MUST have inlining enabled
+	in order for mmx_ok() to work; this can be done by
+	simply using -O on the GCC command line.
+
+	Compiling with -DMMX_TRACE will cause detailed trace
+	output to be sent to stderr for each mmx operation.
+	This adds lots of code, and obviously slows execution to
+	a crawl, but can be very useful for debugging.
+
+	THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
+	EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
+	LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+	AND FITNESS FOR ANY PARTICULAR PURPOSE.
+
+	1997-98 by H. Dietz and R. Fisher
+
+ History:
+	97-98*	R.Fisher	Early versions
+	980501	R.Fisher	Original Release
+	980611*	H.Dietz		Rewrite, correctly implementing inlines, and
+		R.Fisher	 including direct register accesses.
+	980616	R.Fisher	Release of 980611 as 980616.
+	980714	R.Fisher	Minor corrections to Makefile, etc.
+	980715	R.Fisher	mmx_ok() now prevents optimizer from using
+				 clobbered values.
+				mmx_ok() now checks if cpuid instruction is
+				 available before trying to use it.
+	980726*	R.Fisher	mm_support() searches for AMD 3DNow, Cyrix
+				 Extended MMX, and standard MMX.  It returns a
+				 value which is positive if any of these are
+				 supported, and can be masked with constants to
+				 see which.  mmx_ok() is now a call to this
+	980726*	R.Fisher	Added i2r support for shift functions
+	980919	R.Fisher	Fixed AMD extended feature recognition bug.
+	980921	R.Fisher	Added definition/check for _MMX_H.
+				Added "float s[2]" to mmx_t for use with
+				  3DNow and EMMX.  So same mmx_t can be used.
+	981013	R.Fisher	Fixed cpuid function 1 bug (looked at wrong reg)
+				Fixed psllq_i2r error in mmxtest.c
+
+	* Unreleased (internal or interim) versions
+
+ Notes:
+	It appears that the latest gas has the pand problem fixed, therefore
+	  I'll undefine BROKEN_PAND by default.
+	String compares may be quicker than the multiple test/jumps in vendor
+	  test sequence in mmx_ok(), but I'm not concerned with that right now.
+
+ Acknowledgments:
+	Jussi Laako for pointing out the errors ultimately found to be
+	  connected to the failure to notify the optimizer of clobbered values.
+	Roger Hardiman for reminding us that CPUID isn't everywhere, and that
+	  someone may actually try to use this on a machine without CPUID.
+	  Also for suggesting code for checking this.
+	Robert Dale for pointing out the AMD recognition bug.
+	Jimmy Mayfield and Carl Witty for pointing out the Intel recognition
+	  bug.
+	Carl Witty for pointing out the psllq_i2r test bug.
+*/
+
+#ifndef _MMX_H
+#define _MMX_H
+
+//#define MMX_TRACE
+
+/*	Warning:  at this writing, the version of GAS packaged
+	with most Linux distributions does not handle the
+	parallel AND operation mnemonic correctly.  If the
+	symbol BROKEN_PAND is defined, a slower alternative
+	coding will be used.  If execution of mmxtest results
+	in an illegal instruction fault, define this symbol.
+*/
+#undef	BROKEN_PAND
+
+
+/*	The type of a value that fits in an MMX register
+	(note that long long constant values MUST be suffixed
+	 by LL and unsigned long long values by ULL, lest
+	 they be truncated by the compiler)
+*/
+typedef	union {
+	long long		q;	/* Quadword (64-bit) value */
+	unsigned long long	uq;	/* Unsigned Quadword */
+	int			d[2];	/* 2 Doubleword (32-bit) values */
+	unsigned int		ud[2];	/* 2 Unsigned Doubleword */
+	short			w[4];	/* 4 Word (16-bit) values */
+	unsigned short		uw[4];	/* 4 Unsigned Word */
+	char			b[8];	/* 8 Byte (8-bit) values */
+	unsigned char		ub[8];	/* 8 Unsigned Byte */
+	float			s[2];	/* 2 Single-precision (32-bit) values */
+} mmx_t;
+
+/*	Helper functions for the instruction macros that follow...
+	(note that memory-to-register, m2r, instructions are nearly
+	 as efficient as register-to-register, r2r, instructions;
+	 however, memory-to-memory instructions are really simulated
+	 as a convenience, and are only 1/3 as efficient)
+*/
+#ifdef	MMX_TRACE
+
+/*	Include the stuff for printing a trace to stderr...
+*/
+
+#include <stdio.h>
+
+/*	Traced immediate-to-register form: prints the immediate and the
+	register before the operation, executes it, then prints the register
+	afterwards.  The immediate is an integer, so it is stored through the
+	'uq' member; assigning it to the union itself does not compile.
+*/
+#define	mmx_i2r(op, imm, reg) \
+	{ \
+		mmx_t mmx_trace; \
+		mmx_trace.uq = (imm); \
+		fprintf(stderr, #op "_i2r(" #imm "=0x%016llx, ", mmx_trace.q); \
+		__asm__ __volatile__ ("movq %%" #reg ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #reg "=0x%016llx) => ", mmx_trace.q); \
+		__asm__ __volatile__ (#op " %0, %%" #reg \
+				      : /* nothing */ \
+				      : "X" (imm)); \
+		__asm__ __volatile__ ("movq %%" #reg ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #reg "=0x%016llx\n", mmx_trace.q); \
+	}
+/*	Traced memory-to-register form: prints the memory operand and the
+	register before the operation, executes "op mem, reg", then prints the
+	register afterwards.  'mem' is expanded more than once.
+*/
+#define	mmx_m2r(op, mem, reg) \
+	{ \
+		mmx_t mmx_trace; \
+		mmx_trace = (mem); \
+		fprintf(stderr, #op "_m2r(" #mem "=0x%016llx, ", mmx_trace.q); \
+		__asm__ __volatile__ ("movq %%" #reg ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #reg "=0x%016llx) => ", mmx_trace.q); \
+		__asm__ __volatile__ (#op " %0, %%" #reg \
+				      : /* nothing */ \
+				      : "X" (mem)); \
+		__asm__ __volatile__ ("movq %%" #reg ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #reg "=0x%016llx\n", mmx_trace.q); \
+	}
+/*	Traced register-to-memory form: prints the register and the memory
+	operand before the operation, executes "op reg, mem", then prints the
+	memory operand afterwards.  'mem' is expanded more than once.
+*/
+#define	mmx_r2m(op, reg, mem) \
+	{ \
+		mmx_t mmx_trace; \
+		__asm__ __volatile__ ("movq %%" #reg ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #op "_r2m(" #reg "=0x%016llx, ", mmx_trace.q); \
+		mmx_trace = (mem); \
+		fprintf(stderr, #mem "=0x%016llx) => ", mmx_trace.q); \
+		__asm__ __volatile__ (#op " %%" #reg ", %0" \
+				      : "=X" (mem) \
+				      : /* nothing */ ); \
+		mmx_trace = (mem); \
+		fprintf(stderr, #mem "=0x%016llx\n", mmx_trace.q); \
+	}
+/*	Traced register-to-register form: prints both registers before the
+	operation, executes "op regs, regd", then prints the destination
+	register afterwards.
+*/
+#define	mmx_r2r(op, regs, regd) \
+	{ \
+		mmx_t mmx_trace; \
+		__asm__ __volatile__ ("movq %%" #regs ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #op "_r2r(" #regs "=0x%016llx, ", mmx_trace.q); \
+		__asm__ __volatile__ ("movq %%" #regd ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #regd "=0x%016llx) => ", mmx_trace.q); \
+		__asm__ __volatile__ (#op " %" #regs ", %" #regd); \
+		__asm__ __volatile__ ("movq %%" #regd ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #regd "=0x%016llx\n", mmx_trace.q); \
+	}
+/*	Traced memory-to-memory form: prints both operands, then loads 'memd'
+	into mm0, applies "op mems, mm0" and stores mm0 back to 'memd'; mm0 is
+	overwritten.  NOTE(review): operand 0 is read by the first movq but is
+	declared output-only ("=X") -- confirm this is intended.
+*/
+#define	mmx_m2m(op, mems, memd) \
+	{ \
+		mmx_t mmx_trace; \
+		mmx_trace = (mems); \
+		fprintf(stderr, #op "_m2m(" #mems "=0x%016llx, ", mmx_trace.q); \
+		mmx_trace = (memd); \
+		fprintf(stderr, #memd "=0x%016llx) => ", mmx_trace.q); \
+		__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
+				      #op " %1, %%mm0\n\t" \
+				      "movq %%mm0, %0" \
+				      : "=X" (memd) \
+				      : "X" (mems)); \
+		mmx_trace = (memd); \
+		fprintf(stderr, #memd "=0x%016llx\n", mmx_trace.q); \
+	}
+
+#else
+
+/*	These macros are a lot simpler without the tracing...
+*/
+
+/*	op $imm, reg -- 'imm' is pasted into the instruction text, so it has
+	to be a literal constant.  Like the other forms, this expands to a
+	statement without its terminating ';', which the caller supplies.
+*/
+#define	mmx_i2r(op, imm, reg) \
+	__asm__ __volatile__ (#op " $" #imm ", %%" #reg \
+			      : /* nothing */ \
+			      : /* nothing */)
+/*	op mem, reg -- 'mem' is passed as an input operand with the "X" constraint */
+#define	mmx_m2r(op, mem, reg) \
+	__asm__ __volatile__ (#op " %0, %%" #reg \
+			      : /* nothing */ \
+			      : "X" (mem))
+/*	op reg, mem -- 'mem' is passed as an output operand with the "=X" constraint */
+#define	mmx_r2m(op, reg, mem) \
+	__asm__ __volatile__ (#op " %%" #reg ", %0" \
+			      : "=X" (mem) \
+			      : /* nothing */ )
+/*	op regs, regd -- no operand lists, so register names take a single '%' */
+#define	mmx_r2r(op, regs, regd) \
+	__asm__ __volatile__ (#op " %" #regs ", %" #regd)
+/*	Loads 'memd' into mm0, applies "op mems, mm0" and stores mm0 back to
+	'memd'; mm0 is overwritten and no clobber list is given.
+	NOTE(review): operand 0 is read by the first movq but is declared
+	output-only ("=X") -- confirm this is intended.
+*/
+#define	mmx_m2m(op, mems, memd) \
+	__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
+			      #op " %1, %%mm0\n\t" \
+			      "movq %%mm0, %0" \
+			      : "=X" (memd) \
+			      : "X" (mems))
+
+#endif
+
+
+/*	1x64 MOVe Quadword
+	(this is both a load and a store...
+	 in fact, it is the only way to store)
+	The bare movq(vars, vard) form copies 'vars' to 'vard' through mm0,
+	which is overwritten; no clobber list is given.
+*/
+#define	movq_m2r(var, reg)	mmx_m2r(movq, var, reg)
+#define	movq_r2m(reg, var)	mmx_r2m(movq, reg, var)
+#define	movq_r2r(regs, regd)	mmx_r2r(movq, regs, regd)
+#define	movq(vars, vard) \
+	__asm__ __volatile__ ("movq %1, %%mm0\n\t" \
+			      "movq %%mm0, %0" \
+			      : "=X" (vard) \
+			      : "X" (vars))
+
+
+/*	1x32 MOVe Doubleword
+	(like movq, this is both load and store...
+	 but is most useful for moving things between
+	 mmx registers and ordinary registers)
+	The bare movd(vars, vard) form copies 'vars' to 'vard' through mm0,
+	which is overwritten; no clobber list is given.
+*/
+#define	movd_m2r(var, reg)	mmx_m2r(movd, var, reg)
+#define	movd_r2m(reg, var)	mmx_r2m(movd, reg, var)
+#define	movd_r2r(regs, regd)	mmx_r2r(movd, regs, regd)
+#define	movd(vars, vard) \
+	__asm__ __volatile__ ("movd %1, %%mm0\n\t" \
+			      "movd %%mm0, %0" \
+			      : "=X" (vard) \
+			      : "X" (vars))
+
+
+/*	2x32, 4x16, and 8x8 Parallel ADDs
+	Each operation comes in three forms: NAME_m2r(var, reg) takes a memory
+	operand and a register, NAME_r2r(regs, regd) takes two registers, and
+	the bare NAME(vars, vard) takes two memory operands and is expanded
+	through mmx_m2m, which uses mm0 as scratch.
+*/
+#define	paddd_m2r(var, reg)	mmx_m2r(paddd, var, reg)
+#define	paddd_r2r(regs, regd)	mmx_r2r(paddd, regs, regd)
+#define	paddd(vars, vard)	mmx_m2m(paddd, vars, vard)
+
+#define	paddw_m2r(var, reg)	mmx_m2r(paddw, var, reg)
+#define	paddw_r2r(regs, regd)	mmx_r2r(paddw, regs, regd)
+#define	paddw(vars, vard)	mmx_m2m(paddw, vars, vard)
+
+#define	paddb_m2r(var, reg)	mmx_m2r(paddb, var, reg)
+#define	paddb_r2r(regs, regd)	mmx_r2r(paddb, regs, regd)
+#define	paddb(vars, vard)	mmx_m2m(paddb, vars, vard)
+
+
+/*	4x16 and 8x8 Parallel ADDs using Saturation arithmetic
+*/
+#define	paddsw_m2r(var, reg)	mmx_m2r(paddsw, var, reg)
+#define	paddsw_r2r(regs, regd)	mmx_r2r(paddsw, regs, regd)
+#define	paddsw(vars, vard)	mmx_m2m(paddsw, vars, vard)
+
+#define	paddsb_m2r(var, reg)	mmx_m2r(paddsb, var, reg)
+#define	paddsb_r2r(regs, regd)	mmx_r2r(paddsb, regs, regd)
+#define	paddsb(vars, vard)	mmx_m2m(paddsb, vars, vard)
+
+
+/*	4x16 and 8x8 Parallel ADDs using Unsigned Saturation arithmetic
+*/
+#define	paddusw_m2r(var, reg)	mmx_m2r(paddusw, var, reg)
+#define	paddusw_r2r(regs, regd)	mmx_r2r(paddusw, regs, regd)
+#define	paddusw(vars, vard)	mmx_m2m(paddusw, vars, vard)
+
+#define	paddusb_m2r(var, reg)	mmx_m2r(paddusb, var, reg)
+#define	paddusb_r2r(regs, regd)	mmx_r2r(paddusb, regs, regd)
+#define	paddusb(vars, vard)	mmx_m2m(paddusb, vars, vard)
+
+
+/*	2x32, 4x16, and 8x8 Parallel SUBs
+	(_m2r: memory operand and register; _r2r: two registers; bare name:
+	 two memory operands, expanded through mmx_m2m with mm0 as scratch)
+*/
+#define	psubd_m2r(var, reg)	mmx_m2r(psubd, var, reg)
+#define	psubd_r2r(regs, regd)	mmx_r2r(psubd, regs, regd)
+#define	psubd(vars, vard)	mmx_m2m(psubd, vars, vard)
+
+#define	psubw_m2r(var, reg)	mmx_m2r(psubw, var, reg)
+#define	psubw_r2r(regs, regd)	mmx_r2r(psubw, regs, regd)
+#define	psubw(vars, vard)	mmx_m2m(psubw, vars, vard)
+
+#define	psubb_m2r(var, reg)	mmx_m2r(psubb, var, reg)
+#define	psubb_r2r(regs, regd)	mmx_r2r(psubb, regs, regd)
+#define	psubb(vars, vard)	mmx_m2m(psubb, vars, vard)
+
+
+/*	4x16 and 8x8 Parallel SUBs using Saturation arithmetic
+*/
+#define	psubsw_m2r(var, reg)	mmx_m2r(psubsw, var, reg)
+#define	psubsw_r2r(regs, regd)	mmx_r2r(psubsw, regs, regd)
+#define	psubsw(vars, vard)	mmx_m2m(psubsw, vars, vard)
+
+#define	psubsb_m2r(var, reg)	mmx_m2r(psubsb, var, reg)
+#define	psubsb_r2r(regs, regd)	mmx_r2r(psubsb, regs, regd)
+#define	psubsb(vars, vard)	mmx_m2m(psubsb, vars, vard)
+
+
+/*	4x16 and 8x8 Parallel SUBs using Unsigned Saturation arithmetic
+*/
+#define	psubusw_m2r(var, reg)	mmx_m2r(psubusw, var, reg)
+#define	psubusw_r2r(regs, regd)	mmx_r2r(psubusw, regs, regd)
+#define	psubusw(vars, vard)	mmx_m2m(psubusw, vars, vard)
+
+#define	psubusb_m2r(var, reg)	mmx_m2r(psubusb, var, reg)
+#define	psubusb_r2r(regs, regd)	mmx_r2r(psubusb, regs, regd)
+#define	psubusb(vars, vard)	mmx_m2m(psubusb, vars, vard)
+
+
+/*	4x16 Parallel MULs giving Low 4x16 portions of results
+	(_m2r: memory operand and register; _r2r: two registers; bare name:
+	 two memory operands, expanded through mmx_m2m with mm0 as scratch)
+*/
+#define	pmullw_m2r(var, reg)	mmx_m2r(pmullw, var, reg)
+#define	pmullw_r2r(regs, regd)	mmx_r2r(pmullw, regs, regd)
+#define	pmullw(vars, vard)	mmx_m2m(pmullw, vars, vard)
+
+
+/*	4x16 Parallel MULs giving High 4x16 portions of results
+*/
+#define	pmulhw_m2r(var, reg)	mmx_m2r(pmulhw, var, reg)
+#define	pmulhw_r2r(regs, regd)	mmx_r2r(pmulhw, regs, regd)
+#define	pmulhw(vars, vard)	mmx_m2m(pmulhw, vars, vard)
+
+
+/*	4x16->2x32 Parallel Mul-ADD
+	(muls like pmullw, then adds adjacent 16-bit fields
+	 in the multiply result to make the final 2x32 result)
+*/
+#define	pmaddwd_m2r(var, reg)	mmx_m2r(pmaddwd, var, reg)
+#define	pmaddwd_r2r(regs, regd)	mmx_r2r(pmaddwd, regs, regd)
+#define	pmaddwd(vars, vard)	mmx_m2m(pmaddwd, vars, vard)
+
+
+/*	1x64 bitwise AND
+	(with BROKEN_PAND defined, the AND is built from two pandn
+	 operations: the first, against all-ones, complements the destination
+	 and the second complements it back while ANDing in the source)
+*/
+#ifdef	BROKEN_PAND
+#define	pand_m2r(var, reg) \
+	{ \
+		mmx_m2r(pandn, (mmx_t) -1LL, reg); \
+		mmx_m2r(pandn, var, reg); \
+	}
+#define	pand_r2r(regs, regd) \
+	{ \
+		mmx_m2r(pandn, (mmx_t) -1LL, regd); \
+		mmx_r2r(pandn, regs, regd); \
+	}
+#define	pand(vars, vard) \
+	{ \
+		movq_m2r(vard, mm0); \
+		mmx_m2r(pandn, (mmx_t) -1LL, mm0); \
+		mmx_m2r(pandn, vars, mm0); \
+		movq_r2m(mm0, vard); \
+	}
+#else
+#define	pand_m2r(var, reg)	mmx_m2r(pand, var, reg)
+#define	pand_r2r(regs, regd)	mmx_r2r(pand, regs, regd)
+#define	pand(vars, vard)	mmx_m2m(pand, vars, vard)
+#endif
+
+
+/*	1x64 bitwise AND with Not the destination
+	(_m2r: memory operand and register; _r2r: two registers; bare name:
+	 two memory operands, expanded through mmx_m2m with mm0 as scratch)
+*/
+#define	pandn_m2r(var, reg)	mmx_m2r(pandn, var, reg)
+#define	pandn_r2r(regs, regd)	mmx_r2r(pandn, regs, regd)
+#define	pandn(vars, vard)	mmx_m2m(pandn, vars, vard)
+
+
+/*	1x64 bitwise OR
+*/
+#define	por_m2r(var, reg)	mmx_m2r(por, var, reg)
+#define	por_r2r(regs, regd)	mmx_r2r(por, regs, regd)
+#define	por(vars, vard)	mmx_m2m(por, vars, vard)
+
+
+/*	1x64 bitwise eXclusive OR
+*/
+#define	pxor_m2r(var, reg)	mmx_m2r(pxor, var, reg)
+#define	pxor_r2r(regs, regd)	mmx_r2r(pxor, regs, regd)
+#define	pxor(vars, vard)	mmx_m2m(pxor, vars, vard)
+
+
+/*	2x32, 4x16, and 8x8 Parallel CoMPare for EQuality
+	(resulting fields are either 0 or -1)
+	(_m2r: memory operand and register; _r2r: two registers; bare name:
+	 two memory operands, expanded through mmx_m2m with mm0 as scratch)
+*/
+#define	pcmpeqd_m2r(var, reg)	mmx_m2r(pcmpeqd, var, reg)
+#define	pcmpeqd_r2r(regs, regd)	mmx_r2r(pcmpeqd, regs, regd)
+#define	pcmpeqd(vars, vard)	mmx_m2m(pcmpeqd, vars, vard)
+
+#define	pcmpeqw_m2r(var, reg)	mmx_m2r(pcmpeqw, var, reg)
+#define	pcmpeqw_r2r(regs, regd)	mmx_r2r(pcmpeqw, regs, regd)
+#define	pcmpeqw(vars, vard)	mmx_m2m(pcmpeqw, vars, vard)
+
+#define	pcmpeqb_m2r(var, reg)	mmx_m2r(pcmpeqb, var, reg)
+#define	pcmpeqb_r2r(regs, regd)	mmx_r2r(pcmpeqb, regs, regd)
+#define	pcmpeqb(vars, vard)	mmx_m2m(pcmpeqb, vars, vard)
+
+
+/*	2x32, 4x16, and 8x8 Parallel CoMPare for Greater Than
+	(resulting fields are either 0 or -1)
+*/
+#define	pcmpgtd_m2r(var, reg)	mmx_m2r(pcmpgtd, var, reg)
+#define	pcmpgtd_r2r(regs, regd)	mmx_r2r(pcmpgtd, regs, regd)
+#define	pcmpgtd(vars, vard)	mmx_m2m(pcmpgtd, vars, vard)
+
+#define	pcmpgtw_m2r(var, reg)	mmx_m2r(pcmpgtw, var, reg)
+#define	pcmpgtw_r2r(regs, regd)	mmx_r2r(pcmpgtw, regs, regd)
+#define	pcmpgtw(vars, vard)	mmx_m2m(pcmpgtw, vars, vard)
+
+#define	pcmpgtb_m2r(var, reg)	mmx_m2r(pcmpgtb, var, reg)
+#define	pcmpgtb_r2r(regs, regd)	mmx_r2r(pcmpgtb, regs, regd)
+#define	pcmpgtb(vars, vard)	mmx_m2m(pcmpgtb, vars, vard)
+
+
+/*	1x64, 2x32, and 4x16 Parallel Shift Left Logical
+
+	Besides the usual _m2r / _r2r / unsuffixed forms, each shift has
+	an _i2r form that forwards to mmx_i2r with the shift count given
+	as its first argument (presumably an immediate -- mmx_i2r is
+	defined outside this section).
+*/
+#define	psllq_i2r(imm, reg)	mmx_i2r(psllq, imm, reg)
+#define	psllq_m2r(var, reg)	mmx_m2r(psllq, var, reg)
+#define	psllq_r2r(regs, regd)	mmx_r2r(psllq, regs, regd)
+#define	psllq(vars, vard)	mmx_m2m(psllq, vars, vard)
+
+#define	pslld_i2r(imm, reg)	mmx_i2r(pslld, imm, reg)
+#define	pslld_m2r(var, reg)	mmx_m2r(pslld, var, reg)
+#define	pslld_r2r(regs, regd)	mmx_r2r(pslld, regs, regd)
+#define	pslld(vars, vard)	mmx_m2m(pslld, vars, vard)
+
+#define	psllw_i2r(imm, reg)	mmx_i2r(psllw, imm, reg)
+#define	psllw_m2r(var, reg)	mmx_m2r(psllw, var, reg)
+#define	psllw_r2r(regs, regd)	mmx_r2r(psllw, regs, regd)
+#define	psllw(vars, vard)	mmx_m2m(psllw, vars, vard)
+
+
+/*	1x64, 2x32, and 4x16 Parallel Shift Right Logical
+	(vacated high bits are filled with zeros)
+*/
+#define	psrlq_i2r(imm, reg)	mmx_i2r(psrlq, imm, reg)
+#define	psrlq_m2r(var, reg)	mmx_m2r(psrlq, var, reg)
+#define	psrlq_r2r(regs, regd)	mmx_r2r(psrlq, regs, regd)
+#define	psrlq(vars, vard)	mmx_m2m(psrlq, vars, vard)
+
+#define	psrld_i2r(imm, reg)	mmx_i2r(psrld, imm, reg)
+#define	psrld_m2r(var, reg)	mmx_m2r(psrld, var, reg)
+#define	psrld_r2r(regs, regd)	mmx_r2r(psrld, regs, regd)
+#define	psrld(vars, vard)	mmx_m2m(psrld, vars, vard)
+
+#define	psrlw_i2r(imm, reg)	mmx_i2r(psrlw, imm, reg)
+#define	psrlw_m2r(var, reg)	mmx_m2r(psrlw, var, reg)
+#define	psrlw_r2r(regs, regd)	mmx_r2r(psrlw, regs, regd)
+#define	psrlw(vars, vard)	mmx_m2m(psrlw, vars, vard)
+
+
+/*	2x32 and 4x16 Parallel Shift Right Arithmetic
+	(vacated high bits are filled with copies of each field's sign
+	 bit; no 1x64 variant is provided here)
+*/
+#define	psrad_i2r(imm, reg)	mmx_i2r(psrad, imm, reg)
+#define	psrad_m2r(var, reg)	mmx_m2r(psrad, var, reg)
+#define	psrad_r2r(regs, regd)	mmx_r2r(psrad, regs, regd)
+#define	psrad(vars, vard)	mmx_m2m(psrad, vars, vard)
+
+#define	psraw_i2r(imm, reg)	mmx_i2r(psraw, imm, reg)
+#define	psraw_m2r(var, reg)	mmx_m2r(psraw, var, reg)
+#define	psraw_r2r(regs, regd)	mmx_r2r(psraw, regs, regd)
+#define	psraw(vars, vard)	mmx_m2m(psraw, vars, vard)
+
+
+/*	2x32->4x16 and 4x16->8x8 PACK and Signed Saturate
+	(packs source and dest fields into dest in that order)
+
+	"Signed saturate" means values that do not fit the narrower
+	signed field are clamped to its range rather than truncated.
+*/
+#define	packssdw_m2r(var, reg)	mmx_m2r(packssdw, var, reg)
+#define	packssdw_r2r(regs, regd) mmx_r2r(packssdw, regs, regd)
+#define	packssdw(vars, vard)	mmx_m2m(packssdw, vars, vard)
+
+#define	packsswb_m2r(var, reg)	mmx_m2r(packsswb, var, reg)
+#define	packsswb_r2r(regs, regd) mmx_r2r(packsswb, regs, regd)
+#define	packsswb(vars, vard)	mmx_m2m(packsswb, vars, vard)
+
+
+/*	4x16->8x8 PACK and Unsigned Saturate
+	(packs source and dest fields into dest in that order)
+
+	Only the word-to-byte width is wrapped here.
+*/
+#define	packuswb_m2r(var, reg)	mmx_m2r(packuswb, var, reg)
+#define	packuswb_r2r(regs, regd) mmx_r2r(packuswb, regs, regd)
+#define	packuswb(vars, vard)	mmx_m2m(packuswb, vars, vard)
+
+
+/*	2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK Low
+	(interleaves low half of dest with low half of source
+	 as padding in each result field)
+
+	The dq / wd / bw suffix names the field widths involved:
+	32->64, 16->32 and 8->16 bits respectively.
+*/
+#define	punpckldq_m2r(var, reg)	mmx_m2r(punpckldq, var, reg)
+#define	punpckldq_r2r(regs, regd) mmx_r2r(punpckldq, regs, regd)
+#define	punpckldq(vars, vard)	mmx_m2m(punpckldq, vars, vard)
+
+#define	punpcklwd_m2r(var, reg)	mmx_m2r(punpcklwd, var, reg)
+#define	punpcklwd_r2r(regs, regd) mmx_r2r(punpcklwd, regs, regd)
+#define	punpcklwd(vars, vard)	mmx_m2m(punpcklwd, vars, vard)
+
+#define	punpcklbw_m2r(var, reg)	mmx_m2r(punpcklbw, var, reg)
+#define	punpcklbw_r2r(regs, regd) mmx_r2r(punpcklbw, regs, regd)
+#define	punpcklbw(vars, vard)	mmx_m2m(punpcklbw, vars, vard)
+
+
+/*	2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK High
+	(interleaves high half of dest with high half of source
+	 as padding in each result field)
+*/
+#define	punpckhdq_m2r(var, reg)	mmx_m2r(punpckhdq, var, reg)
+#define	punpckhdq_r2r(regs, regd) mmx_r2r(punpckhdq, regs, regd)
+#define	punpckhdq(vars, vard)	mmx_m2m(punpckhdq, vars, vard)
+
+#define	punpckhwd_m2r(var, reg)	mmx_m2r(punpckhwd, var, reg)
+#define	punpckhwd_r2r(regs, regd) mmx_r2r(punpckhwd, regs, regd)
+#define	punpckhwd(vars, vard)	mmx_m2m(punpckhwd, vars, vard)
+
+#define	punpckhbw_m2r(var, reg)	mmx_m2r(punpckhbw, var, reg)
+#define	punpckhbw_r2r(regs, regd) mmx_r2r(punpckhbw, regs, regd)
+#define	punpckhbw(vars, vard)	mmx_m2m(punpckhbw, vars, vard)
+
+
+/*	Empty MMx State
+	(used to clean-up when going from mmx to float use
+	 of the registers that are shared by both; note that
+	 there is no float-to-mmx operation needed, because
+	 only the float tag word info is corruptible)
+
+	With MMX_TRACE defined, each use also prints "emms()" to stderr
+	before executing the instruction.  That variant is wrapped in
+	do { } while (0) so that "emms();" is exactly one statement in
+	both configurations and stays safe inside an unbraced if/else.
+*/
+#ifdef	MMX_TRACE
+
+#define	emms() \
+	do { \
+		fprintf(stderr, "emms()\n"); \
+		__asm__ __volatile__ ("emms"); \
+	} while (0)
+
+#else
+
+#define	emms()			__asm__ __volatile__ ("emms")
+
+#endif
+
+#endif
+
--- a/src/mesa/drivers/dri/common/spantmp.h
+++ b/src/mesa/drivers/dri/common/spantmp.h
@ -0,0 +1,259 @@
+/* Default configuration for the span templates below.  Each macro is only
+ * defined here when the including file has not already supplied its own
+ * version.
+ */
+
+/* Debug tracing switch: when non-zero, the functions print to stderr. */
+#ifndef DBG
+#define DBG 0
+#endif
+
+/* Write-side locking falls back to the generic HW_LOCK / HW_UNLOCK. */
+#ifndef HW_WRITE_LOCK
+#define HW_WRITE_LOCK()		HW_LOCK()
+#endif
+
+#ifndef HW_WRITE_UNLOCK
+#define HW_WRITE_UNLOCK()	HW_UNLOCK()
+#endif
+
+/* Read-side locking likewise falls back to HW_LOCK / HW_UNLOCK. */
+#ifndef HW_READ_LOCK
+#define HW_READ_LOCK()		HW_LOCK()
+#endif
+
+#ifndef HW_READ_UNLOCK
+#define HW_READ_UNLOCK()	HW_UNLOCK()
+#endif
+
+/* Read and write clip loops both default to the generic HW_CLIPLOOP.
+ * Either one is closed with HW_ENDCLIPLOOP in the functions below.
+ */
+#ifndef HW_READ_CLIPLOOP
+#define HW_READ_CLIPLOOP()	HW_CLIPLOOP()
+#endif
+
+#ifndef HW_WRITE_CLIPLOOP
+#define HW_WRITE_CLIPLOOP()	HW_CLIPLOOP()
+#endif
+
+
+/*
+ * Write a horizontal run of n RGBA pixels starting at (x, y).
+ *
+ * y is converted with Y_FLIP, then for every pass of the write clip loop
+ * CLIPSPAN trims the run into x1 (first x), n1 (pixels remaining) and
+ * i (index of the first surviving source pixel).  A pixel is written with
+ * WRITE_RGBA when mask is NULL or mask[i] is non-zero.
+ *
+ * The locking, clipping and pixel macros are supplied by the including
+ * driver file.
+ */
+static void TAG(WriteRGBASpan)( const GLcontext *ctx,
+                                GLuint n, GLint x, GLint y,
+                                const GLubyte rgba[][4],
+                                const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+         GLint x1, n1;
+         LOCAL_VARS;
+
+         y = Y_FLIP(y);
+
+         HW_WRITE_CLIPLOOP()
+            {
+               GLint i = 0;
+               CLIPSPAN(x,y,n,x1,n1,i);
+
+               if (DBG) fprintf(stderr, "WriteRGBASpan %d..%d (x1 %d)\n",
+                                (int)i, (int)n1, (int)x1);
+
+               /* n1 counts down the pixels left in this clip rect while
+                * i and x1 advance through source and destination.
+                */
+               while (n1 > 0) {
+                  if (!mask || mask[i]) {
+                     WRITE_RGBA( x1, y,
+                                 rgba[i][0], rgba[i][1],
+                                 rgba[i][2], rgba[i][3] );
+                  }
+                  i++;
+                  x1++;
+                  n1--;
+               }
+            }
+         HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+/*
+ * Write a horizontal run of n RGB pixels starting at (x, y).
+ *
+ * Identical in structure to the RGBA span writer, except that the source
+ * has three components per pixel and 255 is passed to WRITE_RGBA as the
+ * alpha value.  A pixel is written when mask is NULL or mask[i] is
+ * non-zero.
+ */
+static void TAG(WriteRGBSpan)( const GLcontext *ctx,
+                               GLuint n, GLint x, GLint y,
+                               const GLubyte rgb[][3],
+                               const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+         GLint x1, n1;
+         LOCAL_VARS;
+
+         y = Y_FLIP(y);
+
+         HW_WRITE_CLIPLOOP()
+            {
+               GLint i = 0;
+               CLIPSPAN(x,y,n,x1,n1,i);
+
+               if (DBG) fprintf(stderr, "WriteRGBSpan %d..%d (x1 %d)\n",
+                                (int)i, (int)n1, (int)x1);
+
+               /* Walk the clipped run; n1 is the number of pixels left. */
+               while (n1 > 0) {
+                  if (!mask || mask[i]) {
+                     WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 );
+                  }
+                  i++;
+                  x1++;
+                  n1--;
+               }
+            }
+         HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+/*
+ * Write n individually positioned RGBA pixels.
+ *
+ * For every pass of the write clip loop, each pixel whose mask[i] is
+ * non-zero has its y coordinate converted with Y_FLIP and is written with
+ * WRITE_RGBA if CLIPPIXEL accepts the resulting position.  mask is
+ * dereferenced unconditionally, so it must not be NULL.
+ */
+static void TAG(WriteRGBAPixels)( const GLcontext *ctx,
+                                  GLuint n,
+                                  const GLint x[],
+                                  const GLint y[],
+                                  const GLubyte rgba[][4],
+                                  const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+         GLint i;
+         LOCAL_VARS;
+
+         if (DBG) fprintf(stderr, "WriteRGBAPixels\n");
+
+         HW_WRITE_CLIPLOOP()
+            {
+               for (i = 0; i < n; i++) {
+                  int fy;
+
+                  /* Skip pixels the caller masked out. */
+                  if (!mask[i])
+                     continue;
+
+                  fy = Y_FLIP(y[i]);
+                  if (CLIPPIXEL(x[i],fy)) {
+                     WRITE_RGBA( x[i], fy,
+                                 rgba[i][0], rgba[i][1],
+                                 rgba[i][2], rgba[i][3] );
+                  }
+               }
+            }
+         HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+
+/*
+ * Write a horizontal run of n pixels, all in one color, starting at (x, y).
+ *
+ * INIT_MONO_PIXEL prepares the pixel value p from color once, before the
+ * clip loop.  For every pass of the write clip loop CLIPSPAN trims the run
+ * into x1 / n1 / i, and each surviving pixel is written with WRITE_PIXEL
+ * when mask is NULL or mask[i] is non-zero.
+ *
+ * A NULL mask is accepted (meaning "write every pixel") to match the
+ * WriteRGBASpan and WriteRGBSpan templates in this file.
+ */
+static void TAG(WriteMonoRGBASpan)( const GLcontext *ctx,
+                                    GLuint n, GLint x, GLint y,
+                                    const GLchan color[4],
+                                    const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+         GLint x1;
+         GLint n1;
+         LOCAL_VARS;
+         INIT_MONO_PIXEL(p, color);
+
+         y = Y_FLIP( y );
+
+         if (DBG) fprintf(stderr, "WriteMonoRGBASpan\n");
+
+         HW_WRITE_CLIPLOOP()
+            {
+               GLint i = 0;
+               CLIPSPAN(x,y,n,x1,n1,i);
+               /* n1 is the number of pixels left in this clip rect. */
+               for (;n1>0;i++,x1++,n1--) {
+                  if (!mask || mask[i]) {
+                     WRITE_PIXEL( x1, y, p );
+                  }
+               }
+            }
+         HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+
+/*
+ * Write n individually positioned pixels, all in one color.
+ *
+ * INIT_MONO_PIXEL prepares the pixel value p from color once.  For every
+ * pass of the write clip loop, each pixel whose mask[i] is non-zero has its
+ * y coordinate converted with Y_FLIP and is written with WRITE_PIXEL if
+ * CLIPPIXEL accepts the position.  mask is dereferenced unconditionally.
+ */
+static void TAG(WriteMonoRGBAPixels)( const GLcontext *ctx,
+                                      GLuint n,
+                                      const GLint x[], const GLint y[],
+                                      const GLchan color[],
+                                      const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+         GLint i;
+         LOCAL_VARS;
+         INIT_MONO_PIXEL(p, color);
+
+         if (DBG) fprintf(stderr, "WriteMonoRGBAPixels\n");
+
+         HW_WRITE_CLIPLOOP()
+            {
+               for (i = 0; i < n; i++) {
+                  int fy;
+
+                  /* Skip pixels the caller masked out. */
+                  if (!mask[i])
+                     continue;
+
+                  fy = Y_FLIP(y[i]);
+                  if (CLIPPIXEL( x[i], fy )) {
+                     WRITE_PIXEL( x[i], fy, p );
+                  }
+               }
+            }
+         HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+
+/*
+ * Read a horizontal run of n RGBA pixels starting at (x, y) into rgba.
+ *
+ * y is converted with Y_FLIP; for every pass of the read clip loop
+ * CLIPSPAN trims the run into x1 / n1 / i and each surviving pixel is
+ * fetched with READ_RGBA into rgba[i].  Entries for pixels that fall
+ * outside every clip rect are not written by this function.
+ */
+static void TAG(ReadRGBASpan)( const GLcontext *ctx,
+                               GLuint n, GLint x, GLint y,
+                               GLubyte rgba[][4])
+{
+   HW_READ_LOCK()
+      {
+         GLint x1;
+         GLint n1;
+         LOCAL_VARS;
+
+         y = Y_FLIP(y);
+
+         if (DBG) fprintf(stderr, "ReadRGBASpan\n");
+
+         HW_READ_CLIPLOOP()
+            {
+               GLint i = 0;
+               CLIPSPAN(x,y,n,x1,n1,i);
+               /* n1 is the number of pixels left in this clip rect. */
+               while (n1 > 0) {
+                  READ_RGBA( rgba[i], x1, y );
+                  i++;
+                  x1++;
+                  n1--;
+               }
+            }
+         HW_ENDCLIPLOOP();
+      }
+   HW_READ_UNLOCK();
+}
+
+
+/*
+ * Read n individually positioned RGBA pixels into rgba.
+ *
+ * For every pass of the read clip loop, each pixel whose mask[i] is
+ * non-zero has its y coordinate converted with Y_FLIP and is fetched with
+ * READ_RGBA into rgba[i] if CLIPPIXEL accepts the position.  mask is
+ * dereferenced unconditionally, so it must not be NULL.
+ */
+static void TAG(ReadRGBAPixels)( const GLcontext *ctx,
+                                 GLuint n, const GLint x[], const GLint y[],
+                                 GLubyte rgba[][4], const GLubyte mask[] )
+{
+   HW_READ_LOCK()
+      {
+         GLint i;
+         LOCAL_VARS;
+
+         if (DBG) fprintf(stderr, "ReadRGBAPixels\n");
+
+         HW_READ_CLIPLOOP()
+            {
+               for (i = 0; i < n; i++) {
+                  int fy;
+
+                  /* Skip pixels the caller masked out. */
+                  if (!mask[i])
+                     continue;
+
+                  fy = Y_FLIP( y[i] );
+                  if (CLIPPIXEL( x[i], fy )) {
+                     READ_RGBA( rgba[i], x[i], fy );
+                  }
+               }
+            }
+         HW_ENDCLIPLOOP();
+      }
+   HW_READ_UNLOCK();
+}
+
+
+
+
+/* Drop the per-instantiation pixel macros and TAG so the including file can
+ * define them afresh (presumably before including this template again for
+ * another pixel format -- confirm against the drivers that use it).
+ */
+#undef WRITE_PIXEL
+#undef WRITE_RGBA
+#undef READ_RGBA
+#undef TAG
--- a/src/mesa/drivers/dri/common/stenciltmp.h
+++ b/src/mesa/drivers/dri/common/stenciltmp.h
@ -0,0 +1,147 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/common/stenciltmp.h,v 1.3 2001/03/21 16:14:20 dawes Exp $ */
+
+/* Debug tracing switch: when non-zero, the functions print to stderr. */
+#ifndef DBG
+#define DBG 0
+#endif
+
+/* Write-side locking falls back to the generic HW_LOCK / HW_UNLOCK unless
+ * the including file supplies its own.
+ */
+#ifndef HW_WRITE_LOCK
+#define HW_WRITE_LOCK()		HW_LOCK()
+#endif
+#ifndef HW_WRITE_UNLOCK
+#define HW_WRITE_UNLOCK()	HW_UNLOCK()
+#endif
+
+/* Read-side locking likewise falls back to HW_LOCK / HW_UNLOCK. */
+#ifndef HW_READ_LOCK
+#define HW_READ_LOCK()		HW_LOCK()
+#endif
+#ifndef HW_READ_UNLOCK
+#define HW_READ_UNLOCK()	HW_UNLOCK()
+#endif
+
+/*
+ * Write a horizontal run of n stencil values starting at (x, y).
+ *
+ * y is converted with Y_FLIP; for every pass of the clip loop CLIPSPAN
+ * trims the run into x1 (first x), n1 and i (index of the first surviving
+ * source value).  A value is written with WRITE_STENCIL when mask is NULL
+ * or mask[i] is non-zero.
+ *
+ * n1 is treated as the number of pixels remaining after clipping, the same
+ * way the color span templates (spantmp.h) consume CLIPSPAN's output.  The
+ * previous "i < n1" loop bound used n1 as an end index, which drops pixels
+ * at the right end of the run whenever clipping has advanced i past zero.
+ * NOTE(review): CLIPSPAN is driver-supplied and not visible here; confirm
+ * it yields a count in n1.
+ */
+static void TAG(WriteStencilSpan)( GLcontext *ctx,
+                                   GLuint n, GLint x, GLint y,
+                                   const GLstencil *stencil,
+                                   const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+         GLint x1;
+         GLint n1;
+         LOCAL_STENCIL_VARS;
+
+         y = Y_FLIP(y);
+
+         HW_CLIPLOOP()
+            {
+               GLint i = 0;
+               CLIPSPAN(x,y,n,x1,n1,i);
+
+               if (DBG) fprintf(stderr, "WriteStencilSpan %d..%d (x1 %d)\n",
+                                (int)i, (int)n1, (int)x1);
+
+               if (mask)
+               {
+                  for (;n1>0;i++,x1++,n1--)
+                     if (mask[i])
+                        WRITE_STENCIL( x1, y, stencil[i] );
+               }
+               else
+               {
+                  for (;n1>0;i++,x1++,n1--)
+                     WRITE_STENCIL( x1, y, stencil[i] );
+               }
+            }
+         HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+
+/*
+ * Write n individually positioned stencil values.
+ *
+ * For every pass of the clip loop, each entry whose mask[i] is non-zero
+ * has its y coordinate converted with Y_FLIP and is written with
+ * WRITE_STENCIL if CLIPPIXEL accepts the position.  mask is dereferenced
+ * unconditionally, so it must not be NULL.
+ */
+static void TAG(WriteStencilPixels)( GLcontext *ctx,
+                                     GLuint n,
+                                     const GLint x[],
+                                     const GLint y[],
+                                     const GLstencil stencil[],
+                                     const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+         GLint i;
+         LOCAL_STENCIL_VARS;
+
+         if (DBG) fprintf(stderr, "WriteStencilPixels\n");
+
+         HW_CLIPLOOP()
+            {
+               for (i = 0; i < n; i++) {
+                  int fy;
+
+                  /* Skip entries the caller masked out. */
+                  if (!mask[i])
+                     continue;
+
+                  fy = Y_FLIP(y[i]);
+                  if (CLIPPIXEL(x[i],fy)) {
+                     WRITE_STENCIL( x[i], fy, stencil[i] );
+                  }
+               }
+            }
+         HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+
+/* Read stencil spans and pixels
+ */
+
+/*
+ * Read a horizontal run of n stencil values starting at (x, y).
+ *
+ * y is converted with Y_FLIP; for every pass of the clip loop CLIPSPAN
+ * trims the run into x1 / n1 / i and each surviving value is fetched with
+ * READ_STENCIL into stencil[i].  Entries that fall outside every clip rect
+ * are not written by this function.
+ *
+ * n1 is consumed as a remaining-pixel count and x1 is advanced alongside
+ * i, matching ReadRGBASpan in spantmp.h.  The previous form looped while
+ * "i < n1" and read from x1 + i, which both shortens the run and offsets
+ * the x coordinate a second time once clipping has advanced i and x1.
+ * NOTE(review): CLIPSPAN is driver-supplied and not visible here; confirm
+ * it yields a count in n1 and the clipped start in x1.
+ */
+static void TAG(ReadStencilSpan)( GLcontext *ctx,
+                                  GLuint n, GLint x, GLint y,
+                                  GLstencil stencil[])
+{
+   HW_READ_LOCK()
+      {
+         GLint x1,n1;
+         LOCAL_STENCIL_VARS;
+
+         y = Y_FLIP(y);
+
+         if (DBG) fprintf(stderr, "ReadStencilSpan\n");
+
+         HW_CLIPLOOP()
+            {
+               GLint i = 0;
+               CLIPSPAN(x,y,n,x1,n1,i);
+               for (;n1>0;i++,x1++,n1--)
+                  READ_STENCIL( stencil[i], x1, y );
+            }
+         HW_ENDCLIPLOOP();
+      }
+   HW_READ_UNLOCK();
+}
+
+/*
+ * Read n individually positioned stencil values into stencil.
+ *
+ * There is no mask: for every pass of the clip loop, each position has its
+ * y coordinate converted with Y_FLIP and is fetched with READ_STENCIL into
+ * stencil[i] if CLIPPIXEL accepts it.
+ */
+static void TAG(ReadStencilPixels)( GLcontext *ctx, GLuint n,
+                                    const GLint x[], const GLint y[],
+                                    GLstencil stencil[] )
+{
+   HW_READ_LOCK()
+      {
+         GLint i;
+         LOCAL_STENCIL_VARS;
+
+         if (DBG) fprintf(stderr, "ReadStencilPixels\n");
+
+         HW_CLIPLOOP()
+            {
+               i = 0;
+               while (i < n) {
+                  int fy = Y_FLIP( y[i] );
+                  if (CLIPPIXEL( x[i], fy )) {
+                     READ_STENCIL( stencil[i], x[i], fy );
+                  }
+                  i++;
+               }
+            }
+         HW_ENDCLIPLOOP();
+      }
+   HW_READ_UNLOCK();
+}
+
+
+
+
+/* Drop the per-instantiation stencil macros and TAG so the including file
+ * can define them afresh (presumably before including this template again
+ * for another stencil format -- confirm against the drivers that use it).
+ */
+#undef WRITE_STENCIL
+#undef READ_STENCIL
+#undef TAG
--- a/src/mesa/drivers/dri/common/texmem.c
+++ b/src/mesa/drivers/dri/common/texmem.c
--- a/src/mesa/drivers/dri/common/texmem.h
+++ b/src/mesa/drivers/dri/common/texmem.h
@ -0,0 +1,293 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * (c) Copyright IBM Corporation 2002
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ian Romanick <idr@us.ibm.com>
+ *    Keith Whitwell <keithw@tungstengraphics.com>
+ *    Kevin E. Martin <kem@users.sourceforge.net>
+ *    Gareth Hughes <gareth@nvidia.com>
+ */
+/* $XFree86:$ */
+
+/** \file texmem.h
+ * Public interface to the DRI texture memory management routines.
+ * 
+ * \sa texmem.c
+ */
+
+#ifndef DRI_TEXMEM_H
+#define DRI_TEXMEM_H
+
+#include "mtypes.h"
+#include "mm.h"
+#include "xf86drm.h"
+
+struct dri_tex_heap;
+typedef struct dri_tex_heap driTexHeap;
+
+struct dri_texture_object;
+typedef struct dri_texture_object driTextureObject;
+
+
+/**
+ * Base texture object type.  Each driver will extend this type with its own
+ * private data members.
+ */
+
+struct dri_texture_object {
+	struct dri_texture_object * next; /**< Next object on the list this
+					 * object is linked into (presumably
+					 * a heap's \c texture_objects or
+					 * \c swapped_objects list).
+					 */
+	struct dri_texture_object * prev; /**< Previous object on that list */
+
+	driTexHeap * heap;		/**< Texture heap currently stored in */
+	struct gl_texture_object * tObj;/**< Pointer to Mesa texture object
+					 * If NULL, this texture object is a
+					 * "placeholder" object representing
+					 * texture memory in use by another context.
+					 * A placeholder should have a heap and a memBlock.
+					 */
+	PMemBlock   memBlock;		/**< Memory block containing texture */
+	unsigned    bound;		/**< Texture unit currently bound to */
+
+	unsigned    totalSize;		/**< Total size of the texture,
+					 * including all mipmap levels 
+					 */
+
+	unsigned    dirty_images[6];	/**< Flags for whether or not images
+					 * need to be uploaded to local or
+					 * AGP texture space.  One flag set
+					 * for each cube face for cubic
+					 * textures.  Bit zero corresponds to
+					 * the base-level, which may or may
+					 * not be the level zero mipmap.
+					 */
+
+        unsigned    timestamp;	        /**< Timestamp used to
+					 * synchronize with 3d engine
+					 * in hardware where textures
+					 * are uploaded directly to
+					 * the framebuffer.  
+					 */
+
+        unsigned    firstLevel;         /**< Image in \c tObj->Image that
+					 * corresponds to the base-level of
+					 * this texture object.
+					 */
+
+        unsigned    lastLevel;          /**< Last image in \c tObj->Image used
+					 * by the current LOD settings of this
+					 * texture object.  This value must be
+					 * greater than or equal to
+					 * \c firstLevel.
+					 */
+};
+
+
+/**
+ * Signature of the driver-supplied callback stored in
+ * \c dri_tex_heap::destroy_texture_object.
+ *
+ * \param driverContext Pointer to the driver supplied context
+ * \param t Texture object whose driver-specific data is to be released
+ */
+typedef void (destroy_texture_object_t)( void * driverContext,
+				        driTextureObject * t );
+
+/**
+ * Client-private representation of texture memory state.
+ *
+ * Clients will place one or more of these structs in their driver
+ * context struct to manage one or more global texture heaps.
+ */
+
+struct dri_tex_heap {
+
+	/** Client-supplied heap identifier 
+	 */
+	unsigned heapId;	
+
+	/** Pointer to the client's private context 
+	 */
+	void *driverContext;
+
+	/** Total size of the heap, in bytes
+	 */
+	unsigned size;
+
+	/** \brief \f$log_2\f$ of size of single heap region
+	 *
+	 * Each context takes memory from the global texture heap in
+	 * \f$2^{logGranularity}\f$ byte blocks.  The value of
+	 * \a logGranularity is based on the amount of memory represented
+	 * by the heap and the maximum number of regions in the SAREA.  Given
+	 * \a b bytes of texture memory and \a n regions in the SAREA,
+	 * \a logGranularity will be \f$\lfloor\log_2( b / n )\rfloor\f$.
+	 */
+	unsigned logGranularity;
+
+	/** \brief Required alignment of allocations in this heap
+	 * 
+	 * The alignment shift is supplied to \a mmAllocMem when memory is
+	 * allocated from this heap.  The value of \a alignmentShift will
+	 * typically reflect some requirement of the hardware.  This value has
+	 * \b no \b relation to \a logGranularity.  \a alignmentShift is a
+	 * per-context value.
+	 *
+	 * \sa mmAllocMem
+	 */
+	unsigned alignmentShift;
+
+	/** Number of elements in global list (the SAREA).
+	 */
+	unsigned nrRegions;	 
+
+	/** Pointer to SAREA \a driTexRegion array
+	 */
+	drmTextureRegionPtr global_regions;
+
+	/** Pointer to the texture state age (generation number) in the SAREA
+	 */
+	unsigned     * global_age;
+
+	/** Local age (generation number) of texture state
+	 */
+	unsigned local_age;
+
+	/** Memory heap used to manage texture memory represented by
+	 * this texture heap.
+	 */
+	memHeap_t    * memory_heap;
+
+	/** List of objects that we currently believe to be in texture
+	 * memory.
+	 */
+	driTextureObject     texture_objects;
+    
+	/** Pointer to the list of texture objects that are not in
+	 * texture memory.
+	 */
+	driTextureObject   * swapped_objects;
+
+	/** Size of the driver-specific texture object.
+	 */
+	unsigned       texture_object_size;
+
+
+	/**
+	 * \brief Function to destroy driver-specific texture object data.
+	 * 
+	 * This function is supplied by the driver so that the texture manager
+	 * can release all resources associated with a texture object.  This
+	 * function should only release driver-specific data.  That is,
+	 * \a driDestroyTextureObject will release the texture memory
+	 * associated with the texture object, it will release the memory
+	 * for the texture object itself, and it will unlink the texture
+	 * object from the texture object lists.
+	 *
+	 * \param driverContext Pointer to the driver supplied context
+	 * \param t Texture object that is to be destroyed
+	 * \sa driDestroyTextureObject
+	 */
+
+	destroy_texture_object_t * destroy_texture_object;
+
+
+	/**
+	 * Pointer to an unsigned counter.  NOTE(review): presumably the
+	 * texture-swap counter location installed through
+	 * \a driSetTextureSwapCounterLocation -- confirm in texmem.c.
+	 */
+	unsigned * texture_swaps;
+
+        /**
+	 * Timestamp used to synchronize with 3d engine in hardware
+	 * where textures are uploaded directly to the
+	 * framebuffer.  
+	 */
+        unsigned timestamp;
+};
+
+
+
+
+/**
+ * Lock-contention hook for texture aging.
+ *
+ * Clients invoke this after contending for the hardware lock to find out
+ * whether textures have been stolen.  When \a heap is non-NULL and its
+ * local age no longer matches the global age, another client has touched
+ * the texture state, so \c driAgeTextures is called to work out which of
+ * our textures were removed and to refresh the global LRU.  Otherwise the
+ * macro does nothing.
+ *
+ * \param heap Texture heap to be updated (may be NULL; evaluated more
+ *             than once)
+ * \hideinitializer
+ */
+
+#define DRI_AGE_TEXTURES( heap )					\
+   do {									\
+      if ( (heap) != NULL ) {						\
+         if ( (heap)->local_age != *(heap)->global_age ) {		\
+            driAgeTextures( heap );					\
+         }								\
+      }									\
+   } while( 0 )
+
+
+
+
+/* This should be called whenever there has been contention on the hardware
+ * lock.  driAgeTextures should not be called directly.  Instead, clients
+ * should use DRI_AGE_TEXTURES, above.
+ */
+
+void driAgeTextures( driTexHeap * heap );
+
+/* NOTE(review): the routines declared below are implemented in texmem.c,
+ * which is not visible from this header; consult that file for their exact
+ * contracts.
+ */
+
+void driUpdateTextureLRU( driTextureObject * t );
+void driSwapOutTextureObject( driTextureObject * t );
+void driDestroyTextureObject( driTextureObject * t );
+int driAllocateTexture( driTexHeap * const * heap_array, unsigned nr_heaps,
+    driTextureObject * t );
+
+GLboolean driIsTextureResident( GLcontext * ctx, 
+    struct gl_texture_object * texObj );
+
+/* The parameters of driCreateTextureHeap mirror fields of struct
+ * dri_tex_heap (heap id, driver context, size, alignment shift, region
+ * count and array, global age pointer, swapped-object list, object size
+ * and destroy callback).
+ */
+driTexHeap * driCreateTextureHeap( unsigned heap_id, void * context,
+    unsigned size, unsigned alignmentShift, unsigned nr_regions,
+    drmTextureRegionPtr global_regions, unsigned * global_age,
+    driTextureObject * swapped_objects, unsigned texture_object_size,
+    destroy_texture_object_t * destroy_tex_obj );
+void driDestroyTextureHeap( driTexHeap * heap );
+
+void
+driCalculateMaxTextureLevels( driTexHeap * const * heaps,
+			      unsigned nr_heaps,
+			      struct gl_constants * limits,
+			      unsigned max_bytes_per_texel, 
+			      unsigned max_2D_size,
+			      unsigned max_3D_size,
+			      unsigned max_cube_size,
+			      unsigned max_rect_size,
+			      unsigned mipmaps_at_once,
+			      int all_textures_one_heap );
+
+void
+driSetTextureSwapCounterLocation( driTexHeap * heap, unsigned * counter );
+
+/* Single-bit flags, one per texture target.  NOTE(review): presumably
+ * OR-ed together to form the \c targets argument of driInitTextureObjects
+ * -- confirm in texmem.c.
+ */
+#define DRI_TEXMGR_DO_TEXTURE_1D    0x0001
+#define DRI_TEXMGR_DO_TEXTURE_2D    0x0002
+#define DRI_TEXMGR_DO_TEXTURE_3D    0x0004
+#define DRI_TEXMGR_DO_TEXTURE_CUBE  0x0008
+#define DRI_TEXMGR_DO_TEXTURE_RECT  0x0010
+
+void driInitTextureObjects( GLcontext *ctx, driTextureObject * swapped,
+			    GLuint targets );
+
+GLboolean driValidateTextureHeaps( driTexHeap * const * texture_heaps,
+    unsigned nr_heaps, const driTextureObject * swapped );
+
+#endif /* DRI_TEXMEM_H */
--- a/src/mesa/drivers/dri/common/utils.c
+++ b/src/mesa/drivers/dri/common/utils.c
@ -0,0 +1,186 @@
+/*
+ * (c) Copyright IBM Corporation 2002
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ian Romanick <idr@us.ibm.com>
+ */
+/* $XFree86:$ */
+
+#include <string.h>
+#include <stdlib.h>
+#include "mtypes.h"
+#include "extensions.h"
+#include "utils.h"
+
+#if defined(USE_X86_ASM)
+#include "X86/common_x86_asm.h"
+#endif
+
+/**
+ * Translate a debug string into a set of flag bits.
+ *
+ * Walks \a control until an entry with a NULL \c string is reached and ORs
+ * together the \c flag of every entry whose \c string occurs anywhere
+ * inside \a debug (substring match via \c strstr).
+ *
+ * \param debug   String to scan; may be NULL, in which case 0 is returned
+ *                and \a control is not examined.
+ * \param control Table of string/flag pairs terminated by a NULL string.
+ * \return Bitwise OR of the flags of all matching entries.
+ */
+unsigned
+driParseDebugString( const char * debug,
+		     const struct dri_debug_control * control  )
+{
+   const struct dri_debug_control * entry;
+   unsigned   flag = 0;
+
+   if ( debug == NULL ) {
+      return flag;
+   }
+
+   for ( entry = control ; entry->string != NULL ; entry++ ) {
+      if ( strstr( debug, entry->string ) != NULL ) {
+	 flag |= entry->flag;
+      }
+   }
+
+   return flag;
+}
+
+
+
+
+/**
+ * Build the renderer string for a driver.
+ *
+ * Writes "Mesa DRI <hardware_name> <driver_date>" into \a buffer, followed
+ * by " AGP <n>x" when \a agp_mode is 1, 2, 4 or 8, followed by a CPU
+ * feature suffix chosen at build time (x86 feature list under
+ * USE_X86_ASM, " Sparc" under USE_SPARC_ASM, nothing otherwise).
+ *
+ * The string is assembled with \c sprintf, so \a buffer must already be
+ * large enough for the complete result; no bound is checked here.
+ *
+ * \return Number of characters written, excluding the terminating NUL.
+ */
+unsigned
+driGetRendererString( char * buffer, const char * hardware_name,
+		      const char * driver_date, GLuint agp_mode )
+{
+#ifdef USE_X86_ASM
+   char * x86_str = "";
+   char * mmx_str = "";
+   char * tdnow_str = "";
+   char * sse_str = "";
+#endif
+   unsigned   offset;
+
+
+   offset = sprintf( buffer, "Mesa DRI %s %s", hardware_name, driver_date );
+
+   /* Only the recognized AGP transfer rates are reported. */
+   if ( (agp_mode == 1) || (agp_mode == 2)
+	|| (agp_mode == 4) || (agp_mode == 8) ) {
+      offset += sprintf( buffer + offset, " AGP %ux", agp_mode );
+   }
+
+   /* CPU-specific suffix, selected by the assembly support compiled in. */
+#ifdef USE_X86_ASM
+   if ( _mesa_x86_cpu_features ) {
+      x86_str = " x86";
+   }
+# ifdef USE_MMX_ASM
+   if ( cpu_has_mmx ) {
+      if ( cpu_has_mmxext ) {
+	 mmx_str = "/MMX+";
+      }
+      else {
+	 mmx_str = "/MMX";
+      }
+   }
+# endif
+# ifdef USE_3DNOW_ASM
+   if ( cpu_has_3dnow ) {
+      if ( cpu_has_3dnowext ) {
+	 tdnow_str = "/3DNow!+";
+      }
+      else {
+	 tdnow_str = "/3DNow!";
+      }
+   }
+# endif
+# ifdef USE_SSE_ASM
+   if ( cpu_has_xmm ) {
+      if ( cpu_has_xmm2 ) {
+	 sse_str = "/SSE2";
+      }
+      else {
+	 sse_str = "/SSE";
+      }
+   }
+# endif
+
+   offset += sprintf( buffer + offset, "%s%s%s%s", 
+		      x86_str, mmx_str, tdnow_str, sse_str );
+
+#elif defined(USE_SPARC_ASM)
+
+   offset += sprintf( buffer + offset, " Sparc" );
+
+#endif
+
+   return offset;
+}
+
+
+
+
+/**
+ * Enable a driver's set of GL extensions on a context.
+ *
+ * \param ctx                  Context to enable the extensions on.
+ * \param extensions_to_enable NULL-terminated array of extension names;
+ *                             each is passed to \c _mesa_enable_extension
+ *                             in array order.
+ * \param enable_imaging       When true, \c _mesa_enable_imaging_extensions
+ *                             is called first.
+ */
+void driInitExtensions( GLcontext * ctx,
+			const char * const extensions_to_enable[],
+			GLboolean  enable_imaging )
+{
+   const char * const * name;
+
+   if ( enable_imaging ) {
+      _mesa_enable_imaging_extensions( ctx );
+   }
+
+   name = extensions_to_enable;
+   while ( *name != NULL ) {
+      _mesa_enable_extension( ctx, *name );
+      name++;
+   }
+}
+
+
+
+
+/**
+ * Check that the DRI, DDX and DRM versions are usable by a driver.
+ *
+ * A component is accepted when its major version equals the expected major
+ * and its minor version is at least the expected minor.  The DRI version
+ * is queried with \c XF86DRIQueryVersion (skipped entirely when \c _SOLO
+ * is defined, and not treated as a failure when the query itself fails);
+ * the DDX and DRM versions are read from \a sPriv.
+ *
+ * On the first mismatch a message of the form
+ * "<driver_name> DRI driver expected <component> version M.m.x but got
+ * version M.m.p" is reported through \c __driUtilMessage.  The arguments
+ * are passed as (driver_name, component) to match the order of the two
+ * leading %s conversions in \c format.
+ *
+ * \return \c GL_TRUE if every checked component is compatible,
+ *         \c GL_FALSE otherwise.
+ */
+GLboolean
+driCheckDriDdxDrmVersions(__DRIscreenPrivate *sPriv,
+			  const char * driver_name,
+			  int dri_major, int dri_minor,
+			  int ddx_major, int ddx_minor,
+			  int drm_major, int drm_minor)
+{
+   static const char format[] = "%s DRI driver expected %s version %d.%d.x "
+       "but got version %d.%d.%d";
+   int major, minor, patch;
+
+#ifndef _SOLO
+   /* Check the DRI version */
+   if (XF86DRIQueryVersion(sPriv->display, &major, &minor, &patch)) {
+      if (major != dri_major || minor < dri_minor) {
+	 __driUtilMessage(format, driver_name, "DRI", dri_major, dri_minor,
+			  major, minor, patch);
+	 return GL_FALSE;
+      }
+   }
+#else
+   /* Nothing to query in this configuration; silence unused warnings. */
+   (void)major;(void)minor;(void)patch;
+#endif
+   
+   /* Check that the DDX driver version is compatible */
+   if (sPriv->ddxMajor != ddx_major || sPriv->ddxMinor < ddx_minor) {
+      __driUtilMessage(format, driver_name, "DDX", ddx_major, ddx_minor,
+		       sPriv->ddxMajor, sPriv->ddxMinor, sPriv->ddxPatch);
+      return GL_FALSE;
+   }
+
+   /* Check that the DRM driver version is compatible */
+   if (sPriv->drmMajor != drm_major || sPriv->drmMinor < drm_minor) {
+      __driUtilMessage(format, driver_name, "DRM", drm_major, drm_minor,
+		       sPriv->drmMajor, sPriv->drmMinor, sPriv->drmPatch);
+      return GL_FALSE;
+   }
+
+   return GL_TRUE;
+}
--- a/src/mesa/drivers/dri/common/utils.h
+++ b/src/mesa/drivers/dri/common/utils.h
@ -0,0 +1,54 @@
+/*
+ * (c) Copyright IBM Corporation 2002
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ian Romanick <idr@us.ibm.com>
+ */
+/* $XFree86:$ */
+
+#ifndef DRI_DEBUG_H
+#define DRI_DEBUG_H
+
+#include "context.h"
+#include "dri_util.h"
+
+/**
+ * One entry of a debug-flag table for driParseDebugString.  A table is an
+ * array of these terminated by an entry whose \c string is NULL.
+ */
+struct dri_debug_control
+{
+    const char * string;	/* text searched for in the debug string */
+    unsigned     flag;		/* bits OR-ed into the result on a match */
+};
+
+/* Returns the OR of the flags of every table entry whose string occurs as
+ * a substring of debug; returns 0 when debug is NULL.
+ */
+extern unsigned driParseDebugString( const char * debug,
+    const struct dri_debug_control * control );
+
+/* Formats "Mesa DRI <hardware_name> <driver_date>" plus optional AGP and
+ * CPU suffixes into buffer with sprintf (no size check, so buffer must be
+ * large enough) and returns the number of characters written.
+ */
+extern unsigned driGetRendererString( char * buffer,
+    const char * hardware_name, const char * driver_date, GLuint agp_mode );
+
+/* Enables every extension named in the NULL-terminated card_extensions
+ * array on ctx, after enabling the imaging extensions when enable_imaging
+ * is true.
+ */
+extern void driInitExtensions( GLcontext * ctx, 
+    const char * const card_extensions[], GLboolean enable_imaging );
+
+/* Returns GL_TRUE when the DRI, DDX and DRM versions each have the
+ * expected major number and at least the expected minor number; otherwise
+ * reports the mismatch and returns GL_FALSE.
+ */
+extern GLboolean driCheckDriDdxDrmVersions( __DRIscreenPrivate *sPriv,
+    const char * driver_name, int dri_major, int dri_minor,
+    int ddx_major, int ddx_minor, int drm_major, int drm_minor );
+
+#endif /* DRI_DEBUG_H */
--- a/src/mesa/drivers/dri/common/vblank.c
+++ b/src/mesa/drivers/dri/common/vblank.c
@ -0,0 +1,325 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * (c) Copyright IBM Corporation 2002
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ian Romanick <idr@us.ibm.com>
+ */
+/* $XFree86:$ */
+
+#include "glheader.h"
+#include "xf86drm.h"
+#include "mtypes.h"
+#include "macros.h"
+#include "dd.h"
+#include "vblank.h"
+
+
+/****************************************************************************/
+/**
+ * Query the current MSC (vertical refresh) counter.
+ *
+ * Issues a relative vertical-blank request for zero refreshes, so nothing
+ * is waited for, and stores the sequence number from the reply in \c count.
+ * The counter counts vertical refreshes since some arbitrary point in time
+ * and only ever decreases when it wraps around.
+ *
+ * \warning This function is called from \c glXGetVideoSyncSGI, which expects
+ * a \c count of type \c unsigned (32-bit), and \c glXGetSyncValuesOML, which
+ * expects a \c count of type \c int64_t (signed 64-bit).  The kernel ioctl
+ * currently always returns a \c sequence of type \c unsigned, so only the
+ * low 32 bits of \c count carry information.
+ *
+ * \param priv   Pointer to the DRI screen private struct.
+ * \param count  Storage to hold MSC counter.  It is written even when the
+ *               request fails.
+ * \return       The result of \c drmWaitVBlank: zero on success, a negative
+ *               errno value on failure.
+ */
+int driGetMSC32( __DRIscreenPrivate * priv, int64_t * count )
+{
+   drmVBlank query;
+   int status;
+
+   /* "Relative, zero refreshes from now": just report the current count. */
+   query.request.sequence = 0;
+   query.request.type = DRM_VBLANK_RELATIVE;
+
+   status = drmWaitVBlank( priv->fd, &query );
+
+   /* Widen the 32-bit kernel sequence number for the caller. */
+   *count = (int64_t)query.reply.sequence;
+   return status;
+}
+
+
+/****************************************************************************/
+/**
+ * Wait for a specified refresh count.  This implements most of the
+ * functionality of \c glXWaitForMscOML from the GLX_OML_sync_control spec.
+ * Waits for the \c target_msc refresh.  If that has already passed, it
+ * waits until \f$(MSC \bmod divisor)\f$ is equal to \c remainder.  If 
+ * \c target_msc is 0, use the behavior of glXWaitVideoSyncSGI(), which
+ * omits the initial check against a target MSC value.
+ * 
+ * This function is actually something of a hack.  The problem is that, at
+ * the time of this writing, none of the existing DRM modules support an
+ * ioctl that returns a 64-bit count (at least not on 32-bit platforms).
+ * However, this function exists to support a GLX function that requires
+ * the use of 64-bit counts.  As such, there is a little bit of ugly
+ * hackery at the end of this function to make the 32-bit count act like
+ * a 64-bit count.  There are still some cases where this will break, but
+ * I believe it catches the most common cases.
+ *
+ * The real solution is to provide an ioctl that uses a 64-bit count.
+ *
+ * \param priv        Pointer to the DRI drawable private.
+ * \param target_msc  Desired refresh count to wait for.  A value of 0
+ *                    means to use the glXWaitVideoSyncSGI() behavior.
+ * \param divisor     MSC divisor if \c target_msc is already reached.
+ *                    Zero means "just wait for \c target_msc".
+ * \param remainder   Desired MSC remainder if \c target_msc is already
+ *                    reached.
+ * \param msc         Buffer to hold MSC when done waiting.  It is only
+ *                    written on success.
+ *
+ * \return            Zero on success.  On failure, \c GLX_BAD_CONTEXT, or
+ *                    -1 when built with \c _SOLO defined.
+ *
+ * \note (review) In the non-zero \c divisor branch, \c target_msc,
+ * \c divisor and \c remainder are all truncated to \c unsigned \c int.  A
+ * non-zero \c divisor whose low 32 bits are all zero would make the modulo
+ * below divide by zero, and a \c remainder that is not smaller than the
+ * truncated \c divisor can never match, so the loop would not terminate.
+ * Both cases appear to rely on the caller validating its arguments --
+ * confirm against the GLX entry points.
+ */
+
+int driWaitForMSC32( __DRIdrawablePrivate *priv,
+		     int64_t target_msc, int64_t divisor, int64_t remainder,
+		     int64_t * msc )
+{
+   drmVBlank vbl;
+
+
+   if ( divisor != 0 ) {
+      /* Only the low 32 bits of the 64-bit arguments are used here. */
+      unsigned int target = (unsigned int)target_msc;
+      unsigned int next = target;
+      unsigned int r;
+      int dont_wait = (target_msc == 0);
+
+      do {
+         /* dont_wait means we're using the glXWaitVideoSyncSGI() behavior.
+          * The first time around, just get the current count and proceed 
+          * to the test for (MSC % divisor) == remainder.
+          */
+         vbl.request.type = dont_wait ? DRM_VBLANK_RELATIVE :
+                                        DRM_VBLANK_ABSOLUTE;
+         vbl.request.sequence = next;
+
+	 if ( drmWaitVBlank( priv->driScreenPriv->fd, &vbl ) != 0 ) {
+	    /* FIXME: This doesn't seem like the right thing to return here.
+	     */
+#ifndef _SOLO
+	    return GLX_BAD_CONTEXT;
+#else
+	    return -1;
+#endif
+	 }
+
+         /* Every pass after the first is a real (absolute) wait.  Landing
+          * exactly on the requested target ends the wait without looking
+          * at divisor / remainder.
+          */
+         dont_wait = 0;
+         if (target_msc != 0 && vbl.reply.sequence == target)
+            break;
+
+         /* Assuming the wait-done test fails, the next refresh to wait for
+          * will be one that satisfies (MSC % divisor) == remainder.  The
+          * value (MSC - (MSC % divisor) + remainder) is the refresh value 
+          * closest to the current value that would satisfy the equation.  
+          * If this refresh has already happened, we add divisor to obtain 
+          * the next refresh after the current one that will satisfy it.
+          */
+         r = (vbl.reply.sequence % (unsigned int)divisor);
+         next = (vbl.reply.sequence - r + (unsigned int)remainder);
+         if (next <= vbl.reply.sequence) next += (unsigned int)divisor;
+
+      } while ( r != (unsigned int)remainder );
+   }
+   else {
+      /* If the \c divisor is zero, just wait until the MSC is greater
+       * than or equal to \c target_msc.
+       */
+
+      vbl.request.type = DRM_VBLANK_ABSOLUTE;
+      vbl.request.sequence = target_msc;
+
+      if ( drmWaitVBlank( priv->driScreenPriv->fd, &vbl ) != 0 ) {
+	 /* FIXME: This doesn't seem like the right thing to return here.
+	  */
+#ifndef _SOLO
+	    return GLX_BAD_CONTEXT;
+#else
+	    return -1;
+#endif
+      }
+   }
+
+   /* Fake a 64-bit result: take the upper 32 bits from target_msc and the
+    * lower 32 bits from the kernel's count.  If that lands below the
+    * target, assume the 32-bit counter wrapped and carry into the upper
+    * half.
+    */
+   *msc  = (target_msc & 0xffffffff00000000LL);
+   *msc |= vbl.reply.sequence;
+   if ( *msc < target_msc ) {
+      *msc += 0x0000000100000000LL;
+   }
+
+   return 0;
+}
+
+
+/****************************************************************************/
+/**
+ * Builds the default set of vertical-blank-wait flags.
+ *
+ * - \c VBLANK_FLAG_INTERVAL is set when the internal GLX API version is at
+ *   least 20030317.
+ * - \c VBLANK_FLAG_SYNC is set when \c LIBGL_SYNC_REFRESH is present in the
+ *   environment (its value is not examined).
+ * - \c VBLANK_FLAG_THROTTLE is set when \c LIBGL_THROTTLE_REFRESH is present
+ *   in the environment (its value is not examined).
+ *
+ * \return The OR of the selected \c VBLANK_FLAG bits.
+ */
+
+GLuint driGetDefaultVBlankFlags( void )
+{
+   GLuint  defaults = 0;
+
+
+   if ( driCompareGLXAPIVersion( 20030317 ) >= 0 ) {
+      defaults |= VBLANK_FLAG_INTERVAL;
+   }
+
+   if ( getenv("LIBGL_SYNC_REFRESH") != NULL ) {
+      defaults |= VBLANK_FLAG_SYNC;
+   }
+
+   if ( getenv("LIBGL_THROTTLE_REFRESH") != NULL ) {
+      defaults |= VBLANK_FLAG_THROTTLE;
+   }
+
+   return defaults;
+}
+
+
+/****************************************************************************/
+/**
+ * Wrapper to call \c drmWaitVBlank.  The main purpose of this function is to
+ * wrap the error message logging.  The error message is only logged the
+ * first time \c drmWaitVBlank fails (the "already reported" state is a
+ * function-local static, so it is shared by every caller in the process).
+ * If \c drmWaitVBlank is successful, \c vbl_seq is set to the sequence value
+ * in the reply; on failure \c vbl_seq is left untouched.
+ *
+ * \param vbl      Pointer to drmVBlank packet describing how to wait.
+ * \param vbl_seq  Location to store the current refresh counter.
+ * \param fd       File descriptor used to call into the DRM.
+ * \return         Zero on success or -1 on failure.
+ */
+
+static int do_wait( drmVBlank * vbl, GLuint * vbl_seq, int fd )
+{
+   int   ret;
+
+
+   ret = drmWaitVBlank( fd, vbl );
+   if ( ret != 0 ) {
+      static GLboolean first_time = GL_TRUE;
+
+      if ( first_time ) {
+	 /* The variable names here must match the ones read by
+	  * driGetDefaultVBlankFlags(): LIBGL_THROTTLE_REFRESH and
+	  * LIBGL_SYNC_REFRESH.
+	  */
+	 fprintf(stderr, 
+		 "%s: drmWaitVBlank returned %d, IRQs don't seem to be"
+		 " working correctly.\nTry running with LIBGL_THROTTLE_REFRESH"
+		 " and LIBGL_SYNC_REFRESH unset.\n", __FUNCTION__, ret);
+	 first_time = GL_FALSE;
+      }
+
+      return -1;
+   }
+
+   *vbl_seq = vbl->reply.sequence;
+   return 0;
+}
+
+
+/****************************************************************************/
+/**
+ * Waits for the vertical blank for use with glXSwapBuffers.
+ *
+ * The wait happens in up to two steps.  First a relative wait is issued:
+ * one refresh if \c VBLANK_FLAG_SYNC is set, otherwise zero refreshes, which
+ * merely fetches the current MSC.  Then, if the MSC has not yet reached the
+ * value seen at the previous call plus the required interval, an absolute
+ * wait for that value follows.  The interval is the drawable's swap
+ * interval for \c VBLANK_FLAG_INTERVAL (always zero in \c _SOLO builds),
+ * one for \c VBLANK_FLAG_THROTTLE, and zero otherwise.
+ *
+ * If \c VBLANK_FLAG_NO_IRQ is set the function returns success immediately
+ * without touching \c vbl_seq.
+ * 
+ * \param priv     Pointer to the DRI drawable private; supplies the DRM file
+ *                 descriptor and, outside \c _SOLO builds, the swap interval.
+ * \param vbl_seq  Vertical blank sequence number (MSC) after the last buffer
+ *                 swap.  Updated after this wait.
+ * \param flags    \c VBLANK_FLAG bits that control how long to wait.
+ * \param missed_deadline  Set to \c GL_TRUE if the MSC after waiting is later
+ *                 than the "target" based on \c flags.  The idea is that if
+ *                 \c missed_deadline is set, then the application is not 
+ *                 achieving its desired framerate.
+ * \return         Zero on success, -1 on error.
+ */
+
+int
+driWaitForVBlank( const  __DRIdrawablePrivate *priv, GLuint * vbl_seq,
+		  GLuint flags, GLboolean * missed_deadline )
+{
+   drmVBlank  wait_req;
+   unsigned   prev_seq;
+   unsigned   swap_gap;
+   unsigned   latest_ok;
+   int        drm_fd;
+
+
+   *missed_deadline = GL_FALSE;
+
+   /* No IRQ to wait on: report success without waiting. */
+   if ( flags & VBLANK_FLAG_NO_IRQ ) {
+      return 0;
+   }
+
+   drm_fd = priv->driScreenPriv->fd;
+
+   /* do_wait overwrites *vbl_seq, so remember the value from the previous
+    * call; the interval / throttle target and the deadline are both
+    * measured from it.
+    */
+   prev_seq = *vbl_seq;
+
+   /* Step 1: relative wait.  One refresh when syncing, otherwise a "fake"
+    * wait for zero refreshes that only fetches the current MSC.
+    */
+   wait_req.request.type = DRM_VBLANK_RELATIVE;
+   wait_req.request.sequence = ( flags & VBLANK_FLAG_SYNC ) ? 1 : 0;
+
+   if ( do_wait( &wait_req, vbl_seq, drm_fd ) != 0 ) {
+      return -1;
+   }
+
+   /* Number of refreshes that must separate this call from the last one. */
+   swap_gap = 0;
+   if ( flags & VBLANK_FLAG_INTERVAL ) {
+#ifndef _SOLO
+      swap_gap = priv->pdraw->swap_interval;
+#endif
+   }
+   else if ( flags & VBLANK_FLAG_THROTTLE ) {
+      swap_gap = 1;
+   }
+
+   /* Step 2: absolute wait for (previous MSC + gap), but only if step 1 did
+    * not already get us there.
+    */
+   wait_req.request.type = DRM_VBLANK_ABSOLUTE;
+   wait_req.request.sequence = prev_seq + swap_gap;
+
+   if ( *vbl_seq < wait_req.request.sequence
+        && do_wait( &wait_req, vbl_seq, drm_fd ) != 0 ) {
+      return -1;
+   }
+
+   /* With no required gap the deadline is still one refresh after the
+    * previous wait.
+    */
+   latest_ok = prev_seq + ( ( swap_gap != 0 ) ? swap_gap : 1 );
+   *missed_deadline = ( *vbl_seq > latest_ok );
+
+   return 0;
+}
--- a/src/mesa/drivers/dri/common/vblank.h
+++ b/src/mesa/drivers/dri/common/vblank.h
@ -0,0 +1,62 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * (c) Copyright IBM Corporation 2002
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ian Romanick <idr@us.ibm.com>
+ */
+/* $XFree86:$ */
+
+#ifndef DRI_VBLANK_H
+#define DRI_VBLANK_H
+
+#include "context.h"
+#include "dri_util.h"
+
+/* Flag bits for the \c flags argument of driWaitForVBlank().  The first
+ * three are also the bits that driGetDefaultVBlankFlags() can return.
+ */
+#define VBLANK_FLAG_INTERVAL  (1U << 0)  /* Respect the swap_interval setting
+					  * of the drawable.
+					  */
+#define VBLANK_FLAG_THROTTLE  (1U << 1)  /* Wait 1 refresh since last call.
+					  * Only consulted when
+					  * VBLANK_FLAG_INTERVAL is not set.
+					  */
+#define VBLANK_FLAG_SYNC      (1U << 2)  /* Sync to the next refresh.
+					  */
+#define VBLANK_FLAG_NO_IRQ    (1U << 7)  /* DRM has no IRQ to wait on;
+					  * driWaitForVBlank() returns
+					  * immediately.
+					  */
+
+/* Store the current MSC (refresh count) in \c count.  Returns the result
+ * of drmWaitVBlank: zero on success.
+ */
+extern int driGetMSC32( __DRIscreenPrivate * priv, int64_t * count );
+
+/* Wait for refresh \c target_msc, or, once that has passed, for a refresh
+ * where (MSC % divisor) == remainder.  Returns zero on success and stores
+ * the resulting count in \c msc.
+ */
+extern int driWaitForMSC32( __DRIdrawablePrivate *priv,
+    int64_t target_msc, int64_t divisor, int64_t remainder, int64_t * msc );
+
+/* Default VBLANK_FLAG bits, derived from the GLX API version and the
+ * LIBGL_SYNC_REFRESH / LIBGL_THROTTLE_REFRESH environment variables.
+ */
+extern GLuint driGetDefaultVBlankFlags( void );
+
+/* Wait for vertical blank as directed by the VBLANK_FLAG bits in \c flags.
+ * Updates \c vbl_seq and \c missed_deadline; returns zero on success, -1 on
+ * error.
+ */
+extern int driWaitForVBlank( const __DRIdrawablePrivate *priv,
+    GLuint * vbl_seq, GLuint flags, GLboolean * missed_deadline );
+
+#undef usleep
+#include <sched.h>   /* for sched_yield() */
+#include <unistd.h>  /* for usleep() */
+
+/**
+ * Sleep for \c nr microseconds, then yield the processor.
+ *
+ * The constant conditions are hand-edited debugging switches: change
+ * "if (0)" to "if (1)" to log every sleep to stderr, or "if (1)" to
+ * "if (0)" to skip the sleep and only yield.  \c nr appears twice in the
+ * expansion, so avoid arguments with side effects.
+ *
+ * Users of this macro need declarations for fprintf(), usleep() and
+ * sched_yield() in scope.
+ */
+#define DO_USLEEP(nr)							\
+   do {								 	\
+      if (0) fprintf(stderr, "%s: usleep for %u\n", __FUNCTION__, (nr) );	\
+      if (1) usleep( (nr) );						\
+      sched_yield();							\
+   } while( 0 )
+
+#endif /* DRI_VBLANK_H */