mesa/src/gallium/drivers/llvmpipe/lp_bld_interp.c

/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Position and shader input interpolation.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */

#include "pipe/p_shader_tokens.h"
#include "util/u_debug.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "tgsi/tgsi_scan.h"
#include "gallivm/lp_bld_debug.h"
#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_arit.h"
#include "gallivm/lp_bld_swizzle.h"
#include "lp_bld_interp.h"


/*
 * The shader JIT function operates on blocks of quads.
 * Each block has 2x2 quads and each quad has 2x2 pixels.
 *
 * We iterate over the quads in order 0, 1, 2, 3:
 *
 * #################
 * #   |   #   |   #
 * #---0---#---1---#
 * #   |   #   |   #
 * #################
 * #   |   #   |   #
 * #---2---#---3---#
 * #   |   #   |   #
 * #################
 *
 * Within each quad, we have four pixels which are represented in SOA
 * order:
 *
 * #########
 * # 0 | 1 #
 * #---+---#
 * # 2 | 3 #
 * #########
 *
 * So the green channel (for example) of the four pixels is stored in
 * a single vector register: {g0, g1, g2, g3}.
 */


static const unsigned char quad_offset_x[4] = {0, 1, 0, 1};
static const unsigned char quad_offset_y[4] = {0, 0, 1, 1};


static void
attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
{
   if(attrib == 0)
      lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
   else
      lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
}


/**
 * Initialize the bld->a0, dadx, dady fields.  This involves fetching
 * those values from the arrays which are passed into the JIT function.
 */
static void
coeffs_init(struct lp_build_interp_soa_context *bld,
            LLVMValueRef a0_ptr,
            LLVMValueRef dadx_ptr,
            LLVMValueRef dady_ptr)
{
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
   LLVMBuilderRef builder = coeff_bld->builder;
   LLVMValueRef zero = LLVMConstNull(coeff_bld->elem_type);
   LLVMValueRef one = LLVMConstReal(coeff_bld->elem_type, 1.0);
   LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
   LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
   LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
   LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
   LLVMValueRef oow = NULL;
   unsigned attrib;
   unsigned chan;

   /* TODO: Use more vector operations */

   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
      const unsigned mask = bld->mask[attrib];
      const unsigned interp = bld->interp[attrib];
      for (chan = 0; chan < NUM_CHANNELS; ++chan) {
         if (mask & (1 << chan)) {
            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0);
            LLVMValueRef a0 = zero;
            LLVMValueRef dadx = zero;
            LLVMValueRef dady = zero;
            LLVMValueRef dadxy = zero;
            LLVMValueRef dadq;
            LLVMValueRef dadq2;
            LLVMValueRef a;

            switch (interp) {
            case LP_INTERP_PERSPECTIVE:
               /* fall-through */

            case LP_INTERP_LINEAR:
               if (attrib == 0 && chan == 0) {
                  dadxy = dadx = one;
               }
               else if (attrib == 0 && chan == 1) {
                  dadxy = dady = one;
               }
               else {
                  dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
                  dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
                  dadxy = LLVMBuildAdd(builder, dadx, dady, "");
                  attrib_name(dadx, attrib, chan, ".dadx");
                  attrib_name(dady, attrib, chan, ".dady");
                  attrib_name(dadxy, attrib, chan, ".dadxy");
               }
               /* fall-through */

            case LP_INTERP_CONSTANT:
            case LP_INTERP_FACING:
               a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), "");
               attrib_name(a0, attrib, chan, ".a0");
               break;

            case LP_INTERP_POSITION:
               /* Nothing to do as the position coeffs are already setup in slot 0 */
               continue;

            default:
               assert(0);
               break;
            }

            /*
             * dadq = {0, dadx, dady, dadx + dady}
             */

            dadq = coeff_bld->undef;
            dadq = LLVMBuildInsertElement(builder, dadq, zero,  i0, "");
            dadq = LLVMBuildInsertElement(builder, dadq, dadx,  i1, "");
            dadq = LLVMBuildInsertElement(builder, dadq, dady,  i2, "");
            dadq = LLVMBuildInsertElement(builder, dadq, dadxy, i3, "");

            /*
             * dadq2 = 2 * dq
             */

            dadq2 = LLVMBuildAdd(builder, dadq, dadq, "");

            /*
             * a = a0 + x * dadx + y * dady
             */

            if (attrib == 0 && chan == 0) {
               a = bld->x;
            }
            else if (attrib == 0 && chan == 1) {
               a = bld->y;
            }
            else {
               a = a0;
               if (interp != LP_INTERP_CONSTANT &&
                   interp != LP_INTERP_FACING) {
                  a = LLVMBuildAdd(builder, a,
                                   LLVMBuildMul(builder, bld->x, dadx, ""),
                                   "");
                  a = LLVMBuildAdd(builder, a,
                                   LLVMBuildMul(builder, bld->y, dady, ""),
                                   "");
               }
            }

            /*
             * a = {a, a, a, a}
             */

            a = lp_build_broadcast(builder, coeff_bld->vec_type, a);

            /*
             * Compute the attrib values on the upper-left corner of each quad.
             */

            a = LLVMBuildAdd(builder, a, dadq2, "");

            /*
             * a    *= 1 / w
             * dadq *= 1 / w
             */

            if (interp == LP_INTERP_PERSPECTIVE) {
               LLVMValueRef w = bld->a[0][3];
               assert(attrib != 0);
               assert(bld->mask[0] & TGSI_WRITEMASK_W);
               if (!oow) {
                  oow = lp_build_rcp(coeff_bld, w);
                  lp_build_name(oow, "oow");
               }
               a = lp_build_mul(coeff_bld, a, oow);
               dadq = lp_build_mul(coeff_bld, dadq, oow);
            }

            attrib_name(a, attrib, chan, ".a");
            attrib_name(dadq, attrib, chan, ".dadq");

            bld->a   [attrib][chan] = a;
            bld->dadq[attrib][chan] = dadq;
         }
      }
   }
}


/**
 * Increment the shader input attribute values.
 * This is called when we move from one quad to the next.
 */
static void
attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
{
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
   LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index);
   unsigned attrib;
   unsigned chan;

   assert(quad_index < 4);

   for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
      const unsigned mask = bld->mask[attrib];
      const unsigned interp = bld->interp[attrib];
      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
         if(mask & (1 << chan)) {
            LLVMValueRef a;
            if (interp == LP_INTERP_CONSTANT ||
                interp == LP_INTERP_FACING) {
               a = bld->a[attrib][chan];
            }
            else if (interp == LP_INTERP_POSITION) {
               assert(attrib > 0);
               a = bld->attribs[0][chan];
            }
            else {
               a = bld->a[attrib][chan];

               /*
                * Broadcast the attribute value for this quad into all elements
                */

               a = LLVMBuildShuffleVector(coeff_bld->builder,
                                          a, coeff_bld->undef, shuffle, "");

               /*
                * Add the derivatives
                */

               a = lp_build_add(coeff_bld, a, bld->dadq[attrib][chan]);

               attrib_name(a, attrib, chan, "");
            }
            bld->attribs[attrib][chan] = a;
         }
      }
   }
}


/**
 * Generate the position vectors.
 *
 * Parameter x0, y0 are the integer values with upper left coordinates.
 */
static void
pos_init(struct lp_build_interp_soa_context *bld,
         LLVMValueRef x0,
         LLVMValueRef y0)
{
   struct lp_build_context *coeff_bld = &bld->coeff_bld;

   bld->x = LLVMBuildSIToFP(coeff_bld->builder, x0, coeff_bld->elem_type, "");
   bld->y = LLVMBuildSIToFP(coeff_bld->builder, y0, coeff_bld->elem_type, "");
}


/**
 * Initialize fragment shader input attribute info.
 */
void
lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
                         unsigned num_inputs,
                         const struct lp_shader_input *inputs,
                         LLVMBuilderRef builder,
                         struct lp_type type,
                         LLVMValueRef a0_ptr,
                         LLVMValueRef dadx_ptr,
                         LLVMValueRef dady_ptr,
                         LLVMValueRef x0,
                         LLVMValueRef y0)
{
   struct lp_type coeff_type;
   unsigned attrib;
   unsigned chan;

   memset(bld, 0, sizeof *bld);

   memset(&coeff_type, 0, sizeof coeff_type);
   coeff_type.floating = TRUE;
   coeff_type.sign = TRUE;
   coeff_type.width = 32;
   coeff_type.length = QUAD_SIZE;

   /* XXX: we don't support interpolating into any other types */
   assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);

   lp_build_context_init(&bld->coeff_bld, builder, coeff_type);

   /* For convenience */
   bld->pos = bld->attribs[0];
   bld->inputs = (const LLVMValueRef (*)[NUM_CHANNELS]) bld->attribs[1];

   /* Position */
   bld->num_attribs = 1;
   bld->mask[0] = TGSI_WRITEMASK_XYZW;
   bld->interp[0] = LP_INTERP_LINEAR;

   /* Inputs */
   for (attrib = 0; attrib < num_inputs; ++attrib) {
      bld->mask[1 + attrib] = inputs[attrib].usage_mask;
      bld->interp[1 + attrib] = inputs[attrib].interp;
   }
   bld->num_attribs = 1 + num_inputs;

   /* Ensure all masked out input channels have a valid value */
   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
      for (chan = 0; chan < NUM_CHANNELS; ++chan) {
         bld->attribs[attrib][chan] = bld->coeff_bld.undef;
      }
   }

   pos_init(bld, x0, y0);

   coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);

   attribs_update(bld, 0);
}


/**
 * Advance the position and inputs to the given quad within the block.
 */
void
lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
                           int quad_index)
{
   assert(quad_index < 4);

   attribs_update(bld, quad_index);
}