/*
 * Copyright (C) 2018-2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef __AGX_PUBLIC_H_
#define __AGX_PUBLIC_H_

#include "compiler/nir/nir.h"
#include "util/u_dynarray.h"
#include "asahi/lib/agx_pack.h"

/* Selects what kind of data a push range (struct agx_push) refers to.
 * Enumerators are numbered consecutively starting at zero, so the trailing
 * AGX_PUSH_NUM_TYPES sentinel equals the number of real push types. */
enum agx_push_type {
   /* Buffer tables. Each *_BASES entry is an array of 64-bit pointers to the
    * buffers' base addresses; the matching *_SIZES entry is an array of
    * 16-bit sizes used for optional bounds checking. */
   AGX_PUSH_UBO_BASES = 0,
   AGX_PUSH_UBO_SIZES,
   AGX_PUSH_VBO_BASES,
   AGX_PUSH_VBO_SIZES,
   AGX_PUSH_SSBO_BASES,
   AGX_PUSH_SSBO_SIZES,

   /* The attached constant memory */
   AGX_PUSH_CONSTANTS,

   /* The contents of a UBO */
   AGX_PUSH_UBO_DATA,

   /* The RGBA blend constant, as FP32 */
   AGX_PUSH_BLEND_CONST,

   /* Array of 16-bit (array_size - 1) values for indexed array textures,
    * used when lowering access to indexed array textures */
   AGX_PUSH_ARRAY_SIZE_MINUS_1,

   /* Sentinel: must remain the final enumerator */
   AGX_PUSH_NUM_TYPES
};

/* Describes one range of data to push into the uniform file: what to push
 * (type), where it lands (base) and how much (length). */
struct agx_push {
   /* Contents to push */
   enum agx_push_type type : 8;

   /* Base of where to push, indexed in 16-bit units. The uniform file contains
    * 512 = 2^9 such units, so 9 bits cover every valid base (0..511). */
   unsigned base : 9;

   /* Number of 16-bit units to push.
    *
    * NOTE(review): a 9-bit field holds at most 511, so a single range
    * spanning all 512 units of the uniform file cannot be expressed here --
    * confirm that no producer needs that. */
   unsigned length : 9;

   /* If set, rather than pushing the specified data, push a pointer to the
    * specified data. This is slower to access but enables indirect access, as
    * the uniform file does not support indirection. */
   bool indirect : 1;

   /* Extra parameters that depend on the push type. Only ubo_data exists;
    * presumably it is meaningful only for AGX_PUSH_UBO_DATA -- confirm
    * against the users of this struct. */
   union {
      struct {
         /* Presumably the index of the source UBO and the offset of the
          * pushed data within it; the offset's unit is not visible from this
          * header -- TODO confirm. */
         uint16_t ubo;
         uint16_t offset;
      } ubo_data;
   };
};

/* Arbitrarily chosen limits. AGX_MAX_PUSH_RANGES sizes the push[] array in
 * struct agx_shader_info. */
#define AGX_MAX_PUSH_RANGES (16)
#define AGX_MAX_VARYINGS (32)

/* Vertex shader varying layout: how written varying slots map onto output
 * indices, with FP32 indices placed before FP16 indices. */
struct agx_varyings_vs {
   /* The first index used for FP16 varyings. Indices less than this are treated
    * as FP32. Guaranteeing this split may require remapping slots.
    */
   unsigned base_index_fp16;

   /* The total number of vertex shader indices output. Must be at least
    * base_index_fp16.
    */
   unsigned nr_index;

   /* One entry per varying slot (the array has VARYING_SLOT_MAX entries).
    *
    * If the slot is written, this is the base index that the first component
    * of the slot is written to.  The next components are found in the next
    * indices. If less than base_index_fp16, this is a 32-bit slot (with 4
    * indices for the 4 components), else this is a 16-bit slot (with 2
    * indices for the 4 components). This must be less than nr_index.
    *
    * If the slot is not written, this must be ~0.
    */
   unsigned slots[VARYING_SLOT_MAX];
};

/* Conservative upper bound on the number of coefficient register bindings;
 * sizes the bindings[] array in struct agx_varyings_fs. */
#define AGX_MAX_CF_BINDINGS (VARYING_SLOT_MAX)

/* Fragment shader varying layout: the coefficient registers in use and the
 * list of bindings that tie each register range to (part of) a varying slot. */
struct agx_varyings_fs {
   /* Number of coefficient registers used */
   unsigned nr_cf;

   /* Number of coefficient register bindings, i.e. presumably the number of
    * leading entries of bindings[] that are valid -- confirm with users */
   unsigned nr_bindings;

   /* Whether gl_FragCoord.z is read */
   bool reads_z;

   /* Coefficient register bindings */
   struct {
      /* Base coefficient register */
      unsigned cf_base;

      /* Slot being bound */
      gl_varying_slot slot;

      /* First component bound (2 bits: component 0..3).
       *
       * Must be 2 (Z) or 3 (W) if slot == VARYING_SLOT_POS.
       */
      unsigned offset : 2;

      /* Number of components bound (3 bits, so a count of 4 fits) */
      unsigned count : 3;

      /* Is smooth shading enabled? If false, flat shading is used */
      bool smooth : 1;

      /* Perspective correct interpolation */
      bool perspective : 1;
   } bindings[AGX_MAX_CF_BINDINGS];
};

/* Varying layout for either shader stage. The two members share storage, so
 * only one is valid at a time; which one presumably follows from the stage of
 * the shader being described -- confirm with users. */
struct agx_varyings {
   union {
      struct agx_varyings_vs vs;
      struct agx_varyings_fs fs;
   };
};

/* Information about a shader; this is the type of the `out` parameter of
 * agx_compile_shader_nir(). */
struct agx_shader_info {
   /* Push ranges. push_ranges is presumably the number of leading entries of
    * push[] in use (at most AGX_MAX_PUSH_RANGES) -- confirm with users. */
   unsigned push_ranges;
   struct agx_push push[AGX_MAX_PUSH_RANGES];

   /* Varying layout (vertex or fragment flavour, see struct agx_varyings) */
   struct agx_varyings varyings;

   /* Does the shader read the tilebuffer? */
   bool reads_tib;

   /* Does the shader write point size? */
   bool writes_psiz;

   /* Does the shader control the sample mask? */
   bool writes_sample_mask;
};

/* Array bounds for the shader keys below: AGX_MAX_RTS sizes tib_formats[] in
 * the fragment key; AGX_MAX_ATTRIBS and AGX_MAX_VBUFS size attributes[] and
 * vbuf_strides[] in the vertex key. */
#define AGX_MAX_RTS (8)
#define AGX_MAX_ATTRIBS (16)
#define AGX_MAX_VBUFS (16)

/* Data formats. The numeric values are spelled out explicitly and are not
 * contiguous (9 and 11 are not assigned here); presumably they are fixed
 * encodings that must not be renumbered -- confirm before changing any.
 *
 * Every value fits in 4 bits, which struct agx_attribute relies on for its
 * `format` bit-field. */
enum agx_format {
   AGX_FORMAT_I8 = 0,
   AGX_FORMAT_I16 = 1,
   AGX_FORMAT_I32 = 2,
   AGX_FORMAT_F16 = 3,
   AGX_FORMAT_U8NORM = 4,
   AGX_FORMAT_S8NORM = 5,
   AGX_FORMAT_U16NORM = 6,
   AGX_FORMAT_S16NORM = 7,
   AGX_FORMAT_RGB10A2 = 8,
   AGX_FORMAT_SRGBA8 = 10,
   AGX_FORMAT_RG11B10F = 12,
   AGX_FORMAT_RGB9E5 = 13,

   /* Keep last. Because of the gaps above this evaluates to 14 (one past the
    * largest value), not to the number of formats listed. */
   AGX_NUM_FORMATS,
};

/* Gives the base-2 logarithm of the minimum alignment of an agx_format: the
 * result n means the low n bits of an address in that format must be zero
 * (minimum alignment 2^n). The hardware left-shifts the offset argument of
 * device_load by this same amount.
 *
 * Any value that is not a listed agx_format hits unreachable(). */

static inline unsigned
agx_format_shift(enum agx_format format)
{
   switch (format) {
   /* Two low address bits must be zero */
   case AGX_FORMAT_RGB9E5:
   case AGX_FORMAT_RG11B10F:
   case AGX_FORMAT_RGB10A2:
   case AGX_FORMAT_I32:
      return 2;

   /* One low address bit must be zero */
   case AGX_FORMAT_S16NORM:
   case AGX_FORMAT_U16NORM:
   case AGX_FORMAT_F16:
   case AGX_FORMAT_I16:
      return 1;

   /* No alignment requirement */
   case AGX_FORMAT_SRGBA8:
   case AGX_FORMAT_S8NORM:
   case AGX_FORMAT_U8NORM:
   case AGX_FORMAT_I8:
      return 0;

   default:
      unreachable("invalid format");
   }
}

/* Description of one vertex attribute, stored in the vertex shader key. The
 * bit-fields below add up to 5 + 16 + 2 + 4 + 5 = 32 bits. */
struct agx_attribute {
   /* Presumably the instancing divisor for this attribute -- TODO confirm */
   uint32_t divisor;

   /* Presumably the index of the vertex buffer the attribute is fetched
    * from (cf. vbuf_strides[] in struct agx_vs_shader_key) -- confirm */
   unsigned buf : 5;

   /* Presumably the offset of the attribute within its buffer; the unit is
    * not visible from this header -- TODO confirm */
   unsigned src_offset : 16;

   /* Number of components minus one; 2 bits encode 1 to 4 components */
   unsigned nr_comps_minus_1 : 2;

   /* Format of the attribute data; 4 bits hold every enum agx_format value
    * (the largest is 13) */
   enum agx_format format : 4;

   /* Explicit filler for the remaining 5 bits of the 32-bit word */
   unsigned padding : 5;
};

/* Vertex shader variant of the shader key passed to
 * agx_compile_shader_nir(). */
struct agx_vs_shader_key {
   /* Vertex buffers: presumably num_vbufs counts the valid leading entries
    * of vbuf_strides[] (at most AGX_MAX_VBUFS) -- confirm with users */
   unsigned num_vbufs;
   unsigned vbuf_strides[AGX_MAX_VBUFS];

   /* Vertex attribute descriptions, up to AGX_MAX_ATTRIBS */
   struct agx_attribute attributes[AGX_MAX_ATTRIBS];

   /* If true, clip coordinates (Z, going by the field name) range over
    * [0, 1] instead of [-1, 1] */
   bool clip_halfz : 1;
};

/* Fragment shader variant of the shader key passed to
 * agx_compile_shader_nir(). */
struct agx_fs_shader_key {
   /* One format per render target, up to AGX_MAX_RTS. "tib" presumably
    * stands for tilebuffer, as in agx_shader_info::reads_tib -- confirm */
   enum agx_format tib_formats[AGX_MAX_RTS];
};

/* Shader key for either stage. The two members share storage, so only one is
 * valid at a time; which one presumably follows from the stage of the shader
 * being compiled -- confirm with users. */
struct agx_shader_key {
   union {
      struct agx_vs_shader_key vs;
      struct agx_fs_shader_key fs;
   };
};

/* Compiler entry point. Only the declaration lives in this header; the
 * definition is elsewhere.
 *
 * Going by the parameter names and types (confirm against the definition):
 * `nir` is the shader to compile, `key` carries the stage-specific state it
 * is compiled against, `binary` is a dynarray presumably receiving the
 * generated code, and `out` is filled with information about the result. */
void
agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
                       struct util_dynarray *binary,
                       struct agx_shader_info *out);

/* NIR compiler options for AGX.
 *
 * This is a `static const` object defined in a header, so every translation
 * unit that includes the header gets its own private copy.
 *
 * NOTE(review): the initializers may deliberately follow the declaration
 * order of nir_shader_compiler_options; keep new entries in that order in
 * case the header is ever consumed by a C++ translation unit, where
 * out-of-order designated initializers are rejected. */
static const nir_shader_compiler_options agx_nir_options = {
   .lower_fdiv = true,
   .fuse_ffma16 = true,
   .fuse_ffma32 = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_fpow = true,
   .lower_fmod = true,
   .lower_ifind_msb = true,
   .lower_find_lsb = true,
   .lower_scmp = true,
   .lower_isign = true,
   .lower_fsign = true,
   .lower_iabs = true,
   .lower_fdph = true,
   .lower_ffract = true,
   .lower_pack_split = true,
   .lower_insert_byte = true,
   .lower_insert_word = true,
   .lower_cs_local_index_to_id = true,
   .has_cs_global_id = true,
   .lower_wpos_pntc = true,
   .vectorize_io = true,
   .use_interpolated_input_intrinsics = true,
   .lower_rotate = true,
   .has_fsub = true,
   .has_isub = true,
   .max_unroll_iterations = 32,
   .lower_uniforms_to_ubo = true,
   .force_indirect_unrolling_sampler = true,
   /* Applies to shader inputs, shader outputs and function temporaries */
   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
   /* Complement mask: every int64 lowering flag is set except
    * nir_lower_iadd64 and nir_lower_imul_2x32_64 */
   .lower_int64_options = (nir_lower_int64_options) ~(nir_lower_iadd64 | nir_lower_imul_2x32_64),
   /* Of the doubles lowering flags, only nir_lower_dmod is set */
   .lower_doubles_options = nir_lower_dmod,
};

#endif