/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_deref.h"
#include "gl_nir_linker.h"
#include "linker_util.h"
#include "main/consts_exts.h"
#include "main/shader_types.h"
#include "util/u_math.h"

/**
 * This file contains code to do a nir-based linking for uniform blocks. This
 * includes ubos and ssbos.
 *
 * For the case of ARB_gl_spirv there are some differences compared with GLSL:
 *
 * 1. Linking doesn't use names: GLSL linking use names as core concept. But
 *    on SPIR-V, uniform block name, fields names, and other names are
 *    considered optional debug infor so could not be present. So the linking
 *    should work without it, and it is optional to not handle them at
 *    all. From ARB_gl_spirv spec.
 *
 *    "19. How should the program interface query operations behave for program
 *         objects created from SPIR-V shaders?
 *
 *     DISCUSSION: we previously said we didn't need reflection to work for
 *     SPIR-V shaders (at least for the first version), however we are left
 *     with specifying how it should "not work". The primary issue is that
 *     SPIR-V binaries are not required to have names associated with
 *     variables. They can be associated in debug information, but there is no
 *     requirement for that to be present, and it should not be relied upon.
 *
 *     Options:
 *
 *     <skip>
 *
 *    C) Allow as much as possible to work "naturally". You can query for the
 *    number of active resources, and for details about them. Anything that
 *    doesn't query by name will work as expected. Queries for maximum length
 *    of names return one. Queries for anything "by name" return INVALID_INDEX
 *    (or -1). Querying the name property of a resource returns an empty
 *    string. This may allow many queries to work, but it's not clear how
 *    useful it would be if you can't actually know which specific variable
 *    you are retrieving information on. If everything is specified a-priori
 *    by location/binding/offset/index/component in the shader, this may be
 *    sufficient.
 *
 *    RESOLVED.  Pick (c), but also allow debug names to be returned if an
 *    implementation wants to."
 *
 * When linking SPIR-V shaders this implemention doesn't care for the names,
 * as the main objective is functional, and not support optional debug
 * features.
 *
 * 2. Terminology: this file handles both UBO and SSBO, including both as
 *    "uniform blocks" analogously to what is done in the GLSL (IR) path.
 *
 *    From ARB_gl_spirv spec:
 *      "Mapping of Storage Classes:
 *       <skip>
 *       uniform blockN { ... } ...;  -> Uniform, with Block decoration
 *       <skip>
 *       buffer  blockN { ... } ...;  -> Uniform, with BufferBlock decoration"
 *
 * 3. Explicit data: for the SPIR-V path the code assumes that all structure
 *    members have an Offset decoration, all arrays have an ArrayStride and
 *    all matrices have a MatrixStride, even for nested structures. That way
 *    we don’t have to worry about the different layout modes. This is
 *    explicitly required in the SPIR-V spec:
 *
 *    "Composite objects in the UniformConstant, Uniform, and PushConstant
 *     Storage Classes must be explicitly laid out. The following apply to all
 *     the aggregate and matrix types describing such an object, recursively
 *     through their nested types:
 *
 *    – Each structure-type member must have an Offset Decoration.
 *    – Each array type must have an ArrayStride Decoration.
 *    – Each structure-type member that is a matrix or array-of-matrices must
 *      have be decorated with a MatrixStride Decoration, and one of the
 *      RowMajor or ColMajor Decorations."
 *
 *    Additionally, the structure members are expected to be presented in
 *    increasing offset order:
 *
 *   "a structure has lower-numbered members appearing at smaller offsets than
 *    higher-numbered members"
 */

enum block_type {
   BLOCK_UBO,
   BLOCK_SSBO
};

struct uniform_block_array_elements {
   unsigned *array_elements;
   unsigned num_array_elements;
   /**
    * Size of the array before array-trimming optimizations.
    *
    * Locations are only assigned to active array elements, but the location
    * values are calculated as if all elements are active. The total number
    * of elements in an array including the elements in arrays of arrays before
    * inactive elements are removed is needed to be perform that calculation.
    */
   unsigned aoa_size;

   struct uniform_block_array_elements *array;
};

struct link_uniform_block_active {
   const struct glsl_type *type;
   nir_variable *var;

   struct uniform_block_array_elements *array;

   unsigned binding;

   bool has_instance_name;
   bool has_binding;
   bool is_shader_storage;
};

/*
 * It is worth to note that ARB_gl_spirv spec doesn't require us to do this
 * validation, but at the same time, it allow us to do it.
 */
static bool
link_blocks_are_compatible(const struct gl_uniform_block *a,
                           const struct gl_uniform_block *b)
{
   /*
    *   "7.4.2. SPIR-V Shader Interface Matching":
    *    "Uniform and shader storage block variables must also be decorated
    *     with a Binding"
    */
   if (a->Binding != b->Binding)
      return false;

   assert((a->name.string == NULL && b->name.string == NULL) ||
          strcmp(a->name.string, b->name.string) == 0);

   if (a->NumUniforms != b->NumUniforms)
      return false;

   if (a->_Packing != b->_Packing)
      return false;

   if (a->_RowMajor != b->_RowMajor)
      return false;

   for (unsigned i = 0; i < a->NumUniforms; i++) {
      if (a->Uniforms[i].Name != NULL && b->Uniforms[i].Name != NULL &&
          strcmp(a->Uniforms[i].Name, b->Uniforms[i].Name) != 0)
         return false;

      if (a->Uniforms[i].Type != b->Uniforms[i].Type)
         return false;

      if (a->Uniforms[i].RowMajor != b->Uniforms[i].RowMajor)
         return false;

      if (a->Uniforms[i].Offset != b->Uniforms[i].Offset)
         return false;
   }

   return true;
}

/**
 * Merges a buffer block into an array of buffer blocks that may or may not
 * already contain a copy of it.
 *
 * Returns the index of the block in the array (new if it was needed, or the
 * index of the copy of it). -1 if there are two incompatible block
 * definitions with the same binding.
 *
 */
static int
link_cross_validate_uniform_block(void *mem_ctx,
                                  struct gl_uniform_block **linked_blocks,
                                  unsigned int *num_linked_blocks,
                                  struct gl_uniform_block *new_block,
                                  bool is_spirv)
{
   /* We first check if new_block was already linked */
   for (unsigned int i = 0; i < *num_linked_blocks; i++) {
      struct gl_uniform_block *old_block = &(*linked_blocks)[i];

      if ((is_spirv && old_block->Binding == new_block->Binding) ||
          (!is_spirv && (strcmp(old_block->name.string, new_block->name.string) == 0)))
         return link_blocks_are_compatible(old_block, new_block) ? i : -1;
   }

   *linked_blocks = reralloc(mem_ctx, *linked_blocks,
                             struct gl_uniform_block,
                             *num_linked_blocks + 1);
   int linked_block_index = (*num_linked_blocks)++;
   struct gl_uniform_block *linked_block = &(*linked_blocks)[linked_block_index];

   memcpy(linked_block, new_block, sizeof(*new_block));
   linked_block->Uniforms = ralloc_array(*linked_blocks,
                                         struct gl_uniform_buffer_variable,
                                         linked_block->NumUniforms);

   memcpy(linked_block->Uniforms,
          new_block->Uniforms,
          sizeof(*linked_block->Uniforms) * linked_block->NumUniforms);

   /* If we mem copied a pointer to a string above we need to create our own
    * copy of the string.
    */
   if (linked_block->name.string) {
      linked_block->name.string =
         ralloc_strdup(*linked_blocks, linked_block->name.string);
      resource_name_updated(&linked_block->name);

      for (unsigned int i = 0; i < linked_block->NumUniforms; i++) {
        struct gl_uniform_buffer_variable *ubo_var =
           &linked_block->Uniforms[i];

         if (ubo_var->Name == ubo_var->IndexName) {
            ubo_var->Name = ralloc_strdup(*linked_blocks, ubo_var->Name);
            ubo_var->IndexName = ubo_var->Name;
         } else {
            ubo_var->Name = ralloc_strdup(*linked_blocks, ubo_var->Name);
            ubo_var->IndexName =
               ralloc_strdup(*linked_blocks, ubo_var->IndexName);
         }
      }
   }

   return linked_block_index;
}


/**
 * Accumulates the array of buffer blocks and checks that all definitions of
 * blocks agree on their contents.
 */
static bool
nir_interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog,
                                             enum block_type block_type)
{
   int *interfaceBlockStageIndex[MESA_SHADER_STAGES];
   struct gl_uniform_block *blks = NULL;
   unsigned *num_blks = block_type == BLOCK_SSBO ? &prog->data->NumShaderStorageBlocks :
      &prog->data->NumUniformBlocks;

   unsigned max_num_buffer_blocks = 0;
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      if (prog->_LinkedShaders[i]) {
         if (block_type == BLOCK_SSBO) {
            max_num_buffer_blocks +=
               prog->_LinkedShaders[i]->Program->info.num_ssbos;
         } else {
            max_num_buffer_blocks +=
               prog->_LinkedShaders[i]->Program->info.num_ubos;
         }
      }
   }

   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_linked_shader *sh = prog->_LinkedShaders[i];

      interfaceBlockStageIndex[i] = malloc(max_num_buffer_blocks * sizeof(int));
      for (unsigned int j = 0; j < max_num_buffer_blocks; j++)
         interfaceBlockStageIndex[i][j] = -1;

      if (sh == NULL)
         continue;

      unsigned sh_num_blocks;
      struct gl_uniform_block **sh_blks;
      if (block_type == BLOCK_SSBO) {
         sh_num_blocks = prog->_LinkedShaders[i]->Program->info.num_ssbos;
         sh_blks = sh->Program->sh.ShaderStorageBlocks;
      } else {
         sh_num_blocks = prog->_LinkedShaders[i]->Program->info.num_ubos;
         sh_blks = sh->Program->sh.UniformBlocks;
      }

      for (unsigned int j = 0; j < sh_num_blocks; j++) {
         int index = link_cross_validate_uniform_block(prog->data, &blks,
                                                       num_blks, sh_blks[j],
                                                       !!prog->data->spirv);

         if (index == -1) {
            /* We use the binding as we are ignoring the names */
            linker_error(prog, "buffer block with binding `%i' has mismatching "
                         "definitions\n", sh_blks[j]->Binding);

            for (unsigned k = 0; k <= i; k++) {
               free(interfaceBlockStageIndex[k]);
            }

            /* Reset the block count. This will help avoid various segfaults
             * from api calls that assume the array exists due to the count
             * being non-zero.
             */
            *num_blks = 0;
            return false;
         }

         interfaceBlockStageIndex[i][index] = j;
      }
   }

   /* Update per stage block pointers to point to the program list.
    */
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      for (unsigned j = 0; j < *num_blks; j++) {
         int stage_index = interfaceBlockStageIndex[i][j];

         if (stage_index != -1) {
            struct gl_linked_shader *sh = prog->_LinkedShaders[i];

            struct gl_uniform_block **sh_blks = block_type == BLOCK_SSBO ?
               sh->Program->sh.ShaderStorageBlocks :
               sh->Program->sh.UniformBlocks;

            blks[j].stageref |= sh_blks[stage_index]->stageref;
            sh_blks[stage_index] = &blks[j];
         }
      }
   }

   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      free(interfaceBlockStageIndex[i]);
   }

   if (block_type == BLOCK_SSBO)
      prog->data->ShaderStorageBlocks = blks;
   else {
      prog->data->NumUniformBlocks = *num_blks;
      prog->data->UniformBlocks = blks;
   }

   return true;
}

/*
 * Iterates @type in order to compute how many individual leaf variables
 * contains.
 */
static void
iterate_type_count_variables(const struct glsl_type *type,
                             unsigned int *num_variables)
{
   unsigned length = glsl_get_length(type);
   if (glsl_type_is_unsized_array(type))
      length = 1;

   for (unsigned i = 0; i < length; i++) {
      const struct glsl_type *field_type;

      if (glsl_type_is_struct_or_ifc(type))
         field_type = glsl_get_struct_field(type, i);
      else
         field_type = glsl_get_array_element(type);

      if (glsl_type_is_leaf(field_type))
         (*num_variables)++;
      else
         iterate_type_count_variables(field_type, num_variables);
   }
}

static void
fill_individual_variable(void *mem_ctx, const char *name,
                         const struct glsl_type *type,
                         struct gl_uniform_buffer_variable *variables,
                         unsigned int *variable_index,
                         unsigned int *offset,
                         unsigned *buffer_size,
                         struct gl_shader_program *prog,
                         struct gl_uniform_block *block,
                         const enum glsl_interface_packing packing,
                         bool is_array_instance,
                         bool last_field)
{
   struct gl_uniform_buffer_variable *v = &variables[*variable_index];
   v->Type = type;

   const struct glsl_type *t_without_array = glsl_without_array(type);
   if (glsl_type_is_matrix(glsl_without_array(t_without_array))) {
      v->RowMajor = glsl_matrix_type_is_row_major(t_without_array);
   } else {
      /* default value, better that potential meaningless garbage */
      v->RowMajor = false;
   }

   if (!prog->data->spirv) {
      v->Name = ralloc_strdup(mem_ctx, name);

      if (is_array_instance) {
         v->IndexName = ralloc_strdup(mem_ctx, name);

         char *open_bracket = strchr(v->IndexName, '[');
         assert(open_bracket != NULL);

         char *close_bracket = strchr(open_bracket, '.') - 1;
         assert(close_bracket != NULL);

         /* Length of the tail without the ']' but with the NUL. */
         unsigned len = strlen(close_bracket + 1) + 1;

         memmove(open_bracket, close_bracket + 1, len);
      } else {
         v->IndexName = v->Name;
      }

      unsigned alignment = 0;
      unsigned size = 0;

      /* The ARB_program_interface_query spec says:
       *
       *    If the final member of an active shader storage block is array
       *    with no declared size, the minimum buffer size is computed
       *    assuming the array was declared as an array with one element.
       *
       * For that reason, we use the base type of the unsized array to
       * calculate its size. We don't need to check if the unsized array is
       * the last member of a shader storage block (that check was already
       * done by the parser).
       */
      const struct glsl_type *type_for_size = type;
      if (glsl_type_is_unsized_array(type)) {
         if (!last_field) {
            linker_error(prog, "unsized array `%s' definition: "
                         "only last member of a shader storage block "
                         "can be defined as unsized array",
                         name);
         }

         type_for_size = glsl_get_array_element(type);
      }

      if (packing == GLSL_INTERFACE_PACKING_STD430) {
         alignment = glsl_get_std430_base_alignment(type, v->RowMajor);
         size = glsl_get_std430_size(type_for_size, v->RowMajor);
      } else {
         alignment = glsl_get_std140_base_alignment(type, v->RowMajor);
         size = glsl_get_std140_size(type_for_size, v->RowMajor);
      }

      *offset = align(*offset, alignment);
      v->Offset = *offset;

      *offset += size;

      /* The ARB_uniform_buffer_object spec says:
       *
       *    For uniform blocks laid out according to [std140] rules, the
       *    minimum buffer object size returned by the UNIFORM_BLOCK_DATA_SIZE
       *    query is derived by taking the offset of the last basic machine
       *    unit consumed by the last uniform of the uniform block (including
       *    any end-of-array or end-of-structure padding), adding one, and
       *    rounding up to the next multiple of the base alignment required
       *    for a vec4.
       */
      *buffer_size = align(*offset, 16);
   } else {
      /**
       * Although ARB_gl_spirv points that the offsets need to be included
       * (see "Mappings of layouts"), in the end those are only valid for
       * root-variables, and we would need to recompute offsets when we iterate
       * over non-trivial types, like aoa. So we compute the offset always.
       */
      v->Offset = *offset;
      (*offset) += glsl_get_explicit_size(type, true);
   }

   (*variable_index)++;
}

static void
enter_or_leave_record(struct gl_uniform_block *block, unsigned *offset,
                      const struct gl_constants *consts,
                      const struct glsl_type *type,
                      bool row_major,
                      enum glsl_interface_packing internal_packing)
{
   assert(glsl_type_is_struct(type));

   if (internal_packing == GLSL_INTERFACE_PACKING_STD430) {
      *offset = align(
         *offset, glsl_get_std430_base_alignment(type, row_major));
   } else
      *offset = align(
         *offset, glsl_get_std140_base_alignment(type, row_major));
}

static void
iterate_type_fill_variables(void *mem_ctx, char **name,
                            size_t name_length,
                            const struct gl_constants *consts,
                            const struct glsl_type *type,
                            struct gl_uniform_buffer_variable *variables,
                            unsigned int *variable_index,
                            unsigned int *offset,
                            unsigned *buffer_size,
                            struct gl_shader_program *prog,
                            struct gl_uniform_block *block,
                            const struct glsl_type *blk_type,
                            bool is_array_instance, bool row_major,
                            enum glsl_interface_packing internal_packing)
{
   unsigned struct_base_offset;

   bool struct_or_ifc = glsl_type_is_struct_or_ifc(type);
   if (struct_or_ifc)
      struct_base_offset = *offset;

   /* Handle shader storage block unsized arrays */
   unsigned length = glsl_get_length(type);
      if (glsl_type_is_unsized_array(type))
         length = 1;

   if (glsl_type_is_struct(type) && !prog->data->spirv)
      enter_or_leave_record(block, offset, consts, type, row_major,
                            internal_packing);

   bool has_block_name = *name ? strcmp(*name, "") : false;
   for (unsigned i = 0; i < length; i++) {
      const struct glsl_type *field_type;
      size_t new_length = name_length;
      bool field_row_major = row_major;

      if (struct_or_ifc) {
         field_type = glsl_get_struct_field(type, i);

         if (prog->data->spirv) {
            *offset = struct_base_offset + glsl_get_struct_field_offset(type, i);

         } else if (glsl_get_struct_field_offset(type, i) != -1 &&
                    type == glsl_without_array(blk_type)) {
            *offset = glsl_get_struct_field_offset(type, i);
         }

         /* Append '.field' to the current variable name. */
         if (*name) {
            ralloc_asprintf_rewrite_tail(name, &new_length,
                                         has_block_name ? ".%s" : "%s",
                                         glsl_get_struct_elem_name(type, i));
         }

         /* The layout of structures at the top level of the block is set
          * during parsing.  For matrices contained in multiple levels of
          * structures in the block, the inner structures have no layout.
          * These cases inherit the layout from the outer levels.
          */
         const enum glsl_matrix_layout matrix_layout =
            glsl_get_struct_field_data(type, i)->matrix_layout;
         if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
            field_row_major = true;
         } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
            field_row_major = false;
         }
      } else {
         field_type = glsl_get_array_element(type);

         /* Append the subscript to the current variable name */
         if (*name) {
            ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
         }
      }

      if (glsl_type_is_leaf(field_type)) {
         fill_individual_variable(mem_ctx, *name, field_type, variables,
                                  variable_index, offset, buffer_size, prog,
                                  block, internal_packing, is_array_instance,
                                  (i + 1) == glsl_get_length(type));
      } else {
         iterate_type_fill_variables(mem_ctx, name, new_length, consts, field_type, variables,
                                     variable_index, offset, buffer_size,
                                     prog, block, blk_type, is_array_instance,
                                     field_row_major, internal_packing);
      }
   }

   if (glsl_type_is_struct(type) && !prog->data->spirv)
      enter_or_leave_record(block, offset, consts, type, row_major,
                            internal_packing);
}

static struct link_uniform_block_active *
process_block(void *mem_ctx, struct hash_table *ht, nir_variable *var)
{
   const struct hash_entry *existing_block =
      _mesa_hash_table_search(ht, glsl_get_type_name(var->interface_type));

   bool is_interface_instance =
      glsl_without_array(var->type) == var->interface_type;
   const struct glsl_type *block_type = is_interface_instance ?
         var->type : var->interface_type;

   /* If a block with this block-name has not previously been seen, add it.
    * If a block with this block-name has been seen, it must be identical to
    * the block currently being examined.
    */
   if (existing_block == NULL) {
      struct link_uniform_block_active *b =
         rzalloc(mem_ctx, struct link_uniform_block_active);

      b->var = var;
      b->type = block_type;
      b->has_instance_name = is_interface_instance;
      b->is_shader_storage = var->data.mode == nir_var_mem_ssbo;

      if (var->data.explicit_binding) {
         b->has_binding = true;
         b->binding = var->data.binding;
      } else {
         b->has_binding = false;
         b->binding = 0;
      }

      _mesa_hash_table_insert(ht, glsl_get_type_name(var->interface_type),
                              (void *) b);
      return b;
   } else {
      struct link_uniform_block_active *b =
         (struct link_uniform_block_active *) existing_block->data;

      if (b->type != block_type ||
          b->has_instance_name != is_interface_instance)
         return NULL;
      else
         return b;
   }

   assert(!"Should not get here.");
   return NULL;
}

/* For arrays of arrays this function will give us a middle ground between
 * detecting inactive uniform blocks and structuring them in a way that makes
 * it easy to calculate the offset for indirect indexing.
 *
 * For example given the shader:
 *
 *   uniform ArraysOfArraysBlock
 *   {
 *      vec4 a;
 *   } i[3][4][5];
 *
 *   void main()
 *   {
 *      vec4 b = i[0][1][1].a;
 *      gl_Position = i[2][2][3].a + b;
 *   }
 *
 * There are only 2 active blocks above but for the sake of indirect indexing
 * and not over complicating the code we will end up with a count of 8.  Here
 * each dimension has 2 different indices counted so we end up with 2*2*2
 */
static void
process_arrays(void *mem_ctx, nir_deref_instr *deref,
               struct link_uniform_block_active *block)
{
   if (!glsl_type_is_array(block->type))
      return;

   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   assert(path.path[0]->deref_type == nir_deref_type_var);
   nir_deref_instr **p = &path.path[1];
   assert((*p)->deref_type == nir_deref_type_array);

   const struct glsl_type *type = block->type;
   struct uniform_block_array_elements **ub_array_ptr = &block->array;
   for (; *p; p++) {
      if ((*p)->deref_type == nir_deref_type_array) {
         if (*ub_array_ptr == NULL) {
            *ub_array_ptr = rzalloc(mem_ctx,
                                    struct uniform_block_array_elements);
            (*ub_array_ptr)->aoa_size = glsl_get_aoa_size(type);
         }

         struct uniform_block_array_elements *ub_array = *ub_array_ptr;
         if (nir_src_is_const((*p)->arr.index)) {
            /* Index is a constant, so mark just that element used, if not
             * already.
             */
            const unsigned idx = nir_src_as_uint((*p)->arr.index);

            unsigned i;
            for (i = 0; i < ub_array->num_array_elements; i++) {
               if (ub_array->array_elements[i] == idx)
                  break;
            }

            if (i == ub_array->num_array_elements) {
               ub_array->array_elements = reralloc(mem_ctx,
                                                   ub_array->array_elements,
                                                   unsigned,
                                                   ub_array->num_array_elements + 1);

               ub_array->array_elements[ub_array->num_array_elements] = idx;
               ub_array->num_array_elements++;
            }
         } else {
            /* The array index is not a constant, so mark the entire array used.
             */
            assert(glsl_type_is_array((*p)->type));
            if (ub_array->num_array_elements < glsl_get_length(type)) {
               ub_array->num_array_elements = glsl_get_length(type);
               ub_array->array_elements = reralloc(mem_ctx,
                                                   ub_array->array_elements,
                                                   unsigned,
                                                   ub_array->num_array_elements);

               for (unsigned i = 0; i < ub_array->num_array_elements; i++) {
                  ub_array->array_elements[i] = i;
               }
            }
         }
         ub_array_ptr = &ub_array->array;
         type = glsl_get_array_element(type);
      } else {
         /* We found the block so break out of loop */
         assert((*p)->deref_type == nir_deref_type_struct);
         break;
      }
   }

   nir_deref_path_finish(&path);
}

/* This function resizes the array types of the block so that later we can use
 * this new size to correctly calculate the offest for indirect indexing.
 */
static const struct glsl_type *
resize_block_array(const struct glsl_type *type,
                   struct uniform_block_array_elements *ub_array)
{
   if (glsl_type_is_array(type)) {
      struct uniform_block_array_elements *child_array =
         glsl_type_is_array(glsl_get_array_element(type)) ? ub_array->array : NULL;

      const struct glsl_type *new_child_type =
         resize_block_array(glsl_get_array_element(type), child_array);
      const struct glsl_type *new_type =
         glsl_array_type(new_child_type, ub_array->num_array_elements, 0);

      return new_type;
   } else {
      assert(glsl_type_is_struct_or_ifc(type));
      return type;
   }
}

static void
count_block(const struct glsl_type *blk_type, unsigned *num_blocks,
            unsigned *num_variables)
{
   const struct glsl_type *type = glsl_without_array(blk_type);
   unsigned aoa_size = glsl_get_aoa_size(blk_type);
   unsigned buffer_count = aoa_size == 0 ? 1 : aoa_size;

   *num_blocks += buffer_count;

   unsigned int block_variables = 0;
   iterate_type_count_variables(type, &block_variables);

   *num_variables += block_variables * buffer_count;
}

static bool
gather_packed_block_info(void *mem_ctx, struct gl_shader_program *prog,
                         struct hash_table *block_hash,
                         nir_deref_instr *deref, enum block_type block_type)
{

   nir_variable_mode mask = nir_var_mem_ubo | nir_var_mem_ssbo;

   if (!nir_deref_mode_is_one_of(deref, mask))
      return true;

   nir_variable *var = nir_deref_instr_get_variable(deref);

   if (block_type == BLOCK_UBO && !nir_variable_is_in_ubo(var))
      return true;

   if (block_type == BLOCK_SSBO && !nir_variable_is_in_ssbo(var))
      return true;

   /* Process the block.  Bail if there was an error. */
   struct link_uniform_block_active *b =
      process_block(mem_ctx, block_hash, var);
   if (b == NULL) {
      linker_error(prog,
                   "uniform block `%s' has mismatching definitions",
                   glsl_without_array(var->type) == var->interface_type ?
                      glsl_get_type_name(var->type) :
                      glsl_get_type_name(var->interface_type));
      return false;
   }

   assert(b->type != NULL);

   /* If the block was declared with a shared or std140 layout
    * qualifier, all its instances have been already marked as used.
    */
   if (glsl_get_ifc_packing(glsl_without_array(b->type)) ==
       GLSL_INTERFACE_PACKING_PACKED) {
      process_arrays(mem_ctx, deref, b);
   }

   return true;
}

static bool
gather_packed_blocks_info(void *mem_ctx, struct gl_shader_program *prog,
                          nir_shader *shader, struct hash_table *block_hash,
                          enum block_type block_type)
{
   bool success = true;
   nir_foreach_function_impl(impl, shader) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic != nir_intrinsic_copy_deref &&
                intr->intrinsic != nir_intrinsic_load_deref &&
                intr->intrinsic != nir_intrinsic_store_deref &&
                intr->intrinsic != nir_intrinsic_deref_buffer_array_length)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
            success |=
               gather_packed_block_info(mem_ctx, prog, block_hash, deref,
                                        block_type);

            if (intr->intrinsic == nir_intrinsic_copy_deref) {
               deref = nir_src_as_deref(intr->src[1]);
               success |=
                  gather_packed_block_info(mem_ctx, prog, block_hash, deref,
                                           block_type);
            }
         }
      }
   }

   return success;
}

static void
allocate_uniform_blocks(void *mem_ctx, struct hash_table *block_hash,
                        struct gl_shader_program *prog,
                        struct gl_linked_shader *shader,
                        struct gl_uniform_block **out_blks, unsigned *num_blocks,
                        struct gl_uniform_buffer_variable **out_variables,
                        unsigned *num_variables, enum block_type block_type,
                        bool supports_std430)
{
   *num_variables = 0;
   *num_blocks = 0;

   /* Section 2.11.6 (Uniform Variables) of the OpenGL ES 3.0.3 spec says:
    *
    *     "All members of a named uniform block declared with a shared or
    *     std140 layout qualifier are considered active, even if they are not
    *     referenced in any shader in the program. The uniform block itself is
    *     also considered active, even if no member of the block is
    *     referenced."
    *
    * So for blocks not defined as packed we simply iterate over the type to
    * establish a count of active blocks.
    */
   nir_foreach_variable_in_shader(var, shader->Program->nir) {
      if (block_type == BLOCK_UBO && !nir_variable_is_in_ubo(var))
         continue;

      if (block_type == BLOCK_SSBO && !nir_variable_is_in_ssbo(var))
         continue;

      if (prog->data->spirv) {
         count_block(var->type, num_blocks, num_variables);
      } else {
         /* For UBO and SSBO variables, we need explicit types */
         const glsl_type *explicit_ifc_type =
            glsl_get_explicit_interface_type(var->interface_type,
                                             supports_std430);

         var->interface_type = explicit_ifc_type;

         if (glsl_type_is_interface(glsl_without_array(var->type))) {
            /* If the type contains the interface, wrap the explicit type in
             * the right number of arrays.
             */
            var->type = glsl_type_wrap_in_arrays(explicit_ifc_type, var->type);
         } else {
            /* Otherwise, this variable is one entry in the interface */
            UNUSED bool found = false;
            for (unsigned i = 0; i < explicit_ifc_type->length; i++) {
               const glsl_struct_field *field =
                  &explicit_ifc_type->fields.structure[i];
               if (strcmp(var->name, field->name) != 0)
                  continue;

               var->type = field->type;
               found = true;
               break;
            }
            assert(found);
         }

         /* Process the block.  Bail if there was an error. */
         struct link_uniform_block_active *b =
            process_block(mem_ctx, block_hash, var);
         if (b == NULL) {
            linker_error(prog, "uniform block `%s' has mismatching definitions",
                         glsl_get_type_name(var->interface_type));
            return;
         }

         assert(b->array == NULL);
         assert(b->type != NULL);
         assert(!glsl_type_is_array(b->type) || b->has_instance_name);

         /* For uniform block arrays declared with a shared or std140 layout
          * qualifier, mark all its instances as used.
          */
         if (glsl_get_ifc_packing(glsl_without_array(b->type)) ==
             GLSL_INTERFACE_PACKING_PACKED)
            continue;

         const struct glsl_type *type = b->type;
         struct uniform_block_array_elements **ub_array = &b->array;
         while (glsl_type_is_array(type)) {
            assert(glsl_get_length(b->type) > 0);

            *ub_array = rzalloc(mem_ctx, struct uniform_block_array_elements);
            (*ub_array)->num_array_elements = glsl_get_length(type);
            (*ub_array)->array_elements = reralloc(mem_ctx,
                                                   (*ub_array)->array_elements,
                                                   unsigned,
                                                   (*ub_array)->num_array_elements);
            (*ub_array)->aoa_size = glsl_get_aoa_size(type);

            for (unsigned i = 0; i < (*ub_array)->num_array_elements; i++) {
               (*ub_array)->array_elements[i] = i;
            }
            ub_array = &(*ub_array)->array;
            type = glsl_get_array_element(type);
         }
      }
   }

   if (!prog->data->spirv) {
      /* Gather packed ubo information by looping over derefs */
      if (!gather_packed_blocks_info(mem_ctx, prog, shader->Program->nir,
                                     block_hash, block_type))
         return;

      /* Count the number of active uniform blocks.  Count the total number of
       * active slots in those uniform blocks.
       */
      hash_table_foreach(block_hash, entry) {
         struct link_uniform_block_active *const b =
            (struct link_uniform_block_active *) entry->data;

         assert((b->array != NULL) == glsl_type_is_array(b->type));

         if (b->array != NULL &&
             (glsl_get_ifc_packing(glsl_without_array(b->type)) ==
              GLSL_INTERFACE_PACKING_PACKED)) {
            b->type = resize_block_array(b->type, b->array);
            b->var->type = b->type;
         }

         count_block(b->type, num_blocks, num_variables);
      }
   }

   if (*num_blocks == 0) {
      assert(*num_variables == 0);
      return;
   }

   nir_fixup_deref_types(shader->Program->nir);

   assert(*num_variables != 0);

   struct gl_uniform_block *blocks =
      rzalloc_array(mem_ctx, struct gl_uniform_block, *num_blocks);

   struct gl_uniform_buffer_variable *variables =
      rzalloc_array(blocks, struct gl_uniform_buffer_variable, *num_variables);

   *out_blks = blocks;
   *out_variables = variables;
}

static void
fill_block(void *mem_ctx, const struct gl_constants *consts, const char *name,
           struct gl_uniform_block *blocks, unsigned *block_index,
           nir_variable *var,
           struct gl_uniform_buffer_variable *variables,
           unsigned *variable_index,
           unsigned binding_offset,
           unsigned linearized_index,
           struct gl_shader_program *prog,
           const gl_shader_stage stage,
           enum block_type block_type)
{
   struct gl_uniform_block *block = &blocks[*block_index];

   bool is_spirv = prog->data->spirv;

   bool is_interface_instance =
      glsl_without_array(var->type) == var->interface_type;
   const struct glsl_type *blk_type = is_interface_instance ?
         var->type : var->interface_type;
   const struct glsl_type *type = glsl_without_array(blk_type);

   block->name.string = is_spirv ? NULL : ralloc_strdup(blocks, name);
   resource_name_updated(&block->name);

   /* From ARB_gl_spirv spec:
    *    "Vulkan uses only one binding point for a resource array,
    *     while OpenGL still uses multiple binding points, so binding
    *     numbers are counted differently for SPIR-V used in Vulkan
    *     and OpenGL
    */
   block->Binding =
      var->data.explicit_binding ? var->data.binding + binding_offset : 0;

   block->Uniforms = &variables[*variable_index];

   /* FIXME: This sets stageref when a block is declared in a spirv shader
    * even when it is not referenced.
    */
   if (is_spirv)
      block->stageref = 1U << stage;

   block->_Packing = glsl_get_ifc_packing(type);
   block->_RowMajor = glsl_matrix_type_is_row_major(type);

   block->linearized_array_index = linearized_index;

   const char *ifc_name = is_interface_instance ? block->name.string : "";
   char *ifc_name_dup = NULL;
   size_t ifc_name_length = 0;
   if (!is_spirv) {
      ifc_name_dup = ralloc_strdup(NULL, ifc_name);
      ifc_name_length = strlen(ifc_name_dup);
   }

   unsigned old_variable_index = *variable_index;
   unsigned offset = 0;
   unsigned buffer_size = 0;
   bool is_array_instance =
      is_interface_instance && glsl_type_is_array(var->type);
   enum glsl_interface_packing packing =
      glsl_get_internal_ifc_packing(type, consts->UseSTD430AsDefaultPacking);

   iterate_type_fill_variables(mem_ctx, &ifc_name_dup, ifc_name_length, consts, type, variables, variable_index,
                               &offset, &buffer_size, prog, block, blk_type, is_array_instance, block->_RowMajor,
                               packing);
   ralloc_free(ifc_name_dup);
   block->NumUniforms = *variable_index - old_variable_index;

   if (is_spirv) {
      block->UniformBufferSize =  glsl_get_explicit_size(type, false);

      /* From OpenGL 4.6 spec, section 7.6.2.3, "SPIR-V Uniform Offsets and
       * strides"
       *
       *   "If the variable is decorated as a BufferBlock , its offsets and
       *    strides must not contradict std430 alignment and minimum offset
       *    requirements. Otherwise, its offsets and strides must not contradict
       *    std140 alignment and minimum offset requirements."
       *
       * So although we are computing the size based on the offsets and
       * array/matrix strides, at the end we need to ensure that the alignment is
       * the same that with std140. From ARB_uniform_buffer_object spec:
       *
       *   "For uniform blocks laid out according to [std140] rules, the minimum
       *    buffer object size returned by the UNIFORM_BLOCK_DATA_SIZE query is
       *    derived by taking the offset of the last basic machine unit consumed
       *    by the last uniform of the uniform block (including any end-of-array
       *    or end-of-structure padding), adding one, and rounding up to the next
       *    multiple of the base alignment required for a vec4."
       */
      block->UniformBufferSize = align(block->UniformBufferSize, 16);
   } else {
      block->UniformBufferSize = buffer_size;
   }

   /* Check SSBO size is lower than maximum supported size for SSBO */
   if (block_type == BLOCK_SSBO &&
       buffer_size > consts->MaxShaderStorageBlockSize) {
      linker_error(prog, "shader storage block `%s' has size %d, "
                   "which is larger than the maximum allowed (%d)",
                   type == var->interface_type ?
                      glsl_get_type_name(var->type) :
                      glsl_get_type_name(var->interface_type),
                   buffer_size,
                   consts->MaxShaderStorageBlockSize);
   }

   *block_index += 1;
}

static void
fill_block_array(struct uniform_block_array_elements *ub_array,
                 const struct gl_constants *consts, char **name,
                 size_t name_length, struct gl_uniform_block *blks,
                 nir_variable *var,
                 struct gl_uniform_buffer_variable *variables,
                 unsigned *variable_index, unsigned binding_offset,
                 struct gl_shader_program *prog,
                 const gl_shader_stage stage, enum block_type block_type,
                 unsigned *block_index, unsigned first_index)
{
   for (unsigned j = 0; j < ub_array->num_array_elements; j++) {
      size_t new_length = name_length;

      unsigned int element_idx = ub_array->array_elements[j];
      /* Append the subscript to the current variable name */
      ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", element_idx);

      if (ub_array->array) {
         unsigned binding_stride = binding_offset +
            (element_idx * ub_array->array->aoa_size);
         fill_block_array(ub_array->array, consts, name, new_length, blks, var, variables,
                          variable_index, binding_stride, prog, stage, block_type, block_index, first_index);
      } else {
         fill_block(blks, consts, *name,
                    blks, block_index, var, variables,
                    variable_index, binding_offset + element_idx, *block_index - first_index, prog, stage,
                    block_type);
      }
   }
}

/*
 * Link ubos/ssbos for a given linked_shader/stage.
 */
static void
link_linked_shader_uniform_blocks(void *mem_ctx,
                                  const struct gl_constants *consts,
                                  struct gl_shader_program *prog,
                                  struct gl_linked_shader *shader,
                                  struct gl_uniform_block **blocks,
                                  unsigned *num_blocks,
                                  enum block_type block_type)
{
   struct gl_uniform_buffer_variable *variables = NULL;
   unsigned num_variables = 0;

   /* This hash table will track all of the uniform blocks that have been
    * encountered.  Since blocks with the same block-name must be the same,
    * the hash is organized by block-name.
    */
   struct hash_table *block_hash =
      _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
                              _mesa_key_string_equal);

   allocate_uniform_blocks(mem_ctx, block_hash, prog, shader,
                           blocks, num_blocks,
                           &variables, &num_variables,
                           block_type, consts->UseSTD430AsDefaultPacking);
   if (!prog->data->LinkStatus)
      return;

   /* Fill the content of uniforms and variables */
   unsigned block_index = 0;
   unsigned variable_index = 0;
   struct gl_uniform_block *blks = *blocks;


   if (!prog->data->spirv) {
      hash_table_foreach(block_hash, entry) {
         struct link_uniform_block_active *const b =
            (struct link_uniform_block_active *) entry->data;

         const struct glsl_type *blk_type =
            glsl_without_array(b->var->type) == b->var->interface_type ?
               b->var->type : b->var->interface_type;

         if (glsl_type_is_array(blk_type)) {
             char *name =
               ralloc_strdup(NULL,
                             glsl_get_type_name(glsl_without_array(blk_type)));
            size_t name_length = strlen(name);

            assert(b->has_instance_name);
            fill_block_array(b->array, consts, &name, name_length,
                             blks, b->var, variables, &variable_index, 0,
                             prog, shader->Stage, block_type, &block_index, block_index);
            ralloc_free(name);
         } else {
            fill_block(blks, consts, glsl_get_type_name(blk_type), blks, &block_index, b->var,
                       variables, &variable_index, 0, 0, prog, shader->Stage,
                       block_type);
         }
      }
   } else {
      nir_foreach_variable_in_shader(var, shader->Program->nir) {
         if (block_type == BLOCK_UBO && !nir_variable_is_in_ubo(var))
            continue;

         if (block_type == BLOCK_SSBO && !nir_variable_is_in_ssbo(var))
            continue;

         unsigned aoa_size = glsl_get_aoa_size(var->type);
         unsigned buffer_count = aoa_size == 0 ? 1 : aoa_size;
         for (unsigned array_index = 0; array_index < buffer_count; array_index++) {
            fill_block(NULL, consts, NULL, blks, &block_index, var, variables,
                       &variable_index, array_index, array_index, prog, shader->Stage,
                       block_type);
         }
      }
   }

   assert(block_index == *num_blocks);
   assert(variable_index == num_variables);
}

bool
gl_nir_link_uniform_blocks(const struct gl_constants *consts,
                           struct gl_shader_program *prog)
{
   void *mem_ctx = ralloc_context(NULL);
   bool ret = false;
   for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) {
      struct gl_linked_shader *const linked = prog->_LinkedShaders[stage];
      struct gl_uniform_block *ubo_blocks = NULL;
      unsigned num_ubo_blocks = 0;
      struct gl_uniform_block *ssbo_blocks = NULL;
      unsigned num_ssbo_blocks = 0;

      if (!linked)
         continue;

      link_linked_shader_uniform_blocks(mem_ctx, consts, prog, linked,
                                        &ubo_blocks, &num_ubo_blocks,
                                        BLOCK_UBO);

      link_linked_shader_uniform_blocks(mem_ctx, consts, prog, linked,
                                        &ssbo_blocks, &num_ssbo_blocks,
                                        BLOCK_SSBO);

      const unsigned max_uniform_blocks =
         consts->Program[linked->Stage].MaxUniformBlocks;
      if (num_ubo_blocks > max_uniform_blocks) {
         linker_error(prog, "Too many %s uniform blocks (%d/%d)\n",
                      _mesa_shader_stage_to_string(linked->Stage),
                      num_ubo_blocks, max_uniform_blocks);
      }

      const unsigned max_shader_storage_blocks =
         consts->Program[linked->Stage].MaxShaderStorageBlocks;
      if (num_ssbo_blocks > max_shader_storage_blocks) {
         linker_error(prog, "Too many %s shader storage blocks (%d/%d)\n",
                      _mesa_shader_stage_to_string(linked->Stage),
                      num_ssbo_blocks, max_shader_storage_blocks);
      }

      if (!prog->data->LinkStatus) {
         goto out;
      }

      prog->data->linked_stages |= 1 << stage;

      /* Copy ubo blocks to linked shader list */
      linked->Program->sh.UniformBlocks =
         ralloc_array(linked, struct gl_uniform_block *, num_ubo_blocks);
      ralloc_steal(linked, ubo_blocks);
      linked->Program->sh.NumUniformBlocks = num_ubo_blocks;
      for (unsigned i = 0; i < num_ubo_blocks; i++) {
         linked->Program->sh.UniformBlocks[i] = &ubo_blocks[i];
      }

      /* We need to set it twice to avoid the value being overwritten by the
       * one from nir in brw_shader_gather_info. TODO: get a way to set the
       * info once, and being able to gather properly the info.
       */
      linked->Program->nir->info.num_ubos = num_ubo_blocks;
      linked->Program->info.num_ubos = num_ubo_blocks;

      /* Copy ssbo blocks to linked shader list */
      linked->Program->sh.ShaderStorageBlocks =
         ralloc_array(linked, struct gl_uniform_block *, num_ssbo_blocks);
      ralloc_steal(linked, ssbo_blocks);
      for (unsigned i = 0; i < num_ssbo_blocks; i++) {
         linked->Program->sh.ShaderStorageBlocks[i] = &ssbo_blocks[i];
      }

      /* See previous comment on num_ubo_blocks */
      linked->Program->nir->info.num_ssbos = num_ssbo_blocks;
      linked->Program->info.num_ssbos = num_ssbo_blocks;
   }

   if (!nir_interstage_cross_validate_uniform_blocks(prog, BLOCK_UBO))
      goto out;

   if (!nir_interstage_cross_validate_uniform_blocks(prog, BLOCK_SSBO))
      goto out;

   ret = true;
out:
   ralloc_free(mem_ctx);
   return ret;
}