/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2023 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "ac_nir.h"
#include "nir.h"
#include "nir_builder.h"
#include "radv_device.h"
#include "radv_nir.h"
#include "radv_physical_device.h"
#include "radv_shader.h"

static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

void
radv_nir_lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask)
{
   bool progress = false;

   NIR_PASS(progress, nir, nir_lower_io_to_scalar_early, mask);

   if (progress) {
      /* Optimize the new vector code and then remove dead vars */
      NIR_PASS(_, nir, nir_copy_prop);
      NIR_PASS(_, nir, nir_opt_shrink_vectors, true);

      if (mask & nir_var_shader_out) {
         /* Optimize swizzled movs of load_const for nir_link_opt_varyings's constant propagation. */
         NIR_PASS(_, nir, nir_opt_constant_folding);

         /* For nir_link_opt_varyings's duplicate input opt */
         NIR_PASS(_, nir, nir_opt_cse);
      }

      /* Run copy-propagation to help remove dead output variables (some shaders have useless copies
       * to/from an output), so compaction later will be more effective.
       *
       * This will have been done earlier but it might not have worked because the outputs were
       * vector.
       */
      if (nir->info.stage == MESA_SHADER_TESS_CTRL)
         NIR_PASS(_, nir, nir_opt_copy_prop_vars);

      NIR_PASS(_, nir, nir_opt_dce);
      NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp | nir_var_shader_in | nir_var_shader_out,
               NULL);
   }
}

typedef struct {
   uint64_t always_per_vertex;
   uint64_t potentially_per_primitive;
   uint64_t always_per_primitive;
   unsigned num_always_per_vertex;
   unsigned num_potentially_per_primitive;
} radv_recompute_fs_input_bases_state;

static bool
radv_recompute_fs_input_bases_callback(UNUSED nir_builder *b, nir_intrinsic_instr *intrin, void *data)
{
   const radv_recompute_fs_input_bases_state *s = (const radv_recompute_fs_input_bases_state *)data;

   /* Filter possible FS input intrinsics */
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_primitive_input:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_input_vertex:
      break;
   default:
      return false;
   }

   const nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
   const uint64_t location_bit = BITFIELD64_BIT(sem.location);
   const uint64_t location_mask = BITFIELD64_MASK(sem.location);
   const unsigned old_base = nir_intrinsic_base(intrin);
   unsigned new_base = 0;

   if (location_bit & s->always_per_vertex) {
      new_base = util_bitcount64(s->always_per_vertex & location_mask);
   } else if (location_bit & s->potentially_per_primitive) {
      new_base = s->num_always_per_vertex;

      switch (location_bit) {
      case VARYING_BIT_VIEWPORT:
         break;
      case VARYING_BIT_PRIMITIVE_ID:
         new_base += !!(s->potentially_per_primitive & VARYING_BIT_VIEWPORT);
         break;
      }
   } else if (location_bit & s->always_per_primitive) {
      new_base = s->num_always_per_vertex + s->num_potentially_per_primitive +
                 util_bitcount64(s->always_per_primitive & location_mask);
   } else {
      unreachable("invalid FS input");
   }

   if (new_base != old_base) {
      nir_intrinsic_set_base(intrin, new_base);
      return true;
   }

   return false;
}

bool
radv_recompute_fs_input_bases(nir_shader *nir)
{
   const uint64_t always_per_vertex = nir->info.inputs_read & ~nir->info.per_primitive_inputs &
                                      ~(VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT);
   const uint64_t potentially_per_primitive =
      nir->info.inputs_read & (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT);
   const uint64_t always_per_primitive = nir->info.inputs_read & nir->info.per_primitive_inputs &
                                         ~(VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT);

   radv_recompute_fs_input_bases_state s = {
      .always_per_vertex = always_per_vertex,
      .potentially_per_primitive = potentially_per_primitive,
      .always_per_primitive = always_per_primitive,
      .num_always_per_vertex = util_bitcount64(always_per_vertex),
      .num_potentially_per_primitive = util_bitcount64(potentially_per_primitive),
   };

   return nir_shader_intrinsics_pass(nir, radv_recompute_fs_input_bases_callback, nir_metadata_control_flow, &s);
}

void
radv_nir_lower_io(struct radv_device *device, nir_shader *nir)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   if (nir->info.stage == MESA_SHADER_VERTEX) {
      NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in, type_size_vec4, 0);
      NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out, type_size_vec4, nir_lower_io_lower_64bit_to_32);
   } else {
      NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4,
               nir_lower_io_lower_64bit_to_32 | nir_lower_io_use_interpolated_input_intrinsics);
   }

   /* This pass needs actual constants */
   NIR_PASS(_, nir, nir_opt_constant_folding);

   NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out);

   if (nir->xfb_info) {
      NIR_PASS(_, nir, nir_io_add_intrinsic_xfb_info);

      if (pdev->use_ngg_streamout) {
         /* The total number of shader outputs is required for computing the pervertex LDS size for
          * VS/TES when lowering NGG streamout.
          */
         nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, nir->info.stage);
      }
   }

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      /* Lower explicit input load intrinsics to sysvals for the layer ID. */
      NIR_PASS(_, nir, nir_lower_system_values);

      /* Recompute FS input intrinsic bases to assign a location to each FS input.
       * The computed base will match the index of each input in SPI_PS_INPUT_CNTL_n.
       */
      radv_recompute_fs_input_bases(nir);
   }

   NIR_PASS_V(nir, nir_opt_dce);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_in | nir_var_shader_out, NULL);
}

/* IO slot layout for stages that aren't linked. */
enum {
   RADV_IO_SLOT_POS = 0,
   RADV_IO_SLOT_CLIP_DIST0,
   RADV_IO_SLOT_CLIP_DIST1,
   RADV_IO_SLOT_PSIZ,
   RADV_IO_SLOT_VAR0, /* 0..31 */
};

unsigned
radv_map_io_driver_location(unsigned semantic)
{
   if ((semantic >= VARYING_SLOT_PATCH0 && semantic < VARYING_SLOT_TESS_MAX) ||
       semantic == VARYING_SLOT_TESS_LEVEL_INNER || semantic == VARYING_SLOT_TESS_LEVEL_OUTER)
      return ac_shader_io_get_unique_index_patch(semantic);

   switch (semantic) {
   case VARYING_SLOT_POS:
      return RADV_IO_SLOT_POS;
   case VARYING_SLOT_CLIP_DIST0:
      return RADV_IO_SLOT_CLIP_DIST0;
   case VARYING_SLOT_CLIP_DIST1:
      return RADV_IO_SLOT_CLIP_DIST1;
   case VARYING_SLOT_PSIZ:
      return RADV_IO_SLOT_PSIZ;
   default:
      assert(semantic >= VARYING_SLOT_VAR0 && semantic <= VARYING_SLOT_VAR31);
      return RADV_IO_SLOT_VAR0 + (semantic - VARYING_SLOT_VAR0);
   }
}

bool
radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *stage)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_shader_info *info = &stage->info;
   ac_nir_map_io_driver_location map_input = info->inputs_linked ? NULL : radv_map_io_driver_location;
   ac_nir_map_io_driver_location map_output = info->outputs_linked ? NULL : radv_map_io_driver_location;
   nir_shader *nir = stage->nir;

   if (nir->info.stage == MESA_SHADER_VERTEX) {
      if (info->vs.as_ls) {
         NIR_PASS_V(nir, ac_nir_lower_ls_outputs_to_mem, map_output, pdev->info.gfx_level, info->vs.tcs_in_out_eq,
                    info->vs.tcs_inputs_via_temp, info->vs.tcs_inputs_via_lds);
         return true;
      } else if (info->vs.as_es) {
         NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize,
                    info->gs_inputs_read);
         return true;
      }
   } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
      NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, map_input, pdev->info.gfx_level, info->vs.tcs_in_out_eq,
                 info->vs.tcs_inputs_via_temp, info->vs.tcs_inputs_via_lds);
      NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, &info->tcs.info, map_output, pdev->info.gfx_level,
                 info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read, info->wave_size);

      return true;
   } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
      NIR_PASS_V(nir, ac_nir_lower_tes_inputs_to_mem, map_input);

      if (info->tes.as_es) {
         NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize,
                    info->gs_inputs_read);
      }

      return true;
   } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
      NIR_PASS_V(nir, ac_nir_lower_gs_inputs_to_mem, map_input, pdev->info.gfx_level, false);
      return true;
   } else if (nir->info.stage == MESA_SHADER_TASK) {
      ac_nir_lower_task_outputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, pdev->task_info.num_entries,
                                       info->cs.has_query);
      return true;
   } else if (nir->info.stage == MESA_SHADER_MESH) {
      ac_nir_lower_mesh_inputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, pdev->task_info.num_entries);
      return true;
   }

   return false;
}

static bool
radv_nir_lower_draw_id_to_zero_callback(struct nir_builder *b, nir_intrinsic_instr *intrin, UNUSED void *state)
{
   if (intrin->intrinsic != nir_intrinsic_load_draw_id)
      return false;

   nir_def *replacement = nir_imm_zero(b, intrin->def.num_components, intrin->def.bit_size);

   nir_def_replace(&intrin->def, replacement);
   nir_instr_free(&intrin->instr);

   return true;
}

bool
radv_nir_lower_draw_id_to_zero(nir_shader *shader)
{
   return nir_shader_intrinsics_pass(shader, radv_nir_lower_draw_id_to_zero_callback, nir_metadata_control_flow,
                                     NULL);
}