// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/memory-optimizer.h"

#include "src/compiler/js-graph.h"
#include "src/compiler/linkage.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h"
#include "src/compiler/node.h"
#include "src/compiler/simplified-operator.h"

namespace v8 {
namespace internal {
namespace compiler {

MemoryOptimizer::MemoryOptimizer(JSGraph* jsgraph, Zone* zone,
                                 PoisoningMitigationLevel poisoning_level,
                                 AllocationFolding allocation_folding)
    : jsgraph_(jsgraph),
      empty_state_(AllocationState::Empty(zone)),
      pending_(zone),
      tokens_(zone),
      zone_(zone),
      graph_assembler_(jsgraph, nullptr, nullptr, zone),
      poisoning_level_(poisoning_level),
      allocation_folding_(allocation_folding) {}

void MemoryOptimizer::Optimize() {
  EnqueueUses(graph()->start(), empty_state());
  while (!tokens_.empty()) {
    Token const token = tokens_.front();
    tokens_.pop();
    VisitNode(token.node, token.state);
  }
  DCHECK(pending_.empty());
  DCHECK(tokens_.empty());
}

MemoryOptimizer::AllocationGroup::AllocationGroup(Node* node,
                                                  PretenureFlag pretenure,
                                                  Zone* zone)
    : node_ids_(zone), pretenure_(pretenure), size_(nullptr) {
  node_ids_.insert(node->id());
}

MemoryOptimizer::AllocationGroup::AllocationGroup(Node* node,
                                                  PretenureFlag pretenure,
                                                  Node* size, Zone* zone)
    : node_ids_(zone), pretenure_(pretenure), size_(size) {
  node_ids_.insert(node->id());
}

void MemoryOptimizer::AllocationGroup::Add(Node* node) {
  node_ids_.insert(node->id());
}

bool MemoryOptimizer::AllocationGroup::Contains(Node* node) const {
  return node_ids_.find(node->id()) != node_ids_.end();
}

MemoryOptimizer::AllocationState::AllocationState()
    : group_(nullptr), size_(std::numeric_limits<int>::max()), top_(nullptr) {}

MemoryOptimizer::AllocationState::AllocationState(AllocationGroup* group)
    : group_(group), size_(std::numeric_limits<int>::max()), top_(nullptr) {}

MemoryOptimizer::AllocationState::AllocationState(AllocationGroup* group,
                                                  int size, Node* top)
    : group_(group), size_(size), top_(top) {}

bool MemoryOptimizer::AllocationState::IsNewSpaceAllocation() const {
  return group() && group()->IsNewSpaceAllocation();
}
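
// Visits a single effectful {node} under the allocation {state} that is
// current at this point in the effect chain. Memory-related opcodes get
// dedicated handlers below; all other effectful nodes simply forward the
// incoming state to their effect uses.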
void MemoryOptimizer::VisitNode(Node* node, AllocationState const* state) {
  DCHECK(!node->IsDead());
  DCHECK_LT(0, node->op()->EffectInputCount());
  switch (node->opcode()) {
    case IrOpcode::kAllocate:
      // Allocate nodes were purged from the graph in effect-control
      // linearization.
      UNREACHABLE();
    case IrOpcode::kAllocateRaw:
      return VisitAllocateRaw(node, state);
    case IrOpcode::kCall:
      return VisitCall(node, state);
    case IrOpcode::kCallWithCallerSavedRegisters:
      return VisitCallWithCallerSavedRegisters(node, state);
    case IrOpcode::kLoadElement:
      return VisitLoadElement(node, state);
    case IrOpcode::kLoadField:
      return VisitLoadField(node, state);
    case IrOpcode::kStoreElement:
      return VisitStoreElement(node, state);
    case IrOpcode::kStoreField:
      return VisitStoreField(node, state);
    case IrOpcode::kDeoptimizeIf:
    case IrOpcode::kDeoptimizeUnless:
    case IrOpcode::kIfException:
    case IrOpcode::kLoad:
    case IrOpcode::kProtectedLoad:
    case IrOpcode::kUnalignedLoad:
    case IrOpcode::kStore:
    case IrOpcode::kProtectedStore:
    case IrOpcode::kUnalignedStore:
    case IrOpcode::kRetain:
    case IrOpcode::kUnsafePointerAdd:
    case IrOpcode::kDebugBreak:
    case IrOpcode::kUnreachable:
    case IrOpcode::kWord32PoisonOnSpeculation:
    case IrOpcode::kWord64PoisonOnSpeculation:
      return VisitOtherEffect(node, state);
    default:
      break;
  }
  DCHECK_EQ(0, node->op()->EffectOutputCount());
}

#define __ gasm()->

void MemoryOptimizer::VisitAllocateRaw(Node* node,
                                       AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kAllocateRaw, node->opcode());
  Node* value;
  Node* size = node->InputAt(0);
  Node* effect = node->InputAt(1);
  Node* control = node->InputAt(2);

  gasm()->Reset(effect, control);

  PretenureFlag pretenure = PretenureFlagOf(node->op());

  // Propagate tenuring from outer allocations to inner allocations, i.e.
  // when we allocate an object in old space and store a newly allocated
  // child object into the pretenured object, then the newly allocated
  // child object should also be pretenured to old space.
  if (pretenure == TENURED) {
    for (Edge const edge : node->use_edges()) {
      Node* const user = edge.from();
      if (user->opcode() == IrOpcode::kStoreField && edge.index() == 0) {
        Node* const child = user->InputAt(1);
        if (child->opcode() == IrOpcode::kAllocateRaw &&
            PretenureFlagOf(child->op()) == NOT_TENURED) {
          NodeProperties::ChangeOp(child, node->op());
          break;
        }
      }
    }
  } else {
    DCHECK_EQ(NOT_TENURED, pretenure);
    for (Edge const edge : node->use_edges()) {
      Node* const user = edge.from();
      if (user->opcode() == IrOpcode::kStoreField && edge.index() == 1) {
        Node* const parent = user->InputAt(0);
        if (parent->opcode() == IrOpcode::kAllocateRaw &&
            PretenureFlagOf(parent->op()) == TENURED) {
          pretenure = TENURED;
          break;
        }
      }
    }
  }

  // Determine the top/limit addresses.
  Node* top_address = __ ExternalConstant(
      pretenure == NOT_TENURED
          ? ExternalReference::new_space_allocation_top_address(isolate())
          : ExternalReference::old_space_allocation_top_address(isolate()));
  Node* limit_address = __ ExternalConstant(
      pretenure == NOT_TENURED
          ? ExternalReference::new_space_allocation_limit_address(isolate())
          : ExternalReference::old_space_allocation_limit_address(isolate()));

  // Check if we can fold this allocation into a previous allocation
  // represented by the incoming {state}.
  Int32Matcher m(size);
  if (m.HasValue() && m.Value() < kMaxRegularHeapObjectSize) {
    int32_t const object_size = m.Value();
    if (allocation_folding_ == AllocationFolding::kDoAllocationFolding &&
        state->size() <= kMaxRegularHeapObjectSize - object_size &&
        state->group()->pretenure() == pretenure) {
      // We can fold this Allocate {node} into the allocation {group}
      // represented by the given {state}. Compute the upper bound for
      // the new {state}.
      int32_t const state_size = state->size() + object_size;
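      // For example: if the group's reservation so far is 32 bytes and this
      // allocation needs another 16 bytes, the folded upper bound becomes
      // 48 bytes, and the single bump-pointer check emitted for the group's
      // first allocation then covers both objects.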
      // Update the reservation check to the actual maximum upper bound.
      AllocationGroup* const group = state->group();
      if (OpParameter<int32_t>(group->size()->op()) < state_size) {
        NodeProperties::ChangeOp(group->size(),
                                 common()->Int32Constant(state_size));
      }

      // Update the allocation top with the new object allocation.
      // TODO(bmeurer): Defer writing back top as much as possible.
      Node* top = __ IntAdd(state->top(), __ IntPtrConstant(object_size));
      __ Store(StoreRepresentation(MachineType::PointerRepresentation(),
                                   kNoWriteBarrier),
               top_address, __ IntPtrConstant(0), top);

      // Compute the effective inner allocated address.
      value = __ BitcastWordToTagged(
          __ IntAdd(state->top(), __ IntPtrConstant(kHeapObjectTag)));

      // Extend the allocation {group}.
      group->Add(value);
      state = AllocationState::Open(group, state_size, top, zone());
    } else {
      auto call_runtime = __ MakeDeferredLabel();
      auto done = __ MakeLabel(MachineType::PointerRepresentation());

      // Set up a mutable reservation size node; it will be patched as we
      // fold additional allocations into this new group.
      Node* size = __ UniqueInt32Constant(object_size);

      // Load allocation top and limit.
      Node* top =
          __ Load(MachineType::Pointer(), top_address, __ IntPtrConstant(0));
      Node* limit =
          __ Load(MachineType::Pointer(), limit_address, __ IntPtrConstant(0));

      // Check if we need to collect garbage before we can start bump pointer
      // allocation (always done for folded allocations).
      Node* check = __ UintLessThan(
          __ IntAdd(top,
                    machine()->Is64() ? __ ChangeInt32ToInt64(size) : size),
          limit);

      __ GotoIfNot(check, &call_runtime);
      __ Goto(&done, top);

      __ Bind(&call_runtime);
      {
        Node* target = pretenure == NOT_TENURED
                           ? __ AllocateInNewSpaceStubConstant()
                           : __ AllocateInOldSpaceStubConstant();
        if (!allocate_operator_.is_set()) {
          auto call_descriptor = Linkage::GetStubCallDescriptor(
              graph()->zone(), AllocateDescriptor{}, 0,
              CallDescriptor::kCanUseRoots, Operator::kNoThrow);
          allocate_operator_.set(common()->Call(call_descriptor));
        }
        Node* vfalse = __ Call(allocate_operator_.get(), target, size);
        vfalse = __ IntSub(vfalse, __ IntPtrConstant(kHeapObjectTag));
        __ Goto(&done, vfalse);
      }

      __ Bind(&done);

      // Compute the new top and write it back.
      top = __ IntAdd(done.PhiAt(0), __ IntPtrConstant(object_size));
      __ Store(StoreRepresentation(MachineType::PointerRepresentation(),
                                   kNoWriteBarrier),
               top_address, __ IntPtrConstant(0), top);

      // Compute the initial object address.
      value = __ BitcastWordToTagged(
          __ IntAdd(done.PhiAt(0), __ IntPtrConstant(kHeapObjectTag)));

      // Start a new allocation group.
      AllocationGroup* group =
          new (zone()) AllocationGroup(value, pretenure, size, zone());
      state = AllocationState::Open(group, object_size, top, zone());
    }
  } else {
    auto call_runtime = __ MakeDeferredLabel();
    auto done = __ MakeLabel(MachineRepresentation::kTaggedPointer);

    // Load allocation top and limit.
    Node* top =
        __ Load(MachineType::Pointer(), top_address, __ IntPtrConstant(0));
    Node* limit =
        __ Load(MachineType::Pointer(), limit_address, __ IntPtrConstant(0));

    // Compute the new top.
    Node* new_top =
        __ IntAdd(top, machine()->Is64() ? __ ChangeInt32ToInt64(size) : size);

    // Check if we can do bump pointer allocation here.
    Node* check = __ UintLessThan(new_top, limit);
    __ GotoIfNot(check, &call_runtime);
    __ Store(StoreRepresentation(MachineType::PointerRepresentation(),
                                 kNoWriteBarrier),
             top_address, __ IntPtrConstant(0), new_top);
    __ Goto(&done, __ BitcastWordToTagged(
                       __ IntAdd(top, __ IntPtrConstant(kHeapObjectTag))));
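
    // Slow path: the bump-pointer allocation would cross the limit, so call
    // the allocation stub instead, which may trigger a garbage collection.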
    __ Bind(&call_runtime);
    Node* target = pretenure == NOT_TENURED
                       ? __ AllocateInNewSpaceStubConstant()
                       : __ AllocateInOldSpaceStubConstant();
    if (!allocate_operator_.is_set()) {
      auto call_descriptor = Linkage::GetStubCallDescriptor(
          graph()->zone(), AllocateDescriptor{}, 0,
          CallDescriptor::kCanUseRoots, Operator::kNoThrow);
      allocate_operator_.set(common()->Call(call_descriptor));
    }
    __ Goto(&done, __ Call(allocate_operator_.get(), target, size));

    __ Bind(&done);
    value = done.PhiAt(0);

    // Create an unfoldable allocation group.
    AllocationGroup* group =
        new (zone()) AllocationGroup(value, pretenure, zone());
    state = AllocationState::Closed(group, zone());
  }

  effect = __ ExtractCurrentEffect();
  control = __ ExtractCurrentControl();

  // Replace all effect uses of {node} with the {effect}, enqueue the
  // effect uses for further processing, and replace all value uses of
  // {node} with the {value}.
  for (Edge edge : node->use_edges()) {
    if (NodeProperties::IsEffectEdge(edge)) {
      EnqueueUse(edge.from(), edge.index(), state);
      edge.UpdateTo(effect);
    } else if (NodeProperties::IsValueEdge(edge)) {
      edge.UpdateTo(value);
    } else {
      DCHECK(NodeProperties::IsControlEdge(edge));
      edge.UpdateTo(control);
    }
  }

  // Kill the {node} to make sure we don't leave dangling dead uses.
  node->Kill();
}

#undef __

void MemoryOptimizer::VisitCall(Node* node, AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kCall, node->opcode());
  // If the call can allocate, we start with a fresh state.
  if (!(CallDescriptorOf(node->op())->flags() & CallDescriptor::kNoAllocate)) {
    state = empty_state();
  }
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitCallWithCallerSavedRegisters(
    Node* node, AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kCallWithCallerSavedRegisters, node->opcode());
  // If the call can allocate, we start with a fresh state.
  if (!(CallDescriptorOf(node->op())->flags() & CallDescriptor::kNoAllocate)) {
    state = empty_state();
  }
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitLoadElement(Node* node,
                                       AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kLoadElement, node->opcode());
  ElementAccess const& access = ElementAccessOf(node->op());
  Node* index = node->InputAt(1);
  node->ReplaceInput(1, ComputeIndex(access, index));
  if (NeedsPoisoning(access.load_sensitivity) &&
      access.machine_type.representation() !=
          MachineRepresentation::kTaggedPointer) {
    NodeProperties::ChangeOp(node,
                             machine()->PoisonedLoad(access.machine_type));
  } else {
    NodeProperties::ChangeOp(node, machine()->Load(access.machine_type));
  }
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitLoadField(Node* node,
                                     AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kLoadField, node->opcode());
  FieldAccess const& access = FieldAccessOf(node->op());
  Node* offset = jsgraph()->IntPtrConstant(access.offset - access.tag());
  node->InsertInput(graph()->zone(), 1, offset);
  if (NeedsPoisoning(access.load_sensitivity) &&
      access.machine_type.representation() !=
          MachineRepresentation::kTaggedPointer) {
    NodeProperties::ChangeOp(node,
                             machine()->PoisonedLoad(access.machine_type));
  } else {
    NodeProperties::ChangeOp(node, machine()->Load(access.machine_type));
  }
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitStoreElement(Node* node,
                                        AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kStoreElement, node->opcode());
  ElementAccess const& access = ElementAccessOf(node->op());
  Node* object = node->InputAt(0);
  Node* index = node->InputAt(1);
  WriteBarrierKind write_barrier_kind =
      ComputeWriteBarrierKind(object, state, access.write_barrier_kind);
  node->ReplaceInput(1, ComputeIndex(access, index));
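  // Lower to a raw machine Store. {write_barrier_kind} may have been weakened
  // to kNoWriteBarrier above: a store into an object that is known to sit in
  // the current new-space allocation group can never create an old-to-new
  // reference, so no barrier is needed.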
  NodeProperties::ChangeOp(
      node, machine()->Store(StoreRepresentation(
                access.machine_type.representation(), write_barrier_kind)));
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitStoreField(Node* node,
                                      AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kStoreField, node->opcode());
  FieldAccess const& access = FieldAccessOf(node->op());
  Node* object = node->InputAt(0);
  WriteBarrierKind write_barrier_kind =
      ComputeWriteBarrierKind(object, state, access.write_barrier_kind);
  Node* offset = jsgraph()->IntPtrConstant(access.offset - access.tag());
  node->InsertInput(graph()->zone(), 1, offset);
  NodeProperties::ChangeOp(
      node, machine()->Store(StoreRepresentation(
                access.machine_type.representation(), write_barrier_kind)));
  EnqueueUses(node, state);
}

void MemoryOptimizer::VisitOtherEffect(Node* node,
                                       AllocationState const* state) {
  EnqueueUses(node, state);
}

Node* MemoryOptimizer::ComputeIndex(ElementAccess const& access, Node* key) {
  Node* index;
  if (machine()->Is64()) {
    // On 64-bit platforms, we need to feed a Word64 index to the Load and
    // Store operators. Since LoadElement and StoreElement don't do any bounds
    // checking themselves, we can be sure that the {key} was already checked
    // and is in valid range, so we can do the further address computation on
    // Word64 below, which ideally allows us to fuse the address computation
    // with the actual memory access operation on Intel platforms.
    index = graph()->NewNode(machine()->ChangeUint32ToUint64(), key);
  } else {
    index = key;
  }
  int const element_size_shift =
      ElementSizeLog2Of(access.machine_type.representation());
  if (element_size_shift) {
    index = graph()->NewNode(machine()->WordShl(), index,
                             jsgraph()->IntPtrConstant(element_size_shift));
  }
  int const fixed_offset = access.header_size - access.tag();
  if (fixed_offset) {
    index = graph()->NewNode(machine()->IntAdd(), index,
                             jsgraph()->IntPtrConstant(fixed_offset));
  }
  return index;
}

WriteBarrierKind MemoryOptimizer::ComputeWriteBarrierKind(
    Node* object, AllocationState const* state,
    WriteBarrierKind write_barrier_kind) {
  if (state->IsNewSpaceAllocation() && state->group()->Contains(object)) {
    write_barrier_kind = kNoWriteBarrier;
  }
  return write_barrier_kind;
}

MemoryOptimizer::AllocationState const* MemoryOptimizer::MergeStates(
    AllocationStates const& states) {
  // Check if all states are the same; or at least if all allocation
  // states belong to the same allocation group.
  AllocationState const* state = states.front();
  AllocationGroup* group = state->group();
  for (size_t i = 1; i < states.size(); ++i) {
    if (states[i] != state) state = nullptr;
    if (states[i]->group() != group) group = nullptr;
  }
  if (state == nullptr) {
    if (group != nullptr) {
      // We cannot fold any more allocations into this group, but we can
      // still eliminate write barriers on stores to this group.
      // TODO(bmeurer): We could potentially just create a Phi here to merge
      // the various tops; but we need to pay special attention not to create
      // an unschedulable graph.
      state = AllocationState::Closed(group, zone());
    } else {
      // The states are from different allocation groups.
      state = empty_state();
    }
  }
  return state;
}

void MemoryOptimizer::EnqueueMerge(Node* node, int index,
                                   AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kEffectPhi, node->opcode());
  int const input_count = node->InputCount() - 1;
  DCHECK_LT(0, input_count);
  Node* const control = node->InputAt(input_count);
  if (control->opcode() == IrOpcode::kLoop) {
    // For loops we always start with an empty state at the beginning.
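    // Only the first input (index 0) triggers processing; states arriving
    // along the loop's back edges are deliberately dropped, which keeps the
    // analysis a single forward pass (presumably at the cost of keeping some
    // write barriers inside loops) instead of a fixpoint iteration.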
    if (index == 0) EnqueueUses(node, empty_state());
  } else {
    DCHECK_EQ(IrOpcode::kMerge, control->opcode());
    // Check if we already know about this pending merge.
    NodeId const id = node->id();
    auto it = pending_.find(id);
    if (it == pending_.end()) {
      // Insert a new pending merge.
      it = pending_.insert(std::make_pair(id, AllocationStates(zone()))).first;
    }
    // Add the next input state.
    it->second.push_back(state);
    // Check if states for all inputs are available by now.
    if (it->second.size() == static_cast<size_t>(input_count)) {
      // All inputs to this effect merge are done; merge the states given all
      // input constraints, drop the pending merge, and enqueue uses of the
      // EffectPhi {node}.
      state = MergeStates(it->second);
      EnqueueUses(node, state);
      pending_.erase(it);
    }
  }
}

void MemoryOptimizer::EnqueueUses(Node* node, AllocationState const* state) {
  for (Edge const edge : node->use_edges()) {
    if (NodeProperties::IsEffectEdge(edge)) {
      EnqueueUse(edge.from(), edge.index(), state);
    }
  }
}

void MemoryOptimizer::EnqueueUse(Node* node, int index,
                                 AllocationState const* state) {
  if (node->opcode() == IrOpcode::kEffectPhi) {
    // An EffectPhi represents a merge of different effect chains, which
    // needs special handling depending on whether the merge is part of a
    // loop or just a normal control join.
    EnqueueMerge(node, index, state);
  } else {
    Token token = {node, state};
    tokens_.push(token);
  }
}

Graph* MemoryOptimizer::graph() const { return jsgraph()->graph(); }

Isolate* MemoryOptimizer::isolate() const { return jsgraph()->isolate(); }

CommonOperatorBuilder* MemoryOptimizer::common() const {
  return jsgraph()->common();
}

MachineOperatorBuilder* MemoryOptimizer::machine() const {
  return jsgraph()->machine();
}

bool MemoryOptimizer::NeedsPoisoning(LoadSensitivity load_sensitivity) const {
  // Safe loads do not need poisoning.
  if (load_sensitivity == LoadSensitivity::kSafe) return false;

  switch (poisoning_level_) {
    case PoisoningMitigationLevel::kDontPoison:
      return false;
    case PoisoningMitigationLevel::kPoisonAll:
      return true;
    case PoisoningMitigationLevel::kPoisonCriticalOnly:
      return load_sensitivity == LoadSensitivity::kCritical;
  }
  UNREACHABLE();
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8