// aiplatform/v1/endpoint_service.proto

// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/endpoint.proto";
import "google/cloud/aiplatform/v1/operation.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";

// Language-specific code generation options (C#, Go, Java, PHP, Ruby).
option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "EndpointServiceProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// A service for managing Vertex AI's Endpoints.
//
// CreateEndpoint, DeleteEndpoint, DeployModel, UndeployModel and
// MutateDeployedModel return a `google.longrunning.Operation`; GetEndpoint,
// ListEndpoints and UpdateEndpoint return their result directly.
service EndpointService {
  // Default API host for this service.
  option (google.api.default_host) = "aiplatform.googleapis.com";
  // OAuth scope declared for this service.
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates an Endpoint.
  //
  // The returned operation's response type is `Endpoint` and its metadata type
  // is `CreateEndpointOperationMetadata`.
  rpc CreateEndpoint(CreateEndpointRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*}/endpoints"
      body: "endpoint"
    };
    // Two flattened signatures: with and without an explicit `endpoint_id`.
    option (google.api.method_signature) = "parent,endpoint";
    option (google.api.method_signature) = "parent,endpoint,endpoint_id";
    option (google.longrunning.operation_info) = {
      response_type: "Endpoint"
      metadata_type: "CreateEndpointOperationMetadata"
    };
  }

  // Gets an Endpoint.
  //
  // The Endpoint is identified by the `name` field, which is bound to the
  // request path.
  rpc GetEndpoint(GetEndpointRequest) returns (Endpoint) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/endpoints/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists Endpoints in a Location.
  //
  // The Location is identified by the `parent` field, which is bound to the
  // request path.
  rpc ListEndpoints(ListEndpointsRequest) returns (ListEndpointsResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*}/endpoints"
    };
    option (google.api.method_signature) = "parent";
  }

  // Updates an Endpoint.
  //
  // The Endpoint to update is identified by `endpoint.name`, which is bound to
  // the request path; the `endpoint` field is the HTTP request body.
  rpc UpdateEndpoint(UpdateEndpointRequest) returns (Endpoint) {
    option (google.api.http) = {
      patch: "/v1/{endpoint.name=projects/*/locations/*/endpoints/*}"
      body: "endpoint"
    };
    option (google.api.method_signature) = "endpoint,update_mask";
  }

  // Deletes an Endpoint.
  //
  // The returned operation's response type is `google.protobuf.Empty` and its
  // metadata type is `DeleteOperationMetadata`.
  rpc DeleteEndpoint(DeleteEndpointRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/endpoints/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "google.protobuf.Empty"
      metadata_type: "DeleteOperationMetadata"
    };
  }

  // Deploys a Model into this Endpoint, creating a DeployedModel within it.
  //
  // The returned operation's response type is `DeployModelResponse` and its
  // metadata type is `DeployModelOperationMetadata`.
  rpc DeployModel(DeployModelRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:deployModel"
      body: "*"
    };
    option (google.api.method_signature) =
        "endpoint,deployed_model,traffic_split";
    option (google.longrunning.operation_info) = {
      response_type: "DeployModelResponse"
      metadata_type: "DeployModelOperationMetadata"
    };
  }

  // Undeploys a Model from an Endpoint, removing a DeployedModel from it, and
  // freeing all resources it's using.
  //
  // The returned operation's response type is `UndeployModelResponse` and its
  // metadata type is `UndeployModelOperationMetadata`.
  rpc UndeployModel(UndeployModelRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:undeployModel"
      body: "*"
    };
    option (google.api.method_signature) =
        "endpoint,deployed_model_id,traffic_split";
    option (google.longrunning.operation_info) = {
      response_type: "UndeployModelResponse"
      metadata_type: "UndeployModelOperationMetadata"
    };
  }

  // Updates an existing deployed model. Updatable fields include
  // `min_replica_count`, `max_replica_count`, `autoscaling_metric_specs`,
  // `disable_container_logging` (v1 only), and `enable_container_logging`
  // (v1beta1 only).
  //
  // The returned operation's response type is `MutateDeployedModelResponse`
  // and its metadata type is `MutateDeployedModelOperationMetadata`.
  rpc MutateDeployedModel(MutateDeployedModelRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:mutateDeployedModel"
      body: "*"
    };
    option (google.api.method_signature) =
        "endpoint,deployed_model,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "MutateDeployedModelResponse"
      metadata_type: "MutateDeployedModelOperationMetadata"
    };
  }
}

// Request message for
// [EndpointService.CreateEndpoint][google.cloud.aiplatform.v1.EndpointService.CreateEndpoint].
message CreateEndpointRequest {
  // Required. Resource name of the Location in which the Endpoint is created.
  // Format: `projects/{project}/locations/{location}`
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference).type = "locations.googleapis.com/Location"
  ];

  // Required. The Endpoint to create.
  Endpoint endpoint = 2
      [(google.api.field_behavior) = REQUIRED];

  // Immutable. ID to assign to the endpoint; it becomes the last component of
  // the endpoint resource name. When omitted, Vertex AI generates a value for
  // this ID.
  //
  // An ID whose first character is a letter may be up to 63 characters long,
  // may only contain characters from `[a-z0-9-]`, and must end with a letter
  // or number.
  //
  // An ID whose first character is a number may be up to 9 characters long,
  // may only contain characters from `[0-9]`, and must not have leading zeros.
  //
  // Over HTTP/JSON this field is populated from a query string argument, such
  // as `?endpoint_id=12345`, which is the fallback for fields that are
  // included in neither the URI nor the body.
  string endpoint_id = 4
      [(google.api.field_behavior) = IMMUTABLE];
}

// Runtime operation information for
// [EndpointService.CreateEndpoint][google.cloud.aiplatform.v1.EndpointService.CreateEndpoint].
//
// Declared as the metadata type of the long-running operation returned by that
// method.
message CreateEndpointOperationMetadata {
  // The operation generic information.
  GenericOperationMetadata generic_metadata = 1;
}

// Request message for
// [EndpointService.GetEndpoint][google.cloud.aiplatform.v1.EndpointService.GetEndpoint].
message GetEndpointRequest {
  // Required. Resource name of the Endpoint to retrieve.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference).type = "aiplatform.googleapis.com/Endpoint"
  ];
}

// Request message for
// [EndpointService.ListEndpoints][google.cloud.aiplatform.v1.EndpointService.ListEndpoints].
message ListEndpointsRequest {
  // Required. The resource name of the Location from which to list the
  // Endpoints. Format: `projects/{project}/locations/{location}`
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. An expression for filtering the results of the request. For field
  // names both snake_case and camelCase are supported.
  //
  //   * `endpoint` supports `=` and `!=`. `endpoint` represents the Endpoint
  //     ID, i.e. the last segment of the Endpoint's
  //     [resource name][google.cloud.aiplatform.v1.Endpoint.name].
  //   * `display_name` supports `=` and `!=`.
  //   * `labels` supports general map functions, that is:
  //     * `labels.key=value` - key:value equality
  //     * `labels.key:*` or `labels:key` - key existence
  //     * A key including a space must be quoted. `labels."a key"`.
  //   * `base_model_name` only supports `=`.
  //
  // Some examples:
  //
  //   * `endpoint=1`
  //   * `displayName="myDisplayName"`
  //   * `labels.myKey="myValue"`
  //   * `baseModelName="text-bison"`
  string filter = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The standard list page size.
  int32 page_size = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The standard list page token.
  // Typically obtained via
  // [ListEndpointsResponse.next_page_token][google.cloud.aiplatform.v1.ListEndpointsResponse.next_page_token]
  // of the previous
  // [EndpointService.ListEndpoints][google.cloud.aiplatform.v1.EndpointService.ListEndpoints]
  // call.
  string page_token = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Mask specifying which fields to read.
  google.protobuf.FieldMask read_mask = 5
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. A comma-separated list of fields to order by, sorted in
  // ascending order. Use "desc" after a field name for descending.
  // Supported fields:
  //
  //   * `display_name`
  //   * `create_time`
  //   * `update_time`
  //
  // Example: `display_name, create_time desc`.
  //
  // Annotated OPTIONAL for consistency with the other optional fields of this
  // message; the annotation is descriptive only and does not affect the wire
  // format.
  string order_by = 6 [(google.api.field_behavior) = OPTIONAL];
}

// Response message for
// [EndpointService.ListEndpoints][google.cloud.aiplatform.v1.EndpointService.ListEndpoints].
//
// Holds one page of Endpoints together with the token used to request the
// following page.
message ListEndpointsResponse {
  // List of Endpoints in the requested page.
  repeated Endpoint endpoints = 1;

  // A token to retrieve the next page of results.
  // Pass to
  // [ListEndpointsRequest.page_token][google.cloud.aiplatform.v1.ListEndpointsRequest.page_token]
  // to obtain that page.
  string next_page_token = 2;
}

// Request message for
// [EndpointService.UpdateEndpoint][google.cloud.aiplatform.v1.EndpointService.UpdateEndpoint].
message UpdateEndpointRequest {
  // Required. The Endpoint that replaces the resource on the server.
  Endpoint endpoint = 1
      [(google.api.field_behavior) = REQUIRED];

  // Required. The update mask that applies to the resource. See
  // [google.protobuf.FieldMask][google.protobuf.FieldMask].
  google.protobuf.FieldMask update_mask = 2 [
    (google.api.field_behavior) = REQUIRED
  ];
}

// Request message for
// [EndpointService.DeleteEndpoint][google.cloud.aiplatform.v1.EndpointService.DeleteEndpoint].
message DeleteEndpointRequest {
  // Required. Resource name of the Endpoint to delete.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference).type = "aiplatform.googleapis.com/Endpoint"
  ];
}

// Request message for
// [EndpointService.DeployModel][google.cloud.aiplatform.v1.EndpointService.DeployModel].
message DeployModelRequest {
  // Required. Resource name of the Endpoint into which the Model is deployed.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference).type = "aiplatform.googleapis.com/Endpoint"
  ];

  // Required. The DeployedModel to create within the Endpoint. For the
  // DeployedModel to start receiving traffic,
  // [Endpoint.traffic_split][google.cloud.aiplatform.v1.Endpoint.traffic_split]
  // must be updated, either as part of this call or via
  // [EndpointService.UpdateEndpoint][google.cloud.aiplatform.v1.EndpointService.UpdateEndpoint].
  DeployedModel deployed_model = 2
      [(google.api.field_behavior) = REQUIRED];

  // Maps a DeployedModel's ID to the percentage of this Endpoint's traffic
  // that should be forwarded to that DeployedModel.
  //
  // When non-empty, this map overwrites the Endpoint's
  // [traffic_split][google.cloud.aiplatform.v1.Endpoint.traffic_split]. Use
  // the key "0" to refer to the Model being deployed by this call; this method
  // replaces it with the actual ID of the new DeployedModel. The traffic
  // percentage values must add up to 100.
  //
  // When empty, the Endpoint's
  // [traffic_split][google.cloud.aiplatform.v1.Endpoint.traffic_split] is
  // left unchanged.
  map<string, int32> traffic_split = 3;
}

// Response message for
// [EndpointService.DeployModel][google.cloud.aiplatform.v1.EndpointService.DeployModel].
//
// Declared as the response type of the long-running operation returned by that
// method.
message DeployModelResponse {
  // The DeployedModel that has been deployed in the Endpoint.
  DeployedModel deployed_model = 1;
}

// Runtime operation information for
// [EndpointService.DeployModel][google.cloud.aiplatform.v1.EndpointService.DeployModel].
//
// Declared as the metadata type of the long-running operation returned by that
// method.
message DeployModelOperationMetadata {
  // The operation generic information.
  GenericOperationMetadata generic_metadata = 1;
}

// Request message for
// [EndpointService.UndeployModel][google.cloud.aiplatform.v1.EndpointService.UndeployModel].
message UndeployModelRequest {
  // Required. Resource name of the Endpoint from which the Model is undeployed.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference).type = "aiplatform.googleapis.com/Endpoint"
  ];

  // Required. ID of the DeployedModel to undeploy from the Endpoint.
  string deployed_model_id = 2
      [(google.api.field_behavior) = REQUIRED];

  // When provided, this map overwrites the Endpoint's
  // [traffic_split][google.cloud.aiplatform.v1.Endpoint.traffic_split]. If the
  // last DeployedModel is being undeployed from the Endpoint,
  // [Endpoint.traffic_split][google.cloud.aiplatform.v1.Endpoint.traffic_split]
  // will always end up empty when this call returns. A DeployedModel is
  // successfully undeployed only if no traffic is assigned to it when this
  // method executes, or if this field unassigns any traffic from it.
  map<string, int32> traffic_split = 3;
}

// Response message for
// [EndpointService.UndeployModel][google.cloud.aiplatform.v1.EndpointService.UndeployModel].
//
// Declared as the response type of the long-running operation returned by that
// method. This message currently defines no fields.
message UndeployModelResponse {}

// Runtime operation information for
// [EndpointService.UndeployModel][google.cloud.aiplatform.v1.EndpointService.UndeployModel].
//
// Declared as the metadata type of the long-running operation returned by that
// method.
message UndeployModelOperationMetadata {
  // The operation generic information.
  GenericOperationMetadata generic_metadata = 1;
}

// Request message for
// [EndpointService.MutateDeployedModel][google.cloud.aiplatform.v1.EndpointService.MutateDeployedModel].
message MutateDeployedModelRequest {
  // Required. Resource name of the Endpoint that contains the DeployedModel to
  // mutate. Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference).type = "aiplatform.googleapis.com/Endpoint"
  ];

  // Required. The DeployedModel to mutate within the Endpoint. Mutation is
  // limited to the following fields:
  //
  // * `min_replica_count` in either
  // [DedicatedResources][google.cloud.aiplatform.v1.DedicatedResources] or
  // [AutomaticResources][google.cloud.aiplatform.v1.AutomaticResources]
  // * `max_replica_count` in either
  // [DedicatedResources][google.cloud.aiplatform.v1.DedicatedResources] or
  // [AutomaticResources][google.cloud.aiplatform.v1.AutomaticResources]
  // * [autoscaling_metric_specs][google.cloud.aiplatform.v1.DedicatedResources.autoscaling_metric_specs]
  // * `disable_container_logging` (v1 only)
  // * `enable_container_logging` (v1beta1 only)
  DeployedModel deployed_model = 2
      [(google.api.field_behavior) = REQUIRED];

  // Required. The update mask that applies to the resource. See
  // [google.protobuf.FieldMask][google.protobuf.FieldMask].
  google.protobuf.FieldMask update_mask = 4 [
    (google.api.field_behavior) = REQUIRED
  ];
}

// Response message for
// [EndpointService.MutateDeployedModel][google.cloud.aiplatform.v1.EndpointService.MutateDeployedModel].
//
// Declared as the response type of the long-running operation returned by that
// method.
message MutateDeployedModelResponse {
  // The DeployedModel that's being mutated.
  DeployedModel deployed_model = 1;
}

// Runtime operation information for
// [EndpointService.MutateDeployedModel][google.cloud.aiplatform.v1.EndpointService.MutateDeployedModel].
//
// Declared as the metadata type of the long-running operation returned by that
// method.
message MutateDeployedModelOperationMetadata {
  // The operation generic information.
  GenericOperationMetadata generic_metadata = 1;
}