// aiplatform/v1/llm_utility_service.proto

// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/prediction_service.proto";
import "google/protobuf/struct.proto";

// Per-language code generation settings: the namespace or package that
// generated C#, Go, Java, PHP and Ruby code is placed in, plus the Java
// outer class name and file layout.
option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "LlmUtilityServiceProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// Utility operations for large language models (LLMs): counting tokens and
// computing the tokens of an input.
service LlmUtilityService {
  option (google.api.default_host) = "aiplatform.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Counts the tokens in the supplied input.
  //
  // Over HTTP this is a POST to the `:countTokens` custom verb, addressed
  // either to an Endpoint resource or to a publisher model. The request and
  // response messages are not declared in this file.
  rpc CountTokens(CountTokensRequest) returns (CountTokensResponse) {
    // Flattened client signature: (endpoint, instances).
    option (google.api.method_signature) = "endpoint,instances";
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:countTokens"
      body: "*"
      additional_bindings {
        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:countTokens"
        body: "*"
      }
    };
  }

  // Returns the list of tokens computed from the input text.
  //
  // Over HTTP this is a POST to the `:computeTokens` custom verb, addressed
  // either to an Endpoint resource or to a publisher model.
  rpc ComputeTokens(ComputeTokensRequest) returns (ComputeTokensResponse) {
    // Flattened client signature: (endpoint, instances).
    option (google.api.method_signature) = "endpoint,instances";
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:computeTokens"
      body: "*"
      additional_bindings {
        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:computeTokens"
        body: "*"
      }
    };
  }
}

// Request message for the ComputeTokens RPC.
message ComputeTokensRequest {
  // Required. Name of the Endpoint for which lists of tokens and token ids
  // are requested.
  string endpoint = 1 [
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    },
    (google.api.field_behavior) = REQUIRED
  ];

  // Required. The instances that serve as input to the token computing call.
  // The schema is identical to the prediction schema of the text model, even
  // for non-text models such as chat models or Codey models.
  repeated google.protobuf.Value instances = 2 [
    (google.api.field_behavior) = REQUIRED
  ];
}

// Token information for an input: the tokens themselves and the
// corresponding token ids, held as two separate lists.
// NOTE(review): presumably `tokens[i]` pairs with `token_ids[i]` — confirm
// against the service implementation.
message TokensInfo {
  // The tokens obtained from the input, as raw bytes.
  repeated bytes tokens = 1;

  // The token ids obtained from the input.
  repeated int64 token_ids = 2;
}

// Response message for the ComputeTokens RPC.
message ComputeTokensResponse {
  // Tokens info computed from the input. A ComputeTokensRequest may contain
  // multiple instances, each with its own prompt, so this is a list of
  // TokensInfo entries covering such multi-instance requests.
  // NOTE(review): presumably one entry per request instance, in request
  // order — confirm.
  repeated TokensInfo tokens_info = 1;
}