// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/prediction_service.proto";
import "google/protobuf/struct.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "LlmUtilityServiceProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// Service for LLM-related utility functions.
service LlmUtilityService {
  option (google.api.default_host) = "aiplatform.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Performs token counting.
  //
  // The request and response messages are not defined in this file; they are
  // expected to come from the imported prediction_service.proto.
  rpc CountTokens(CountTokensRequest) returns (CountTokensResponse) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:countTokens"
      body: "*"
      additional_bindings {
        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:countTokens"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,instances";
  }

  // Returns a list of tokens based on the input text.
  rpc ComputeTokens(ComputeTokensRequest) returns (ComputeTokensResponse) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:computeTokens"
      body: "*"
      additional_bindings {
        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:computeTokens"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,instances";
  }
}

// Request message for the ComputeTokens RPC call.
message ComputeTokensRequest {
  // Required. The name of the Endpoint requested to get lists of tokens and
  // token ids.
  //
  // The HTTP bindings of ComputeTokens accept names matching either
  // `projects/*/locations/*/endpoints/*` or
  // `projects/*/locations/*/publishers/*/models/*`.
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The instances that are the input to the token computing API
  // call. The schema is identical to the prediction schema of the text model,
  // even for the non-text models, like chat models or Codey models.
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];
}

// Tokens info with a list of tokens and the corresponding list of token ids.
//
// NOTE(review): the two lists are presumably index-aligned (token_ids[i] is
// the id of tokens[i]); nothing in this file states that — confirm against
// the service implementation before relying on it.
message TokensInfo {
  // A list of tokens from the input, carried as raw bytes.
  repeated bytes tokens = 1;

  // A list of token ids from the input.
  repeated int64 token_ids = 2;
}

// Response message for the ComputeTokens RPC call.
message ComputeTokensResponse {
  // Lists of tokens info from the input. A ComputeTokensRequest can carry
  // multiple instances, each with its own prompt, so lists of tokens info are
  // returned to cover a request with multiple instances.
  repeated TokensInfo tokens_info = 1;
}