// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/completion_stats.proto";
import "google/cloud/aiplatform/v1/encryption_spec.proto";
import "google/cloud/aiplatform/v1/explanation.proto";
import "google/cloud/aiplatform/v1/io.proto";
import "google/cloud/aiplatform/v1/job_state.proto";
import "google/cloud/aiplatform/v1/machine_resources.proto";
import "google/cloud/aiplatform/v1/manual_batch_tuning_parameters.proto";
import "google/cloud/aiplatform/v1/unmanaged_container_model.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "BatchPredictionJobProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// A job that uses a
// [Model][google.cloud.aiplatform.v1.BatchPredictionJob.model] to produce
// predictions on multiple [input
// instances][google.cloud.aiplatform.v1.BatchPredictionJob.input_config]. If
// predictions for significant portion of the instances fail, the job may
// finish without attempting predictions for all remaining instances.
message BatchPredictionJob {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/BatchPredictionJob"
    pattern: "projects/{project}/locations/{location}/batchPredictionJobs/{batch_prediction_job}"
  };

  // Configures the input to
  // [BatchPredictionJob][google.cloud.aiplatform.v1.BatchPredictionJob]. See
  // [Model.supported_input_storage_formats][google.cloud.aiplatform.v1.Model.supported_input_storage_formats]
  // for Model's supported input formats, and how instances should be expressed
  // via any of them.
  message InputConfig {
    // Required. The source of the input.
    oneof source {
      // The Cloud Storage location for the input instances.
      GcsSource gcs_source = 2;

      // The BigQuery location of the input table.
      // The schema of the table should be in the format described by the given
      // context OpenAPI Schema, if one is provided. The table may contain
      // additional columns that are not described by the schema, and they will
      // be ignored.
      BigQuerySource bigquery_source = 3;
    }

    // Required. The format in which instances are given, must be one of the
    // [Model's][google.cloud.aiplatform.v1.BatchPredictionJob.model]
    // [supported_input_storage_formats][google.cloud.aiplatform.v1.Model.supported_input_storage_formats].
    string instances_format = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // Configuration defining how to transform batch prediction input instances
  // to the instances that the Model accepts.
  message InstanceConfig {
    // The format of the instance that the Model accepts. Vertex AI will
    // convert compatible
    // [batch prediction input instance
    // formats][google.cloud.aiplatform.v1.BatchPredictionJob.InputConfig.instances_format]
    // to the specified format.
    //
    // Supported values are:
    //
    // * `object`: Each input is converted to JSON object format.
    //     * For `bigquery`, each row is converted to an object.
    //     * For `jsonl`, each line of the JSONL input must be an object.
    //     * Does not apply to `csv`, `file-list`, `tf-record`, or
    //       `tf-record-gzip`.
    //
    // * `array`: Each input is converted to JSON array format.
    //     * For `bigquery`, each row is converted to an array. The order
    //       of columns is determined by the BigQuery column order, unless
    //       [included_fields][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.included_fields]
    //       is populated.
    //       [included_fields][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.included_fields]
    //       must be populated for specifying field orders.
    //     * For `jsonl`, if each line of the JSONL input is an object,
    //       [included_fields][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.included_fields]
    //       must be populated for specifying field orders.
    //     * Does not apply to `csv`, `file-list`, `tf-record`, or
    //       `tf-record-gzip`.
    //
    // If not specified, Vertex AI converts the batch prediction input as
    // follows:
    //
    //  * For `bigquery` and `csv`, the behavior is the same as `array`. The
    //    order of columns is the same as defined in the file or table, unless
    //    [included_fields][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.included_fields]
    //    is populated.
    //  * For `jsonl`, the prediction instance format is determined by
    //    each line of the input.
    //  * For `tf-record`/`tf-record-gzip`, each record will be converted to
    //    an object in the format of `{"b64": <value>}`, where `<value>` is
    //    the Base64-encoded string of the content of the record.
    //  * For `file-list`, each file in the list will be converted to an
    //    object in the format of `{"b64": <value>}`, where `<value>` is
    //    the Base64-encoded string of the content of the file.
    string instance_type = 1;

    // The name of the field that is considered as a key.
    //
    // The values identified by the key field is not included in the
    // transformed instances that is sent to the Model. This is similar to
    // specifying this name of the field in
    // [excluded_fields][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.excluded_fields].
    // In addition, the batch prediction output will not include the instances.
    // Instead the output will only include the value of the key field, in a
    // field named `key` in the output:
    //
    //  * For `jsonl` output format, the output will have a `key` field
    //    instead of the `instance` field.
    //  * For `csv`/`bigquery` output format, the output will have a `key`
    //    column instead of the instance feature columns.
    //
    // The input must be JSONL with objects at each line, CSV, BigQuery
    // or TfRecord.
    string key_field = 2;

    // Fields that will be included in the prediction instance that is
    // sent to the Model.
    //
    // If
    // [instance_type][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.instance_type]
    // is `array`, the order of field names in included_fields also determines
    // the order of the values in the array.
    //
    // When included_fields is populated,
    // [excluded_fields][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.excluded_fields]
    // must be empty.
    //
    // The input must be JSONL with objects at each line, BigQuery
    // or TfRecord.
    repeated string included_fields = 3;

    // Fields that will be excluded in the prediction instance that is
    // sent to the Model.
    //
    // Excluded will be attached to the batch prediction output if
    // [key_field][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.key_field]
    // is not specified.
    //
    // When excluded_fields is populated,
    // [included_fields][google.cloud.aiplatform.v1.BatchPredictionJob.InstanceConfig.included_fields]
    // must be empty.
    //
    // The input must be JSONL with objects at each line, BigQuery
    // or TfRecord.
    repeated string excluded_fields = 4;
  }

  // Configures the output of
  // [BatchPredictionJob][google.cloud.aiplatform.v1.BatchPredictionJob]. See
  // [Model.supported_output_storage_formats][google.cloud.aiplatform.v1.Model.supported_output_storage_formats]
  // for supported output formats, and how predictions are expressed via any of
  // them.
  message OutputConfig {
    // Required. The destination of the output.
    oneof destination {
      // The Cloud Storage location of the directory where the output is
      // to be written to. In the given directory a new directory is created.
      // Its name is `prediction-<model-display-name>-<job-create-time>`,
      // where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format.
      // Inside of it files `predictions_0001.<extension>`,
      // `predictions_0002.<extension>`, ..., `predictions_N.<extension>`
      // are created where `<extension>` depends on chosen
      // [predictions_format][google.cloud.aiplatform.v1.BatchPredictionJob.OutputConfig.predictions_format],
      // and N may equal 0001 and depends on the total number of successfully
      // predicted instances. If the Model has both
      // [instance][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri]
      // and
      // [prediction][google.cloud.aiplatform.v1.PredictSchemata.parameters_schema_uri]
      // schemata defined then each such file contains predictions as per the
      // [predictions_format][google.cloud.aiplatform.v1.BatchPredictionJob.OutputConfig.predictions_format].
      // If prediction for any instance failed (partially or completely), then
      // an additional `errors_0001.<extension>`, `errors_0002.<extension>`,...,
      // `errors_N.<extension>` files are created (N depends on total number
      // of failed predictions). These files contain the failed instances,
      // as per their schema, followed by an additional `error` field which as
      // value has [google.rpc.Status][google.rpc.Status]
      // containing only `code` and `message` fields.
      GcsDestination gcs_destination = 2;

      // The BigQuery project or dataset location where the output is to be
      // written to. If project is provided, a new dataset is created with name
      // `prediction_<model-display-name>_<job-create-time>`
      // where <model-display-name> is made
      // BigQuery-dataset-name compatible (for example, most special characters
      // become underscores), and timestamp is in
      // YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset
      // two tables will be created, `predictions`, and `errors`.
      // If the Model has both
      // [instance][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri]
      // and
      // [prediction][google.cloud.aiplatform.v1.PredictSchemata.parameters_schema_uri]
      // schemata defined then the tables have columns as follows: The
      // `predictions` table contains instances for which the prediction
      // succeeded, it has columns as per a concatenation of the Model's
      // instance and prediction schemata. The `errors` table contains rows for
      // which the prediction has failed, it has instance columns, as per the
      // instance schema, followed by a single "errors" column, which as values
      // has [google.rpc.Status][google.rpc.Status]
      // represented as a STRUCT, and containing only `code` and `message`.
      BigQueryDestination bigquery_destination = 3;
    }

    // Required. The format in which Vertex AI gives the predictions, must be
    // one of the [Model's][google.cloud.aiplatform.v1.BatchPredictionJob.model]
    // [supported_output_storage_formats][google.cloud.aiplatform.v1.Model.supported_output_storage_formats].
    string predictions_format = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // Further describes this job's output.
  // Supplements
  // [output_config][google.cloud.aiplatform.v1.BatchPredictionJob.output_config].
  message OutputInfo {
    // The output location into which prediction output is written.
    oneof output_location {
      // Output only. The full path of the Cloud Storage directory created, into
      // which the prediction output is written.
      string gcs_output_directory = 1
          [(google.api.field_behavior) = OUTPUT_ONLY];

      // Output only. The path of the BigQuery dataset created, in
      // `bq://projectId.bqDatasetId`
      // format, into which the prediction output is written.
      string bigquery_output_dataset = 2
          [(google.api.field_behavior) = OUTPUT_ONLY];
    }

    // Output only. The name of the BigQuery table created, in
    // `predictions_<timestamp>`
    // format, into which the prediction output is written.
    // Can be used by UI to generate the BigQuery output path, for example.
    string bigquery_output_table = 4
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Output only. Resource name of the BatchPredictionJob.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The user-defined name of this BatchPredictionJob.
  string display_name = 2 [(google.api.field_behavior) = REQUIRED];

  // The name of the Model resource that produces the predictions via this job,
  // must share the same ancestor Location.
  // Starting this job has no impact on any existing deployments of the Model
  // and their resources.
  // Exactly one of model and unmanaged_container_model must be set.
  //
  // The model resource name may contain version id or version alias to specify
  // the version.
  //  Example: `projects/{project}/locations/{location}/models/{model}@2`
  //              or
  //            `projects/{project}/locations/{location}/models/{model}@golden`
  // if no version is specified, the default version will be deployed.
  //
  // The model resource could also be a publisher model.
  //  Example: `publishers/{publisher}/models/{model}`
  //              or
  //            `projects/{project}/locations/{location}/publishers/{publisher}/models/{model}`
  string model = 3 [(google.api.resource_reference) = {
    type: "aiplatform.googleapis.com/Model"
  }];

  // Output only. The version ID of the Model that produces the predictions via
  // this job.
  string model_version_id = 30 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Contains model information necessary to perform batch prediction without
  // requiring uploading to model registry.
  // Exactly one of model and unmanaged_container_model must be set.
  UnmanagedContainerModel unmanaged_container_model = 28;

  // Required. Input configuration of the instances on which predictions are
  // performed. The schema of any single instance may be specified via the
  // [Model's][google.cloud.aiplatform.v1.BatchPredictionJob.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri].
  InputConfig input_config = 4 [(google.api.field_behavior) = REQUIRED];

  // Configuration for how to convert batch prediction input instances to the
  // prediction instances that are sent to the Model.
  InstanceConfig instance_config = 27;

  // The parameters that govern the predictions. The schema of the parameters
  // may be specified via the
  // [Model's][google.cloud.aiplatform.v1.BatchPredictionJob.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [parameters_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.parameters_schema_uri].
  google.protobuf.Value model_parameters = 5;

  // Required. The Configuration specifying where output predictions should
  // be written.
  // The schema of any single prediction may be specified as a concatenation
  // of [Model's][google.cloud.aiplatform.v1.BatchPredictionJob.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri]
  // and
  // [prediction_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.prediction_schema_uri].
  OutputConfig output_config = 6 [(google.api.field_behavior) = REQUIRED];

  // The config of resources used by the Model during the batch prediction. If
  // the Model
  // [supports][google.cloud.aiplatform.v1.Model.supported_deployment_resources_types]
  // DEDICATED_RESOURCES this config may be provided (and the job will use these
  // resources), if the Model doesn't support AUTOMATIC_RESOURCES, this config
  // must be provided.
  BatchDedicatedResources dedicated_resources = 7;

  // The service account that the DeployedModel's container runs as. If not
  // specified, a system generated one will be used, which
  // has minimal permissions and the custom container, if used, may not have
  // enough permission to access other Google Cloud resources.
  //
  // Users deploying the Model must have the `iam.serviceAccounts.actAs`
  // permission on this service account.
  string service_account = 29;

  // Immutable. Parameters configuring the batch behavior. Currently only
  // applicable when
  // [dedicated_resources][google.cloud.aiplatform.v1.BatchPredictionJob.dedicated_resources]
  // are used (in other cases Vertex AI does the tuning itself).
  ManualBatchTuningParameters manual_batch_tuning_parameters = 8
      [(google.api.field_behavior) = IMMUTABLE];

  // Generate explanation with the batch prediction results.
  //
  // When set to `true`, the batch prediction output changes based on the
  // `predictions_format` field of the
  // [BatchPredictionJob.output_config][google.cloud.aiplatform.v1.BatchPredictionJob.output_config]
  // object:
  //
  //  * `bigquery`: output includes a column named `explanation`. The value
  //    is a struct that conforms to the
  //    [Explanation][google.cloud.aiplatform.v1.Explanation] object.
  //  * `jsonl`: The JSON objects on each line include an additional entry
  //    keyed `explanation`. The value of the entry is a JSON object that
  //    conforms to the [Explanation][google.cloud.aiplatform.v1.Explanation]
  //    object.
  //  * `csv`: Generating explanations for CSV format is not supported.
  //
  // If this field is set to true, either the
  // [Model.explanation_spec][google.cloud.aiplatform.v1.Model.explanation_spec]
  // or
  // [explanation_spec][google.cloud.aiplatform.v1.BatchPredictionJob.explanation_spec]
  // must be populated.
  bool generate_explanation = 23;

  // Explanation configuration for this BatchPredictionJob. Can be
  // specified only if
  // [generate_explanation][google.cloud.aiplatform.v1.BatchPredictionJob.generate_explanation]
  // is set to `true`.
  //
  // This value overrides the value of
  // [Model.explanation_spec][google.cloud.aiplatform.v1.Model.explanation_spec].
  // All fields of
  // [explanation_spec][google.cloud.aiplatform.v1.BatchPredictionJob.explanation_spec]
  // are optional in the request. If a field of the
  // [explanation_spec][google.cloud.aiplatform.v1.BatchPredictionJob.explanation_spec]
  // object is not populated, the corresponding field of the
  // [Model.explanation_spec][google.cloud.aiplatform.v1.Model.explanation_spec]
  // object is inherited.
  ExplanationSpec explanation_spec = 25;

  // Output only. Information further describing the output of this job.
  OutputInfo output_info = 9 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The detailed state of the job.
  JobState state = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Only populated when the job's state is JOB_STATE_FAILED or
  // JOB_STATE_CANCELLED.
  google.rpc.Status error = 11 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Partial failures encountered.
  // For example, single files that can't be read.
  // This field never exceeds 20 entries.
  // Status details fields contain standard Google Cloud error details.
  repeated google.rpc.Status partial_failures = 12
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Information about resources that had been consumed by this
  // job. Provided in real time at best effort basis, as well as a final value
  // once the job completes.
  //
  // Note: This field currently may be not populated for batch predictions that
  // use AutoML Models.
  ResourcesConsumed resources_consumed = 13
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Statistics on completed and failed prediction instances.
  CompletionStats completion_stats = 14
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the BatchPredictionJob was created.
  google.protobuf.Timestamp create_time = 15
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the BatchPredictionJob for the first time entered
  // the `JOB_STATE_RUNNING` state.
  google.protobuf.Timestamp start_time = 16
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the BatchPredictionJob entered any of the following
  // states: `JOB_STATE_SUCCEEDED`, `JOB_STATE_FAILED`, `JOB_STATE_CANCELLED`.
  google.protobuf.Timestamp end_time = 17
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the BatchPredictionJob was most recently updated.
  google.protobuf.Timestamp update_time = 18
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // The labels with user-defined metadata to organize BatchPredictionJobs.
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  //
  // See https://goo.gl/xmQnxf for more information and examples of labels.
  map<string, string> labels = 19;

  // Customer-managed encryption key options for a BatchPredictionJob. If this
  // is set, then all resources created by the BatchPredictionJob will be
  // encrypted with the provided encryption key.
  EncryptionSpec encryption_spec = 24;

  // For custom-trained Models and AutoML Tabular Models, the container of the
  // DeployedModel instances will send `stderr` and `stdout` streams to
  // Cloud Logging by default. Please note that the logs incur cost,
  // which are subject to [Cloud Logging
  // pricing](https://cloud.google.com/logging/pricing).
  //
  // User can disable container logging by setting this flag to true.
  bool disable_container_logging = 34;
}