// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.bigquery.storage.v1beta2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/bigquery/storage/v1beta2/arrow.proto";
import "google/cloud/bigquery/storage/v1beta2/avro.proto";
import "google/cloud/bigquery/storage/v1beta2/protobuf.proto";
import "google/cloud/bigquery/storage/v1beta2/stream.proto";
import "google/cloud/bigquery/storage/v1beta2/table.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/wrappers.proto";
import "google/rpc/status.proto";

option go_package = "cloud.google.com/go/bigquery/storage/apiv1beta2/storagepb;storagepb";
option java_multiple_files = true;
option java_outer_classname = "StorageProto";
option java_package = "com.google.cloud.bigquery.storage.v1beta2";

// BigQuery Read API.
//
// The Read API can be used to read data from BigQuery.
//
// New code should use the v1 Read API going forward, unless it also needs to
// use the Write API at the same time.
service BigQueryRead {
  option (google.api.default_host) = "bigquerystorage.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/bigquery,"
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a new read session. A read session divides the contents of a
  // BigQuery table into one or more streams, which can then be used to read
  // data from the table. The read session also specifies properties of the
  // data to be read, such as a list of columns or a push-down filter
  // describing the rows to be returned.
  //
  // A particular row can be read by at most one stream. When the caller has
  // reached the end of each stream in the session, then all the data in the
  // table has been read.
  //
  // Data is assigned to each stream such that roughly the same number of
  // rows can be read from each stream. Because the server-side unit for
  // assigning data is collections of rows, the API does not guarantee that
  // each stream will return the same number of rows. Additionally, the
  // limits are enforced based on the number of pre-filtered rows, so some
  // filters can lead to lopsided assignments.
  //
  // Read sessions automatically expire 6 hours after they are created and do
  // not require manual clean-up by the caller.
  rpc CreateReadSession(CreateReadSessionRequest) returns (ReadSession) {
    option (google.api.http) = {
      post: "/v1beta2/{read_session.table=projects/*/datasets/*/tables/*}"
      body: "*"
    };
    option (google.api.method_signature) =
        "parent,read_session,max_stream_count";
  }
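
  // Illustrative `CreateReadSessionRequest` for the RPC above, in proto text
  // format. This is a minimal sketch: the project, dataset, and table names
  // are placeholders, and the `table` and `data_format` fields are assumed to
  // be defined on `ReadSession` in stream.proto.
  //
  //   parent: "projects/my-project"
  //   read_session {
  //     table: "projects/my-project/datasets/my_dataset/tables/my_table"
  //     data_format: AVRO
  //   }
  //   max_stream_count: 4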

  // Reads rows from the stream in the format prescribed by the ReadSession.
  // Each response contains one or more table rows, up to a maximum of 100 MiB
  // per response; read requests which attempt to read individual rows larger
  // than 100 MiB will fail.
  //
  // Each request also returns a set of stream statistics reflecting the
  // current state of the stream.
  rpc ReadRows(ReadRowsRequest) returns (stream ReadRowsResponse) {
    option (google.api.http) = {
      get: "/v1beta2/{read_stream=projects/*/locations/*/sessions/*/streams/*}"
    };
    option (google.api.method_signature) = "read_stream,offset";
  }

  // Splits a given `ReadStream` into two `ReadStream` objects. These
  // `ReadStream` objects are referred to as the primary and the residual
  // streams of the split. The original `ReadStream` can still be read from in
  // the same manner as before. Both of the returned `ReadStream` objects can
  // also be read from, and the rows returned by both child streams will be
  // the same as the rows read from the original stream.
  //
  // Moreover, the two child streams will be allocated back-to-back in the
  // original `ReadStream`. Concretely, it is guaranteed that for streams
  // original, primary, and residual, that original[0-j] = primary[0-j] and
  // original[j-n] = residual[0-m] once the streams have been read to
  // completion.
  rpc SplitReadStream(SplitReadStreamRequest)
      returns (SplitReadStreamResponse) {
    option (google.api.http) = {
      get: "/v1beta2/{name=projects/*/locations/*/sessions/*/streams/*}"
    };
  }
}

// BigQuery Write API.
//
// The Write API can be used to write data to BigQuery.
//
// The [google.cloud.bigquery.storage.v1
// API](/bigquery/docs/reference/storage/rpc/google.cloud.bigquery.storage.v1)
// should be used instead of the v1beta2 API for BigQueryWrite operations.
service BigQueryWrite {
  option deprecated = true;
  option (google.api.default_host) = "bigquerystorage.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/bigquery,"
      "https://www.googleapis.com/auth/bigquery.insertdata,"
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a write stream to the given table.
  // Additionally, every table has a special COMMITTED stream named '_default'
  // to which data can be written. This stream doesn't need to be created using
  // CreateWriteStream. It is a stream that can be used simultaneously by any
  // number of clients. Data written to this stream is considered committed as
  // soon as an acknowledgement is received.
  rpc CreateWriteStream(CreateWriteStreamRequest) returns (WriteStream) {
    option deprecated = true;
    option (google.api.http) = {
      post: "/v1beta2/{parent=projects/*/datasets/*/tables/*}"
      body: "write_stream"
    };
    option (google.api.method_signature) = "parent,write_stream";
  }
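
  // Illustrative `CreateWriteStreamRequest` for the RPC above, in proto text
  // format. This is a sketch with placeholder names; the `type` field is
  // assumed to be defined on `WriteStream` in stream.proto.
  //
  //   parent: "projects/my-project/datasets/my_dataset/tables/my_table"
  //   write_stream {
  //     type: PENDING
  //   }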

  // Appends data to the given stream.
  //
  // If `offset` is specified, the `offset` is checked against the end of
  // stream. The server returns `OUT_OF_RANGE` in `AppendRowsResponse` if an
  // attempt is made to append to an offset beyond the current end of the
  // stream, or `ALREADY_EXISTS` if the user provides an `offset` that has
  // already been written to. The user can retry with an adjusted offset within
  // the same RPC stream. If `offset` is not specified, the append happens at
  // the end of the stream. (An illustrative offset example appears after this
  // service definition.)
  //
  // The response contains the offset at which the append happened. Responses
  // are received in the same order in which requests are sent. There will be
  // one response for each successful request. If the `offset` is not set in
  // the response, it means the append did not happen due to an error. If one
  // request fails, all subsequent requests will also fail until a successful
  // request is made again.
  //
  // If the stream is of `PENDING` type, data will only be available for read
  // operations after the stream is committed.
  rpc AppendRows(stream AppendRowsRequest) returns (stream AppendRowsResponse) {
    option deprecated = true;
    option (google.api.http) = {
      post: "/v1beta2/{write_stream=projects/*/datasets/*/tables/*/streams/*}"
      body: "*"
    };
    option (google.api.method_signature) = "write_stream";
  }

  // Gets a write stream.
  rpc GetWriteStream(GetWriteStreamRequest) returns (WriteStream) {
    option deprecated = true;
    option (google.api.http) = {
      post: "/v1beta2/{name=projects/*/datasets/*/tables/*/streams/*}"
      body: "*"
    };
    option (google.api.method_signature) = "name";
  }

  // Finalizes a write stream so that no new data can be appended to the
  // stream. Finalize is not supported on the '_default' stream.
  rpc FinalizeWriteStream(FinalizeWriteStreamRequest)
      returns (FinalizeWriteStreamResponse) {
    option deprecated = true;
    option (google.api.http) = {
      post: "/v1beta2/{name=projects/*/datasets/*/tables/*/streams/*}"
      body: "*"
    };
    option (google.api.method_signature) = "name";
  }

  // Atomically commits a group of `PENDING` streams that belong to the same
  // `parent` table.
  // Streams must be finalized before commit and cannot be committed multiple
  // times. Once a stream is committed, data in the stream becomes available
  // for read operations.
  rpc BatchCommitWriteStreams(BatchCommitWriteStreamsRequest)
      returns (BatchCommitWriteStreamsResponse) {
    option deprecated = true;
    option (google.api.http) = {
      get: "/v1beta2/{parent=projects/*/datasets/*/tables/*}"
    };
    option (google.api.method_signature) = "parent";
  }

  // Flushes rows to a BUFFERED stream.
  // If users are appending rows to a BUFFERED stream, a flush operation is
  // required in order for the rows to become available for reading. A
  // flush operation flushes up to any previously flushed offset in a BUFFERED
  // stream, to the offset specified in the request.
  // Flush is not supported on the _default stream, since it is not BUFFERED.
  rpc FlushRows(FlushRowsRequest) returns (FlushRowsResponse) {
    option deprecated = true;
    option (google.api.http) = {
      post: "/v1beta2/{write_stream=projects/*/datasets/*/tables/*/streams/*}"
      body: "*"
    };
    option (google.api.method_signature) = "write_stream";
  }
}
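
// Illustrative `AppendRows` offset behavior (hypothetical numbers): if the
// current end of a stream is at offset 100 and a request sets `offset` to 100,
// the append succeeds and the response reports offset 100. A request with
// `offset` 90 returns `ALREADY_EXISTS` (those rows were already written), and
// a request with `offset` 110 returns `OUT_OF_RANGE` (beyond the current end
// of the stream). In both error cases the user can retry with an adjusted
// offset on the same RPC stream.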

// Request message for `CreateReadSession`.
message CreateReadSessionRequest {
  // Required. The request project that owns the session, in the form of
  // `projects/{project_id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "cloudresourcemanager.googleapis.com/Project"
    }
  ];

  // Required. Session to be created.
  ReadSession read_session = 2 [(google.api.field_behavior) = REQUIRED];

  // Max initial number of streams. If unset or zero, the server will
  // provide a value of streams so as to produce reasonable throughput. Must be
  // non-negative. The number of streams may be lower than the requested
  // number, depending on the amount of parallelism that is reasonable for the
  // table. An error will be returned if the max count is greater than the
  // current system max limit of 1,000.
  //
  // Streams must be read starting from offset 0.
  int32 max_stream_count = 3;
}

// Request message for `ReadRows`.
message ReadRowsRequest {
  // Required. Stream to read rows from.
  string read_stream = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "bigquerystorage.googleapis.com/ReadStream"
    }
  ];

  // The offset requested must be less than the last row read from Read.
  // Requesting a larger offset is undefined. If not specified, start reading
  // from offset zero.
  int64 offset = 2;
}

// Information about whether the current connection is being throttled.
message ThrottleState {
  // How much this connection is being throttled. Zero means no throttling,
  // 100 means fully throttled.
  int32 throttle_percent = 1;
}

// Estimated stream statistics for a given Stream.
message StreamStats {
  message Progress {
    // The fraction of rows assigned to the stream that have been processed by
    // the server so far, not including the rows in the current response
    // message.
    //
    // This value, along with `at_response_end`, can be used to interpolate
    // the progress made as the rows in the message are being processed using
    // the following formula: `at_response_start + (at_response_end -
    // at_response_start) * rows_processed_from_response / rows_in_response`.
    // (A worked example with illustrative numbers follows the
    // `ReadRowsResponse` message below.)
    //
    // Note that if a filter is provided, the `at_response_end` value of the
    // previous response may not necessarily be equal to the
    // `at_response_start` value of the current response.
    double at_response_start = 1;

    // Similar to `at_response_start`, except that this value includes the
    // rows in the current response.
    double at_response_end = 2;
  }

  // Represents the progress of the current stream.
  Progress progress = 2;
}

// A response from calling `ReadRows` may include row data, progress, and
// throttling information.
message ReadRowsResponse {
  // Row data is returned in the format specified during session creation.
  oneof rows {
    // Serialized row data in AVRO format.
    AvroRows avro_rows = 3;

    // Serialized row data in Arrow RecordBatch format.
    ArrowRecordBatch arrow_record_batch = 4;
  }

  // Number of serialized rows in the rows block.
  int64 row_count = 6;

  // Statistics for the stream.
  StreamStats stats = 2;

  // Throttling state. If unset, the latest response still describes
  // the current throttling status.
  ThrottleState throttle_state = 5;

  // The schema for the read. If read_options.selected_fields is set, the
  // schema may be different from the table schema as it will only contain
  // the selected fields. This schema is equivalent to the one returned by
  // CreateSession. This field is only populated in the first ReadRowsResponse
  // RPC.
  oneof schema {
    // Output only. Avro schema.
    AvroSchema avro_schema = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Arrow schema.
    ArrowSchema arrow_schema = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
  }
}
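
// Worked example for `StreamStats.Progress` (illustrative numbers): if a
// `ReadRowsResponse` carries `stats.progress` with `at_response_start` = 0.25
// and `at_response_end` = 0.35, and the response contains 100 rows, then after
// the client has processed 40 of those rows the interpolated progress is
// 0.25 + (0.35 - 0.25) * 40 / 100 = 0.29, i.e. roughly 29% of the rows
// assigned to this stream.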

// Request message for `SplitReadStream`.
message SplitReadStreamRequest {
  // Required. Name of the stream to split.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "bigquerystorage.googleapis.com/ReadStream"
    }
  ];

  // A value in the range (0.0, 1.0) that specifies the fractional point at
  // which the original stream should be split. The actual split point is
  // evaluated on pre-filtered rows, so if a filter is provided, then there is
  // no guarantee that the division of the rows between the new child streams
  // will be proportional to this fractional value. Additionally, because the
  // server-side unit for assigning data is collections of rows, this fraction
  // will always map to a data storage boundary on the server side.
  double fraction = 2;
}

// Response message for `SplitReadStream`.
message SplitReadStreamResponse {
  // Primary stream, which contains the beginning portion of
  // |original_stream|. An empty value indicates that the original stream can
  // no longer be split.
  ReadStream primary_stream = 1;

  // Remainder stream, which contains the tail of |original_stream|. An empty
  // value indicates that the original stream can no longer be split.
  ReadStream remainder_stream = 2;
}

// Request message for `CreateWriteStream`.
message CreateWriteStreamRequest {
  // Required. Reference to the table to which the stream belongs, in the
  // format of `projects/{project}/datasets/{dataset}/tables/{table}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "bigquery.googleapis.com/Table" }
  ];

  // Required. Stream to be created.
  WriteStream write_stream = 2 [(google.api.field_behavior) = REQUIRED];
}

// Request message for `AppendRows`.
message AppendRowsRequest {
  // Proto schema and data.
  message ProtoData {
    // Proto schema used to serialize the data.
    ProtoSchema writer_schema = 1;

    // Serialized row data in protobuf message format.
    ProtoRows rows = 2;
  }

  // Required. The stream that is the target of the append operation. This
  // value must be specified for the initial request. If subsequent requests
  // specify the stream name, it must equal the value provided in the first
  // request. To write to the _default stream, populate this field with a
  // string in the format
  // `projects/{project}/datasets/{dataset}/tables/{table}/_default`.
  string write_stream = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "bigquerystorage.googleapis.com/WriteStream"
    }
  ];

  // If present, the write is only performed if the next append offset is the
  // same as the provided value. If not present, the write is performed at the
  // current end of stream. Specifying a value for this field is not allowed
  // when calling AppendRows for the '_default' stream.
  google.protobuf.Int64Value offset = 2;

  // Input rows. The `writer_schema` field must be specified in the initial
  // request and, currently, it will be ignored if specified in following
  // requests. Following requests must have data in the same format as the
  // initial request.
  oneof rows {
    // Rows in proto format.
    ProtoData proto_rows = 4;
  }

  // ID set by the client to annotate its identity. Only the setting on the
  // initial request is respected.
  string trace_id = 6;
}
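
// Illustrative `AppendRowsRequest` in proto text format. This is a minimal
// sketch with placeholder names; the `proto_descriptor` and `serialized_rows`
// fields are assumed to be the contents of `ProtoSchema` and `ProtoRows` in
// protobuf.proto, and the serialized row bytes are elided.
//
//   write_stream: "projects/my-project/datasets/my_dataset/tables/my_table/streams/my_stream"
//   offset { value: 0 }
//   proto_rows {
//     writer_schema {
//       proto_descriptor { name: "MyRow" }
//     }
//     rows {
//       serialized_rows: "..."  # serialized MyRow messages, one per row
//     }
//   }
//   trace_id: "example-writer"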

// Response message for `AppendRows`.
message AppendRowsResponse {
  // AppendResult is returned for successful append requests.
  message AppendResult {
    // The row offset at which the last append occurred. The offset will not be
    // set if appending using default streams.
    google.protobuf.Int64Value offset = 1;
  }

  oneof response {
    // Result if the append is successful.
    AppendResult append_result = 1;

    // Error returned when problems were encountered. If present,
    // it indicates rows were not accepted into the system.
    // Users can retry or continue with other append requests within the
    // same connection.
    //
    // Additional information about error signalling:
    //
    // ALREADY_EXISTS: Happens when an append specified an offset, and the
    // backend already has received data at this offset. Typically encountered
    // in retry scenarios, and can be ignored.
    //
    // OUT_OF_RANGE: Returned when the specified offset in the stream is beyond
    // the current end of the stream.
    //
    // INVALID_ARGUMENT: Indicates a malformed request or data.
    //
    // ABORTED: Request processing is aborted because of prior failures. The
    // request can be retried if the previous failure is addressed.
    //
    // INTERNAL: Indicates server side error(s) that can be retried.
    google.rpc.Status error = 2;
  }

  // If the backend detects a schema update, it is passed to the user so that
  // the user can use it to input messages of the new type. It will be empty
  // when no schema updates have occurred.
  TableSchema updated_schema = 3;
}

// Request message for `GetWriteStream`.
message GetWriteStreamRequest {
  // Required. Name of the stream to get, in the form of
  // `projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "bigquerystorage.googleapis.com/WriteStream"
    }
  ];
}

// Request message for `BatchCommitWriteStreams`.
message BatchCommitWriteStreamsRequest {
  // Required. Parent table that all the streams should belong to, in the form
  // of `projects/{project}/datasets/{dataset}/tables/{table}`.
  string parent = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The group of streams that will be committed atomically.
  repeated string write_streams = 2 [(google.api.field_behavior) = REQUIRED];
}

// Response message for `BatchCommitWriteStreams`.
message BatchCommitWriteStreamsResponse {
  // The time at which streams were committed, with microsecond granularity.
  // This field will only exist when there are no stream errors.
  // **Note** if this field is not set, it means the commit was not successful.
  google.protobuf.Timestamp commit_time = 1;

  // Stream-level error if commit failed. Only streams with an error will be in
  // the list.
  // If empty, there is no error and all streams are committed successfully.
  // If non-empty, certain streams have errors and ZERO streams are committed
  // due to the atomicity guarantee.
  repeated StorageError stream_errors = 2;
}
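
// Illustrative `BatchCommitWriteStreamsRequest` in proto text format (a sketch
// with placeholder names; both streams must already be finalized `PENDING`
// streams on the same parent table):
//
//   parent: "projects/my-project/datasets/my_dataset/tables/my_table"
//   write_streams: "projects/my-project/datasets/my_dataset/tables/my_table/streams/stream-1"
//   write_streams: "projects/my-project/datasets/my_dataset/tables/my_table/streams/stream-2"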

// Request message for invoking `FinalizeWriteStream`.
message FinalizeWriteStreamRequest {
  // Required. Name of the stream to finalize, in the form of
  // `projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "bigquerystorage.googleapis.com/WriteStream"
    }
  ];
}

// Response message for `FinalizeWriteStream`.
message FinalizeWriteStreamResponse {
  // Number of rows in the finalized stream.
  int64 row_count = 1;
}

// Request message for `FlushRows`.
message FlushRowsRequest {
  // Required. The stream that is the target of the flush operation.
  string write_stream = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "bigquerystorage.googleapis.com/WriteStream"
    }
  ];

  // Ending offset of the flush operation. Rows before this offset (including
  // this offset) will be flushed.
  google.protobuf.Int64Value offset = 2;
}

// Response message for `FlushRows`.
message FlushRowsResponse {
  // The rows before this offset (including this offset) are flushed.
  int64 offset = 1;
}

// Structured custom BigQuery Storage error message. The error can be attached
// as error details in the returned rpc Status. In particular, the use of error
// codes allows more structured error handling and reduces the need to evaluate
// unstructured error text strings.
message StorageError {
  // Error code for `StorageError`.
  enum StorageErrorCode {
    // Default error.
    STORAGE_ERROR_CODE_UNSPECIFIED = 0;

    // Table is not found in the system.
    TABLE_NOT_FOUND = 1;

    // Stream is already committed.
    STREAM_ALREADY_COMMITTED = 2;

    // Stream is not found.
    STREAM_NOT_FOUND = 3;

    // Invalid Stream type.
    // For example, you try to commit a stream that is not pending.
    INVALID_STREAM_TYPE = 4;

    // Invalid Stream state.
    // For example, you try to commit a stream that is not finalized or has
    // been garbage collected.
    INVALID_STREAM_STATE = 5;

    // Stream is finalized.
    STREAM_FINALIZED = 6;
  }

  // BigQuery Storage specific error code.
  StorageErrorCode code = 1;

  // Name of the failed entity.
  string entity = 2;

  // Message that describes the error.
  string error_message = 3;
}
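
// Illustrative `StorageError` in proto text format (a sketch with a
// placeholder stream name and message), as it might appear in `stream_errors`
// when a commit is attempted on an already committed stream:
//
//   code: STREAM_ALREADY_COMMITTED
//   entity: "projects/my-project/datasets/my_dataset/tables/my_table/streams/stream-1"
//   error_message: "Stream has already been committed."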