// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.bigquery.storage.v1beta2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/bigquery/storage/v1beta2/arrow.proto";
import "google/cloud/bigquery/storage/v1beta2/avro.proto";
import "google/cloud/bigquery/storage/v1beta2/protobuf.proto";
import "google/cloud/bigquery/storage/v1beta2/stream.proto";
import "google/cloud/bigquery/storage/v1beta2/table.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/wrappers.proto";
import "google/rpc/status.proto";

option go_package = "cloud.google.com/go/bigquery/storage/apiv1beta2/storagepb;storagepb";
option java_multiple_files = true;
option java_outer_classname = "StorageProto";
option java_package = "com.google.cloud.bigquery.storage.v1beta2";

// BigQuery Read API.
//
// The Read API can be used to read data from BigQuery.
//
// New code should use the v1 Read API going forward, unless it also needs to
// use the Write API at the same time.
service BigQueryRead {
  option (google.api.default_host) = "bigquerystorage.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/bigquery,"
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a new read session. A read session divides the contents of a
  // BigQuery table into one or more streams, which can then be used to read
  // data from the table. The read session also specifies properties of the
  // data to be read, such as a list of columns or a push-down filter describing
  // the rows to be returned.
  //
  // A particular row can be read by at most one stream. When the caller has
  // reached the end of each stream in the session, then all the data in the
  // table has been read.
  //
  // Data is assigned to each stream such that roughly the same number of
  // rows can be read from each stream. Because the server-side unit for
  // assigning data is collections of rows, the API does not guarantee that
  // each stream will return the same number of rows. Additionally, the
  // limits are enforced based on the number of pre-filtered rows, so some
  // filters can lead to lopsided assignments.
  //
  // Read sessions automatically expire 6 hours after they are created and do
  // not require manual clean-up by the caller.
  rpc CreateReadSession(CreateReadSessionRequest) returns (ReadSession) {
    option (google.api.http) = {
      post: "/v1beta2/{read_session.table=projects/*/datasets/*/tables/*}"
      body: "*"
    };
    option (google.api.method_signature) =
        "parent,read_session,max_stream_count";
  }
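
  // For illustration only (not part of the generated API surface): a minimal,
  // hypothetical `CreateReadSessionRequest` in proto text format. The project,
  // dataset, and table names are placeholders, and the `table` and
  // `data_format` fields are assumed from the ReadSession definition in
  // stream.proto.
  //
  //   parent: "projects/my-project"
  //   read_session {
  //     table: "projects/my-project/datasets/my_dataset/tables/my_table"
  //     data_format: AVRO
  //   }
  //   max_stream_count: 4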

  // Reads rows from the stream in the format prescribed by the ReadSession.
  // Each response contains one or more table rows, up to a maximum of 100 MiB
  // per response; read requests which attempt to read individual rows larger
  // than 100 MiB will fail.
  //
  // Each request also returns a set of stream statistics reflecting the current
  // state of the stream.
  rpc ReadRows(ReadRowsRequest) returns (stream ReadRowsResponse) {
    option (google.api.http) = {
      get: "/v1beta2/{read_stream=projects/*/locations/*/sessions/*/streams/*}"
    };
    option (google.api.method_signature) = "read_stream,offset";
  }
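
  // For illustration only: a hypothetical `ReadRowsRequest` in proto text
  // format, resuming a stream returned by `CreateReadSession` at row 1000.
  // The stream name is a placeholder.
  //
  //   read_stream: "projects/my-project/locations/us/sessions/session-id/streams/stream-id"
  //   offset: 1000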

  // Splits a given `ReadStream` into two `ReadStream` objects. These
  // `ReadStream` objects are referred to as the primary and the residual
  // streams of the split. The original `ReadStream` can still be read from in
  // the same manner as before. Both of the returned `ReadStream` objects can
  // also be read from, and the rows returned by both child streams will be
  // the same as the rows read from the original stream.
  //
  // Moreover, the two child streams will be allocated back-to-back in the
  // original `ReadStream`. Concretely, it is guaranteed that for streams
  // original, primary, and residual, that original[0-j] = primary[0-j] and
  // original[j-n] = residual[0-m] once the streams have been read to
  // completion.
  rpc SplitReadStream(SplitReadStreamRequest)
      returns (SplitReadStreamResponse) {
    option (google.api.http) = {
      get: "/v1beta2/{name=projects/*/locations/*/sessions/*/streams/*}"
    };
  }
}

// BigQuery Write API.
//
// The Write API can be used to write data to BigQuery.
//
// The [google.cloud.bigquery.storage.v1
// API](/bigquery/docs/reference/storage/rpc/google.cloud.bigquery.storage.v1)
// should be used instead of the v1beta2 API for BigQueryWrite operations.
service BigQueryWrite {
  option deprecated = true;
  option (google.api.default_host) = "bigquerystorage.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/bigquery,"
      "https://www.googleapis.com/auth/bigquery.insertdata,"
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a write stream to the given table.
  // Additionally, every table has a special COMMITTED stream named '_default'
  // to which data can be written. This stream doesn't need to be created using
  // CreateWriteStream. It is a stream that can be used simultaneously by any
  // number of clients. Data written to this stream is considered committed as
  // soon as an acknowledgement is received.
  rpc CreateWriteStream(CreateWriteStreamRequest) returns (WriteStream) {
    option deprecated = true;
    option (google.api.http) = {
      post: "/v1beta2/{parent=projects/*/datasets/*/tables/*}"
      body: "write_stream"
    };
    option (google.api.method_signature) = "parent,write_stream";
  }
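
  // For illustration only: a hypothetical `CreateWriteStreamRequest` in proto
  // text format, creating a PENDING stream. The `type` field is assumed from
  // the WriteStream definition in stream.proto; names are placeholders.
  //
  //   parent: "projects/my-project/datasets/my_dataset/tables/my_table"
  //   write_stream {
  //     type: PENDING
  //   }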

  // Appends data to the given stream.
  //
  // If `offset` is specified, the `offset` is checked against the end of
  // stream. The server returns `OUT_OF_RANGE` in `AppendRowsResponse` if an
  // attempt is made to append to an offset beyond the current end of the
  // stream, or `ALREADY_EXISTS` if the user provides an `offset` that has
  // already been written to. The user can retry with an adjusted offset within
  // the same RPC stream. If `offset` is not specified, the append happens at
  // the end of the stream.
  //
  // The response contains the offset at which the append happened. Responses
  // are received in the same order in which requests are sent. There will be
  // one response for each successful request. If the `offset` is not set in
  // the response, it means the append did not happen due to an error. If one
  // request fails, all subsequent requests will also fail until a successful
  // request is made again.
  //
  // If the stream is of `PENDING` type, data will only be available for read
  // operations after the stream is committed.
  rpc AppendRows(stream AppendRowsRequest) returns (stream AppendRowsResponse) {
    option deprecated = true;
    option (google.api.http) = {
      post: "/v1beta2/{write_stream=projects/*/datasets/*/tables/*/streams/*}"
      body: "*"
    };
    option (google.api.method_signature) = "write_stream";
  }

  // Gets a write stream.
  rpc GetWriteStream(GetWriteStreamRequest) returns (WriteStream) {
    option deprecated = true;
    option (google.api.http) = {
      post: "/v1beta2/{name=projects/*/datasets/*/tables/*/streams/*}"
      body: "*"
    };
    option (google.api.method_signature) = "name";
  }

  // Finalizes a write stream so that no new data can be appended to the
  // stream. Finalize is not supported on the '_default' stream.
  rpc FinalizeWriteStream(FinalizeWriteStreamRequest)
      returns (FinalizeWriteStreamResponse) {
    option deprecated = true;
    option (google.api.http) = {
      post: "/v1beta2/{name=projects/*/datasets/*/tables/*/streams/*}"
      body: "*"
    };
    option (google.api.method_signature) = "name";
  }

  // Atomically commits a group of `PENDING` streams that belong to the same
  // `parent` table.
  // Streams must be finalized before commit and cannot be committed multiple
  // times. Once a stream is committed, data in the stream becomes available
  // for read operations.
  rpc BatchCommitWriteStreams(BatchCommitWriteStreamsRequest)
      returns (BatchCommitWriteStreamsResponse) {
    option deprecated = true;
    option (google.api.http) = {
      get: "/v1beta2/{parent=projects/*/datasets/*/tables/*}"
    };
    option (google.api.method_signature) = "parent";
  }

  // Flushes rows to a BUFFERED stream.
  // If users are appending rows to a BUFFERED stream, the flush operation is
  // required in order for the rows to become available for reading. A
  // flush operation advances the flushed offset of a BUFFERED stream, from any
  // previously flushed offset up to the offset specified in the request.
  // Flush is not supported on the _default stream, since it is not BUFFERED.
  rpc FlushRows(FlushRowsRequest) returns (FlushRowsResponse) {
    option deprecated = true;
    option (google.api.http) = {
      post: "/v1beta2/{write_stream=projects/*/datasets/*/tables/*/streams/*}"
      body: "*"
    };
    option (google.api.method_signature) = "write_stream";
  }
}

// Request message for `CreateReadSession`.
message CreateReadSessionRequest {
  // Required. The request project that owns the session, in the form of
  // `projects/{project_id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "cloudresourcemanager.googleapis.com/Project"
    }
  ];

  // Required. Session to be created.
  ReadSession read_session = 2 [(google.api.field_behavior) = REQUIRED];

  // Max initial number of streams. If unset or zero, the server will
  // provide a number of streams so as to produce reasonable throughput. Must
  // be non-negative. The number of streams may be lower than the requested
  // number, depending on the amount of parallelism that is reasonable for the
  // table. An error will be returned if the max count is greater than the
  // current system max limit of 1,000.
  //
  // Streams must be read starting from offset 0.
  int32 max_stream_count = 3;
}

// Request message for `ReadRows`.
message ReadRowsRequest {
  // Required. Stream to read rows from.
  string read_stream = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "bigquerystorage.googleapis.com/ReadStream"
    }
  ];

  // The offset requested must be less than the last row read from Read.
  // Requesting a larger offset is undefined. If not specified, start reading
  // from offset zero.
  int64 offset = 2;
}

// Information about whether the current connection is being throttled.
message ThrottleState {
  // How much this connection is being throttled. Zero means no throttling,
  // 100 means fully throttled.
  int32 throttle_percent = 1;
}

// Estimated stream statistics for a given Stream.
message StreamStats {
  message Progress {
    // The fraction of rows assigned to the stream that have been processed by
    // the server so far, not including the rows in the current response
    // message.
    //
    // This value, along with `at_response_end`, can be used to interpolate
    // the progress made as the rows in the message are being processed using
    // the following formula: `at_response_start + (at_response_end -
    // at_response_start) * rows_processed_from_response / rows_in_response`.
    //
    // Note that if a filter is provided, the `at_response_end` value of the
    // previous response may not necessarily be equal to the
    // `at_response_start` value of the current response.
    double at_response_start = 1;

    // Similar to `at_response_start`, except that this value includes the
    // rows in the current response.
    double at_response_end = 2;
  }

  // Represents the progress of the current stream.
  Progress progress = 2;
}
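
// Worked example of the interpolation formula above (illustrative numbers
// only): if a response reports `at_response_start = 0.25` and
// `at_response_end = 0.35`, and the client has processed 50 of the 100 rows
// in that response, the interpolated progress is
// 0.25 + (0.35 - 0.25) * 50 / 100 = 0.30.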

// Response from calling `ReadRows` may include row data, progress and
// throttling information.
message ReadRowsResponse {
  // Row data is returned in the format specified during session creation.
  oneof rows {
    // Serialized row data in AVRO format.
    AvroRows avro_rows = 3;

    // Serialized row data in Arrow RecordBatch format.
    ArrowRecordBatch arrow_record_batch = 4;
  }

  // Number of serialized rows in the rows block.
  int64 row_count = 6;

  // Statistics for the stream.
  StreamStats stats = 2;

  // Throttling state. If unset, the latest response still describes
  // the current throttling status.
  ThrottleState throttle_state = 5;

  // The schema for the read. If read_options.selected_fields is set, the
  // schema may be different from the table schema as it will only contain
  // the selected fields. This schema is equivalent to the one returned by
  // CreateReadSession. This field is only populated in the first
  // ReadRowsResponse RPC.
  oneof schema {
    // Output only. Avro schema.
    AvroSchema avro_schema = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Arrow schema.
    ArrowSchema arrow_schema = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
  }
}

// Request message for `SplitReadStream`.
message SplitReadStreamRequest {
  // Required. Name of the stream to split.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "bigquerystorage.googleapis.com/ReadStream"
    }
  ];

  // A value in the range (0.0, 1.0) that specifies the fractional point at
  // which the original stream should be split. The actual split point is
  // evaluated on pre-filtered rows, so if a filter is provided, then there is
  // no guarantee that the division of the rows between the new child streams
  // will be proportional to this fractional value. Additionally, because the
  // server-side unit for assigning data is collections of rows, this fraction
  // will always map to a data storage boundary on the server side.
  double fraction = 2;
}

// Response message for `SplitReadStream`.
message SplitReadStreamResponse {
  // Primary stream, which contains the beginning portion of
  // |original_stream|. An empty value indicates that the original stream can no
  // longer be split.
  ReadStream primary_stream = 1;

  // Remainder stream, which contains the tail of |original_stream|. An empty
  // value indicates that the original stream can no longer be split.
  ReadStream remainder_stream = 2;
}
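
// Worked example (illustrative only): if the original stream contains rows
// r0..r9 and `SplitReadStream` is called with `fraction: 0.5`, the server
// picks a storage boundary near the midpoint, say after r5. Reading both
// child streams to completion then yields r0..r5 from the primary stream and
// r6..r9 from the remainder (residual) stream, matching the
// original[0-j] = primary[0-j] and original[j-n] = residual[0-m] guarantee
// described on `SplitReadStream`.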

// Request message for `CreateWriteStream`.
message CreateWriteStreamRequest {
  // Required. Reference to the table to which the stream belongs, in the format
  // of `projects/{project}/datasets/{dataset}/tables/{table}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "bigquery.googleapis.com/Table" }
  ];

  // Required. Stream to be created.
  WriteStream write_stream = 2 [(google.api.field_behavior) = REQUIRED];
}

// Request message for `AppendRows`.
message AppendRowsRequest {
  // Proto schema and data.
  message ProtoData {
    // Proto schema used to serialize the data.
    ProtoSchema writer_schema = 1;

    // Serialized row data in protobuf message format.
    ProtoRows rows = 2;
  }

  // Required. The stream that is the target of the append operation. This value
  // must be specified for the initial request. If subsequent requests specify
  // the stream name, it must equal the value provided in the first request.
  // To write to the _default stream, populate this field with a string in the
  // format `projects/{project}/datasets/{dataset}/tables/{table}/_default`.
  string write_stream = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "bigquerystorage.googleapis.com/WriteStream"
    }
  ];

  // If present, the write is only performed if the next append offset is the
  // same as the provided value. If not present, the write is performed at the
  // current end of stream. Specifying a value for this field is not allowed
  // when calling AppendRows for the '_default' stream.
  google.protobuf.Int64Value offset = 2;

  // Input rows. The `writer_schema` field must be specified in the initial
  // request and is currently ignored if specified in subsequent requests.
  // Subsequent requests must have data in the same format as the initial
  // request.
  oneof rows {
    // Rows in proto format.
    ProtoData proto_rows = 4;
  }

  // ID set by the client to annotate its identity. Only the setting provided
  // in the initial request is respected.
  string trace_id = 6;
}
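
// For illustration only: a hypothetical client-defined message whose
// descriptor would be sent as `ProtoData.writer_schema` in the first
// `AppendRowsRequest`, assuming a target table with columns `id` (INT64) and
// `name` (STRING). Serialized `ExampleRow` messages would then be carried in
// `ProtoData.rows` on that and subsequent requests.
//
//   message ExampleRow {
//     int64 id = 1;
//     string name = 2;
//   }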

// Response message for `AppendRows`.
message AppendRowsResponse {
  // AppendResult is returned for successful append requests.
  message AppendResult {
    // The row offset at which the last append occurred. The offset will not be
    // set if appending using default streams.
    google.protobuf.Int64Value offset = 1;
  }

  oneof response {
    // Result if the append is successful.
    AppendResult append_result = 1;

    // Error returned when problems were encountered.  If present,
    // it indicates rows were not accepted into the system.
    // Users can retry or continue with other append requests within the
    // same connection.
    //
    // Additional information about error signalling:
    //
    // ALREADY_EXISTS: Happens when an append specified an offset, and the
    // backend already has received data at this offset.  Typically encountered
    // in retry scenarios, and can be ignored.
    //
    // OUT_OF_RANGE: Returned when the specified offset in the stream is beyond
    // the current end of the stream.
    //
    // INVALID_ARGUMENT: Indicates a malformed request or data.
    //
    // ABORTED: Request processing is aborted because of prior failures.  The
    // request can be retried if the previous failure is addressed.
    //
    // INTERNAL: Indicates server side error(s) that can be retried.
    google.rpc.Status error = 2;
  }

  // If the backend detects a schema update, it is passed to the user so that
  // the user can use it to input a new type of message. It will be empty when
  // no schema updates have occurred.
  TableSchema updated_schema = 3;
}

// Request message for `GetWriteStream`.
message GetWriteStreamRequest {
  // Required. Name of the stream to get, in the form of
  // `projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "bigquerystorage.googleapis.com/WriteStream"
    }
  ];
}

// Request message for `BatchCommitWriteStreams`.
message BatchCommitWriteStreamsRequest {
  // Required. Parent table that all the streams should belong to, in the form
  // of `projects/{project}/datasets/{dataset}/tables/{table}`.
  string parent = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The group of streams that will be committed atomically.
  repeated string write_streams = 2 [(google.api.field_behavior) = REQUIRED];
}
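
// For illustration only: a hypothetical `BatchCommitWriteStreamsRequest` in
// proto text format, committing two finalized PENDING streams (all names are
// placeholders).
//
//   parent: "projects/my-project/datasets/my_dataset/tables/my_table"
//   write_streams: "projects/my-project/datasets/my_dataset/tables/my_table/streams/stream-1"
//   write_streams: "projects/my-project/datasets/my_dataset/tables/my_table/streams/stream-2"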

// Response message for `BatchCommitWriteStreams`.
message BatchCommitWriteStreamsResponse {
  // The time at which streams were committed, with microsecond granularity.
  // This field will only exist when there are no stream errors.
  // **Note:** if this field is not set, it means the commit was not successful.
  google.protobuf.Timestamp commit_time = 1;

  // Stream-level errors if the commit failed. Only streams with an error will
  // be in the list.
  // If empty, there is no error and all streams are committed successfully.
  // If non-empty, certain streams have errors and ZERO streams are committed
  // due to the atomicity guarantee.
  repeated StorageError stream_errors = 2;
}

// Request message for invoking `FinalizeWriteStream`.
message FinalizeWriteStreamRequest {
  // Required. Name of the stream to finalize, in the form of
  // `projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "bigquerystorage.googleapis.com/WriteStream"
    }
  ];
}

// Response message for `FinalizeWriteStream`.
message FinalizeWriteStreamResponse {
  // Number of rows in the finalized stream.
  int64 row_count = 1;
}

// Request message for `FlushRows`.
message FlushRowsRequest {
  // Required. The stream that is the target of the flush operation.
  string write_stream = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "bigquerystorage.googleapis.com/WriteStream"
    }
  ];

  // Ending offset of the flush operation. Rows before this offset (including
  // this offset) will be flushed.
  google.protobuf.Int64Value offset = 2;
}
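
// For illustration only: a hypothetical `FlushRowsRequest` in proto text
// format, flushing a BUFFERED stream up to and including offset 99 (the
// stream name is a placeholder). Note that `offset` is a
// `google.protobuf.Int64Value` wrapper, hence the nested `value` field.
//
//   write_stream: "projects/my-project/datasets/my_dataset/tables/my_table/streams/stream-1"
//   offset {
//     value: 99
//   }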

// Response message for `FlushRows`.
message FlushRowsResponse {
  // The rows before this offset (including this offset) are flushed.
  int64 offset = 1;
}

// Structured custom BigQuery Storage error message. The error can be attached
// as error details in the returned rpc Status. In particular, the use of error
// codes allows more structured error handling, and reduces the need to evaluate
// unstructured error text strings.
message StorageError {
  // Error code for `StorageError`.
  enum StorageErrorCode {
    // Default error.
    STORAGE_ERROR_CODE_UNSPECIFIED = 0;

    // Table is not found in the system.
    TABLE_NOT_FOUND = 1;

    // Stream is already committed.
    STREAM_ALREADY_COMMITTED = 2;

    // Stream is not found.
    STREAM_NOT_FOUND = 3;

    // Invalid Stream type.
    // For example, you try to commit a stream that is not pending.
    INVALID_STREAM_TYPE = 4;

    // Invalid Stream state.
    // For example, you try to commit a stream that is not finalized or has
    // already been garbage collected.
    INVALID_STREAM_STATE = 5;

    // Stream is finalized.
    STREAM_FINALIZED = 6;
  }

  // BigQuery Storage specific error code.
  StorageErrorCode code = 1;

  // Name of the failed entity.
  string entity = 2;

  // Message that describes the error.
  string error_message = 3;
}