// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataplex.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dataplex/v1/data_profile.proto";
import "google/cloud/dataplex/v1/data_quality.proto";
import "google/cloud/dataplex/v1/processing.proto";
import "google/cloud/dataplex/v1/resources.proto";
import "google/cloud/dataplex/v1/service.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";

option go_package = "cloud.google.com/go/dataplex/apiv1/dataplexpb;dataplexpb";
option java_multiple_files = true;
option java_outer_classname = "DataScansProto";
option java_package = "com.google.cloud.dataplex.v1";

// DataScanService manages DataScan resources which can be configured to run
// various types of data scanning workload and generate enriched metadata (e.g.
// Data Profile, Data Quality) for the data source.
service DataScanService {
  option (google.api.default_host) = "dataplex.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a DataScan resource.
  rpc CreateDataScan(CreateDataScanRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*}/dataScans"
      body: "data_scan"
    };
    option (google.api.method_signature) = "parent,data_scan,data_scan_id";
    option (google.longrunning.operation_info) = {
      response_type: "DataScan"
      metadata_type: "OperationMetadata"
    };
  }

  // Updates a DataScan resource.
  rpc UpdateDataScan(UpdateDataScanRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v1/{data_scan.name=projects/*/locations/*/dataScans/*}"
      body: "data_scan"
    };
    option (google.api.method_signature) = "data_scan,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "DataScan"
      metadata_type: "OperationMetadata"
    };
  }

  // Deletes a DataScan resource.
  rpc DeleteDataScan(DeleteDataScanRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/dataScans/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "google.protobuf.Empty"
      metadata_type: "OperationMetadata"
    };
  }

  // Gets a DataScan resource.
  rpc GetDataScan(GetDataScanRequest) returns (DataScan) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/dataScans/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists DataScans.
  rpc ListDataScans(ListDataScansRequest) returns (ListDataScansResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*}/dataScans"
    };
    option (google.api.method_signature) = "parent";
  }

  // Runs an on-demand execution of a DataScan.
  rpc RunDataScan(RunDataScanRequest) returns (RunDataScanResponse) {
    option (google.api.http) = {
      post: "/v1/{name=projects/*/locations/*/dataScans/*}:run"
      body: "*"
    };
    option (google.api.method_signature) = "name";
  }

  // Gets a DataScanJob resource.
  rpc GetDataScanJob(GetDataScanJobRequest) returns (DataScanJob) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/dataScans/*/jobs/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists DataScanJobs under the given DataScan.
  rpc ListDataScanJobs(ListDataScanJobsRequest)
      returns (ListDataScanJobsResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*/dataScans/*}/jobs"
    };
    option (google.api.method_signature) = "parent";
  }

  // Generates recommended DataQualityRule from a data profiling DataScan.
  rpc GenerateDataQualityRules(GenerateDataQualityRulesRequest)
      returns (GenerateDataQualityRulesResponse) {
    option (google.api.http) = {
      post: "/v1/{name=projects/*/locations/*/dataScans/*}:generateDataQualityRules"
      body: "*"
      additional_bindings {
        post: "/v1/{name=projects/*/locations/*/dataScans/*/jobs/*}:generateDataQualityRules"
        body: "*"
      }
    };
    option (google.api.method_signature) = "name";
  }
}

// Create dataScan request.
message CreateDataScanRequest {
  // Required. The resource name of the parent location:
  // `projects/{project}/locations/{location_id}`
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Required. DataScan resource.
  DataScan data_scan = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. DataScan identifier.
  //
  // * Must contain only lowercase letters, numbers and hyphens.
  // * Must start with a letter.
  // * Must end with a number or a letter.
  // * Must be between 1-63 characters.
  // * Must be unique within the customer project / location.
  string data_scan_id = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Only validate the request, but do not perform mutations.
  // The default is `false`.
  bool validate_only = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Update dataScan request.
message UpdateDataScanRequest {
  // Required. DataScan resource to be updated.
  //
  // Only fields specified in `update_mask` are updated.
  DataScan data_scan = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Mask of fields to update.
  google.protobuf.FieldMask update_mask = 2
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Only validate the request, but do not perform mutations.
  // The default is `false`.
  bool validate_only = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Delete dataScan request.
message DeleteDataScanRequest {
  // Required. The resource name of the dataScan:
  // `projects/{project}/locations/{location_id}/dataScans/{data_scan_id}`
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    }
  ];
}

// Get dataScan request.
message GetDataScanRequest {
  // DataScan view options.
  enum DataScanView {
    // The API will default to the `BASIC` view.
    DATA_SCAN_VIEW_UNSPECIFIED = 0;

    // Basic view that does not include *spec* and *result*.
    BASIC = 1;

    // Include everything.
    FULL = 10;
  }

  // Required. The resource name of the dataScan:
  // `projects/{project}/locations/{location_id}/dataScans/{data_scan_id}`
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    }
  ];

  // Optional. Select the DataScan view to return. Defaults to `BASIC`.
  DataScanView view = 2 [(google.api.field_behavior) = OPTIONAL];
}

// List dataScans request.
message ListDataScansRequest {
  // Required. The resource name of the parent location:
  // `projects/{project}/locations/{location_id}`
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. Maximum number of dataScans to return. The service may return
  // fewer than this value. If unspecified, at most 500 scans will be returned.
  // The maximum value is 1000; values above 1000 will be coerced to 1000.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Page token received from a previous `ListDataScans` call. Provide
  // this to retrieve the subsequent page. When paginating, all other parameters
  // provided to `ListDataScans` must match the call that provided the
  // page token.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Filter request.
  string filter = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Order by fields (`name` or `create_time`) for the result.
  // If not specified, the ordering is undefined.
  string order_by = 5 [(google.api.field_behavior) = OPTIONAL];
}

// List dataScans response.
message ListDataScansResponse {
  // DataScans (`BASIC` view only) under the given parent location.
  repeated DataScan data_scans = 1;

  // Token to retrieve the next page of results, or empty if there are no more
  // results in the list.
  string next_page_token = 2;

  // Locations that could not be reached.
  repeated string unreachable = 3;
}

// Run DataScan request.
message RunDataScanRequest {
  // Required. The resource name of the DataScan:
  // `projects/{project}/locations/{location_id}/dataScans/{data_scan_id}`
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  //
  // Only **OnDemand** data scans are allowed.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    }
  ];
}

// Run DataScan response.
message RunDataScanResponse {
  // DataScanJob created by RunDataScan request.
  DataScanJob job = 1;
}

// Get DataScanJob request.
message GetDataScanJobRequest {
  // DataScanJob view options.
  enum DataScanJobView {
    // The API will default to the `BASIC` view.
    DATA_SCAN_JOB_VIEW_UNSPECIFIED = 0;

    // Basic view that does not include *spec* and *result*.
    BASIC = 1;

    // Include everything.
    FULL = 10;
  }

  // Required. The resource name of the DataScanJob:
  // `projects/{project}/locations/{location_id}/dataScans/{data_scan_id}/jobs/{data_scan_job_id}`
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScanJob"
    }
  ];

  // Optional. Select the DataScanJob view to return. Defaults to `BASIC`.
  DataScanJobView view = 2 [(google.api.field_behavior) = OPTIONAL];
}

// List DataScanJobs request.
message ListDataScanJobsRequest {
  // Required. The resource name of the parent DataScan:
  // `projects/{project}/locations/{location_id}/dataScans/{data_scan_id}`
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    }
  ];

  // Optional. Maximum number of DataScanJobs to return. The service may return
  // fewer than this value. If unspecified, at most 10 DataScanJobs will be
  // returned. The maximum value is 1000; values above 1000 will be coerced to
  // 1000.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Page token received from a previous `ListDataScanJobs` call.
  // Provide this to retrieve the subsequent page. When paginating, all other
  // parameters provided to `ListDataScanJobs` must match the call that provided
  // the page token.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. An expression for filtering the results of the ListDataScanJobs
  // request.
  //
  // If unspecified, all datascan jobs will be returned. Multiple filters can be
  // applied (with `AND`, `OR` logical operators). Filters are case-sensitive.
  //
  // Allowed fields are:
  //
  // - `start_time`
  // - `end_time`
  //
  // `start_time` and `end_time` expect RFC-3339 formatted strings (e.g.
  // 2018-10-08T18:30:00-07:00).
  //
  // For instance, 'start_time > 2018-10-08T00:00:00.123456789Z AND end_time <
  // 2018-10-09T00:00:00.123456789Z' limits results to DataScanJobs between
  // specified start and end times.
  string filter = 4 [(google.api.field_behavior) = OPTIONAL];
}

// List DataScanJobs response.
message ListDataScanJobsResponse {
  // DataScanJobs (`BASIC` view only) under a given dataScan.
  repeated DataScanJob data_scan_jobs = 1;

  // Token to retrieve the next page of results, or empty if there are no more
  // results in the list.
  string next_page_token = 2;
}

// Generate recommended DataQualityRules request.
message GenerateDataQualityRulesRequest {
  // Required. The name should be either:
  //
  // * the name of a datascan with at least one successful completed data
  //   profiling job, or
  // * the name of a successful completed data profiling datascan job.
  string name = 1 [(google.api.field_behavior) = REQUIRED];
}

// Generate recommended DataQualityRules response.
message GenerateDataQualityRulesResponse {
  // Generated recommended `DataQualityRule`s.
  repeated DataQualityRule rule = 1;
}

// Represents a user-visible job which provides the insights for the related
// data source.
//
// For example:
//
// * Data Quality: generates queries based on the rules and runs against the
//   data to get data quality check results.
// * Data Profile: analyzes the data in table(s) and generates insights about
//   the structure, content and relationships (such as null percent,
//   cardinality, min/max/mean, etc).
message DataScan {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/DataScan"
    pattern: "projects/{project}/locations/{location}/dataScans/{dataScan}"
  };

  // DataScan execution settings.
  message ExecutionSpec {
    // Optional. Spec related to how often and when a scan should be triggered.
    //
    // If not specified, the default is `OnDemand`, which means the scan will
    // not run until the user calls `RunDataScan` API.
    Trigger trigger = 1 [(google.api.field_behavior) = OPTIONAL];

    // Spec related to incremental scan of the data.
    //
    // When an option is selected for incremental scan, it cannot be unset or
    // changed. If not specified, a data scan will run for all data in the
    // table.
    oneof incremental {
      // Immutable. The unnested field (of type *Date* or *Timestamp*) that
      // contains values which monotonically increase over time.
      //
      // If not specified, a data scan will run for all data in the table.
      string field = 100 [(google.api.field_behavior) = IMMUTABLE];
    }
  }

  // Status of the data scan execution.
  message ExecutionStatus {
    // The time when the latest DataScanJob started.
    google.protobuf.Timestamp latest_job_start_time = 4;

    // The time when the latest DataScanJob ended.
    google.protobuf.Timestamp latest_job_end_time = 5;
  }

  // Output only. The relative resource name of the scan, of the form:
  // `projects/{project}/locations/{location_id}/dataScans/{datascan_id}`,
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System generated globally unique ID for the scan. This ID will
  // be different if the scan is deleted and re-created with the same name.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Description of the scan.
  //
  // * Must be between 1-1024 characters.
  string description = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. User friendly display name.
  //
  // * Must be between 1-256 characters.
  string display_name = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. User-defined labels for the scan.
  map<string, string> labels = 5 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Current state of the DataScan.
  State state = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the scan was created.
  google.protobuf.Timestamp create_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the scan was last updated.
  google.protobuf.Timestamp update_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The data source for DataScan.
  DataSource data = 9 [(google.api.field_behavior) = REQUIRED];

  // Optional. DataScan execution settings.
  //
  // If not specified, the fields in it will use their default values.
  ExecutionSpec execution_spec = 10 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Status of the data scan execution.
  ExecutionStatus execution_status = 11
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The type of DataScan.
  DataScanType type = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Data Scan related setting.
  // It is required and immutable which means once data_quality_spec is set, it
  // cannot be changed to data_profile_spec.
  oneof spec {
    // DataQualityScan related setting.
    DataQualitySpec data_quality_spec = 100;

    // DataProfileScan related setting.
    DataProfileSpec data_profile_spec = 101;
  }

  // The result of the data scan.
  oneof result {
    // Output only. The result of the data quality scan.
    DataQualityResult data_quality_result = 200
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The result of the data profile scan.
    DataProfileResult data_profile_result = 201
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }
}

// A DataScanJob represents an instance of DataScan execution.
message DataScanJob {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/DataScanJob"
    pattern: "projects/{project}/locations/{location}/dataScans/{dataScan}/jobs/{job}"
  };

  // Execution state for the DataScanJob.
  enum State {
    // The DataScanJob state is unspecified.
    STATE_UNSPECIFIED = 0;

    // The DataScanJob is running.
    RUNNING = 1;

    // The DataScanJob is canceling.
    CANCELING = 2;

    // The DataScanJob cancellation was successful.
    CANCELLED = 3;

    // The DataScanJob completed successfully.
    SUCCEEDED = 4;

    // The DataScanJob is no longer running due to an error.
    FAILED = 5;

    // The DataScanJob has been created but not started to run yet.
    PENDING = 7;
  }

  // Output only. The relative resource name of the DataScanJob, of the form:
  // `projects/{project}/locations/{location_id}/dataScans/{datascan_id}/jobs/{job_id}`,
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System generated globally unique ID for the DataScanJob.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the DataScanJob was started.
  google.protobuf.Timestamp start_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the DataScanJob ended.
  google.protobuf.Timestamp end_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Execution state for the DataScanJob.
  State state = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Additional information about the current state.
  string message = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The type of the parent DataScan.
  DataScanType type = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Data Scan related setting.
  oneof spec {
    // Output only. DataQualityScan related setting.
    DataQualitySpec data_quality_spec = 100
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. DataProfileScan related setting.
    DataProfileSpec data_profile_spec = 101
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // The result of the data scan.
  oneof result {
    // Output only. The result of the data quality scan.
    DataQualityResult data_quality_result = 200
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The result of the data profile scan.
    DataProfileResult data_profile_result = 201
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }
}

// The type of DataScan.
enum DataScanType {
  // The DataScan type is unspecified.
  DATA_SCAN_TYPE_UNSPECIFIED = 0;

  // Data Quality scan.
  DATA_QUALITY = 1;

  // Data Profile scan.
  DATA_PROFILE = 2;
}
