// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataplex.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dataplex/v1/data_profile.proto";
import "google/cloud/dataplex/v1/data_quality.proto";
import "google/cloud/dataplex/v1/processing.proto";
import "google/cloud/dataplex/v1/resources.proto";
import "google/cloud/dataplex/v1/service.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";

option go_package = "cloud.google.com/go/dataplex/apiv1/dataplexpb;dataplexpb";
option java_multiple_files = true;
option java_outer_classname = "DataScansProto";
option java_package = "com.google.cloud.dataplex.v1";

// DataScanService manages DataScan resources which can be configured to run
// various types of data scanning workload and generate enriched metadata (e.g.
// Data Profile, Data Quality) for the data source.
service DataScanService {
  option (google.api.default_host) = "dataplex.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a DataScan resource.
  rpc CreateDataScan(CreateDataScanRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*}/dataScans"
      body: "data_scan"
    };
    option (google.api.method_signature) = "parent,data_scan,data_scan_id";
    option (google.longrunning.operation_info) = {
      response_type: "DataScan"
      metadata_type: "OperationMetadata"
    };
  }

  // Updates a DataScan resource.
  rpc UpdateDataScan(UpdateDataScanRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v1/{data_scan.name=projects/*/locations/*/dataScans/*}"
      body: "data_scan"
    };
    option (google.api.method_signature) = "data_scan,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "DataScan"
      metadata_type: "OperationMetadata"
    };
  }

  // Deletes a DataScan resource.
  rpc DeleteDataScan(DeleteDataScanRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/dataScans/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "google.protobuf.Empty"
      metadata_type: "OperationMetadata"
    };
  }

  // Gets a DataScan resource.
  rpc GetDataScan(GetDataScanRequest) returns (DataScan) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/dataScans/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists DataScans.
  rpc ListDataScans(ListDataScansRequest) returns (ListDataScansResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*}/dataScans"
    };
    option (google.api.method_signature) = "parent";
  }

  // Runs an on-demand execution of a DataScan.
  rpc RunDataScan(RunDataScanRequest) returns (RunDataScanResponse) {
    option (google.api.http) = {
      post: "/v1/{name=projects/*/locations/*/dataScans/*}:run"
      body: "*"
    };
    option (google.api.method_signature) = "name";
  }

  // Gets a DataScanJob resource.
  rpc GetDataScanJob(GetDataScanJobRequest) returns (DataScanJob) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/dataScans/*/jobs/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists DataScanJobs under the given DataScan.
  rpc ListDataScanJobs(ListDataScanJobsRequest)
      returns (ListDataScanJobsResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*/dataScans/*}/jobs"
    };
    option (google.api.method_signature) = "parent";
  }

  // Generates recommended DataQualityRule from a data profiling DataScan.
  rpc GenerateDataQualityRules(GenerateDataQualityRulesRequest)
      returns (GenerateDataQualityRulesResponse) {
    option (google.api.http) = {
      post: "/v1/{name=projects/*/locations/*/dataScans/*}:generateDataQualityRules"
      body: "*"
      additional_bindings {
        post: "/v1/{name=projects/*/locations/*/dataScans/*/jobs/*}:generateDataQualityRules"
        body: "*"
      }
    };
    option (google.api.method_signature) = "name";
  }
}

// Create dataScan request.
message CreateDataScanRequest {
  // Required. The resource name of the parent location:
  // `projects/{project}/locations/{location_id}`
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Required. DataScan resource.
  DataScan data_scan = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. DataScan identifier.
  //
  // * Must contain only lowercase letters, numbers and hyphens.
  // * Must start with a letter.
  // * Must end with a number or a letter.
  // * Must be between 1-63 characters.
  // * Must be unique within the customer project / location.
  string data_scan_id = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Only validate the request, but do not perform mutations.
  // The default is `false`.
  bool validate_only = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Update dataScan request.
message UpdateDataScanRequest {
  // Required. DataScan resource to be updated.
  //
  // Only fields specified in `update_mask` are updated.
  DataScan data_scan = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Mask of fields to update.
  google.protobuf.FieldMask update_mask = 2
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Only validate the request, but do not perform mutations.
  // The default is `false`.
  bool validate_only = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Delete dataScan request.
message DeleteDataScanRequest {
  // Required. The resource name of the dataScan:
  // `projects/{project}/locations/{location_id}/dataScans/{data_scan_id}`
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    }
  ];
}

// Get dataScan request.
message GetDataScanRequest {
  // DataScan view options.
  enum DataScanView {
    // The API will default to the `BASIC` view.
    DATA_SCAN_VIEW_UNSPECIFIED = 0;

    // Basic view that does not include *spec* and *result*.
    BASIC = 1;

    // Include everything.
    FULL = 10;
  }

  // Required. The resource name of the dataScan:
  // `projects/{project}/locations/{location_id}/dataScans/{data_scan_id}`
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    }
  ];

  // Optional. Select the DataScan view to return. Defaults to `BASIC`.
  DataScanView view = 2 [(google.api.field_behavior) = OPTIONAL];
}

// List dataScans request.
message ListDataScansRequest {
  // Required. The resource name of the parent location:
  // `projects/{project}/locations/{location_id}`
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. Maximum number of dataScans to return. The service may return
  // fewer than this value. If unspecified, at most 500 scans will be returned.
  // The maximum value is 1000; values above 1000 will be coerced to 1000.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Page token received from a previous `ListDataScans` call. Provide
  // this to retrieve the subsequent page. When paginating, all other parameters
  // provided to `ListDataScans` must match the call that provided the
  // page token.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Filter request.
  string filter = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Order by fields (`name` or `create_time`) for the result.
  // If not specified, the ordering is undefined.
  string order_by = 5 [(google.api.field_behavior) = OPTIONAL];
}

// List dataScans response.
message ListDataScansResponse {
  // DataScans (`BASIC` view only) under the given parent location.
  repeated DataScan data_scans = 1;

  // Token to retrieve the next page of results, or empty if there are no more
  // results in the list.
  string next_page_token = 2;

  // Locations that could not be reached.
  repeated string unreachable = 3;
}

// Run DataScan request.
message RunDataScanRequest {
  // Required. The resource name of the DataScan:
  // `projects/{project}/locations/{location_id}/dataScans/{data_scan_id}`
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  //
  // Only **OnDemand** data scans are allowed.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    }
  ];
}

// Run DataScan Response.
message RunDataScanResponse {
  // DataScanJob created by RunDataScan request.
  DataScanJob job = 1;
}

// Get DataScanJob request.
message GetDataScanJobRequest {
  // DataScanJob view options.
  enum DataScanJobView {
    // The API will default to the `BASIC` view.
    DATA_SCAN_JOB_VIEW_UNSPECIFIED = 0;

    // Basic view that does not include *spec* and *result*.
    BASIC = 1;

    // Include everything.
    FULL = 10;
  }

  // Required. The resource name of the DataScanJob:
  // `projects/{project}/locations/{location_id}/dataScans/{data_scan_id}/jobs/{data_scan_job_id}`
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScanJob"
    }
  ];

  // Optional. Select the DataScanJob view to return. Defaults to `BASIC`.
  DataScanJobView view = 2 [(google.api.field_behavior) = OPTIONAL];
}

// List DataScanJobs request.
message ListDataScanJobsRequest {
  // Required. The resource name of the parent DataScan:
  // `projects/{project}/locations/{location_id}/dataScans/{data_scan_id}`
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/DataScan"
    }
  ];

  // Optional. Maximum number of DataScanJobs to return. The service may return
  // fewer than this value. If unspecified, at most 10 DataScanJobs will be
  // returned. The maximum value is 1000; values above 1000 will be coerced to
  // 1000.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Page token received from a previous `ListDataScanJobs` call.
  // Provide this to retrieve the subsequent page. When paginating, all other
  // parameters provided to `ListDataScanJobs` must match the call that provided
  // the page token.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. An expression for filtering the results of the ListDataScanJobs
  // request.
  //
  // If unspecified, all datascan jobs will be returned. Multiple filters can be
  // applied (with `AND`, `OR` logical operators). Filters are case-sensitive.
  //
  // Allowed fields are:
  //
  // - `start_time`
  // - `end_time`
  //
  // `start_time` and `end_time` expect RFC-3339 formatted strings (e.g.
  // 2018-10-08T18:30:00-07:00).
  //
  // For instance, 'start_time > 2018-10-08T00:00:00.123456789Z AND end_time <
  // 2018-10-09T00:00:00.123456789Z' limits results to DataScanJobs between
  // specified start and end times.
  string filter = 4 [(google.api.field_behavior) = OPTIONAL];
}

// List DataScanJobs response.
message ListDataScanJobsResponse {
  // DataScanJobs (`BASIC` view only) under a given dataScan.
  repeated DataScanJob data_scan_jobs = 1;

  // Token to retrieve the next page of results, or empty if there are no more
  // results in the list.
  string next_page_token = 2;
}

// Generate recommended DataQualityRules request.
message GenerateDataQualityRulesRequest {
  // Required. The name should be either
  // * the name of a datascan with at least one successful completed data
  // profiling job, or
  // * the name of a successful completed data profiling datascan job.
  string name = 1 [(google.api.field_behavior) = REQUIRED];
}

// Generate recommended DataQualityRules response.
message GenerateDataQualityRulesResponse {
  // Generated recommended `DataQualityRule`s.
  repeated DataQualityRule rule = 1;
}

// Represents a user-visible job which provides the insights for the related
// data source.
//
// For example:
//
// * Data Quality: generates queries based on the rules and runs against the
//   data to get data quality check results.
// * Data Profile: analyzes the data in table(s) and generates insights about
//   the structure, content and relationships (such as null percent,
//   cardinality, min/max/mean, etc).
message DataScan {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/DataScan"
    pattern: "projects/{project}/locations/{location}/dataScans/{dataScan}"
  };

  // DataScan execution settings.
  message ExecutionSpec {
    // Optional. Spec related to how often and when a scan should be triggered.
    //
    // If not specified, the default is `OnDemand`, which means the scan will
    // not run until the user calls `RunDataScan` API.
    Trigger trigger = 1 [(google.api.field_behavior) = OPTIONAL];

    // Spec related to incremental scan of the data.
    //
    // When an option is selected for incremental scan, it cannot be unset or
    // changed. If not specified, a data scan will run for all data in the
    // table.
    oneof incremental {
      // Immutable. The unnested field (of type *Date* or *Timestamp*) that
      // contains values which monotonically increase over time.
      //
      // If not specified, a data scan will run for all data in the table.
      string field = 100 [(google.api.field_behavior) = IMMUTABLE];
    }
  }

  // Status of the data scan execution.
  message ExecutionStatus {
    // The time when the latest DataScanJob started.
    google.protobuf.Timestamp latest_job_start_time = 4;

    // The time when the latest DataScanJob ended.
    google.protobuf.Timestamp latest_job_end_time = 5;
  }

  // Output only. The relative resource name of the scan, of the form:
  // `projects/{project}/locations/{location_id}/dataScans/{datascan_id}`,
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System generated globally unique ID for the scan. This ID will
  // be different if the scan is deleted and re-created with the same name.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Description of the scan.
  //
  // * Must be between 1-1024 characters.
  string description = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. User friendly display name.
  //
  // * Must be between 1-256 characters.
  string display_name = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. User-defined labels for the scan.
  map<string, string> labels = 5 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Current state of the DataScan.
  State state = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the scan was created.
  google.protobuf.Timestamp create_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the scan was last updated.
  google.protobuf.Timestamp update_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The data source for DataScan.
  DataSource data = 9 [(google.api.field_behavior) = REQUIRED];

  // Optional. DataScan execution settings.
  //
  // If not specified, the fields in it will use their default values.
  ExecutionSpec execution_spec = 10 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Status of the data scan execution.
  ExecutionStatus execution_status = 11
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The type of DataScan.
  DataScanType type = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Data Scan related setting.
  // It is required and immutable which means once data_quality_spec is set, it
  // cannot be changed to data_profile_spec.
  oneof spec {
    // DataQualityScan related setting.
    DataQualitySpec data_quality_spec = 100;

    // DataProfileScan related setting.
    DataProfileSpec data_profile_spec = 101;
  }

  // The result of the data scan.
  oneof result {
    // Output only. The result of the data quality scan.
    DataQualityResult data_quality_result = 200
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The result of the data profile scan.
    DataProfileResult data_profile_result = 201
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }
}

// A DataScanJob represents an instance of DataScan execution.
message DataScanJob {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/DataScanJob"
    pattern: "projects/{project}/locations/{location}/dataScans/{dataScan}/jobs/{job}"
  };

  // Execution state for the DataScanJob.
  enum State {
    // The DataScanJob state is unspecified.
    STATE_UNSPECIFIED = 0;

    // The DataScanJob is running.
    RUNNING = 1;

    // The DataScanJob is canceling.
    CANCELING = 2;

    // The DataScanJob cancellation was successful.
    CANCELLED = 3;

    // The DataScanJob completed successfully.
    SUCCEEDED = 4;

    // The DataScanJob is no longer running due to an error.
    FAILED = 5;

    // NOTE(review): number 6 is not assigned in this enum. If it belonged to a
    // removed value, declare `reserved 6;` so it cannot be reused -- confirm.

    // The DataScanJob has been created but not started to run yet.
    PENDING = 7;
  }

  // Output only. The relative resource name of the DataScanJob, of the form:
  // `projects/{project}/locations/{location_id}/dataScans/{datascan_id}/jobs/{job_id}`,
  // where `project` refers to a *project_id* or *project_number* and
  // `location_id` refers to a GCP region.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System generated globally unique ID for the DataScanJob.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the DataScanJob was started.
  google.protobuf.Timestamp start_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the DataScanJob ended.
  google.protobuf.Timestamp end_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Execution state for the DataScanJob.
  State state = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Additional information about the current state.
  string message = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The type of the parent DataScan.
  DataScanType type = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Data Scan related setting.
  oneof spec {
    // Output only. DataQualityScan related setting.
    DataQualitySpec data_quality_spec = 100
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. DataProfileScan related setting.
    DataProfileSpec data_profile_spec = 101
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // The result of the data scan.
  oneof result {
    // Output only. The result of the data quality scan.
    DataQualityResult data_quality_result = 200
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The result of the data profile scan.
    DataProfileResult data_profile_result = 201
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }
}

// The type of DataScan.
enum DataScanType {
  // The DataScan type is unspecified.
  DATA_SCAN_TYPE_UNSPECIFIED = 0;

  // Data Quality scan.
  DATA_QUALITY = 1;

  // Data Profile scan.
  DATA_PROFILE = 2;
}