// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/io.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "ModelMonitoringProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// Declares the Cloud Monitoring NotificationChannel resource type so that
// fields in this file can point at it through
// `google.api.resource_reference`.
option (google.api.resource_definition) = {
  type: "monitoring.googleapis.com/NotificationChannel"
  pattern: "projects/{project}/notificationChannels/{notification_channel}"
};

// The objective configuration for model monitoring, including the information
// needed to detect anomalies for one particular model.
message ModelMonitoringObjectiveConfig {
  // Training Dataset information.
  message TrainingDataset {
    // Where the training data comes from. At most one source can be set.
    oneof data_source {
      // The resource name of the Dataset used to train this Model.
      string dataset = 3 [(google.api.resource_reference) = {
        type: "aiplatform.googleapis.com/Dataset"
      }];

      // The Google Cloud Storage URI of the unmanaged Dataset used to train
      // this Model.
      GcsSource gcs_source = 4;

      // The BigQuery table of the unmanaged Dataset used to train this
      // Model.
      BigQuerySource bigquery_source = 5;
    }

    // Data format of the dataset, only applicable if the input is from
    // Google Cloud Storage.
    // The possible formats are:
    //
    // "tf-record"
    // The source file is a TFRecord file.
    //
    // "csv"
    // The source file is a CSV file.
    //
    // "jsonl"
    // The source file is a JSONL file.
    string data_format = 2;

    // The target field name the model is to predict.
    // This field will be excluded when doing Predict and (or) Explain for the
    // training data.
    string target_field = 6;

    // Strategy to sample data from the Training Dataset.
    // If not set, the whole dataset is processed.
    SamplingStrategy logging_sampling_strategy = 7;
  }

  // The config for Training & Prediction data skew detection. It specifies the
  // training dataset sources and the skew detection parameters.
  message TrainingPredictionSkewDetectionConfig {
    // Key is the feature name and value is the threshold. If a feature needs to
    // be monitored for skew, a value threshold must be configured for that
    // feature. The threshold here is against feature distribution distance
    // between the training and prediction feature.
    map<string, ThresholdConfig> skew_thresholds = 1;

    // Key is the feature name and value is the threshold. The threshold here is
    // against attribution score distance between the training and prediction
    // feature.
    map<string, ThresholdConfig> attribution_score_skew_thresholds = 2;

    // Skew anomaly detection threshold used by all features.
    // When the per-feature thresholds are not set, this field can be used to
    // specify a threshold for all features.
    ThresholdConfig default_skew_threshold = 6;
  }

  // The config for Prediction data drift detection.
  message PredictionDriftDetectionConfig {
    // Key is the feature name and value is the threshold. If a feature needs to
    // be monitored for drift, a value threshold must be configured for that
    // feature. The threshold here is against feature distribution distance
    // between different time windows.
    map<string, ThresholdConfig> drift_thresholds = 1;

    // Key is the feature name and value is the threshold. The threshold here is
    // against attribution score distance between different time windows.
    map<string, ThresholdConfig> attribution_score_drift_thresholds = 2;

    // Drift anomaly detection threshold used by all features.
    // When the per-feature thresholds are not set, this field can be used to
    // specify a threshold for all features.
    ThresholdConfig default_drift_threshold = 5;
  }

  // The config for integrating with Vertex Explainable AI. Only applicable if
  // the Model has explanation_spec populated.
  message ExplanationConfig {
    // Output from
    // [BatchPredictionJob][google.cloud.aiplatform.v1.BatchPredictionJob] for
    // Model Monitoring baseline dataset, which can be used to generate baseline
    // attribution scores.
    message ExplanationBaseline {
      // The storage format of the predictions generated by the BatchPrediction
      // job.
      enum PredictionFormat {
        // Should not be set.
        PREDICTION_FORMAT_UNSPECIFIED = 0;

        // NOTE(review): value 1 is not assigned in this enum. If a value was
        // removed at some point it should be declared `reserved` - confirm.

        // Predictions are in JSONL files.
        JSONL = 2;

        // Predictions are in BigQuery.
        BIGQUERY = 3;
      }

      // The configuration specifying the BatchExplain job output location.
      // This can be used to generate the baseline of feature attribution
      // scores. At most one destination can be set.
      oneof destination {
        // Cloud Storage location for BatchExplain output.
        GcsDestination gcs = 2;

        // BigQuery location for BatchExplain output.
        BigQueryDestination bigquery = 3;
      }

      // The storage format of the predictions generated by the BatchPrediction
      // job.
      PredictionFormat prediction_format = 1;
    }

    // Whether to analyze the Vertex Explainable AI feature attribution scores.
    // If set to true, Vertex AI will log the feature attributions from the
    // explain response and do the skew/drift detection for them.
    bool enable_feature_attributes = 1;

    // Predictions generated by the BatchPredictionJob using baseline dataset.
    ExplanationBaseline explanation_baseline = 2;
  }

  // Training dataset for models. This field has to be set only if
  // TrainingPredictionSkewDetectionConfig is specified.
  TrainingDataset training_dataset = 1;

  // The config for skew between training data and prediction data.
  TrainingPredictionSkewDetectionConfig
      training_prediction_skew_detection_config = 2;

  // The config for drift of prediction data.
  PredictionDriftDetectionConfig prediction_drift_detection_config = 3;

  // The config for integrating with Vertex Explainable AI.
  ExplanationConfig explanation_config = 5;
}

// The alert config for model monitoring.
message ModelMonitoringAlertConfig {
  // The config for email alert.
  message EmailAlertConfig {
    // The email addresses to send the alert to.
    repeated string user_emails = 1;
  }

  // Alert configuration selected through this oneof. `email_alert_config` is
  // the only member defined here.
  oneof alert {
    // Email alert config.
    EmailAlertConfig email_alert_config = 1;
  }

  // Dump the anomalies to Cloud Logging. The anomalies are written as a JSON
  // payload encoded from the proto
  // [google.cloud.aiplatform.logging.ModelMonitoringAnomaliesLogEntry][].
  // From there they can be routed to Pub/Sub or any other service supported
  // by Cloud Logging.
  bool enable_logging = 2;

  // Resource names of the NotificationChannels to send alerts to.
  // Must be of the format
  // `projects/<project_id_or_number>/notificationChannels/<channel_id>`
  repeated string notification_channels = 3 [(google.api.resource_reference) = {
    type: "monitoring.googleapis.com/NotificationChannel"
  }];
}

// The config for feature monitoring threshold.
message ThresholdConfig {
  // The threshold to apply. `value` is the only member defined here.
  oneof threshold {
    // Specify a threshold value that can trigger the alert.
    // If this threshold config is for feature distribution distance:
    //   1. For categorical feature, the distribution distance is calculated by
    //      L-infinity norm.
    //   2. For numerical feature, the distribution distance is calculated by
    //      Jensen–Shannon divergence.
    // Each feature must have a non-zero threshold if it needs to be monitored.
    // Otherwise no alert will be triggered for that feature.
    double value = 1;
  }
}

// Sampling Strategy for logging, can be for both training and prediction
// dataset.
message SamplingStrategy {
  // Requests are randomly selected.
  message RandomSampleConfig {
    // Sample rate, in the range (0, 1].
    double sample_rate = 1;
  }

  // Random sample config. Will support more sampling strategies later.
  RandomSampleConfig random_sample_config = 1;
}