• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1beta1;

import "google/api/resource.proto";
import "google/cloud/aiplatform/v1beta1/io.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
option go_package = "cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "ModelMonitoringProto";
option java_package = "com.google.cloud.aiplatform.v1beta1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
option ruby_package = "Google::Cloud::AIPlatform::V1beta1";
option (google.api.resource_definition) = {
  type: "monitoring.googleapis.com/NotificationChannel"
  pattern: "projects/{project}/notificationChannels/{notification_channel}"
};

// The model monitoring configuration used for Batch Prediction Job.
message ModelMonitoringConfig {
  // NOTE(review): field number 1 is unused but not listed in a `reserved`
  // statement — presumably a deleted field; confirm and reserve it (number
  // and name) to prevent accidental reuse.

  // Model monitoring objective config. Each entry configures the anomaly
  // detection objectives (skew/drift) for one model.
  repeated ModelMonitoringObjectiveConfig objective_configs = 3;

  // Model monitoring alert config. Controls where detected anomalies are
  // reported (email, Cloud Logging, notification channels).
  ModelMonitoringAlertConfig alert_config = 2;

  // YAML schema file uri in Cloud Storage describing the format of a single
  // instance that you want Tensorflow Data Validation (TFDV) to analyze.
  //
  // If there are any data type differences between predict instance and TFDV
  // instance, this field can be used to override the schema.
  // For models trained with Vertex AI, this field must be set as all the
  // fields in predict instance formatted as string.
  string analysis_instance_schema_uri = 4;

  // A Google Cloud Storage location for batch prediction model monitoring to
  // dump statistics and anomalies.
  // If not provided, a folder will be created in customer project to hold
  // statistics and anomalies.
  GcsDestination stats_anomalies_base_directory = 5;
}
57
// The objective configuration for model monitoring, including the information
// needed to detect anomalies for one particular model.
message ModelMonitoringObjectiveConfig {
  // Training Dataset information.
  message TrainingDataset {
    // NOTE(review): field number 1 is unused but not reserved — presumably a
    // deleted field; confirm and add a `reserved` statement for it.

    // The source of the training data. Exactly one of these may be set.
    oneof data_source {
      // The resource name of the Dataset used to train this Model.
      string dataset = 3 [(google.api.resource_reference) = {
        type: "aiplatform.googleapis.com/Dataset"
      }];

      // The Google Cloud Storage uri of the unmanaged Dataset used to train
      // this Model.
      GcsSource gcs_source = 4;

      // The BigQuery table of the unmanaged Dataset used to train this
      // Model.
      BigQuerySource bigquery_source = 5;
    }

    // Data format of the dataset, only applicable if the input is from
    // Google Cloud Storage.
    // The possible formats are:
    //
    // "tf-record"
    // The source file is a TFRecord file.
    //
    // "csv"
    // The source file is a CSV file.
    //
    // "jsonl"
    // The source file is a JSONL file.
    string data_format = 2;

    // The target field name the model is to predict.
    // This field will be excluded when doing Predict and (or) Explain for the
    // training data.
    string target_field = 6;

    // Strategy to sample data from Training Dataset.
    // If not set, we process the whole dataset.
    SamplingStrategy logging_sampling_strategy = 7;
  }

  // The config for Training & Prediction data skew detection. It specifies the
  // training dataset sources and the skew detection parameters.
  message TrainingPredictionSkewDetectionConfig {
    // Key is the feature name and value is the threshold. If a feature needs
    // to be monitored for skew, a value threshold must be configured for that
    // feature. The threshold here is against feature distribution distance
    // between the training and prediction feature.
    map<string, ThresholdConfig> skew_thresholds = 1;

    // Key is the feature name and value is the threshold. The threshold here
    // is against attribution score distance between the training and
    // prediction feature.
    map<string, ThresholdConfig> attribution_score_skew_thresholds = 2;

    // Skew anomaly detection threshold used by all features.
    // When the per-feature thresholds are not set, this field can be used to
    // specify a threshold for all features.
    ThresholdConfig default_skew_threshold = 6;
  }

  // The config for Prediction data drift detection.
  message PredictionDriftDetectionConfig {
    // Key is the feature name and value is the threshold. If a feature needs
    // to be monitored for drift, a value threshold must be configured for
    // that feature. The threshold here is against feature distribution
    // distance between different time windows.
    map<string, ThresholdConfig> drift_thresholds = 1;

    // Key is the feature name and value is the threshold. The threshold here
    // is against attribution score distance between different time windows.
    map<string, ThresholdConfig> attribution_score_drift_thresholds = 2;

    // Drift anomaly detection threshold used by all features.
    // When the per-feature thresholds are not set, this field can be used to
    // specify a threshold for all features.
    ThresholdConfig default_drift_threshold = 5;
  }

  // The config for integrating with Vertex Explainable AI. Only applicable if
  // the Model has explanation_spec populated.
  message ExplanationConfig {
    // Output from
    // [BatchPredictionJob][google.cloud.aiplatform.v1beta1.BatchPredictionJob]
    // for Model Monitoring baseline dataset, which can be used to generate
    // baseline attribution scores.
    message ExplanationBaseline {
      // The storage format of the predictions generated by the
      // BatchPrediction job.
      enum PredictionFormat {
        // Should not be set.
        PREDICTION_FORMAT_UNSPECIFIED = 0;

        // NOTE(review): enum number 1 is skipped but not reserved —
        // presumably a removed value; confirm and reserve it.

        // Predictions are in JSONL files.
        JSONL = 2;

        // Predictions are in BigQuery.
        BIGQUERY = 3;
      }

      // The configuration specifying of BatchExplain job output. This can be
      // used to generate the baseline of feature attribution scores.
      // At most one destination may be set.
      oneof destination {
        // Cloud Storage location for BatchExplain output.
        GcsDestination gcs = 2;

        // BigQuery location for BatchExplain output.
        BigQueryDestination bigquery = 3;
      }

      // The storage format of the predictions generated by the
      // BatchPrediction job.
      PredictionFormat prediction_format = 1;
    }

    // Whether to analyze the Vertex Explainable AI feature attribute scores.
    // If set to true, Vertex AI will log the feature attributions from
    // explain response and do the skew/drift detection for them.
    bool enable_feature_attributes = 1;

    // Predictions generated by the BatchPredictionJob using baseline dataset.
    ExplanationBaseline explanation_baseline = 2;
  }

  // NOTE(review): field number 4 is unused but not reserved in this message —
  // presumably a deleted field; confirm and reserve it.

  // Training dataset for models. This field has to be set only if
  // TrainingPredictionSkewDetectionConfig is specified.
  TrainingDataset training_dataset = 1;

  // The config for skew between training data and prediction data.
  TrainingPredictionSkewDetectionConfig
      training_prediction_skew_detection_config = 2;

  // The config for drift of prediction data.
  PredictionDriftDetectionConfig prediction_drift_detection_config = 3;

  // The config for integrating with Vertex Explainable AI.
  ExplanationConfig explanation_config = 5;
}
196
// The alert config for model monitoring.
message ModelMonitoringAlertConfig {
  // The config for email alert.
  message EmailAlertConfig {
    // The email addresses to send the alert.
    repeated string user_emails = 1;
  }

  // The alert notification destination. Currently only email is supported;
  // the oneof allows additional channels to be added later.
  oneof alert {
    // Email alert config.
    EmailAlertConfig email_alert_config = 1;
  }

  // Dump the anomalies to Cloud Logging. The anomalies will be put to json
  // payload encoded from proto
  // [google.cloud.aiplatform.logging.ModelMonitoringAnomaliesLogEntry][].
  // This can be further routed to Pub/Sub or any other services supported
  // by Cloud Logging.
  bool enable_logging = 2;

  // Resource names of the NotificationChannels to send alert.
  // Must be of the format
  // `projects/<project_id_or_number>/notificationChannels/<channel_id>`
  repeated string notification_channels = 3 [(google.api.resource_reference) = {
    type: "monitoring.googleapis.com/NotificationChannel"
  }];
}
224
// The config for feature monitoring threshold.
message ThresholdConfig {
  // The threshold to apply. A oneof so that additional threshold kinds can be
  // added later without breaking existing fields.
  oneof threshold {
    // Specify a threshold value that can trigger the alert.
    // If this threshold config is for feature distribution distance:
    //   1. For categorical feature, the distribution distance is calculated
    //      by L-infinity norm.
    //   2. For numerical feature, the distribution distance is calculated by
    //      Jensen–Shannon divergence.
    // Each feature must have a non-zero threshold if they need to be
    // monitored. Otherwise no alert will be triggered for that feature.
    double value = 1;
  }
}
239
// Sampling Strategy for logging, can be for both training and prediction
// dataset.
message SamplingStrategy {
  // Requests are randomly selected.
  message RandomSampleConfig {
    // Sample rate in the range (0, 1]: the fraction of requests to log.
    double sample_rate = 1;
  }

  // Random sample config. Will support more sampling strategies later.
  RandomSampleConfig random_sample_config = 1;
}
252