// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/io.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "ModelMonitoringProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";
option (google.api.resource_definition) = {
  type: "monitoring.googleapis.com/NotificationChannel"
  pattern: "projects/{project}/notificationChannels/{notification_channel}"
};

// The objective configuration for model monitoring, including the information
// needed to detect anomalies for one particular model.
message ModelMonitoringObjectiveConfig {
  // Training Dataset information.
  message TrainingDataset {
    // Exactly one source for the training data may be set.
    oneof data_source {
      // The resource name of the Dataset used to train this Model.
      string dataset = 3 [(google.api.resource_reference) = {
        type: "aiplatform.googleapis.com/Dataset"
      }];

      // The Google Cloud Storage uri of the unmanaged Dataset used to train
      // this Model.
      GcsSource gcs_source = 4;

      // The BigQuery table of the unmanaged Dataset used to train this
      // Model.
      BigQuerySource bigquery_source = 5;
    }

    // Data format of the dataset, only applicable if the input is from
    // Google Cloud Storage.
    // The possible formats are:
    //
    // "tf-record"
    // The source file is a TFRecord file.
    //
    // "csv"
    // The source file is a CSV file.
    // "jsonl"
    // The source file is a JSONL file.
    string data_format = 2;

    // The target field name the model is to predict.
    // This field will be excluded when doing Predict and (or) Explain for the
    // training data.
    string target_field = 6;

    // Strategy to sample data from Training Dataset.
    // If not set, we process the whole dataset.
    SamplingStrategy logging_sampling_strategy = 7;
  }

  // The config for Training & Prediction data skew detection. It specifies the
  // training dataset sources and the skew detection parameters.
  message TrainingPredictionSkewDetectionConfig {
    // Key is the feature name and value is the threshold. If a feature needs
    // to be monitored for skew, a value threshold must be configured for that
    // feature. The threshold here is against feature distribution distance
    // between the training and prediction feature.
    map<string, ThresholdConfig> skew_thresholds = 1;

    // Key is the feature name and value is the threshold. The threshold here
    // is against attribution score distance between the training and
    // prediction feature.
    map<string, ThresholdConfig> attribution_score_skew_thresholds = 2;

    // Skew anomaly detection threshold used by all features.
    // When the per-feature thresholds are not set, this field can be used to
    // specify a threshold for all features.
    ThresholdConfig default_skew_threshold = 6;
  }

  // The config for Prediction data drift detection.
  message PredictionDriftDetectionConfig {
    // Key is the feature name and value is the threshold. If a feature needs
    // to be monitored for drift, a value threshold must be configured for that
    // feature. The threshold here is against feature distribution distance
    // between different time windows.
    map<string, ThresholdConfig> drift_thresholds = 1;

    // Key is the feature name and value is the threshold. The threshold here
    // is against attribution score distance between different time windows.
    map<string, ThresholdConfig> attribution_score_drift_thresholds = 2;

    // Drift anomaly detection threshold used by all features.
    // When the per-feature thresholds are not set, this field can be used to
    // specify a threshold for all features.
    ThresholdConfig default_drift_threshold = 5;
  }

  // The config for integrating with Vertex Explainable AI. Only applicable if
  // the Model has explanation_spec populated.
  message ExplanationConfig {
    // Output from
    // [BatchPredictionJob][google.cloud.aiplatform.v1.BatchPredictionJob] for
    // Model Monitoring baseline dataset, which can be used to generate
    // baseline attribution scores.
    message ExplanationBaseline {
      // The storage format of the predictions generated by the
      // BatchPrediction job.
      enum PredictionFormat {
        // Should not be set.
        PREDICTION_FORMAT_UNSPECIFIED = 0;

        // Predictions are in JSONL files.
        JSONL = 2;

        // Predictions are in BigQuery.
        BIGQUERY = 3;
      }

      // The configuration specifying the BatchExplain job output. This can be
      // used to generate the baseline of feature attribution scores.
      oneof destination {
        // Cloud Storage location for BatchExplain output.
        GcsDestination gcs = 2;

        // BigQuery location for BatchExplain output.
        BigQueryDestination bigquery = 3;
      }

      // The storage format of the predictions generated by the
      // BatchPrediction job.
      PredictionFormat prediction_format = 1;
    }

    // Whether to analyze the Vertex Explainable AI feature attribute scores.
    // If set to true, Vertex AI will log the feature attributions from the
    // explain response and do the skew/drift detection for them.
    bool enable_feature_attributes = 1;

    // Predictions generated by the BatchPredictionJob using baseline dataset.
    ExplanationBaseline explanation_baseline = 2;
  }

  // Training dataset for models. This field has to be set only if
  // TrainingPredictionSkewDetectionConfig is specified.
  TrainingDataset training_dataset = 1;

  // The config for skew between training data and prediction data.
  TrainingPredictionSkewDetectionConfig
      training_prediction_skew_detection_config = 2;

  // The config for drift of prediction data.
  PredictionDriftDetectionConfig prediction_drift_detection_config = 3;

  // The config for integrating with Vertex Explainable AI.
  ExplanationConfig explanation_config = 5;
}

// The alert config for model monitoring.
message ModelMonitoringAlertConfig {
  // The config for email alert.
  message EmailAlertConfig {
    // The email addresses to send the alert.
    repeated string user_emails = 1;
  }

  oneof alert {
    // Email alert config.
    EmailAlertConfig email_alert_config = 1;
  }

  // Dump the anomalies to Cloud Logging. The anomalies will be put to json
  // payload encoded from proto
  // [google.cloud.aiplatform.logging.ModelMonitoringAnomaliesLogEntry][].
  // This can be further routed to Pub/Sub or any other services supported
  // by Cloud Logging.
  bool enable_logging = 2;

  // Resource names of the NotificationChannels to send alert.
  // Must be of the format
  // `projects/<project_id_or_number>/notificationChannels/<channel_id>`
  repeated string notification_channels = 3 [(google.api.resource_reference) = {
    type: "monitoring.googleapis.com/NotificationChannel"
  }];
}

// The config for feature monitoring threshold.
message ThresholdConfig {
  oneof threshold {
    // Specify a threshold value that can trigger the alert.
    // If this threshold config is for feature distribution distance:
    // 1. For categorical feature, the distribution distance is calculated by
    //    L-infinity norm.
    // 2. For numerical feature, the distribution distance is calculated by
    //    Jensen–Shannon divergence.
    // Each feature must have a non-zero threshold if they need to be
    // monitored. Otherwise no alert will be triggered for that feature.
    double value = 1;
  }
}

// Sampling Strategy for logging, can be for both training and prediction
// dataset.
message SamplingStrategy {
  // Requests are randomly selected.
  message RandomSampleConfig {
    // Sample rate (0, 1]
    double sample_rate = 1;
  }

  // Random sample config. Will support more sampling strategies later.
  RandomSampleConfig random_sample_config = 1;
}