// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1beta1;

import "google/api/resource.proto";
import "google/cloud/aiplatform/v1beta1/io.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
option go_package = "cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "ModelMonitoringProto";
option java_package = "com.google.cloud.aiplatform.v1beta1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
option ruby_package = "Google::Cloud::AIPlatform::V1beta1";
option (google.api.resource_definition) = {
  type: "monitoring.googleapis.com/NotificationChannel"
  pattern: "projects/{project}/notificationChannels/{notification_channel}"
};

// The model monitoring configuration used for Batch Prediction Job.
message ModelMonitoringConfig {
  // Model monitoring objective config.
  repeated ModelMonitoringObjectiveConfig objective_configs = 3;

  // Model monitoring alert config.
  ModelMonitoringAlertConfig alert_config = 2;

  // YAML schema file uri in Cloud Storage describing the format of a single
  // instance that you want Tensorflow Data Validation (TFDV) to analyze.
  //
  // If there are any data type differences between predict instance and TFDV
  // instance, this field can be used to override the schema.
  // For models trained with Vertex AI, this field must be set as all the
  // fields in predict instance formatted as string.
  string analysis_instance_schema_uri = 4;

  // A Google Cloud Storage location for batch prediction model monitoring to
  // dump statistics and anomalies.
  // If not provided, a folder will be created in customer project to hold
  // statistics and anomalies.
  GcsDestination stats_anomalies_base_directory = 5;
}

// The objective configuration for model monitoring, including the information
// needed to detect anomalies for one particular model.
message ModelMonitoringObjectiveConfig {
  // Training Dataset information.
  message TrainingDataset {
    // The source of the training dataset; exactly one may be set.
    oneof data_source {
      // The resource name of the Dataset used to train this Model.
      string dataset = 3 [(google.api.resource_reference) = {
        type: "aiplatform.googleapis.com/Dataset"
      }];

      // The Google Cloud Storage uri of the unmanaged Dataset used to train
      // this Model.
      GcsSource gcs_source = 4;

      // The BigQuery table of the unmanaged Dataset used to train this
      // Model.
      BigQuerySource bigquery_source = 5;
    }

    // Data format of the dataset, only applicable if the input is from
    // Google Cloud Storage.
    // The possible formats are:
    //
    // "tf-record"
    // The source file is a TFRecord file.
    //
    // "csv"
    // The source file is a CSV file.
    // "jsonl"
    // The source file is a JSONL file.
    string data_format = 2;

    // The target field name the model is to predict.
    // This field will be excluded when doing Predict and (or) Explain for the
    // training data.
    string target_field = 6;

    // Strategy to sample data from Training Dataset.
    // If not set, we process the whole dataset.
    SamplingStrategy logging_sampling_strategy = 7;
  }

  // The config for Training & Prediction data skew detection. It specifies the
  // training dataset sources and the skew detection parameters.
  message TrainingPredictionSkewDetectionConfig {
    // Key is the feature name and value is the threshold. If a feature needs to
    // be monitored for skew, a value threshold must be configured for that
    // feature. The threshold here is against feature distribution distance
    // between the training and prediction feature.
    map<string, ThresholdConfig> skew_thresholds = 1;

    // Key is the feature name and value is the threshold. The threshold here is
    // against attribution score distance between the training and prediction
    // feature.
    map<string, ThresholdConfig> attribution_score_skew_thresholds = 2;

    // Skew anomaly detection threshold used by all features.
    // When the per-feature thresholds are not set, this field can be used to
    // specify a threshold for all features.
    ThresholdConfig default_skew_threshold = 6;
  }

  // The config for Prediction data drift detection.
  message PredictionDriftDetectionConfig {
    // Key is the feature name and value is the threshold. If a feature needs to
    // be monitored for drift, a value threshold must be configured for that
    // feature. The threshold here is against feature distribution distance
    // between different time windows.
    map<string, ThresholdConfig> drift_thresholds = 1;

    // Key is the feature name and value is the threshold. The threshold here is
    // against attribution score distance between different time windows.
    map<string, ThresholdConfig> attribution_score_drift_thresholds = 2;

    // Drift anomaly detection threshold used by all features.
    // When the per-feature thresholds are not set, this field can be used to
    // specify a threshold for all features.
    ThresholdConfig default_drift_threshold = 5;
  }

  // The config for integrating with Vertex Explainable AI. Only applicable if
  // the Model has explanation_spec populated.
  message ExplanationConfig {
    // Output from
    // [BatchPredictionJob][google.cloud.aiplatform.v1beta1.BatchPredictionJob]
    // for Model Monitoring baseline dataset, which can be used to generate
    // baseline attribution scores.
    message ExplanationBaseline {
      // The storage format of the predictions generated by the BatchPrediction
      // job.
      enum PredictionFormat {
        // Should not be set.
        PREDICTION_FORMAT_UNSPECIFIED = 0;

        // Predictions are in JSONL files.
        JSONL = 2;

        // Predictions are in BigQuery.
        BIGQUERY = 3;
      }

      // The configuration specifying the BatchExplain job output. This can be
      // used to generate the baseline of feature attribution scores.
      oneof destination {
        // Cloud Storage location for BatchExplain output.
        GcsDestination gcs = 2;

        // BigQuery location for BatchExplain output.
        BigQueryDestination bigquery = 3;
      }

      // The storage format of the predictions generated by the BatchPrediction
      // job.
      PredictionFormat prediction_format = 1;
    }

    // Whether to analyze the Vertex Explainable AI feature attribute scores.
    // If set to true, Vertex AI will log the feature attributions from
    // explain response and do the skew/drift detection for them.
    bool enable_feature_attributes = 1;

    // Predictions generated by the BatchPredictionJob using baseline dataset.
    ExplanationBaseline explanation_baseline = 2;
  }

  // Training dataset for models. This field has to be set only if
  // TrainingPredictionSkewDetectionConfig is specified.
  TrainingDataset training_dataset = 1;

  // The config for skew between training data and prediction data.
  TrainingPredictionSkewDetectionConfig
      training_prediction_skew_detection_config = 2;

  // The config for drift of prediction data.
  PredictionDriftDetectionConfig prediction_drift_detection_config = 3;

  // The config for integrating with Vertex Explainable AI.
  ExplanationConfig explanation_config = 5;
}

// The alert config for model monitoring.
message ModelMonitoringAlertConfig {
  // The config for email alert.
  message EmailAlertConfig {
    // The email addresses to send the alert.
    repeated string user_emails = 1;
  }

  // The alerting channel; currently only email is supported.
  oneof alert {
    // Email alert config.
    EmailAlertConfig email_alert_config = 1;
  }

  // Dump the anomalies to Cloud Logging. The anomalies will be put to json
  // payload encoded from proto
  // [google.cloud.aiplatform.logging.ModelMonitoringAnomaliesLogEntry][].
  // This can be further routed to Pub/Sub or any other services supported
  // by Cloud Logging.
  bool enable_logging = 2;

  // Resource names of the NotificationChannels to send alert.
  // Must be of the format
  // `projects/<project_id_or_number>/notificationChannels/<channel_id>`
  repeated string notification_channels = 3 [(google.api.resource_reference) = {
    type: "monitoring.googleapis.com/NotificationChannel"
  }];
}

// The config for feature monitoring threshold.
message ThresholdConfig {
  // The threshold; currently only a plain numeric value is supported.
  oneof threshold {
    // Specify a threshold value that can trigger the alert.
    // If this threshold config is for feature distribution distance:
    // 1. For categorical feature, the distribution distance is calculated by
    //    L-infinity norm.
    // 2. For numerical feature, the distribution distance is calculated by
    //    Jensen–Shannon divergence.
    // Each feature must have a non-zero threshold if they need to be monitored.
    // Otherwise no alert will be triggered for that feature.
    double value = 1;
  }
}

// Sampling Strategy for logging, can be for both training and prediction
// dataset.
message SamplingStrategy {
  // Requests are randomly selected.
  message RandomSampleConfig {
    // Sample rate (0, 1]
    double sample_rate = 1;
  }

  // Random sample config. Will support more sampling strategies later.
  RandomSampleConfig random_sample_config = 1;
}