// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1beta1;

import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
option go_package = "cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "FeatureMonitoringStatsProto";
option java_package = "com.google.cloud.aiplatform.v1beta1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
option ruby_package = "Google::Cloud::AIPlatform::V1beta1";

// Stats and Anomaly generated at specific timestamp for specific Feature.
// The start_time and end_time are used to define the time range of the dataset
// that current stats belong to, e.g. prediction traffic is bucketed into
// prediction datasets by time window. If the Dataset is not defined by time
// window, start_time = end_time. Timestamp of the stats and anomalies always
// refers to end_time. Raw stats and anomalies are stored in stats_uri or
// anomaly_uri in the tensorflow defined protos. Field data_stats contains
// almost identical information with the raw stats in Vertex AI
// defined proto, for UI to display.
message FeatureStatsAnomaly {
  // Feature importance score, only populated when cross-feature monitoring is
  // enabled. For now only used to represent feature attribution score within
  // range [0, 1] for
  // [ModelDeploymentMonitoringObjectiveType.FEATURE_ATTRIBUTION_SKEW][google.cloud.aiplatform.v1beta1.ModelDeploymentMonitoringObjectiveType.FEATURE_ATTRIBUTION_SKEW]
  // and
  // [ModelDeploymentMonitoringObjectiveType.FEATURE_ATTRIBUTION_DRIFT][google.cloud.aiplatform.v1beta1.ModelDeploymentMonitoringObjectiveType.FEATURE_ATTRIBUTION_DRIFT].
  double score = 1;

  // Path of the stats file for current feature values in Cloud Storage bucket.
  // Format: gs://<bucket_name>/<object_name>/stats.
  // Example: gs://monitoring_bucket/feature_name/stats.
  // Stats are stored as binary format with Protobuf message
  // [tensorflow.metadata.v0.FeatureNameStatistics](https://github.com/tensorflow/metadata/blob/master/tensorflow_metadata/proto/v0/statistics.proto).
  string stats_uri = 3;

  // Path of the anomaly file for current feature values in Cloud Storage
  // bucket.
  // Format: gs://<bucket_name>/<object_name>/anomalies.
  // Example: gs://monitoring_bucket/feature_name/anomalies.
  // Anomalies are stored as binary format with Protobuf message
  // [tensorflow.metadata.v0.AnomalyInfo](https://github.com/tensorflow/metadata/blob/master/tensorflow_metadata/proto/v0/anomalies.proto).
  string anomaly_uri = 4;

  // Deviation from the current stats to baseline stats.
  //   1. For categorical feature, the distribution distance is calculated by
  //      L-infinity norm.
  //   2. For numerical feature, the distribution distance is calculated by
  //      Jensen–Shannon divergence.
  double distribution_deviation = 5;

  // This is the threshold used when detecting anomalies.
  // The threshold can be changed by user, so this one might be different from
  // [ThresholdConfig.value][google.cloud.aiplatform.v1beta1.ThresholdConfig.value].
  double anomaly_detection_threshold = 9;

  // The start timestamp of window where stats were generated.
  // For objectives where time window doesn't make sense (e.g. Featurestore
  // Snapshot Monitoring), start_time is only used to indicate the monitoring
  // intervals, so it always equals (end_time - monitoring_interval).
  google.protobuf.Timestamp start_time = 7;

  // The end timestamp of window where stats were generated.
  // For objectives where time window doesn't make sense (e.g. Featurestore
  // Snapshot Monitoring), end_time indicates the timestamp of the data used to
  // generate stats (e.g. timestamp we take snapshots for feature values).
  google.protobuf.Timestamp end_time = 8;
}