// Copyright 2019 The gRPC Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This file contains the load reporting service (load_stats) protocol. The
// envoy.api.v2 types it references (Locality, Node) are expected to come from
// the imported eds_for_test.proto.
//
// TODO(juanlishen): This file is a hack to avoid a problem we're
// currently having where we can't depend on a proto file in an external
// repo due to bazel limitations. Once that's fixed, this should be
// removed. Until then, it should be used in the gRPC tests only, or else it
// will cause a conflict due to the same proto messages being defined in
// multiple files in the same binary.

syntax = "proto3";

package envoy.service.load_stats.v2;

import "google/protobuf/duration.proto";
import "src/proto/grpc/testing/xds/eds_for_test.proto";

// Aggregated statistics for a single named load metric.
// [#not-implemented-hide:] Not configuration. TBD how to doc proto APIs.
message EndpointLoadMetricStats {
  // Name of the metric; may be empty.
  string metric_name = 1;

  // Number of calls that finished and included this metric.
  uint64 num_requests_finished_with_metric = 2;

  // Sum of metric values across all calls that finished with this metric for
  // load_reporting_interval.
  double total_metric_value = 3;
}

// Load statistics collected for one locality (and priority) of a cluster.
message UpstreamLocalityStats {
  // Name of zone, region and optionally endpoint group these metrics were
  // collected from. Zone and region names could be empty if unknown.
  envoy.api.v2.Locality locality = 1;

  // The total number of requests successfully completed by the endpoints in
  // the locality.
  uint64 total_successful_requests = 2;

  // The total number of unfinished requests.
  uint64 total_requests_in_progress = 3;

  // The total number of requests that failed due to errors at the endpoint,
  // aggregated over all endpoints in the locality.
  uint64 total_error_requests = 4;

  // The total number of requests that were issued by this Envoy since
  // the last report. This information is aggregated over all the
  // upstream endpoints in the locality.
  uint64 total_issued_requests = 8;

  // Stats for multi-dimensional load balancing.
  repeated EndpointLoadMetricStats load_metric_stats = 5;

  // Field number 7 belongs to the commented-out upstream_endpoint_stats field
  // below. It is reserved so that it cannot be accidentally reused with a
  // different meaning in this copy of the schema.
  reserved 7;

  // // Endpoint granularity stats information for this locality. This information
  // // is populated if the Server requests it by setting
  // // :ref:`LoadStatsResponse.report_endpoint_granularity<envoy_api_field_load_stats.LoadStatsResponse.report_endpoint_granularity>`.
  // repeated UpstreamEndpointStats upstream_endpoint_stats = 7;

  // [#not-implemented-hide:] The priority of the endpoint group these metrics
  // were collected from.
  uint32 priority = 6;
}

// Per cluster load stats. Envoy reports these stats to a management server in
// a :ref:`LoadStatsRequest<envoy_api_msg_load_stats.LoadStatsRequest>`
// [#not-implemented-hide:] Not configuration. TBD how to doc proto APIs.
// Next ID: 7
message ClusterStats {
  // The name of the cluster.
  string cluster_name = 1;

  // The eds_cluster_config service_name of the cluster.
  // It's possible that two clusters send the same service_name to EDS;
  // in that case, the management server is supposed to do aggregation on the
  // load reports.
  string cluster_service_name = 6;

  // Need at least one.
  repeated UpstreamLocalityStats upstream_locality_stats = 2;

  // Cluster-level stats such as total_successful_requests may be computed by
  // summing upstream_locality_stats. In addition, below there are additional
  // cluster-wide stats.
  //
  // The total number of dropped requests. This covers requests
  // deliberately dropped by the drop_overload policy and circuit breaking.
  uint64 total_dropped_requests = 3;

  // Count of deliberately dropped requests for a single drop category.
  message DroppedRequests {
    // Identifier for the policy specifying the drop.
    string category = 1;

    // Total number of deliberately dropped requests for the category.
    uint64 dropped_count = 2;
  }

  // Information about deliberately dropped requests for each category
  // specified in the DropOverload policy.
  repeated DroppedRequests dropped_requests = 5;

  // Period over which the actual load report occurred. This will be
  // guaranteed to include every request reported. Due to system load and
  // delays between the *LoadStatsRequest* sent from Envoy and the
  // *LoadStatsResponse* message sent from the management server, this may be
  // longer than the requested load reporting interval in the
  // *LoadStatsResponse*.
  google.protobuf.Duration load_report_interval = 4;
}

// [#protodoc-title: Load reporting service]

// Service through which Envoy streams load reports to a management server.
service LoadReportingService {
  // Advanced API to allow for multi-dimensional load balancing by remote
  // server. For receiving LB assignments, the steps are:
  // 1. The management server is configured with per cluster/zone/load metric
  //    capacity configuration. The capacity configuration definition is
  //    outside of the scope of this document.
  // 2. Envoy issues a standard {Stream,Fetch}Endpoints request for the
  //    clusters to balance.
  //
  // Independently, Envoy will initiate a StreamLoadStats bidi stream with a
  // management server:
  // 1. Once a connection is established, the management server publishes a
  //    LoadStatsResponse for all clusters it is interested in learning load
  //    stats about.
  // 2. For each cluster, Envoy load balances incoming traffic to upstream
  //    hosts based on per-zone weights and/or per-instance weights (if
  //    specified) based on intra-zone LbPolicy. This information comes from
  //    the above {Stream,Fetch}Endpoints.
  // 3. When upstream hosts reply, they optionally add header <define header
  //    name> with ASCII representation of EndpointLoadMetricStats.
  // 4. Envoy aggregates load reports over the period of time given to it in
  //    LoadStatsResponse.load_reporting_interval. This includes aggregation
  //    stats Envoy maintains by itself (total_requests, rpc_errors etc.) as
  //    well as load metrics from upstream hosts.
  // 5. When the timer of load_reporting_interval expires, Envoy sends new
  //    LoadStatsRequest filled with load reports for each cluster.
  // 6. The management server uses the load reports from all reported Envoys
  //    from around the world, computes global assignment and prepares traffic
  //    assignment destined for each zone Envoys are located in. Go to 2.
  rpc StreamLoadStats(stream LoadStatsRequest)
      returns (stream LoadStatsResponse);
}

// A load report Envoy sends to the management server.
// [#not-implemented-hide:] Not configuration. TBD how to doc proto APIs.
message LoadStatsRequest {
  // Node identifier for Envoy instance.
  envoy.api.v2.Node node = 1;

  // A list of load stats to report.
  repeated ClusterStats cluster_stats = 2;
}

// The management server sends Envoy a LoadStatsResponse with all clusters it
// is interested in learning load stats about.
// [#not-implemented-hide:] Not configuration. TBD how to doc proto APIs.
message LoadStatsResponse {
  // Clusters to report stats for.
  // Not populated if *send_all_clusters* is true.
  repeated string clusters = 1;

  // If true, the client should send all clusters it knows about.
  // Only clients that advertise the "envoy.lrs.supports_send_all_clusters"
  // capability in their
  // :ref:`client_features<envoy_api_field_core.Node.client_features>` field
  // will honor this field.
  bool send_all_clusters = 4;

  // The minimum interval of time to collect stats over. This is only a
  // minimum for two reasons:
  // 1. There may be some delay from when the timer fires until stats sampling
  //    occurs.
  // 2. For clusters that were already featured in the previous
  //    *LoadStatsResponse*, any traffic that is observed in between the
  //    corresponding previous *LoadStatsRequest* and this *LoadStatsResponse*
  //    will also be accumulated and billed to the cluster. This avoids a
  //    period of inobservability that might otherwise exist between the
  //    messages. New clusters are not subject to this consideration.
  google.protobuf.Duration load_reporting_interval = 2;

  // Set to *true* if the management server supports endpoint granularity
  // report.
  bool report_endpoint_granularity = 3;
}