• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2019 The gRPC Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// This file contains the LRS (load reporting service) protocol and its dependency.
16//
17// TODO(juanlishen): This file is a hack to avoid a problem we're
18// currently having where we can't depend on a proto file in an external
19// repo due to bazel limitations.  Once that's fixed, this should be
20// removed.  Until this, it should be used in the gRPC tests only, or else it
21// will cause a conflict due to the same proto messages being defined in
22// multiple files in the same binary.
23
24syntax = "proto3";
25
26package envoy.service.load_stats.v2;
27
28import "google/protobuf/duration.proto";
29import "src/proto/grpc/testing/xds/eds_for_test.proto";
30
// Aggregate for one named load metric: how many finished calls carried the
// metric and the sum of the values they reported.
// [#not-implemented-hide:] Not configuration. TBD how to doc proto APIs.
message EndpointLoadMetricStats {
  // Metric name. An empty string is allowed.
  string metric_name = 1;

  // Count of finished calls that included this metric.
  uint64 num_requests_finished_with_metric = 2;

  // Total of the metric's values over every call that finished with this
  // metric during load_reporting_interval.
  double total_metric_value = 3;
}
43
// Load statistics for a single locality. Reported to the management server
// through ClusterStats.upstream_locality_stats.
message UpstreamLocalityStats {
  // Name of zone, region and optionally endpoint group these metrics were
  // collected from. Zone and region names could be empty if unknown.
  envoy.api.v2.Locality locality = 1;

  // The total number of requests successfully completed by the endpoints in the
  // locality.
  uint64 total_successful_requests = 2;

  // The total number of unfinished requests.
  uint64 total_requests_in_progress = 3;

  // The total number of requests that failed due to errors at the endpoint,
  // aggregated over all endpoints in the locality.
  uint64 total_error_requests = 4;

  // The total number of requests that were issued by this Envoy since
  // the last report. This information is aggregated over all the
  // upstream endpoints in the locality.
  uint64 total_issued_requests = 8;

  // Stats for multi-dimensional load balancing.
  repeated EndpointLoadMetricStats load_metric_stats = 5;

  // Endpoint granularity stats are not defined in this copy of the message; the
  // declaration is kept below, commented out, for reference only.
  //
  //  // Endpoint granularity stats information for this locality. This information
  //  // is populated if the Server requests it by setting
  //  // :ref:`LoadStatsResponse.report_endpoint_granularity<envoy_api_field_load_stats.LoadStatsResponse.report_endpoint_granularity>`.
  //  repeated UpstreamEndpointStats upstream_endpoint_stats = 7;
  //
  // Field number 7 is reserved so that no other field added to this message
  // can claim the number used by the commented-out declaration above.
  reserved 7;

  // [#not-implemented-hide:] The priority of the endpoint group these metrics
  // were collected from.
  uint32 priority = 6;
}
77
// Per cluster load stats. Envoy reports these stats to a management server in
// a :ref:`LoadStatsRequest<envoy_api_msg_load_stats.LoadStatsRequest>`
// [#not-implemented-hide:] Not configuration. TBD how to doc proto APIs.
// Next ID: 7
message ClusterStats {
  // Drop count for a single drop category.
  message DroppedRequests {
    // Identifier for the policy specifying the drop.
    string category = 1;

    // Total number of deliberately dropped requests for the category.
    uint64 dropped_count = 2;
  }

  // Name of the cluster.
  string cluster_name = 1;

  // The cluster's eds_cluster_config service_name. Two clusters may send the
  // same service_name to EDS; in that case the management server is supposed
  // to aggregate the load reports.
  string cluster_service_name = 6;

  // Per-locality stats. At least one entry is needed.
  repeated UpstreamLocalityStats upstream_locality_stats = 2;

  // Cluster-level totals such as total_successful_requests may be computed by
  // summing upstream_locality_stats. The fields that follow are additional
  // cluster-wide stats.
  //
  // Total number of dropped requests. Covers requests deliberately dropped by
  // the drop_overload policy as well as by circuit breaking.
  uint64 total_dropped_requests = 3;

  // Deliberately dropped requests, listed for each category specified in the
  // DropOverload policy.
  repeated DroppedRequests dropped_requests = 5;

  // Period over which the actual load report occurred. It is guaranteed to
  // include every request reported. Because of system load and delays between
  // the *LoadStatsRequest* sent from Envoy and the *LoadStatsResponse* sent
  // from the management server, it may be longer than the load reporting
  // interval requested in the *LoadStatsResponse*.
  google.protobuf.Duration load_report_interval = 4;
}
118
119// [#protodoc-title: Load reporting service]
120
// Service over which Envoy and a management server exchange load reports.
service LoadReportingService {
  // Advanced API that lets a remote server perform multi-dimensional load
  // balancing. LB assignments are received as follows:
  // 1. The management server is configured with per cluster/zone/load metric
  //    capacity configuration. How that capacity configuration is defined is
  //    outside the scope of this document.
  // 2. Envoy issues a standard {Stream,Fetch}Endpoints request for the clusters
  //    to balance.
  //
  // Independently of the above, Envoy initiates a StreamLoadStats bidi stream
  // with a management server:
  // 1. Once a connection is established, the management server publishes a
  //    LoadStatsResponse for all clusters it is interested in learning load
  //    stats about.
  // 2. For each cluster, Envoy load balances incoming traffic to upstream hosts
  //    based on per-zone weights and/or per-instance weights (if specified)
  //    based on intra-zone LbPolicy. This information comes from the
  //    {Stream,Fetch}Endpoints request described above.
  // 3. When upstream hosts reply, they optionally add header <define header
  //    name> with ASCII representation of EndpointLoadMetricStats.
  // 4. Envoy aggregates load reports over the period of time given to it in
  //    LoadStatsResponse.load_reporting_interval. The aggregation covers stats
  //    Envoy maintains by itself (total_requests, rpc_errors etc.) as well as
  //    load metrics from upstream hosts.
  // 5. When the load_reporting_interval timer expires, Envoy sends a new
  //    LoadStatsRequest filled with load reports for each cluster.
  // 6. The management server uses the load reports from all reported Envoys
  //    from around the world, computes the global assignment and prepares the
  //    traffic assignment destined for each zone Envoys are located in.
  //    Goto 2.
  rpc StreamLoadStats(stream LoadStatsRequest)
      returns (stream LoadStatsResponse) {}
}
153
// Load report sent by Envoy to the management server.
// [#not-implemented-hide:] Not configuration. TBD how to doc proto APIs.
message LoadStatsRequest {
  // Identifies the Envoy instance (node) sending the report.
  envoy.api.v2.Node node = 1;

  // The load stats being reported, as a list of ClusterStats entries.
  repeated ClusterStats cluster_stats = 2;
}
163
// Sent by the management server to Envoy, naming all clusters the server is
// interested in learning load stats about.
// [#not-implemented-hide:] Not configuration. TBD how to doc proto APIs.
message LoadStatsResponse {
  // Clusters to report stats for.
  // Left unpopulated when *send_all_clusters* is true.
  repeated string clusters = 1;

  // When true, the client should send all clusters it knows about.
  // Only clients that advertise the "envoy.lrs.supports_send_all_clusters"
  // capability in their
  // :ref:`client_features<envoy_api_field_core.Node.client_features>` field
  // will honor this field.
  bool send_all_clusters = 4;

  // The minimum interval of time to collect stats over. It is only a minimum,
  // for two reasons:
  // 1. There may be some delay from when the timer fires until stats sampling
  //    occurs.
  // 2. For clusters that were already featured in the previous
  //    *LoadStatsResponse*, any traffic observed between the corresponding
  //    previous *LoadStatsRequest* and this *LoadStatsResponse* is also
  //    accumulated and billed to the cluster. This avoids a period of
  //    inobservability that might otherwise exist between the messages. New
  //    clusters are not subject to this consideration.
  google.protobuf.Duration load_reporting_interval = 2;

  // Set to *true* if the management server supports endpoint granularity
  // reports.
  bool report_endpoint_granularity = 3;
}
190