• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_FL_SERVER_ITERATION_METRICS_H_
18 #define MINDSPORE_CCSRC_FL_SERVER_ITERATION_METRICS_H_
19 
20 #include <map>
21 #include <string>
22 #include <memory>
23 #include <fstream>
24 #include "ps/ps_context.h"
25 #include "ps/core/configuration.h"
26 #include "ps/core/file_configuration.h"
27 #include "fl/server/local_meta_store.h"
28 #include "fl/server/iteration.h"
29 
30 namespace mindspore {
31 namespace fl {
32 namespace server {
// String keys naming the individual metrics fields.
// NOTE(review): presumably used as keys of the metrics json object; the users are not visible in this header.

// Job-level fields.
constexpr const char *kFLName = "flName";
constexpr const char *kInstanceStatus = "instanceStatus";
constexpr const char *kFLIterationNum = "flIterationNum";
constexpr const char *kCurIteration = "currentIteration";

// Client participation counters.
constexpr const char *kJoinedClientNum = "joinedClientNum";
constexpr const char *kRejectedClientNum = "rejectedClientNum";

// Per-iteration results and timing.
constexpr const char *kMetricsAuc = "metricsAuc";
constexpr const char *kMetricsLoss = "metricsLoss";
constexpr const char *kIterExecutionTime = "iterationExecutionTime";
42 
43 const std::map<InstanceState, std::string> kInstanceStateName = {
44   {InstanceState::kRunning, "running"}, {InstanceState::kDisable, "disable"}, {InstanceState::kFinish, "finish"}};
45 
46 template <typename T>
JsonGetKeyWithException(const nlohmann::json & json,const std::string & key)47 inline T JsonGetKeyWithException(const nlohmann::json &json, const std::string &key) {
48   if (!json.contains(key)) {
49     MS_LOG(EXCEPTION) << "The key " << key << "does not exist in json " << json.dump();
50   }
51   return json[key].get<T>();
52 }
53 
// NOTE(review): presumably the configuration key under which the metrics settings are stored; confirm in the
// implementation file.
constexpr const char *kMetrics = "metrics";
55 
56 class IterationMetrics {
57  public:
IterationMetrics(const std::string & config_file)58   explicit IterationMetrics(const std::string &config_file)
59       : config_file_path_(config_file),
60         config_(nullptr),
61         fl_name_(""),
62         fl_iteration_num_(0),
63         cur_iteration_num_(0),
64         instance_state_(InstanceState::kFinish),
65         loss_(0.0),
66         accuracy_(0.0),
67         joined_client_num_(0),
68         rejected_client_num_(0),
69         iteration_time_cost_(0) {}
70   ~IterationMetrics() = default;
71 
72   bool Initialize();
73 
74   // Gather the details of this iteration and output to the persistent storage.
75   bool Summarize();
76 
77   // Clear data in persistent storage.
78   bool Clear();
79 
80   // Setters for the metrics data.
81   void set_fl_name(const std::string &fl_name);
82   void set_fl_iteration_num(size_t fl_iteration_num);
83   void set_cur_iteration_num(size_t cur_iteration_num);
84   void set_instance_state(InstanceState state);
85   void set_loss(float loss);
86   void set_accuracy(float acc);
87   void set_joined_client_num(size_t joined_client_num);
88   void set_rejected_client_num(size_t rejected_client_num);
89   void set_iteration_time_cost(uint64_t iteration_time_cost);
90 
91  private:
92   // This is the main config file set by ps context.
93   std::string config_file_path_;
94   std::unique_ptr<ps::core::FileConfiguration> config_;
95 
96   // The metrics file object.
97   std::fstream metrics_file_;
98 
99   // The metrics file path.
100   std::string metrics_file_path_;
101 
102   // Json object of metrics data.
103   nlohmann::basic_json<std::map, std::vector, std::string, bool, int64_t, uint64_t, float> js_;
104 
105   // The federated learning job name. Set by ps_context.
106   std::string fl_name_;
107 
108   // Federated learning iteration number. Set by ps_context.
109   // If this number of iterations are completed, one instance is finished.
110   size_t fl_iteration_num_;
111 
112   // Current iteration number.
113   size_t cur_iteration_num_;
114 
115   // Current instance state.
116   InstanceState instance_state_;
117 
118   // The training loss after this federated learning iteration, passed by worker.
119   float loss_;
120 
121   // The evaluation result after this federated learning iteration, passed by worker.
122   float accuracy_;
123 
124   // The number of clients which join the federated aggregation.
125   size_t joined_client_num_;
126 
127   // The number of clients which are not involved in federated aggregation.
128   size_t rejected_client_num_;
129 
130   // The time cost in millisecond for this completed iteration.
131   uint64_t iteration_time_cost_;
132 };
133 }  // namespace server
134 }  // namespace fl
135 }  // namespace mindspore
136 #endif  // MINDSPORE_CCSRC_FL_SERVER_ITERATION_METRICS_H_
137