1 /*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /*
18 * A service that exchanges time synchronization information between
19 * a master that defines a timeline and clients that follow the timeline.
20 */
21
22 #define __STDC_LIMIT_MACROS
23 #define LOG_TAG "common_time"
24 #include <utils/Log.h>
25 #include <stdint.h>
26
27 #include <common_time/local_clock.h>
28 #include <assert.h>
29
30 #include "clock_recovery.h"
31 #include "common_clock.h"
32 #ifdef TIME_SERVICE_DEBUG
33 #include "diag_thread.h"
34 #endif
35
36 // Define log macro so we can make LOGV into LOGE when we are exclusively
37 // debugging this code.
38 #ifdef TIME_SERVICE_DEBUG
39 #define LOG_TS ALOGE
40 #else
41 #define LOG_TS ALOGV
42 #endif
43
44 namespace android {
45
ClockRecoveryLoop(LocalClock * local_clock,CommonClock * common_clock)46 ClockRecoveryLoop::ClockRecoveryLoop(LocalClock* local_clock,
47 CommonClock* common_clock) {
48 assert(NULL != local_clock);
49 assert(NULL != common_clock);
50
51 local_clock_ = local_clock;
52 common_clock_ = common_clock;
53
54 local_clock_can_slew_ = local_clock_->initCheck() &&
55 (local_clock_->setLocalSlew(0) == OK);
56
57 reset(true, true);
58
59 #ifdef TIME_SERVICE_DEBUG
60 diag_thread_ = new DiagThread(common_clock_, local_clock_);
61 if (diag_thread_ != NULL) {
62 status_t res = diag_thread_->startWorkThread();
63 if (res != OK)
64 ALOGW("Failed to start A@H clock recovery diagnostic thread.");
65 } else
66 ALOGW("Failed to allocate diagnostic thread.");
67 #endif
68 }
69
~ClockRecoveryLoop()70 ClockRecoveryLoop::~ClockRecoveryLoop() {
71 #ifdef TIME_SERVICE_DEBUG
72 diag_thread_->stopWorkThread();
73 #endif
74 }
75
// Constants.

// Time step used by the controller math in pushDisciplineEvent (it scales
// the (CO - CObias) term and appears as dT/Ti in the PI equation).
// NOTE(review): units are presumably seconds per discipline event -- confirm.
const float ClockRecoveryLoop::dT = 1.0;
// Gain applied to both error terms of the velocity form PI equation.
const float ClockRecoveryLoop::Kc = 1.0f;
// Divisor of dT in the PI equation; larger values weaken the dT/Ti term.
const float ClockRecoveryLoop::Ti = 15.0f;
// Gain (< 1) applied to each change in controller output before it is
// accumulated into CO.
const float ClockRecoveryLoop::Tf = 0.05;
// Parameters of the smoothing filter which produces CObias from CO.
// bias_Alpha is the weight given to the newest CO value; (1 - bias_Alpha) is
// the weight given to the previous bias.
// NOTE(review): bias_Fc looks like a cutoff frequency for that filter --
// confirm.
const float ClockRecoveryLoop::bias_Fc = 0.01;
const float ClockRecoveryLoop::bias_RC = (dT / (2 * 3.14159f * bias_Fc));
const float ClockRecoveryLoop::bias_Alpha = (dT / (bias_RC + dT));
// Error magnitude (before the data point's RTT is added) beyond which the
// loop panics and resets its frequency state.
// NOTE(review): presumably expressed in common time units -- confirm.
const int64_t ClockRecoveryLoop::panic_thresh_ = 50000;
// Data points whose RTT is below this value are always used for control,
// even when they are not the lowest-RTT point in the filter.
const int64_t ClockRecoveryLoop::control_thresh_ = 10000;
// Limits used to clamp the controller output CO (+/- 100ppm).
const float ClockRecoveryLoop::COmin = -100.0f;
const float ClockRecoveryLoop::COmax = 100.0f;
88
reset(bool position,bool frequency)89 void ClockRecoveryLoop::reset(bool position, bool frequency) {
90 Mutex::Autolock lock(&lock_);
91 reset_l(position, frequency);
92 }
93
findMinRTTNdx(DisciplineDataPoint * data,uint32_t count)94 uint32_t ClockRecoveryLoop::findMinRTTNdx(DisciplineDataPoint* data,
95 uint32_t count) {
96 uint32_t min_rtt = 0;
97 for (uint32_t i = 1; i < count; ++i)
98 if (data[min_rtt].rtt > data[i].rtt)
99 min_rtt = i;
100
101 return min_rtt;
102 }
103
pushDisciplineEvent(int64_t local_time,int64_t nominal_common_time,int64_t rtt)104 bool ClockRecoveryLoop::pushDisciplineEvent(int64_t local_time,
105 int64_t nominal_common_time,
106 int64_t rtt) {
107 Mutex::Autolock lock(&lock_);
108
109 int64_t local_common_time = 0;
110 common_clock_->localToCommon(local_time, &local_common_time);
111 int64_t raw_delta = nominal_common_time - local_common_time;
112
113 #ifdef TIME_SERVICE_DEBUG
114 ALOGE("local=%lld, common=%lld, delta=%lld, rtt=%lld\n",
115 local_common_time, nominal_common_time,
116 raw_delta, rtt);
117 #endif
118
119 // If we have not defined a basis for common time, then we need to use these
120 // initial points to do so. In order to avoid significant initial error
121 // from a particularly bad startup data point, we collect the first N data
122 // points and choose the best of them before moving on.
123 if (!common_clock_->isValid()) {
124 if (startup_filter_wr_ < kStartupFilterSize) {
125 DisciplineDataPoint& d = startup_filter_data_[startup_filter_wr_];
126 d.local_time = local_time;
127 d.nominal_common_time = nominal_common_time;
128 d.rtt = rtt;
129 startup_filter_wr_++;
130 }
131
132 if (startup_filter_wr_ == kStartupFilterSize) {
133 uint32_t min_rtt = findMinRTTNdx(startup_filter_data_,
134 kStartupFilterSize);
135
136 common_clock_->setBasis(
137 startup_filter_data_[min_rtt].local_time,
138 startup_filter_data_[min_rtt].nominal_common_time);
139 }
140
141 return true;
142 }
143
144 int64_t observed_common;
145 int64_t delta;
146 float delta_f, dCO;
147 int32_t correction_cur;
148
149 if (OK != common_clock_->localToCommon(local_time, &observed_common)) {
150 // Since we just checked to make certain that this conversion was valid,
151 // and no one else in the system should be messing with it, if this
152 // conversion is suddenly invalid, it is a good reason to panic.
153 ALOGE("Failed to convert local time to common time in %s:%d",
154 __PRETTY_FUNCTION__, __LINE__);
155 return false;
156 }
157
158 // Implement a filter which should match NTP filtering behavior when a
159 // client is associated with only one peer of lower stratum. Basically,
160 // always use the best of the N last data points, where best is defined as
161 // lowest round trip time. NTP uses an N of 8; we use a value of 6.
162 //
163 // TODO(johngro) : experiment with other filter strategies. The goal here
164 // is to mitigate the effects of high RTT data points which typically have
165 // large asymmetries in the TX/RX legs. Downside of the existing NTP
166 // approach (particularly because of the PID controller we are using to
167 // produce the control signal from the filtered data) are that the rate at
168 // which discipline events are actually acted upon becomes irregular and can
169 // become drawn out (the time between actionable event can go way up). If
170 // the system receives a strong high quality data point, the proportional
171 // component of the controller can produce a strong correction which is left
172 // in place for too long causing overshoot. In addition, the integral
173 // component of the system currently is an approximation based on the
174 // assumption of a more or less homogeneous sampling of the error. Its
175 // unclear what the effect of undermining this assumption would be right
176 // now.
177
178 // Two ideas which come to mind immediately would be to...
179 // 1) Keep a history of more data points (32 or so) and ignore data points
180 // whose RTT is more than a certain number of standard deviations outside
181 // of the norm.
182 // 2) Eliminate the PID controller portion of this system entirely.
183 // Instead, move to a system which uses a very wide filter (128 data
184 // points or more) with a sum-of-least-squares line fitting approach to
185 // tracking the long term drift. This would take the place of the I
186 // component in the current PID controller. Also use a much more narrow
187 // outlier-rejector filter (as described in #1) to drive a short term
188 // correction factor similar to the P component of the PID controller.
189 assert(filter_wr_ < kFilterSize);
190 filter_data_[filter_wr_].local_time = local_time;
191 filter_data_[filter_wr_].observed_common_time = observed_common;
192 filter_data_[filter_wr_].nominal_common_time = nominal_common_time;
193 filter_data_[filter_wr_].rtt = rtt;
194 filter_data_[filter_wr_].point_used = false;
195 uint32_t current_point = filter_wr_;
196 filter_wr_ = (filter_wr_ + 1) % kFilterSize;
197 if (!filter_wr_)
198 filter_full_ = true;
199
200 uint32_t scan_end = filter_full_ ? kFilterSize : filter_wr_;
201 uint32_t min_rtt = findMinRTTNdx(filter_data_, scan_end);
202 // We only use packets with low RTTs for control. If the packet RTT
203 // is less than the panic threshold, we can probably eat the jitter with the
204 // control loop. Otherwise, take the packet only if it better than all
205 // of the packets we have in the history. That way we try to track
206 // something, even if it is noisy.
207 if (current_point == min_rtt || rtt < control_thresh_) {
208 delta_f = delta = nominal_common_time - observed_common;
209
210 // Compute the error then clamp to the panic threshold. If we ever
211 // exceed this amt of error, its time to panic and reset the system.
212 // Given that the error in the measurement of the error could be as
213 // high as the RTT of the data point, we don't actually panic until
214 // the implied error (delta) is greater than the absolute panic
215 // threashold plus the RTT. IOW - we don't panic until we are
216 // absoluely sure that our best case sync is worse than the absolute
217 // panic threshold.
218 int64_t effective_panic_thresh = panic_thresh_ + rtt;
219 if ((delta > effective_panic_thresh) ||
220 (delta < -effective_panic_thresh)) {
221 // PANIC!!!
222 reset_l(false, true);
223 return false;
224 }
225
226 } else {
227 // We do not have a good packet to look at, but we also do not want to
228 // free-run the clock at some crazy slew rate. So we guess the
229 // trajectory of the clock based on the last controller output and the
230 // estimated bias of our clock against the master.
231 // The net effect of this is that CO == CObias after some extended
232 // period of no feedback.
233 delta_f = last_delta_f_ - dT*(CO - CObias);
234 delta = delta_f;
235 }
236
237 // Velocity form PI control equation.
238 dCO = Kc * (1.0f + dT/Ti) * delta_f - Kc * last_delta_f_;
239 CO += dCO * Tf; // Filter CO by applying gain <1 here.
240
241 // Save error terms for later.
242 last_delta_f_ = delta_f;
243 last_delta_ = delta;
244
245 // Clamp CO to +/- 100ppm.
246 if (CO < COmin)
247 CO = COmin;
248 else if (CO > COmax)
249 CO = COmax;
250
251 // Update the controller bias.
252 CObias = bias_Alpha * CO + (1.0f - bias_Alpha) * lastCObias;
253 lastCObias = CObias;
254
255 // Convert PPM to 16-bit int range. Add some guard band (-0.01) so we
256 // don't get fp weirdness.
257 correction_cur = CO * 327.66;
258
259 // If there was a change in the amt of correction to use, update the
260 // system.
261 if (correction_cur_ != correction_cur) {
262 correction_cur_ = correction_cur;
263 applySlew();
264 }
265
266 LOG_TS("clock_loop %lld %f %f %f %d\n", raw_delta, delta_f, CO, CObias, correction_cur);
267
268 #ifdef TIME_SERVICE_DEBUG
269 diag_thread_->pushDisciplineEvent(
270 local_time,
271 observed_common,
272 nominal_common_time,
273 correction_cur,
274 rtt);
275 #endif
276
277 return true;
278 }
279
getLastErrorEstimate()280 int32_t ClockRecoveryLoop::getLastErrorEstimate() {
281 Mutex::Autolock lock(&lock_);
282
283 if (last_delta_valid_)
284 return last_delta_;
285 else
286 return ICommonClock::kErrorEstimateUnknown;
287 }
288
reset_l(bool position,bool frequency)289 void ClockRecoveryLoop::reset_l(bool position, bool frequency) {
290 assert(NULL != common_clock_);
291
292 if (position) {
293 common_clock_->resetBasis();
294 startup_filter_wr_ = 0;
295 }
296
297 if (frequency) {
298 last_delta_valid_ = false;
299 last_delta_ = 0;
300 last_delta_f_ = 0.0;
301 correction_cur_ = 0x0;
302 CO = 0.0f;
303 lastCObias = CObias = 0.0f;
304 applySlew();
305 }
306
307 filter_wr_ = 0;
308 filter_full_ = false;
309 }
310
applySlew()311 void ClockRecoveryLoop::applySlew() {
312 if (local_clock_can_slew_) {
313 local_clock_->setLocalSlew(correction_cur_);
314 } else {
315 // The SW clock recovery implemented by the common clock class expects
316 // values expressed in PPM. CO is in ppm.
317 common_clock_->setSlew(local_clock_->getLocalTime(), CO);
318 }
319 }
320
321 } // namespace android
322