1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.net.ip; 18 19 import static android.net.metrics.IpReachabilityEvent.NUD_FAILED; 20 import static android.net.metrics.IpReachabilityEvent.NUD_FAILED_ORGANIC; 21 import static android.net.metrics.IpReachabilityEvent.PROVISIONING_LOST; 22 import static android.net.metrics.IpReachabilityEvent.PROVISIONING_LOST_ORGANIC; 23 24 import android.content.Context; 25 import android.net.ConnectivityManager; 26 import android.net.LinkProperties; 27 import android.net.RouteInfo; 28 import android.net.ip.IpNeighborMonitor.NeighborEvent; 29 import android.net.metrics.IpConnectivityLog; 30 import android.net.metrics.IpReachabilityEvent; 31 import android.net.netlink.StructNdMsg; 32 import android.net.util.InterfaceParams; 33 import android.net.util.SharedLog; 34 import android.os.ConditionVariable; 35 import android.os.Handler; 36 import android.os.Looper; 37 import android.os.PowerManager; 38 import android.os.PowerManager.WakeLock; 39 import android.os.SystemClock; 40 import android.util.Log; 41 42 import com.android.internal.annotations.VisibleForTesting; 43 44 import java.io.PrintWriter; 45 import java.net.Inet6Address; 46 import java.net.InetAddress; 47 import java.util.ArrayList; 48 import java.util.HashMap; 49 import java.util.List; 50 import java.util.Map; 51 52 53 /** 54 * IpReachabilityMonitor. 55 * 56 * Monitors on-link IP reachability and notifies callers whenever any on-link 57 * addresses of interest appear to have become unresponsive. 58 * 59 * This code does not concern itself with "why" a neighbour might have become 60 * unreachable. Instead, it primarily reacts to the kernel's notion of IP 61 * reachability for each of the neighbours we know to be critically important 62 * to normal network connectivity. As such, it is often "just the messenger": 63 * the neighbours about which it warns are already deemed by the kernel to have 64 * become unreachable. 65 * 66 * 67 * How it works: 68 * 69 * 1. The "on-link neighbours of interest" found in a given LinkProperties 70 * instance are added to a "watch list" via #updateLinkProperties(). 71 * This usually means all default gateways and any on-link DNS servers. 72 * 73 * 2. We listen continuously for netlink neighbour messages (RTM_NEWNEIGH, 74 * RTM_DELNEIGH), watching only for neighbours in the watch list. 75 * 76 * - A neighbour going into NUD_REACHABLE, NUD_STALE, NUD_DELAY, and 77 * even NUD_PROBE is perfectly normal; we merely record the new state. 78 * 79 * - A neighbour's entry may be deleted (RTM_DELNEIGH), for example due 80 * to garbage collection. This is not necessarily of immediate 81 * concern; we record the neighbour as moving to NUD_NONE. 82 * 83 * - A neighbour transitioning to NUD_FAILED (for any reason) is 84 * critically important and is handled as described below in #4. 85 * 86 * 3. All on-link neighbours in the watch list can be forcibly "probed" by 87 * calling #probeAll(). This should be called whenever it is important to 88 * verify that critical neighbours on the link are still reachable, e.g. 89 * when roaming between BSSIDs. 90 * 91 * - The kernel will send unicast ARP requests for IPv4 neighbours and 92 * unicast NS packets for IPv6 neighbours. The expected replies will 93 * likely be unicast. 94 * 95 * - The forced probing is done holding a wakelock. The kernel may, 96 * however, initiate probing of a neighbor on its own, i.e. whenever 97 * a neighbour has expired from NUD_DELAY. 98 * 99 * - The kernel sends: 100 * 101 * /proc/sys/net/ipv{4,6}/neigh/<ifname>/ucast_solicit 102 * 103 * number of probes (usually 3) every: 104 * 105 * /proc/sys/net/ipv{4,6}/neigh/<ifname>/retrans_time_ms 106 * 107 * number of milliseconds (usually 1000ms). This normally results in 108 * 3 unicast packets, 1 per second. 109 * 110 * - If no response is received to any of the probe packets, the kernel 111 * marks the neighbour as being in state NUD_FAILED, and the listening 112 * process in #2 will learn of it. 113 * 114 * 4. We call the supplied Callback#notifyLost() function if the loss of a 115 * neighbour in NUD_FAILED would cause IPv4 or IPv6 configuration to 116 * become incomplete (a loss of provisioning). 117 * 118 * - For example, losing all our IPv4 on-link DNS servers (or losing 119 * our only IPv6 default gateway) constitutes a loss of IPv4 (IPv6) 120 * provisioning; Callback#notifyLost() would be called. 121 * 122 * - Since it can be non-trivial to reacquire certain IP provisioning 123 * state it may be best for the link to disconnect completely and 124 * reconnect afresh. 125 * 126 * Accessing an instance of this class from multiple threads is NOT safe. 127 * 128 * @hide 129 */ 130 public class IpReachabilityMonitor { 131 private static final String TAG = "IpReachabilityMonitor"; 132 private static final boolean DBG = Log.isLoggable(TAG, Log.DEBUG); 133 private static final boolean VDBG = Log.isLoggable(TAG, Log.VERBOSE); 134 135 public interface Callback { 136 // This callback function must execute as quickly as possible as it is 137 // run on the same thread that listens to kernel neighbor updates. 138 // 139 // TODO: refactor to something like notifyProvisioningLost(String msg). notifyLost(InetAddress ip, String logMsg)140 public void notifyLost(InetAddress ip, String logMsg); 141 } 142 143 /** 144 * Encapsulates IpReachabilityMonitor depencencies on systems that hinder unit testing. 145 * TODO: consider also wrapping MultinetworkPolicyTracker in this interface. 146 */ 147 interface Dependencies { acquireWakeLock(long durationMs)148 void acquireWakeLock(long durationMs); 149 makeDefault(Context context, String iface)150 static Dependencies makeDefault(Context context, String iface) { 151 final String lockName = TAG + "." + iface; 152 final PowerManager pm = (PowerManager) context.getSystemService(Context.POWER_SERVICE); 153 final WakeLock lock = pm.newWakeLock(PowerManager.PARTIAL_WAKE_LOCK, lockName); 154 155 return new Dependencies() { 156 public void acquireWakeLock(long durationMs) { 157 lock.acquire(durationMs); 158 } 159 }; 160 } 161 } 162 163 private final InterfaceParams mInterfaceParams; 164 private final IpNeighborMonitor mIpNeighborMonitor; 165 private final SharedLog mLog; 166 private final Callback mCallback; 167 private final Dependencies mDependencies; 168 private final boolean mUsingMultinetworkPolicyTracker; 169 private final ConnectivityManager mCm; 170 private final IpConnectivityLog mMetricsLog = new IpConnectivityLog(); 171 private LinkProperties mLinkProperties = new LinkProperties(); 172 private Map<InetAddress, NeighborEvent> mNeighborWatchList = new HashMap<>(); 173 // Time in milliseconds of the last forced probe request. 174 private volatile long mLastProbeTimeMs; 175 176 public IpReachabilityMonitor( 177 Context context, InterfaceParams ifParams, Handler h, SharedLog log, Callback callback, 178 boolean usingMultinetworkPolicyTracker) { 179 this(context, ifParams, h, log, callback, usingMultinetworkPolicyTracker, 180 Dependencies.makeDefault(context, ifParams.name)); 181 } 182 183 @VisibleForTesting 184 IpReachabilityMonitor(Context context, InterfaceParams ifParams, Handler h, SharedLog log, 185 Callback callback, boolean usingMultinetworkPolicyTracker, Dependencies dependencies) { 186 if (ifParams == null) throw new IllegalArgumentException("null InterfaceParams"); 187 188 mInterfaceParams = ifParams; 189 mLog = log.forSubComponent(TAG); 190 mCallback = callback; 191 mUsingMultinetworkPolicyTracker = usingMultinetworkPolicyTracker; 192 mCm = context.getSystemService(ConnectivityManager.class); 193 mDependencies = dependencies; 194 195 mIpNeighborMonitor = new IpNeighborMonitor(h, mLog, 196 (NeighborEvent event) -> { 197 if (mInterfaceParams.index != event.ifindex) return; 198 if (!mNeighborWatchList.containsKey(event.ip)) return; 199 200 final NeighborEvent prev = mNeighborWatchList.put(event.ip, event); 201 202 // TODO: Consider what to do with other states that are not within 203 // NeighborEvent#isValid() (i.e. NUD_NONE, NUD_INCOMPLETE). 204 if (event.nudState == StructNdMsg.NUD_FAILED) { 205 mLog.w("ALERT neighbor went from: " + prev + " to: " + event); 206 handleNeighborLost(event); 207 } 208 }); 209 mIpNeighborMonitor.start(); 210 } 211 212 public void stop() { 213 mIpNeighborMonitor.stop(); 214 clearLinkProperties(); 215 } 216 217 public void dump(PrintWriter pw) { 218 if (Looper.myLooper() == mIpNeighborMonitor.getHandler().getLooper()) { 219 pw.println(describeWatchList("\n")); 220 return; 221 } 222 223 final ConditionVariable cv = new ConditionVariable(false); 224 mIpNeighborMonitor.getHandler().post(() -> { 225 pw.println(describeWatchList("\n")); 226 cv.open(); 227 }); 228 229 if (!cv.block(1000)) { 230 pw.println("Timed out waiting for IpReachabilityMonitor dump"); 231 } 232 } 233 234 private String describeWatchList() { return describeWatchList(" "); } 235 236 private String describeWatchList(String sep) { 237 final StringBuilder sb = new StringBuilder(); 238 sb.append("iface{" + mInterfaceParams + "}," + sep); 239 sb.append("ntable=[" + sep); 240 String delimiter = ""; 241 for (Map.Entry<InetAddress, NeighborEvent> entry : mNeighborWatchList.entrySet()) { 242 sb.append(delimiter).append(entry.getKey().getHostAddress() + "/" + entry.getValue()); 243 delimiter = "," + sep; 244 } 245 sb.append("]"); 246 return sb.toString(); 247 } 248 249 private static boolean isOnLink(List<RouteInfo> routes, InetAddress ip) { 250 for (RouteInfo route : routes) { 251 if (!route.hasGateway() && route.matches(ip)) { 252 return true; 253 } 254 } 255 return false; 256 } 257 258 public void updateLinkProperties(LinkProperties lp) { 259 if (!mInterfaceParams.name.equals(lp.getInterfaceName())) { 260 // TODO: figure out whether / how to cope with interface changes. 261 Log.wtf(TAG, "requested LinkProperties interface '" + lp.getInterfaceName() + 262 "' does not match: " + mInterfaceParams.name); 263 return; 264 } 265 266 mLinkProperties = new LinkProperties(lp); 267 Map<InetAddress, NeighborEvent> newNeighborWatchList = new HashMap<>(); 268 269 final List<RouteInfo> routes = mLinkProperties.getRoutes(); 270 for (RouteInfo route : routes) { 271 if (route.hasGateway()) { 272 InetAddress gw = route.getGateway(); 273 if (isOnLink(routes, gw)) { 274 newNeighborWatchList.put(gw, mNeighborWatchList.getOrDefault(gw, null)); 275 } 276 } 277 } 278 279 for (InetAddress dns : lp.getDnsServers()) { 280 if (isOnLink(routes, dns)) { 281 newNeighborWatchList.put(dns, mNeighborWatchList.getOrDefault(dns, null)); 282 } 283 } 284 285 mNeighborWatchList = newNeighborWatchList; 286 if (DBG) { Log.d(TAG, "watch: " + describeWatchList()); } 287 } 288 289 public void clearLinkProperties() { 290 mLinkProperties.clear(); 291 mNeighborWatchList.clear(); 292 if (DBG) { Log.d(TAG, "clear: " + describeWatchList()); } 293 } 294 295 private void handleNeighborLost(NeighborEvent event) { 296 final LinkProperties whatIfLp = new LinkProperties(mLinkProperties); 297 298 InetAddress ip = null; 299 for (Map.Entry<InetAddress, NeighborEvent> entry : mNeighborWatchList.entrySet()) { 300 // TODO: Consider using NeighborEvent#isValid() here; it's more 301 // strict but may interact badly if other entries are somehow in 302 // NUD_INCOMPLETE (say, during network attach). 303 if (entry.getValue().nudState != StructNdMsg.NUD_FAILED) continue; 304 305 ip = entry.getKey(); 306 for (RouteInfo route : mLinkProperties.getRoutes()) { 307 if (ip.equals(route.getGateway())) { 308 whatIfLp.removeRoute(route); 309 } 310 } 311 312 if (avoidingBadLinks() || !(ip instanceof Inet6Address)) { 313 // We should do this unconditionally, but alas we cannot: b/31827713. 314 whatIfLp.removeDnsServer(ip); 315 } 316 } 317 318 final boolean lostProvisioning = 319 (mLinkProperties.isIpv4Provisioned() && !whatIfLp.isIpv4Provisioned()) 320 || (mLinkProperties.isIpv6Provisioned() && !whatIfLp.isIpv6Provisioned()); 321 322 if (lostProvisioning) { 323 final String logMsg = "FAILURE: LOST_PROVISIONING, " + event; 324 Log.w(TAG, logMsg); 325 if (mCallback != null) { 326 // TODO: remove |ip| when the callback signature no longer has 327 // an InetAddress argument. 328 mCallback.notifyLost(ip, logMsg); 329 } 330 } 331 logNudFailed(lostProvisioning); 332 } 333 334 private boolean avoidingBadLinks() { 335 return !mUsingMultinetworkPolicyTracker || mCm.shouldAvoidBadWifi(); 336 } 337 338 public void probeAll() { 339 final List<InetAddress> ipProbeList = new ArrayList<>(mNeighborWatchList.keySet()); 340 341 if (!ipProbeList.isEmpty()) { 342 // Keep the CPU awake long enough to allow all ARP/ND 343 // probes a reasonable chance at success. See b/23197666. 344 // 345 // The wakelock we use is (by default) refcounted, and this version 346 // of acquire(timeout) queues a release message to keep acquisitions 347 // and releases balanced. 348 mDependencies.acquireWakeLock(getProbeWakeLockDuration()); 349 } 350 351 for (InetAddress ip : ipProbeList) { 352 final int rval = IpNeighborMonitor.startKernelNeighborProbe(mInterfaceParams.index, ip); 353 mLog.log(String.format("put neighbor %s into NUD_PROBE state (rval=%d)", 354 ip.getHostAddress(), rval)); 355 logEvent(IpReachabilityEvent.PROBE, rval); 356 } 357 mLastProbeTimeMs = SystemClock.elapsedRealtime(); 358 } 359 360 private static long getProbeWakeLockDuration() { 361 // Ideally, this would be computed by examining the values of: 362 // 363 // /proc/sys/net/ipv[46]/neigh/<ifname>/ucast_solicit 364 // 365 // and: 366 // 367 // /proc/sys/net/ipv[46]/neigh/<ifname>/retrans_time_ms 368 // 369 // For now, just make some assumptions. 370 final long numUnicastProbes = 3; 371 final long retransTimeMs = 1000; 372 final long gracePeriodMs = 500; 373 return (numUnicastProbes * retransTimeMs) + gracePeriodMs; 374 } 375 376 private void logEvent(int probeType, int errorCode) { 377 int eventType = probeType | (errorCode & 0xff); 378 mMetricsLog.log(mInterfaceParams.name, new IpReachabilityEvent(eventType)); 379 } 380 381 private void logNudFailed(boolean lostProvisioning) { 382 long duration = SystemClock.elapsedRealtime() - mLastProbeTimeMs; 383 boolean isFromProbe = (duration < getProbeWakeLockDuration()); 384 int eventType = nudFailureEventType(isFromProbe, lostProvisioning); 385 mMetricsLog.log(mInterfaceParams.name, new IpReachabilityEvent(eventType)); 386 } 387 388 /** 389 * Returns the NUD failure event type code corresponding to the given conditions. 390 */ 391 private static int nudFailureEventType(boolean isFromProbe, boolean isProvisioningLost) { 392 if (isFromProbe) { 393 return isProvisioningLost ? PROVISIONING_LOST : NUD_FAILED; 394 } else { 395 return isProvisioningLost ? PROVISIONING_LOST_ORGANIC : NUD_FAILED_ORGANIC; 396 } 397 } 398 } 399