1 /*
2 * libata-eh.c - libata error handling
3 *
4 * Maintained by: Tejun Heo <tj@kernel.org>
5 * Please ALWAYS copy linux-ide@vger.kernel.org
6 * on emails.
7 *
8 * Copyright 2006 Tejun Heo <htejun@gmail.com>
9 *
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License as
13 * published by the Free Software Foundation; either version 2, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; see the file COPYING. If not, write to
23 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
24 * USA.
25 *
26 *
27 * libata documentation is available via 'make {ps|pdf}docs',
28 * as Documentation/DocBook/libata.*
29 *
30 * Hardware documentation available from http://www.t13.org/ and
31 * http://www.sata-io.org/
32 *
33 */
34
35 #include <linux/kernel.h>
36 #include <linux/blkdev.h>
37 #include <linux/export.h>
38 #include <linux/pci.h>
39 #include <scsi/scsi.h>
40 #include <scsi/scsi_host.h>
41 #include <scsi/scsi_eh.h>
42 #include <scsi/scsi_device.h>
43 #include <scsi/scsi_cmnd.h>
44 #include <scsi/scsi_dbg.h>
45 #include "../scsi/scsi_transport_api.h"
46
47 #include <linux/libata.h>
48
49 #include <trace/events/libata.h>
50 #include "libata.h"
51
52 enum {
53 /* speed down verdicts */
54 ATA_EH_SPDN_NCQ_OFF = (1 << 0),
55 ATA_EH_SPDN_SPEED_DOWN = (1 << 1),
56 ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2),
57 ATA_EH_SPDN_KEEP_ERRORS = (1 << 3),
58
59 /* error flags */
60 ATA_EFLAG_IS_IO = (1 << 0),
61 ATA_EFLAG_DUBIOUS_XFER = (1 << 1),
62 ATA_EFLAG_OLD_ER = (1 << 31),
63
64 /* error categories */
65 ATA_ECAT_NONE = 0,
66 ATA_ECAT_ATA_BUS = 1,
67 ATA_ECAT_TOUT_HSM = 2,
68 ATA_ECAT_UNK_DEV = 3,
69 ATA_ECAT_DUBIOUS_NONE = 4,
70 ATA_ECAT_DUBIOUS_ATA_BUS = 5,
71 ATA_ECAT_DUBIOUS_TOUT_HSM = 6,
72 ATA_ECAT_DUBIOUS_UNK_DEV = 7,
73 ATA_ECAT_NR = 8,
74
75 ATA_EH_CMD_DFL_TIMEOUT = 5000,
76
77 /* always put at least this amount of time between resets */
78 ATA_EH_RESET_COOL_DOWN = 5000,
79
80 /* Waiting in ->prereset can never be reliable. It's
81 * sometimes nice to wait there but it can't be depended upon;
82 * otherwise, we wouldn't be resetting. Just give it enough
83 * time for most drives to spin up.
84 */
85 ATA_EH_PRERESET_TIMEOUT = 10000,
86 ATA_EH_FASTDRAIN_INTERVAL = 3000,
87
88 ATA_EH_UA_TRIES = 5,
89
90 /* probe speed down parameters, see ata_eh_schedule_probe() */
91 ATA_EH_PROBE_TRIAL_INTERVAL = 60000, /* 1 min */
92 ATA_EH_PROBE_TRIALS = 2,
93 };
94
95 /* The following table determines how we sequence resets. Each entry
96 * represents timeout for that try. The first try can be soft or
97 * hardreset. All others are hardreset if available. In most cases
98 * the first reset w/ 10sec timeout should succeed. Following entries
99 * are mostly for error handling, hotplug and those outlier devices that
100 * take an exceptionally long time to recover from reset.
101 */
102 static const unsigned long ata_eh_reset_timeouts[] = {
103 10000, /* most drives spin up by 10sec */
104 10000, /* > 99% working drives spin up before 20sec */
105 35000, /* give > 30 secs of idleness for outlier devices */
106 5000, /* and sweet one last chance */
107 ULONG_MAX, /* > 1 min has elapsed, give up */
108 };
109
110 static const unsigned long ata_eh_identify_timeouts[] = {
111 5000, /* covers > 99% of successes and not too boring on failures */
112 10000, /* combined time till here is enough even for media access */
113 30000, /* for true idiots */
114 ULONG_MAX,
115 };
116
117 static const unsigned long ata_eh_revalidate_timeouts[] = {
118 15000, /* Some drives are slow to read log pages when waking up */
119 15000, /* combined time till here is enough even for media access */
120 ULONG_MAX,
121 };
122
123 static const unsigned long ata_eh_flush_timeouts[] = {
124 15000, /* be generous with flush */
125 15000, /* ditto */
126 30000, /* and even more generous */
127 ULONG_MAX,
128 };
129
130 static const unsigned long ata_eh_other_timeouts[] = {
131 5000, /* same rationale as identify timeout */
132 10000, /* ditto */
133 /* but no merciful 30sec for other commands, it just isn't worth it */
134 ULONG_MAX,
135 };
136
137 struct ata_eh_cmd_timeout_ent {
138 const u8 *commands;
139 const unsigned long *timeouts;
140 };
141
142 /* The following table determines timeouts to use for EH internal
143 * commands. Each table entry is a command class and matches the
144 * commands the entry applies to and the timeout table to use.
145 *
146 * On the retry after a command timed out, the next timeout value from
147 * the table is used. If the table doesn't contain further entries,
148 * the last value is used.
149 *
150 * ehc->cmd_timeout_idx keeps track of which timeout to use per
151 * command class, so if SET_FEATURES times out on the first try, the
152 * next try will use the second timeout value only for that class.
153 */
154 #define CMDS(cmds...) (const u8 []){ cmds, 0 }
155 static const struct ata_eh_cmd_timeout_ent
156 ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
157 { .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI),
158 .timeouts = ata_eh_identify_timeouts, },
159 { .commands = CMDS(ATA_CMD_READ_LOG_EXT, ATA_CMD_READ_LOG_DMA_EXT),
160 .timeouts = ata_eh_revalidate_timeouts, },
161 { .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT),
162 .timeouts = ata_eh_other_timeouts, },
163 { .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT),
164 .timeouts = ata_eh_other_timeouts, },
165 { .commands = CMDS(ATA_CMD_SET_FEATURES),
166 .timeouts = ata_eh_other_timeouts, },
167 { .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
168 .timeouts = ata_eh_other_timeouts, },
169 { .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT),
170 .timeouts = ata_eh_flush_timeouts },
171 };
172 #undef CMDS
173
174 static void __ata_port_freeze(struct ata_port *ap);
175 #ifdef CONFIG_PM
176 static void ata_eh_handle_port_suspend(struct ata_port *ap);
177 static void ata_eh_handle_port_resume(struct ata_port *ap);
178 #else /* CONFIG_PM */
179 static void ata_eh_handle_port_suspend(struct ata_port *ap)
180 { }
181
182 static void ata_eh_handle_port_resume(struct ata_port *ap)
183 { }
184 #endif /* CONFIG_PM */
185
186 static void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, const char *fmt,
187 va_list args)
188 {
189 ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len,
190 ATA_EH_DESC_LEN - ehi->desc_len,
191 fmt, args);
192 }
193
194 /**
195 * __ata_ehi_push_desc - push error description without adding separator
196 * @ehi: target EHI
197 * @fmt: printf format string
198 *
199 * Format string according to @fmt and append it to @ehi->desc.
200 *
201 * LOCKING:
202 * spin_lock_irqsave(host lock)
203 */
204 void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
205 {
206 va_list args;
207
208 va_start(args, fmt);
209 __ata_ehi_pushv_desc(ehi, fmt, args);
210 va_end(args);
211 }
212
213 /**
214 * ata_ehi_push_desc - push error description with separator
215 * @ehi: target EHI
216 * @fmt: printf format string
217 *
218 * Format string according to @fmt and append it to @ehi->desc.
219 * If @ehi->desc is not empty, ", " is added in-between.
220 *
221 * LOCKING:
222 * spin_lock_irqsave(host lock)
223 */
224 void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
225 {
226 va_list args;
227
228 if (ehi->desc_len)
229 __ata_ehi_push_desc(ehi, ", ");
230
231 va_start(args, fmt);
232 __ata_ehi_pushv_desc(ehi, fmt, args);
233 va_end(args);
234 }
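
/*
 * Illustrative sketch (not part of this driver): a LLDD interrupt
 * handler typically builds up the error description like this,
 * assuming "ehi" points at the link's eh_info and "irq_stat" and
 * "serror" are values it has already read:
 *
 *	ata_ehi_clear_desc(ehi);
 *	ata_ehi_push_desc(ehi, "irq_stat 0x%08x", irq_stat);
 *	ata_ehi_push_desc(ehi, "SError 0x%08x", serror);
 *
 * The second push yields "irq_stat 0x...., SError 0x...." because
 * ata_ehi_push_desc() inserts ", " when the description is non-empty.
 */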
235
236 /**
237 * ata_ehi_clear_desc - clean error description
238 * @ehi: target EHI
239 *
240 * Clear @ehi->desc.
241 *
242 * LOCKING:
243 * spin_lock_irqsave(host lock)
244 */
245 void ata_ehi_clear_desc(struct ata_eh_info *ehi)
246 {
247 ehi->desc[0] = '\0';
248 ehi->desc_len = 0;
249 }
250
251 /**
252 * ata_port_desc - append port description
253 * @ap: target ATA port
254 * @fmt: printf format string
255 *
256 * Format string according to @fmt and append it to port
257 * description. If port description is not empty, " " is added
258 * in-between. This function is to be used while initializing
259 * ata_host. The description is printed on host registration.
260 *
261 * LOCKING:
262 * None.
263 */
264 void ata_port_desc(struct ata_port *ap, const char *fmt, ...)
265 {
266 va_list args;
267
268 WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING));
269
270 if (ap->link.eh_info.desc_len)
271 __ata_ehi_push_desc(&ap->link.eh_info, " ");
272
273 va_start(args, fmt);
274 __ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args);
275 va_end(args);
276 }
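
/*
 * Illustrative sketch (not part of this driver): during host
 * initialization a driver appends space-separated fragments to the
 * port description, e.g.:
 *
 *	ata_port_desc(ap, "irq %d", irq);
 *	ata_port_desc(ap, "cmd 0x%llx", (unsigned long long)cmd_addr);
 *
 * "irq" and "cmd_addr" are hypothetical locals; the combined
 * description is printed when the host is registered.
 */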
277
278 #ifdef CONFIG_PCI
279
280 /**
281 * ata_port_pbar_desc - append PCI BAR description
282 * @ap: target ATA port
283 * @bar: target PCI BAR
284 * @offset: offset into PCI BAR
285 * @name: name of the area
286 *
287 * If @offset is negative, this function formats a string which
288 * contains the name, address, size and type of the BAR and
289 * appends it to the port description. If @offset is zero or
290 * positive, only the name and offset address are appended.
291 *
292 * LOCKING:
293 * None.
294 */
295 void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
296 const char *name)
297 {
298 struct pci_dev *pdev = to_pci_dev(ap->host->dev);
299 char *type = "";
300 unsigned long long start, len;
301
302 if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM)
303 type = "m";
304 else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
305 type = "i";
306
307 start = (unsigned long long)pci_resource_start(pdev, bar);
308 len = (unsigned long long)pci_resource_len(pdev, bar);
309
310 if (offset < 0)
311 ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start);
312 else
313 ata_port_desc(ap, "%s 0x%llx", name,
314 start + (unsigned long long)offset);
315 }
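
/*
 * Illustrative sketch (not part of this driver): a PCI driver with its
 * registers in a memory BAR might describe the whole BAR and a
 * hypothetical per-port offset like this:
 *
 *	ata_port_pbar_desc(ap, bar, -1, "abar");	  // e.g. "abar m8192@0x..."
 *	ata_port_pbar_desc(ap, bar, port_offset, "port"); // e.g. "port 0x..."
 *
 * "bar" and "port_offset" are assumed to be supplied by the caller.
 */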
316
317 #endif /* CONFIG_PCI */
318
319 static int ata_lookup_timeout_table(u8 cmd)
320 {
321 int i;
322
323 for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) {
324 const u8 *cur;
325
326 for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++)
327 if (*cur == cmd)
328 return i;
329 }
330
331 return -1;
332 }
333
334 /**
335 * ata_internal_cmd_timeout - determine timeout for an internal command
336 * @dev: target device
337 * @cmd: internal command to be issued
338 *
339 * Determine timeout for internal command @cmd for @dev.
340 *
341 * LOCKING:
342 * EH context.
343 *
344 * RETURNS:
345 * Determined timeout.
346 */
347 unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd)
348 {
349 struct ata_eh_context *ehc = &dev->link->eh_context;
350 int ent = ata_lookup_timeout_table(cmd);
351 int idx;
352
353 if (ent < 0)
354 return ATA_EH_CMD_DFL_TIMEOUT;
355
356 idx = ehc->cmd_timeout_idx[dev->devno][ent];
357 return ata_eh_cmd_timeout_table[ent].timeouts[idx];
358 }
359
360 /**
361 * ata_internal_cmd_timed_out - notification for internal command timeout
362 * @dev: target device
363 * @cmd: internal command which timed out
364 *
365 * Notify EH that internal command @cmd for @dev timed out. This
366 * function should be called only for commands whose timeouts are
367 * determined using ata_internal_cmd_timeout().
368 *
369 * LOCKING:
370 * EH context.
371 */
372 void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd)
373 {
374 struct ata_eh_context *ehc = &dev->link->eh_context;
375 int ent = ata_lookup_timeout_table(cmd);
376 int idx;
377
378 if (ent < 0)
379 return;
380
381 idx = ehc->cmd_timeout_idx[dev->devno][ent];
382 if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX)
383 ehc->cmd_timeout_idx[dev->devno][ent]++;
384 }
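
/*
 * Illustrative sketch (not part of this driver): issuers of EH internal
 * commands pair the two helpers above, assuming "dev" and "cmd" are in
 * scope and "err_mask" is the result of the attempt:
 *
 *	unsigned long timeout = ata_internal_cmd_timeout(dev, cmd);
 *
 *	// ... issue the command and wait up to "timeout" msecs ...
 *
 *	if (err_mask & AC_ERR_TIMEOUT)
 *		ata_internal_cmd_timed_out(dev, cmd);
 *
 * so that the next attempt of the same command class picks the next
 * (longer) entry from ata_eh_cmd_timeout_table.
 */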
385
386 static void ata_ering_record(struct ata_ering *ering, unsigned int eflags,
387 unsigned int err_mask)
388 {
389 struct ata_ering_entry *ent;
390
391 WARN_ON(!err_mask);
392
393 ering->cursor++;
394 ering->cursor %= ATA_ERING_SIZE;
395
396 ent = &ering->ring[ering->cursor];
397 ent->eflags = eflags;
398 ent->err_mask = err_mask;
399 ent->timestamp = get_jiffies_64();
400 }
401
402 static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering)
403 {
404 struct ata_ering_entry *ent = &ering->ring[ering->cursor];
405
406 if (ent->err_mask)
407 return ent;
408 return NULL;
409 }
410
411 int ata_ering_map(struct ata_ering *ering,
412 int (*map_fn)(struct ata_ering_entry *, void *),
413 void *arg)
414 {
415 int idx, rc = 0;
416 struct ata_ering_entry *ent;
417
418 idx = ering->cursor;
419 do {
420 ent = &ering->ring[idx];
421 if (!ent->err_mask)
422 break;
423 rc = map_fn(ent, arg);
424 if (rc)
425 break;
426 idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
427 } while (idx != ering->cursor);
428
429 return rc;
430 }
431
432 static int ata_ering_clear_cb(struct ata_ering_entry *ent, void *void_arg)
433 {
434 ent->eflags |= ATA_EFLAG_OLD_ER;
435 return 0;
436 }
437
438 static void ata_ering_clear(struct ata_ering *ering)
439 {
440 ata_ering_map(ering, ata_ering_clear_cb, NULL);
441 }
442
443 static unsigned int ata_eh_dev_action(struct ata_device *dev)
444 {
445 struct ata_eh_context *ehc = &dev->link->eh_context;
446
447 return ehc->i.action | ehc->i.dev_action[dev->devno];
448 }
449
450 static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev,
451 struct ata_eh_info *ehi, unsigned int action)
452 {
453 struct ata_device *tdev;
454
455 if (!dev) {
456 ehi->action &= ~action;
457 ata_for_each_dev(tdev, link, ALL)
458 ehi->dev_action[tdev->devno] &= ~action;
459 } else {
460 /* doesn't make sense for port-wide EH actions */
461 WARN_ON(!(action & ATA_EH_PERDEV_MASK));
462
463 /* break ehi->action into ehi->dev_action */
464 if (ehi->action & action) {
465 ata_for_each_dev(tdev, link, ALL)
466 ehi->dev_action[tdev->devno] |=
467 ehi->action & action;
468 ehi->action &= ~action;
469 }
470
471 /* turn off the specified per-dev action */
472 ehi->dev_action[dev->devno] &= ~action;
473 }
474 }
475
476 /**
477 * ata_eh_acquire - acquire EH ownership
478 * @ap: ATA port to acquire EH ownership for
479 *
480 * Acquire EH ownership for @ap. This is the basic exclusion
481 * mechanism for ports sharing a host. Only one port hanging off
482 * the same host can claim the ownership of EH.
483 *
484 * LOCKING:
485 * EH context.
486 */
487 void ata_eh_acquire(struct ata_port *ap)
488 {
489 mutex_lock(&ap->host->eh_mutex);
490 WARN_ON_ONCE(ap->host->eh_owner);
491 ap->host->eh_owner = current;
492 }
493
494 /**
495 * ata_eh_release - release EH ownership
496 * @ap: ATA port to release EH ownership for
497 *
498 * Release EH ownership for @ap. The caller must
499 * have acquired EH ownership using ata_eh_acquire() previously.
500 *
501 * LOCKING:
502 * EH context.
503 */
504 void ata_eh_release(struct ata_port *ap)
505 {
506 WARN_ON_ONCE(ap->host->eh_owner != current);
507 ap->host->eh_owner = NULL;
508 mutex_unlock(&ap->host->eh_mutex);
509 }
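
/*
 * Illustrative sketch (not part of this driver): EH ownership brackets
 * long-running recovery work and may be dropped around waits so that
 * sibling ports sharing the host can make progress:
 *
 *	ata_eh_acquire(ap);
 *	// ... reset, revalidate, etc. ...
 *	ata_eh_release(ap);
 *	// ... sleep without blocking other ports ...
 *	ata_eh_acquire(ap);
 *
 * Helpers that sleep for long periods (ata_msleep(), for example) may
 * perform this release/re-acquire dance internally.
 */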
510
511 /**
512 * ata_scsi_timed_out - SCSI layer time out callback
513 * @cmd: timed out SCSI command
514 *
515 * Handles SCSI layer timeout. We race with normal completion of
516 * the qc for @cmd. If the qc is already gone, we lose and let
517 * the scsi command finish (EH_HANDLED). Otherwise, the qc has
518 * timed out and EH should be invoked. Prevent ata_qc_complete()
519 * from finishing it by setting EH_SCHEDULED and return
520 * EH_NOT_HANDLED.
521 *
522 * TODO: kill this function once old EH is gone.
523 *
524 * LOCKING:
525 * Called from timer context
526 *
527 * RETURNS:
528 * EH_HANDLED or EH_NOT_HANDLED
529 */
530 enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
531 {
532 struct Scsi_Host *host = cmd->device->host;
533 struct ata_port *ap = ata_shost_to_port(host);
534 unsigned long flags;
535 struct ata_queued_cmd *qc;
536 enum blk_eh_timer_return ret;
537
538 DPRINTK("ENTER\n");
539
540 if (ap->ops->error_handler) {
541 ret = BLK_EH_NOT_HANDLED;
542 goto out;
543 }
544
545 ret = BLK_EH_HANDLED;
546 spin_lock_irqsave(ap->lock, flags);
547 qc = ata_qc_from_tag(ap, ap->link.active_tag);
548 if (qc) {
549 WARN_ON(qc->scsicmd != cmd);
550 qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
551 qc->err_mask |= AC_ERR_TIMEOUT;
552 ret = BLK_EH_NOT_HANDLED;
553 }
554 spin_unlock_irqrestore(ap->lock, flags);
555
556 out:
557 DPRINTK("EXIT, ret=%d\n", ret);
558 return ret;
559 }
560
561 static void ata_eh_unload(struct ata_port *ap)
562 {
563 struct ata_link *link;
564 struct ata_device *dev;
565 unsigned long flags;
566
567 /* Restore SControl IPM and SPD for the next driver and
568 * disable attached devices.
569 */
570 ata_for_each_link(link, ap, PMP_FIRST) {
571 sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0);
572 ata_for_each_dev(dev, link, ALL)
573 ata_dev_disable(dev);
574 }
575
576 /* freeze and set UNLOADED */
577 spin_lock_irqsave(ap->lock, flags);
578
579 ata_port_freeze(ap); /* won't be thawed */
580 ap->pflags &= ~ATA_PFLAG_EH_PENDING; /* clear pending from freeze */
581 ap->pflags |= ATA_PFLAG_UNLOADED;
582
583 spin_unlock_irqrestore(ap->lock, flags);
584 }
585
586 /**
587 * ata_scsi_error - SCSI layer error handler callback
588 * @host: SCSI host on which error occurred
589 *
590 * Handles SCSI-layer-thrown error events.
591 *
592 * LOCKING:
593 * Inherited from SCSI layer (none, can sleep)
594 *
595 * RETURNS:
596 * Zero.
597 */
598 void ata_scsi_error(struct Scsi_Host *host)
599 {
600 struct ata_port *ap = ata_shost_to_port(host);
601 unsigned long flags;
602 LIST_HEAD(eh_work_q);
603
604 DPRINTK("ENTER\n");
605
606 spin_lock_irqsave(host->host_lock, flags);
607 list_splice_init(&host->eh_cmd_q, &eh_work_q);
608 spin_unlock_irqrestore(host->host_lock, flags);
609
610 ata_scsi_cmd_error_handler(host, ap, &eh_work_q);
611
612 /* If we raced with normal completion and there is nothing to
613 recover (nr_timedout == 0), why exactly are we doing error recovery? */
614 ata_scsi_port_error_handler(host, ap);
615
616 /* finish or retry handled scmd's and clean up */
617 WARN_ON(!list_empty(&eh_work_q));
618
619 DPRINTK("EXIT\n");
620 }
621
622 /**
623 * ata_scsi_cmd_error_handler - error callback for a list of commands
624 * @host: scsi host containing the port
625 * @ap: ATA port within the host
626 * @eh_work_q: list of commands to process
627 *
628 * process the given list of commands and return those finished to the
629 * ap->eh_done_q. This function is the first part of the libata error
630 * handler which processes a given list of failed commands.
631 */
632 void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
633 struct list_head *eh_work_q)
634 {
635 int i;
636 unsigned long flags;
637
638 /* make sure sff pio task is not running */
639 ata_sff_flush_pio_task(ap);
640
641 /* synchronize with host lock and sort out timeouts */
642
643 /* For new EH, all qcs are finished in one of three ways -
644 * normal completion, error completion, and SCSI timeout.
645 * Both completions can race against SCSI timeout. When normal
646 * completion wins, the qc never reaches EH. When error
647 * completion wins, the qc has ATA_QCFLAG_FAILED set.
648 *
649 * When SCSI timeout wins, things are a bit more complex.
650 * Normal or error completion can occur after the timeout but
651 * before this point. In such cases, both types of
652 * completions are honored. A scmd is determined to have
653 * timed out iff its associated qc is active and not failed.
654 */
655 if (ap->ops->error_handler) {
656 struct scsi_cmnd *scmd, *tmp;
657 int nr_timedout = 0;
658
659 spin_lock_irqsave(ap->lock, flags);
660
661 /* This must occur under the ap->lock as we don't want
662 a polled recovery to race the real interrupt handler
663
664 The lost_interrupt handler checks for any completed but
665 non-notified command and completes much like an IRQ handler.
666
667 We then fall into the error recovery code which will treat
668 this as if normal completion won the race */
669
670 if (ap->ops->lost_interrupt)
671 ap->ops->lost_interrupt(ap);
672
673 list_for_each_entry_safe(scmd, tmp, eh_work_q, eh_entry) {
674 struct ata_queued_cmd *qc;
675
676 for (i = 0; i < ATA_MAX_QUEUE; i++) {
677 qc = __ata_qc_from_tag(ap, i);
678 if (qc->flags & ATA_QCFLAG_ACTIVE &&
679 qc->scsicmd == scmd)
680 break;
681 }
682
683 if (i < ATA_MAX_QUEUE) {
684 /* the scmd has an associated qc */
685 if (!(qc->flags & ATA_QCFLAG_FAILED)) {
686 /* which hasn't failed yet, timeout */
687 qc->err_mask |= AC_ERR_TIMEOUT;
688 qc->flags |= ATA_QCFLAG_FAILED;
689 nr_timedout++;
690 }
691 } else {
692 /* Normal completion occurred after
693 * SCSI timeout but before this point.
694 * Successfully complete it.
695 */
696 scmd->retries = scmd->allowed;
697 scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
698 }
699 }
700
701 /* If we have timed out qcs, they belong to EH from
702 * this point but the state of the controller is
703 * unknown. Freeze the port to make sure the IRQ
704 * handler doesn't diddle with those qcs. This must
705 * be done atomically w.r.t. setting QCFLAG_FAILED.
706 */
707 if (nr_timedout)
708 __ata_port_freeze(ap);
709
710 spin_unlock_irqrestore(ap->lock, flags);
711
712 /* initialize eh_tries */
713 ap->eh_tries = ATA_EH_MAX_TRIES;
714 } else
715 spin_unlock_wait(ap->lock);
716
717 }
718 EXPORT_SYMBOL(ata_scsi_cmd_error_handler);
719
720 /**
721 * ata_scsi_port_error_handler - recover the port after the commands
722 * @host: SCSI host containing the port
723 * @ap: the ATA port
724 *
725 * Handle the recovery of the port @ap after all the commands
726 * have been recovered.
727 */
728 void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
729 {
730 unsigned long flags;
731
732 /* invoke error handler */
733 if (ap->ops->error_handler) {
734 struct ata_link *link;
735
736 /* acquire EH ownership */
737 ata_eh_acquire(ap);
738 repeat:
739 /* kill fast drain timer */
740 del_timer_sync(&ap->fastdrain_timer);
741
742 /* process port resume request */
743 ata_eh_handle_port_resume(ap);
744
745 /* fetch & clear EH info */
746 spin_lock_irqsave(ap->lock, flags);
747
748 ata_for_each_link(link, ap, HOST_FIRST) {
749 struct ata_eh_context *ehc = &link->eh_context;
750 struct ata_device *dev;
751
752 memset(&link->eh_context, 0, sizeof(link->eh_context));
753 link->eh_context.i = link->eh_info;
754 memset(&link->eh_info, 0, sizeof(link->eh_info));
755
756 ata_for_each_dev(dev, link, ENABLED) {
757 int devno = dev->devno;
758
759 ehc->saved_xfer_mode[devno] = dev->xfer_mode;
760 if (ata_ncq_enabled(dev))
761 ehc->saved_ncq_enabled |= 1 << devno;
762 }
763 }
764
765 ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
766 ap->pflags &= ~ATA_PFLAG_EH_PENDING;
767 ap->excl_link = NULL; /* don't maintain exclusion over EH */
768
769 spin_unlock_irqrestore(ap->lock, flags);
770
771 /* invoke EH, skip if unloading or suspended */
772 if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
773 ap->ops->error_handler(ap);
774 else {
775 /* if unloading, commence suicide */
776 if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
777 !(ap->pflags & ATA_PFLAG_UNLOADED))
778 ata_eh_unload(ap);
779 ata_eh_finish(ap);
780 }
781
782 /* process port suspend request */
783 ata_eh_handle_port_suspend(ap);
784
785 /* Exception might have happened after ->error_handler
786 * recovered the port but before this point. Repeat
787 * EH in such case.
788 */
789 spin_lock_irqsave(ap->lock, flags);
790
791 if (ap->pflags & ATA_PFLAG_EH_PENDING) {
792 if (--ap->eh_tries) {
793 spin_unlock_irqrestore(ap->lock, flags);
794 goto repeat;
795 }
796 ata_port_err(ap,
797 "EH pending after %d tries, giving up\n",
798 ATA_EH_MAX_TRIES);
799 ap->pflags &= ~ATA_PFLAG_EH_PENDING;
800 }
801
802 /* this run is complete, make sure EH info is clear */
803 ata_for_each_link(link, ap, HOST_FIRST)
804 memset(&link->eh_info, 0, sizeof(link->eh_info));
805
806 /* end eh (clear host_eh_scheduled) while holding
807 * ap->lock such that if exception occurs after this
808 * point but before EH completion, SCSI midlayer will
809 * re-initiate EH.
810 */
811 ap->ops->end_eh(ap);
812
813 spin_unlock_irqrestore(ap->lock, flags);
814 ata_eh_release(ap);
815 } else {
816 WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL);
817 ap->ops->eng_timeout(ap);
818 }
819
820 scsi_eh_flush_done_q(&ap->eh_done_q);
821
822 /* clean up */
823 spin_lock_irqsave(ap->lock, flags);
824
825 if (ap->pflags & ATA_PFLAG_LOADING)
826 ap->pflags &= ~ATA_PFLAG_LOADING;
827 else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG)
828 schedule_delayed_work(&ap->hotplug_task, 0);
829
830 if (ap->pflags & ATA_PFLAG_RECOVERED)
831 ata_port_info(ap, "EH complete\n");
832
833 ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);
834
835 /* tell wait_eh that we're done */
836 ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
837 wake_up_all(&ap->eh_wait_q);
838
839 spin_unlock_irqrestore(ap->lock, flags);
840 }
841 EXPORT_SYMBOL_GPL(ata_scsi_port_error_handler);
842
843 /**
844 * ata_port_wait_eh - Wait for the currently pending EH to complete
845 * @ap: Port to wait EH for
846 *
847 * Wait until the currently pending EH is complete.
848 *
849 * LOCKING:
850 * Kernel thread context (may sleep).
851 */
852 void ata_port_wait_eh(struct ata_port *ap)
853 {
854 unsigned long flags;
855 DEFINE_WAIT(wait);
856
857 retry:
858 spin_lock_irqsave(ap->lock, flags);
859
860 while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
861 prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
862 spin_unlock_irqrestore(ap->lock, flags);
863 schedule();
864 spin_lock_irqsave(ap->lock, flags);
865 }
866 finish_wait(&ap->eh_wait_q, &wait);
867
868 spin_unlock_irqrestore(ap->lock, flags);
869
870 /* make sure SCSI EH is complete */
871 if (scsi_host_in_recovery(ap->scsi_host)) {
872 ata_msleep(ap, 10);
873 goto retry;
874 }
875 }
876 EXPORT_SYMBOL_GPL(ata_port_wait_eh);
877
878 static int ata_eh_nr_in_flight(struct ata_port *ap)
879 {
880 unsigned int tag;
881 int nr = 0;
882
883 /* count only non-internal commands */
884 for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++)
885 if (ata_qc_from_tag(ap, tag))
886 nr++;
887
888 return nr;
889 }
890
891 void ata_eh_fastdrain_timerfn(unsigned long arg)
892 {
893 struct ata_port *ap = (void *)arg;
894 unsigned long flags;
895 int cnt;
896
897 spin_lock_irqsave(ap->lock, flags);
898
899 cnt = ata_eh_nr_in_flight(ap);
900
901 /* are we done? */
902 if (!cnt)
903 goto out_unlock;
904
905 if (cnt == ap->fastdrain_cnt) {
906 unsigned int tag;
907
908 /* No progress during the last interval, tag all
909 * in-flight qcs as timed out and freeze the port.
910 */
911 for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++) {
912 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
913 if (qc)
914 qc->err_mask |= AC_ERR_TIMEOUT;
915 }
916
917 ata_port_freeze(ap);
918 } else {
919 /* some qcs have finished, give it another chance */
920 ap->fastdrain_cnt = cnt;
921 ap->fastdrain_timer.expires =
922 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
923 add_timer(&ap->fastdrain_timer);
924 }
925
926 out_unlock:
927 spin_unlock_irqrestore(ap->lock, flags);
928 }
929
930 /**
931 * ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain
932 * @ap: target ATA port
933 * @fastdrain: activate fast drain
934 *
935 * Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain
936 * is non-zero and EH wasn't pending before. Fast drain ensures
937 * that EH kicks in in a timely manner.
938 *
939 * LOCKING:
940 * spin_lock_irqsave(host lock)
941 */
942 static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
943 {
944 int cnt;
945
946 /* already scheduled? */
947 if (ap->pflags & ATA_PFLAG_EH_PENDING)
948 return;
949
950 ap->pflags |= ATA_PFLAG_EH_PENDING;
951
952 if (!fastdrain)
953 return;
954
955 /* do we have in-flight qcs? */
956 cnt = ata_eh_nr_in_flight(ap);
957 if (!cnt)
958 return;
959
960 /* activate fast drain */
961 ap->fastdrain_cnt = cnt;
962 ap->fastdrain_timer.expires =
963 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
964 add_timer(&ap->fastdrain_timer);
965 }
966
967 /**
968 * ata_qc_schedule_eh - schedule qc for error handling
969 * @qc: command to schedule error handling for
970 *
971 * Schedule error handling for @qc. EH will kick in as soon as
972 * other commands are drained.
973 *
974 * LOCKING:
975 * spin_lock_irqsave(host lock)
976 */
977 void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
978 {
979 struct ata_port *ap = qc->ap;
980 struct request_queue *q = qc->scsicmd->device->request_queue;
981 unsigned long flags;
982
983 WARN_ON(!ap->ops->error_handler);
984
985 qc->flags |= ATA_QCFLAG_FAILED;
986 ata_eh_set_pending(ap, 1);
987
988 /* The following will fail if timeout has already expired.
989 * ata_scsi_error() takes care of such scmds on EH entry.
990 * Note that ATA_QCFLAG_FAILED is unconditionally set after
991 * this function completes.
992 */
993 spin_lock_irqsave(q->queue_lock, flags);
994 blk_abort_request(qc->scsicmd->request);
995 spin_unlock_irqrestore(q->queue_lock, flags);
996 }
997
998 /**
999 * ata_std_sched_eh - non-libsas ata_ports issue eh with this common routine
1000 * @ap: ATA port to schedule EH for
1001 *
1002 * LOCKING: inherited from ata_port_schedule_eh
1003 * spin_lock_irqsave(host lock)
1004 */
1005 void ata_std_sched_eh(struct ata_port *ap)
1006 {
1007 WARN_ON(!ap->ops->error_handler);
1008
1009 if (ap->pflags & ATA_PFLAG_INITIALIZING)
1010 return;
1011
1012 ata_eh_set_pending(ap, 1);
1013 scsi_schedule_eh(ap->scsi_host);
1014
1015 DPRINTK("port EH scheduled\n");
1016 }
1017 EXPORT_SYMBOL_GPL(ata_std_sched_eh);
1018
1019 /**
1020 * ata_std_end_eh - non-libsas ata_ports complete eh with this common routine
1021 * @ap: ATA port to end EH for
1022 *
1023 * In the libata object model there is a 1:1 mapping of ata_port to
1024 * shost, so host fields can be directly manipulated under ap->lock, in
1025 * the libsas case we need to hold a lock at the ha->level to coordinate
1026 * these events.
1027 *
1028 * LOCKING:
1029 * spin_lock_irqsave(host lock)
1030 */
1031 void ata_std_end_eh(struct ata_port *ap)
1032 {
1033 struct Scsi_Host *host = ap->scsi_host;
1034
1035 host->host_eh_scheduled = 0;
1036 }
1037 EXPORT_SYMBOL(ata_std_end_eh);
1038
1039
1040 /**
1041 * ata_port_schedule_eh - schedule error handling without a qc
1042 * @ap: ATA port to schedule EH for
1043 *
1044 * Schedule error handling for @ap. EH will kick in as soon as
1045 * all commands are drained.
1046 *
1047 * LOCKING:
1048 * spin_lock_irqsave(host lock)
1049 */
1050 void ata_port_schedule_eh(struct ata_port *ap)
1051 {
1052 /* see: ata_std_sched_eh, unless you know better */
1053 ap->ops->sched_eh(ap);
1054 }
1055
1056 static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link)
1057 {
1058 int tag, nr_aborted = 0;
1059
1060 WARN_ON(!ap->ops->error_handler);
1061
1062 /* we're gonna abort all commands, no need for fast drain */
1063 ata_eh_set_pending(ap, 0);
1064
1065 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
1066 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
1067
1068 if (qc && (!link || qc->dev->link == link)) {
1069 qc->flags |= ATA_QCFLAG_FAILED;
1070 ata_qc_complete(qc);
1071 nr_aborted++;
1072 }
1073 }
1074
1075 if (!nr_aborted)
1076 ata_port_schedule_eh(ap);
1077
1078 return nr_aborted;
1079 }
1080
1081 /**
1082 * ata_link_abort - abort all qc's on the link
1083 * @link: ATA link to abort qc's for
1084 *
1085 * Abort all active qc's on @link and schedule EH.
1086 *
1087 * LOCKING:
1088 * spin_lock_irqsave(host lock)
1089 *
1090 * RETURNS:
1091 * Number of aborted qc's.
1092 */
1093 int ata_link_abort(struct ata_link *link)
1094 {
1095 return ata_do_link_abort(link->ap, link);
1096 }
1097
1098 /**
1099 * ata_port_abort - abort all qc's on the port
1100 * @ap: ATA port to abort qc's for
1101 *
1102 * Abort all active qc's of @ap and schedule EH.
1103 *
1104 * LOCKING:
1105 * spin_lock_irqsave(host lock)
1106 *
1107 * RETURNS:
1108 * Number of aborted qc's.
1109 */
1110 int ata_port_abort(struct ata_port *ap)
1111 {
1112 return ata_do_link_abort(ap, NULL);
1113 }
1114
1115 /**
1116 * __ata_port_freeze - freeze port
1117 * @ap: ATA port to freeze
1118 *
1119 * This function is called when HSM violation or some other
1120 * condition disrupts normal operation of the port. Frozen port
1121 * is not allowed to perform any operation until the port is
1122 * thawed, which usually follows a successful reset.
1123 *
1124 * ap->ops->freeze() callback can be used for freezing the port
1125 * hardware-wise (e.g. mask interrupt and stop DMA engine). If a
1126 * port cannot be frozen hardware-wise, the interrupt handler
1127 * must ack and clear interrupts unconditionally while the port
1128 * is frozen.
1129 *
1130 * LOCKING:
1131 * spin_lock_irqsave(host lock)
1132 */
1133 static void __ata_port_freeze(struct ata_port *ap)
1134 {
1135 WARN_ON(!ap->ops->error_handler);
1136
1137 if (ap->ops->freeze)
1138 ap->ops->freeze(ap);
1139
1140 ap->pflags |= ATA_PFLAG_FROZEN;
1141
1142 DPRINTK("ata%u port frozen\n", ap->print_id);
1143 }
1144
1145 /**
1146 * ata_port_freeze - abort & freeze port
1147 * @ap: ATA port to freeze
1148 *
1149 * Abort and freeze @ap. The freeze operation must be called
1150 * first, because some hardware requires special operations
1151 * before the taskfile registers are accessible.
1152 *
1153 * LOCKING:
1154 * spin_lock_irqsave(host lock)
1155 *
1156 * RETURNS:
1157 * Number of aborted commands.
1158 */
1159 int ata_port_freeze(struct ata_port *ap)
1160 {
1161 int nr_aborted;
1162
1163 WARN_ON(!ap->ops->error_handler);
1164
1165 __ata_port_freeze(ap);
1166 nr_aborted = ata_port_abort(ap);
1167
1168 return nr_aborted;
1169 }
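
/*
 * Illustrative sketch (not part of this driver): a typical LLDD
 * interrupt handler reacts to a fatal controller error by recording
 * the cause and freezing the port, which aborts everything in flight
 * and schedules EH:
 *
 *	struct ata_eh_info *ehi = &ap->link.eh_info;
 *
 *	if (irq_stat & HYPOTHETICAL_FATAL_ERR) {
 *		ehi->err_mask |= AC_ERR_HSM;
 *		ata_ehi_push_desc(ehi, "irq_stat 0x%08x", irq_stat);
 *		ata_port_freeze(ap);
 *	}
 *
 * "irq_stat" and HYPOTHETICAL_FATAL_ERR stand in for driver-specific
 * status bits; real callers hold ap->lock here.
 */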
1170
1171 /**
1172 * sata_async_notification - SATA async notification handler
1173 * @ap: ATA port where async notification is received
1174 *
1175 * Handler to be called when async notification via SDB FIS is
1176 * received. This function schedules EH if necessary.
1177 *
1178 * LOCKING:
1179 * spin_lock_irqsave(host lock)
1180 *
1181 * RETURNS:
1182 * 1 if EH is scheduled, 0 otherwise.
1183 */
1184 int sata_async_notification(struct ata_port *ap)
1185 {
1186 u32 sntf;
1187 int rc;
1188
1189 if (!(ap->flags & ATA_FLAG_AN))
1190 return 0;
1191
1192 rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf);
1193 if (rc == 0)
1194 sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf);
1195
1196 if (!sata_pmp_attached(ap) || rc) {
1197 /* PMP is not attached or SNTF is not available */
1198 if (!sata_pmp_attached(ap)) {
1199 /* PMP is not attached. Check whether ATAPI
1200 * AN is configured. If so, notify media
1201 * change.
1202 */
1203 struct ata_device *dev = ap->link.device;
1204
1205 if ((dev->class == ATA_DEV_ATAPI) &&
1206 (dev->flags & ATA_DFLAG_AN))
1207 ata_scsi_media_change_notify(dev);
1208 return 0;
1209 } else {
1210 /* PMP is attached but SNTF is not available.
1211 * ATAPI async media change notification is
1212 * not used. The PMP must be reporting PHY
1213 * status change, schedule EH.
1214 */
1215 ata_port_schedule_eh(ap);
1216 return 1;
1217 }
1218 } else {
1219 /* PMP is attached and SNTF is available */
1220 struct ata_link *link;
1221
1222 /* check and notify ATAPI AN */
1223 ata_for_each_link(link, ap, EDGE) {
1224 if (!(sntf & (1 << link->pmp)))
1225 continue;
1226
1227 if ((link->device->class == ATA_DEV_ATAPI) &&
1228 (link->device->flags & ATA_DFLAG_AN))
1229 ata_scsi_media_change_notify(link->device);
1230 }
1231
1232 /* If PMP is reporting that PHY status of some
1233 * downstream ports has changed, schedule EH.
1234 */
1235 if (sntf & (1 << SATA_PMP_CTRL_PORT)) {
1236 ata_port_schedule_eh(ap);
1237 return 1;
1238 }
1239
1240 return 0;
1241 }
1242 }
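
/*
 * Illustrative sketch (not part of this driver): controllers that raise
 * an interrupt for Set Device Bits FISes with the notification bit set
 * simply forward the event from their interrupt handler:
 *
 *	if (irq_stat & HYPOTHETICAL_SDB_NOTIFY)
 *		sata_async_notification(ap);
 *
 * The helper then decides between ATAPI media change notification and
 * scheduling EH for PMP PHY status changes, as described above.
 */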
1243
1244 /**
1245 * ata_eh_freeze_port - EH helper to freeze port
1246 * @ap: ATA port to freeze
1247 *
1248 * Freeze @ap.
1249 *
1250 * LOCKING:
1251 * None.
1252 */
1253 void ata_eh_freeze_port(struct ata_port *ap)
1254 {
1255 unsigned long flags;
1256
1257 if (!ap->ops->error_handler)
1258 return;
1259
1260 spin_lock_irqsave(ap->lock, flags);
1261 __ata_port_freeze(ap);
1262 spin_unlock_irqrestore(ap->lock, flags);
1263 }
1264
1265 /**
1266 * ata_eh_thaw_port - EH helper to thaw port
1267 * @ap: ATA port to thaw
1268 *
1269 * Thaw frozen port @ap.
1270 *
1271 * LOCKING:
1272 * None.
1273 */
1274 void ata_eh_thaw_port(struct ata_port *ap)
1275 {
1276 unsigned long flags;
1277
1278 if (!ap->ops->error_handler)
1279 return;
1280
1281 spin_lock_irqsave(ap->lock, flags);
1282
1283 ap->pflags &= ~ATA_PFLAG_FROZEN;
1284
1285 if (ap->ops->thaw)
1286 ap->ops->thaw(ap);
1287
1288 spin_unlock_irqrestore(ap->lock, flags);
1289
1290 DPRINTK("ata%u port thawed\n", ap->print_id);
1291 }
1292
1293 static void ata_eh_scsidone(struct scsi_cmnd *scmd)
1294 {
1295 /* nada */
1296 }
1297
1298 static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
1299 {
1300 struct ata_port *ap = qc->ap;
1301 struct scsi_cmnd *scmd = qc->scsicmd;
1302 unsigned long flags;
1303
1304 spin_lock_irqsave(ap->lock, flags);
1305 qc->scsidone = ata_eh_scsidone;
1306 __ata_qc_complete(qc);
1307 WARN_ON(ata_tag_valid(qc->tag));
1308 spin_unlock_irqrestore(ap->lock, flags);
1309
1310 scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
1311 }
1312
1313 /**
1314 * ata_eh_qc_complete - Complete an active ATA command from EH
1315 * @qc: Command to complete
1316 *
1317 * Indicate to the mid and upper layers that an ATA command has
1318 * completed. To be used from EH.
1319 */
1320 void ata_eh_qc_complete(struct ata_queued_cmd *qc)
1321 {
1322 struct scsi_cmnd *scmd = qc->scsicmd;
1323 scmd->retries = scmd->allowed;
1324 __ata_eh_qc_complete(qc);
1325 }
1326
1327 /**
1328 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
1329 * @qc: Command to retry
1330 *
1331 * Indicate to the mid and upper layers that an ATA command
1332 * should be retried. To be used from EH.
1333 *
1334 * SCSI midlayer limits the number of retries to scmd->allowed.
1335 * scmd->allowed is incremented for commands which get retried
1336 * due to unrelated failures (qc->err_mask is zero).
1337 */
1338 void ata_eh_qc_retry(struct ata_queued_cmd *qc)
1339 {
1340 struct scsi_cmnd *scmd = qc->scsicmd;
1341 if (!qc->err_mask)
1342 scmd->allowed++;
1343 __ata_eh_qc_complete(qc);
1344 }
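
/*
 * Illustrative sketch (not part of this driver): after recovery, EH
 * walks the failed commands and either completes or retries them,
 * roughly:
 *
 *	if (recovered)			// e.g. valid sense data obtained
 *		ata_eh_qc_complete(qc);
 *	else
 *		ata_eh_qc_retry(qc);
 *
 * "recovered" is a hypothetical condition; ata_eh_finish() later in
 * this file applies a similar per-qc decision.
 */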
1345
1346 /**
1347 * ata_dev_disable - disable ATA device
1348 * @dev: ATA device to disable
1349 *
1350 * Disable @dev.
1351 *
1352 * Locking:
1353 * EH context.
1354 */
1355 void ata_dev_disable(struct ata_device *dev)
1356 {
1357 if (!ata_dev_enabled(dev))
1358 return;
1359
1360 if (ata_msg_drv(dev->link->ap))
1361 ata_dev_warn(dev, "disabled\n");
1362 ata_acpi_on_disable(dev);
1363 ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET);
1364 dev->class++;
1365
1366 /* From now till the next successful probe, ering is used to
1367 * track probe failures. Clear accumulated device error info.
1368 */
1369 ata_ering_clear(&dev->ering);
1370 }
1371
1372 /**
1373 * ata_eh_detach_dev - detach ATA device
1374 * @dev: ATA device to detach
1375 *
1376 * Detach @dev.
1377 *
1378 * LOCKING:
1379 * None.
1380 */
1381 void ata_eh_detach_dev(struct ata_device *dev)
1382 {
1383 struct ata_link *link = dev->link;
1384 struct ata_port *ap = link->ap;
1385 struct ata_eh_context *ehc = &link->eh_context;
1386 unsigned long flags;
1387
1388 ata_dev_disable(dev);
1389
1390 spin_lock_irqsave(ap->lock, flags);
1391
1392 dev->flags &= ~ATA_DFLAG_DETACH;
1393
1394 if (ata_scsi_offline_dev(dev)) {
1395 dev->flags |= ATA_DFLAG_DETACHED;
1396 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
1397 }
1398
1399 /* clear per-dev EH info */
1400 ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK);
1401 ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK);
1402 ehc->saved_xfer_mode[dev->devno] = 0;
1403 ehc->saved_ncq_enabled &= ~(1 << dev->devno);
1404
1405 spin_unlock_irqrestore(ap->lock, flags);
1406 }
1407
1408 /**
1409 * ata_eh_about_to_do - about to perform eh_action
1410 * @link: target ATA link
1411 * @dev: target ATA dev for per-dev action (can be NULL)
1412 * @action: action about to be performed
1413 *
1414 * Called just before performing EH actions to clear related bits
1415 * in @link->eh_info such that eh actions are not unnecessarily
1416 * repeated.
1417 *
1418 * LOCKING:
1419 * None.
1420 */
1421 void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,
1422 unsigned int action)
1423 {
1424 struct ata_port *ap = link->ap;
1425 struct ata_eh_info *ehi = &link->eh_info;
1426 struct ata_eh_context *ehc = &link->eh_context;
1427 unsigned long flags;
1428
1429 spin_lock_irqsave(ap->lock, flags);
1430
1431 ata_eh_clear_action(link, dev, ehi, action);
1432
1433 /* About to take EH action, set RECOVERED. Ignore actions on
1434 * slave links as master will do them again.
1435 */
1436 if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link)
1437 ap->pflags |= ATA_PFLAG_RECOVERED;
1438
1439 spin_unlock_irqrestore(ap->lock, flags);
1440 }
1441
1442 /**
1443 * ata_eh_done - EH action complete
1444 * @link: target ATA link
1445 * @dev: target ATA dev for per-dev action (can be NULL)
1446 * @action: action just completed
1447 *
1448 * Called right after performing EH actions to clear related bits
1449 * in @link->eh_context.
1450 *
1451 * LOCKING:
1452 * None.
1453 */
1454 void ata_eh_done(struct ata_link *link, struct ata_device *dev,
1455 unsigned int action)
1456 {
1457 struct ata_eh_context *ehc = &link->eh_context;
1458
1459 ata_eh_clear_action(link, dev, &ehc->i, action);
1460 }
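
/*
 * Illustrative sketch (not part of this driver): EH actions are
 * bracketed so that a repeated EH pass doesn't redo finished work,
 * e.g. for a per-device revalidation:
 *
 *	ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE);
 *	rc = hypothetical_revalidate(dev);
 *	if (rc == 0)
 *		ata_eh_done(link, dev, ATA_EH_REVALIDATE);
 *
 * ATA_EH_REVALIDATE is a real per-device action bit; the revalidation
 * call is only a stand-in for the actual recovery helpers.
 */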
1461
1462 /**
1463 * ata_err_string - convert err_mask to descriptive string
1464 * @err_mask: error mask to convert to string
1465 *
1466 * Convert @err_mask to descriptive string. Errors are
1467 * prioritized according to severity and only the most severe
1468 * error is reported.
1469 *
1470 * LOCKING:
1471 * None.
1472 *
1473 * RETURNS:
1474 * Descriptive string for @err_mask
1475 */
1476 static const char *ata_err_string(unsigned int err_mask)
1477 {
1478 if (err_mask & AC_ERR_HOST_BUS)
1479 return "host bus error";
1480 if (err_mask & AC_ERR_ATA_BUS)
1481 return "ATA bus error";
1482 if (err_mask & AC_ERR_TIMEOUT)
1483 return "timeout";
1484 if (err_mask & AC_ERR_HSM)
1485 return "HSM violation";
1486 if (err_mask & AC_ERR_SYSTEM)
1487 return "internal error";
1488 if (err_mask & AC_ERR_MEDIA)
1489 return "media error";
1490 if (err_mask & AC_ERR_INVALID)
1491 return "invalid argument";
1492 if (err_mask & AC_ERR_DEV)
1493 return "device error";
1494 return "unknown error";
1495 }
1496
1497 /**
1498 * ata_read_log_page - read a specific log page
1499 * @dev: target device
1500 * @log: log to read
1501 * @page: page to read
1502 * @buf: buffer to store read page
1503 * @sectors: number of sectors to read
1504 *
1505 * Read log page using READ_LOG_EXT command.
1506 *
1507 * LOCKING:
1508 * Kernel thread context (may sleep).
1509 *
1510 * RETURNS:
1511 * 0 on success, AC_ERR_* mask otherwise.
1512 */
1513 unsigned int ata_read_log_page(struct ata_device *dev, u8 log,
1514 u8 page, void *buf, unsigned int sectors)
1515 {
1516 unsigned long ap_flags = dev->link->ap->flags;
1517 struct ata_taskfile tf;
1518 unsigned int err_mask;
1519 bool dma = false;
1520
1521 DPRINTK("read log page - log 0x%x, page 0x%x\n", log, page);
1522
1523 /*
1524 * Return error without actually issuing the command on controllers
1525 * which, e.g., lock up on a read log page.
1526 */
1527 if (ap_flags & ATA_FLAG_NO_LOG_PAGE)
1528 return AC_ERR_DEV;
1529
1530 retry:
1531 ata_tf_init(dev, &tf);
1532 if (dev->dma_mode && ata_id_has_read_log_dma_ext(dev->id) &&
1533 !(dev->horkage & ATA_HORKAGE_NO_NCQ_LOG)) {
1534 tf.command = ATA_CMD_READ_LOG_DMA_EXT;
1535 tf.protocol = ATA_PROT_DMA;
1536 dma = true;
1537 } else {
1538 tf.command = ATA_CMD_READ_LOG_EXT;
1539 tf.protocol = ATA_PROT_PIO;
1540 dma = false;
1541 }
1542 tf.lbal = log;
1543 tf.lbam = page;
1544 tf.nsect = sectors;
1545 tf.hob_nsect = sectors >> 8;
1546 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE;
1547
1548 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE,
1549 buf, sectors * ATA_SECT_SIZE, 0);
1550
1551 if (err_mask && dma) {
1552 dev->horkage |= ATA_HORKAGE_NO_NCQ_LOG;
1553 ata_dev_warn(dev, "READ LOG DMA EXT failed, trying unqueued\n");
1554 goto retry;
1555 }
1556
1557 DPRINTK("EXIT, err_mask=%x\n", err_mask);
1558 return err_mask;
1559 }
1560
1561 /**
1562 * ata_eh_read_log_10h - Read log page 10h for NCQ error details
1563 * @dev: Device to read log page 10h from
1564 * @tag: Resulting tag of the failed command
1565 * @tf: Resulting taskfile registers of the failed command
1566 *
1567 * Read log page 10h to obtain NCQ error details and clear error
1568 * condition.
1569 *
1570 * LOCKING:
1571 * Kernel thread context (may sleep).
1572 *
1573 * RETURNS:
1574 * 0 on success, -errno otherwise.
1575 */
1576 static int ata_eh_read_log_10h(struct ata_device *dev,
1577 int *tag, struct ata_taskfile *tf)
1578 {
1579 u8 *buf = dev->link->ap->sector_buf;
1580 unsigned int err_mask;
1581 u8 csum;
1582 int i;
1583
1584 err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, 0, buf, 1);
1585 if (err_mask)
1586 return -EIO;
1587
1588 csum = 0;
1589 for (i = 0; i < ATA_SECT_SIZE; i++)
1590 csum += buf[i];
1591 if (csum)
1592 ata_dev_warn(dev, "invalid checksum 0x%x on log page 10h\n",
1593 csum);
1594
1595 if (buf[0] & 0x80)
1596 return -ENOENT;
1597
1598 *tag = buf[0] & 0x1f;
1599
1600 tf->command = buf[2];
1601 tf->feature = buf[3];
1602 tf->lbal = buf[4];
1603 tf->lbam = buf[5];
1604 tf->lbah = buf[6];
1605 tf->device = buf[7];
1606 tf->hob_lbal = buf[8];
1607 tf->hob_lbam = buf[9];
1608 tf->hob_lbah = buf[10];
1609 tf->nsect = buf[12];
1610 tf->hob_nsect = buf[13];
1611
1612 return 0;
1613 }
1614
1615 /**
1616 * atapi_eh_tur - perform ATAPI TEST_UNIT_READY
1617 * @dev: target ATAPI device
1618 * @r_sense_key: out parameter for sense_key
1619 *
1620 * Perform ATAPI TEST_UNIT_READY.
1621 *
1622 * LOCKING:
1623 * EH context (may sleep).
1624 *
1625 * RETURNS:
1626 * 0 on success, AC_ERR_* mask on failure.
1627 */
1628 unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key)
1629 {
1630 u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
1631 struct ata_taskfile tf;
1632 unsigned int err_mask;
1633
1634 ata_tf_init(dev, &tf);
1635
1636 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
1637 tf.command = ATA_CMD_PACKET;
1638 tf.protocol = ATAPI_PROT_NODATA;
1639
1640 err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0);
1641 if (err_mask == AC_ERR_DEV)
1642 *r_sense_key = tf.feature >> 4;
1643 return err_mask;
1644 }
1645
1646 /**
1647 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
1648 * @dev: device to perform REQUEST_SENSE to
1649 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
1650 * @dfl_sense_key: default sense key to use
1651 *
1652 * Perform ATAPI REQUEST_SENSE after the device reported CHECK
1653 * SENSE. This function is an EH helper.
1654 *
1655 * LOCKING:
1656 * Kernel thread context (may sleep).
1657 *
1658 * RETURNS:
1659 * 0 on success, AC_ERR_* mask on failure
1660 */
1661 unsigned int atapi_eh_request_sense(struct ata_device *dev,
1662 u8 *sense_buf, u8 dfl_sense_key)
1663 {
1664 u8 cdb[ATAPI_CDB_LEN] =
1665 { REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 };
1666 struct ata_port *ap = dev->link->ap;
1667 struct ata_taskfile tf;
1668
1669 DPRINTK("ATAPI request sense\n");
1670
1671 memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);
1672
1673 /* initialize sense_buf with the error register,
1674 * for the case where they are -not- overwritten
1675 */
1676 sense_buf[0] = 0x70;
1677 sense_buf[2] = dfl_sense_key;
1678
1679 /* some devices time out if garbage left in tf */
1680 ata_tf_init(dev, &tf);
1681
1682 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
1683 tf.command = ATA_CMD_PACKET;
1684
1685 /* is it pointless to prefer PIO for "safety reasons"? */
1686 if (ap->flags & ATA_FLAG_PIO_DMA) {
1687 tf.protocol = ATAPI_PROT_DMA;
1688 tf.feature |= ATAPI_PKT_DMA;
1689 } else {
1690 tf.protocol = ATAPI_PROT_PIO;
1691 tf.lbam = SCSI_SENSE_BUFFERSIZE;
1692 tf.lbah = 0;
1693 }
1694
1695 return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
1696 sense_buf, SCSI_SENSE_BUFFERSIZE, 0);
1697 }
1698
1699 /**
1700 * ata_eh_analyze_serror - analyze SError for a failed port
1701 * @link: ATA link to analyze SError for
1702 *
1703 * Analyze SError if available and further determine cause of
1704 * failure.
1705 *
1706 * LOCKING:
1707 * None.
1708 */
1709 static void ata_eh_analyze_serror(struct ata_link *link)
1710 {
1711 struct ata_eh_context *ehc = &link->eh_context;
1712 u32 serror = ehc->i.serror;
1713 unsigned int err_mask = 0, action = 0;
1714 u32 hotplug_mask;
1715
1716 if (serror & (SERR_PERSISTENT | SERR_DATA)) {
1717 err_mask |= AC_ERR_ATA_BUS;
1718 action |= ATA_EH_RESET;
1719 }
1720 if (serror & SERR_PROTOCOL) {
1721 err_mask |= AC_ERR_HSM;
1722 action |= ATA_EH_RESET;
1723 }
1724 if (serror & SERR_INTERNAL) {
1725 err_mask |= AC_ERR_SYSTEM;
1726 action |= ATA_EH_RESET;
1727 }
1728
1729 /* Determine whether a hotplug event has occurred. Both
1730 * SError.N/X are considered hotplug events for enabled or
1731 * host links. For disabled PMP links, only the N bit is
1732 * considered, as the X bit is left at 1 for link plugging.
1733 */
1734 if (link->lpm_policy > ATA_LPM_MAX_POWER)
1735 hotplug_mask = 0; /* hotplug doesn't work w/ LPM */
1736 else if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link))
1737 hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG;
1738 else
1739 hotplug_mask = SERR_PHYRDY_CHG;
1740
1741 if (serror & hotplug_mask)
1742 ata_ehi_hotplugged(&ehc->i);
1743
1744 ehc->i.err_mask |= err_mask;
1745 ehc->i.action |= action;
1746 }
1747
1748 /**
1749 * ata_eh_analyze_ncq_error - analyze NCQ error
1750 * @link: ATA link to analyze NCQ error for
1751 *
1752 * Read log page 10h, determine the offending qc and acquire
1753 * error status TF. For NCQ device errors, all LLDDs have to do
1754 * is set AC_ERR_DEV in ehi->err_mask. This function takes
1755 * care of the rest.
1756 *
1757 * LOCKING:
1758 * Kernel thread context (may sleep).
1759 */
1760 void ata_eh_analyze_ncq_error(struct ata_link *link)
1761 {
1762 struct ata_port *ap = link->ap;
1763 struct ata_eh_context *ehc = &link->eh_context;
1764 struct ata_device *dev = link->device;
1765 struct ata_queued_cmd *qc;
1766 struct ata_taskfile tf;
1767 int tag, rc;
1768
1769 /* if frozen, we can't do much */
1770 if (ap->pflags & ATA_PFLAG_FROZEN)
1771 return;
1772
1773 /* is it NCQ device error? */
1774 if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
1775 return;
1776
1777 /* has LLDD analyzed already? */
1778 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
1779 qc = __ata_qc_from_tag(ap, tag);
1780
1781 if (!(qc->flags & ATA_QCFLAG_FAILED))
1782 continue;
1783
1784 if (qc->err_mask)
1785 return;
1786 }
1787
1788 /* okay, this error is ours */
1789 memset(&tf, 0, sizeof(tf));
1790 rc = ata_eh_read_log_10h(dev, &tag, &tf);
1791 if (rc) {
1792 ata_link_err(link, "failed to read log page 10h (errno=%d)\n",
1793 rc);
1794 return;
1795 }
1796
1797 if (!(link->sactive & (1 << tag))) {
1798 ata_link_err(link, "log page 10h reported inactive tag %d\n",
1799 tag);
1800 return;
1801 }
1802
1803 /* we've got the perpetrator, condemn it */
1804 qc = __ata_qc_from_tag(ap, tag);
1805 memcpy(&qc->result_tf, &tf, sizeof(tf));
1806 qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
1807 qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ;
1808 ehc->i.err_mask &= ~AC_ERR_DEV;
1809 }
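
/*
 * Illustrative sketch (not part of this driver): for an NCQ device
 * error, all an LLDD's error handler needs to do before invoking the
 * generic recovery path is flag the device error, e.g.:
 *
 *	ehc->i.err_mask |= AC_ERR_DEV;
 *
 * ata_eh_analyze_ncq_error() is then called (directly or via the
 * standard autopsy) and takes care of reading log page 10h, picking
 * the failed tag and transferring the error taskfile to that qc.
 */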
1810
1811 /**
1812 * ata_eh_analyze_tf - analyze taskfile of a failed qc
1813 * @qc: qc to analyze
1814 * @tf: Taskfile registers to analyze
1815 *
1816 * Analyze taskfile of @qc and further determine cause of
1817 * failure. This function also requests ATAPI sense data if
1818 * available.
1819 *
1820 * LOCKING:
1821 * Kernel thread context (may sleep).
1822 *
1823 * RETURNS:
1824 * Determined recovery action
1825 */
1826 static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
1827 const struct ata_taskfile *tf)
1828 {
1829 unsigned int tmp, action = 0;
1830 u8 stat = tf->command, err = tf->feature;
1831
1832 if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
1833 qc->err_mask |= AC_ERR_HSM;
1834 return ATA_EH_RESET;
1835 }
1836
1837 if (stat & (ATA_ERR | ATA_DF))
1838 qc->err_mask |= AC_ERR_DEV;
1839 else
1840 return 0;
1841
1842 switch (qc->dev->class) {
1843 case ATA_DEV_ATA:
1844 case ATA_DEV_ZAC:
1845 if (err & ATA_ICRC)
1846 qc->err_mask |= AC_ERR_ATA_BUS;
1847 if (err & (ATA_UNC | ATA_AMNF))
1848 qc->err_mask |= AC_ERR_MEDIA;
1849 if (err & ATA_IDNF)
1850 qc->err_mask |= AC_ERR_INVALID;
1851 break;
1852
1853 case ATA_DEV_ATAPI:
1854 if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
1855 tmp = atapi_eh_request_sense(qc->dev,
1856 qc->scsicmd->sense_buffer,
1857 qc->result_tf.feature >> 4);
1858 if (!tmp) {
1859 /* ATA_QCFLAG_SENSE_VALID is used to
1860 * tell atapi_qc_complete() that sense
1861 * data is already valid.
1862 *
1863 * TODO: interpret sense data and set
1864 * appropriate err_mask.
1865 */
1866 qc->flags |= ATA_QCFLAG_SENSE_VALID;
1867 } else
1868 qc->err_mask |= tmp;
1869 }
1870 }
1871
1872 if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
1873 action |= ATA_EH_RESET;
1874
1875 return action;
1876 }
1877
1878 static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask,
1879 int *xfer_ok)
1880 {
1881 int base = 0;
1882
1883 if (!(eflags & ATA_EFLAG_DUBIOUS_XFER))
1884 *xfer_ok = 1;
1885
1886 if (!*xfer_ok)
1887 base = ATA_ECAT_DUBIOUS_NONE;
1888
1889 if (err_mask & AC_ERR_ATA_BUS)
1890 return base + ATA_ECAT_ATA_BUS;
1891
1892 if (err_mask & AC_ERR_TIMEOUT)
1893 return base + ATA_ECAT_TOUT_HSM;
1894
1895 if (eflags & ATA_EFLAG_IS_IO) {
1896 if (err_mask & AC_ERR_HSM)
1897 return base + ATA_ECAT_TOUT_HSM;
1898 if ((err_mask &
1899 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
1900 return base + ATA_ECAT_UNK_DEV;
1901 }
1902
1903 return 0;
1904 }
1905
1906 struct speed_down_verdict_arg {
1907 u64 since;
1908 int xfer_ok;
1909 int nr_errors[ATA_ECAT_NR];
1910 };
1911
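/* ata_ering_map() callback: stop (return -1) at entries marked
 * ATA_EFLAG_OLD_ER or older than @arg->since, otherwise categorize
 * the entry and bump the matching counter.
 */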
1912 static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
1913 {
1914 struct speed_down_verdict_arg *arg = void_arg;
1915 int cat;
1916
1917 if ((ent->eflags & ATA_EFLAG_OLD_ER) || (ent->timestamp < arg->since))
1918 return -1;
1919
1920 cat = ata_eh_categorize_error(ent->eflags, ent->err_mask,
1921 &arg->xfer_ok);
1922 arg->nr_errors[cat]++;
1923
1924 return 0;
1925 }
1926
1927 /**
1928 * ata_eh_speed_down_verdict - Determine speed down verdict
1929 * @dev: Device of interest
1930 *
1931 * This function examines error ring of @dev and determines
1932 * whether NCQ needs to be turned off, transfer speed should be
1933 * stepped down, or falling back to PIO is necessary.
1934 *
1935 * ECAT_ATA_BUS : ATA_BUS error for any command
1936 *
1937 * ECAT_TOUT_HSM : TIMEOUT for any command or HSM violation for
1938 * IO commands
1939 *
1940 * ECAT_UNK_DEV : Unknown DEV error for IO commands
1941 *
1942 * ECAT_DUBIOUS_* : Identical to above three but occurred while
1943 * data transfer hasn't been verified.
1944 *
1945 * Verdicts are
1946 *
1947 * NCQ_OFF : Turn off NCQ.
1948 *
1949 * SPEED_DOWN : Speed down transfer speed but don't fall back
1950 * to PIO.
1951 *
1952 * FALLBACK_TO_PIO : Fall back to PIO.
1953 *
1954 * Even if multiple verdicts are returned, only one action is
1955 * taken per error. An action triggered by non-DUBIOUS errors
1956 * clears ering, while one triggered by DUBIOUS_* errors doesn't.
1957 * This is to expedite speed down decisions right after device is
1958 * initially configured.
1959 *
1960 * The following are the speed down rules.  Rules #1 and #2 deal
1961 * with DUBIOUS errors.
1962 *
1963 * 1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
1964 * occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
1965 *
1966 * 2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
1967 * occurred during last 5 mins, NCQ_OFF.
1968 *
1969 * 3. If more than 8 ATA_BUS, TOUT_HSM or UNK_DEV errors
1970 * occurred during last 5 mins, FALLBACK_TO_PIO
1971 *
1972 * 4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
1973 * during last 10 mins, NCQ_OFF.
1974 *
1975 * 5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
1976 * UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
1977 *
1978 * LOCKING:
1979 * Inherited from caller.
1980 *
1981 * RETURNS:
1982 * OR of ATA_EH_SPDN_* flags.
1983 */
1984 static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
1985 {
1986 const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
1987 u64 j64 = get_jiffies_64();
1988 struct speed_down_verdict_arg arg;
1989 unsigned int verdict = 0;
1990
1991 /* scan past 5 mins of error history */
1992 memset(&arg, 0, sizeof(arg));
1993 arg.since = j64 - min(j64, j5mins);
1994 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
1995
1996 if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] +
1997 arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1)
1998 verdict |= ATA_EH_SPDN_SPEED_DOWN |
1999 ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS;
2000
2001 if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] +
2002 arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1)
2003 verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS;
2004
2005 if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
2006 arg.nr_errors[ATA_ECAT_TOUT_HSM] +
2007 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
2008 verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;
2009
2010 /* scan past 10 mins of error history */
2011 memset(&arg, 0, sizeof(arg));
2012 arg.since = j64 - min(j64, j10mins);
2013 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
2014
2015 if (arg.nr_errors[ATA_ECAT_TOUT_HSM] +
2016 arg.nr_errors[ATA_ECAT_UNK_DEV] > 3)
2017 verdict |= ATA_EH_SPDN_NCQ_OFF;
2018
2019 if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
2020 arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 ||
2021 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
2022 verdict |= ATA_EH_SPDN_SPEED_DOWN;
2023
2024 return verdict;
2025 }
2026
2027 /**
2028 * ata_eh_speed_down - record error and speed down if necessary
2029 * @dev: Failed device
2030 * @eflags: mask of ATA_EFLAG_* flags
2031 * @err_mask: err_mask of the error
2032 *
2033 * Record error and examine error history to determine whether
2034 * adjusting transmission speed is necessary. It also sets
2035 * transmission limits appropriately if such adjustment is
2036 * necessary.
2037 *
2038 * LOCKING:
2039 * Kernel thread context (may sleep).
2040 *
2041 * RETURNS:
2042 * Determined recovery action.
2043 */
2044 static unsigned int ata_eh_speed_down(struct ata_device *dev,
2045 unsigned int eflags, unsigned int err_mask)
2046 {
2047 struct ata_link *link = ata_dev_phys_link(dev);
2048 int xfer_ok = 0;
2049 unsigned int verdict;
2050 unsigned int action = 0;
2051
2052 /* don't bother if Cat-0 error */
2053 if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0)
2054 return 0;
2055
2056 /* record error and determine whether speed down is necessary */
2057 ata_ering_record(&dev->ering, eflags, err_mask);
2058 verdict = ata_eh_speed_down_verdict(dev);
2059
2060 /* turn off NCQ? */
2061 if ((verdict & ATA_EH_SPDN_NCQ_OFF) &&
2062 (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ |
2063 ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) {
2064 dev->flags |= ATA_DFLAG_NCQ_OFF;
2065 ata_dev_warn(dev, "NCQ disabled due to excessive errors\n");
2066 goto done;
2067 }
2068
2069 /* speed down? */
2070 if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
2071 /* speed down SATA link speed if possible */
2072 if (sata_down_spd_limit(link, 0) == 0) {
2073 action |= ATA_EH_RESET;
2074 goto done;
2075 }
2076
2077 /* lower transfer mode */
2078 if (dev->spdn_cnt < 2) {
2079 static const int dma_dnxfer_sel[] =
2080 { ATA_DNXFER_DMA, ATA_DNXFER_40C };
2081 static const int pio_dnxfer_sel[] =
2082 { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
2083 int sel;
2084
2085 if (dev->xfer_shift != ATA_SHIFT_PIO)
2086 sel = dma_dnxfer_sel[dev->spdn_cnt];
2087 else
2088 sel = pio_dnxfer_sel[dev->spdn_cnt];
2089
2090 dev->spdn_cnt++;
2091
2092 if (ata_down_xfermask_limit(dev, sel) == 0) {
2093 action |= ATA_EH_RESET;
2094 goto done;
2095 }
2096 }
2097 }
2098
2099 /* Fall back to PIO? Slowing down to PIO is meaningless for
2100 * SATA ATA devices.  Consider it only for PATA and SATA ATAPI devices.
2101 */
2102 if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
2103 (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) &&
2104 (dev->xfer_shift != ATA_SHIFT_PIO)) {
2105 if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
2106 dev->spdn_cnt = 0;
2107 action |= ATA_EH_RESET;
2108 goto done;
2109 }
2110 }
2111
2112 return 0;
2113 done:
2114 /* device has been slowed down, blow error history */
2115 if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS))
2116 ata_ering_clear(&dev->ering);
2117 return action;
2118 }
2119
2120 /**
2121 * ata_eh_worth_retry - analyze error and decide whether to retry
2122 * @qc: qc to possibly retry
2123 *
2124 * Look at the cause of the error and decide if a retry
2125 * might be useful or not. We don't want to retry media errors
2126 * because the drive itself has probably already taken 10-30 seconds
2127 * doing its own internal retries before reporting the failure.
2128 */
2129 static inline int ata_eh_worth_retry(struct ata_queued_cmd *qc)
2130 {
2131 if (qc->err_mask & AC_ERR_MEDIA)
2132 return 0; /* don't retry media errors */
2133 if (qc->flags & ATA_QCFLAG_IO)
2134 return 1; /* otherwise retry anything from fs stack */
2135 if (qc->err_mask & AC_ERR_INVALID)
2136 return 0; /* don't retry these */
2137 return qc->err_mask != AC_ERR_DEV; /* retry if not dev error */
2138 }
2139
2140 /**
2141 * ata_eh_link_autopsy - analyze error and determine recovery action
2142 * @link: host link to perform autopsy on
2143 *
2144 * Analyze why @link failed and determine which recovery actions
2145 * are needed. This function also sets more detailed AC_ERR_*
2146 * values and fills sense data for ATAPI CHECK SENSE.
2147 *
2148 * LOCKING:
2149 * Kernel thread context (may sleep).
2150 */
2151 static void ata_eh_link_autopsy(struct ata_link *link)
2152 {
2153 struct ata_port *ap = link->ap;
2154 struct ata_eh_context *ehc = &link->eh_context;
2155 struct ata_device *dev;
2156 unsigned int all_err_mask = 0, eflags = 0;
2157 int tag;
2158 u32 serror;
2159 int rc;
2160
2161 DPRINTK("ENTER\n");
2162
2163 if (ehc->i.flags & ATA_EHI_NO_AUTOPSY)
2164 return;
2165
2166 /* obtain and analyze SError */
2167 rc = sata_scr_read(link, SCR_ERROR, &serror);
2168 if (rc == 0) {
2169 ehc->i.serror |= serror;
2170 ata_eh_analyze_serror(link);
2171 } else if (rc != -EOPNOTSUPP) {
2172 /* SError read failed, force reset and probing */
2173 ehc->i.probe_mask |= ATA_ALL_DEVICES;
2174 ehc->i.action |= ATA_EH_RESET;
2175 ehc->i.err_mask |= AC_ERR_OTHER;
2176 }
2177
2178 /* analyze NCQ failure */
2179 ata_eh_analyze_ncq_error(link);
2180
2181 /* any real error trumps AC_ERR_OTHER */
2182 if (ehc->i.err_mask & ~AC_ERR_OTHER)
2183 ehc->i.err_mask &= ~AC_ERR_OTHER;
2184
2185 all_err_mask |= ehc->i.err_mask;
2186
2187 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
2188 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
2189
2190 if (!(qc->flags & ATA_QCFLAG_FAILED) ||
2191 ata_dev_phys_link(qc->dev) != link)
2192 continue;
2193
2194 /* inherit upper level err_mask */
2195 qc->err_mask |= ehc->i.err_mask;
2196
2197 /* analyze TF */
2198 ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf);
2199
2200 /* DEV errors are probably spurious in case of ATA_BUS error */
2201 if (qc->err_mask & AC_ERR_ATA_BUS)
2202 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
2203 AC_ERR_INVALID);
2204
2205 /* any real error trumps unknown error */
2206 if (qc->err_mask & ~AC_ERR_OTHER)
2207 qc->err_mask &= ~AC_ERR_OTHER;
2208
2209 /*
2210 * SENSE_VALID trumps dev/unknown error and revalidation. Upper
2211 * layers will determine whether the command is worth retrying
2212 * based on the sense data and device class/type. Otherwise,
2213 * determine directly if the command is worth retrying using its
2214 * error mask and flags.
2215 */
2216 if (qc->flags & ATA_QCFLAG_SENSE_VALID)
2217 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
2218 else if (ata_eh_worth_retry(qc))
2219 qc->flags |= ATA_QCFLAG_RETRY;
2220
2221 /* accumulate error info */
2222 ehc->i.dev = qc->dev;
2223 all_err_mask |= qc->err_mask;
2224 if (qc->flags & ATA_QCFLAG_IO)
2225 eflags |= ATA_EFLAG_IS_IO;
2226 trace_ata_eh_link_autopsy_qc(qc);
2227 }
2228
2229 /* enforce default EH actions */
2230 if (ap->pflags & ATA_PFLAG_FROZEN ||
2231 all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
2232 ehc->i.action |= ATA_EH_RESET;
2233 else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) ||
2234 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV)))
2235 ehc->i.action |= ATA_EH_REVALIDATE;
2236
2237 /* If we have offending qcs and the associated failed device,
2238 * perform per-dev EH action only on the offending device.
2239 */
2240 if (ehc->i.dev) {
2241 ehc->i.dev_action[ehc->i.dev->devno] |=
2242 ehc->i.action & ATA_EH_PERDEV_MASK;
2243 ehc->i.action &= ~ATA_EH_PERDEV_MASK;
2244 }
2245
2246 /* propagate timeout to host link */
2247 if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link))
2248 ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT;
2249
2250 /* record error and consider speeding down */
2251 dev = ehc->i.dev;
2252 if (!dev && ((ata_link_max_devices(link) == 1 &&
2253 ata_dev_enabled(link->device))))
2254 dev = link->device;
2255
2256 if (dev) {
2257 if (dev->flags & ATA_DFLAG_DUBIOUS_XFER)
2258 eflags |= ATA_EFLAG_DUBIOUS_XFER;
2259 ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask);
2260 trace_ata_eh_link_autopsy(dev, ehc->i.action, all_err_mask);
2261 }
2262 DPRINTK("EXIT\n");
2263 }
2264
2265 /**
2266 * ata_eh_autopsy - analyze error and determine recovery action
2267 * @ap: host port to perform autopsy on
2268 *
2269 * Analyze all links of @ap and determine why they failed and
2270 * which recovery actions are needed.
2271 *
2272 * LOCKING:
2273 * Kernel thread context (may sleep).
2274 */
2275 void ata_eh_autopsy(struct ata_port *ap)
2276 {
2277 struct ata_link *link;
2278
2279 ata_for_each_link(link, ap, EDGE)
2280 ata_eh_link_autopsy(link);
2281
2282 /* Handle the frigging slave link. Autopsy is done similarly
2283 * but actions and flags are transferred over to the master
2284 * link and handled from there.
2285 */
2286 if (ap->slave_link) {
2287 struct ata_eh_context *mehc = &ap->link.eh_context;
2288 struct ata_eh_context *sehc = &ap->slave_link->eh_context;
2289
2290 /* transfer control flags from master to slave */
2291 sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK;
2292
2293 /* perform autopsy on the slave link */
2294 ata_eh_link_autopsy(ap->slave_link);
2295
2296 /* transfer actions from slave to master and clear slave */
2297 ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
2298 mehc->i.action |= sehc->i.action;
2299 mehc->i.dev_action[1] |= sehc->i.dev_action[1];
2300 mehc->i.flags |= sehc->i.flags;
2301 ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
2302 }
2303
2304 /* Autopsy of fanout ports can affect host link autopsy.
2305 * Perform host link autopsy last.
2306 */
2307 if (sata_pmp_attached(ap))
2308 ata_eh_link_autopsy(&ap->link);
2309 }
2310
2311 /**
2312 * ata_get_cmd_descript - get description for ATA command
2313 * @command: ATA command code to get description for
2314 *
2315 * Return a textual description of the given command, or NULL if the
2316 * command is not known.
2317 *
2318 * LOCKING:
2319 * None
2320 */
2321 const char *ata_get_cmd_descript(u8 command)
2322 {
2323 #ifdef CONFIG_ATA_VERBOSE_ERROR
2324 static const struct
2325 {
2326 u8 command;
2327 const char *text;
2328 } cmd_descr[] = {
2329 { ATA_CMD_DEV_RESET, "DEVICE RESET" },
2330 { ATA_CMD_CHK_POWER, "CHECK POWER MODE" },
2331 { ATA_CMD_STANDBY, "STANDBY" },
2332 { ATA_CMD_IDLE, "IDLE" },
2333 { ATA_CMD_EDD, "EXECUTE DEVICE DIAGNOSTIC" },
2334 { ATA_CMD_DOWNLOAD_MICRO, "DOWNLOAD MICROCODE" },
2335 { ATA_CMD_DOWNLOAD_MICRO_DMA, "DOWNLOAD MICROCODE DMA" },
2336 { ATA_CMD_NOP, "NOP" },
2337 { ATA_CMD_FLUSH, "FLUSH CACHE" },
2338 { ATA_CMD_FLUSH_EXT, "FLUSH CACHE EXT" },
2339 { ATA_CMD_ID_ATA, "IDENTIFY DEVICE" },
2340 { ATA_CMD_ID_ATAPI, "IDENTIFY PACKET DEVICE" },
2341 { ATA_CMD_SERVICE, "SERVICE" },
2342 { ATA_CMD_READ, "READ DMA" },
2343 { ATA_CMD_READ_EXT, "READ DMA EXT" },
2344 { ATA_CMD_READ_QUEUED, "READ DMA QUEUED" },
2345 { ATA_CMD_READ_STREAM_EXT, "READ STREAM EXT" },
2346 { ATA_CMD_READ_STREAM_DMA_EXT, "READ STREAM DMA EXT" },
2347 { ATA_CMD_WRITE, "WRITE DMA" },
2348 { ATA_CMD_WRITE_EXT, "WRITE DMA EXT" },
2349 { ATA_CMD_WRITE_QUEUED, "WRITE DMA QUEUED EXT" },
2350 { ATA_CMD_WRITE_STREAM_EXT, "WRITE STREAM EXT" },
2351 { ATA_CMD_WRITE_STREAM_DMA_EXT, "WRITE STREAM DMA EXT" },
2352 { ATA_CMD_WRITE_FUA_EXT, "WRITE DMA FUA EXT" },
2353 { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" },
2354 { ATA_CMD_FPDMA_READ, "READ FPDMA QUEUED" },
2355 { ATA_CMD_FPDMA_WRITE, "WRITE FPDMA QUEUED" },
2356 { ATA_CMD_FPDMA_SEND, "SEND FPDMA QUEUED" },
2357 { ATA_CMD_FPDMA_RECV, "RECEIVE FPDMA QUEUED" },
2358 { ATA_CMD_PIO_READ, "READ SECTOR(S)" },
2359 { ATA_CMD_PIO_READ_EXT, "READ SECTOR(S) EXT" },
2360 { ATA_CMD_PIO_WRITE, "WRITE SECTOR(S)" },
2361 { ATA_CMD_PIO_WRITE_EXT, "WRITE SECTOR(S) EXT" },
2362 { ATA_CMD_READ_MULTI, "READ MULTIPLE" },
2363 { ATA_CMD_READ_MULTI_EXT, "READ MULTIPLE EXT" },
2364 { ATA_CMD_WRITE_MULTI, "WRITE MULTIPLE" },
2365 { ATA_CMD_WRITE_MULTI_EXT, "WRITE MULTIPLE EXT" },
2366 { ATA_CMD_WRITE_MULTI_FUA_EXT, "WRITE MULTIPLE FUA EXT" },
2367 { ATA_CMD_SET_FEATURES, "SET FEATURES" },
2368 { ATA_CMD_SET_MULTI, "SET MULTIPLE MODE" },
2369 { ATA_CMD_VERIFY, "READ VERIFY SECTOR(S)" },
2370 { ATA_CMD_VERIFY_EXT, "READ VERIFY SECTOR(S) EXT" },
2371 { ATA_CMD_WRITE_UNCORR_EXT, "WRITE UNCORRECTABLE EXT" },
2372 { ATA_CMD_STANDBYNOW1, "STANDBY IMMEDIATE" },
2373 { ATA_CMD_IDLEIMMEDIATE, "IDLE IMMEDIATE" },
2374 { ATA_CMD_SLEEP, "SLEEP" },
2375 { ATA_CMD_INIT_DEV_PARAMS, "INITIALIZE DEVICE PARAMETERS" },
2376 { ATA_CMD_READ_NATIVE_MAX, "READ NATIVE MAX ADDRESS" },
2377 { ATA_CMD_READ_NATIVE_MAX_EXT, "READ NATIVE MAX ADDRESS EXT" },
2378 { ATA_CMD_SET_MAX, "SET MAX ADDRESS" },
2379 { ATA_CMD_SET_MAX_EXT, "SET MAX ADDRESS EXT" },
2380 { ATA_CMD_READ_LOG_EXT, "READ LOG EXT" },
2381 { ATA_CMD_WRITE_LOG_EXT, "WRITE LOG EXT" },
2382 { ATA_CMD_READ_LOG_DMA_EXT, "READ LOG DMA EXT" },
2383 { ATA_CMD_WRITE_LOG_DMA_EXT, "WRITE LOG DMA EXT" },
2384 { ATA_CMD_TRUSTED_NONDATA, "TRUSTED NON-DATA" },
2385 { ATA_CMD_TRUSTED_RCV, "TRUSTED RECEIVE" },
2386 { ATA_CMD_TRUSTED_RCV_DMA, "TRUSTED RECEIVE DMA" },
2387 { ATA_CMD_TRUSTED_SND, "TRUSTED SEND" },
2388 { ATA_CMD_TRUSTED_SND_DMA, "TRUSTED SEND DMA" },
2389 { ATA_CMD_PMP_READ, "READ BUFFER" },
2390 { ATA_CMD_PMP_READ_DMA, "READ BUFFER DMA" },
2391 { ATA_CMD_PMP_WRITE, "WRITE BUFFER" },
2392 { ATA_CMD_PMP_WRITE_DMA, "WRITE BUFFER DMA" },
2393 { ATA_CMD_CONF_OVERLAY, "DEVICE CONFIGURATION OVERLAY" },
2394 { ATA_CMD_SEC_SET_PASS, "SECURITY SET PASSWORD" },
2395 { ATA_CMD_SEC_UNLOCK, "SECURITY UNLOCK" },
2396 { ATA_CMD_SEC_ERASE_PREP, "SECURITY ERASE PREPARE" },
2397 { ATA_CMD_SEC_ERASE_UNIT, "SECURITY ERASE UNIT" },
2398 { ATA_CMD_SEC_FREEZE_LOCK, "SECURITY FREEZE LOCK" },
2399 { ATA_CMD_SEC_DISABLE_PASS, "SECURITY DISABLE PASSWORD" },
2400 { ATA_CMD_CONFIG_STREAM, "CONFIGURE STREAM" },
2401 { ATA_CMD_SMART, "SMART" },
2402 { ATA_CMD_MEDIA_LOCK, "DOOR LOCK" },
2403 { ATA_CMD_MEDIA_UNLOCK, "DOOR UNLOCK" },
2404 { ATA_CMD_DSM, "DATA SET MANAGEMENT" },
2405 { ATA_CMD_CHK_MED_CRD_TYP, "CHECK MEDIA CARD TYPE" },
2406 { ATA_CMD_CFA_REQ_EXT_ERR, "CFA REQUEST EXTENDED ERROR" },
2407 { ATA_CMD_CFA_WRITE_NE, "CFA WRITE SECTORS WITHOUT ERASE" },
2408 { ATA_CMD_CFA_TRANS_SECT, "CFA TRANSLATE SECTOR" },
2409 { ATA_CMD_CFA_ERASE, "CFA ERASE SECTORS" },
2410 { ATA_CMD_CFA_WRITE_MULT_NE, "CFA WRITE MULTIPLE WITHOUT ERASE" },
2411 { ATA_CMD_REQ_SENSE_DATA, "REQUEST SENSE DATA EXT" },
2412 { ATA_CMD_SANITIZE_DEVICE, "SANITIZE DEVICE" },
2413 { ATA_CMD_READ_LONG, "READ LONG (with retries)" },
2414 { ATA_CMD_READ_LONG_ONCE, "READ LONG (without retries)" },
2415 { ATA_CMD_WRITE_LONG, "WRITE LONG (with retries)" },
2416 { ATA_CMD_WRITE_LONG_ONCE, "WRITE LONG (without retries)" },
2417 { ATA_CMD_RESTORE, "RECALIBRATE" },
2418 { 0, NULL } /* terminate list */
2419 };
2420
2421 unsigned int i;
2422 for (i = 0; cmd_descr[i].text; i++)
2423 if (cmd_descr[i].command == command)
2424 return cmd_descr[i].text;
2425 #endif
2426
2427 return NULL;
2428 }
2429 EXPORT_SYMBOL_GPL(ata_get_cmd_descript);
2430
2431 /**
2432 * ata_eh_link_report - report error handling to user
2433 * @link: ATA link EH is going on
2434 *
2435 * Report EH to user.
2436 *
2437 * LOCKING:
2438 * None.
2439 */
2440 static void ata_eh_link_report(struct ata_link *link)
2441 {
2442 struct ata_port *ap = link->ap;
2443 struct ata_eh_context *ehc = &link->eh_context;
2444 const char *frozen, *desc;
2445 char tries_buf[6] = "";
2446 int tag, nr_failed = 0;
2447
2448 if (ehc->i.flags & ATA_EHI_QUIET)
2449 return;
2450
2451 desc = NULL;
2452 if (ehc->i.desc[0] != '\0')
2453 desc = ehc->i.desc;
2454
2455 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
2456 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
2457
2458 if (!(qc->flags & ATA_QCFLAG_FAILED) ||
2459 ata_dev_phys_link(qc->dev) != link ||
2460 ((qc->flags & ATA_QCFLAG_QUIET) &&
2461 qc->err_mask == AC_ERR_DEV))
2462 continue;
2463 if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask)
2464 continue;
2465
2466 nr_failed++;
2467 }
2468
2469 if (!nr_failed && !ehc->i.err_mask)
2470 return;
2471
2472 frozen = "";
2473 if (ap->pflags & ATA_PFLAG_FROZEN)
2474 frozen = " frozen";
2475
2476 if (ap->eh_tries < ATA_EH_MAX_TRIES)
2477 snprintf(tries_buf, sizeof(tries_buf), " t%d",
2478 ap->eh_tries);
2479
2480 if (ehc->i.dev) {
2481 ata_dev_err(ehc->i.dev, "exception Emask 0x%x "
2482 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n",
2483 ehc->i.err_mask, link->sactive, ehc->i.serror,
2484 ehc->i.action, frozen, tries_buf);
2485 if (desc)
2486 ata_dev_err(ehc->i.dev, "%s\n", desc);
2487 } else {
2488 ata_link_err(link, "exception Emask 0x%x "
2489 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n",
2490 ehc->i.err_mask, link->sactive, ehc->i.serror,
2491 ehc->i.action, frozen, tries_buf);
2492 if (desc)
2493 ata_link_err(link, "%s\n", desc);
2494 }
2495
2496 #ifdef CONFIG_ATA_VERBOSE_ERROR
2497 if (ehc->i.serror)
2498 ata_link_err(link,
2499 "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n",
2500 ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "",
2501 ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "",
2502 ehc->i.serror & SERR_DATA ? "UnrecovData " : "",
2503 ehc->i.serror & SERR_PERSISTENT ? "Persist " : "",
2504 ehc->i.serror & SERR_PROTOCOL ? "Proto " : "",
2505 ehc->i.serror & SERR_INTERNAL ? "HostInt " : "",
2506 ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "",
2507 ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "",
2508 ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "",
2509 ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "",
2510 ehc->i.serror & SERR_DISPARITY ? "Dispar " : "",
2511 ehc->i.serror & SERR_CRC ? "BadCRC " : "",
2512 ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "",
2513 ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "",
2514 ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "",
2515 ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "",
2516 ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : "");
2517 #endif
2518
2519 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
2520 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
2521 struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf;
2522 char data_buf[20] = "";
2523 char cdb_buf[70] = "";
2524
2525 if (!(qc->flags & ATA_QCFLAG_FAILED) ||
2526 ata_dev_phys_link(qc->dev) != link || !qc->err_mask)
2527 continue;
2528
2529 if (qc->dma_dir != DMA_NONE) {
2530 static const char *dma_str[] = {
2531 [DMA_BIDIRECTIONAL] = "bidi",
2532 [DMA_TO_DEVICE] = "out",
2533 [DMA_FROM_DEVICE] = "in",
2534 };
2535 static const char *prot_str[] = {
2536 [ATA_PROT_PIO] = "pio",
2537 [ATA_PROT_DMA] = "dma",
2538 [ATA_PROT_NCQ] = "ncq",
2539 [ATAPI_PROT_PIO] = "pio",
2540 [ATAPI_PROT_DMA] = "dma",
2541 };
2542
2543 snprintf(data_buf, sizeof(data_buf), " %s %u %s",
2544 prot_str[qc->tf.protocol], qc->nbytes,
2545 dma_str[qc->dma_dir]);
2546 }
2547
2548 if (ata_is_atapi(qc->tf.protocol)) {
2549 const u8 *cdb = qc->cdb;
2550 size_t cdb_len = qc->dev->cdb_len;
2551
2552 if (qc->scsicmd) {
2553 cdb = qc->scsicmd->cmnd;
2554 cdb_len = qc->scsicmd->cmd_len;
2555 }
2556 __scsi_format_command(cdb_buf, sizeof(cdb_buf),
2557 cdb, cdb_len);
2558 } else {
2559 const char *descr = ata_get_cmd_descript(cmd->command);
2560 if (descr)
2561 ata_dev_err(qc->dev, "failed command: %s\n",
2562 descr);
2563 }
2564
2565 ata_dev_err(qc->dev,
2566 "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
2567 "tag %d%s\n %s"
2568 "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
2569 "Emask 0x%x (%s)%s\n",
2570 cmd->command, cmd->feature, cmd->nsect,
2571 cmd->lbal, cmd->lbam, cmd->lbah,
2572 cmd->hob_feature, cmd->hob_nsect,
2573 cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah,
2574 cmd->device, qc->tag, data_buf, cdb_buf,
2575 res->command, res->feature, res->nsect,
2576 res->lbal, res->lbam, res->lbah,
2577 res->hob_feature, res->hob_nsect,
2578 res->hob_lbal, res->hob_lbam, res->hob_lbah,
2579 res->device, qc->err_mask, ata_err_string(qc->err_mask),
2580 qc->err_mask & AC_ERR_NCQ ? " <F>" : "");
2581
2582 #ifdef CONFIG_ATA_VERBOSE_ERROR
2583 if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ |
2584 ATA_ERR)) {
2585 if (res->command & ATA_BUSY)
2586 ata_dev_err(qc->dev, "status: { Busy }\n");
2587 else
2588 ata_dev_err(qc->dev, "status: { %s%s%s%s}\n",
2589 res->command & ATA_DRDY ? "DRDY " : "",
2590 res->command & ATA_DF ? "DF " : "",
2591 res->command & ATA_DRQ ? "DRQ " : "",
2592 res->command & ATA_ERR ? "ERR " : "");
2593 }
2594
2595 if (cmd->command != ATA_CMD_PACKET &&
2596 (res->feature & (ATA_ICRC | ATA_UNC | ATA_AMNF |
2597 ATA_IDNF | ATA_ABORTED)))
2598 ata_dev_err(qc->dev, "error: { %s%s%s%s%s}\n",
2599 res->feature & ATA_ICRC ? "ICRC " : "",
2600 res->feature & ATA_UNC ? "UNC " : "",
2601 res->feature & ATA_AMNF ? "AMNF " : "",
2602 res->feature & ATA_IDNF ? "IDNF " : "",
2603 res->feature & ATA_ABORTED ? "ABRT " : "");
2604 #endif
2605 }
2606 }
2607
2608 /**
2609 * ata_eh_report - report error handling to user
2610 * @ap: ATA port to report EH about
2611 *
2612 * Report EH to user.
2613 *
2614 * LOCKING:
2615 * None.
2616 */
2617 void ata_eh_report(struct ata_port *ap)
2618 {
2619 struct ata_link *link;
2620
2621 ata_for_each_link(link, ap, HOST_FIRST)
2622 ata_eh_link_report(link);
2623 }
2624
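/* Invoke @reset on @link, optionally marking all device classes unknown
 * first so that the reset method's own classification results are used.
 */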
2625 static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset,
2626 unsigned int *classes, unsigned long deadline,
2627 bool clear_classes)
2628 {
2629 struct ata_device *dev;
2630
2631 if (clear_classes)
2632 ata_for_each_dev(dev, link, ALL)
2633 classes[dev->devno] = ATA_DEV_UNKNOWN;
2634
2635 return reset(link, classes, deadline);
2636 }
2637
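/* A follow-up softreset is needed only if the link is online and SRST is
 * allowed, and either hardreset returned -EAGAIN (classification not done)
 * or this is the host link of a PMP-capable port, where SRST is used to
 * probe the PMP.
 */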
2638 static int ata_eh_followup_srst_needed(struct ata_link *link, int rc)
2639 {
2640 if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link))
2641 return 0;
2642 if (rc == -EAGAIN)
2643 return 1;
2644 if (sata_pmp_supported(link->ap) && ata_is_host_link(link))
2645 return 1;
2646 return 0;
2647 }
2648
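/* Reset @link (and the slave link, if any): run @prereset, prefer hardreset
 * over softreset, retry with the escalating timeouts from
 * ata_eh_reset_timeouts[], cross check classification results when
 * @classify is set, and finish with @postreset.
 */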
2649 int ata_eh_reset(struct ata_link *link, int classify,
2650 ata_prereset_fn_t prereset, ata_reset_fn_t softreset,
2651 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
2652 {
2653 struct ata_port *ap = link->ap;
2654 struct ata_link *slave = ap->slave_link;
2655 struct ata_eh_context *ehc = &link->eh_context;
2656 struct ata_eh_context *sehc = slave ? &slave->eh_context : NULL;
2657 unsigned int *classes = ehc->classes;
2658 unsigned int lflags = link->flags;
2659 int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
2660 int max_tries = 0, try = 0;
2661 struct ata_link *failed_link;
2662 struct ata_device *dev;
2663 unsigned long deadline, now;
2664 ata_reset_fn_t reset;
2665 unsigned long flags;
2666 u32 sstatus;
2667 int nr_unknown, rc;
2668
2669 /*
2670 * Prepare to reset
2671 */
2672 while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX)
2673 max_tries++;
2674 if (link->flags & ATA_LFLAG_RST_ONCE)
2675 max_tries = 1;
2676 if (link->flags & ATA_LFLAG_NO_HRST)
2677 hardreset = NULL;
2678 if (link->flags & ATA_LFLAG_NO_SRST)
2679 softreset = NULL;
2680
2681 /* make sure each reset attempt is at least COOL_DOWN apart */
2682 if (ehc->i.flags & ATA_EHI_DID_RESET) {
2683 now = jiffies;
2684 WARN_ON(time_after(ehc->last_reset, now));
2685 deadline = ata_deadline(ehc->last_reset,
2686 ATA_EH_RESET_COOL_DOWN);
2687 if (time_before(now, deadline))
2688 schedule_timeout_uninterruptible(deadline - now);
2689 }
2690
2691 spin_lock_irqsave(ap->lock, flags);
2692 ap->pflags |= ATA_PFLAG_RESETTING;
2693 spin_unlock_irqrestore(ap->lock, flags);
2694
2695 ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
2696
2697 ata_for_each_dev(dev, link, ALL) {
2698 /* If we issue an SRST then an ATA drive (not ATAPI)
2699 * may change configuration and be in PIO0 timing. If
2700 * we do a hard reset (or are coming from power on)
2701 * this is true for ATA or ATAPI. Until we've set a
2702 * suitable controller mode we should not touch the
2703 * bus as we may be talking too fast.
2704 */
2705 dev->pio_mode = XFER_PIO_0;
2706 dev->dma_mode = 0xff;
2707
2708 /* If the controller has a pio mode setup function
2709 * then use it to set the chipset to rights. Don't
2710 * touch the DMA setup as that will be dealt with when
2711 * configuring devices.
2712 */
2713 if (ap->ops->set_piomode)
2714 ap->ops->set_piomode(ap, dev);
2715 }
2716
2717 /* prefer hardreset */
2718 reset = NULL;
2719 ehc->i.action &= ~ATA_EH_RESET;
2720 if (hardreset) {
2721 reset = hardreset;
2722 ehc->i.action |= ATA_EH_HARDRESET;
2723 } else if (softreset) {
2724 reset = softreset;
2725 ehc->i.action |= ATA_EH_SOFTRESET;
2726 }
2727
2728 if (prereset) {
2729 unsigned long deadline = ata_deadline(jiffies,
2730 ATA_EH_PRERESET_TIMEOUT);
2731
2732 if (slave) {
2733 sehc->i.action &= ~ATA_EH_RESET;
2734 sehc->i.action |= ehc->i.action;
2735 }
2736
2737 rc = prereset(link, deadline);
2738
2739 /* If present, do prereset on slave link too. Reset
2740 * is skipped iff both master and slave links report
2741 * -ENOENT or clear ATA_EH_RESET.
2742 */
2743 if (slave && (rc == 0 || rc == -ENOENT)) {
2744 int tmp;
2745
2746 tmp = prereset(slave, deadline);
2747 if (tmp != -ENOENT)
2748 rc = tmp;
2749
2750 ehc->i.action |= sehc->i.action;
2751 }
2752
2753 if (rc) {
2754 if (rc == -ENOENT) {
2755 ata_link_dbg(link, "port disabled--ignoring\n");
2756 ehc->i.action &= ~ATA_EH_RESET;
2757
2758 ata_for_each_dev(dev, link, ALL)
2759 classes[dev->devno] = ATA_DEV_NONE;
2760
2761 rc = 0;
2762 } else
2763 ata_link_err(link,
2764 "prereset failed (errno=%d)\n",
2765 rc);
2766 goto out;
2767 }
2768
2769 /* prereset() might have cleared ATA_EH_RESET. If so,
2770 * bang classes, thaw and return.
2771 */
2772 if (reset && !(ehc->i.action & ATA_EH_RESET)) {
2773 ata_for_each_dev(dev, link, ALL)
2774 classes[dev->devno] = ATA_DEV_NONE;
2775 if ((ap->pflags & ATA_PFLAG_FROZEN) &&
2776 ata_is_host_link(link))
2777 ata_eh_thaw_port(ap);
2778 rc = 0;
2779 goto out;
2780 }
2781 }
2782
2783 retry:
2784 /*
2785 * Perform reset
2786 */
2787 if (ata_is_host_link(link))
2788 ata_eh_freeze_port(ap);
2789
2790 deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]);
2791
2792 if (reset) {
2793 if (verbose)
2794 ata_link_info(link, "%s resetting link\n",
2795 reset == softreset ? "soft" : "hard");
2796
2797 /* mark that this EH session started with reset */
2798 ehc->last_reset = jiffies;
2799 if (reset == hardreset)
2800 ehc->i.flags |= ATA_EHI_DID_HARDRESET;
2801 else
2802 ehc->i.flags |= ATA_EHI_DID_SOFTRESET;
2803
2804 rc = ata_do_reset(link, reset, classes, deadline, true);
2805 if (rc && rc != -EAGAIN) {
2806 failed_link = link;
2807 goto fail;
2808 }
2809
2810 /* hardreset slave link if existent */
2811 if (slave && reset == hardreset) {
2812 int tmp;
2813
2814 if (verbose)
2815 ata_link_info(slave, "hard resetting link\n");
2816
2817 ata_eh_about_to_do(slave, NULL, ATA_EH_RESET);
2818 tmp = ata_do_reset(slave, reset, classes, deadline,
2819 false);
2820 switch (tmp) {
2821 case -EAGAIN:
2822 rc = -EAGAIN;
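				/* fall through */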
2823 case 0:
2824 break;
2825 default:
2826 failed_link = slave;
2827 rc = tmp;
2828 goto fail;
2829 }
2830 }
2831
2832 /* perform follow-up SRST if necessary */
2833 if (reset == hardreset &&
2834 ata_eh_followup_srst_needed(link, rc)) {
2835 reset = softreset;
2836
2837 if (!reset) {
2838 ata_link_err(link,
2839 "follow-up softreset required but no softreset available\n");
2840 failed_link = link;
2841 rc = -EINVAL;
2842 goto fail;
2843 }
2844
2845 ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
2846 rc = ata_do_reset(link, reset, classes, deadline, true);
2847 if (rc) {
2848 failed_link = link;
2849 goto fail;
2850 }
2851 }
2852 } else {
2853 if (verbose)
2854 ata_link_info(link,
2855 "no reset method available, skipping reset\n");
2856 if (!(lflags & ATA_LFLAG_ASSUME_CLASS))
2857 lflags |= ATA_LFLAG_ASSUME_ATA;
2858 }
2859
2860 /*
2861 * Post-reset processing
2862 */
2863 ata_for_each_dev(dev, link, ALL) {
2864 /* After the reset, the device state is PIO 0 and the
2865 * controller state is undefined. Reset also wakes up
2866 * drives from sleeping mode.
2867 */
2868 dev->pio_mode = XFER_PIO_0;
2869 dev->flags &= ~ATA_DFLAG_SLEEPING;
2870
2871 if (ata_phys_link_offline(ata_dev_phys_link(dev)))
2872 continue;
2873
2874 /* apply class override */
2875 if (lflags & ATA_LFLAG_ASSUME_ATA)
2876 classes[dev->devno] = ATA_DEV_ATA;
2877 else if (lflags & ATA_LFLAG_ASSUME_SEMB)
2878 classes[dev->devno] = ATA_DEV_SEMB_UNSUP;
2879 }
2880
2881 /* record current link speed */
2882 if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0)
2883 link->sata_spd = (sstatus >> 4) & 0xf;
2884 if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0)
2885 slave->sata_spd = (sstatus >> 4) & 0xf;
2886
2887 /* thaw the port */
2888 if (ata_is_host_link(link))
2889 ata_eh_thaw_port(ap);
2890
2891 /* postreset() should clear hardware SError. Although SError
2892 * is cleared during link resume, clearing SError here is
2893 * necessary as some PHYs raise hotplug events after SRST.
2894 * This introduces a race condition where hotplug occurs between
2895 * reset and here.  This race is mitigated by cross checking
2896 * link onlineness and classification result later.
2897 */
2898 if (postreset) {
2899 postreset(link, classes);
2900 if (slave)
2901 postreset(slave, classes);
2902 }
2903
2904 /*
2905 * Some controllers can't be frozen very well and may set spurious
2906 * error conditions during reset. Clear accumulated error
2907 * information and re-thaw the port if frozen. As reset is the
2908 * final recovery action and we cross check link onlineness against
2909 * device classification later, no hotplug event is lost by this.
2910 */
2911 spin_lock_irqsave(link->ap->lock, flags);
2912 memset(&link->eh_info, 0, sizeof(link->eh_info));
2913 if (slave)
2914 memset(&slave->eh_info, 0, sizeof(link->eh_info));
2915 ap->pflags &= ~ATA_PFLAG_EH_PENDING;
2916 spin_unlock_irqrestore(link->ap->lock, flags);
2917
2918 if (ap->pflags & ATA_PFLAG_FROZEN)
2919 ata_eh_thaw_port(ap);
2920
2921 /*
2922 * Make sure onlineness and classification result correspond.
2923 * Hotplug could have happened during reset and some
2924 * controllers fail to wait while a drive is spinning up after
2925 * being hotplugged causing misdetection. By cross checking
2926 * link on/offlineness and classification result, those
2927 * conditions can be reliably detected and retried.
2928 */
2929 nr_unknown = 0;
2930 ata_for_each_dev(dev, link, ALL) {
2931 if (ata_phys_link_online(ata_dev_phys_link(dev))) {
2932 if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
2933 ata_dev_dbg(dev, "link online but device misclassified\n");
2934 classes[dev->devno] = ATA_DEV_NONE;
2935 nr_unknown++;
2936 }
2937 } else if (ata_phys_link_offline(ata_dev_phys_link(dev))) {
2938 if (ata_class_enabled(classes[dev->devno]))
2939 ata_dev_dbg(dev,
2940 "link offline, clearing class %d to NONE\n",
2941 classes[dev->devno]);
2942 classes[dev->devno] = ATA_DEV_NONE;
2943 } else if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
2944 ata_dev_dbg(dev,
2945 "link status unknown, clearing UNKNOWN to NONE\n");
2946 classes[dev->devno] = ATA_DEV_NONE;
2947 }
2948 }
2949
2950 if (classify && nr_unknown) {
2951 if (try < max_tries) {
2952 ata_link_warn(link,
2953 "link online but %d devices misclassified, retrying\n",
2954 nr_unknown);
2955 failed_link = link;
2956 rc = -EAGAIN;
2957 goto fail;
2958 }
2959 ata_link_warn(link,
2960 "link online but %d devices misclassified, "
2961 "device detection might fail\n", nr_unknown);
2962 }
2963
2964 /* reset successful, schedule revalidation */
2965 ata_eh_done(link, NULL, ATA_EH_RESET);
2966 if (slave)
2967 ata_eh_done(slave, NULL, ATA_EH_RESET);
2968 ehc->last_reset = jiffies; /* update to completion time */
2969 ehc->i.action |= ATA_EH_REVALIDATE;
2970 link->lpm_policy = ATA_LPM_UNKNOWN; /* reset LPM state */
2971
2972 rc = 0;
2973 out:
2974 /* clear hotplug flag */
2975 ehc->i.flags &= ~ATA_EHI_HOTPLUGGED;
2976 if (slave)
2977 sehc->i.flags &= ~ATA_EHI_HOTPLUGGED;
2978
2979 spin_lock_irqsave(ap->lock, flags);
2980 ap->pflags &= ~ATA_PFLAG_RESETTING;
2981 spin_unlock_irqrestore(ap->lock, flags);
2982
2983 return rc;
2984
2985 fail:
2986 /* if SCR isn't accessible on a fan-out port, PMP needs to be reset */
2987 if (!ata_is_host_link(link) &&
2988 sata_scr_read(link, SCR_STATUS, &sstatus))
2989 rc = -ERESTART;
2990
2991 if (try >= max_tries) {
2992 /*
2993 * Thaw host port even if reset failed, so that the port
2994 * can be retried on the next phy event. This risks
2995 * repeated EH runs but seems to be a better tradeoff than
2996 * shutting down a port after a botched hotplug attempt.
2997 */
2998 if (ata_is_host_link(link))
2999 ata_eh_thaw_port(ap);
3000 goto out;
3001 }
3002
3003 now = jiffies;
3004 if (time_before(now, deadline)) {
3005 unsigned long delta = deadline - now;
3006
3007 ata_link_warn(failed_link,
3008 "reset failed (errno=%d), retrying in %u secs\n",
3009 rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000));
3010
3011 ata_eh_release(ap);
3012 while (delta)
3013 delta = schedule_timeout_uninterruptible(delta);
3014 ata_eh_acquire(ap);
3015 }
3016
3017 /*
3018 * While disks spin up behind a PMP, some controllers fail sending SRST.
3019 * They need to be reset - as well as the PMP - before retrying.
3020 */
3021 if (rc == -ERESTART) {
3022 if (ata_is_host_link(link))
3023 ata_eh_thaw_port(ap);
3024 goto out;
3025 }
3026
3027 if (try == max_tries - 1) {
3028 sata_down_spd_limit(link, 0);
3029 if (slave)
3030 sata_down_spd_limit(slave, 0);
3031 } else if (rc == -EPIPE)
3032 sata_down_spd_limit(failed_link, 0);
3033
3034 if (hardreset)
3035 reset = hardreset;
3036 goto retry;
3037 }
3038
3039 static inline void ata_eh_pull_park_action(struct ata_port *ap)
3040 {
3041 struct ata_link *link;
3042 struct ata_device *dev;
3043 unsigned long flags;
3044
3045 /*
3046 * This function can be thought of as an extended version of
3047 * ata_eh_about_to_do() specially crafted to accommodate the
3048 * requirements of ATA_EH_PARK handling. Since the EH thread
3049 * does not leave the do {} while () loop in ata_eh_recover as
3050 * long as the timeout for a park request to *one* device on
3051 * the port has not expired, and since we still want to pick
3052 * up park requests to other devices on the same port or
3053 * timeout updates for the same device, we have to pull
3054 * ATA_EH_PARK actions from eh_info into eh_context.i
3055 * ourselves at the beginning of each pass over the loop.
3056 *
3057 * Additionally, all write accesses to &ap->park_req_pending
3058 * through reinit_completion() (see below) or complete_all()
3059 * (see ata_scsi_park_store()) are protected by the host lock.
3060 * As a result we have that park_req_pending.done is zero on
3061 * exit from this function, i.e. when ATA_EH_PARK actions for
3062 * *all* devices on port ap have been pulled into the
3063 * respective eh_context structs. If, and only if,
3064 * park_req_pending.done is non-zero by the time we reach
3065 * wait_for_completion_timeout(), another ATA_EH_PARK action
3066 * has been scheduled for at least one of the devices on port
3067 * ap and we have to cycle over the do {} while () loop in
3068 * ata_eh_recover() again.
3069 */
3070
3071 spin_lock_irqsave(ap->lock, flags);
3072 reinit_completion(&ap->park_req_pending);
3073 ata_for_each_link(link, ap, EDGE) {
3074 ata_for_each_dev(dev, link, ALL) {
3075 struct ata_eh_info *ehi = &link->eh_info;
3076
3077 link->eh_context.i.dev_action[dev->devno] |=
3078 ehi->dev_action[dev->devno] & ATA_EH_PARK;
3079 ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK);
3080 }
3081 }
3082 spin_unlock_irqrestore(ap->lock, flags);
3083 }
3084
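/* Issue the head (un)park command to @dev: IDLE IMMEDIATE with the UNLOAD
 * feature (FEATURE 0x44, LBA signature 0x55/0x4e/0x4c) to park, CHECK POWER
 * MODE to resume.  A successful unload is reported back with LBA low 0xc4.
 */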
3085 static void ata_eh_park_issue_cmd(struct ata_device *dev, int park)
3086 {
3087 struct ata_eh_context *ehc = &dev->link->eh_context;
3088 struct ata_taskfile tf;
3089 unsigned int err_mask;
3090
3091 ata_tf_init(dev, &tf);
3092 if (park) {
3093 ehc->unloaded_mask |= 1 << dev->devno;
3094 tf.command = ATA_CMD_IDLEIMMEDIATE;
3095 tf.feature = 0x44;
3096 tf.lbal = 0x4c;
3097 tf.lbam = 0x4e;
3098 tf.lbah = 0x55;
3099 } else {
3100 ehc->unloaded_mask &= ~(1 << dev->devno);
3101 tf.command = ATA_CMD_CHK_POWER;
3102 }
3103
3104 tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR;
3105 tf.protocol |= ATA_PROT_NODATA;
3106 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
3107 if (park && (err_mask || tf.lbal != 0xc4)) {
3108 ata_dev_err(dev, "head unload failed!\n");
3109 ehc->unloaded_mask &= ~(1 << dev->devno);
3110 }
3111 }
3112
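/* Revalidate devices that requested ATA_EH_REVALIDATE and attach newly
 * detected ones.  IDENTIFY is issued in reverse device order so that PATA
 * drive side cable detection works.  On failure the offending device is
 * returned through @r_failed_dev.
 */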
3113 static int ata_eh_revalidate_and_attach(struct ata_link *link,
3114 struct ata_device **r_failed_dev)
3115 {
3116 struct ata_port *ap = link->ap;
3117 struct ata_eh_context *ehc = &link->eh_context;
3118 struct ata_device *dev;
3119 unsigned int new_mask = 0;
3120 unsigned long flags;
3121 int rc = 0;
3122
3123 DPRINTK("ENTER\n");
3124
3125 /* For PATA drive side cable detection to work, IDENTIFY must
3126 * be done backwards such that PDIAG- is released by the slave
3127 * device before the master device is identified.
3128 */
3129 ata_for_each_dev(dev, link, ALL_REVERSE) {
3130 unsigned int action = ata_eh_dev_action(dev);
3131 unsigned int readid_flags = 0;
3132
3133 if (ehc->i.flags & ATA_EHI_DID_RESET)
3134 readid_flags |= ATA_READID_POSTRESET;
3135
3136 if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) {
3137 WARN_ON(dev->class == ATA_DEV_PMP);
3138
3139 if (ata_phys_link_offline(ata_dev_phys_link(dev))) {
3140 rc = -EIO;
3141 goto err;
3142 }
3143
3144 ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE);
3145 rc = ata_dev_revalidate(dev, ehc->classes[dev->devno],
3146 readid_flags);
3147 if (rc)
3148 goto err;
3149
3150 ata_eh_done(link, dev, ATA_EH_REVALIDATE);
3151
3152 /* Configuration may have changed, reconfigure
3153 * transfer mode.
3154 */
3155 ehc->i.flags |= ATA_EHI_SETMODE;
3156
3157 /* schedule the scsi_rescan_device() here */
3158 schedule_work(&(ap->scsi_rescan_task));
3159 } else if (dev->class == ATA_DEV_UNKNOWN &&
3160 ehc->tries[dev->devno] &&
3161 ata_class_enabled(ehc->classes[dev->devno])) {
3162 /* Temporarily set dev->class, it will be
3163 * permanently set once all configurations are
3164 * complete. This is necessary because new
3165 * device configuration is done in two
3166 * separate loops.
3167 */
3168 dev->class = ehc->classes[dev->devno];
3169
3170 if (dev->class == ATA_DEV_PMP)
3171 rc = sata_pmp_attach(dev);
3172 else
3173 rc = ata_dev_read_id(dev, &dev->class,
3174 readid_flags, dev->id);
3175
3176 /* read_id might have changed class, store and reset */
3177 ehc->classes[dev->devno] = dev->class;
3178 dev->class = ATA_DEV_UNKNOWN;
3179
3180 switch (rc) {
3181 case 0:
3182 /* clear error info accumulated during probe */
3183 ata_ering_clear(&dev->ering);
3184 new_mask |= 1 << dev->devno;
3185 break;
3186 case -ENOENT:
3187 /* IDENTIFY was issued to non-existent
3188 * device. No need to reset. Just
3189 * thaw and ignore the device.
3190 */
3191 ata_eh_thaw_port(ap);
3192 break;
3193 default:
3194 goto err;
3195 }
3196 }
3197 }
3198
3199 /* PDIAG- should have been released, ask cable type if post-reset */
3200 if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) {
3201 if (ap->ops->cable_detect)
3202 ap->cbl = ap->ops->cable_detect(ap);
3203 ata_force_cbl(ap);
3204 }
3205
3206 /* Configure new devices forward such that user doesn't see
3207 * device detection messages backwards.
3208 */
3209 ata_for_each_dev(dev, link, ALL) {
3210 if (!(new_mask & (1 << dev->devno)))
3211 continue;
3212
3213 dev->class = ehc->classes[dev->devno];
3214
3215 if (dev->class == ATA_DEV_PMP)
3216 continue;
3217
3218 ehc->i.flags |= ATA_EHI_PRINTINFO;
3219 rc = ata_dev_configure(dev);
3220 ehc->i.flags &= ~ATA_EHI_PRINTINFO;
3221 if (rc) {
3222 dev->class = ATA_DEV_UNKNOWN;
3223 goto err;
3224 }
3225
3226 spin_lock_irqsave(ap->lock, flags);
3227 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
3228 spin_unlock_irqrestore(ap->lock, flags);
3229
3230 /* new device discovered, configure xfermode */
3231 ehc->i.flags |= ATA_EHI_SETMODE;
3232 }
3233
3234 return 0;
3235
3236 err:
3237 *r_failed_dev = dev;
3238 DPRINTK("EXIT rc=%d\n", rc);
3239 return rc;
3240 }
3241
3242 /**
3243 * ata_set_mode - Program timings and issue SET FEATURES - XFER
3244 * @link: link on which timings will be programmed
3245 * @r_failed_dev: out parameter for failed device
3246 *
3247 * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If
3248 * ata_set_mode() fails, pointer to the failing device is
3249 * returned in @r_failed_dev.
3250 *
3251 * LOCKING:
3252 * PCI/etc. bus probe sem.
3253 *
3254 * RETURNS:
3255 * 0 on success, negative errno otherwise
3256 */
3257 int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
3258 {
3259 struct ata_port *ap = link->ap;
3260 struct ata_device *dev;
3261 int rc;
3262
3263 /* if data transfer is verified, clear DUBIOUS_XFER on ering top */
3264 ata_for_each_dev(dev, link, ENABLED) {
3265 if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) {
3266 struct ata_ering_entry *ent;
3267
3268 ent = ata_ering_top(&dev->ering);
3269 if (ent)
3270 ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER;
3271 }
3272 }
3273
3274 /* has private set_mode? */
3275 if (ap->ops->set_mode)
3276 rc = ap->ops->set_mode(link, r_failed_dev);
3277 else
3278 rc = ata_do_set_mode(link, r_failed_dev);
3279
3280 /* if transfer mode has changed, set DUBIOUS_XFER on device */
3281 ata_for_each_dev(dev, link, ENABLED) {
3282 struct ata_eh_context *ehc = &link->eh_context;
3283 u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno];
3284 u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno));
3285
3286 if (dev->xfer_mode != saved_xfer_mode ||
3287 ata_ncq_enabled(dev) != saved_ncq)
3288 dev->flags |= ATA_DFLAG_DUBIOUS_XFER;
3289 }
3290
3291 return rc;
3292 }
3293
3294 /**
3295 * atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset
3296 * @dev: ATAPI device to clear UA for
3297 *
3298 * Resets and other operations can make an ATAPI device raise
3299 * UNIT ATTENTION which causes the next operation to fail. This
3300 * function clears UA.
3301 *
3302 * LOCKING:
3303 * EH context (may sleep).
3304 *
3305 * RETURNS:
3306 * 0 on success, -errno on failure.
3307 */
3308 static int atapi_eh_clear_ua(struct ata_device *dev)
3309 {
3310 int i;
3311
3312 for (i = 0; i < ATA_EH_UA_TRIES; i++) {
3313 u8 *sense_buffer = dev->link->ap->sector_buf;
3314 u8 sense_key = 0;
3315 unsigned int err_mask;
3316
3317 err_mask = atapi_eh_tur(dev, &sense_key);
3318 if (err_mask != 0 && err_mask != AC_ERR_DEV) {
3319 ata_dev_warn(dev,
3320 "TEST_UNIT_READY failed (err_mask=0x%x)\n",
3321 err_mask);
3322 return -EIO;
3323 }
3324
3325 if (!err_mask || sense_key != UNIT_ATTENTION)
3326 return 0;
3327
3328 err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key);
3329 if (err_mask) {
3330 ata_dev_warn(dev, "failed to clear "
3331 "UNIT ATTENTION (err_mask=0x%x)\n", err_mask);
3332 return -EIO;
3333 }
3334 }
3335
3336 ata_dev_warn(dev, "UNIT ATTENTION persists after %d tries\n",
3337 ATA_EH_UA_TRIES);
3338
3339 return 0;
3340 }
3341
3342 /**
3343 * ata_eh_maybe_retry_flush - Retry FLUSH if necessary
3344 * @dev: ATA device which may need FLUSH retry
3345 *
3346 * If @dev failed FLUSH, it needs to be reported to the upper layer
3347 * immediately as it means that @dev failed to remap and has already
3348 * lost at least a sector, and further FLUSH retries won't make
3349 * any difference to the lost sector.  However, if FLUSH failed
3350 * for other reasons, for example a transmission error, FLUSH needs
3351 * to be retried.
3352 *
3353 * This function determines whether FLUSH failure retry is
3354 * necessary and performs it if so.
3355 *
3356 * RETURNS:
3357 * 0 if EH can continue, -errno if EH needs to be repeated.
3358 */
3359 static int ata_eh_maybe_retry_flush(struct ata_device *dev)
3360 {
3361 struct ata_link *link = dev->link;
3362 struct ata_port *ap = link->ap;
3363 struct ata_queued_cmd *qc;
3364 struct ata_taskfile tf;
3365 unsigned int err_mask;
3366 int rc = 0;
3367
3368 /* did flush fail for this device? */
3369 if (!ata_tag_valid(link->active_tag))
3370 return 0;
3371
3372 qc = __ata_qc_from_tag(ap, link->active_tag);
3373 if (qc->dev != dev || (qc->tf.command != ATA_CMD_FLUSH_EXT &&
3374 qc->tf.command != ATA_CMD_FLUSH))
3375 return 0;
3376
3377 /* if the device failed it, it should be reported to upper layers */
3378 if (qc->err_mask & AC_ERR_DEV)
3379 return 0;
3380
3381 /* flush failed for some other reason, give it another shot */
3382 ata_tf_init(dev, &tf);
3383
3384 tf.command = qc->tf.command;
3385 tf.flags |= ATA_TFLAG_DEVICE;
3386 tf.protocol = ATA_PROT_NODATA;
3387
3388 ata_dev_warn(dev, "retrying FLUSH 0x%x Emask 0x%x\n",
3389 tf.command, qc->err_mask);
3390
3391 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
3392 if (!err_mask) {
3393 /*
3394 * FLUSH is complete but there's no way to
3395 * successfully complete a failed command from EH.
3396 * Making sure retry is allowed at least once and
3397 * retrying it should do the trick - whatever was in
3398 * the cache is already on the platter and this won't
3399 * cause an infinite loop.
3400 */
3401 qc->scsicmd->allowed = max(qc->scsicmd->allowed, 1);
3402 } else {
3403 ata_dev_warn(dev, "FLUSH failed Emask 0x%x\n",
3404 err_mask);
3405 rc = -EIO;
3406
3407 /* if device failed it, report it to upper layers */
3408 if (err_mask & AC_ERR_DEV) {
3409 qc->err_mask |= AC_ERR_DEV;
3410 qc->result_tf = tf;
3411 if (!(ap->pflags & ATA_PFLAG_FROZEN))
3412 rc = 0;
3413 }
3414 }
3415 return rc;
3416 }
3417
3418 /**
3419 * ata_eh_set_lpm - configure SATA interface power management
3420 * @link: link to configure power management
3421 * @policy: the link power management policy
3422 * @r_failed_dev: out parameter for failed device
3423 *
3424 * Enable SATA Interface power management. This will enable
3425 * Device Interface Power Management (DIPM) for min_power
3426 * policy, and then call driver specific callbacks for
3427 * enabling Host Initiated Power management.
3428 *
3429 * LOCKING:
3430 * EH context.
3431 *
3432 * RETURNS:
3433 * 0 on success, -errno on failure.
3434 */
3435 static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
3436 struct ata_device **r_failed_dev)
3437 {
3438 struct ata_port *ap = ata_is_host_link(link) ? link->ap : NULL;
3439 struct ata_eh_context *ehc = &link->eh_context;
3440 struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL;
3441 enum ata_lpm_policy old_policy = link->lpm_policy;
3442 bool no_dipm = link->ap->flags & ATA_FLAG_NO_DIPM;
3443 unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM;
3444 unsigned int err_mask;
3445 int rc;
3446
3447 /* if the link or host doesn't do LPM, noop */
3448 if ((link->flags & ATA_LFLAG_NO_LPM) || (ap && !ap->ops->set_lpm))
3449 return 0;
3450
3451 /*
3452 * DIPM is enabled only for MIN_POWER as some devices
3453 * misbehave when the host NACKs transition to SLUMBER. Order
3454 * device and link configurations such that the host always
3455 * allows DIPM requests.
3456 */
3457 ata_for_each_dev(dev, link, ENABLED) {
3458 bool hipm = ata_id_has_hipm(dev->id);
3459 bool dipm = ata_id_has_dipm(dev->id) && !no_dipm;
3460
3461 /* find the first enabled and LPM enabled devices */
3462 if (!link_dev)
3463 link_dev = dev;
3464
3465 if (!lpm_dev && (hipm || dipm))
3466 lpm_dev = dev;
3467
3468 hints &= ~ATA_LPM_EMPTY;
3469 if (!hipm)
3470 hints &= ~ATA_LPM_HIPM;
3471
3472 /* disable DIPM before changing link config */
3473 if (policy != ATA_LPM_MIN_POWER && dipm) {
3474 err_mask = ata_dev_set_feature(dev,
3475 SETFEATURES_SATA_DISABLE, SATA_DIPM);
3476 if (err_mask && err_mask != AC_ERR_DEV) {
3477 ata_dev_warn(dev,
3478 "failed to disable DIPM, Emask 0x%x\n",
3479 err_mask);
3480 rc = -EIO;
3481 goto fail;
3482 }
3483 }
3484 }
3485
3486 if (ap) {
3487 rc = ap->ops->set_lpm(link, policy, hints);
3488 if (!rc && ap->slave_link)
3489 rc = ap->ops->set_lpm(ap->slave_link, policy, hints);
3490 } else
3491 rc = sata_pmp_set_lpm(link, policy, hints);
3492
3493 /*
3494 * Attribute link config failure to the first (LPM) enabled
3495 * device on the link.
3496 */
3497 if (rc) {
3498 if (rc == -EOPNOTSUPP) {
3499 link->flags |= ATA_LFLAG_NO_LPM;
3500 return 0;
3501 }
3502 dev = lpm_dev ? lpm_dev : link_dev;
3503 goto fail;
3504 }
3505
3506 /*
3507 * Low level driver acked the transition. Issue DIPM command
3508 * with the new policy set.
3509 */
3510 link->lpm_policy = policy;
3511 if (ap && ap->slave_link)
3512 ap->slave_link->lpm_policy = policy;
3513
3514 /* host config updated, enable DIPM if transitioning to MIN_POWER */
3515 ata_for_each_dev(dev, link, ENABLED) {
3516 if (policy == ATA_LPM_MIN_POWER && !no_dipm &&
3517 ata_id_has_dipm(dev->id)) {
3518 err_mask = ata_dev_set_feature(dev,
3519 SETFEATURES_SATA_ENABLE, SATA_DIPM);
3520 if (err_mask && err_mask != AC_ERR_DEV) {
3521 ata_dev_warn(dev,
3522 "failed to enable DIPM, Emask 0x%x\n",
3523 err_mask);
3524 rc = -EIO;
3525 goto fail;
3526 }
3527 }
3528 }
3529
3530 link->last_lpm_change = jiffies;
3531 link->flags |= ATA_LFLAG_CHANGED;
3532
3533 return 0;
3534
3535 fail:
3536 /* restore the old policy */
3537 link->lpm_policy = old_policy;
3538 if (ap && ap->slave_link)
3539 ap->slave_link->lpm_policy = old_policy;
3540
3541 /* if no device or only one more chance is left, disable LPM */
3542 if (!dev || ehc->tries[dev->devno] <= 2) {
3543 ata_link_warn(link, "disabling LPM on the link\n");
3544 link->flags |= ATA_LFLAG_NO_LPM;
3545 }
3546 if (r_failed_dev)
3547 *r_failed_dev = dev;
3548 return rc;
3549 }
3550
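/* ata_link_nr_enabled - return the number of enabled devices on @link */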
3551 int ata_link_nr_enabled(struct ata_link *link)
3552 {
3553 struct ata_device *dev;
3554 int cnt = 0;
3555
3556 ata_for_each_dev(dev, link, ENABLED)
3557 cnt++;
3558 return cnt;
3559 }
3560
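/* ata_link_nr_vacant - return the number of slots on @link whose class is
 * still ATA_DEV_UNKNOWN
 */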
3561 static int ata_link_nr_vacant(struct ata_link *link)
3562 {
3563 struct ata_device *dev;
3564 int cnt = 0;
3565
3566 ata_for_each_dev(dev, link, ALL)
3567 if (dev->class == ATA_DEV_UNKNOWN)
3568 cnt++;
3569 return cnt;
3570 }
3571
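/* Return 1 if EH can skip recovery for @link entirely: the link is disabled,
 * skipping was explicitly requested, or there is nothing left to do (port
 * not frozen, no enabled devices, no pending first reset, and all vacant
 * slots are classified ATA_DEV_NONE).
 */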
3572 static int ata_eh_skip_recovery(struct ata_link *link)
3573 {
3574 struct ata_port *ap = link->ap;
3575 struct ata_eh_context *ehc = &link->eh_context;
3576 struct ata_device *dev;
3577
3578 /* skip disabled links */
3579 if (link->flags & ATA_LFLAG_DISABLED)
3580 return 1;
3581
3582 /* skip if explicitly requested */
3583 if (ehc->i.flags & ATA_EHI_NO_RECOVERY)
3584 return 1;
3585
3586 /* thaw frozen port and recover failed devices */
3587 if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link))
3588 return 0;
3589
3590 /* reset at least once if reset is requested */
3591 if ((ehc->i.action & ATA_EH_RESET) &&
3592 !(ehc->i.flags & ATA_EHI_DID_RESET))
3593 return 0;
3594
3595 /* skip if class codes for all vacant slots are ATA_DEV_NONE */
3596 ata_for_each_dev(dev, link, ALL) {
3597 if (dev->class == ATA_DEV_UNKNOWN &&
3598 ehc->classes[dev->devno] != ATA_DEV_NONE)
3599 return 0;
3600 }
3601
3602 return 1;
3603 }
3604
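/* ata_ering_map() callback: count probe trials recorded within the last
 * ATA_EH_PROBE_TRIAL_INTERVAL, stopping (return -1) at older entries.
 */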
3605 static int ata_count_probe_trials_cb(struct ata_ering_entry *ent, void *void_arg)
3606 {
3607 u64 interval = msecs_to_jiffies(ATA_EH_PROBE_TRIAL_INTERVAL);
3608 u64 now = get_jiffies_64();
3609 int *trials = void_arg;
3610
3611 if ((ent->eflags & ATA_EFLAG_OLD_ER) ||
3612 (ent->timestamp < now - min(now, interval)))
3613 return -1;
3614
3615 (*trials)++;
3616 return 0;
3617 }
3618
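/* Schedule (re)probing of @dev if it was requested and hasn't been tried
 * yet in this EH session.  Returns 1 if a probe was scheduled, 0 otherwise.
 */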
3619 static int ata_eh_schedule_probe(struct ata_device *dev)
3620 {
3621 struct ata_eh_context *ehc = &dev->link->eh_context;
3622 struct ata_link *link = ata_dev_phys_link(dev);
3623 int trials = 0;
3624
3625 if (!(ehc->i.probe_mask & (1 << dev->devno)) ||
3626 (ehc->did_probe_mask & (1 << dev->devno)))
3627 return 0;
3628
3629 ata_eh_detach_dev(dev);
3630 ata_dev_init(dev);
3631 ehc->did_probe_mask |= (1 << dev->devno);
3632 ehc->i.action |= ATA_EH_RESET;
3633 ehc->saved_xfer_mode[dev->devno] = 0;
3634 ehc->saved_ncq_enabled &= ~(1 << dev->devno);
3635
3636 	/* the link may be in a deep sleep, wake it up */
3637 if (link->lpm_policy > ATA_LPM_MAX_POWER) {
3638 if (ata_is_host_link(link))
3639 link->ap->ops->set_lpm(link, ATA_LPM_MAX_POWER,
3640 ATA_LPM_EMPTY);
3641 else
3642 sata_pmp_set_lpm(link, ATA_LPM_MAX_POWER,
3643 ATA_LPM_EMPTY);
3644 }
3645
3646 /* Record and count probe trials on the ering. The specific
3647 * error mask used is irrelevant. Because a successful device
3648 * detection clears the ering, this count accumulates only if
3649 * there are consecutive failed probes.
3650 *
3651 	 * If the count exceeds ATA_EH_PROBE_TRIALS within the last
3652 	 * ATA_EH_PROBE_TRIAL_INTERVAL (i.e. the third failed probe in a
3653 	 * minute with the defaults), link speed is forced down to 1.5Gbps.
3654 *
3655 * This is to work around cases where failed link speed
3656 * negotiation results in device misdetection leading to
3657 * infinite DEVXCHG or PHRDY CHG events.
3658 */
3659 ata_ering_record(&dev->ering, 0, AC_ERR_OTHER);
3660 ata_ering_map(&dev->ering, ata_count_probe_trials_cb, &trials);
3661
3662 if (trials > ATA_EH_PROBE_TRIALS)
3663 sata_down_spd_limit(link, 1);
3664
3665 return 1;
3666 }
3667
3668 static int ata_eh_handle_dev_fail(struct ata_device *dev, int err)
3669 {
3670 struct ata_eh_context *ehc = &dev->link->eh_context;
3671
3672 /* -EAGAIN from EH routine indicates retry without prejudice.
3673 * The requester is responsible for ensuring forward progress.
3674 */
3675 if (err != -EAGAIN)
3676 ehc->tries[dev->devno]--;
3677
3678 switch (err) {
3679 case -ENODEV:
3680 /* device missing or wrong IDENTIFY data, schedule probing */
3681 ehc->i.probe_mask |= (1 << dev->devno);
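		/* fall through */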
3682 case -EINVAL:
3683 /* give it just one more chance */
3684 ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1);
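		/* fall through */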
3685 case -EIO:
3686 if (ehc->tries[dev->devno] == 1) {
3687 /* This is the last chance, better to slow
3688 * down than lose it.
3689 */
3690 sata_down_spd_limit(ata_dev_phys_link(dev), 0);
3691 if (dev->pio_mode > XFER_PIO_0)
3692 ata_down_xfermask_limit(dev, ATA_DNXFER_PIO);
3693 }
3694 }
3695
3696 if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) {
3697 /* disable device if it has used up all its chances */
3698 ata_dev_disable(dev);
3699
3700 /* detach if offline */
3701 if (ata_phys_link_offline(ata_dev_phys_link(dev)))
3702 ata_eh_detach_dev(dev);
3703
3704 /* schedule probe if necessary */
3705 if (ata_eh_schedule_probe(dev)) {
3706 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
3707 memset(ehc->cmd_timeout_idx[dev->devno], 0,
3708 sizeof(ehc->cmd_timeout_idx[dev->devno]));
3709 }
3710
3711 return 1;
3712 } else {
3713 ehc->i.action |= ATA_EH_RESET;
3714 return 0;
3715 }
3716 }
3717
3718 /**
3719 * ata_eh_recover - recover host port after error
3720 * @ap: host port to recover
3721 * @prereset: prereset method (can be NULL)
3722 * @softreset: softreset method (can be NULL)
3723 * @hardreset: hardreset method (can be NULL)
3724 * @postreset: postreset method (can be NULL)
3725 * @r_failed_link: out parameter for failed link
3726 *
3727 	 * This is the alpha and omega, yin and yang, heart and soul of
3728 * libata exception handling. On entry, actions required to
3729 * recover each link and hotplug requests are recorded in the
3730 * link's eh_context. This function executes all the operations
3731 	 * with appropriate retries and fallbacks to resurrect failed
3732 * devices, detach goners and greet newcomers.
3733 *
3734 * LOCKING:
3735 * Kernel thread context (may sleep).
3736 *
3737 * RETURNS:
3738 * 0 on success, -errno on failure.
3739 */
3740 int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
3741 ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
3742 ata_postreset_fn_t postreset,
3743 struct ata_link **r_failed_link)
3744 {
3745 struct ata_link *link;
3746 struct ata_device *dev;
3747 int rc, nr_fails;
3748 unsigned long flags, deadline;
3749
3750 DPRINTK("ENTER\n");
3751
3752 /* prep for recovery */
3753 ata_for_each_link(link, ap, EDGE) {
3754 struct ata_eh_context *ehc = &link->eh_context;
3755
3756 /* re-enable link? */
3757 if (ehc->i.action & ATA_EH_ENABLE_LINK) {
3758 ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK);
3759 spin_lock_irqsave(ap->lock, flags);
3760 link->flags &= ~ATA_LFLAG_DISABLED;
3761 spin_unlock_irqrestore(ap->lock, flags);
3762 ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK);
3763 }
3764
3765 ata_for_each_dev(dev, link, ALL) {
3766 if (link->flags & ATA_LFLAG_NO_RETRY)
3767 ehc->tries[dev->devno] = 1;
3768 else
3769 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
3770
3771 /* collect port action mask recorded in dev actions */
3772 ehc->i.action |= ehc->i.dev_action[dev->devno] &
3773 ~ATA_EH_PERDEV_MASK;
3774 ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK;
3775
3776 /* process hotplug request */
3777 if (dev->flags & ATA_DFLAG_DETACH)
3778 ata_eh_detach_dev(dev);
3779
3780 /* schedule probe if necessary */
3781 if (!ata_dev_enabled(dev))
3782 ata_eh_schedule_probe(dev);
3783 }
3784 }
3785
3786 retry:
3787 rc = 0;
3788
3789 /* if UNLOADING, finish immediately */
3790 if (ap->pflags & ATA_PFLAG_UNLOADING)
3791 goto out;
3792
3793 /* prep for EH */
3794 ata_for_each_link(link, ap, EDGE) {
3795 struct ata_eh_context *ehc = &link->eh_context;
3796
3797 /* skip EH if possible. */
3798 if (ata_eh_skip_recovery(link))
3799 ehc->i.action = 0;
3800
3801 ata_for_each_dev(dev, link, ALL)
3802 ehc->classes[dev->devno] = ATA_DEV_UNKNOWN;
3803 }
3804
3805 /* reset */
3806 ata_for_each_link(link, ap, EDGE) {
3807 struct ata_eh_context *ehc = &link->eh_context;
3808
3809 if (!(ehc->i.action & ATA_EH_RESET))
3810 continue;
3811
3812 rc = ata_eh_reset(link, ata_link_nr_vacant(link),
3813 prereset, softreset, hardreset, postreset);
3814 if (rc) {
3815 ata_link_err(link, "reset failed, giving up\n");
3816 goto out;
3817 }
3818 }
3819
3820 do {
3821 unsigned long now;
3822
3823 /*
3824 * clears ATA_EH_PARK in eh_info and resets
3825 * ap->park_req_pending
3826 */
3827 ata_eh_pull_park_action(ap);
3828
3829 deadline = jiffies;
3830 ata_for_each_link(link, ap, EDGE) {
3831 ata_for_each_dev(dev, link, ALL) {
3832 struct ata_eh_context *ehc = &link->eh_context;
3833 unsigned long tmp;
3834
3835 if (dev->class != ATA_DEV_ATA &&
3836 dev->class != ATA_DEV_ZAC)
3837 continue;
3838 if (!(ehc->i.dev_action[dev->devno] &
3839 ATA_EH_PARK))
3840 continue;
3841 tmp = dev->unpark_deadline;
3842 if (time_before(deadline, tmp))
3843 deadline = tmp;
3844 else if (time_before_eq(tmp, jiffies))
3845 continue;
3846 if (ehc->unloaded_mask & (1 << dev->devno))
3847 continue;
3848
3849 ata_eh_park_issue_cmd(dev, 1);
3850 }
3851 }
3852
3853 now = jiffies;
3854 if (time_before_eq(deadline, now))
3855 break;
3856
3857 ata_eh_release(ap);
3858 deadline = wait_for_completion_timeout(&ap->park_req_pending,
3859 deadline - now);
3860 ata_eh_acquire(ap);
3861 } while (deadline);
3862 ata_for_each_link(link, ap, EDGE) {
3863 ata_for_each_dev(dev, link, ALL) {
3864 if (!(link->eh_context.unloaded_mask &
3865 (1 << dev->devno)))
3866 continue;
3867
3868 ata_eh_park_issue_cmd(dev, 0);
3869 ata_eh_done(link, dev, ATA_EH_PARK);
3870 }
3871 }
3872
3873 /* the rest */
3874 nr_fails = 0;
3875 ata_for_each_link(link, ap, PMP_FIRST) {
3876 struct ata_eh_context *ehc = &link->eh_context;
3877
3878 if (sata_pmp_attached(ap) && ata_is_host_link(link))
3879 goto config_lpm;
3880
3881 /* revalidate existing devices and attach new ones */
3882 rc = ata_eh_revalidate_and_attach(link, &dev);
3883 if (rc)
3884 goto rest_fail;
3885
3886 		/* if a PMP got attached, return; PMP EH will take care of it */
3887 if (link->device->class == ATA_DEV_PMP) {
3888 ehc->i.action = 0;
3889 return 0;
3890 }
3891
3892 /* configure transfer mode if necessary */
3893 if (ehc->i.flags & ATA_EHI_SETMODE) {
3894 rc = ata_set_mode(link, &dev);
3895 if (rc)
3896 goto rest_fail;
3897 ehc->i.flags &= ~ATA_EHI_SETMODE;
3898 }
3899
3900 /* If reset has been issued, clear UA to avoid
3901 * disrupting the current users of the device.
3902 */
3903 if (ehc->i.flags & ATA_EHI_DID_RESET) {
3904 ata_for_each_dev(dev, link, ALL) {
3905 if (dev->class != ATA_DEV_ATAPI)
3906 continue;
3907 rc = atapi_eh_clear_ua(dev);
3908 if (rc)
3909 goto rest_fail;
3910 if (zpodd_dev_enabled(dev))
3911 zpodd_post_poweron(dev);
3912 }
3913 }
3914
3915 /* retry flush if necessary */
3916 ata_for_each_dev(dev, link, ALL) {
3917 if (dev->class != ATA_DEV_ATA &&
3918 dev->class != ATA_DEV_ZAC)
3919 continue;
3920 rc = ata_eh_maybe_retry_flush(dev);
3921 if (rc)
3922 goto rest_fail;
3923 }
3924
3925 config_lpm:
3926 /* configure link power saving */
3927 if (link->lpm_policy != ap->target_lpm_policy) {
3928 rc = ata_eh_set_lpm(link, ap->target_lpm_policy, &dev);
3929 if (rc)
3930 goto rest_fail;
3931 }
3932
3933 /* this link is okay now */
3934 ehc->i.flags = 0;
3935 continue;
3936
3937 rest_fail:
3938 nr_fails++;
3939 if (dev)
3940 ata_eh_handle_dev_fail(dev, rc);
3941
3942 if (ap->pflags & ATA_PFLAG_FROZEN) {
3943 /* PMP reset requires working host port.
3944 * Can't retry if it's frozen.
3945 */
3946 if (sata_pmp_attached(ap))
3947 goto out;
3948 break;
3949 }
3950 }
3951
3952 if (nr_fails)
3953 goto retry;
3954
3955 out:
3956 if (rc && r_failed_link)
3957 *r_failed_link = link;
3958
3959 DPRINTK("EXIT, rc=%d\n", rc);
3960 return rc;
3961 }
3962
3963 /**
3964 * ata_eh_finish - finish up EH
3965 * @ap: host port to finish EH for
3966 *
3967 * Recovery is complete. Clean up EH states and retry or finish
3968 * failed qcs.
3969 *
3970 * LOCKING:
3971 * None.
3972 */
3973 void ata_eh_finish(struct ata_port *ap)
3974 {
3975 int tag;
3976
3977 /* retry or finish qcs */
3978 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
3979 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
3980
3981 if (!(qc->flags & ATA_QCFLAG_FAILED))
3982 continue;
3983
3984 if (qc->err_mask) {
3985 /* FIXME: Once EH migration is complete,
3986 * generate sense data in this function,
3987 * considering both err_mask and tf.
3988 */
3989 if (qc->flags & ATA_QCFLAG_RETRY)
3990 ata_eh_qc_retry(qc);
3991 else
3992 ata_eh_qc_complete(qc);
3993 } else {
3994 if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
3995 ata_eh_qc_complete(qc);
3996 } else {
3997 /* feed zero TF to sense generation */
3998 memset(&qc->result_tf, 0, sizeof(qc->result_tf));
3999 ata_eh_qc_retry(qc);
4000 }
4001 }
4002 }
4003
4004 /* make sure nr_active_links is zero after EH */
4005 WARN_ON(ap->nr_active_links);
4006 ap->nr_active_links = 0;
4007 }
4008
4009 /**
4010 * ata_do_eh - do standard error handling
4011 * @ap: host port to handle error for
4013 * @prereset: prereset method (can be NULL)
4014 * @softreset: softreset method (can be NULL)
4015 * @hardreset: hardreset method (can be NULL)
4016 * @postreset: postreset method (can be NULL)
4017 *
4018 * Perform standard error handling sequence.
4019 *
4020 * LOCKING:
4021 * Kernel thread context (may sleep).
4022 */
4023 void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
4024 ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
4025 ata_postreset_fn_t postreset)
4026 {
4027 struct ata_device *dev;
4028 int rc;
4029
4030 ata_eh_autopsy(ap);
4031 ata_eh_report(ap);
4032
4033 rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset,
4034 NULL);
4035 if (rc) {
4036 ata_for_each_dev(dev, &ap->link, ALL)
4037 ata_dev_disable(dev);
4038 }
4039
4040 ata_eh_finish(ap);
4041 }
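
/*
 * Hedged illustration, not part of the original file: a low-level driver
 * with controller-specific reset needs can implement its ->error_handler
 * as a thin wrapper that feeds its own methods to ata_do_eh(); the
 * autopsy/report/recover/finish sequence above then runs unchanged.
 * "example_error_handler" is a hypothetical name; the helpers passed in
 * are the standard libata ones.
 */
static void example_error_handler(struct ata_port *ap)
{
	/* no softreset; rely on SATA hardreset plus the std pre/post hooks */
	ata_do_eh(ap, ata_std_prereset, NULL, sata_std_hardreset,
		  ata_std_postreset);
}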
4042
4043 /**
4044 * ata_std_error_handler - standard error handler
4045 * @ap: host port to handle error for
4046 *
4047 * Standard error handler
4048 *
4049 * LOCKING:
4050 * Kernel thread context (may sleep).
4051 */
4052 void ata_std_error_handler(struct ata_port *ap)
4053 {
4054 struct ata_port_operations *ops = ap->ops;
4055 ata_reset_fn_t hardreset = ops->hardreset;
4056
4057 /* ignore built-in hardreset if SCR access is not available */
4058 if (hardreset == sata_std_hardreset && !sata_scr_valid(&ap->link))
4059 hardreset = NULL;
4060
4061 ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset);
4062 }
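
/*
 * Hedged illustration, not part of the original file: most drivers pick
 * up this standard handler by pointing ->error_handler at it, either
 * directly or via an inherited ops table such as sata_port_ops.  The
 * "example_port_ops" name below is hypothetical.
 */
static struct ata_port_operations example_port_ops = {
	.inherits	= &sata_port_ops,
	.hardreset	= sata_std_hardreset,
	.error_handler	= ata_std_error_handler,
};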
4063
4064 #ifdef CONFIG_PM
4065 /**
4066 * ata_eh_handle_port_suspend - perform port suspend operation
4067 * @ap: port to suspend
4068 *
4069 * Suspend @ap.
4070 *
4071 * LOCKING:
4072 * Kernel thread context (may sleep).
4073 */
4074 static void ata_eh_handle_port_suspend(struct ata_port *ap)
4075 {
4076 unsigned long flags;
4077 int rc = 0;
4078 struct ata_device *dev;
4079
4080 /* are we suspending? */
4081 spin_lock_irqsave(ap->lock, flags);
4082 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
4083 ap->pm_mesg.event & PM_EVENT_RESUME) {
4084 spin_unlock_irqrestore(ap->lock, flags);
4085 return;
4086 }
4087 spin_unlock_irqrestore(ap->lock, flags);
4088
4089 WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);
4090
4091 /*
4092 * If we have a ZPODD attached, check its zero
4093 * power ready status before the port is frozen.
4094 * Only needed for runtime suspend.
4095 */
4096 if (PMSG_IS_AUTO(ap->pm_mesg)) {
4097 ata_for_each_dev(dev, &ap->link, ENABLED) {
4098 if (zpodd_dev_enabled(dev))
4099 zpodd_on_suspend(dev);
4100 }
4101 }
4102
4103 /* tell ACPI we're suspending */
4104 rc = ata_acpi_on_suspend(ap);
4105 if (rc)
4106 goto out;
4107
4108 /* suspend */
4109 ata_eh_freeze_port(ap);
4110
4111 if (ap->ops->port_suspend)
4112 rc = ap->ops->port_suspend(ap, ap->pm_mesg);
4113
4114 ata_acpi_set_state(ap, ap->pm_mesg);
4115 out:
4116 /* update the flags */
4117 spin_lock_irqsave(ap->lock, flags);
4118
4119 ap->pflags &= ~ATA_PFLAG_PM_PENDING;
4120 if (rc == 0)
4121 ap->pflags |= ATA_PFLAG_SUSPENDED;
4122 else if (ap->pflags & ATA_PFLAG_FROZEN)
4123 ata_port_schedule_eh(ap);
4124
4125 spin_unlock_irqrestore(ap->lock, flags);
4126
4127 return;
4128 }
4129
4130 /**
4131 * ata_eh_handle_port_resume - perform port resume operation
4132 * @ap: port to resume
4133 *
4134 * Resume @ap.
4135 *
4136 * LOCKING:
4137 * Kernel thread context (may sleep).
4138 */
4139 static void ata_eh_handle_port_resume(struct ata_port *ap)
4140 {
4141 struct ata_link *link;
4142 struct ata_device *dev;
4143 unsigned long flags;
4144 int rc = 0;
4145
4146 /* are we resuming? */
4147 spin_lock_irqsave(ap->lock, flags);
4148 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
4149 !(ap->pm_mesg.event & PM_EVENT_RESUME)) {
4150 spin_unlock_irqrestore(ap->lock, flags);
4151 return;
4152 }
4153 spin_unlock_irqrestore(ap->lock, flags);
4154
4155 WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED));
4156
4157 /*
4158 	 * Error timestamps are in jiffies, which doesn't advance while
4159 	 * suspended, and PHY events during resume aren't too uncommon.
4160 	 * When the two are combined, they can lead to unnecessary speed
4161 * downs if the machine is suspended and resumed repeatedly.
4162 * Clear error history.
4163 */
4164 ata_for_each_link(link, ap, HOST_FIRST)
4165 ata_for_each_dev(dev, link, ALL)
4166 ata_ering_clear(&dev->ering);
4167
4168 ata_acpi_set_state(ap, ap->pm_mesg);
4169
4170 if (ap->ops->port_resume)
4171 rc = ap->ops->port_resume(ap);
4172
4173 /* tell ACPI that we're resuming */
4174 ata_acpi_on_resume(ap);
4175
4176 /* update the flags */
4177 spin_lock_irqsave(ap->lock, flags);
4178 ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED);
4179 spin_unlock_irqrestore(ap->lock, flags);
4180 }
4181 #endif /* CONFIG_PM */
4182