1 /*
2  * This file is provided under a dual BSD/GPLv2 license.  When using or
3  * redistributing this file, you may do so under either license.
4  *
5  * GPL LICENSE SUMMARY
6  *
7  * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
8  *
9  * This program is free software; you can redistribute it and/or modify it
10  * under the terms and conditions of the GNU General Public License,
11  * version 2, as published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful, but WITHOUT
14  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
16  * more details.
17  *
18  * You should have received a copy of the GNU General Public License along with
19  * this program; if not, write to the Free Software Foundation, Inc.,
20  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
21  *
22  * The full GNU General Public License is included in this distribution in
23  * the file called "COPYING".
24  *
25  * BSD LICENSE
26  *
27  * Copyright(c) 2004-2009 Intel Corporation. All rights reserved.
28  *
29  * Redistribution and use in source and binary forms, with or without
30  * modification, are permitted provided that the following conditions are met:
31  *
32  *   * Redistributions of source code must retain the above copyright
33  *     notice, this list of conditions and the following disclaimer.
34  *   * Redistributions in binary form must reproduce the above copyright
35  *     notice, this list of conditions and the following disclaimer in
36  *     the documentation and/or other materials provided with the
37  *     distribution.
38  *   * Neither the name of Intel Corporation nor the names of its
39  *     contributors may be used to endorse or promote products derived
40  *     from this software without specific prior written permission.
41  *
42  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
43  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
46  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
47  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
48  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
49  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
50  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
51  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
52  * POSSIBILITY OF SUCH DAMAGE.
53  */
54 
55 /*
56  * Support routines for v3+ hardware
57  */
58 #include <linux/module.h>
59 #include <linux/pci.h>
60 #include <linux/gfp.h>
61 #include <linux/dmaengine.h>
62 #include <linux/dma-mapping.h>
63 #include <linux/prefetch.h>
64 #include "../dmaengine.h"
65 #include "registers.h"
66 #include "hw.h"
67 #include "dma.h"
68 #include "dma_v2.h"
69 
70 /* ioat hardware assumes at least two sources for raid operations */
71 #define src_cnt_to_sw(x) ((x) + 2)
72 #define src_cnt_to_hw(x) ((x) - 2)
73 #define ndest_to_sw(x) ((x) + 1)
74 #define ndest_to_hw(x) ((x) - 1)
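/* the 16 source pq operations encode src_cnt - 9 in the hardware descriptor */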
75 #define src16_cnt_to_sw(x) ((x) + 9)
76 #define src16_cnt_to_hw(x) ((x) - 9)
77 
78 /* provide a lookup table for setting the source address in the base or
79  * extended descriptor of an xor or pq descriptor
80  */
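/* xor_idx_to_desc and pq_idx_to_desc are bitmasks: bit 'idx' selects the
 * base (0) or extended (1) descriptor that holds source 'idx'
 */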
81 static const u8 xor_idx_to_desc = 0xe0;
82 static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
83 static const u8 pq_idx_to_desc = 0xf8;
84 static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1,
85 				       2, 2, 2, 2, 2, 2, 2 };
86 static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
87 static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7,
88 					0, 1, 2, 3, 4, 5, 6 };
89 
90 /*
91  * technically sources 1 and 2 do not require SED, but the op will have
92  * at least 9 descriptors so that's irrelevant.
93  */
94 static const u8 pq16_idx_to_sed[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0,
95 				      1, 1, 1, 1, 1, 1, 1 };
96 
97 static void ioat3_eh(struct ioat2_dma_chan *ioat);
98 
99 static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
100 {
101 	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
102 
103 	return raw->field[xor_idx_to_field[idx]];
104 }
105 
106 static void xor_set_src(struct ioat_raw_descriptor *descs[2],
107 			dma_addr_t addr, u32 offset, int idx)
108 {
109 	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
110 
111 	raw->field[xor_idx_to_field[idx]] = addr + offset;
112 }
113 
114 static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
115 {
116 	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
117 
118 	return raw->field[pq_idx_to_field[idx]];
119 }
120 
121 static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx)
122 {
123 	struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];
124 
125 	return raw->field[pq16_idx_to_field[idx]];
126 }
127 
128 static void pq_set_src(struct ioat_raw_descriptor *descs[2],
129 		       dma_addr_t addr, u32 offset, u8 coef, int idx)
130 {
131 	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
132 	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
133 
134 	raw->field[pq_idx_to_field[idx]] = addr + offset;
135 	pq->coef[idx] = coef;
136 }
137 
138 static int sed_get_pq16_pool_idx(int src_cnt)
139 {
140 
141 	return pq16_idx_to_sed[src_cnt];
142 }
143 
144 static bool is_jf_ioat(struct pci_dev *pdev)
145 {
146 	switch (pdev->device) {
147 	case PCI_DEVICE_ID_INTEL_IOAT_JSF0:
148 	case PCI_DEVICE_ID_INTEL_IOAT_JSF1:
149 	case PCI_DEVICE_ID_INTEL_IOAT_JSF2:
150 	case PCI_DEVICE_ID_INTEL_IOAT_JSF3:
151 	case PCI_DEVICE_ID_INTEL_IOAT_JSF4:
152 	case PCI_DEVICE_ID_INTEL_IOAT_JSF5:
153 	case PCI_DEVICE_ID_INTEL_IOAT_JSF6:
154 	case PCI_DEVICE_ID_INTEL_IOAT_JSF7:
155 	case PCI_DEVICE_ID_INTEL_IOAT_JSF8:
156 	case PCI_DEVICE_ID_INTEL_IOAT_JSF9:
157 		return true;
158 	default:
159 		return false;
160 	}
161 }
162 
163 static bool is_snb_ioat(struct pci_dev *pdev)
164 {
165 	switch (pdev->device) {
166 	case PCI_DEVICE_ID_INTEL_IOAT_SNB0:
167 	case PCI_DEVICE_ID_INTEL_IOAT_SNB1:
168 	case PCI_DEVICE_ID_INTEL_IOAT_SNB2:
169 	case PCI_DEVICE_ID_INTEL_IOAT_SNB3:
170 	case PCI_DEVICE_ID_INTEL_IOAT_SNB4:
171 	case PCI_DEVICE_ID_INTEL_IOAT_SNB5:
172 	case PCI_DEVICE_ID_INTEL_IOAT_SNB6:
173 	case PCI_DEVICE_ID_INTEL_IOAT_SNB7:
174 	case PCI_DEVICE_ID_INTEL_IOAT_SNB8:
175 	case PCI_DEVICE_ID_INTEL_IOAT_SNB9:
176 		return true;
177 	default:
178 		return false;
179 	}
180 }
181 
182 static bool is_ivb_ioat(struct pci_dev *pdev)
183 {
184 	switch (pdev->device) {
185 	case PCI_DEVICE_ID_INTEL_IOAT_IVB0:
186 	case PCI_DEVICE_ID_INTEL_IOAT_IVB1:
187 	case PCI_DEVICE_ID_INTEL_IOAT_IVB2:
188 	case PCI_DEVICE_ID_INTEL_IOAT_IVB3:
189 	case PCI_DEVICE_ID_INTEL_IOAT_IVB4:
190 	case PCI_DEVICE_ID_INTEL_IOAT_IVB5:
191 	case PCI_DEVICE_ID_INTEL_IOAT_IVB6:
192 	case PCI_DEVICE_ID_INTEL_IOAT_IVB7:
193 	case PCI_DEVICE_ID_INTEL_IOAT_IVB8:
194 	case PCI_DEVICE_ID_INTEL_IOAT_IVB9:
195 		return true;
196 	default:
197 		return false;
198 	}
199 
200 }
201 
202 static bool is_hsw_ioat(struct pci_dev *pdev)
203 {
204 	switch (pdev->device) {
205 	case PCI_DEVICE_ID_INTEL_IOAT_HSW0:
206 	case PCI_DEVICE_ID_INTEL_IOAT_HSW1:
207 	case PCI_DEVICE_ID_INTEL_IOAT_HSW2:
208 	case PCI_DEVICE_ID_INTEL_IOAT_HSW3:
209 	case PCI_DEVICE_ID_INTEL_IOAT_HSW4:
210 	case PCI_DEVICE_ID_INTEL_IOAT_HSW5:
211 	case PCI_DEVICE_ID_INTEL_IOAT_HSW6:
212 	case PCI_DEVICE_ID_INTEL_IOAT_HSW7:
213 	case PCI_DEVICE_ID_INTEL_IOAT_HSW8:
214 	case PCI_DEVICE_ID_INTEL_IOAT_HSW9:
215 		return true;
216 	default:
217 		return false;
218 	}
219 
220 }
221 
222 static bool is_xeon_cb32(struct pci_dev *pdev)
223 {
224 	return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) ||
225 		is_hsw_ioat(pdev);
226 }
227 
228 static bool is_bwd_ioat(struct pci_dev *pdev)
229 {
230 	switch (pdev->device) {
231 	case PCI_DEVICE_ID_INTEL_IOAT_BWD0:
232 	case PCI_DEVICE_ID_INTEL_IOAT_BWD1:
233 	case PCI_DEVICE_ID_INTEL_IOAT_BWD2:
234 	case PCI_DEVICE_ID_INTEL_IOAT_BWD3:
235 		return true;
236 	default:
237 		return false;
238 	}
239 }
240 
241 static bool is_bwd_noraid(struct pci_dev *pdev)
242 {
243 	switch (pdev->device) {
244 	case PCI_DEVICE_ID_INTEL_IOAT_BWD2:
245 	case PCI_DEVICE_ID_INTEL_IOAT_BWD3:
246 		return true;
247 	default:
248 		return false;
249 	}
250 
251 }
252 
253 static void pq16_set_src(struct ioat_raw_descriptor *desc[3],
254 			dma_addr_t addr, u32 offset, u8 coef, int idx)
255 {
256 	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0];
257 	struct ioat_pq16a_descriptor *pq16 =
258 		(struct ioat_pq16a_descriptor *)desc[1];
259 	struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];
260 
261 	raw->field[pq16_idx_to_field[idx]] = addr + offset;
262 
263 	if (idx < 8)
264 		pq->coef[idx] = coef;
265 	else
266 		pq16->coef[idx - 8] = coef;
267 }
268 
269 static struct ioat_sed_ent *
270 ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool)
271 {
272 	struct ioat_sed_ent *sed;
273 	gfp_t flags = __GFP_ZERO | GFP_ATOMIC;
274 
275 	sed = kmem_cache_alloc(device->sed_pool, flags);
276 	if (!sed)
277 		return NULL;
278 
279 	sed->hw_pool = hw_pool;
280 	sed->hw = dma_pool_alloc(device->sed_hw_pool[hw_pool],
281 				 flags, &sed->dma);
282 	if (!sed->hw) {
283 		kmem_cache_free(device->sed_pool, sed);
284 		return NULL;
285 	}
286 
287 	return sed;
288 }
289 
290 static void ioat3_free_sed(struct ioatdma_device *device, struct ioat_sed_ent *sed)
291 {
292 	if (!sed)
293 		return;
294 
295 	dma_pool_free(device->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma);
296 	kmem_cache_free(device->sed_pool, sed);
297 }
298 
299 static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
300 			    struct ioat_ring_ent *desc, int idx)
301 {
302 	struct ioat_chan_common *chan = &ioat->base;
303 	struct pci_dev *pdev = chan->device->pdev;
304 	size_t len = desc->len;
305 	size_t offset = len - desc->hw->size;
306 	struct dma_async_tx_descriptor *tx = &desc->txd;
307 	enum dma_ctrl_flags flags = tx->flags;
308 
309 	switch (desc->hw->ctl_f.op) {
310 	case IOAT_OP_COPY:
311 		if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */
312 			ioat_dma_unmap(chan, flags, len, desc->hw);
313 		break;
314 	case IOAT_OP_FILL: {
315 		struct ioat_fill_descriptor *hw = desc->fill;
316 
317 		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
318 			ioat_unmap(pdev, hw->dst_addr - offset, len,
319 				   PCI_DMA_FROMDEVICE, flags, 1);
320 		break;
321 	}
322 	case IOAT_OP_XOR_VAL:
323 	case IOAT_OP_XOR: {
324 		struct ioat_xor_descriptor *xor = desc->xor;
325 		struct ioat_ring_ent *ext;
326 		struct ioat_xor_ext_descriptor *xor_ex = NULL;
327 		int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
328 		struct ioat_raw_descriptor *descs[2];
329 		int i;
330 
331 		if (src_cnt > 5) {
332 			ext = ioat2_get_ring_ent(ioat, idx + 1);
333 			xor_ex = ext->xor_ex;
334 		}
335 
336 		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
337 			descs[0] = (struct ioat_raw_descriptor *) xor;
338 			descs[1] = (struct ioat_raw_descriptor *) xor_ex;
339 			for (i = 0; i < src_cnt; i++) {
340 				dma_addr_t src = xor_get_src(descs, i);
341 
342 				ioat_unmap(pdev, src - offset, len,
343 					   PCI_DMA_TODEVICE, flags, 0);
344 			}
345 
346 			/* dest is a source in xor validate operations */
347 			if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
348 				ioat_unmap(pdev, xor->dst_addr - offset, len,
349 					   PCI_DMA_TODEVICE, flags, 1);
350 				break;
351 			}
352 		}
353 
354 		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
355 			ioat_unmap(pdev, xor->dst_addr - offset, len,
356 				   PCI_DMA_FROMDEVICE, flags, 1);
357 		break;
358 	}
359 	case IOAT_OP_PQ_VAL:
360 	case IOAT_OP_PQ: {
361 		struct ioat_pq_descriptor *pq = desc->pq;
362 		struct ioat_ring_ent *ext;
363 		struct ioat_pq_ext_descriptor *pq_ex = NULL;
364 		int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
365 		struct ioat_raw_descriptor *descs[2];
366 		int i;
367 
368 		if (src_cnt > 3) {
369 			ext = ioat2_get_ring_ent(ioat, idx + 1);
370 			pq_ex = ext->pq_ex;
371 		}
372 
373 		/* in the 'continue' case don't unmap the dests as sources */
374 		if (dmaf_p_disabled_continue(flags))
375 			src_cnt--;
376 		else if (dmaf_continue(flags))
377 			src_cnt -= 3;
378 
379 		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
380 			descs[0] = (struct ioat_raw_descriptor *) pq;
381 			descs[1] = (struct ioat_raw_descriptor *) pq_ex;
382 			for (i = 0; i < src_cnt; i++) {
383 				dma_addr_t src = pq_get_src(descs, i);
384 
385 				ioat_unmap(pdev, src - offset, len,
386 					   PCI_DMA_TODEVICE, flags, 0);
387 			}
388 
389 			/* the dests are sources in pq validate operations */
390 			if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
391 				if (!(flags & DMA_PREP_PQ_DISABLE_P))
392 					ioat_unmap(pdev, pq->p_addr - offset,
393 						   len, PCI_DMA_TODEVICE, flags, 0);
394 				if (!(flags & DMA_PREP_PQ_DISABLE_Q))
395 					ioat_unmap(pdev, pq->q_addr - offset,
396 						   len, PCI_DMA_TODEVICE, flags, 0);
397 				break;
398 			}
399 		}
400 
401 		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
402 			if (!(flags & DMA_PREP_PQ_DISABLE_P))
403 				ioat_unmap(pdev, pq->p_addr - offset, len,
404 					   PCI_DMA_BIDIRECTIONAL, flags, 1);
405 			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
406 				ioat_unmap(pdev, pq->q_addr - offset, len,
407 					   PCI_DMA_BIDIRECTIONAL, flags, 1);
408 		}
409 		break;
410 	}
411 	case IOAT_OP_PQ_16S:
412 	case IOAT_OP_PQ_VAL_16S: {
413 		struct ioat_pq_descriptor *pq = desc->pq;
414 		int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt);
415 		struct ioat_raw_descriptor *descs[4];
416 		int i;
417 
418 		/* in the 'continue' case don't unmap the dests as sources */
419 		if (dmaf_p_disabled_continue(flags))
420 			src_cnt--;
421 		else if (dmaf_continue(flags))
422 			src_cnt -= 3;
423 
424 		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
425 			descs[0] = (struct ioat_raw_descriptor *)pq;
426 			descs[1] = (struct ioat_raw_descriptor *)(desc->sed->hw);
427 			descs[2] = (struct ioat_raw_descriptor *)(&desc->sed->hw->b[0]);
428 			for (i = 0; i < src_cnt; i++) {
429 				dma_addr_t src = pq16_get_src(descs, i);
430 
431 				ioat_unmap(pdev, src - offset, len,
432 					   PCI_DMA_TODEVICE, flags, 0);
433 			}
434 
435 			/* the dests are sources in pq validate operations */
436 			if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
437 				if (!(flags & DMA_PREP_PQ_DISABLE_P))
438 					ioat_unmap(pdev, pq->p_addr - offset,
439 						   len, PCI_DMA_TODEVICE,
440 						   flags, 0);
441 				if (!(flags & DMA_PREP_PQ_DISABLE_Q))
442 					ioat_unmap(pdev, pq->q_addr - offset,
443 						   len, PCI_DMA_TODEVICE,
444 						   flags, 0);
445 				break;
446 			}
447 		}
448 
449 		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
450 			if (!(flags & DMA_PREP_PQ_DISABLE_P))
451 				ioat_unmap(pdev, pq->p_addr - offset, len,
452 					   PCI_DMA_BIDIRECTIONAL, flags, 1);
453 			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
454 				ioat_unmap(pdev, pq->q_addr - offset, len,
455 					   PCI_DMA_BIDIRECTIONAL, flags, 1);
456 		}
457 		break;
458 	}
459 	default:
460 		dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
461 			__func__, desc->hw->ctl_f.op);
462 	}
463 }
464 
465 static bool desc_has_ext(struct ioat_ring_ent *desc)
466 {
467 	struct ioat_dma_descriptor *hw = desc->hw;
468 
469 	if (hw->ctl_f.op == IOAT_OP_XOR ||
470 	    hw->ctl_f.op == IOAT_OP_XOR_VAL) {
471 		struct ioat_xor_descriptor *xor = desc->xor;
472 
473 		if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
474 			return true;
475 	} else if (hw->ctl_f.op == IOAT_OP_PQ ||
476 		   hw->ctl_f.op == IOAT_OP_PQ_VAL) {
477 		struct ioat_pq_descriptor *pq = desc->pq;
478 
479 		if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3)
480 			return true;
481 	}
482 
483 	return false;
484 }
485 
486 static u64 ioat3_get_current_completion(struct ioat_chan_common *chan)
487 {
488 	u64 phys_complete;
489 	u64 completion;
490 
491 	completion = *chan->completion;
492 	phys_complete = ioat_chansts_to_addr(completion);
493 
494 	dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__,
495 		(unsigned long long) phys_complete);
496 
497 	return phys_complete;
498 }
499 
500 static bool ioat3_cleanup_preamble(struct ioat_chan_common *chan,
501 				   u64 *phys_complete)
502 {
503 	*phys_complete = ioat3_get_current_completion(chan);
504 	if (*phys_complete == chan->last_completion)
505 		return false;
506 
507 	clear_bit(IOAT_COMPLETION_ACK, &chan->state);
508 	mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
509 
510 	return true;
511 }
512 
513 static void
514 desc_get_errstat(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc)
515 {
516 	struct ioat_dma_descriptor *hw = desc->hw;
517 
518 	switch (hw->ctl_f.op) {
519 	case IOAT_OP_PQ_VAL:
520 	case IOAT_OP_PQ_VAL_16S:
521 	{
522 		struct ioat_pq_descriptor *pq = desc->pq;
523 
524 		/* check if there's error written */
525 		if (!pq->dwbes_f.wbes)
526 			return;
527 
528 		/* need to set a chanerr var for checking to clear later */
529 
530 		if (pq->dwbes_f.p_val_err)
531 			*desc->result |= SUM_CHECK_P_RESULT;
532 
533 		if (pq->dwbes_f.q_val_err)
534 			*desc->result |= SUM_CHECK_Q_RESULT;
535 
536 		return;
537 	}
538 	default:
539 		return;
540 	}
541 }
542 
543 /**
544  * __cleanup - reclaim used descriptors
545  * @ioat: channel (ring) to clean
546  *
547  * The difference from the dma_v2.c __cleanup() is that this routine
548  * handles extended descriptors and dma-unmapping raid operations.
549  */
550 static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
551 {
552 	struct ioat_chan_common *chan = &ioat->base;
553 	struct ioatdma_device *device = chan->device;
554 	struct ioat_ring_ent *desc;
555 	bool seen_current = false;
556 	int idx = ioat->tail, i;
557 	u16 active;
558 
559 	dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
560 		__func__, ioat->head, ioat->tail, ioat->issued);
561 
562 	/*
563 	 * At restart of the channel, the completion address and the
564 	 * channel status will be 0 due to starting a new chain. Since
565 	 * it's a new chain and the first descriptor "fails", there is
566 	 * nothing to clean up. We do not want to reap the entire submitted
567 	 * chain due to this 0 address value and then BUG.
568 	 */
569 	if (!phys_complete)
570 		return;
571 
572 	active = ioat2_ring_active(ioat);
573 	for (i = 0; i < active && !seen_current; i++) {
574 		struct dma_async_tx_descriptor *tx;
575 
576 		smp_read_barrier_depends();
577 		prefetch(ioat2_get_ring_ent(ioat, idx + i + 1));
578 		desc = ioat2_get_ring_ent(ioat, idx + i);
579 		dump_desc_dbg(ioat, desc);
580 
581 		/* set err stat if we are using dwbes */
582 		if (device->cap & IOAT_CAP_DWBES)
583 			desc_get_errstat(ioat, desc);
584 
585 		tx = &desc->txd;
586 		if (tx->cookie) {
587 			dma_cookie_complete(tx);
588 			ioat3_dma_unmap(ioat, desc, idx + i);
589 			if (tx->callback) {
590 				tx->callback(tx->callback_param);
591 				tx->callback = NULL;
592 			}
593 		}
594 
595 		if (tx->phys == phys_complete)
596 			seen_current = true;
597 
598 		/* skip extended descriptors */
599 		if (desc_has_ext(desc)) {
600 			BUG_ON(i + 1 >= active);
601 			i++;
602 		}
603 
604 		/* cleanup super extended descriptors */
605 		if (desc->sed) {
606 			ioat3_free_sed(device, desc->sed);
607 			desc->sed = NULL;
608 		}
609 	}
610 	smp_mb(); /* finish all descriptor reads before incrementing tail */
611 	ioat->tail = idx + i;
612 	BUG_ON(active && !seen_current); /* no active descs have written a completion? */
613 	chan->last_completion = phys_complete;
614 
615 	if (active - i == 0) {
616 		dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
617 			__func__);
618 		clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
619 		mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
620 	}
621 	/* 5 microsecond delay per pending descriptor */
622 	writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK),
623 	       chan->device->reg_base + IOAT_INTRDELAY_OFFSET);
624 }
625 
626 static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
627 {
628 	struct ioat_chan_common *chan = &ioat->base;
629 	u64 phys_complete;
630 
631 	spin_lock_bh(&chan->cleanup_lock);
632 
633 	if (ioat3_cleanup_preamble(chan, &phys_complete))
634 		__cleanup(ioat, phys_complete);
635 
636 	if (is_ioat_halted(*chan->completion)) {
637 		u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
638 
639 		if (chanerr & IOAT_CHANERR_HANDLE_MASK) {
640 			mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
641 			ioat3_eh(ioat);
642 		}
643 	}
644 
645 	spin_unlock_bh(&chan->cleanup_lock);
646 }
647 
648 static void ioat3_cleanup_event(unsigned long data)
649 {
650 	struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
651 
652 	ioat3_cleanup(ioat);
653 	writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
654 }
655 
656 static void ioat3_restart_channel(struct ioat2_dma_chan *ioat)
657 {
658 	struct ioat_chan_common *chan = &ioat->base;
659 	u64 phys_complete;
660 
661 	ioat2_quiesce(chan, 0);
662 	if (ioat3_cleanup_preamble(chan, &phys_complete))
663 		__cleanup(ioat, phys_complete);
664 
665 	__ioat2_restart_chan(ioat);
666 }
667 
668 static void ioat3_eh(struct ioat2_dma_chan *ioat)
669 {
670 	struct ioat_chan_common *chan = &ioat->base;
671 	struct pci_dev *pdev = to_pdev(chan);
672 	struct ioat_dma_descriptor *hw;
673 	u64 phys_complete;
674 	struct ioat_ring_ent *desc;
675 	u32 err_handled = 0;
676 	u32 chanerr_int;
677 	u32 chanerr;
678 
679 	/* cleanup so tail points to descriptor that caused the error */
680 	if (ioat3_cleanup_preamble(chan, &phys_complete))
681 		__cleanup(ioat, phys_complete);
682 
683 	chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
684 	pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int);
685 
686 	dev_dbg(to_dev(chan), "%s: error = %x:%x\n",
687 		__func__, chanerr, chanerr_int);
688 
689 	desc = ioat2_get_ring_ent(ioat, ioat->tail);
690 	hw = desc->hw;
691 	dump_desc_dbg(ioat, desc);
692 
693 	switch (hw->ctl_f.op) {
694 	case IOAT_OP_XOR_VAL:
695 		if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) {
696 			*desc->result |= SUM_CHECK_P_RESULT;
697 			err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR;
698 		}
699 		break;
700 	case IOAT_OP_PQ_VAL:
701 	case IOAT_OP_PQ_VAL_16S:
702 		if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) {
703 			*desc->result |= SUM_CHECK_P_RESULT;
704 			err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR;
705 		}
706 		if (chanerr & IOAT_CHANERR_XOR_Q_ERR) {
707 			*desc->result |= SUM_CHECK_Q_RESULT;
708 			err_handled |= IOAT_CHANERR_XOR_Q_ERR;
709 		}
710 		break;
711 	}
712 
713 	/* fault on unhandled error or spurious halt */
714 	if (chanerr ^ err_handled || chanerr == 0) {
715 		dev_err(to_dev(chan), "%s: fatal error (%x:%x)\n",
716 			__func__, chanerr, err_handled);
717 		BUG();
718 	}
719 
720 	writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
721 	pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int);
722 
723 	/* mark faulting descriptor as complete */
724 	*chan->completion = desc->txd.phys;
725 
726 	spin_lock_bh(&ioat->prep_lock);
727 	ioat3_restart_channel(ioat);
728 	spin_unlock_bh(&ioat->prep_lock);
729 }
730 
731 static void check_active(struct ioat2_dma_chan *ioat)
732 {
733 	struct ioat_chan_common *chan = &ioat->base;
734 
735 	if (ioat2_ring_active(ioat)) {
736 		mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
737 		return;
738 	}
739 
740 	if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &chan->state))
741 		mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
742 	else if (ioat->alloc_order > ioat_get_alloc_order()) {
743 		/* if the ring is idle, empty, and oversized try to step
744 		 * down the size
745 		 */
746 		reshape_ring(ioat, ioat->alloc_order - 1);
747 
748 		/* keep shrinking until we get back to our minimum
749 		 * default size
750 		 */
751 		if (ioat->alloc_order > ioat_get_alloc_order())
752 			mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
753 	}
754 
755 }
756 
757 static void ioat3_timer_event(unsigned long data)
758 {
759 	struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
760 	struct ioat_chan_common *chan = &ioat->base;
761 	dma_addr_t phys_complete;
762 	u64 status;
763 
764 	status = ioat_chansts(chan);
765 
766 	/* when halted due to errors check for channel
767 	 * programming errors before advancing the completion state
768 	 */
769 	if (is_ioat_halted(status)) {
770 		u32 chanerr;
771 
772 		chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
773 		dev_err(to_dev(chan), "%s: Channel halted (%x)\n",
774 			__func__, chanerr);
775 		if (test_bit(IOAT_RUN, &chan->state))
776 			BUG_ON(is_ioat_bug(chanerr));
777 		else /* we never got off the ground */
778 			return;
779 	}
780 
781 	/* if we haven't made progress and we have already
782 	 * acknowledged a pending completion once, then be more
783 	 * forceful with a restart
784 	 */
785 	spin_lock_bh(&chan->cleanup_lock);
786 	if (ioat_cleanup_preamble(chan, &phys_complete))
787 		__cleanup(ioat, phys_complete);
788 	else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) {
789 		spin_lock_bh(&ioat->prep_lock);
790 		ioat3_restart_channel(ioat);
791 		spin_unlock_bh(&ioat->prep_lock);
792 		spin_unlock_bh(&chan->cleanup_lock);
793 		return;
794 	} else {
795 		set_bit(IOAT_COMPLETION_ACK, &chan->state);
796 		mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
797 	}
798 
799 
800 	if (ioat2_ring_active(ioat))
801 		mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
802 	else {
803 		spin_lock_bh(&ioat->prep_lock);
804 		check_active(ioat);
805 		spin_unlock_bh(&ioat->prep_lock);
806 	}
807 	spin_unlock_bh(&chan->cleanup_lock);
808 }
809 
810 static enum dma_status
811 ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie,
812 		struct dma_tx_state *txstate)
813 {
814 	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
815 	enum dma_status ret;
816 
817 	ret = dma_cookie_status(c, cookie, txstate);
818 	if (ret == DMA_SUCCESS)
819 		return ret;
820 
821 	ioat3_cleanup(ioat);
822 
823 	return dma_cookie_status(c, cookie, txstate);
824 }
825 
826 static struct dma_async_tx_descriptor *
827 ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value,
828 		       size_t len, unsigned long flags)
829 {
830 	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
831 	struct ioat_ring_ent *desc;
832 	size_t total_len = len;
833 	struct ioat_fill_descriptor *fill;
834 	u64 src_data = (0x0101010101010101ULL) * (value & 0xff);
835 	int num_descs, idx, i;
836 
837 	num_descs = ioat2_xferlen_to_descs(ioat, len);
838 	if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0)
839 		idx = ioat->head;
840 	else
841 		return NULL;
842 	i = 0;
843 	do {
844 		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
845 
846 		desc = ioat2_get_ring_ent(ioat, idx + i);
847 		fill = desc->fill;
848 
849 		fill->size = xfer_size;
850 		fill->src_data = src_data;
851 		fill->dst_addr = dest;
852 		fill->ctl = 0;
853 		fill->ctl_f.op = IOAT_OP_FILL;
854 
855 		len -= xfer_size;
856 		dest += xfer_size;
857 		dump_desc_dbg(ioat, desc);
858 	} while (++i < num_descs);
859 
860 	desc->txd.flags = flags;
861 	desc->len = total_len;
862 	fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
863 	fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
864 	fill->ctl_f.compl_write = 1;
865 	dump_desc_dbg(ioat, desc);
866 
867 	/* we leave the channel locked to ensure in order submission */
868 	return &desc->txd;
869 }
870 
871 static struct dma_async_tx_descriptor *
872 __ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
873 		      dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
874 		      size_t len, unsigned long flags)
875 {
876 	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
877 	struct ioat_ring_ent *compl_desc;
878 	struct ioat_ring_ent *desc;
879 	struct ioat_ring_ent *ext;
880 	size_t total_len = len;
881 	struct ioat_xor_descriptor *xor;
882 	struct ioat_xor_ext_descriptor *xor_ex = NULL;
883 	struct ioat_dma_descriptor *hw;
884 	int num_descs, with_ext, idx, i;
885 	u32 offset = 0;
886 	u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
887 
888 	BUG_ON(src_cnt < 2);
889 
890 	num_descs = ioat2_xferlen_to_descs(ioat, len);
891 	/* we need 2x the number of descriptors to cover greater than 5
892 	 * sources
893 	 */
894 	if (src_cnt > 5) {
895 		with_ext = 1;
896 		num_descs *= 2;
897 	} else
898 		with_ext = 0;
899 
900 	/* completion writes from the raid engine may pass completion
901 	 * writes from the legacy engine, so we need one extra null
902 	 * (legacy) descriptor to ensure all completion writes arrive in
903 	 * order.
904 	 */
905 	if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs+1) == 0)
906 		idx = ioat->head;
907 	else
908 		return NULL;
909 	i = 0;
910 	do {
911 		struct ioat_raw_descriptor *descs[2];
912 		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
913 		int s;
914 
915 		desc = ioat2_get_ring_ent(ioat, idx + i);
916 		xor = desc->xor;
917 
918 		/* save a branch by unconditionally retrieving the
919 		 * extended descriptor xor_set_src() knows to not write
920 		 * to it in the single descriptor case
921 		 */
922 		ext = ioat2_get_ring_ent(ioat, idx + i + 1);
923 		xor_ex = ext->xor_ex;
924 
925 		descs[0] = (struct ioat_raw_descriptor *) xor;
926 		descs[1] = (struct ioat_raw_descriptor *) xor_ex;
927 		for (s = 0; s < src_cnt; s++)
928 			xor_set_src(descs, src[s], offset, s);
929 		xor->size = xfer_size;
930 		xor->dst_addr = dest + offset;
931 		xor->ctl = 0;
932 		xor->ctl_f.op = op;
933 		xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);
934 
935 		len -= xfer_size;
936 		offset += xfer_size;
937 		dump_desc_dbg(ioat, desc);
938 	} while ((i += 1 + with_ext) < num_descs);
939 
940 	/* last xor descriptor carries the unmap parameters and fence bit */
941 	desc->txd.flags = flags;
942 	desc->len = total_len;
943 	if (result)
944 		desc->result = result;
945 	xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
946 
947 	/* completion descriptor carries interrupt bit */
948 	compl_desc = ioat2_get_ring_ent(ioat, idx + i);
949 	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
950 	hw = compl_desc->hw;
951 	hw->ctl = 0;
952 	hw->ctl_f.null = 1;
953 	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
954 	hw->ctl_f.compl_write = 1;
955 	hw->size = NULL_DESC_BUFFER_SIZE;
956 	dump_desc_dbg(ioat, compl_desc);
957 
958 	/* we leave the channel locked to ensure in order submission */
959 	return &compl_desc->txd;
960 }
961 
962 static struct dma_async_tx_descriptor *
963 ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
964 	       unsigned int src_cnt, size_t len, unsigned long flags)
965 {
966 	return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
967 }
968 
969 struct dma_async_tx_descriptor *
970 ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
971 		    unsigned int src_cnt, size_t len,
972 		    enum sum_check_flags *result, unsigned long flags)
973 {
974 	/* the cleanup routine only sets bits on validate failure, it
975 	 * does not clear bits on validate success... so clear it here
976 	 */
977 	*result = 0;
978 
979 	return __ioat3_prep_xor_lock(chan, result, src[0], &src[1],
980 				     src_cnt - 1, len, flags);
981 }
982 
983 static void
984 dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext)
985 {
986 	struct device *dev = to_dev(&ioat->base);
987 	struct ioat_pq_descriptor *pq = desc->pq;
988 	struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
989 	struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
990 	int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
991 	int i;
992 
993 	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
994 		" sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
995 		" src_cnt: %d)\n",
996 		desc_id(desc), (unsigned long long) desc->txd.phys,
997 		(unsigned long long) (pq_ex ? pq_ex->next : pq->next),
998 		desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en,
999 		pq->ctl_f.compl_write,
1000 		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
1001 		pq->ctl_f.src_cnt);
1002 	for (i = 0; i < src_cnt; i++)
1003 		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
1004 			(unsigned long long) pq_get_src(descs, i), pq->coef[i]);
1005 	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
1006 	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
1007 	dev_dbg(dev, "\tNEXT: %#llx\n", pq->next);
1008 }
1009 
1010 static void dump_pq16_desc_dbg(struct ioat2_dma_chan *ioat,
1011 			       struct ioat_ring_ent *desc)
1012 {
1013 	struct device *dev = to_dev(&ioat->base);
1014 	struct ioat_pq_descriptor *pq = desc->pq;
1015 	struct ioat_raw_descriptor *descs[] = { (void *)pq,
1016 						(void *)pq,
1017 						(void *)pq };
1018 	int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt);
1019 	int i;
1020 
1021 	if (desc->sed) {
1022 		descs[1] = (void *)desc->sed->hw;
1023 		descs[2] = (void *)desc->sed->hw + 64;
1024 	}
1025 
1026 	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
1027 		" sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
1028 		" src_cnt: %d)\n",
1029 		desc_id(desc), (unsigned long long) desc->txd.phys,
1030 		(unsigned long long) pq->next,
1031 		desc->txd.flags, pq->size, pq->ctl,
1032 		pq->ctl_f.op, pq->ctl_f.int_en,
1033 		pq->ctl_f.compl_write,
1034 		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
1035 		pq->ctl_f.src_cnt);
1036 	for (i = 0; i < src_cnt; i++) {
1037 		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
1038 			(unsigned long long) pq16_get_src(descs, i),
1039 			pq->coef[i]);
1040 	}
1041 	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
1042 	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
1043 }
1044 
1045 static struct dma_async_tx_descriptor *
1046 __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
1047 		     const dma_addr_t *dst, const dma_addr_t *src,
1048 		     unsigned int src_cnt, const unsigned char *scf,
1049 		     size_t len, unsigned long flags)
1050 {
1051 	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
1052 	struct ioat_chan_common *chan = &ioat->base;
1053 	struct ioatdma_device *device = chan->device;
1054 	struct ioat_ring_ent *compl_desc;
1055 	struct ioat_ring_ent *desc;
1056 	struct ioat_ring_ent *ext;
1057 	size_t total_len = len;
1058 	struct ioat_pq_descriptor *pq;
1059 	struct ioat_pq_ext_descriptor *pq_ex = NULL;
1060 	struct ioat_dma_descriptor *hw;
1061 	u32 offset = 0;
1062 	u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
1063 	int i, s, idx, with_ext, num_descs;
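	/* pre-v3.3 (cb32) hardware needs the extra null descriptor below to
	 * order the completion write */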
1064 	int cb32 = (device->version < IOAT_VER_3_3) ? 1 : 0;
1065 
1066 	dev_dbg(to_dev(chan), "%s\n", __func__);
1067 	/* the engine requires at least two sources (we provide
1068 	 * at least 1 implied source in the DMA_PREP_CONTINUE case)
1069 	 */
1070 	BUG_ON(src_cnt + dmaf_continue(flags) < 2);
1071 
1072 	num_descs = ioat2_xferlen_to_descs(ioat, len);
1073 	/* we need 2x the number of descriptors to cover greater than 3
1074 	 * sources (we need 1 extra source in the q-only continuation
1075 	 * case and 3 extra sources in the p+q continuation case.
1076 	 */
1077 	if (src_cnt + dmaf_p_disabled_continue(flags) > 3 ||
1078 	    (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) {
1079 		with_ext = 1;
1080 		num_descs *= 2;
1081 	} else
1082 		with_ext = 0;
1083 
1084 	/* completion writes from the raid engine may pass completion
1085 	 * writes from the legacy engine, so we need one extra null
1086 	 * (legacy) descriptor to ensure all completion writes arrive in
1087 	 * order.
1088 	 */
1089 	if (likely(num_descs) &&
1090 	    ioat2_check_space_lock(ioat, num_descs + cb32) == 0)
1091 		idx = ioat->head;
1092 	else
1093 		return NULL;
1094 	i = 0;
1095 	do {
1096 		struct ioat_raw_descriptor *descs[2];
1097 		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
1098 
1099 		desc = ioat2_get_ring_ent(ioat, idx + i);
1100 		pq = desc->pq;
1101 
1102 		/* save a branch by unconditionally retrieving the
1103 		 * extended descriptor pq_set_src() knows to not write
1104 		 * to it in the single descriptor case
1105 		 */
1106 		ext = ioat2_get_ring_ent(ioat, idx + i + with_ext);
1107 		pq_ex = ext->pq_ex;
1108 
1109 		descs[0] = (struct ioat_raw_descriptor *) pq;
1110 		descs[1] = (struct ioat_raw_descriptor *) pq_ex;
1111 
1112 		for (s = 0; s < src_cnt; s++)
1113 			pq_set_src(descs, src[s], offset, scf[s], s);
1114 
1115 		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
1116 		if (dmaf_p_disabled_continue(flags))
1117 			pq_set_src(descs, dst[1], offset, 1, s++);
1118 		else if (dmaf_continue(flags)) {
1119 			pq_set_src(descs, dst[0], offset, 0, s++);
1120 			pq_set_src(descs, dst[1], offset, 1, s++);
1121 			pq_set_src(descs, dst[1], offset, 0, s++);
1122 		}
1123 		pq->size = xfer_size;
1124 		pq->p_addr = dst[0] + offset;
1125 		pq->q_addr = dst[1] + offset;
1126 		pq->ctl = 0;
1127 		pq->ctl_f.op = op;
1128 		/* we turn on descriptor write back error status */
1129 		if (device->cap & IOAT_CAP_DWBES)
1130 			pq->ctl_f.wb_en = result ? 1 : 0;
1131 		pq->ctl_f.src_cnt = src_cnt_to_hw(s);
1132 		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
1133 		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
1134 
1135 		len -= xfer_size;
1136 		offset += xfer_size;
1137 	} while ((i += 1 + with_ext) < num_descs);
1138 
1139 	/* last pq descriptor carries the unmap parameters and fence bit */
1140 	desc->txd.flags = flags;
1141 	desc->len = total_len;
1142 	if (result)
1143 		desc->result = result;
1144 	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
1145 	dump_pq_desc_dbg(ioat, desc, ext);
1146 
1147 	if (!cb32) {
1148 		pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
1149 		pq->ctl_f.compl_write = 1;
1150 		compl_desc = desc;
1151 	} else {
1152 		/* completion descriptor carries interrupt bit */
1153 		compl_desc = ioat2_get_ring_ent(ioat, idx + i);
1154 		compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
1155 		hw = compl_desc->hw;
1156 		hw->ctl = 0;
1157 		hw->ctl_f.null = 1;
1158 		hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
1159 		hw->ctl_f.compl_write = 1;
1160 		hw->size = NULL_DESC_BUFFER_SIZE;
1161 		dump_desc_dbg(ioat, compl_desc);
1162 	}
1163 
1164 
1165 	/* we leave the channel locked to ensure in order submission */
1166 	return &compl_desc->txd;
1167 }
1168 
1169 static struct dma_async_tx_descriptor *
1170 __ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
1171 		       const dma_addr_t *dst, const dma_addr_t *src,
1172 		       unsigned int src_cnt, const unsigned char *scf,
1173 		       size_t len, unsigned long flags)
1174 {
1175 	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
1176 	struct ioat_chan_common *chan = &ioat->base;
1177 	struct ioatdma_device *device = chan->device;
1178 	struct ioat_ring_ent *desc;
1179 	size_t total_len = len;
1180 	struct ioat_pq_descriptor *pq;
1181 	u32 offset = 0;
1182 	u8 op;
1183 	int i, s, idx, num_descs;
1184 
1185 	/* this function only handles src_cnt 9 - 16 */
1186 	BUG_ON(src_cnt < 9);
1187 
1188 	/* this function is only called with 9-16 sources */
1189 	op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S;
1190 
1191 	dev_dbg(to_dev(chan), "%s\n", __func__);
1192 
1193 	num_descs = ioat2_xferlen_to_descs(ioat, len);
1194 
1195 	/*
1196 	 * 16 source pq is only available on cb3.3 and has no completion
1197 	 * write hw bug.
1198 	 */
1199 	if (num_descs && ioat2_check_space_lock(ioat, num_descs) == 0)
1200 		idx = ioat->head;
1201 	else
1202 		return NULL;
1203 
1204 	i = 0;
1205 
1206 	do {
1207 		struct ioat_raw_descriptor *descs[4];
1208 		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
1209 
1210 		desc = ioat2_get_ring_ent(ioat, idx + i);
1211 		pq = desc->pq;
1212 
1213 		descs[0] = (struct ioat_raw_descriptor *) pq;
1214 
1215 		desc->sed = ioat3_alloc_sed(device,
1216 					    sed_get_pq16_pool_idx(src_cnt));
1217 		if (!desc->sed) {
1218 			dev_err(to_dev(chan),
1219 				"%s: no free sed entries\n", __func__);
1220 			return NULL;
1221 		}
1222 
1223 		pq->sed_addr = desc->sed->dma;
1224 		desc->sed->parent = desc;
1225 
1226 		descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw;
1227 		descs[2] = (void *)descs[1] + 64;
1228 
1229 		for (s = 0; s < src_cnt; s++)
1230 			pq16_set_src(descs, src[s], offset, scf[s], s);
1231 
1232 		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
1233 		if (dmaf_p_disabled_continue(flags))
1234 			pq16_set_src(descs, dst[1], offset, 1, s++);
1235 		else if (dmaf_continue(flags)) {
1236 			pq16_set_src(descs, dst[0], offset, 0, s++);
1237 			pq16_set_src(descs, dst[1], offset, 1, s++);
1238 			pq16_set_src(descs, dst[1], offset, 0, s++);
1239 		}
1240 
1241 		pq->size = xfer_size;
1242 		pq->p_addr = dst[0] + offset;
1243 		pq->q_addr = dst[1] + offset;
1244 		pq->ctl = 0;
1245 		pq->ctl_f.op = op;
1246 		pq->ctl_f.src_cnt = src16_cnt_to_hw(s);
1247 		/* we turn on descriptor write back error status */
1248 		if (device->cap & IOAT_CAP_DWBES)
1249 			pq->ctl_f.wb_en = result ? 1 : 0;
1250 		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
1251 		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
1252 
1253 		len -= xfer_size;
1254 		offset += xfer_size;
1255 	} while (++i < num_descs);
1256 
1257 	/* last pq descriptor carries the unmap parameters and fence bit */
1258 	desc->txd.flags = flags;
1259 	desc->len = total_len;
1260 	if (result)
1261 		desc->result = result;
1262 	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
1263 
1264 	/* with cb3.3 we should be able to do completion w/o a null desc */
1265 	pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
1266 	pq->ctl_f.compl_write = 1;
1267 
1268 	dump_pq16_desc_dbg(ioat, desc);
1269 
1270 	/* we leave the channel locked to ensure in order submission */
1271 	return &desc->txd;
1272 }
1273 
1274 static struct dma_async_tx_descriptor *
1275 ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
1276 	      unsigned int src_cnt, const unsigned char *scf, size_t len,
1277 	      unsigned long flags)
1278 {
1279 	struct dma_device *dma = chan->device;
1280 
1281 	/* specify valid address for disabled result */
1282 	if (flags & DMA_PREP_PQ_DISABLE_P)
1283 		dst[0] = dst[1];
1284 	if (flags & DMA_PREP_PQ_DISABLE_Q)
1285 		dst[1] = dst[0];
1286 
1287 	/* handle the single source multiply case from the raid6
1288 	 * recovery path
1289 	 */
1290 	if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) {
1291 		dma_addr_t single_source[2];
1292 		unsigned char single_source_coef[2];
1293 
1294 		BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
1295 		single_source[0] = src[0];
1296 		single_source[1] = src[0];
1297 		single_source_coef[0] = scf[0];
1298 		single_source_coef[1] = 0;
1299 
1300 		return (src_cnt > 8) && (dma->max_pq > 8) ?
1301 			__ioat3_prep_pq16_lock(chan, NULL, dst, single_source,
1302 					       2, single_source_coef, len,
1303 					       flags) :
1304 			__ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
1305 					     single_source_coef, len, flags);
1306 
1307 	} else {
1308 		return (src_cnt > 8) && (dma->max_pq > 8) ?
1309 			__ioat3_prep_pq16_lock(chan, NULL, dst, src, src_cnt,
1310 					       scf, len, flags) :
1311 			__ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt,
1312 					     scf, len, flags);
1313 	}
1314 }
1315 
1316 struct dma_async_tx_descriptor *
1317 ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
1318 		  unsigned int src_cnt, const unsigned char *scf, size_t len,
1319 		  enum sum_check_flags *pqres, unsigned long flags)
1320 {
1321 	struct dma_device *dma = chan->device;
1322 
1323 	/* specify valid address for disabled result */
1324 	if (flags & DMA_PREP_PQ_DISABLE_P)
1325 		pq[0] = pq[1];
1326 	if (flags & DMA_PREP_PQ_DISABLE_Q)
1327 		pq[1] = pq[0];
1328 
1329 	/* the cleanup routine only sets bits on validate failure, it
1330 	 * does not clear bits on validate success... so clear it here
1331 	 */
1332 	*pqres = 0;
1333 
1334 	return (src_cnt > 8) && (dma->max_pq > 8) ?
1335 		__ioat3_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len,
1336 				       flags) :
1337 		__ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
1338 				     flags);
1339 }
1340 
1341 static struct dma_async_tx_descriptor *
1342 ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
1343 		 unsigned int src_cnt, size_t len, unsigned long flags)
1344 {
1345 	struct dma_device *dma = chan->device;
1346 	unsigned char scf[src_cnt];
1347 	dma_addr_t pq[2];
1348 
1349 	memset(scf, 0, src_cnt);
1350 	pq[0] = dst;
1351 	flags |= DMA_PREP_PQ_DISABLE_Q;
1352 	pq[1] = dst; /* specify valid address for disabled result */
1353 
1354 	return (src_cnt > 8) && (dma->max_pq > 8) ?
1355 		__ioat3_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len,
1356 				       flags) :
1357 		__ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
1358 				     flags);
1359 }
1360 
1361 struct dma_async_tx_descriptor *
1362 ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
1363 		     unsigned int src_cnt, size_t len,
1364 		     enum sum_check_flags *result, unsigned long flags)
1365 {
1366 	struct dma_device *dma = chan->device;
1367 	unsigned char scf[src_cnt];
1368 	dma_addr_t pq[2];
1369 
1370 	/* the cleanup routine only sets bits on validate failure, it
1371 	 * does not clear bits on validate success... so clear it here
1372 	 */
1373 	*result = 0;
1374 
1375 	memset(scf, 0, src_cnt);
1376 	pq[0] = src[0];
1377 	flags |= DMA_PREP_PQ_DISABLE_Q;
1378 	pq[1] = pq[0]; /* specify valid address for disabled result */
1379 
1380 
1381 	return (src_cnt > 8) && (dma->max_pq > 8) ?
1382 		__ioat3_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1,
1383 				       scf, len, flags) :
1384 		__ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1,
1385 				     scf, len, flags);
1386 }
1387 
1388 static struct dma_async_tx_descriptor *
1389 ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
1390 {
1391 	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
1392 	struct ioat_ring_ent *desc;
1393 	struct ioat_dma_descriptor *hw;
1394 
1395 	if (ioat2_check_space_lock(ioat, 1) == 0)
1396 		desc = ioat2_get_ring_ent(ioat, ioat->head);
1397 	else
1398 		return NULL;
1399 
1400 	hw = desc->hw;
1401 	hw->ctl = 0;
1402 	hw->ctl_f.null = 1;
1403 	hw->ctl_f.int_en = 1;
1404 	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
1405 	hw->ctl_f.compl_write = 1;
1406 	hw->size = NULL_DESC_BUFFER_SIZE;
1407 	hw->src_addr = 0;
1408 	hw->dst_addr = 0;
1409 
1410 	desc->txd.flags = flags;
1411 	desc->len = 1;
1412 
1413 	dump_desc_dbg(ioat, desc);
1414 
1415 	/* we leave the channel locked to ensure in order submission */
1416 	return &desc->txd;
1417 }
1418 
1419 static void ioat3_dma_test_callback(void *dma_async_param)
1420 {
1421 	struct completion *cmp = dma_async_param;
1422 
1423 	complete(cmp);
1424 }
1425 
1426 #define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */
1427 static int ioat_xor_val_self_test(struct ioatdma_device *device)
1428 {
1429 	int i, src_idx;
1430 	struct page *dest;
1431 	struct page *xor_srcs[IOAT_NUM_SRC_TEST];
1432 	struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1];
1433 	dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1];
1434 	dma_addr_t dma_addr, dest_dma;
1435 	struct dma_async_tx_descriptor *tx;
1436 	struct dma_chan *dma_chan;
1437 	dma_cookie_t cookie;
1438 	u8 cmp_byte = 0;
1439 	u32 cmp_word;
1440 	u32 xor_val_result;
1441 	int err = 0;
1442 	struct completion cmp;
1443 	unsigned long tmo;
1444 	struct device *dev = &device->pdev->dev;
1445 	struct dma_device *dma = &device->common;
1446 	u8 op = 0;
1447 
1448 	dev_dbg(dev, "%s\n", __func__);
1449 
1450 	if (!dma_has_cap(DMA_XOR, dma->cap_mask))
1451 		return 0;
1452 
1453 	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
1454 		xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
1455 		if (!xor_srcs[src_idx]) {
1456 			while (src_idx--)
1457 				__free_page(xor_srcs[src_idx]);
1458 			return -ENOMEM;
1459 		}
1460 	}
1461 
1462 	dest = alloc_page(GFP_KERNEL);
1463 	if (!dest) {
1464 		while (src_idx--)
1465 			__free_page(xor_srcs[src_idx]);
1466 		return -ENOMEM;
1467 	}
1468 
1469 	/* Fill in src buffers */
1470 	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
1471 		u8 *ptr = page_address(xor_srcs[src_idx]);
1472 		for (i = 0; i < PAGE_SIZE; i++)
1473 			ptr[i] = (1 << src_idx);
1474 	}
1475 
1476 	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
1477 		cmp_byte ^= (u8) (1 << src_idx);
1478 
1479 	cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
1480 			(cmp_byte << 8) | cmp_byte;
1481 
1482 	memset(page_address(dest), 0, PAGE_SIZE);
1483 
1484 	dma_chan = container_of(dma->channels.next, struct dma_chan,
1485 				device_node);
1486 	if (dma->device_alloc_chan_resources(dma_chan) < 1) {
1487 		err = -ENODEV;
1488 		goto out;
1489 	}
1490 
1491 	/* test xor */
1492 	op = IOAT_OP_XOR;
1493 
1494 	dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
1495 	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
1496 		dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
1497 					   DMA_TO_DEVICE);
1498 	tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
1499 				      IOAT_NUM_SRC_TEST, PAGE_SIZE,
1500 				      DMA_PREP_INTERRUPT |
1501 				      DMA_COMPL_SKIP_SRC_UNMAP |
1502 				      DMA_COMPL_SKIP_DEST_UNMAP);
1503 
1504 	if (!tx) {
1505 		dev_err(dev, "Self-test xor prep failed\n");
1506 		err = -ENODEV;
1507 		goto dma_unmap;
1508 	}
1509 
1510 	async_tx_ack(tx);
1511 	init_completion(&cmp);
1512 	tx->callback = ioat3_dma_test_callback;
1513 	tx->callback_param = &cmp;
1514 	cookie = tx->tx_submit(tx);
1515 	if (cookie < 0) {
1516 		dev_err(dev, "Self-test xor setup failed\n");
1517 		err = -ENODEV;
1518 		goto dma_unmap;
1519 	}
1520 	dma->device_issue_pending(dma_chan);
1521 
1522 	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1523 
1524 	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
1525 		dev_err(dev, "Self-test xor timed out\n");
1526 		err = -ENODEV;
1527 		goto dma_unmap;
1528 	}
1529 
1530 	dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
1531 	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
1532 		dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
1533 
1534 	dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
1535 	for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
1536 		u32 *ptr = page_address(dest);
1537 		if (ptr[i] != cmp_word) {
1538 			dev_err(dev, "Self-test xor failed compare\n");
1539 			err = -ENODEV;
1540 			goto free_resources;
1541 		}
1542 	}
1543 	dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
1544 
1545 	/* skip validate if the capability is not present */
1546 	if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
1547 		goto free_resources;
1548 
1549 	op = IOAT_OP_XOR_VAL;
1550 
1551 	/* validate the sources with the destination page */
1552 	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
1553 		xor_val_srcs[i] = xor_srcs[i];
1554 	xor_val_srcs[i] = dest;
1555 
1556 	xor_val_result = 1;
1557 
1558 	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
1559 		dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
1560 					   DMA_TO_DEVICE);
1561 	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
1562 					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
1563 					  &xor_val_result, DMA_PREP_INTERRUPT |
1564 					  DMA_COMPL_SKIP_SRC_UNMAP |
1565 					  DMA_COMPL_SKIP_DEST_UNMAP);
1566 	if (!tx) {
1567 		dev_err(dev, "Self-test zero prep failed\n");
1568 		err = -ENODEV;
1569 		goto dma_unmap;
1570 	}
1571 
1572 	async_tx_ack(tx);
1573 	init_completion(&cmp);
1574 	tx->callback = ioat3_dma_test_callback;
1575 	tx->callback_param = &cmp;
1576 	cookie = tx->tx_submit(tx);
1577 	if (cookie < 0) {
1578 		dev_err(dev, "Self-test zero setup failed\n");
1579 		err = -ENODEV;
1580 		goto dma_unmap;
1581 	}
1582 	dma->device_issue_pending(dma_chan);
1583 
1584 	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1585 
1586 	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
1587 		dev_err(dev, "Self-test validate timed out\n");
1588 		err = -ENODEV;
1589 		goto dma_unmap;
1590 	}
1591 
1592 	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
1593 		dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
1594 
1595 	if (xor_val_result != 0) {
1596 		dev_err(dev, "Self-test validate failed compare\n");
1597 		err = -ENODEV;
1598 		goto free_resources;
1599 	}
1600 
1601 	/* skip memset if the capability is not present */
1602 	if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask))
1603 		goto free_resources;
1604 
1605 	/* test memset */
1606 	op = IOAT_OP_FILL;
1607 
1608 	dma_addr = dma_map_page(dev, dest, 0,
1609 			PAGE_SIZE, DMA_FROM_DEVICE);
1610 	tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE,
1611 					 DMA_PREP_INTERRUPT |
1612 					 DMA_COMPL_SKIP_SRC_UNMAP |
1613 					 DMA_COMPL_SKIP_DEST_UNMAP);
1614 	if (!tx) {
1615 		dev_err(dev, "Self-test memset prep failed\n");
1616 		err = -ENODEV;
1617 		goto dma_unmap;
1618 	}
1619 
1620 	async_tx_ack(tx);
1621 	init_completion(&cmp);
1622 	tx->callback = ioat3_dma_test_callback;
1623 	tx->callback_param = &cmp;
1624 	cookie = tx->tx_submit(tx);
1625 	if (cookie < 0) {
1626 		dev_err(dev, "Self-test memset setup failed\n");
1627 		err = -ENODEV;
1628 		goto dma_unmap;
1629 	}
1630 	dma->device_issue_pending(dma_chan);
1631 
1632 	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1633 
1634 	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
1635 		dev_err(dev, "Self-test memset timed out\n");
1636 		err = -ENODEV;
1637 		goto dma_unmap;
1638 	}
1639 
1640 	dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
1641 
1642 	for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
1643 		u32 *ptr = page_address(dest);
1644 		if (ptr[i]) {
1645 			dev_err(dev, "Self-test memset failed compare\n");
1646 			err = -ENODEV;
1647 			goto free_resources;
1648 		}
1649 	}
1650 
1651 	/* test for non-zero parity sum */
1652 	op = IOAT_OP_XOR_VAL;
1653 
1654 	xor_val_result = 0;
1655 	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
1656 		dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
1657 					   DMA_TO_DEVICE);
1658 	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
1659 					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
1660 					  &xor_val_result, DMA_PREP_INTERRUPT |
1661 					  DMA_COMPL_SKIP_SRC_UNMAP |
1662 					  DMA_COMPL_SKIP_DEST_UNMAP);
1663 	if (!tx) {
1664 		dev_err(dev, "Self-test 2nd zero prep failed\n");
1665 		err = -ENODEV;
1666 		goto dma_unmap;
1667 	}
1668 
1669 	async_tx_ack(tx);
1670 	init_completion(&cmp);
1671 	tx->callback = ioat3_dma_test_callback;
1672 	tx->callback_param = &cmp;
1673 	cookie = tx->tx_submit(tx);
1674 	if (cookie < 0) {
1675 		dev_err(dev, "Self-test 2nd zero setup failed\n");
1676 		err = -ENODEV;
1677 		goto dma_unmap;
1678 	}
1679 	dma->device_issue_pending(dma_chan);
1680 
1681 	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1682 
1683 	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
1684 		dev_err(dev, "Self-test 2nd validate timed out\n");
1685 		err = -ENODEV;
1686 		goto dma_unmap;
1687 	}
1688 
1689 	if (xor_val_result != SUM_CHECK_P_RESULT) {
1690 		dev_err(dev, "Self-test validate failed compare\n");
1691 		err = -ENODEV;
1692 		goto dma_unmap;
1693 	}
1694 
1695 	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
1696 		dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
1697 
1698 	goto free_resources;
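	/* error unwind: release whichever mappings the failed op left behind */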
1699 dma_unmap:
1700 	if (op == IOAT_OP_XOR) {
1701 		dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
1702 		for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
1703 			dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
1704 				       DMA_TO_DEVICE);
1705 	} else if (op == IOAT_OP_XOR_VAL) {
1706 		for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
1707 			dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
1708 				       DMA_TO_DEVICE);
1709 	} else if (op == IOAT_OP_FILL)
1710 		dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
1711 free_resources:
1712 	dma->device_free_chan_resources(dma_chan);
1713 out:
1714 	src_idx = IOAT_NUM_SRC_TEST;
1715 	while (src_idx--)
1716 		__free_page(xor_srcs[src_idx]);
1717 	__free_page(dest);
1718 	return err;
1719 }
1720 
1721 static int ioat3_dma_self_test(struct ioatdma_device *device)
1722 {
1723 	int rc = ioat_dma_self_test(device);
1724 
1725 	if (rc)
1726 		return rc;
1727 
1728 	rc = ioat_xor_val_self_test(device);
1729 	if (rc)
1730 		return rc;
1731 
1732 	return 0;
1733 }
1734 
1735 static int ioat3_irq_reinit(struct ioatdma_device *device)
1736 {
1737 	int msixcnt = device->common.chancnt;
1738 	struct pci_dev *pdev = device->pdev;
1739 	int i;
1740 	struct msix_entry *msix;
1741 	struct ioat_chan_common *chan;
1742 	int err = 0;
1743 
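	/* free the vectors registered for the current mode, then rerun
	 * interrupt setup from a clean IOAT_NOIRQ state
	 */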
1744 	switch (device->irq_mode) {
1745 	case IOAT_MSIX:
1746 
1747 		for (i = 0; i < msixcnt; i++) {
1748 			msix = &device->msix_entries[i];
1749 			chan = ioat_chan_by_index(device, i);
1750 			devm_free_irq(&pdev->dev, msix->vector, chan);
1751 		}
1752 
1753 		pci_disable_msix(pdev);
1754 		break;
1755 
1756 	case IOAT_MSIX_SINGLE:
1757 		msix = &device->msix_entries[0];
1758 		chan = ioat_chan_by_index(device, 0);
1759 		devm_free_irq(&pdev->dev, msix->vector, chan);
1760 		pci_disable_msix(pdev);
1761 		break;
1762 
1763 	case IOAT_MSI:
1764 		chan = ioat_chan_by_index(device, 0);
1765 		devm_free_irq(&pdev->dev, pdev->irq, chan);
1766 		pci_disable_msi(pdev);
1767 		break;
1768 
1769 	case IOAT_INTX:
1770 		chan = ioat_chan_by_index(device, 0);
1771 		devm_free_irq(&pdev->dev, pdev->irq, chan);
1772 		break;
1773 
1774 	default:
1775 		return 0;
1776 	}
1777 
1778 	device->irq_mode = IOAT_NOIRQ;
1779 
1780 	err = ioat_dma_setup_interrupts(device);
1781 
1782 	return err;
1783 }
1784 
1785 static int ioat3_reset_hw(struct ioat_chan_common *chan)
1786 {
1787 	/* throw away whatever the channel was doing and get it
1788 	 * initialized, with ioat3 specific workarounds
1789 	 */
1790 	struct ioatdma_device *device = chan->device;
1791 	struct pci_dev *pdev = device->pdev;
1792 	u32 chanerr;
1793 	u16 dev_id;
1794 	int err;
1795 
1796 	ioat2_quiesce(chan, msecs_to_jiffies(100));
1797 
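	/* CHANERR is write-one-to-clear; flush any stale error bits */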
1798 	chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
1799 	writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
1800 
1801 	if (device->version < IOAT_VER_3_3) {
1802 		/* clear any pending errors */
1803 		err = pci_read_config_dword(pdev,
1804 				IOAT_PCI_CHANERR_INT_OFFSET, &chanerr);
1805 		if (err) {
1806 			dev_err(&pdev->dev,
1807 				"channel error register unreachable\n");
1808 			return err;
1809 		}
1810 		pci_write_config_dword(pdev,
1811 				IOAT_PCI_CHANERR_INT_OFFSET, chanerr);
1812 
1813 		/* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
1814 		 * (workaround for spurious config parity error after restart)
1815 		 */
1816 		pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
1817 		if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
1818 			pci_write_config_dword(pdev,
1819 					       IOAT_PCI_DMAUNCERRSTS_OFFSET,
1820 					       0x10);
1821 		}
1822 	}
1823 
1824 	err = ioat2_reset_sync(chan, msecs_to_jiffies(200));
1825 	if (err) {
1826 		dev_err(&pdev->dev, "Failed to reset!\n");
1827 		return err;
1828 	}
1829 
1830 	if (device->irq_mode != IOAT_NOIRQ && is_bwd_ioat(pdev))
1831 		err = ioat3_irq_reinit(device);
1832 
1833 	return err;
1834 }
1835 
1836 static void ioat3_intr_quirk(struct ioatdma_device *device)
1837 {
1838 	struct dma_device *dma;
1839 	struct dma_chan *c;
1840 	struct ioat_chan_common *chan;
1841 	u32 errmask;
1842 
1843 	dma = &device->common;
1844 
1845 	/*
1846 	 * if we have descriptor write back error status, we mask the
1847 	 * error interrupts
1848 	 */
1849 	if (device->cap & IOAT_CAP_DWBES) {
1850 		list_for_each_entry(c, &dma->channels, device_node) {
1851 			chan = to_chan_common(c);
1852 			errmask = readl(chan->reg_base +
1853 					IOAT_CHANERR_MASK_OFFSET);
1854 			errmask |= IOAT_CHANERR_XOR_P_OR_CRC_ERR |
1855 				   IOAT_CHANERR_XOR_Q_ERR;
1856 			writel(errmask, chan->reg_base +
1857 					IOAT_CHANERR_MASK_OFFSET);
1858 		}
1859 	}
1860 }
1861 
1862 int ioat3_dma_probe(struct ioatdma_device *device, int dca)
1863 {
1864 	struct pci_dev *pdev = device->pdev;
1865 	int dca_en = system_has_dca_enabled(pdev);
1866 	struct dma_device *dma;
1867 	struct dma_chan *c;
1868 	struct ioat_chan_common *chan;
1869 	bool is_raid_device = false;
1870 	int err;
1871 
1872 	device->enumerate_channels = ioat2_enumerate_channels;
1873 	device->reset_hw = ioat3_reset_hw;
1874 	device->self_test = ioat3_dma_self_test;
1875 	device->intr_quirk = ioat3_intr_quirk;
1876 	dma = &device->common;
1877 	dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
1878 	dma->device_issue_pending = ioat2_issue_pending;
1879 	dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
1880 	dma->device_free_chan_resources = ioat2_free_chan_resources;
1881 
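	/* the *_align fields are log2 shifts, so 6 means 64-byte alignment,
	 * required on the CB3.2-class Xeon engines
	 */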
1882 	if (is_xeon_cb32(pdev))
1883 		dma->copy_align = 6;
1884 
1885 	dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
1886 	dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock;
1887 
1888 	device->cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET);
1889 
1890 	if (is_bwd_noraid(pdev))
1891 		device->cap &= ~(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_RAID16SS);
1892 
1893 	/* dca is incompatible with raid operations */
1894 	if (dca_en && (device->cap & (IOAT_CAP_XOR|IOAT_CAP_PQ)))
1895 		device->cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ);
1896 
1897 	if (device->cap & IOAT_CAP_XOR) {
1898 		is_raid_device = true;
1899 		dma->max_xor = 8;
1900 		dma->xor_align = 6;
1901 
1902 		dma_cap_set(DMA_XOR, dma->cap_mask);
1903 		dma->device_prep_dma_xor = ioat3_prep_xor;
1904 
1905 		dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
1906 		dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
1907 	}
1908 
1909 	if (device->cap & IOAT_CAP_PQ) {
1910 		is_raid_device = true;
1911 
1912 		dma->device_prep_dma_pq = ioat3_prep_pq;
1913 		dma->device_prep_dma_pq_val = ioat3_prep_pq_val;
1914 		dma_cap_set(DMA_PQ, dma->cap_mask);
1915 		dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
1916 
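		/* 16-source P+Q needs the super extended (SED) descriptors
		 * introduced with CB3.3; otherwise cap at 8 sources
		 */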
1917 		if (device->cap & IOAT_CAP_RAID16SS) {
1918 			dma_set_maxpq(dma, 16, 0);
1919 			dma->pq_align = 0;
1920 		} else {
1921 			dma_set_maxpq(dma, 8, 0);
1922 			if (is_xeon_cb32(pdev))
1923 				dma->pq_align = 6;
1924 			else
1925 				dma->pq_align = 0;
1926 		}
1927 
1928 		if (!(device->cap & IOAT_CAP_XOR)) {
1929 			dma->device_prep_dma_xor = ioat3_prep_pqxor;
1930 			dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val;
1931 			dma_cap_set(DMA_XOR, dma->cap_mask);
1932 			dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
1933 
1934 			if (device->cap & IOAT_CAP_RAID16SS) {
1935 				dma->max_xor = 16;
1936 				dma->xor_align = 0;
1937 			} else {
1938 				dma->max_xor = 8;
1939 				if (is_xeon_cb32(pdev))
1940 					dma->xor_align = 6;
1941 				else
1942 					dma->xor_align = 0;
1943 			}
1944 		}
1945 	}
1946 
1947 	if (is_raid_device && (device->cap & IOAT_CAP_FILL_BLOCK)) {
1948 		dma_cap_set(DMA_MEMSET, dma->cap_mask);
1949 		dma->device_prep_dma_memset = ioat3_prep_memset_lock;
1950 	}
1951 
1952 
1953 	dma->device_tx_status = ioat3_tx_status;
1954 	device->cleanup_fn = ioat3_cleanup_event;
1955 	device->timer_fn = ioat3_timer_event;
1956 
1957 	if (is_xeon_cb32(pdev)) {
1958 		dma_cap_clear(DMA_XOR_VAL, dma->cap_mask);
1959 		dma->device_prep_dma_xor_val = NULL;
1960 
1961 		dma_cap_clear(DMA_PQ_VAL, dma->cap_mask);
1962 		dma->device_prep_dma_pq_val = NULL;
1963 	}
1964 
1965 	/* starting with CB3.3 super extended descriptors are supported */
1966 	if (device->cap & IOAT_CAP_RAID16SS) {
1967 		char pool_name[14];
1968 		int i;
1969 
1970 		/* allocate sw descriptor pool for SED */
1971 		device->sed_pool = kmem_cache_create("ioat_sed",
1972 				sizeof(struct ioat_sed_ent), 0, 0, NULL);
1973 		if (!device->sed_pool)
1974 			return -ENOMEM;
1975 
1976 		for (i = 0; i < MAX_SED_POOLS; i++) {
1977 			snprintf(pool_name, 14, "ioat_hw%d_sed", i);
1978 
1979 			/* allocate SED DMA pool */
1980 			device->sed_hw_pool[i] = dma_pool_create(pool_name,
1981 					&pdev->dev,
1982 					SED_SIZE * (i + 1), 64, 0);
1983 			if (!device->sed_hw_pool[i])
1984 				goto sed_pool_cleanup;
1985 
1986 		}
1987 	}
1988 
1989 	err = ioat_probe(device);
1990 	if (err)
1991 		return err;
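	/* threshold above which the net stack may offload TCP receive
	 * copies to the DMA engine (CONFIG_NET_DMA)
	 */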
1992 	ioat_set_tcp_copy_break(262144);
1993 
1994 	list_for_each_entry(c, &dma->channels, device_node) {
1995 		chan = to_chan_common(c);
1996 		writel(IOAT_DMA_DCA_ANY_CPU,
1997 		       chan->reg_base + IOAT_DCACTRL_OFFSET);
1998 	}
1999 
2000 	err = ioat_register(device);
2001 	if (err)
2002 		return err;
2003 
2004 	ioat_kobject_add(device, &ioat2_ktype);
2005 
2006 	if (dca)
2007 		device->dca = ioat3_dca_init(pdev, device->reg_base);
2008 
2009 	return 0;
2010 
2011 sed_pool_cleanup:
2012 	if (device->sed_pool) {
2013 		int i;
2014 		kmem_cache_destroy(device->sed_pool);
2015 
2016 		for (i = 0; i < MAX_SED_POOLS; i++)
2017 			if (device->sed_hw_pool[i])
2018 				dma_pool_destroy(device->sed_hw_pool[i]);
2019 	}
2020 
2021 	return -ENOMEM;
2022 }
2023 
2024 void ioat3_dma_remove(struct ioatdma_device *device)
2025 {
2026 	if (device->sed_pool) {
2027 		int i;
2028 		kmem_cache_destroy(device->sed_pool);
2029 
2030 		for (i = 0; i < MAX_SED_POOLS; i++)
2031 			if (device->sed_hw_pool[i])
2032 				dma_pool_destroy(device->sed_hw_pool[i]);
2033 	}
2034 }
2035