/*
 * This file implements the DMA operations for NVLink devices. The NPU
 * devices all point to the same iommu table as the parent PCI device.
 *
 * Copyright Alistair Popple, IBM Corporation 2015.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */

#include <linux/export.h>
#include <linux/pci.h>
#include <linux/memblock.h>
#include <linux/iommu.h>

#include <asm/iommu.h>
#include <asm/pnv-pci.h>
#include <asm/msi_bitmap.h>
#include <asm/opal.h>

#include "powernv.h"
#include "pci.h"

/*
 * Other types of TCE cache invalidation are not functional in the
 * hardware.
 */
static struct pci_dev *get_pci_dev(struct device_node *dn)
{
	return PCI_DN(dn)->pcidev;
}

/* Given a NPU device get the associated PCI device. */
struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev)
{
	struct device_node *dn;
	struct pci_dev *gpdev;

	/* Get associated PCI device */
	dn = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0);
	if (!dn)
		return NULL;

	gpdev = get_pci_dev(dn);
	of_node_put(dn);

	return gpdev;
}
EXPORT_SYMBOL(pnv_pci_get_gpu_dev);

/* Given the real PCI device get a linked NPU device. */
struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
{
	struct device_node *dn;
	struct pci_dev *npdev;

	/* Get associated PCI device */
	dn = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index);
	if (!dn)
		return NULL;

	npdev = get_pci_dev(dn);
	of_node_put(dn);

	return npdev;
}
EXPORT_SYMBOL(pnv_pci_get_npu_dev);

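/*
 * Illustration (not taken from a real system): the two lookups above rely
 * on cross-referencing phandles in the device tree. A hypothetical layout
 * might look like this, where each NPU link node carries an "ibm,gpu"
 * phandle back to its GPU and the GPU lists its links in "ibm,npu":
 *
 *	gpu0: gpu@0 {
 *		ibm,npu = <&npu_link0 &npu_link1>;
 *	};
 *	npu_link0: link@0 {
 *		ibm,gpu = <&gpu0>;
 *	};
 *
 * Node and label names here are made up for illustration; only the
 * "ibm,gpu" and "ibm,npu" property names come from the code above.
 */
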
#define NPU_DMA_OP_UNSUPPORTED()					\
	dev_err_once(dev, "%s operation unsupported for NVLink devices\n", \
		__func__)

static void *dma_npu_alloc(struct device *dev, size_t size,
			   dma_addr_t *dma_handle, gfp_t flag,
			   unsigned long attrs)
{
	NPU_DMA_OP_UNSUPPORTED();
	return NULL;
}

static void dma_npu_free(struct device *dev, size_t size,
			 void *vaddr, dma_addr_t dma_handle,
			 unsigned long attrs)
{
	NPU_DMA_OP_UNSUPPORTED();
}

static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page,
				   unsigned long offset, size_t size,
				   enum dma_data_direction direction,
				   unsigned long attrs)
{
	NPU_DMA_OP_UNSUPPORTED();
	return 0;
}

static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist,
			  int nelems, enum dma_data_direction direction,
			  unsigned long attrs)
{
	NPU_DMA_OP_UNSUPPORTED();
	return 0;
}

static int dma_npu_dma_supported(struct device *dev, u64 mask)
{
	NPU_DMA_OP_UNSUPPORTED();
	return 0;
}

static u64 dma_npu_get_required_mask(struct device *dev)
{
	NPU_DMA_OP_UNSUPPORTED();
	return 0;
}

static struct dma_map_ops dma_npu_ops = {
	.map_page		= dma_npu_map_page,
	.map_sg			= dma_npu_map_sg,
	.alloc			= dma_npu_alloc,
	.free			= dma_npu_free,
	.dma_supported		= dma_npu_dma_supported,
	.get_required_mask	= dma_npu_get_required_mask,
};

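/*
 * A minimal sketch of what a driver would observe if it tried to DMA
 * through one of these stub ops (assuming a hypothetical NPU device
 * "npdev" already bound to dma_npu_ops):
 *
 *	dma_addr_t addr = dma_map_page(&npdev->dev, page, 0, PAGE_SIZE,
 *				       DMA_TO_DEVICE);
 *
 * This logs "dma_npu_map_page operation unsupported for NVLink devices"
 * once and returns 0: all real DMA setup must go through the linked GPU
 * device instead.
 */
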
/*
 * Returns the PE associated with the PCI device of the given
 * NPU. Also returns the linked PCI device in *gpdev if gpdev != NULL.
 */
static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
						  struct pci_dev **gpdev)
{
	struct pnv_phb *phb;
	struct pci_controller *hose;
	struct pci_dev *pdev;
	struct pnv_ioda_pe *pe;
	struct pci_dn *pdn;

	pdev = pnv_pci_get_gpu_dev(npe->pdev);
	if (!pdev)
		return NULL;

	pdn = pci_get_pdn(pdev);
	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
		return NULL;

	hose = pci_bus_to_host(pdev->bus);
	phb = hose->private_data;
	pe = &phb->ioda.pe_array[pdn->pe_number];

	if (gpdev)
		*gpdev = pdev;

	return pe;
}

long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num,
		struct iommu_table *tbl)
{
	struct pnv_phb *phb = npe->phb;
	int64_t rc;
	const unsigned long size = tbl->it_indirect_levels ?
		tbl->it_level_size : tbl->it_size;
	const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
	const __u64 win_size = tbl->it_size << tbl->it_page_shift;

	pe_info(npe, "Setting up window %llx..%llx pg=%lx\n",
			start_addr, start_addr + win_size - 1,
			IOMMU_PAGE_SIZE(tbl));

	rc = opal_pci_map_pe_dma_window(phb->opal_id,
			npe->pe_number,
			npe->pe_number,
			tbl->it_indirect_levels + 1,
			__pa(tbl->it_base),
			size << 3,
			IOMMU_PAGE_SIZE(tbl));
	if (rc) {
		pe_err(npe, "Failed to configure TCE table, err %lld\n", rc);
		return rc;
	}
	pnv_pci_phb3_tce_invalidate_entire(phb, false);

	/* Add the table to the list so its TCE cache will get invalidated */
	pnv_pci_link_table_and_group(phb->hose->node, num,
			tbl, &npe->table_group);

	return 0;
}

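/*
 * Worked example of the window arithmetic above, using invented values
 * rather than anything read from real hardware: with it_page_shift = 16
 * (64K IOMMU pages), it_offset = 0, it_size = 0x4000 entries and no
 * indirect levels, the window covers
 *
 *	start_addr = 0 << 16      = 0x0
 *	win_size   = 0x4000 << 16 = 0x40000000 (1GB)
 *
 * and the TCE table handed to OPAL is size << 3 = 0x20000 bytes, i.e.
 * 8 bytes per TCE entry.
 */
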
long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num)
{
	struct pnv_phb *phb = npe->phb;
	int64_t rc;

	pe_info(npe, "Removing DMA window\n");

	rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
			npe->pe_number,
			0/* levels */, 0/* table address */,
			0/* table size */, 0/* page size */);
	if (rc) {
		pe_err(npe, "Unmapping failed, ret = %lld\n", rc);
		return rc;
	}
	pnv_pci_phb3_tce_invalidate_entire(phb, false);

	pnv_pci_unlink_table_and_group(npe->table_group.tables[num],
			&npe->table_group);

	return 0;
}

/*
 * Enables 32-bit DMA on the NPU.
 */
static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe)
{
	struct pci_dev *gpdev;
	struct pnv_ioda_pe *gpe;
	int64_t rc;

	/*
	 * Find the associated PCI device and get the DMA window
	 * information from there.
	 */
	if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV))
		return;

	gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
	if (!gpe)
		return;

	rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]);

	/*
	 * We don't initialise npu_pe->tce32_table as we always use
	 * dma_npu_ops which are nops.
	 */
	set_dma_ops(&npe->pdev->dev, &dma_npu_ops);
}

/*
 * Enables bypass mode on the NPU. The NPU only supports one
 * window per link, so bypass needs to be explicitly enabled or
 * disabled. Unlike on a PHB3, bypass and non-bypass modes can't be
 * active at the same time.
 */
static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe)
{
	struct pnv_phb *phb = npe->phb;
	int64_t rc = 0;
	phys_addr_t top = memblock_end_of_DRAM();

	if (phb->type != PNV_PHB_NPU || !npe->pdev)
		return -EINVAL;

	rc = pnv_npu_unset_window(npe, 0);
	if (rc != OPAL_SUCCESS)
		return rc;

	/* Enable the bypass window */

	top = roundup_pow_of_two(top);
	dev_info(&npe->pdev->dev, "Enabling bypass for PE %d\n",
			npe->pe_number);
	rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
			npe->pe_number, npe->pe_number,
			0 /* bypass base */, top);

	if (rc == OPAL_SUCCESS)
		pnv_pci_phb3_tce_invalidate_entire(phb, false);

	return rc;
}

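/*
 * Sizing sketch for the bypass window above, with an invented memory
 * layout: if memblock_end_of_DRAM() returned 0x18_0000_0000 (96GB),
 * roundup_pow_of_two() would extend the window to 0x20_0000_0000 (128GB),
 * so the single TVE maps all of system memory starting at bypass base 0.
 */
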
void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass)
{
	int i;
	struct pnv_phb *phb;
	struct pci_dn *pdn;
	struct pnv_ioda_pe *npe;
	struct pci_dev *npdev;

	for (i = 0; ; ++i) {
		npdev = pnv_pci_get_npu_dev(gpdev, i);

		if (!npdev)
			break;

		pdn = pci_get_pdn(npdev);
		if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
			return;

		phb = pci_bus_to_host(npdev->bus)->private_data;

		/* We only do bypass if it's enabled on the linked device */
		npe = &phb->ioda.pe_array[pdn->pe_number];

		if (bypass) {
			dev_info(&npdev->dev,
					"Using 64-bit DMA iommu bypass\n");
			pnv_npu_dma_set_bypass(npe);
		} else {
			dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n");
			pnv_npu_dma_set_32(npe);
		}
	}
}

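/*
 * Illustrative caller, not part of this file: the PHB DMA-mask code is
 * expected to invoke this after deciding whether the GPU itself can run
 * in bypass mode, roughly along the lines of (sketch only):
 *
 *	bool bypass = (dma_mask >= DMA_BIT_MASK(64));
 *	...configure the GPU PE for bypass or 32-bit DMA...
 *	pnv_npu_try_dma_set_bypass(gpdev, bypass);
 *
 * so that every NPU link mirrors whichever DMA mode the real GPU device
 * ended up with.
 */
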
/* Switch ownership from platform code to an external user (e.g. VFIO) */
void pnv_npu_take_ownership(struct pnv_ioda_pe *npe)
{
	struct pnv_phb *phb = npe->phb;
	int64_t rc;

	/*
	 * Note: the NPU has just a single TVE in the hardware, which means
	 * that while used by the kernel, it can have either a 32bit window
	 * or DMA bypass but never both. So we deconfigure the 32bit window
	 * only if it was enabled at the moment of ownership change.
	 */
	if (npe->table_group.tables[0]) {
		pnv_npu_unset_window(npe, 0);
		return;
	}

	/* Disable bypass */
	rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
			npe->pe_number, npe->pe_number,
			0 /* bypass base */, 0);
	if (rc) {
		pe_err(npe, "Failed to disable bypass, err %lld\n", rc);
		return;
	}
	pnv_pci_phb3_tce_invalidate_entire(npe->phb, false);
}

struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe)
{
	struct pnv_phb *phb = npe->phb;
	struct pci_bus *pbus = phb->hose->bus;
	struct pci_dev *npdev, *gpdev = NULL, *gptmp;
	struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);

	if (!gpe || !gpdev)
		return NULL;

	list_for_each_entry(npdev, &pbus->devices, bus_list) {
		gptmp = pnv_pci_get_gpu_dev(npdev);

		if (gptmp != gpdev)
			continue;

		pe_info(gpe, "Attached NPU %s\n", dev_name(&npdev->dev));
		iommu_group_add_device(gpe->table_group.group, &npdev->dev);
	}

	return gpe;
}