/*
 * Coherency fabric (Aurora) support for Armada 370 and XP platforms.
 *
 * Copyright (C) 2012 Marvell
 *
 * Yehuda Yitschak <yehuday@marvell.com>
 * Gregory Clement <gregory.clement@free-electrons.com>
 * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
 *
 * This file is licensed under the terms of the GNU General Public
 * License version 2.  This program is licensed "as is" without any
 * warranty of any kind, whether express or implied.
 *
 * The Armada 370 and Armada XP SOCs have a coherency fabric which is
 * responsible for ensuring hardware coherency between all CPUs and between
 * CPUs and I/O masters. This file initializes the coherency fabric and
 * supplies basic routines for configuring and controlling hardware coherency.
 */

#define pr_fmt(fmt) "mvebu-coherency: " fmt

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/of_address.h>
#include <linux/io.h>
#include <linux/smp.h>
#include <linux/dma-mapping.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/mbus.h>
#include <linux/clk.h>
#include <linux/pci.h>
#include <asm/smp_plat.h>
#include <asm/cacheflush.h>
#include <asm/mach/map.h>
#include "armada-370-xp.h"
#include "coherency.h"
#include "mvebu-soc-id.h"

unsigned long coherency_phys_base;
void __iomem *coherency_base;
static void __iomem *coherency_cpu_base;

/* Coherency fabric registers */
#define COHERENCY_FABRIC_CFG_OFFSET		   0x4

#define IO_SYNC_BARRIER_CTL_OFFSET		   0x0

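/* Coherency fabric variants, matched from the compatible string below */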
enum {
	COHERENCY_FABRIC_TYPE_NONE,
	COHERENCY_FABRIC_TYPE_ARMADA_370_XP,
	COHERENCY_FABRIC_TYPE_ARMADA_375,
	COHERENCY_FABRIC_TYPE_ARMADA_380,
};

static struct of_device_id of_coherency_table[] = {
	{.compatible = "marvell,coherency-fabric",
	 .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_370_XP },
	{.compatible = "marvell,armada-375-coherency-fabric",
	 .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_375 },
	{.compatible = "marvell,armada-380-coherency-fabric",
	 .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_380 },
	{ /* end of list */ },
};

/* Functions defined in coherency_ll.S */
int ll_enable_coherency(void);
void ll_add_cpu_to_smp_group(void);

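/*
 * Add the calling CPU to the coherency fabric's SMP group and enable
 * hardware coherency for it, using the low-level helpers from
 * coherency_ll.S. Returns non-zero if the fabric is not initialized.
 */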
int set_cpu_coherent(void)
{
	if (!coherency_base) {
		pr_warn("Can't make current CPU cache coherent.\n");
		pr_warn("Coherency fabric is not initialized\n");
		return 1;
	}

	ll_add_cpu_to_smp_group();
	return ll_enable_coherency();
}

/*
 * The below code implements the I/O coherency workaround on Armada
 * 375. This workaround consists of using the two channels of the
 * first XOR engine to trigger an XOR transaction that serves as the
 * I/O coherency barrier.
 */

static void __iomem *xor_base, *xor_high_base;
static dma_addr_t coherency_wa_buf_phys[CONFIG_NR_CPUS];
static void *coherency_wa_buf[CONFIG_NR_CPUS];
static bool coherency_wa_enabled;

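/* XOR engine registers used by the I/O coherency barrier workaround */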
#define XOR_CONFIG(chan)            (0x10 + ((chan) * 4))
#define XOR_ACTIVATION(chan)        (0x20 + ((chan) * 4))
#define WINDOW_BAR_ENABLE(chan)     (0x240 + ((chan) << 2))
#define WINDOW_BASE(w)              (0x250 + ((w) << 2))
#define WINDOW_SIZE(w)              (0x270 + ((w) << 2))
#define WINDOW_REMAP_HIGH(w)        (0x290 + ((w) << 2))
#define WINDOW_OVERRIDE_CTRL(chan)  (0x2A0 + ((chan) << 2))
#define XOR_DEST_POINTER(chan)      (0x2B0 + ((chan) * 4))
#define XOR_BLOCK_SIZE(chan)        (0x2C0 + ((chan) * 4))
#define XOR_INIT_VALUE_LOW          0x2E0
#define XOR_INIT_VALUE_HIGH         0x2E4

static inline void mvebu_hwcc_armada375_sync_io_barrier_wa(void)
{
	int idx = smp_processor_id();

	/* Write '1' to the first word of the buffer */
	writel(0x1, coherency_wa_buf[idx]);

	/* Wait until the engine is idle */
	while ((readl(xor_base + XOR_ACTIVATION(idx)) >> 4) & 0x3)
		;

	dmb();

	/* Trigger channel */
	writel(0x1, xor_base + XOR_ACTIVATION(idx));

	/* Poll the data until it is cleared by the XOR transaction */
	while (readl(coherency_wa_buf[idx]))
		;
}

static void __init armada_375_coherency_init_wa(void)
{
	const struct mbus_dram_target_info *dram;
	struct device_node *xor_node;
	struct property *xor_status;
	struct clk *xor_clk;
	u32 win_enable = 0;
	int i;

	pr_warn("enabling coherency workaround for Armada 375 Z1, one XOR engine disabled\n");

	/*
	 * Since the workaround uses one XOR engine, we grab a
	 * reference to its Device Tree node first.
	 */
	xor_node = of_find_compatible_node(NULL, NULL, "marvell,orion-xor");
	BUG_ON(!xor_node);

	/*
	 * Then we mark it as disabled so that the real XOR driver
	 * will not use it.
	 */
	xor_status = kzalloc(sizeof(struct property), GFP_KERNEL);
	BUG_ON(!xor_status);

	xor_status->value = kstrdup("disabled", GFP_KERNEL);
	BUG_ON(!xor_status->value);

	xor_status->length = 8;
	xor_status->name = kstrdup("status", GFP_KERNEL);
	BUG_ON(!xor_status->name);

	of_update_property(xor_node, xor_status);

	/*
	 * And we remap the registers, get the clock, and do the
	 * initial configuration of the XOR engine.
	 */
	xor_base = of_iomap(xor_node, 0);
	xor_high_base = of_iomap(xor_node, 1);

	xor_clk = of_clk_get_by_name(xor_node, NULL);
	BUG_ON(!xor_clk);

	clk_prepare_enable(xor_clk);

	dram = mv_mbus_dram_info();

	for (i = 0; i < 8; i++) {
		writel(0, xor_base + WINDOW_BASE(i));
		writel(0, xor_base + WINDOW_SIZE(i));
		if (i < 4)
			writel(0, xor_base + WINDOW_REMAP_HIGH(i));
	}

	for (i = 0; i < dram->num_cs; i++) {
		const struct mbus_dram_window *cs = dram->cs + i;
		writel((cs->base & 0xffff0000) |
		       (cs->mbus_attr << 8) |
		       dram->mbus_dram_target_id, xor_base + WINDOW_BASE(i));
		writel((cs->size - 1) & 0xffff0000, xor_base + WINDOW_SIZE(i));

		win_enable |= (1 << i);
		win_enable |= 3 << (16 + (2 * i));
	}

	writel(win_enable, xor_base + WINDOW_BAR_ENABLE(0));
	writel(win_enable, xor_base + WINDOW_BAR_ENABLE(1));
	writel(0, xor_base + WINDOW_OVERRIDE_CTRL(0));
	writel(0, xor_base + WINDOW_OVERRIDE_CTRL(1));

	for (i = 0; i < CONFIG_NR_CPUS; i++) {
		coherency_wa_buf[i] = kzalloc(PAGE_SIZE, GFP_KERNEL);
		BUG_ON(!coherency_wa_buf[i]);

		/*
		 * We can't use the DMA mapping API, since we don't
		 * have a valid 'struct device' pointer.
		 */
		coherency_wa_buf_phys[i] =
			virt_to_phys(coherency_wa_buf[i]);
		BUG_ON(!coherency_wa_buf_phys[i]);

		/*
		 * Configure the XOR engine for memset operation, with
		 * a 128-byte block size.
		 */
		writel(0x444, xor_base + XOR_CONFIG(i));
		writel(128, xor_base + XOR_BLOCK_SIZE(i));
		writel(coherency_wa_buf_phys[i],
		       xor_base + XOR_DEST_POINTER(i));
	}

	writel(0x0, xor_base + XOR_INIT_VALUE_LOW);
	writel(0x0, xor_base + XOR_INIT_VALUE_HIGH);

	coherency_wa_enabled = true;
}

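/*
 * Trigger the coherency fabric's I/O synchronization barrier (or the
 * XOR-based workaround on Armada 375 Z1) and busy-wait until it has
 * completed.
 */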
static inline void mvebu_hwcc_sync_io_barrier(void)
{
	if (coherency_wa_enabled) {
		mvebu_hwcc_armada375_sync_io_barrier_wa();
		return;
	}

	writel(0x1, coherency_cpu_base + IO_SYNC_BARRIER_CTL_OFFSET);
	while (readl(coherency_cpu_base + IO_SYNC_BARRIER_CTL_OFFSET) & 0x1)
		;
}

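/*
 * With hardware I/O coherency, no cache maintenance is needed on DMA
 * mappings; an I/O synchronization barrier replaces it, and is only
 * issued for directions in which the device writes to memory (i.e.
 * anything other than DMA_TO_DEVICE).
 */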
static dma_addr_t mvebu_hwcc_dma_map_page(struct device *dev, struct page *page,
				  unsigned long offset, size_t size,
				  enum dma_data_direction dir,
				  struct dma_attrs *attrs)
{
	if (dir != DMA_TO_DEVICE)
		mvebu_hwcc_sync_io_barrier();
	return pfn_to_dma(dev, page_to_pfn(page)) + offset;
}

static void mvebu_hwcc_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
			      size_t size, enum dma_data_direction dir,
			      struct dma_attrs *attrs)
{
	if (dir != DMA_TO_DEVICE)
		mvebu_hwcc_sync_io_barrier();
}

static void mvebu_hwcc_dma_sync(struct device *dev, dma_addr_t dma_handle,
			size_t size, enum dma_data_direction dir)
{
	if (dir != DMA_TO_DEVICE)
		mvebu_hwcc_sync_io_barrier();
}

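/*
 * DMA operations for hardware-coherent devices: allocation, mmap and
 * scatter-gather helpers are reused from the standard ARM
 * implementation, while the map/unmap and sync hooks above only issue
 * the I/O barrier.
 */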
static struct dma_map_ops mvebu_hwcc_dma_ops = {
	.alloc			= arm_dma_alloc,
	.free			= arm_dma_free,
	.mmap			= arm_dma_mmap,
	.map_page		= mvebu_hwcc_dma_map_page,
	.unmap_page		= mvebu_hwcc_dma_unmap_page,
	.get_sgtable		= arm_dma_get_sgtable,
	.map_sg			= arm_dma_map_sg,
	.unmap_sg		= arm_dma_unmap_sg,
	.sync_single_for_cpu	= mvebu_hwcc_dma_sync,
	.sync_single_for_device	= mvebu_hwcc_dma_sync,
	.sync_sg_for_cpu	= arm_dma_sync_sg_for_cpu,
	.sync_sg_for_device	= arm_dma_sync_sg_for_device,
	.set_dma_mask		= arm_dma_set_mask,
};

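/*
 * Bus notifier: install the hardware-coherent DMA operations on every
 * device as it is added to the bus.
 */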
static int mvebu_hwcc_notifier(struct notifier_block *nb,
			       unsigned long event, void *__dev)
{
	struct device *dev = __dev;

	if (event != BUS_NOTIFY_ADD_DEVICE)
		return NOTIFY_DONE;
	set_dma_ops(dev, &mvebu_hwcc_dma_ops);

	return NOTIFY_OK;
}

static struct notifier_block mvebu_hwcc_nb = {
	.notifier_call = mvebu_hwcc_notifier,
};

static struct notifier_block mvebu_hwcc_pci_nb = {
	.notifier_call = mvebu_hwcc_notifier,
};

static void __init armada_370_coherency_init(struct device_node *np)
{
	struct resource res;

	of_address_to_resource(np, 0, &res);
	coherency_phys_base = res.start;
	/*
	 * Ensure secondary CPUs will see the updated value,
	 * which they read before they join the coherency
	 * fabric, and therefore before they are coherent with
	 * the boot CPU cache.
	 */
	sync_cache_w(&coherency_phys_base);
	coherency_base = of_iomap(np, 0);
	coherency_cpu_base = of_iomap(np, 1);
	set_cpu_coherent();
}

/*
 * This ioremap hook is used on Armada 375/38x to ensure that all MMIO
 * areas are mapped as MT_UNCACHED instead of MT_DEVICE. This is
 * needed for the HW I/O coherency mechanism to work properly without
 * deadlock.
 */
static void __iomem *
armada_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
			 unsigned int mtype, void *caller)
{
	mtype = MT_UNCACHED;
	return __arm_ioremap_caller(phys_addr, size, mtype, caller);
}

static void __init armada_375_380_coherency_init(struct device_node *np)
{
	struct device_node *cache_dn;

	coherency_cpu_base = of_iomap(np, 0);
	arch_ioremap_caller = armada_wa_ioremap_caller;

	/*
	 * We should switch the PL310 to I/O coherency mode only if
	 * I/O coherency is actually enabled.
	 */
	if (!coherency_available())
		return;

	/*
	 * Add the PL310 property "arm,io-coherent". This makes sure the
	 * outer sync operation is not used, which allows us to work
	 * around the system erratum that causes deadlocks when doing
	 * PCIe in an SMP situation on Armada 375 and Armada 38x.
	 */
	for_each_compatible_node(cache_dn, NULL, "arm,pl310-cache") {
		struct property *p;

		p = kzalloc(sizeof(*p), GFP_KERNEL);
		p->name = kstrdup("arm,io-coherent", GFP_KERNEL);
		of_add_property(cache_dn, p);
	}
}

static int coherency_type(void)
{
	struct device_node *np;
	const struct of_device_id *match;
	int type;

	/*
	 * The coherency fabric is needed:
	 * - For coherency between processors on Armada XP, so only
	 *   when SMP is enabled.
	 * - For coherency between the processor and I/O devices, but
	 *   this coherency requires many pre-requisites (write
	 *   allocate cache policy, shareable pages, SMP bit set) that
	 *   are only met in SMP situations.
	 *
	 * Note that this means that on Armada 370, there is currently
	 * no way to use hardware I/O coherency, because even when
	 * CONFIG_SMP is enabled, is_smp() returns false due to the
	 * Armada 370 being a single-core processor. To lift this
	 * limitation, we would have to find a way to make the cache
	 * policy set to write-allocate (on all Armada SoCs), and to
	 * set the shareable attribute in page tables (on all Armada
	 * SoCs except the Armada 370). Unfortunately, such decisions
	 * are taken very early in the kernel boot process, at a point
	 * where we don't yet know which SoC we are running on.
	 */
	if (!is_smp())
		return COHERENCY_FABRIC_TYPE_NONE;

	np = of_find_matching_node_and_match(NULL, of_coherency_table, &match);
	if (!np)
		return COHERENCY_FABRIC_TYPE_NONE;

	type = (int) match->data;

	of_node_put(np);

	return type;
}

/*
 * As a precaution, we currently completely disable hardware I/O
 * coherency, until enough testing is done with automatic I/O
 * synchronization barriers to validate that it is a proper solution.
 */
int coherency_available(void)
{
	return false;
}

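/*
 * Probe the coherency fabric type from the Device Tree and run the
 * matching SoC-specific initialization.
 */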
int __init coherency_init(void)
{
	int type = coherency_type();
	struct device_node *np;

	np = of_find_matching_node(NULL, of_coherency_table);

	if (type == COHERENCY_FABRIC_TYPE_ARMADA_370_XP)
		armada_370_coherency_init(np);
	else if (type == COHERENCY_FABRIC_TYPE_ARMADA_375 ||
		 type == COHERENCY_FABRIC_TYPE_ARMADA_380)
		armada_375_380_coherency_init(np);

	of_node_put(np);

	return 0;
}

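/*
 * Late initialization: set up the XOR workaround when running on an
 * Armada 375 Z1 stepping and, if I/O coherency is available, register
 * the DMA ops notifier on the platform bus.
 */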
static int __init coherency_late_init(void)
{
	int type = coherency_type();

	if (type == COHERENCY_FABRIC_TYPE_NONE)
		return 0;

	if (type == COHERENCY_FABRIC_TYPE_ARMADA_375) {
		u32 dev, rev;

		if (mvebu_get_soc_id(&dev, &rev) == 0 &&
		    rev == ARMADA_375_Z1_REV)
			armada_375_coherency_init_wa();
	}

	if (coherency_available())
		bus_register_notifier(&platform_bus_type,
				      &mvebu_hwcc_nb);

	return 0;
}

postcore_initcall(coherency_late_init);

#if IS_ENABLED(CONFIG_PCI)
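/*
 * Register the same notifier on the PCI bus, so that PCI devices also
 * get the hardware-coherent DMA operations.
 */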
static int __init coherency_pci_init(void)
{
	if (coherency_available())
		bus_register_notifier(&pci_bus_type,
				       &mvebu_hwcc_pci_nb);
	return 0;
}

arch_initcall(coherency_pci_init);
#endif