• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * APM X-Gene SoC EDAC (error detection and correction)
4  *
5  * Copyright (c) 2015, Applied Micro Circuits Corporation
6  * Author: Feng Kan <fkan@apm.com>
7  *         Loc Ho <lho@apm.com>
8  */
9 
10 #include <linux/ctype.h>
11 #include <linux/edac.h>
12 #include <linux/interrupt.h>
13 #include <linux/mfd/syscon.h>
14 #include <linux/module.h>
15 #include <linux/of.h>
16 #include <linux/of_address.h>
17 #include <linux/regmap.h>
18 
19 #include "edac_module.h"
20 
21 #define EDAC_MOD_STR			"xgene_edac"
22 
23 /*
 * Global error configuration status registers (CSR).
 *
 * The un-indented names are register offsets; the PCP accessors in this file
 * add them to edac->pcp_csr. The indented *_MASK names are single-bit masks
 * listed under the status/mask register pair they are used with.
 *
 * The status registers (…STS) are read to find out which unit flagged an
 * error. In the mask registers (…MSK) the driver clears a bit to enable the
 * corresponding top-level interrupt and sets it to disable the interrupt.
 */
24 #define PCPHPERRINTSTS			0x0000
25 #define PCPHPERRINTMSK			0x0004
26 #define  MCU_CTL_ERR_MASK		BIT(12)
27 #define  IOB_PA_ERR_MASK		BIT(11)
28 #define  IOB_BA_ERR_MASK		BIT(10)
29 #define  IOB_XGIC_ERR_MASK		BIT(9)
30 #define  IOB_RB_ERR_MASK		BIT(8)
31 #define  L3C_UNCORR_ERR_MASK		BIT(5)
32 #define  MCU_UNCORR_ERR_MASK		BIT(4)
/* PMDn bit is formed in the code as PMD0_MERR_MASK << n */
33 #define  PMD3_MERR_MASK			BIT(3)
34 #define  PMD2_MERR_MASK			BIT(2)
35 #define  PMD1_MERR_MASK			BIT(1)
36 #define  PMD0_MERR_MASK			BIT(0)
37 #define PCPLPERRINTSTS			0x0008
38 #define PCPLPERRINTMSK			0x000C
39 #define  CSW_SWITCH_TRACE_ERR_MASK	BIT(2)
40 #define  L3C_CORR_ERR_MASK		BIT(1)
41 #define  MCU_CORR_ERR_MASK		BIT(0)
42 #define MEMERRINTSTS			0x0010
43 #define MEMERRINTMSK			0x0014
44 
45 struct xgene_edac {
46 	struct device		*dev;
47 	struct regmap		*csw_map;
48 	struct regmap		*mcba_map;
49 	struct regmap		*mcbb_map;
50 	struct regmap		*efuse_map;
51 	struct regmap		*rb_map;
52 	void __iomem		*pcp_csr;
53 	spinlock_t		lock;
54 	struct dentry           *dfs;
55 
56 	struct list_head	mcus;
57 	struct list_head	pmds;
58 	struct list_head	l3s;
59 	struct list_head	socs;
60 
61 	struct mutex		mc_lock;
62 	int			mc_active_mask;
63 	int			mc_registered_mask;
64 };
65 
/*
 * Read the 32-bit PCP register at byte offset @reg from edac->pcp_csr and
 * store the value through @val. No locking is taken for a plain read.
 */
static void xgene_edac_pcp_rd(struct xgene_edac *edac, u32 reg, u32 *val)
{
	void __iomem *addr = edac->pcp_csr + reg;

	*val = readl(addr);
}
70 
/*
 * Clear the bits in @bits_mask in the PCP register at offset @reg.
 * The read-modify-write is done under edac->lock so that concurrent
 * set/clear helpers cannot lose each other's update.
 */
static void xgene_edac_pcp_clrbits(struct xgene_edac *edac, u32 reg,
				   u32 bits_mask)
{
	void __iomem *addr = edac->pcp_csr + reg;

	spin_lock(&edac->lock);
	writel(readl(addr) & ~bits_mask, addr);
	spin_unlock(&edac->lock);
}
82 
/*
 * Set the bits in @bits_mask in the PCP register at offset @reg.
 * The read-modify-write is done under edac->lock so that concurrent
 * set/clear helpers cannot lose each other's update.
 */
static void xgene_edac_pcp_setbits(struct xgene_edac *edac, u32 reg,
				   u32 bits_mask)
{
	void __iomem *addr = edac->pcp_csr + reg;

	spin_lock(&edac->lock);
	writel(readl(addr) | bits_mask, addr);
	spin_unlock(&edac->lock);
}
94 
95 /*
 * Memory controller error CSR.
 *
 * Per-rank registers (names ending in 0) are addressed in the code as
 * mcu_csr + <offset> + rank * MCU_RANK_STRIDE for rank 0..MCU_MAX_RANK-1.
 * The *_RD()/MCU_SBECNT_COUNT() helpers extract one field from a raw
 * register value by mask and shift.
 */
96 #define MCU_MAX_RANK			8
97 #define MCU_RANK_STRIDE			0x40
98 
/* MCUGECR: the driver sets/clears these bits to enable/disable MCU reporting */
99 #define MCUGECR				0x0110
100 #define  MCU_GECR_DEMANDUCINTREN_MASK	BIT(0)
101 #define  MCU_GECR_BACKUCINTREN_MASK	BIT(1)
102 #define  MCU_GECR_CINTREN_MASK		BIT(2)
/*
 * NOTE(review): the "MUC_" prefix below looks like a typo for "MCU_"; the
 * name is kept because xgene_edac_mc_irq_ctl() references it as spelled.
 */
103 #define  MUC_GECR_MCUADDRERREN_MASK	BIT(9)
/* MCUGESR: controller-level status, cleared by writing back the value read */
104 #define MCUGESR				0x0114
105 #define  MCU_GESR_ADDRNOMATCH_ERR_MASK	BIT(7)
106 #define  MCU_GESR_ADDRMULTIMATCH_ERR_MASK	BIT(6)
107 #define  MCU_GESR_PHYP_ERR_MASK		BIT(3)
/* MCUESRR0: per-rank error status, cleared by writing back the value read */
108 #define MCUESRR0			0x0314
109 #define  MCU_ESRR_MULTUCERR_MASK	BIT(3)
110 #define  MCU_ESRR_BACKUCERR_MASK	BIT(2)
111 #define  MCU_ESRR_DEMANDUCERR_MASK	BIT(1)
112 #define  MCU_ESRR_CERR_MASK		BIT(0)
/* MCUESRRA0: written by the debugfs inject handler with the ESRR status bits */
113 #define MCUESRRA0			0x0318
114 #define MCUEBLRR0			0x031c
115 #define  MCU_EBLRR_ERRBANK_RD(src)	(((src) & 0x00000007) >> 0)
116 #define MCUERCRR0			0x0320
117 #define  MCU_ERCRR_ERRROW_RD(src)	(((src) & 0xFFFF0000) >> 16)
118 #define  MCU_ERCRR_ERRCOL_RD(src)	((src) & 0x00000FFF)
119 #define MCUSBECNT0			0x0324
120 #define MCU_SBECNT_COUNT(src)		((src) & 0xFFFF)
121 
/* CSW control register, read through csw_map to tell single from dual MCB */
122 #define CSW_CSWCR			0x0000
123 #define  CSW_CSWCR_DUALMCB_MASK		BIT(0)
124 
/* MCB address mode register, read through mcba_map/mcbb_map */
125 #define MCBADDRMR			0x0000
126 #define  MCBADDRMR_MCU_INTLV_MODE_MASK	BIT(3)
127 #define  MCBADDRMR_DUALMCU_MODE_MASK	BIT(2)
128 #define  MCBADDRMR_MCB_INTLV_MODE_MASK	BIT(1)
129 #define  MCBADDRMR_ADDRESS_MODE_MASK	BIT(0)
130 
131 struct xgene_edac_mc_ctx {
132 	struct list_head	next;
133 	char			*name;
134 	struct mem_ctl_info	*mci;
135 	struct xgene_edac	*edac;
136 	void __iomem		*mcu_csr;
137 	u32			mcu_id;
138 };
139 
xgene_edac_mc_err_inject_write(struct file * file,const char __user * data,size_t count,loff_t * ppos)140 static ssize_t xgene_edac_mc_err_inject_write(struct file *file,
141 					      const char __user *data,
142 					      size_t count, loff_t *ppos)
143 {
144 	struct mem_ctl_info *mci = file->private_data;
145 	struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
146 	int i;
147 
148 	for (i = 0; i < MCU_MAX_RANK; i++) {
149 		writel(MCU_ESRR_MULTUCERR_MASK | MCU_ESRR_BACKUCERR_MASK |
150 		       MCU_ESRR_DEMANDUCERR_MASK | MCU_ESRR_CERR_MASK,
151 		       ctx->mcu_csr + MCUESRRA0 + i * MCU_RANK_STRIDE);
152 	}
153 	return count;
154 }
155 
156 static const struct file_operations xgene_edac_mc_debug_inject_fops = {
157 	.open = simple_open,
158 	.write = xgene_edac_mc_err_inject_write,
159 	.llseek = generic_file_llseek,
160 };
161 
xgene_edac_mc_create_debugfs_node(struct mem_ctl_info * mci)162 static void xgene_edac_mc_create_debugfs_node(struct mem_ctl_info *mci)
163 {
164 	if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
165 		return;
166 
167 	if (!mci->debugfs)
168 		return;
169 
170 	edac_debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci,
171 				 &xgene_edac_mc_debug_inject_fops);
172 }
173 
xgene_edac_mc_check(struct mem_ctl_info * mci)174 static void xgene_edac_mc_check(struct mem_ctl_info *mci)
175 {
176 	struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
177 	unsigned int pcp_hp_stat;
178 	unsigned int pcp_lp_stat;
179 	u32 reg;
180 	u32 rank;
181 	u32 bank;
182 	u32 count;
183 	u32 col_row;
184 
185 	xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
186 	xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
187 	if (!((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
188 	      (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
189 	      (MCU_CORR_ERR_MASK & pcp_lp_stat)))
190 		return;
191 
192 	for (rank = 0; rank < MCU_MAX_RANK; rank++) {
193 		reg = readl(ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);
194 
195 		/* Detect uncorrectable memory error */
196 		if (reg & (MCU_ESRR_DEMANDUCERR_MASK |
197 			   MCU_ESRR_BACKUCERR_MASK)) {
198 			/* Detected uncorrectable memory error */
199 			edac_mc_chipset_printk(mci, KERN_ERR, "X-Gene",
200 				"MCU uncorrectable error at rank %d\n", rank);
201 
202 			edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
203 				1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
204 		}
205 
206 		/* Detect correctable memory error */
207 		if (reg & MCU_ESRR_CERR_MASK) {
208 			bank = readl(ctx->mcu_csr + MCUEBLRR0 +
209 				     rank * MCU_RANK_STRIDE);
210 			col_row = readl(ctx->mcu_csr + MCUERCRR0 +
211 					rank * MCU_RANK_STRIDE);
212 			count = readl(ctx->mcu_csr + MCUSBECNT0 +
213 				      rank * MCU_RANK_STRIDE);
214 			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
215 				"MCU correctable error at rank %d bank %d column %d row %d count %d\n",
216 				rank, MCU_EBLRR_ERRBANK_RD(bank),
217 				MCU_ERCRR_ERRCOL_RD(col_row),
218 				MCU_ERCRR_ERRROW_RD(col_row),
219 				MCU_SBECNT_COUNT(count));
220 
221 			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
222 				1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
223 		}
224 
225 		/* Clear all error registers */
226 		writel(0x0, ctx->mcu_csr + MCUEBLRR0 + rank * MCU_RANK_STRIDE);
227 		writel(0x0, ctx->mcu_csr + MCUERCRR0 + rank * MCU_RANK_STRIDE);
228 		writel(0x0, ctx->mcu_csr + MCUSBECNT0 +
229 		       rank * MCU_RANK_STRIDE);
230 		writel(reg, ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);
231 	}
232 
233 	/* Detect memory controller error */
234 	reg = readl(ctx->mcu_csr + MCUGESR);
235 	if (reg) {
236 		if (reg & MCU_GESR_ADDRNOMATCH_ERR_MASK)
237 			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
238 				"MCU address miss-match error\n");
239 		if (reg & MCU_GESR_ADDRMULTIMATCH_ERR_MASK)
240 			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
241 				"MCU address multi-match error\n");
242 
243 		writel(reg, ctx->mcu_csr + MCUGESR);
244 	}
245 }
246 
/*
 * Enable or disable error interrupts for one memory controller.
 *
 * Only acts when the EDAC core runs in interrupt mode. All work is done
 * under edac->mc_lock because the registered-MCU bitmap and the shared
 * top-level mask bits are common to every MCU.
 */
static void xgene_edac_mc_irq_ctl(struct mem_ctl_info *mci, bool enable)
{
	const unsigned int mcu_irq_bits = MCU_GECR_DEMANDUCINTREN_MASK |
					  MCU_GECR_BACKUCINTREN_MASK |
					  MCU_GECR_CINTREN_MASK |
					  MUC_GECR_MCUADDRERREN_MASK;
	struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
	struct xgene_edac *edac;
	void __iomem *gecr;
	unsigned int val;

	if (edac_op_state != EDAC_OPSTATE_INT)
		return;

	edac = ctx->edac;
	gecr = ctx->mcu_csr + MCUGECR;

	mutex_lock(&edac->mc_lock);

	/*
	 * The top level has a single enable/mask bit shared by all MCUs, so
	 * it may only be unmasked once every active MCU has registered.
	 * Otherwise an error on a not-yet-registered MCU would raise an
	 * interrupt nobody clears. The active and registered bitmaps are
	 * compared to detect that point.
	 */
	if (enable) {
		/* Mark this MCU as registered */
		edac->mc_registered_mask |= 1 << ctx->mcu_id;

		/* Unmask the top level once all active MCUs are registered */
		if (edac->mc_registered_mask == edac->mc_active_mask) {
			xgene_edac_pcp_clrbits(edac, PCPHPERRINTMSK,
					       MCU_UNCORR_ERR_MASK |
					       MCU_CTL_ERR_MASK);
			xgene_edac_pcp_clrbits(edac, PCPLPERRINTMSK,
					       MCU_CORR_ERR_MASK);
		}

		/* Turn on interrupt and error reporting in the MCU itself */
		val = readl(gecr);
		val |= mcu_irq_bits;
		writel(val, gecr);
	} else {
		/* Turn off interrupt and error reporting in the MCU itself */
		val = readl(gecr);
		val &= ~mcu_irq_bits;
		writel(val, gecr);

		/* Mask the shared top-level MCU interrupts */
		xgene_edac_pcp_setbits(edac, PCPHPERRINTMSK,
				       MCU_UNCORR_ERR_MASK | MCU_CTL_ERR_MASK);
		xgene_edac_pcp_setbits(edac, PCPLPERRINTMSK,
				       MCU_CORR_ERR_MASK);

		/* Mark this MCU as no longer registered */
		edac->mc_registered_mask &= ~(1 << ctx->mcu_id);
	}

	mutex_unlock(&edac->mc_lock);
}
308 
xgene_edac_mc_is_active(struct xgene_edac_mc_ctx * ctx,int mc_idx)309 static int xgene_edac_mc_is_active(struct xgene_edac_mc_ctx *ctx, int mc_idx)
310 {
311 	unsigned int reg;
312 	u32 mcu_mask;
313 
314 	if (regmap_read(ctx->edac->csw_map, CSW_CSWCR, &reg))
315 		return 0;
316 
317 	if (reg & CSW_CSWCR_DUALMCB_MASK) {
318 		/*
319 		 * Dual MCB active - Determine if all 4 active or just MCU0
320 		 * and MCU2 active
321 		 */
322 		if (regmap_read(ctx->edac->mcbb_map, MCBADDRMR, &reg))
323 			return 0;
324 		mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
325 	} else {
326 		/*
327 		 * Single MCB active - Determine if MCU0/MCU1 or just MCU0
328 		 * active
329 		 */
330 		if (regmap_read(ctx->edac->mcba_map, MCBADDRMR, &reg))
331 			return 0;
332 		mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
333 	}
334 
335 	/* Save active MC mask if hasn't set already */
336 	if (!ctx->edac->mc_active_mask)
337 		ctx->edac->mc_active_mask = mcu_mask;
338 
339 	return (mcu_mask & (1 << mc_idx)) ? 1 : 0;
340 }
341 
xgene_edac_mc_add(struct xgene_edac * edac,struct device_node * np)342 static int xgene_edac_mc_add(struct xgene_edac *edac, struct device_node *np)
343 {
344 	struct mem_ctl_info *mci;
345 	struct edac_mc_layer layers[2];
346 	struct xgene_edac_mc_ctx tmp_ctx;
347 	struct xgene_edac_mc_ctx *ctx;
348 	struct resource res;
349 	int rc;
350 
351 	memset(&tmp_ctx, 0, sizeof(tmp_ctx));
352 	tmp_ctx.edac = edac;
353 
354 	if (!devres_open_group(edac->dev, xgene_edac_mc_add, GFP_KERNEL))
355 		return -ENOMEM;
356 
357 	rc = of_address_to_resource(np, 0, &res);
358 	if (rc < 0) {
359 		dev_err(edac->dev, "no MCU resource address\n");
360 		goto err_group;
361 	}
362 	tmp_ctx.mcu_csr = devm_ioremap_resource(edac->dev, &res);
363 	if (IS_ERR(tmp_ctx.mcu_csr)) {
364 		dev_err(edac->dev, "unable to map MCU resource\n");
365 		rc = PTR_ERR(tmp_ctx.mcu_csr);
366 		goto err_group;
367 	}
368 
369 	/* Ignore non-active MCU */
370 	if (of_property_read_u32(np, "memory-controller", &tmp_ctx.mcu_id)) {
371 		dev_err(edac->dev, "no memory-controller property\n");
372 		rc = -ENODEV;
373 		goto err_group;
374 	}
375 	if (!xgene_edac_mc_is_active(&tmp_ctx, tmp_ctx.mcu_id)) {
376 		rc = -ENODEV;
377 		goto err_group;
378 	}
379 
380 	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
381 	layers[0].size = 4;
382 	layers[0].is_virt_csrow = true;
383 	layers[1].type = EDAC_MC_LAYER_CHANNEL;
384 	layers[1].size = 2;
385 	layers[1].is_virt_csrow = false;
386 	mci = edac_mc_alloc(tmp_ctx.mcu_id, ARRAY_SIZE(layers), layers,
387 			    sizeof(*ctx));
388 	if (!mci) {
389 		rc = -ENOMEM;
390 		goto err_group;
391 	}
392 
393 	ctx = mci->pvt_info;
394 	*ctx = tmp_ctx;		/* Copy over resource value */
395 	ctx->name = "xgene_edac_mc_err";
396 	ctx->mci = mci;
397 	mci->pdev = &mci->dev;
398 	mci->ctl_name = ctx->name;
399 	mci->dev_name = ctx->name;
400 
401 	mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_RDDR2 | MEM_FLAG_RDDR3 |
402 			 MEM_FLAG_DDR | MEM_FLAG_DDR2 | MEM_FLAG_DDR3;
403 	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
404 	mci->edac_cap = EDAC_FLAG_SECDED;
405 	mci->mod_name = EDAC_MOD_STR;
406 	mci->ctl_page_to_phys = NULL;
407 	mci->scrub_cap = SCRUB_FLAG_HW_SRC;
408 	mci->scrub_mode = SCRUB_HW_SRC;
409 
410 	if (edac_op_state == EDAC_OPSTATE_POLL)
411 		mci->edac_check = xgene_edac_mc_check;
412 
413 	if (edac_mc_add_mc(mci)) {
414 		dev_err(edac->dev, "edac_mc_add_mc failed\n");
415 		rc = -EINVAL;
416 		goto err_free;
417 	}
418 
419 	xgene_edac_mc_create_debugfs_node(mci);
420 
421 	list_add(&ctx->next, &edac->mcus);
422 
423 	xgene_edac_mc_irq_ctl(mci, true);
424 
425 	devres_remove_group(edac->dev, xgene_edac_mc_add);
426 
427 	dev_info(edac->dev, "X-Gene EDAC MC registered\n");
428 	return 0;
429 
430 err_free:
431 	edac_mc_free(mci);
432 err_group:
433 	devres_release_group(edac->dev, xgene_edac_mc_add);
434 	return rc;
435 }
436 
xgene_edac_mc_remove(struct xgene_edac_mc_ctx * mcu)437 static int xgene_edac_mc_remove(struct xgene_edac_mc_ctx *mcu)
438 {
439 	xgene_edac_mc_irq_ctl(mcu->mci, false);
440 	edac_mc_del_mc(&mcu->mci->dev);
441 	edac_mc_free(mcu->mci);
442 	return 0;
443 }
444 
445 /*
 * CPU L1/L2 error CSR.
 *
 * A PMD register block is addressed in the code as
 *   pmd_csr + cpu * CPU_CSR_STRIDE + <page> + <page offset>
 * for the per-CPU page (CPU_MEMERR_CPU_PAGE) and as
 *   pmd_csr + <page> + <page offset>
 * for the L2C pages. The *_RD() helpers extract one field from a raw status
 * value by mask and shift.
 */
446 #define MAX_CPU_PER_PMD				2
447 #define CPU_CSR_STRIDE				0x00100000
448 #define CPU_L2C_PAGE				0x000D0000
449 #define CPU_MEMERR_L2C_PAGE			0x000E0000
450 #define CPU_MEMERR_CPU_PAGE			0x000F0000
451 
/* Offsets within CPU_MEMERR_CPU_PAGE: ICF, LSU and MMU control/status */
452 #define MEMERR_CPU_ICFECR_PAGE_OFFSET		0x0000
453 #define MEMERR_CPU_ICFESR_PAGE_OFFSET		0x0004
454 #define  MEMERR_CPU_ICFESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
455 #define  MEMERR_CPU_ICFESR_ERRINDEX_RD(src)	(((src) & 0x003F0000) >> 16)
456 #define  MEMERR_CPU_ICFESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
457 #define  MEMERR_CPU_ICFESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
458 #define  MEMERR_CPU_ICFESR_MULTCERR_MASK	BIT(2)
459 #define  MEMERR_CPU_ICFESR_CERR_MASK		BIT(0)
460 #define MEMERR_CPU_LSUESR_PAGE_OFFSET		0x000c
461 #define  MEMERR_CPU_LSUESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
462 #define  MEMERR_CPU_LSUESR_ERRINDEX_RD(src)	(((src) & 0x003F0000) >> 16)
463 #define  MEMERR_CPU_LSUESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
464 #define  MEMERR_CPU_LSUESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
465 #define  MEMERR_CPU_LSUESR_MULTCERR_MASK	BIT(2)
466 #define  MEMERR_CPU_LSUESR_CERR_MASK		BIT(0)
467 #define MEMERR_CPU_LSUECR_PAGE_OFFSET		0x0008
468 #define MEMERR_CPU_MMUECR_PAGE_OFFSET		0x0010
469 #define MEMERR_CPU_MMUESR_PAGE_OFFSET		0x0014
470 #define  MEMERR_CPU_MMUESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
471 #define  MEMERR_CPU_MMUESR_ERRINDEX_RD(src)	(((src) & 0x007F0000) >> 16)
472 #define  MEMERR_CPU_MMUESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
473 #define  MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK	BIT(7)
474 #define  MEMERR_CPU_MMUESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
475 #define  MEMERR_CPU_MMUESR_MULTCERR_MASK	BIT(2)
476 #define  MEMERR_CPU_MMUESR_CERR_MASK		BIT(0)
/* The *ESRA offsets are the ones written by the debugfs inject handlers */
477 #define MEMERR_CPU_ICFESRA_PAGE_OFFSET		0x0804
478 #define MEMERR_CPU_LSUESRA_PAGE_OFFSET		0x080c
479 #define MEMERR_CPU_MMUESRA_PAGE_OFFSET		0x0814
480 
/*
 * MEMERR_L2C_* offsets are used with CPU_MEMERR_L2C_PAGE, CPUX_L2C_* offsets
 * with CPU_L2C_PAGE.
 */
481 #define MEMERR_L2C_L2ECR_PAGE_OFFSET		0x0000
482 #define MEMERR_L2C_L2ESR_PAGE_OFFSET		0x0004
483 #define  MEMERR_L2C_L2ESR_ERRSYN_RD(src)	(((src) & 0xFF000000) >> 24)
484 #define  MEMERR_L2C_L2ESR_ERRWAY_RD(src)	(((src) & 0x00FC0000) >> 18)
485 #define  MEMERR_L2C_L2ESR_ERRCPU_RD(src)	(((src) & 0x00020000) >> 17)
486 #define  MEMERR_L2C_L2ESR_ERRGROUP_RD(src)	(((src) & 0x0000E000) >> 13)
487 #define  MEMERR_L2C_L2ESR_ERRACTION_RD(src)	(((src) & 0x00001C00) >> 10)
488 #define  MEMERR_L2C_L2ESR_ERRTYPE_RD(src)	(((src) & 0x00000300) >> 8)
489 #define  MEMERR_L2C_L2ESR_MULTUCERR_MASK	BIT(3)
490 #define  MEMERR_L2C_L2ESR_MULTICERR_MASK	BIT(2)
491 #define  MEMERR_L2C_L2ESR_UCERR_MASK		BIT(1)
492 #define  MEMERR_L2C_L2ESR_ERR_MASK		BIT(0)
493 #define MEMERR_L2C_L2EALR_PAGE_OFFSET		0x0008
494 #define CPUX_L2C_L2RTOCR_PAGE_OFFSET		0x0010
495 #define MEMERR_L2C_L2EAHR_PAGE_OFFSET		0x000c
496 #define CPUX_L2C_L2RTOSR_PAGE_OFFSET		0x0014
497 #define  MEMERR_L2C_L2RTOSR_MULTERR_MASK	BIT(1)
498 #define  MEMERR_L2C_L2RTOSR_ERR_MASK		BIT(0)
499 #define CPUX_L2C_L2RTOALR_PAGE_OFFSET		0x0018
500 #define CPUX_L2C_L2RTOAHR_PAGE_OFFSET		0x001c
501 #define MEMERR_L2C_L2ESRA_PAGE_OFFSET		0x0804
502 
503 /*
504  * Processor Module Domain (PMD) context - Context for a pair of processors.
505  * Each PMD consists of 2 CPUs and a shared L2 cache. Each CPU consists of
506  * its own L1 cache.
507  */
508 struct xgene_edac_pmd_ctx {
509 	struct list_head	next;
510 	struct device		ddev;
511 	char			*name;
512 	struct xgene_edac	*edac;
513 	struct edac_device_ctl_info *edac_dev;
514 	void __iomem		*pmd_csr;
515 	u32			pmd;
516 	int			version;
517 };
518 
xgene_edac_pmd_l1_check(struct edac_device_ctl_info * edac_dev,int cpu_idx)519 static void xgene_edac_pmd_l1_check(struct edac_device_ctl_info *edac_dev,
520 				    int cpu_idx)
521 {
522 	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
523 	void __iomem *pg_f;
524 	u32 val;
525 
526 	pg_f = ctx->pmd_csr + cpu_idx * CPU_CSR_STRIDE + CPU_MEMERR_CPU_PAGE;
527 
528 	val = readl(pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);
529 	if (!val)
530 		goto chk_lsu;
531 	dev_err(edac_dev->dev,
532 		"CPU%d L1 memory error ICF 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
533 		ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
534 		MEMERR_CPU_ICFESR_ERRWAY_RD(val),
535 		MEMERR_CPU_ICFESR_ERRINDEX_RD(val),
536 		MEMERR_CPU_ICFESR_ERRINFO_RD(val));
537 	if (val & MEMERR_CPU_ICFESR_CERR_MASK)
538 		dev_err(edac_dev->dev, "One or more correctable error\n");
539 	if (val & MEMERR_CPU_ICFESR_MULTCERR_MASK)
540 		dev_err(edac_dev->dev, "Multiple correctable error\n");
541 	switch (MEMERR_CPU_ICFESR_ERRTYPE_RD(val)) {
542 	case 1:
543 		dev_err(edac_dev->dev, "L1 TLB multiple hit\n");
544 		break;
545 	case 2:
546 		dev_err(edac_dev->dev, "Way select multiple hit\n");
547 		break;
548 	case 3:
549 		dev_err(edac_dev->dev, "Physical tag parity error\n");
550 		break;
551 	case 4:
552 	case 5:
553 		dev_err(edac_dev->dev, "L1 data parity error\n");
554 		break;
555 	case 6:
556 		dev_err(edac_dev->dev, "L1 pre-decode parity error\n");
557 		break;
558 	}
559 
560 	/* Clear any HW errors */
561 	writel(val, pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);
562 
563 	if (val & (MEMERR_CPU_ICFESR_CERR_MASK |
564 		   MEMERR_CPU_ICFESR_MULTCERR_MASK))
565 		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
566 
567 chk_lsu:
568 	val = readl(pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);
569 	if (!val)
570 		goto chk_mmu;
571 	dev_err(edac_dev->dev,
572 		"CPU%d memory error LSU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
573 		ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
574 		MEMERR_CPU_LSUESR_ERRWAY_RD(val),
575 		MEMERR_CPU_LSUESR_ERRINDEX_RD(val),
576 		MEMERR_CPU_LSUESR_ERRINFO_RD(val));
577 	if (val & MEMERR_CPU_LSUESR_CERR_MASK)
578 		dev_err(edac_dev->dev, "One or more correctable error\n");
579 	if (val & MEMERR_CPU_LSUESR_MULTCERR_MASK)
580 		dev_err(edac_dev->dev, "Multiple correctable error\n");
581 	switch (MEMERR_CPU_LSUESR_ERRTYPE_RD(val)) {
582 	case 0:
583 		dev_err(edac_dev->dev, "Load tag error\n");
584 		break;
585 	case 1:
586 		dev_err(edac_dev->dev, "Load data error\n");
587 		break;
588 	case 2:
589 		dev_err(edac_dev->dev, "WSL multihit error\n");
590 		break;
591 	case 3:
592 		dev_err(edac_dev->dev, "Store tag error\n");
593 		break;
594 	case 4:
595 		dev_err(edac_dev->dev,
596 			"DTB multihit from load pipeline error\n");
597 		break;
598 	case 5:
599 		dev_err(edac_dev->dev,
600 			"DTB multihit from store pipeline error\n");
601 		break;
602 	}
603 
604 	/* Clear any HW errors */
605 	writel(val, pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);
606 
607 	if (val & (MEMERR_CPU_LSUESR_CERR_MASK |
608 		   MEMERR_CPU_LSUESR_MULTCERR_MASK))
609 		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
610 
611 chk_mmu:
612 	val = readl(pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);
613 	if (!val)
614 		return;
615 	dev_err(edac_dev->dev,
616 		"CPU%d memory error MMU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X %s\n",
617 		ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
618 		MEMERR_CPU_MMUESR_ERRWAY_RD(val),
619 		MEMERR_CPU_MMUESR_ERRINDEX_RD(val),
620 		MEMERR_CPU_MMUESR_ERRINFO_RD(val),
621 		val & MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK ? "LSU" : "ICF");
622 	if (val & MEMERR_CPU_MMUESR_CERR_MASK)
623 		dev_err(edac_dev->dev, "One or more correctable error\n");
624 	if (val & MEMERR_CPU_MMUESR_MULTCERR_MASK)
625 		dev_err(edac_dev->dev, "Multiple correctable error\n");
626 	switch (MEMERR_CPU_MMUESR_ERRTYPE_RD(val)) {
627 	case 0:
628 		dev_err(edac_dev->dev, "Stage 1 UTB hit error\n");
629 		break;
630 	case 1:
631 		dev_err(edac_dev->dev, "Stage 1 UTB miss error\n");
632 		break;
633 	case 2:
634 		dev_err(edac_dev->dev, "Stage 1 UTB allocate error\n");
635 		break;
636 	case 3:
637 		dev_err(edac_dev->dev, "TMO operation single bank error\n");
638 		break;
639 	case 4:
640 		dev_err(edac_dev->dev, "Stage 2 UTB error\n");
641 		break;
642 	case 5:
643 		dev_err(edac_dev->dev, "Stage 2 UTB miss error\n");
644 		break;
645 	case 6:
646 		dev_err(edac_dev->dev, "Stage 2 UTB allocate error\n");
647 		break;
648 	case 7:
649 		dev_err(edac_dev->dev, "TMO operation multiple bank error\n");
650 		break;
651 	}
652 
653 	/* Clear any HW errors */
654 	writel(val, pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);
655 
656 	edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
657 }
658 
xgene_edac_pmd_l2_check(struct edac_device_ctl_info * edac_dev)659 static void xgene_edac_pmd_l2_check(struct edac_device_ctl_info *edac_dev)
660 {
661 	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
662 	void __iomem *pg_d;
663 	void __iomem *pg_e;
664 	u32 val_hi;
665 	u32 val_lo;
666 	u32 val;
667 
668 	/* Check L2 */
669 	pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
670 	val = readl(pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);
671 	if (!val)
672 		goto chk_l2c;
673 	val_lo = readl(pg_e + MEMERR_L2C_L2EALR_PAGE_OFFSET);
674 	val_hi = readl(pg_e + MEMERR_L2C_L2EAHR_PAGE_OFFSET);
675 	dev_err(edac_dev->dev,
676 		"PMD%d memory error L2C L2ESR 0x%08X @ 0x%08X.%08X\n",
677 		ctx->pmd, val, val_hi, val_lo);
678 	dev_err(edac_dev->dev,
679 		"ErrSyndrome 0x%02X ErrWay 0x%02X ErrCpu %d ErrGroup 0x%02X ErrAction 0x%02X\n",
680 		MEMERR_L2C_L2ESR_ERRSYN_RD(val),
681 		MEMERR_L2C_L2ESR_ERRWAY_RD(val),
682 		MEMERR_L2C_L2ESR_ERRCPU_RD(val),
683 		MEMERR_L2C_L2ESR_ERRGROUP_RD(val),
684 		MEMERR_L2C_L2ESR_ERRACTION_RD(val));
685 
686 	if (val & MEMERR_L2C_L2ESR_ERR_MASK)
687 		dev_err(edac_dev->dev, "One or more correctable error\n");
688 	if (val & MEMERR_L2C_L2ESR_MULTICERR_MASK)
689 		dev_err(edac_dev->dev, "Multiple correctable error\n");
690 	if (val & MEMERR_L2C_L2ESR_UCERR_MASK)
691 		dev_err(edac_dev->dev, "One or more uncorrectable error\n");
692 	if (val & MEMERR_L2C_L2ESR_MULTUCERR_MASK)
693 		dev_err(edac_dev->dev, "Multiple uncorrectable error\n");
694 
695 	switch (MEMERR_L2C_L2ESR_ERRTYPE_RD(val)) {
696 	case 0:
697 		dev_err(edac_dev->dev, "Outbound SDB parity error\n");
698 		break;
699 	case 1:
700 		dev_err(edac_dev->dev, "Inbound SDB parity error\n");
701 		break;
702 	case 2:
703 		dev_err(edac_dev->dev, "Tag ECC error\n");
704 		break;
705 	case 3:
706 		dev_err(edac_dev->dev, "Data ECC error\n");
707 		break;
708 	}
709 
710 	/* Clear any HW errors */
711 	writel(val, pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);
712 
713 	if (val & (MEMERR_L2C_L2ESR_ERR_MASK |
714 		   MEMERR_L2C_L2ESR_MULTICERR_MASK))
715 		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
716 	if (val & (MEMERR_L2C_L2ESR_UCERR_MASK |
717 		   MEMERR_L2C_L2ESR_MULTUCERR_MASK))
718 		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
719 
720 chk_l2c:
721 	/* Check if any memory request timed out on L2 cache */
722 	pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
723 	val = readl(pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
724 	if (val) {
725 		val_lo = readl(pg_d + CPUX_L2C_L2RTOALR_PAGE_OFFSET);
726 		val_hi = readl(pg_d + CPUX_L2C_L2RTOAHR_PAGE_OFFSET);
727 		dev_err(edac_dev->dev,
728 			"PMD%d L2C error L2C RTOSR 0x%08X @ 0x%08X.%08X\n",
729 			ctx->pmd, val, val_hi, val_lo);
730 		writel(val, pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
731 	}
732 }
733 
xgene_edac_pmd_check(struct edac_device_ctl_info * edac_dev)734 static void xgene_edac_pmd_check(struct edac_device_ctl_info *edac_dev)
735 {
736 	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
737 	unsigned int pcp_hp_stat;
738 	int i;
739 
740 	xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
741 	if (!((PMD0_MERR_MASK << ctx->pmd) & pcp_hp_stat))
742 		return;
743 
744 	/* Check CPU L1 error */
745 	for (i = 0; i < MAX_CPU_PER_PMD; i++)
746 		xgene_edac_pmd_l1_check(edac_dev, i);
747 
748 	/* Check CPU L2 error */
749 	xgene_edac_pmd_l2_check(edac_dev);
750 }
751 
xgene_edac_pmd_cpu_hw_cfg(struct edac_device_ctl_info * edac_dev,int cpu)752 static void xgene_edac_pmd_cpu_hw_cfg(struct edac_device_ctl_info *edac_dev,
753 				      int cpu)
754 {
755 	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
756 	void __iomem *pg_f = ctx->pmd_csr + cpu * CPU_CSR_STRIDE +
757 			     CPU_MEMERR_CPU_PAGE;
758 
759 	/*
760 	 * Enable CPU memory error:
761 	 *  MEMERR_CPU_ICFESRA, MEMERR_CPU_LSUESRA, and MEMERR_CPU_MMUESRA
762 	 */
763 	writel(0x00000301, pg_f + MEMERR_CPU_ICFECR_PAGE_OFFSET);
764 	writel(0x00000301, pg_f + MEMERR_CPU_LSUECR_PAGE_OFFSET);
765 	writel(0x00000101, pg_f + MEMERR_CPU_MMUECR_PAGE_OFFSET);
766 }
767 
xgene_edac_pmd_hw_cfg(struct edac_device_ctl_info * edac_dev)768 static void xgene_edac_pmd_hw_cfg(struct edac_device_ctl_info *edac_dev)
769 {
770 	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
771 	void __iomem *pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
772 	void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
773 
774 	/* Enable PMD memory error - MEMERR_L2C_L2ECR and L2C_L2RTOCR */
775 	writel(0x00000703, pg_e + MEMERR_L2C_L2ECR_PAGE_OFFSET);
776 	/* Configure L2C HW request time out feature if supported */
777 	if (ctx->version > 1)
778 		writel(0x00000119, pg_d + CPUX_L2C_L2RTOCR_PAGE_OFFSET);
779 }
780 
/*
 * Enable or disable error reporting for one PMD.
 *
 * In interrupt mode the PMD's top-level interrupt is unmasked (enable) or
 * masked (disable). On enable the PMD-level and per-CPU error control
 * registers are programmed as well; on disable they are left untouched.
 */
static void xgene_edac_pmd_hw_ctl(struct edac_device_ctl_info *edac_dev,
				  bool enable)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	int cpu;

	/* Enable PMD error interrupt */
	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		u32 pmd_bit = PMD0_MERR_MASK << ctx->pmd;

		if (enable)
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       pmd_bit);
		else
			xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
					       pmd_bit);
	}

	if (!enable)
		return;

	xgene_edac_pmd_hw_cfg(edac_dev);

	/* Two CPUs per a PMD */
	for (cpu = 0; cpu < MAX_CPU_PER_PMD; cpu++)
		xgene_edac_pmd_cpu_hw_cfg(edac_dev, cpu);
}
805 
xgene_edac_pmd_l1_inject_ctrl_write(struct file * file,const char __user * data,size_t count,loff_t * ppos)806 static ssize_t xgene_edac_pmd_l1_inject_ctrl_write(struct file *file,
807 						   const char __user *data,
808 						   size_t count, loff_t *ppos)
809 {
810 	struct edac_device_ctl_info *edac_dev = file->private_data;
811 	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
812 	void __iomem *cpux_pg_f;
813 	int i;
814 
815 	for (i = 0; i < MAX_CPU_PER_PMD; i++) {
816 		cpux_pg_f = ctx->pmd_csr + i * CPU_CSR_STRIDE +
817 			    CPU_MEMERR_CPU_PAGE;
818 
819 		writel(MEMERR_CPU_ICFESR_MULTCERR_MASK |
820 		       MEMERR_CPU_ICFESR_CERR_MASK,
821 		       cpux_pg_f + MEMERR_CPU_ICFESRA_PAGE_OFFSET);
822 		writel(MEMERR_CPU_LSUESR_MULTCERR_MASK |
823 		       MEMERR_CPU_LSUESR_CERR_MASK,
824 		       cpux_pg_f + MEMERR_CPU_LSUESRA_PAGE_OFFSET);
825 		writel(MEMERR_CPU_MMUESR_MULTCERR_MASK |
826 		       MEMERR_CPU_MMUESR_CERR_MASK,
827 		       cpux_pg_f + MEMERR_CPU_MMUESRA_PAGE_OFFSET);
828 	}
829 	return count;
830 }
831 
xgene_edac_pmd_l2_inject_ctrl_write(struct file * file,const char __user * data,size_t count,loff_t * ppos)832 static ssize_t xgene_edac_pmd_l2_inject_ctrl_write(struct file *file,
833 						   const char __user *data,
834 						   size_t count, loff_t *ppos)
835 {
836 	struct edac_device_ctl_info *edac_dev = file->private_data;
837 	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
838 	void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
839 
840 	writel(MEMERR_L2C_L2ESR_MULTUCERR_MASK |
841 	       MEMERR_L2C_L2ESR_MULTICERR_MASK |
842 	       MEMERR_L2C_L2ESR_UCERR_MASK |
843 	       MEMERR_L2C_L2ESR_ERR_MASK,
844 	       pg_e + MEMERR_L2C_L2ESRA_PAGE_OFFSET);
845 	return count;
846 }
847 
848 static const struct file_operations xgene_edac_pmd_debug_inject_fops[] = {
849 	{
850 	.open = simple_open,
851 	.write = xgene_edac_pmd_l1_inject_ctrl_write,
852 	.llseek = generic_file_llseek, },
853 	{
854 	.open = simple_open,
855 	.write = xgene_edac_pmd_l2_inject_ctrl_write,
856 	.llseek = generic_file_llseek, },
857 	{ }
858 };
859 
860 static void
xgene_edac_pmd_create_debugfs_nodes(struct edac_device_ctl_info * edac_dev)861 xgene_edac_pmd_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
862 {
863 	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
864 	struct dentry *dbgfs_dir;
865 	char name[10];
866 
867 	if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
868 		return;
869 
870 	snprintf(name, sizeof(name), "PMD%d", ctx->pmd);
871 	dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
872 	if (!dbgfs_dir)
873 		return;
874 
875 	edac_debugfs_create_file("l1_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
876 				 &xgene_edac_pmd_debug_inject_fops[0]);
877 	edac_debugfs_create_file("l2_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
878 				 &xgene_edac_pmd_debug_inject_fops[1]);
879 }
880 
/*
 * Tell whether PMD number @pmd is usable according to the @efuse value.
 *
 * A set bit at position @pmd marks the PMD as unavailable; a clear bit
 * marks it as available.
 *
 * @pmd originates from a devicetree property, so it is range-checked before
 * being used as a shift count: shifting by a negative amount or by the width
 * of the type or more is undefined behaviour in C. An index that has no bit
 * in @efuse is reported as unavailable.
 *
 * Returns 1 if the PMD is available, 0 otherwise.
 */
static int xgene_edac_pmd_available(u32 efuse, int pmd)
{
	if (pmd < 0 || pmd >= (int)(8 * sizeof(efuse)))
		return 0;

	return (efuse & (1U << pmd)) ? 0 : 1;
}
885 
xgene_edac_pmd_add(struct xgene_edac * edac,struct device_node * np,int version)886 static int xgene_edac_pmd_add(struct xgene_edac *edac, struct device_node *np,
887 			      int version)
888 {
889 	struct edac_device_ctl_info *edac_dev;
890 	struct xgene_edac_pmd_ctx *ctx;
891 	struct resource res;
892 	char edac_name[10];
893 	u32 pmd;
894 	int rc;
895 	u32 val;
896 
897 	if (!devres_open_group(edac->dev, xgene_edac_pmd_add, GFP_KERNEL))
898 		return -ENOMEM;
899 
900 	/* Determine if this PMD is disabled */
901 	if (of_property_read_u32(np, "pmd-controller", &pmd)) {
902 		dev_err(edac->dev, "no pmd-controller property\n");
903 		rc = -ENODEV;
904 		goto err_group;
905 	}
906 	rc = regmap_read(edac->efuse_map, 0, &val);
907 	if (rc)
908 		goto err_group;
909 	if (!xgene_edac_pmd_available(val, pmd)) {
910 		rc = -ENODEV;
911 		goto err_group;
912 	}
913 
914 	snprintf(edac_name, sizeof(edac_name), "l2c%d", pmd);
915 	edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
916 					      edac_name, 1, "l2c", 1, 2, NULL,
917 					      0, edac_device_alloc_index());
918 	if (!edac_dev) {
919 		rc = -ENOMEM;
920 		goto err_group;
921 	}
922 
923 	ctx = edac_dev->pvt_info;
924 	ctx->name = "xgene_pmd_err";
925 	ctx->pmd = pmd;
926 	ctx->edac = edac;
927 	ctx->edac_dev = edac_dev;
928 	ctx->ddev = *edac->dev;
929 	ctx->version = version;
930 	edac_dev->dev = &ctx->ddev;
931 	edac_dev->ctl_name = ctx->name;
932 	edac_dev->dev_name = ctx->name;
933 	edac_dev->mod_name = EDAC_MOD_STR;
934 
935 	rc = of_address_to_resource(np, 0, &res);
936 	if (rc < 0) {
937 		dev_err(edac->dev, "no PMD resource address\n");
938 		goto err_free;
939 	}
940 	ctx->pmd_csr = devm_ioremap_resource(edac->dev, &res);
941 	if (IS_ERR(ctx->pmd_csr)) {
942 		dev_err(edac->dev,
943 			"devm_ioremap_resource failed for PMD resource address\n");
944 		rc = PTR_ERR(ctx->pmd_csr);
945 		goto err_free;
946 	}
947 
948 	if (edac_op_state == EDAC_OPSTATE_POLL)
949 		edac_dev->edac_check = xgene_edac_pmd_check;
950 
951 	xgene_edac_pmd_create_debugfs_nodes(edac_dev);
952 
953 	rc = edac_device_add_device(edac_dev);
954 	if (rc > 0) {
955 		dev_err(edac->dev, "edac_device_add_device failed\n");
956 		rc = -ENOMEM;
957 		goto err_free;
958 	}
959 
960 	if (edac_op_state == EDAC_OPSTATE_INT)
961 		edac_dev->op_state = OP_RUNNING_INTERRUPT;
962 
963 	list_add(&ctx->next, &edac->pmds);
964 
965 	xgene_edac_pmd_hw_ctl(edac_dev, 1);
966 
967 	devres_remove_group(edac->dev, xgene_edac_pmd_add);
968 
969 	dev_info(edac->dev, "X-Gene EDAC PMD%d registered\n", ctx->pmd);
970 	return 0;
971 
972 err_free:
973 	edac_device_free_ctl_info(edac_dev);
974 err_group:
975 	devres_release_group(edac->dev, xgene_edac_pmd_add);
976 	return rc;
977 }
978 
xgene_edac_pmd_remove(struct xgene_edac_pmd_ctx * pmd)979 static int xgene_edac_pmd_remove(struct xgene_edac_pmd_ctx *pmd)
980 {
981 	struct edac_device_ctl_info *edac_dev = pmd->edac_dev;
982 
983 	xgene_edac_pmd_hw_ctl(edac_dev, 0);
984 	edac_device_del_device(edac_dev->dev);
985 	edac_device_free_ctl_info(edac_dev);
986 	return 0;
987 }
988 
/* L3 Error device */
#define L3C_ESR				(0x0A * 4)
#define  L3C_ESR_DATATAG_MASK		BIT(9)
#define  L3C_ESR_MULTIHIT_MASK		BIT(8)
#define  L3C_ESR_UCEVICT_MASK		BIT(6)
#define  L3C_ESR_MULTIUCERR_MASK	BIT(5)
#define  L3C_ESR_MULTICERR_MASK		BIT(4)
#define  L3C_ESR_UCERR_MASK		BIT(3)
#define  L3C_ESR_CERR_MASK		BIT(2)
#define  L3C_ESR_UCERRINTR_MASK		BIT(1)
#define  L3C_ESR_CERRINTR_MASK		BIT(0)
#define L3C_ECR				(0x0B * 4)
#define  L3C_ECR_UCINTREN		BIT(3)
#define  L3C_ECR_CINTREN		BIT(2)
#define  L3C_UCERREN			BIT(1)
#define  L3C_CERREN			BIT(0)
#define L3C_ELR				(0x0C * 4)
/*
 * Field extractors: the argument is parenthesized so that an expression
 * such as (a | b) is masked as a whole rather than only its last operand.
 */
#define  L3C_ELR_ERRSYN(src)		(((src) & 0xFF800000) >> 23)
#define  L3C_ELR_ERRWAY(src)		(((src) & 0x007E0000) >> 17)
#define  L3C_ELR_AGENTID(src)		(((src) & 0x0001E000) >> 13)
#define  L3C_ELR_ERRGRP(src)		(((src) & 0x00000F00) >> 8)
#define  L3C_ELR_OPTYPE(src)		(((src) & 0x000000F0) >> 4)
#define  L3C_ELR_PADDRHIGH(src)		((src) & 0x0000000F)
#define L3C_AELR			(0x0D * 4)
#define L3C_BELR			(0x0E * 4)
#define  L3C_BELR_BANK(src)		((src) & 0x0000000F)
1015 
1016 struct xgene_edac_dev_ctx {
1017 	struct list_head	next;
1018 	struct device		ddev;
1019 	char			*name;
1020 	struct xgene_edac	*edac;
1021 	struct edac_device_ctl_info *edac_dev;
1022 	int			edac_idx;
1023 	void __iomem		*dev_csr;
1024 	int			version;
1025 };
1026 
1027 /*
1028  * Version 1 of the L3 controller has broken single bit correctable logic for
1029  * certain error syndromes. Log them as uncorrectable in that case.
1030  */
xgene_edac_l3_promote_to_uc_err(u32 l3cesr,u32 l3celr)1031 static bool xgene_edac_l3_promote_to_uc_err(u32 l3cesr, u32 l3celr)
1032 {
1033 	if (l3cesr & L3C_ESR_DATATAG_MASK) {
1034 		switch (L3C_ELR_ERRSYN(l3celr)) {
1035 		case 0x13C:
1036 		case 0x0B4:
1037 		case 0x007:
1038 		case 0x00D:
1039 		case 0x00E:
1040 		case 0x019:
1041 		case 0x01A:
1042 		case 0x01C:
1043 		case 0x04E:
1044 		case 0x041:
1045 			return true;
1046 		}
1047 	} else if (L3C_ELR_ERRWAY(l3celr) == 9)
1048 		return true;
1049 
1050 	return false;
1051 }
1052 
xgene_edac_l3_check(struct edac_device_ctl_info * edac_dev)1053 static void xgene_edac_l3_check(struct edac_device_ctl_info *edac_dev)
1054 {
1055 	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1056 	u32 l3cesr;
1057 	u32 l3celr;
1058 	u32 l3caelr;
1059 	u32 l3cbelr;
1060 
1061 	l3cesr = readl(ctx->dev_csr + L3C_ESR);
1062 	if (!(l3cesr & (L3C_ESR_UCERR_MASK | L3C_ESR_CERR_MASK)))
1063 		return;
1064 
1065 	if (l3cesr & L3C_ESR_UCERR_MASK)
1066 		dev_err(edac_dev->dev, "L3C uncorrectable error\n");
1067 	if (l3cesr & L3C_ESR_CERR_MASK)
1068 		dev_warn(edac_dev->dev, "L3C correctable error\n");
1069 
1070 	l3celr = readl(ctx->dev_csr + L3C_ELR);
1071 	l3caelr = readl(ctx->dev_csr + L3C_AELR);
1072 	l3cbelr = readl(ctx->dev_csr + L3C_BELR);
1073 	if (l3cesr & L3C_ESR_MULTIHIT_MASK)
1074 		dev_err(edac_dev->dev, "L3C multiple hit error\n");
1075 	if (l3cesr & L3C_ESR_UCEVICT_MASK)
1076 		dev_err(edac_dev->dev,
1077 			"L3C dropped eviction of line with error\n");
1078 	if (l3cesr & L3C_ESR_MULTIUCERR_MASK)
1079 		dev_err(edac_dev->dev, "L3C multiple uncorrectable error\n");
1080 	if (l3cesr & L3C_ESR_DATATAG_MASK)
1081 		dev_err(edac_dev->dev,
1082 			"L3C data error syndrome 0x%X group 0x%X\n",
1083 			L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRGRP(l3celr));
1084 	else
1085 		dev_err(edac_dev->dev,
1086 			"L3C tag error syndrome 0x%X Way of Tag 0x%X Agent ID 0x%X Operation type 0x%X\n",
1087 			L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRWAY(l3celr),
1088 			L3C_ELR_AGENTID(l3celr), L3C_ELR_OPTYPE(l3celr));
1089 	/*
1090 	 * NOTE: Address [41:38] in L3C_ELR_PADDRHIGH(l3celr).
1091 	 *       Address [37:6] in l3caelr. Lower 6 bits are zero.
1092 	 */
1093 	dev_err(edac_dev->dev, "L3C error address 0x%08X.%08X bank %d\n",
1094 		L3C_ELR_PADDRHIGH(l3celr) << 6 | (l3caelr >> 26),
1095 		(l3caelr & 0x3FFFFFFF) << 6, L3C_BELR_BANK(l3cbelr));
1096 	dev_err(edac_dev->dev,
1097 		"L3C error status register value 0x%X\n", l3cesr);
1098 
1099 	/* Clear L3C error interrupt */
1100 	writel(0, ctx->dev_csr + L3C_ESR);
1101 
1102 	if (ctx->version <= 1 &&
1103 	    xgene_edac_l3_promote_to_uc_err(l3cesr, l3celr)) {
1104 		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
1105 		return;
1106 	}
1107 	if (l3cesr & L3C_ESR_CERR_MASK)
1108 		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
1109 	if (l3cesr & L3C_ESR_UCERR_MASK)
1110 		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
1111 }
1112 
/*
 * Configure L3 error reporting.
 *
 * Error detection (L3C_UCERREN | L3C_CERREN) is always switched on.  Only
 * when the device runs in interrupt mode are the L3 interrupt enables and
 * the top level PCP interrupt masks updated according to @enable.
 */
static void xgene_edac_l3_hw_init(struct edac_device_ctl_info *edac_dev,
				  bool enable)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	const bool irq_mode = edac_dev->op_state == OP_RUNNING_INTERRUPT;
	const u32 irq_en = L3C_ECR_UCINTREN | L3C_ECR_CINTREN;
	u32 ecr;

	ecr = readl(ctx->dev_csr + L3C_ECR);
	ecr |= L3C_UCERREN | L3C_CERREN;
	/* On disable, we just disable interrupt but keep error enabled */
	if (irq_mode) {
		if (enable)
			ecr |= irq_en;
		else
			ecr &= ~irq_en;
	}
	writel(ecr, ctx->dev_csr + L3C_ECR);

	if (!irq_mode)
		return;

	/* Enable/disable L3 error top level interrupt */
	if (enable) {
		xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
				       L3C_UNCORR_ERR_MASK);
		xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
				       L3C_CORR_ERR_MASK);
	} else {
		xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
				       L3C_UNCORR_ERR_MASK);
		xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
				       L3C_CORR_ERR_MASK);
	}
}
1145 
xgene_edac_l3_inject_ctrl_write(struct file * file,const char __user * data,size_t count,loff_t * ppos)1146 static ssize_t xgene_edac_l3_inject_ctrl_write(struct file *file,
1147 					       const char __user *data,
1148 					       size_t count, loff_t *ppos)
1149 {
1150 	struct edac_device_ctl_info *edac_dev = file->private_data;
1151 	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1152 
1153 	/* Generate all errors */
1154 	writel(0xFFFFFFFF, ctx->dev_csr + L3C_ESR);
1155 	return count;
1156 }
1157 
1158 static const struct file_operations xgene_edac_l3_debug_inject_fops = {
1159 	.open = simple_open,
1160 	.write = xgene_edac_l3_inject_ctrl_write,
1161 	.llseek = generic_file_llseek
1162 };
1163 
1164 static void
xgene_edac_l3_create_debugfs_nodes(struct edac_device_ctl_info * edac_dev)1165 xgene_edac_l3_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
1166 {
1167 	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1168 	struct dentry *dbgfs_dir;
1169 	char name[10];
1170 
1171 	if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
1172 		return;
1173 
1174 	snprintf(name, sizeof(name), "l3c%d", ctx->edac_idx);
1175 	dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
1176 	if (!dbgfs_dir)
1177 		return;
1178 
1179 	debugfs_create_file("l3_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
1180 			    &xgene_edac_l3_debug_inject_fops);
1181 }
1182 
xgene_edac_l3_add(struct xgene_edac * edac,struct device_node * np,int version)1183 static int xgene_edac_l3_add(struct xgene_edac *edac, struct device_node *np,
1184 			     int version)
1185 {
1186 	struct edac_device_ctl_info *edac_dev;
1187 	struct xgene_edac_dev_ctx *ctx;
1188 	struct resource res;
1189 	void __iomem *dev_csr;
1190 	int edac_idx;
1191 	int rc = 0;
1192 
1193 	if (!devres_open_group(edac->dev, xgene_edac_l3_add, GFP_KERNEL))
1194 		return -ENOMEM;
1195 
1196 	rc = of_address_to_resource(np, 0, &res);
1197 	if (rc < 0) {
1198 		dev_err(edac->dev, "no L3 resource address\n");
1199 		goto err_release_group;
1200 	}
1201 	dev_csr = devm_ioremap_resource(edac->dev, &res);
1202 	if (IS_ERR(dev_csr)) {
1203 		dev_err(edac->dev,
1204 			"devm_ioremap_resource failed for L3 resource address\n");
1205 		rc = PTR_ERR(dev_csr);
1206 		goto err_release_group;
1207 	}
1208 
1209 	edac_idx = edac_device_alloc_index();
1210 	edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
1211 					      "l3c", 1, "l3c", 1, 0, NULL, 0,
1212 					      edac_idx);
1213 	if (!edac_dev) {
1214 		rc = -ENOMEM;
1215 		goto err_release_group;
1216 	}
1217 
1218 	ctx = edac_dev->pvt_info;
1219 	ctx->dev_csr = dev_csr;
1220 	ctx->name = "xgene_l3_err";
1221 	ctx->edac_idx = edac_idx;
1222 	ctx->edac = edac;
1223 	ctx->edac_dev = edac_dev;
1224 	ctx->ddev = *edac->dev;
1225 	ctx->version = version;
1226 	edac_dev->dev = &ctx->ddev;
1227 	edac_dev->ctl_name = ctx->name;
1228 	edac_dev->dev_name = ctx->name;
1229 	edac_dev->mod_name = EDAC_MOD_STR;
1230 
1231 	if (edac_op_state == EDAC_OPSTATE_POLL)
1232 		edac_dev->edac_check = xgene_edac_l3_check;
1233 
1234 	xgene_edac_l3_create_debugfs_nodes(edac_dev);
1235 
1236 	rc = edac_device_add_device(edac_dev);
1237 	if (rc > 0) {
1238 		dev_err(edac->dev, "failed edac_device_add_device()\n");
1239 		rc = -ENOMEM;
1240 		goto err_ctl_free;
1241 	}
1242 
1243 	if (edac_op_state == EDAC_OPSTATE_INT)
1244 		edac_dev->op_state = OP_RUNNING_INTERRUPT;
1245 
1246 	list_add(&ctx->next, &edac->l3s);
1247 
1248 	xgene_edac_l3_hw_init(edac_dev, 1);
1249 
1250 	devres_remove_group(edac->dev, xgene_edac_l3_add);
1251 
1252 	dev_info(edac->dev, "X-Gene EDAC L3 registered\n");
1253 	return 0;
1254 
1255 err_ctl_free:
1256 	edac_device_free_ctl_info(edac_dev);
1257 err_release_group:
1258 	devres_release_group(edac->dev, xgene_edac_l3_add);
1259 	return rc;
1260 }
1261 
xgene_edac_l3_remove(struct xgene_edac_dev_ctx * l3)1262 static int xgene_edac_l3_remove(struct xgene_edac_dev_ctx *l3)
1263 {
1264 	struct edac_device_ctl_info *edac_dev = l3->edac_dev;
1265 
1266 	xgene_edac_l3_hw_init(edac_dev, 0);
1267 	edac_device_del_device(l3->edac->dev);
1268 	edac_device_free_ctl_info(edac_dev);
1269 	return 0;
1270 }
1271 
/* SoC error device */

/* IOB AXI slave 0 transaction error registers */
#define IOBAXIS0TRANSERRINTSTS		0x0000
#define  IOBAXIS0_M_ILLEGAL_ACCESS_MASK	BIT(1)
#define  IOBAXIS0_ILLEGAL_ACCESS_MASK	BIT(0)
#define IOBAXIS0TRANSERRINTMSK		0x0004
#define IOBAXIS0TRANSERRREQINFOL	0x0008
#define IOBAXIS0TRANSERRREQINFOH	0x000c
#define  REQTYPE_RD(src)		((src) & BIT(0))
#define  ERRADDRH_RD(src)		(((src) & 0xffc00000) >> 22)

/* IOB AXI slave 1 transaction error registers */
#define IOBAXIS1TRANSERRINTSTS		0x0010
#define IOBAXIS1TRANSERRINTMSK		0x0014
#define IOBAXIS1TRANSERRREQINFOL	0x0018
#define IOBAXIS1TRANSERRREQINFOH	0x001c

/* IOB processing agent (PA) transaction error status */
#define IOBPATRANSERRINTSTS		0x0020
#define  IOBPA_M_REQIDRAM_CORRUPT_MASK	BIT(7)
#define  IOBPA_REQIDRAM_CORRUPT_MASK	BIT(6)
#define  IOBPA_M_TRANS_CORRUPT_MASK	BIT(5)
#define  IOBPA_TRANS_CORRUPT_MASK	BIT(4)
#define  IOBPA_M_WDATA_CORRUPT_MASK	BIT(3)
#define  IOBPA_WDATA_CORRUPT_MASK	BIT(2)
#define  IOBPA_M_RDATA_CORRUPT_MASK	BIT(1)
#define  IOBPA_RDATA_CORRUPT_MASK	BIT(0)

/* IOB bridge agent (BA) transaction error registers */
#define IOBBATRANSERRINTSTS		0x0030
#define  M_ILLEGAL_ACCESS_MASK		BIT(15)
#define  ILLEGAL_ACCESS_MASK		BIT(14)
#define  M_WIDRAM_CORRUPT_MASK		BIT(13)
#define  WIDRAM_CORRUPT_MASK		BIT(12)
#define  M_RIDRAM_CORRUPT_MASK		BIT(11)
#define  RIDRAM_CORRUPT_MASK		BIT(10)
#define  M_TRANS_CORRUPT_MASK		BIT(9)
#define  TRANS_CORRUPT_MASK		BIT(8)
#define  M_WDATA_CORRUPT_MASK		BIT(7)
#define  WDATA_CORRUPT_MASK		BIT(6)
#define  M_RBM_POISONED_REQ_MASK	BIT(5)
#define  RBM_POISONED_REQ_MASK		BIT(4)
#define  M_XGIC_POISONED_REQ_MASK	BIT(3)
#define  XGIC_POISONED_REQ_MASK		BIT(2)
#define  M_WRERR_RESP_MASK		BIT(1)
#define  WRERR_RESP_MASK		BIT(0)
#define IOBBATRANSERRREQINFOL		0x0038
#define IOBBATRANSERRREQINFOH		0x003c
#define  REQTYPE_F2_RD(src)		((src) & BIT(0))
#define  ERRADDRH_F2_RD(src)		(((src) & 0xffc00000) >> 22)
#define IOBBATRANSERRCSWREQID		0x0040

/* XGIC transaction error registers */
#define XGICTRANSERRINTSTS		0x0050
#define  M_WR_ACCESS_ERR_MASK		BIT(3)
#define  WR_ACCESS_ERR_MASK		BIT(2)
#define  M_RD_ACCESS_ERR_MASK		BIT(1)
#define  RD_ACCESS_ERR_MASK		BIT(0)
#define XGICTRANSERRINTMSK		0x0054
#define XGICTRANSERRREQINFO		0x0058
#define  REQTYPE_MASK			BIT(26)
#define  ERRADDR_RD(src)		((src) & 0x03ffffff)

/* IOB global memory error status and address registers */
#define GLBL_ERR_STS			0x0800
#define  MDED_ERR_MASK			BIT(3)
#define  DED_ERR_MASK			BIT(2)
#define  MSEC_ERR_MASK			BIT(1)
#define  SEC_ERR_MASK			BIT(0)
#define GLBL_SEC_ERRL			0x0810
#define GLBL_SEC_ERRH			0x0818
#define GLBL_MSEC_ERRL			0x0820
#define GLBL_MSEC_ERRH			0x0828
#define GLBL_DED_ERRL			0x0830
#define GLBL_DED_ERRLMASK		0x0834
#define GLBL_DED_ERRH			0x0838
#define GLBL_DED_ERRHMASK		0x083c
#define GLBL_MDED_ERRL			0x0840
#define GLBL_MDED_ERRLMASK		0x0844
#define GLBL_MDED_ERRH			0x0848
#define GLBL_MDED_ERRHMASK		0x084c

/* IO Bus Registers */
#define RBCSR				0x0000
#define  STICKYERR_MASK			BIT(0)
#define RBEIR				0x0008
#define  AGENT_OFFLINE_ERR_MASK		BIT(30)
#define  UNIMPL_RBPAGE_ERR_MASK		BIT(29)
#define  WORD_ALIGNED_ERR_MASK		BIT(28)
#define  PAGE_ACCESS_ERR_MASK		BIT(27)
#define  WRITE_ACCESS_MASK		BIT(26)
#define  RBERRADDR_RD(src)		((src) & 0x03FFFFFF)
1353 
/*
 * Names used when reporting SoC memory parity errors on version 1
 * hardware; the table is indexed by the bit position set in the memory
 * error status word.
 */
static const char * const soc_mem_err_v1[] = {
	[0]  = "10GbE0",
	[1]  = "10GbE1",
	[2]  = "Security",
	[3]  = "SATA45",
	[4]  = "SATA23/ETH23",
	[5]  = "SATA01/ETH01",
	[6]  = "USB1",
	[7]  = "USB0",
	[8]  = "QML",
	[9]  = "QM0",
	[10] = "QM1 (XGbE01)",
	[11] = "PCIE4",
	[12] = "PCIE3",
	[13] = "PCIE2",
	[14] = "PCIE1",
	[15] = "PCIE0",
	[16] = "CTX Manager",
	[17] = "OCM",
	[18] = "1GbE",
	[19] = "CLE",
	[20] = "AHBC",
	[21] = "PktDMA",
	[22] = "GFC",
	[23] = "MSLIM",
	[24] = "10GbE2",
	[25] = "10GbE3",
	[26] = "QM2 (XGbE23)",
	[27] = "IOB",
	[28] = "unknown",
	[29] = "unknown",
	[30] = "unknown",
	[31] = "unknown",
};
1388 
xgene_edac_iob_gic_report(struct edac_device_ctl_info * edac_dev)1389 static void xgene_edac_iob_gic_report(struct edac_device_ctl_info *edac_dev)
1390 {
1391 	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1392 	u32 err_addr_lo;
1393 	u32 err_addr_hi;
1394 	u32 reg;
1395 	u32 info;
1396 
1397 	/* GIC transaction error interrupt */
1398 	reg = readl(ctx->dev_csr + XGICTRANSERRINTSTS);
1399 	if (!reg)
1400 		goto chk_iob_err;
1401 	dev_err(edac_dev->dev, "XGIC transaction error\n");
1402 	if (reg & RD_ACCESS_ERR_MASK)
1403 		dev_err(edac_dev->dev, "XGIC read size error\n");
1404 	if (reg & M_RD_ACCESS_ERR_MASK)
1405 		dev_err(edac_dev->dev, "Multiple XGIC read size error\n");
1406 	if (reg & WR_ACCESS_ERR_MASK)
1407 		dev_err(edac_dev->dev, "XGIC write size error\n");
1408 	if (reg & M_WR_ACCESS_ERR_MASK)
1409 		dev_err(edac_dev->dev, "Multiple XGIC write size error\n");
1410 	info = readl(ctx->dev_csr + XGICTRANSERRREQINFO);
1411 	dev_err(edac_dev->dev, "XGIC %s access @ 0x%08X (0x%08X)\n",
1412 		info & REQTYPE_MASK ? "read" : "write", ERRADDR_RD(info),
1413 		info);
1414 	writel(reg, ctx->dev_csr + XGICTRANSERRINTSTS);
1415 
1416 chk_iob_err:
1417 	/* IOB memory error */
1418 	reg = readl(ctx->dev_csr + GLBL_ERR_STS);
1419 	if (!reg)
1420 		return;
1421 	if (reg & SEC_ERR_MASK) {
1422 		err_addr_lo = readl(ctx->dev_csr + GLBL_SEC_ERRL);
1423 		err_addr_hi = readl(ctx->dev_csr + GLBL_SEC_ERRH);
1424 		dev_err(edac_dev->dev,
1425 			"IOB single-bit correctable memory at 0x%08X.%08X error\n",
1426 			err_addr_lo, err_addr_hi);
1427 		writel(err_addr_lo, ctx->dev_csr + GLBL_SEC_ERRL);
1428 		writel(err_addr_hi, ctx->dev_csr + GLBL_SEC_ERRH);
1429 	}
1430 	if (reg & MSEC_ERR_MASK) {
1431 		err_addr_lo = readl(ctx->dev_csr + GLBL_MSEC_ERRL);
1432 		err_addr_hi = readl(ctx->dev_csr + GLBL_MSEC_ERRH);
1433 		dev_err(edac_dev->dev,
1434 			"IOB multiple single-bit correctable memory at 0x%08X.%08X error\n",
1435 			err_addr_lo, err_addr_hi);
1436 		writel(err_addr_lo, ctx->dev_csr + GLBL_MSEC_ERRL);
1437 		writel(err_addr_hi, ctx->dev_csr + GLBL_MSEC_ERRH);
1438 	}
1439 	if (reg & (SEC_ERR_MASK | MSEC_ERR_MASK))
1440 		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
1441 
1442 	if (reg & DED_ERR_MASK) {
1443 		err_addr_lo = readl(ctx->dev_csr + GLBL_DED_ERRL);
1444 		err_addr_hi = readl(ctx->dev_csr + GLBL_DED_ERRH);
1445 		dev_err(edac_dev->dev,
1446 			"IOB double-bit uncorrectable memory at 0x%08X.%08X error\n",
1447 			err_addr_lo, err_addr_hi);
1448 		writel(err_addr_lo, ctx->dev_csr + GLBL_DED_ERRL);
1449 		writel(err_addr_hi, ctx->dev_csr + GLBL_DED_ERRH);
1450 	}
1451 	if (reg & MDED_ERR_MASK) {
1452 		err_addr_lo = readl(ctx->dev_csr + GLBL_MDED_ERRL);
1453 		err_addr_hi = readl(ctx->dev_csr + GLBL_MDED_ERRH);
1454 		dev_err(edac_dev->dev,
1455 			"Multiple IOB double-bit uncorrectable memory at 0x%08X.%08X error\n",
1456 			err_addr_lo, err_addr_hi);
1457 		writel(err_addr_lo, ctx->dev_csr + GLBL_MDED_ERRL);
1458 		writel(err_addr_hi, ctx->dev_csr + GLBL_MDED_ERRH);
1459 	}
1460 	if (reg & (DED_ERR_MASK | MDED_ERR_MASK))
1461 		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
1462 }
1463 
xgene_edac_rb_report(struct edac_device_ctl_info * edac_dev)1464 static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev)
1465 {
1466 	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1467 	u32 err_addr_lo;
1468 	u32 err_addr_hi;
1469 	u32 reg;
1470 
1471 	/* If the register bus resource isn't available, just skip it */
1472 	if (!ctx->edac->rb_map)
1473 		goto rb_skip;
1474 
1475 	/*
1476 	 * Check RB access errors
1477 	 * 1. Out of range
1478 	 * 2. Un-implemented page
1479 	 * 3. Un-aligned access
1480 	 * 4. Offline slave IP
1481 	 */
1482 	if (regmap_read(ctx->edac->rb_map, RBCSR, &reg))
1483 		return;
1484 	if (reg & STICKYERR_MASK) {
1485 		bool write;
1486 		u32 address;
1487 
1488 		dev_err(edac_dev->dev, "IOB bus access error(s)\n");
1489 		if (regmap_read(ctx->edac->rb_map, RBEIR, &reg))
1490 			return;
1491 		write = reg & WRITE_ACCESS_MASK ? 1 : 0;
1492 		address = RBERRADDR_RD(reg);
1493 		if (reg & AGENT_OFFLINE_ERR_MASK)
1494 			dev_err(edac_dev->dev,
1495 				"IOB bus %s access to offline agent error\n",
1496 				write ? "write" : "read");
1497 		if (reg & UNIMPL_RBPAGE_ERR_MASK)
1498 			dev_err(edac_dev->dev,
1499 				"IOB bus %s access to unimplemented page error\n",
1500 				write ? "write" : "read");
1501 		if (reg & WORD_ALIGNED_ERR_MASK)
1502 			dev_err(edac_dev->dev,
1503 				"IOB bus %s word aligned access error\n",
1504 				write ? "write" : "read");
1505 		if (reg & PAGE_ACCESS_ERR_MASK)
1506 			dev_err(edac_dev->dev,
1507 				"IOB bus %s to page out of range access error\n",
1508 				write ? "write" : "read");
1509 		if (regmap_write(ctx->edac->rb_map, RBEIR, 0))
1510 			return;
1511 		if (regmap_write(ctx->edac->rb_map, RBCSR, 0))
1512 			return;
1513 	}
1514 rb_skip:
1515 
1516 	/* IOB Bridge agent transaction error interrupt */
1517 	reg = readl(ctx->dev_csr + IOBBATRANSERRINTSTS);
1518 	if (!reg)
1519 		return;
1520 
1521 	dev_err(edac_dev->dev, "IOB bridge agent (BA) transaction error\n");
1522 	if (reg & WRERR_RESP_MASK)
1523 		dev_err(edac_dev->dev, "IOB BA write response error\n");
1524 	if (reg & M_WRERR_RESP_MASK)
1525 		dev_err(edac_dev->dev,
1526 			"Multiple IOB BA write response error\n");
1527 	if (reg & XGIC_POISONED_REQ_MASK)
1528 		dev_err(edac_dev->dev, "IOB BA XGIC poisoned write error\n");
1529 	if (reg & M_XGIC_POISONED_REQ_MASK)
1530 		dev_err(edac_dev->dev,
1531 			"Multiple IOB BA XGIC poisoned write error\n");
1532 	if (reg & RBM_POISONED_REQ_MASK)
1533 		dev_err(edac_dev->dev, "IOB BA RBM poisoned write error\n");
1534 	if (reg & M_RBM_POISONED_REQ_MASK)
1535 		dev_err(edac_dev->dev,
1536 			"Multiple IOB BA RBM poisoned write error\n");
1537 	if (reg & WDATA_CORRUPT_MASK)
1538 		dev_err(edac_dev->dev, "IOB BA write error\n");
1539 	if (reg & M_WDATA_CORRUPT_MASK)
1540 		dev_err(edac_dev->dev, "Multiple IOB BA write error\n");
1541 	if (reg & TRANS_CORRUPT_MASK)
1542 		dev_err(edac_dev->dev, "IOB BA transaction error\n");
1543 	if (reg & M_TRANS_CORRUPT_MASK)
1544 		dev_err(edac_dev->dev, "Multiple IOB BA transaction error\n");
1545 	if (reg & RIDRAM_CORRUPT_MASK)
1546 		dev_err(edac_dev->dev,
1547 			"IOB BA RDIDRAM read transaction ID error\n");
1548 	if (reg & M_RIDRAM_CORRUPT_MASK)
1549 		dev_err(edac_dev->dev,
1550 			"Multiple IOB BA RDIDRAM read transaction ID error\n");
1551 	if (reg & WIDRAM_CORRUPT_MASK)
1552 		dev_err(edac_dev->dev,
1553 			"IOB BA RDIDRAM write transaction ID error\n");
1554 	if (reg & M_WIDRAM_CORRUPT_MASK)
1555 		dev_err(edac_dev->dev,
1556 			"Multiple IOB BA RDIDRAM write transaction ID error\n");
1557 	if (reg & ILLEGAL_ACCESS_MASK)
1558 		dev_err(edac_dev->dev,
1559 			"IOB BA XGIC/RB illegal access error\n");
1560 	if (reg & M_ILLEGAL_ACCESS_MASK)
1561 		dev_err(edac_dev->dev,
1562 			"Multiple IOB BA XGIC/RB illegal access error\n");
1563 
1564 	err_addr_lo = readl(ctx->dev_csr + IOBBATRANSERRREQINFOL);
1565 	err_addr_hi = readl(ctx->dev_csr + IOBBATRANSERRREQINFOH);
1566 	dev_err(edac_dev->dev, "IOB BA %s access at 0x%02X.%08X (0x%08X)\n",
1567 		REQTYPE_F2_RD(err_addr_hi) ? "read" : "write",
1568 		ERRADDRH_F2_RD(err_addr_hi), err_addr_lo, err_addr_hi);
1569 	if (reg & WRERR_RESP_MASK)
1570 		dev_err(edac_dev->dev, "IOB BA requestor ID 0x%08X\n",
1571 			readl(ctx->dev_csr + IOBBATRANSERRCSWREQID));
1572 	writel(reg, ctx->dev_csr + IOBBATRANSERRINTSTS);
1573 }
1574 
xgene_edac_pa_report(struct edac_device_ctl_info * edac_dev)1575 static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev)
1576 {
1577 	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1578 	u32 err_addr_lo;
1579 	u32 err_addr_hi;
1580 	u32 reg;
1581 
1582 	/* IOB Processing agent transaction error interrupt */
1583 	reg = readl(ctx->dev_csr + IOBPATRANSERRINTSTS);
1584 	if (!reg)
1585 		goto chk_iob_axi0;
1586 	dev_err(edac_dev->dev, "IOB processing agent (PA) transaction error\n");
1587 	if (reg & IOBPA_RDATA_CORRUPT_MASK)
1588 		dev_err(edac_dev->dev, "IOB PA read data RAM error\n");
1589 	if (reg & IOBPA_M_RDATA_CORRUPT_MASK)
1590 		dev_err(edac_dev->dev,
1591 			"Multiple IOB PA read data RAM error\n");
1592 	if (reg & IOBPA_WDATA_CORRUPT_MASK)
1593 		dev_err(edac_dev->dev, "IOB PA write data RAM error\n");
1594 	if (reg & IOBPA_M_WDATA_CORRUPT_MASK)
1595 		dev_err(edac_dev->dev,
1596 			"Multiple IOB PA write data RAM error\n");
1597 	if (reg & IOBPA_TRANS_CORRUPT_MASK)
1598 		dev_err(edac_dev->dev, "IOB PA transaction error\n");
1599 	if (reg & IOBPA_M_TRANS_CORRUPT_MASK)
1600 		dev_err(edac_dev->dev, "Multiple IOB PA transaction error\n");
1601 	if (reg & IOBPA_REQIDRAM_CORRUPT_MASK)
1602 		dev_err(edac_dev->dev, "IOB PA transaction ID RAM error\n");
1603 	if (reg & IOBPA_M_REQIDRAM_CORRUPT_MASK)
1604 		dev_err(edac_dev->dev,
1605 			"Multiple IOB PA transaction ID RAM error\n");
1606 	writel(reg, ctx->dev_csr + IOBPATRANSERRINTSTS);
1607 
1608 chk_iob_axi0:
1609 	/* IOB AXI0 Error */
1610 	reg = readl(ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);
1611 	if (!reg)
1612 		goto chk_iob_axi1;
1613 	err_addr_lo = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOL);
1614 	err_addr_hi = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOH);
1615 	dev_err(edac_dev->dev,
1616 		"%sAXI slave 0 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
1617 		reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
1618 		REQTYPE_RD(err_addr_hi) ? "read" : "write",
1619 		ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
1620 	writel(reg, ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);
1621 
1622 chk_iob_axi1:
1623 	/* IOB AXI1 Error */
1624 	reg = readl(ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
1625 	if (!reg)
1626 		return;
1627 	err_addr_lo = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOL);
1628 	err_addr_hi = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOH);
1629 	dev_err(edac_dev->dev,
1630 		"%sAXI slave 1 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
1631 		reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
1632 		REQTYPE_RD(err_addr_hi) ? "read" : "write",
1633 		ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
1634 	writel(reg, ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
1635 }
1636 
xgene_edac_soc_check(struct edac_device_ctl_info * edac_dev)1637 static void xgene_edac_soc_check(struct edac_device_ctl_info *edac_dev)
1638 {
1639 	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
1640 	const char * const *soc_mem_err = NULL;
1641 	u32 pcp_hp_stat;
1642 	u32 pcp_lp_stat;
1643 	u32 reg;
1644 	int i;
1645 
1646 	xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
1647 	xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
1648 	xgene_edac_pcp_rd(ctx->edac, MEMERRINTSTS, &reg);
1649 	if (!((pcp_hp_stat & (IOB_PA_ERR_MASK | IOB_BA_ERR_MASK |
1650 			      IOB_XGIC_ERR_MASK | IOB_RB_ERR_MASK)) ||
1651 	      (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) || reg))
1652 		return;
1653 
1654 	if (pcp_hp_stat & IOB_XGIC_ERR_MASK)
1655 		xgene_edac_iob_gic_report(edac_dev);
1656 
1657 	if (pcp_hp_stat & (IOB_RB_ERR_MASK | IOB_BA_ERR_MASK))
1658 		xgene_edac_rb_report(edac_dev);
1659 
1660 	if (pcp_hp_stat & IOB_PA_ERR_MASK)
1661 		xgene_edac_pa_report(edac_dev);
1662 
1663 	if (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) {
1664 		dev_info(edac_dev->dev,
1665 			 "CSW switch trace correctable memory parity error\n");
1666 		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
1667 	}
1668 
1669 	if (!reg)
1670 		return;
1671 	if (ctx->version == 1)
1672 		soc_mem_err = soc_mem_err_v1;
1673 	if (!soc_mem_err) {
1674 		dev_err(edac_dev->dev, "SoC memory parity error 0x%08X\n",
1675 			reg);
1676 		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
1677 		return;
1678 	}
1679 	for (i = 0; i < 31; i++) {
1680 		if (reg & (1 << i)) {
1681 			dev_err(edac_dev->dev, "%s memory parity error\n",
1682 				soc_mem_err[i]);
1683 			edac_device_handle_ue(edac_dev, 0, 0,
1684 					      edac_dev->ctl_name);
1685 		}
1686 	}
1687 }
1688 
/*
 * Enable or disable SoC IP error interrupts.
 *
 * Only acts when the device runs in interrupt mode.  The IOB and CSW bits
 * in the top level PCP masks are cleared on enable and set on disable, and
 * the AXI slave 0/1 and XGIC interrupt mask registers are written with 0 on
 * enable and all ones on disable.
 */
static void xgene_edac_soc_hw_init(struct edac_device_ctl_info *edac_dev,
				   bool enable)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	const u32 iob_hp_bits = IOB_PA_ERR_MASK | IOB_BA_ERR_MASK |
				IOB_XGIC_ERR_MASK | IOB_RB_ERR_MASK;
	const u32 mask_val = enable ? 0x0 : 0xFFFFFFFF;

	if (edac_dev->op_state != OP_RUNNING_INTERRUPT)
		return;

	/* Enable SoC IP error interrupt */
	if (enable) {
		xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK, iob_hp_bits);
		xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
				       CSW_SWITCH_TRACE_ERR_MASK);
	} else {
		xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK, iob_hp_bits);
		xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
				       CSW_SWITCH_TRACE_ERR_MASK);
	}

	writel(mask_val, ctx->dev_csr + IOBAXIS0TRANSERRINTMSK);
	writel(mask_val, ctx->dev_csr + IOBAXIS1TRANSERRINTMSK);
	writel(mask_val, ctx->dev_csr + XGICTRANSERRINTMSK);

	/*
	 * NOTE(review): on enable this passes 0 to the set-bits helper, which
	 * presumably leaves MEMERRINTMSK untouched rather than unmasking it;
	 * confirm against the helper and the hardware manual whether that is
	 * intended.
	 */
	xgene_edac_pcp_setbits(ctx->edac, MEMERRINTMSK, mask_val);
}
1725 
xgene_edac_soc_add(struct xgene_edac * edac,struct device_node * np,int version)1726 static int xgene_edac_soc_add(struct xgene_edac *edac, struct device_node *np,
1727 			      int version)
1728 {
1729 	struct edac_device_ctl_info *edac_dev;
1730 	struct xgene_edac_dev_ctx *ctx;
1731 	void __iomem *dev_csr;
1732 	struct resource res;
1733 	int edac_idx;
1734 	int rc;
1735 
1736 	if (!devres_open_group(edac->dev, xgene_edac_soc_add, GFP_KERNEL))
1737 		return -ENOMEM;
1738 
1739 	rc = of_address_to_resource(np, 0, &res);
1740 	if (rc < 0) {
1741 		dev_err(edac->dev, "no SoC resource address\n");
1742 		goto err_release_group;
1743 	}
1744 	dev_csr = devm_ioremap_resource(edac->dev, &res);
1745 	if (IS_ERR(dev_csr)) {
1746 		dev_err(edac->dev,
1747 			"devm_ioremap_resource failed for soc resource address\n");
1748 		rc = PTR_ERR(dev_csr);
1749 		goto err_release_group;
1750 	}
1751 
1752 	edac_idx = edac_device_alloc_index();
1753 	edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
1754 					      "SOC", 1, "SOC", 1, 2, NULL, 0,
1755 					      edac_idx);
1756 	if (!edac_dev) {
1757 		rc = -ENOMEM;
1758 		goto err_release_group;
1759 	}
1760 
1761 	ctx = edac_dev->pvt_info;
1762 	ctx->dev_csr = dev_csr;
1763 	ctx->name = "xgene_soc_err";
1764 	ctx->edac_idx = edac_idx;
1765 	ctx->edac = edac;
1766 	ctx->edac_dev = edac_dev;
1767 	ctx->ddev = *edac->dev;
1768 	ctx->version = version;
1769 	edac_dev->dev = &ctx->ddev;
1770 	edac_dev->ctl_name = ctx->name;
1771 	edac_dev->dev_name = ctx->name;
1772 	edac_dev->mod_name = EDAC_MOD_STR;
1773 
1774 	if (edac_op_state == EDAC_OPSTATE_POLL)
1775 		edac_dev->edac_check = xgene_edac_soc_check;
1776 
1777 	rc = edac_device_add_device(edac_dev);
1778 	if (rc > 0) {
1779 		dev_err(edac->dev, "failed edac_device_add_device()\n");
1780 		rc = -ENOMEM;
1781 		goto err_ctl_free;
1782 	}
1783 
1784 	if (edac_op_state == EDAC_OPSTATE_INT)
1785 		edac_dev->op_state = OP_RUNNING_INTERRUPT;
1786 
1787 	list_add(&ctx->next, &edac->socs);
1788 
1789 	xgene_edac_soc_hw_init(edac_dev, 1);
1790 
1791 	devres_remove_group(edac->dev, xgene_edac_soc_add);
1792 
1793 	dev_info(edac->dev, "X-Gene EDAC SoC registered\n");
1794 
1795 	return 0;
1796 
1797 err_ctl_free:
1798 	edac_device_free_ctl_info(edac_dev);
1799 err_release_group:
1800 	devres_release_group(edac->dev, xgene_edac_soc_add);
1801 	return rc;
1802 }
1803 
xgene_edac_soc_remove(struct xgene_edac_dev_ctx * soc)1804 static int xgene_edac_soc_remove(struct xgene_edac_dev_ctx *soc)
1805 {
1806 	struct edac_device_ctl_info *edac_dev = soc->edac_dev;
1807 
1808 	xgene_edac_soc_hw_init(edac_dev, 0);
1809 	edac_device_del_device(soc->edac->dev);
1810 	edac_device_free_ctl_info(edac_dev);
1811 	return 0;
1812 }
1813 
xgene_edac_isr(int irq,void * dev_id)1814 static irqreturn_t xgene_edac_isr(int irq, void *dev_id)
1815 {
1816 	struct xgene_edac *ctx = dev_id;
1817 	struct xgene_edac_pmd_ctx *pmd;
1818 	struct xgene_edac_dev_ctx *node;
1819 	unsigned int pcp_hp_stat;
1820 	unsigned int pcp_lp_stat;
1821 
1822 	xgene_edac_pcp_rd(ctx, PCPHPERRINTSTS, &pcp_hp_stat);
1823 	xgene_edac_pcp_rd(ctx, PCPLPERRINTSTS, &pcp_lp_stat);
1824 	if ((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
1825 	    (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
1826 	    (MCU_CORR_ERR_MASK & pcp_lp_stat)) {
1827 		struct xgene_edac_mc_ctx *mcu;
1828 
1829 		list_for_each_entry(mcu, &ctx->mcus, next)
1830 			xgene_edac_mc_check(mcu->mci);
1831 	}
1832 
1833 	list_for_each_entry(pmd, &ctx->pmds, next) {
1834 		if ((PMD0_MERR_MASK << pmd->pmd) & pcp_hp_stat)
1835 			xgene_edac_pmd_check(pmd->edac_dev);
1836 	}
1837 
1838 	list_for_each_entry(node, &ctx->l3s, next)
1839 		xgene_edac_l3_check(node->edac_dev);
1840 
1841 	list_for_each_entry(node, &ctx->socs, next)
1842 		xgene_edac_soc_check(node->edac_dev);
1843 
1844 	return IRQ_HANDLED;
1845 }
1846 
xgene_edac_probe(struct platform_device * pdev)1847 static int xgene_edac_probe(struct platform_device *pdev)
1848 {
1849 	struct xgene_edac *edac;
1850 	struct device_node *child;
1851 	struct resource *res;
1852 	int rc;
1853 
1854 	edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL);
1855 	if (!edac)
1856 		return -ENOMEM;
1857 
1858 	edac->dev = &pdev->dev;
1859 	platform_set_drvdata(pdev, edac);
1860 	INIT_LIST_HEAD(&edac->mcus);
1861 	INIT_LIST_HEAD(&edac->pmds);
1862 	INIT_LIST_HEAD(&edac->l3s);
1863 	INIT_LIST_HEAD(&edac->socs);
1864 	spin_lock_init(&edac->lock);
1865 	mutex_init(&edac->mc_lock);
1866 
1867 	edac->csw_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1868 							"regmap-csw");
1869 	if (IS_ERR(edac->csw_map)) {
1870 		dev_err(edac->dev, "unable to get syscon regmap csw\n");
1871 		rc = PTR_ERR(edac->csw_map);
1872 		goto out_err;
1873 	}
1874 
1875 	edac->mcba_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1876 							 "regmap-mcba");
1877 	if (IS_ERR(edac->mcba_map)) {
1878 		dev_err(edac->dev, "unable to get syscon regmap mcba\n");
1879 		rc = PTR_ERR(edac->mcba_map);
1880 		goto out_err;
1881 	}
1882 
1883 	edac->mcbb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1884 							 "regmap-mcbb");
1885 	if (IS_ERR(edac->mcbb_map)) {
1886 		dev_err(edac->dev, "unable to get syscon regmap mcbb\n");
1887 		rc = PTR_ERR(edac->mcbb_map);
1888 		goto out_err;
1889 	}
1890 	edac->efuse_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1891 							  "regmap-efuse");
1892 	if (IS_ERR(edac->efuse_map)) {
1893 		dev_err(edac->dev, "unable to get syscon regmap efuse\n");
1894 		rc = PTR_ERR(edac->efuse_map);
1895 		goto out_err;
1896 	}
1897 
1898 	/*
1899 	 * NOTE: The register bus resource is optional for compatibility
1900 	 * reason.
1901 	 */
1902 	edac->rb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
1903 						       "regmap-rb");
1904 	if (IS_ERR(edac->rb_map)) {
1905 		dev_warn(edac->dev, "missing syscon regmap rb\n");
1906 		edac->rb_map = NULL;
1907 	}
1908 
1909 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1910 	edac->pcp_csr = devm_ioremap_resource(&pdev->dev, res);
1911 	if (IS_ERR(edac->pcp_csr)) {
1912 		dev_err(&pdev->dev, "no PCP resource address\n");
1913 		rc = PTR_ERR(edac->pcp_csr);
1914 		goto out_err;
1915 	}
1916 
1917 	if (edac_op_state == EDAC_OPSTATE_INT) {
1918 		int irq;
1919 		int i;
1920 
1921 		for (i = 0; i < 3; i++) {
1922 			irq = platform_get_irq(pdev, i);
1923 			if (irq < 0) {
1924 				dev_err(&pdev->dev, "No IRQ resource\n");
1925 				rc = -EINVAL;
1926 				goto out_err;
1927 			}
1928 			rc = devm_request_irq(&pdev->dev, irq,
1929 					      xgene_edac_isr, IRQF_SHARED,
1930 					      dev_name(&pdev->dev), edac);
1931 			if (rc) {
1932 				dev_err(&pdev->dev,
1933 					"Could not request IRQ %d\n", irq);
1934 				goto out_err;
1935 			}
1936 		}
1937 	}
1938 
1939 	edac->dfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
1940 
1941 	for_each_child_of_node(pdev->dev.of_node, child) {
1942 		if (!of_device_is_available(child))
1943 			continue;
1944 		if (of_device_is_compatible(child, "apm,xgene-edac-mc"))
1945 			xgene_edac_mc_add(edac, child);
1946 		if (of_device_is_compatible(child, "apm,xgene-edac-pmd"))
1947 			xgene_edac_pmd_add(edac, child, 1);
1948 		if (of_device_is_compatible(child, "apm,xgene-edac-pmd-v2"))
1949 			xgene_edac_pmd_add(edac, child, 2);
1950 		if (of_device_is_compatible(child, "apm,xgene-edac-l3"))
1951 			xgene_edac_l3_add(edac, child, 1);
1952 		if (of_device_is_compatible(child, "apm,xgene-edac-l3-v2"))
1953 			xgene_edac_l3_add(edac, child, 2);
1954 		if (of_device_is_compatible(child, "apm,xgene-edac-soc"))
1955 			xgene_edac_soc_add(edac, child, 0);
1956 		if (of_device_is_compatible(child, "apm,xgene-edac-soc-v1"))
1957 			xgene_edac_soc_add(edac, child, 1);
1958 	}
1959 
1960 	return 0;
1961 
1962 out_err:
1963 	return rc;
1964 }
1965 
xgene_edac_remove(struct platform_device * pdev)1966 static int xgene_edac_remove(struct platform_device *pdev)
1967 {
1968 	struct xgene_edac *edac = dev_get_drvdata(&pdev->dev);
1969 	struct xgene_edac_mc_ctx *mcu;
1970 	struct xgene_edac_mc_ctx *temp_mcu;
1971 	struct xgene_edac_pmd_ctx *pmd;
1972 	struct xgene_edac_pmd_ctx *temp_pmd;
1973 	struct xgene_edac_dev_ctx *node;
1974 	struct xgene_edac_dev_ctx *temp_node;
1975 
1976 	list_for_each_entry_safe(mcu, temp_mcu, &edac->mcus, next)
1977 		xgene_edac_mc_remove(mcu);
1978 
1979 	list_for_each_entry_safe(pmd, temp_pmd, &edac->pmds, next)
1980 		xgene_edac_pmd_remove(pmd);
1981 
1982 	list_for_each_entry_safe(node, temp_node, &edac->l3s, next)
1983 		xgene_edac_l3_remove(node);
1984 
1985 	list_for_each_entry_safe(node, temp_node, &edac->socs, next)
1986 		xgene_edac_soc_remove(node);
1987 
1988 	return 0;
1989 }
1990 
1991 static const struct of_device_id xgene_edac_of_match[] = {
1992 	{ .compatible = "apm,xgene-edac" },
1993 	{},
1994 };
1995 MODULE_DEVICE_TABLE(of, xgene_edac_of_match);
1996 
1997 static struct platform_driver xgene_edac_driver = {
1998 	.probe = xgene_edac_probe,
1999 	.remove = xgene_edac_remove,
2000 	.driver = {
2001 		.name = "xgene-edac",
2002 		.of_match_table = xgene_edac_of_match,
2003 	},
2004 };
2005 
xgene_edac_init(void)2006 static int __init xgene_edac_init(void)
2007 {
2008 	int rc;
2009 
2010 	/* Make sure error reporting method is sane */
2011 	switch (edac_op_state) {
2012 	case EDAC_OPSTATE_POLL:
2013 	case EDAC_OPSTATE_INT:
2014 		break;
2015 	default:
2016 		edac_op_state = EDAC_OPSTATE_INT;
2017 		break;
2018 	}
2019 
2020 	rc = platform_driver_register(&xgene_edac_driver);
2021 	if (rc) {
2022 		edac_printk(KERN_ERR, EDAC_MOD_STR,
2023 			    "EDAC fails to register\n");
2024 		goto reg_failed;
2025 	}
2026 
2027 	return 0;
2028 
2029 reg_failed:
2030 	return rc;
2031 }
2032 module_init(xgene_edac_init);
2033 
xgene_edac_exit(void)2034 static void __exit xgene_edac_exit(void)
2035 {
2036 	platform_driver_unregister(&xgene_edac_driver);
2037 }
2038 module_exit(xgene_edac_exit);
2039 
2040 MODULE_LICENSE("GPL");
2041 MODULE_AUTHOR("Feng Kan <fkan@apm.com>");
2042 MODULE_DESCRIPTION("APM X-Gene EDAC driver");
2043 module_param(edac_op_state, int, 0444);
2044 MODULE_PARM_DESC(edac_op_state,
2045 		 "EDAC error reporting state: 0=Poll, 2=Interrupt");
2046