/*
 * APM X-Gene SoC EDAC (error detection and correction)
 *
 * Copyright (c) 2015, Applied Micro Circuits Corporation
 * Author: Feng Kan <fkan@apm.com>
 *         Loc Ho <lho@apm.com>
 *
 * This program is free software; you can redistribute  it and/or modify it
 * under  the terms of  the GNU General  Public License as published by the
 * Free Software Foundation;  either version 2 of the  License, or (at your
 * option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/interrupt.h>
#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/regmap.h>

#include "edac_core.h"
#include "edac_module.h"

#define EDAC_MOD_STR			"xgene_edac"

/* Global error configuration status registers (CSR) */
#define PCPHPERRINTSTS			0x0000
#define PCPHPERRINTMSK			0x0004
#define  MCU_CTL_ERR_MASK		BIT(12)
#define  IOB_PA_ERR_MASK		BIT(11)
#define  IOB_BA_ERR_MASK		BIT(10)
#define  IOB_XGIC_ERR_MASK		BIT(9)
#define  IOB_RB_ERR_MASK		BIT(8)
#define  L3C_UNCORR_ERR_MASK		BIT(5)
#define  MCU_UNCORR_ERR_MASK		BIT(4)
#define  PMD3_MERR_MASK			BIT(3)
#define  PMD2_MERR_MASK			BIT(2)
#define  PMD1_MERR_MASK			BIT(1)
#define  PMD0_MERR_MASK			BIT(0)
#define PCPLPERRINTSTS			0x0008
#define PCPLPERRINTMSK			0x000C
#define  CSW_SWITCH_TRACE_ERR_MASK	BIT(2)
#define  L3C_CORR_ERR_MASK		BIT(1)
#define  MCU_CORR_ERR_MASK		BIT(0)
#define MEMERRINTSTS			0x0010
#define MEMERRINTMSK			0x0014

struct xgene_edac {
	struct device		*dev;
	struct regmap		*csw_map;
	struct regmap		*mcba_map;
	struct regmap		*mcbb_map;
	struct regmap		*efuse_map;
	struct regmap		*rb_map;
	void __iomem		*pcp_csr;
	spinlock_t		lock;
	struct dentry		*dfs;

	struct list_head	mcus;
	struct list_head	pmds;
	struct list_head	l3s;
	struct list_head	socs;

	struct mutex		mc_lock;
	int			mc_active_mask;
	int			mc_registered_mask;
};

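/*
 * Accessors for the top-level PCP error CSRs. Reads are plain 32-bit
 * loads; read-modify-write of the shared interrupt mask registers is
 * serialized with a spinlock since several sub-block handlers touch them.
 */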
static void xgene_edac_pcp_rd(struct xgene_edac *edac, u32 reg, u32 *val)
{
	*val = readl(edac->pcp_csr + reg);
}

static void xgene_edac_pcp_clrbits(struct xgene_edac *edac, u32 reg,
				   u32 bits_mask)
{
	u32 val;

	spin_lock(&edac->lock);
	val = readl(edac->pcp_csr + reg);
	val &= ~bits_mask;
	writel(val, edac->pcp_csr + reg);
	spin_unlock(&edac->lock);
}

static void xgene_edac_pcp_setbits(struct xgene_edac *edac, u32 reg,
				   u32 bits_mask)
{
	u32 val;

	spin_lock(&edac->lock);
	val = readl(edac->pcp_csr + reg);
	val |= bits_mask;
	writel(val, edac->pcp_csr + reg);
	spin_unlock(&edac->lock);
}

/* Memory controller error CSR */
#define MCU_MAX_RANK			8
#define MCU_RANK_STRIDE			0x40

#define MCUGECR				0x0110
#define  MCU_GECR_DEMANDUCINTREN_MASK	BIT(0)
#define  MCU_GECR_BACKUCINTREN_MASK	BIT(1)
#define  MCU_GECR_CINTREN_MASK		BIT(2)
#define  MUC_GECR_MCUADDRERREN_MASK	BIT(9)
#define MCUGESR				0x0114
#define  MCU_GESR_ADDRNOMATCH_ERR_MASK	BIT(7)
#define  MCU_GESR_ADDRMULTIMATCH_ERR_MASK	BIT(6)
#define  MCU_GESR_PHYP_ERR_MASK		BIT(3)
#define MCUESRR0			0x0314
#define  MCU_ESRR_MULTUCERR_MASK	BIT(3)
#define  MCU_ESRR_BACKUCERR_MASK	BIT(2)
#define  MCU_ESRR_DEMANDUCERR_MASK	BIT(1)
#define  MCU_ESRR_CERR_MASK		BIT(0)
#define MCUESRRA0			0x0318
#define MCUEBLRR0			0x031c
#define  MCU_EBLRR_ERRBANK_RD(src)	(((src) & 0x00000007) >> 0)
#define MCUERCRR0			0x0320
#define  MCU_ERCRR_ERRROW_RD(src)	(((src) & 0xFFFF0000) >> 16)
#define  MCU_ERCRR_ERRCOL_RD(src)	((src) & 0x00000FFF)
#define MCUSBECNT0			0x0324
#define MCU_SBECNT_COUNT(src)		((src) & 0xFFFF)

#define CSW_CSWCR			0x0000
#define  CSW_CSWCR_DUALMCB_MASK		BIT(0)

#define MCBADDRMR			0x0000
#define  MCBADDRMR_MCU_INTLV_MODE_MASK	BIT(3)
#define  MCBADDRMR_DUALMCU_MODE_MASK	BIT(2)
#define  MCBADDRMR_MCB_INTLV_MODE_MASK	BIT(1)
#define  MCBADDRMR_ADDRESS_MODE_MASK	BIT(0)

struct xgene_edac_mc_ctx {
	struct list_head	next;
	char			*name;
	struct mem_ctl_info	*mci;
	struct xgene_edac	*edac;
	void __iomem		*mcu_csr;
	u32			mcu_id;
};

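/*
 * debugfs error injection: any write to the node sets all error bits in
 * MCUESRRA for every rank (MCUESRRA appears to be a write-to-set alias
 * of MCUESRR), which makes the poll/interrupt path report the errors.
 */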
static ssize_t xgene_edac_mc_err_inject_write(struct file *file,
					      const char __user *data,
					      size_t count, loff_t *ppos)
{
	struct mem_ctl_info *mci = file->private_data;
	struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
	int i;

	for (i = 0; i < MCU_MAX_RANK; i++) {
		writel(MCU_ESRR_MULTUCERR_MASK | MCU_ESRR_BACKUCERR_MASK |
		       MCU_ESRR_DEMANDUCERR_MASK | MCU_ESRR_CERR_MASK,
		       ctx->mcu_csr + MCUESRRA0 + i * MCU_RANK_STRIDE);
	}
	return count;
}

static const struct file_operations xgene_edac_mc_debug_inject_fops = {
	.open = simple_open,
	.write = xgene_edac_mc_err_inject_write,
	.llseek = generic_file_llseek,
};

static void xgene_edac_mc_create_debugfs_node(struct mem_ctl_info *mci)
{
	if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
		return;

	if (!mci->debugfs)
		return;

	edac_debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci,
				 &xgene_edac_mc_debug_inject_fops);
}

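/*
 * Check for MCU errors: bail out early unless the top-level status
 * registers flag an MCU error, then walk all ranks, report and clear
 * per-rank correctable/uncorrectable errors, and finally handle the
 * global MCU error status.
 */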
static void xgene_edac_mc_check(struct mem_ctl_info *mci)
{
	struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
	unsigned int pcp_hp_stat;
	unsigned int pcp_lp_stat;
	u32 reg;
	u32 rank;
	u32 bank;
	u32 count;
	u32 col_row;

	xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
	xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
	if (!((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
	      (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
	      (MCU_CORR_ERR_MASK & pcp_lp_stat)))
		return;

	for (rank = 0; rank < MCU_MAX_RANK; rank++) {
		reg = readl(ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);

		/* Detect uncorrectable memory error */
		if (reg & (MCU_ESRR_DEMANDUCERR_MASK |
			   MCU_ESRR_BACKUCERR_MASK)) {
			edac_mc_chipset_printk(mci, KERN_ERR, "X-Gene",
				"MCU uncorrectable error at rank %d\n", rank);

			edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
				1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
		}

		/* Detect correctable memory error */
		if (reg & MCU_ESRR_CERR_MASK) {
			bank = readl(ctx->mcu_csr + MCUEBLRR0 +
				     rank * MCU_RANK_STRIDE);
			col_row = readl(ctx->mcu_csr + MCUERCRR0 +
					rank * MCU_RANK_STRIDE);
			count = readl(ctx->mcu_csr + MCUSBECNT0 +
				      rank * MCU_RANK_STRIDE);
			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
				"MCU correctable error at rank %d bank %d column %d row %d count %d\n",
				rank, MCU_EBLRR_ERRBANK_RD(bank),
				MCU_ERCRR_ERRCOL_RD(col_row),
				MCU_ERCRR_ERRROW_RD(col_row),
				MCU_SBECNT_COUNT(count));

			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
				1, 0, 0, 0, 0, 0, -1, mci->ctl_name, "");
		}

		/* Clear all error registers */
		writel(0x0, ctx->mcu_csr + MCUEBLRR0 + rank * MCU_RANK_STRIDE);
		writel(0x0, ctx->mcu_csr + MCUERCRR0 + rank * MCU_RANK_STRIDE);
		writel(0x0, ctx->mcu_csr + MCUSBECNT0 +
		       rank * MCU_RANK_STRIDE);
		writel(reg, ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE);
	}

	/* Detect memory controller error */
	reg = readl(ctx->mcu_csr + MCUGESR);
	if (reg) {
		if (reg & MCU_GESR_ADDRNOMATCH_ERR_MASK)
			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
				"MCU address mismatch error\n");
		if (reg & MCU_GESR_ADDRMULTIMATCH_ERR_MASK)
			edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene",
				"MCU address multi-match error\n");

		writel(reg, ctx->mcu_csr + MCUGESR);
	}
}

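/*
 * Enable or disable MCU error interrupts, both in the MCU itself and,
 * once every active MCU has registered, at the top-level PCP mask.
 */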
static void xgene_edac_mc_irq_ctl(struct mem_ctl_info *mci, bool enable)
{
	struct xgene_edac_mc_ctx *ctx = mci->pvt_info;
	unsigned int val;

	if (edac_op_state != EDAC_OPSTATE_INT)
		return;

	mutex_lock(&ctx->edac->mc_lock);

	/*
	 * As there is only a single bit for the error enable and interrupt
	 * mask, we must only enable the top-level interrupt after all MCUs
	 * have registered. Otherwise, if an error occurs while the
	 * corresponding MCU is not yet registered, the interrupt will never
	 * get cleared. To determine when all MCUs have registered, keep
	 * track of both active and registered MCUs.
	 */
	if (enable) {
		/* Set registered MCU bit */
		ctx->edac->mc_registered_mask |= 1 << ctx->mcu_id;

		/* Enable interrupt after all active MCUs have registered */
		if (ctx->edac->mc_registered_mask ==
		    ctx->edac->mc_active_mask) {
			/* Enable memory controller top level interrupt */
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       MCU_UNCORR_ERR_MASK |
					       MCU_CTL_ERR_MASK);
			xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
					       MCU_CORR_ERR_MASK);
		}

		/* Enable MCU interrupt and error reporting */
		val = readl(ctx->mcu_csr + MCUGECR);
		val |= MCU_GECR_DEMANDUCINTREN_MASK |
		       MCU_GECR_BACKUCINTREN_MASK |
		       MCU_GECR_CINTREN_MASK |
		       MUC_GECR_MCUADDRERREN_MASK;
		writel(val, ctx->mcu_csr + MCUGECR);
	} else {
		/* Disable MCU interrupt */
		val = readl(ctx->mcu_csr + MCUGECR);
		val &= ~(MCU_GECR_DEMANDUCINTREN_MASK |
			 MCU_GECR_BACKUCINTREN_MASK |
			 MCU_GECR_CINTREN_MASK |
			 MUC_GECR_MCUADDRERREN_MASK);
		writel(val, ctx->mcu_csr + MCUGECR);

		/* Disable memory controller top level interrupt */
		xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
				       MCU_UNCORR_ERR_MASK | MCU_CTL_ERR_MASK);
		xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
				       MCU_CORR_ERR_MASK);

		/* Clear registered MCU bit */
		ctx->edac->mc_registered_mask &= ~(1 << ctx->mcu_id);
	}

	mutex_unlock(&ctx->edac->mc_lock);
}

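/*
 * Work out which MCUs are populated from the CSW/MCB configuration:
 * dual-MCB systems use MCU0-3 (or just MCU0/MCU2), single-MCB systems
 * use MCU0/MCU1 (or just MCU0).
 */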
static int xgene_edac_mc_is_active(struct xgene_edac_mc_ctx *ctx, int mc_idx)
{
	unsigned int reg;
	u32 mcu_mask;

	if (regmap_read(ctx->edac->csw_map, CSW_CSWCR, &reg))
		return 0;

	if (reg & CSW_CSWCR_DUALMCB_MASK) {
		/*
		 * Dual MCB active - determine whether all four MCUs or just
		 * MCU0 and MCU2 are active
		 */
		if (regmap_read(ctx->edac->mcbb_map, MCBADDRMR, &reg))
			return 0;
		mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5;
	} else {
		/*
		 * Single MCB active - determine whether both MCU0 and MCU1
		 * or just MCU0 is active
		 */
		if (regmap_read(ctx->edac->mcba_map, MCBADDRMR, &reg))
			return 0;
		mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1;
	}

	/* Save the active MC mask if it hasn't been set already */
	if (!ctx->edac->mc_active_mask)
		ctx->edac->mc_active_mask = mcu_mask;

	return (mcu_mask & (1 << mc_idx)) ? 1 : 0;
}

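/*
 * Probe one memory-controller node: map its CSRs, skip MCUs that are
 * not active, then allocate and register an EDAC MC instance for it.
 */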
static int xgene_edac_mc_add(struct xgene_edac *edac, struct device_node *np)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer layers[2];
	struct xgene_edac_mc_ctx tmp_ctx;
	struct xgene_edac_mc_ctx *ctx;
	struct resource res;
	int rc;

	memset(&tmp_ctx, 0, sizeof(tmp_ctx));
	tmp_ctx.edac = edac;

	if (!devres_open_group(edac->dev, xgene_edac_mc_add, GFP_KERNEL))
		return -ENOMEM;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no MCU resource address\n");
		goto err_group;
	}
	tmp_ctx.mcu_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(tmp_ctx.mcu_csr)) {
		dev_err(edac->dev, "unable to map MCU resource\n");
		rc = PTR_ERR(tmp_ctx.mcu_csr);
		goto err_group;
	}

	/* Ignore a non-active MCU */
	if (of_property_read_u32(np, "memory-controller", &tmp_ctx.mcu_id)) {
		dev_err(edac->dev, "no memory-controller property\n");
		rc = -ENODEV;
		goto err_group;
	}
	if (!xgene_edac_mc_is_active(&tmp_ctx, tmp_ctx.mcu_id)) {
		rc = -ENODEV;
		goto err_group;
	}

	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
	layers[0].size = 4;
	layers[0].is_virt_csrow = true;
	layers[1].type = EDAC_MC_LAYER_CHANNEL;
	layers[1].size = 2;
	layers[1].is_virt_csrow = false;
	mci = edac_mc_alloc(tmp_ctx.mcu_id, ARRAY_SIZE(layers), layers,
			    sizeof(*ctx));
	if (!mci) {
		rc = -ENOMEM;
		goto err_group;
	}

	ctx = mci->pvt_info;
	*ctx = tmp_ctx;		/* Copy over resource value */
	ctx->name = "xgene_edac_mc_err";
	ctx->mci = mci;
	mci->pdev = &mci->dev;
	mci->ctl_name = ctx->name;
	mci->dev_name = ctx->name;

	mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_RDDR2 | MEM_FLAG_RDDR3 |
			 MEM_FLAG_DDR | MEM_FLAG_DDR2 | MEM_FLAG_DDR3;
	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
	mci->edac_cap = EDAC_FLAG_SECDED;
	mci->mod_name = EDAC_MOD_STR;
	mci->mod_ver = "0.1";
	mci->ctl_page_to_phys = NULL;
	mci->scrub_cap = SCRUB_FLAG_HW_SRC;
	mci->scrub_mode = SCRUB_HW_SRC;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		mci->edac_check = xgene_edac_mc_check;

	if (edac_mc_add_mc(mci)) {
		dev_err(edac->dev, "edac_mc_add_mc failed\n");
		rc = -EINVAL;
		goto err_free;
	}

	xgene_edac_mc_create_debugfs_node(mci);

	list_add(&ctx->next, &edac->mcus);

	xgene_edac_mc_irq_ctl(mci, true);

	devres_remove_group(edac->dev, xgene_edac_mc_add);

	dev_info(edac->dev, "X-Gene EDAC MC registered\n");
	return 0;

err_free:
	edac_mc_free(mci);
err_group:
	devres_release_group(edac->dev, xgene_edac_mc_add);
	return rc;
}

static int xgene_edac_mc_remove(struct xgene_edac_mc_ctx *mcu)
{
	xgene_edac_mc_irq_ctl(mcu->mci, false);
	edac_mc_del_mc(&mcu->mci->dev);
	edac_mc_free(mcu->mci);
	return 0;
}

/* CPU L1/L2 error CSR */
#define MAX_CPU_PER_PMD				2
#define CPU_CSR_STRIDE				0x00100000
#define CPU_L2C_PAGE				0x000D0000
#define CPU_MEMERR_L2C_PAGE			0x000E0000
#define CPU_MEMERR_CPU_PAGE			0x000F0000

#define MEMERR_CPU_ICFECR_PAGE_OFFSET		0x0000
#define MEMERR_CPU_ICFESR_PAGE_OFFSET		0x0004
#define  MEMERR_CPU_ICFESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_CPU_ICFESR_ERRINDEX_RD(src)	(((src) & 0x003F0000) >> 16)
#define  MEMERR_CPU_ICFESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
#define  MEMERR_CPU_ICFESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
#define  MEMERR_CPU_ICFESR_MULTCERR_MASK	BIT(2)
#define  MEMERR_CPU_ICFESR_CERR_MASK		BIT(0)
#define MEMERR_CPU_LSUESR_PAGE_OFFSET		0x000c
#define  MEMERR_CPU_LSUESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_CPU_LSUESR_ERRINDEX_RD(src)	(((src) & 0x003F0000) >> 16)
#define  MEMERR_CPU_LSUESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
#define  MEMERR_CPU_LSUESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
#define  MEMERR_CPU_LSUESR_MULTCERR_MASK	BIT(2)
#define  MEMERR_CPU_LSUESR_CERR_MASK		BIT(0)
#define MEMERR_CPU_LSUECR_PAGE_OFFSET		0x0008
#define MEMERR_CPU_MMUECR_PAGE_OFFSET		0x0010
#define MEMERR_CPU_MMUESR_PAGE_OFFSET		0x0014
#define  MEMERR_CPU_MMUESR_ERRWAY_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_CPU_MMUESR_ERRINDEX_RD(src)	(((src) & 0x007F0000) >> 16)
#define  MEMERR_CPU_MMUESR_ERRINFO_RD(src)	(((src) & 0x0000FF00) >> 8)
#define  MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK	BIT(7)
#define  MEMERR_CPU_MMUESR_ERRTYPE_RD(src)	(((src) & 0x00000070) >> 4)
#define  MEMERR_CPU_MMUESR_MULTCERR_MASK	BIT(2)
#define  MEMERR_CPU_MMUESR_CERR_MASK		BIT(0)
#define MEMERR_CPU_ICFESRA_PAGE_OFFSET		0x0804
#define MEMERR_CPU_LSUESRA_PAGE_OFFSET		0x080c
#define MEMERR_CPU_MMUESRA_PAGE_OFFSET		0x0814

#define MEMERR_L2C_L2ECR_PAGE_OFFSET		0x0000
#define MEMERR_L2C_L2ESR_PAGE_OFFSET		0x0004
#define  MEMERR_L2C_L2ESR_ERRSYN_RD(src)	(((src) & 0xFF000000) >> 24)
#define  MEMERR_L2C_L2ESR_ERRWAY_RD(src)	(((src) & 0x00FC0000) >> 18)
#define  MEMERR_L2C_L2ESR_ERRCPU_RD(src)	(((src) & 0x00020000) >> 17)
#define  MEMERR_L2C_L2ESR_ERRGROUP_RD(src)	(((src) & 0x0000E000) >> 13)
#define  MEMERR_L2C_L2ESR_ERRACTION_RD(src)	(((src) & 0x00001C00) >> 10)
#define  MEMERR_L2C_L2ESR_ERRTYPE_RD(src)	(((src) & 0x00000300) >> 8)
#define  MEMERR_L2C_L2ESR_MULTUCERR_MASK	BIT(3)
#define  MEMERR_L2C_L2ESR_MULTICERR_MASK	BIT(2)
#define  MEMERR_L2C_L2ESR_UCERR_MASK		BIT(1)
#define  MEMERR_L2C_L2ESR_ERR_MASK		BIT(0)
#define MEMERR_L2C_L2EALR_PAGE_OFFSET		0x0008
#define CPUX_L2C_L2RTOCR_PAGE_OFFSET		0x0010
#define MEMERR_L2C_L2EAHR_PAGE_OFFSET		0x000c
#define CPUX_L2C_L2RTOSR_PAGE_OFFSET		0x0014
#define  MEMERR_L2C_L2RTOSR_MULTERR_MASK	BIT(1)
#define  MEMERR_L2C_L2RTOSR_ERR_MASK		BIT(0)
#define CPUX_L2C_L2RTOALR_PAGE_OFFSET		0x0018
#define CPUX_L2C_L2RTOAHR_PAGE_OFFSET		0x001c
#define MEMERR_L2C_L2ESRA_PAGE_OFFSET		0x0804

/*
 * Processor Module Domain (PMD) context - context for a pair of processors.
 * Each PMD consists of two CPUs and a shared L2 cache; each CPU has its
 * own L1 cache.
 */
struct xgene_edac_pmd_ctx {
	struct list_head	next;
	struct device		ddev;
	char			*name;
	struct xgene_edac	*edac;
	struct edac_device_ctl_info *edac_dev;
	void __iomem		*pmd_csr;
	u32			pmd;
	int			version;
};

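/*
 * Check one CPU's L1/MMU memory-error syndrome registers (the ICF, LSU
 * and MMU pages), log any decoded error, clear it, and count it as a CE.
 */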
static void xgene_edac_pmd_l1_check(struct edac_device_ctl_info *edac_dev,
				    int cpu_idx)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_f;
	u32 val;

	pg_f = ctx->pmd_csr + cpu_idx * CPU_CSR_STRIDE + CPU_MEMERR_CPU_PAGE;

	val = readl(pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);
	if (!val)
		goto chk_lsu;
	dev_err(edac_dev->dev,
		"CPU%d L1 memory error ICF 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
		ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
		MEMERR_CPU_ICFESR_ERRWAY_RD(val),
		MEMERR_CPU_ICFESR_ERRINDEX_RD(val),
		MEMERR_CPU_ICFESR_ERRINFO_RD(val));
	if (val & MEMERR_CPU_ICFESR_CERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable errors\n");
	if (val & MEMERR_CPU_ICFESR_MULTCERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable errors\n");
	switch (MEMERR_CPU_ICFESR_ERRTYPE_RD(val)) {
	case 1:
		dev_err(edac_dev->dev, "L1 TLB multiple hit\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "Way select multiple hit\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "Physical tag parity error\n");
		break;
	case 4:
	case 5:
		dev_err(edac_dev->dev, "L1 data parity error\n");
		break;
	case 6:
		dev_err(edac_dev->dev, "L1 pre-decode parity error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET);

	if (val & (MEMERR_CPU_ICFESR_CERR_MASK |
		   MEMERR_CPU_ICFESR_MULTCERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);

chk_lsu:
	val = readl(pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);
	if (!val)
		goto chk_mmu;
	dev_err(edac_dev->dev,
		"CPU%d memory error LSU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n",
		ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
		MEMERR_CPU_LSUESR_ERRWAY_RD(val),
		MEMERR_CPU_LSUESR_ERRINDEX_RD(val),
		MEMERR_CPU_LSUESR_ERRINFO_RD(val));
	if (val & MEMERR_CPU_LSUESR_CERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable errors\n");
	if (val & MEMERR_CPU_LSUESR_MULTCERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable errors\n");
	switch (MEMERR_CPU_LSUESR_ERRTYPE_RD(val)) {
	case 0:
		dev_err(edac_dev->dev, "Load tag error\n");
		break;
	case 1:
		dev_err(edac_dev->dev, "Load data error\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "WSL multihit error\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "Store tag error\n");
		break;
	case 4:
		dev_err(edac_dev->dev,
			"DTB multihit from load pipeline error\n");
		break;
	case 5:
		dev_err(edac_dev->dev,
			"DTB multihit from store pipeline error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET);

	if (val & (MEMERR_CPU_LSUESR_CERR_MASK |
		   MEMERR_CPU_LSUESR_MULTCERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);

chk_mmu:
	val = readl(pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);
	if (!val)
		return;
	dev_err(edac_dev->dev,
		"CPU%d memory error MMU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X %s\n",
		ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val,
		MEMERR_CPU_MMUESR_ERRWAY_RD(val),
		MEMERR_CPU_MMUESR_ERRINDEX_RD(val),
		MEMERR_CPU_MMUESR_ERRINFO_RD(val),
		val & MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK ? "LSU" : "ICF");
	if (val & MEMERR_CPU_MMUESR_CERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable errors\n");
	if (val & MEMERR_CPU_MMUESR_MULTCERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable errors\n");
	switch (MEMERR_CPU_MMUESR_ERRTYPE_RD(val)) {
	case 0:
		dev_err(edac_dev->dev, "Stage 1 UTB hit error\n");
		break;
	case 1:
		dev_err(edac_dev->dev, "Stage 1 UTB miss error\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "Stage 1 UTB allocate error\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "TMO operation single bank error\n");
		break;
	case 4:
		dev_err(edac_dev->dev, "Stage 2 UTB error\n");
		break;
	case 5:
		dev_err(edac_dev->dev, "Stage 2 UTB miss error\n");
		break;
	case 6:
		dev_err(edac_dev->dev, "Stage 2 UTB allocate error\n");
		break;
	case 7:
		dev_err(edac_dev->dev, "TMO operation multiple bank error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET);

	edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
}

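/*
 * Check the PMD's shared L2: decode and clear the L2 error syndrome and
 * address registers, then check for L2 memory-request timeouts.
 */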
static void xgene_edac_pmd_l2_check(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_d;
	void __iomem *pg_e;
	u32 val_hi;
	u32 val_lo;
	u32 val;

	/* Check L2 */
	pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;
	val = readl(pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);
	if (!val)
		goto chk_l2c;
	val_lo = readl(pg_e + MEMERR_L2C_L2EALR_PAGE_OFFSET);
	val_hi = readl(pg_e + MEMERR_L2C_L2EAHR_PAGE_OFFSET);
	dev_err(edac_dev->dev,
		"PMD%d memory error L2C L2ESR 0x%08X @ 0x%08X.%08X\n",
		ctx->pmd, val, val_hi, val_lo);
	dev_err(edac_dev->dev,
		"ErrSyndrome 0x%02X ErrWay 0x%02X ErrCpu %d ErrGroup 0x%02X ErrAction 0x%02X\n",
		MEMERR_L2C_L2ESR_ERRSYN_RD(val),
		MEMERR_L2C_L2ESR_ERRWAY_RD(val),
		MEMERR_L2C_L2ESR_ERRCPU_RD(val),
		MEMERR_L2C_L2ESR_ERRGROUP_RD(val),
		MEMERR_L2C_L2ESR_ERRACTION_RD(val));

	if (val & MEMERR_L2C_L2ESR_ERR_MASK)
		dev_err(edac_dev->dev, "One or more correctable errors\n");
	if (val & MEMERR_L2C_L2ESR_MULTICERR_MASK)
		dev_err(edac_dev->dev, "Multiple correctable errors\n");
	if (val & MEMERR_L2C_L2ESR_UCERR_MASK)
		dev_err(edac_dev->dev, "One or more uncorrectable errors\n");
	if (val & MEMERR_L2C_L2ESR_MULTUCERR_MASK)
		dev_err(edac_dev->dev, "Multiple uncorrectable errors\n");

	switch (MEMERR_L2C_L2ESR_ERRTYPE_RD(val)) {
	case 0:
		dev_err(edac_dev->dev, "Outbound SDB parity error\n");
		break;
	case 1:
		dev_err(edac_dev->dev, "Inbound SDB parity error\n");
		break;
	case 2:
		dev_err(edac_dev->dev, "Tag ECC error\n");
		break;
	case 3:
		dev_err(edac_dev->dev, "Data ECC error\n");
		break;
	}

	/* Clear any HW errors */
	writel(val, pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET);

	if (val & (MEMERR_L2C_L2ESR_ERR_MASK |
		   MEMERR_L2C_L2ESR_MULTICERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
	if (val & (MEMERR_L2C_L2ESR_UCERR_MASK |
		   MEMERR_L2C_L2ESR_MULTUCERR_MASK))
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);

chk_l2c:
	/* Check whether any memory request timed out on the L2 cache */
	pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
	val = readl(pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
	if (val) {
		val_lo = readl(pg_d + CPUX_L2C_L2RTOALR_PAGE_OFFSET);
		val_hi = readl(pg_d + CPUX_L2C_L2RTOAHR_PAGE_OFFSET);
		dev_err(edac_dev->dev,
			"PMD%d L2C error L2C RTOSR 0x%08X @ 0x%08X.%08X\n",
			ctx->pmd, val, val_hi, val_lo);
		writel(val, pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET);
	}
}

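/* Top-level PMD check: only descend into L1/L2 if this PMD's bit is set */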
static void xgene_edac_pmd_check(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	unsigned int pcp_hp_stat;
	int i;

	xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
	if (!((PMD0_MERR_MASK << ctx->pmd) & pcp_hp_stat))
		return;

	/* Check CPU L1 error */
	for (i = 0; i < MAX_CPU_PER_PMD; i++)
		xgene_edac_pmd_l1_check(edac_dev, i);

	/* Check CPU L2 error */
	xgene_edac_pmd_l2_check(edac_dev);
}

static void xgene_edac_pmd_cpu_hw_cfg(struct edac_device_ctl_info *edac_dev,
				      int cpu)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_f = ctx->pmd_csr + cpu * CPU_CSR_STRIDE +
			     CPU_MEMERR_CPU_PAGE;

	/*
	 * Enable CPU memory error detection via the MEMERR_CPU_ICFECR,
	 * MEMERR_CPU_LSUECR and MEMERR_CPU_MMUECR control registers
	 */
	writel(0x00000301, pg_f + MEMERR_CPU_ICFECR_PAGE_OFFSET);
	writel(0x00000301, pg_f + MEMERR_CPU_LSUECR_PAGE_OFFSET);
	writel(0x00000101, pg_f + MEMERR_CPU_MMUECR_PAGE_OFFSET);
}

static void xgene_edac_pmd_hw_cfg(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_d = ctx->pmd_csr + CPU_L2C_PAGE;
	void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;

	/* Enable PMD memory error - MEMERR_L2C_L2ECR and L2C_L2RTOCR */
	writel(0x00000703, pg_e + MEMERR_L2C_L2ECR_PAGE_OFFSET);
	/* Configure L2C HW request time out feature if supported */
	if (ctx->version > 1)
		writel(0x00000119, pg_d + CPUX_L2C_L2RTOCR_PAGE_OFFSET);
}

static void xgene_edac_pmd_hw_ctl(struct edac_device_ctl_info *edac_dev,
				  bool enable)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	int i;

	/* Enable/disable PMD error interrupt */
	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		if (enable)
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       PMD0_MERR_MASK << ctx->pmd);
		else
			xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
					       PMD0_MERR_MASK << ctx->pmd);
	}

	if (enable) {
		xgene_edac_pmd_hw_cfg(edac_dev);

		/* Two CPUs per PMD */
		for (i = 0; i < MAX_CPU_PER_PMD; i++)
			xgene_edac_pmd_cpu_hw_cfg(edac_dev, i);
	}
}

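/*
 * debugfs L1 error injection: set the correctable-error bits in the
 * ICF/LSU/MMU ESRA registers (which appear to be write-to-set aliases
 * of the error status registers) for both CPUs of the PMD.
 */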
static ssize_t xgene_edac_pmd_l1_inject_ctrl_write(struct file *file,
						   const char __user *data,
						   size_t count, loff_t *ppos)
{
	struct edac_device_ctl_info *edac_dev = file->private_data;
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *cpux_pg_f;
	int i;

	for (i = 0; i < MAX_CPU_PER_PMD; i++) {
		cpux_pg_f = ctx->pmd_csr + i * CPU_CSR_STRIDE +
			    CPU_MEMERR_CPU_PAGE;

		writel(MEMERR_CPU_ICFESR_MULTCERR_MASK |
		       MEMERR_CPU_ICFESR_CERR_MASK,
		       cpux_pg_f + MEMERR_CPU_ICFESRA_PAGE_OFFSET);
		writel(MEMERR_CPU_LSUESR_MULTCERR_MASK |
		       MEMERR_CPU_LSUESR_CERR_MASK,
		       cpux_pg_f + MEMERR_CPU_LSUESRA_PAGE_OFFSET);
		writel(MEMERR_CPU_MMUESR_MULTCERR_MASK |
		       MEMERR_CPU_MMUESR_CERR_MASK,
		       cpux_pg_f + MEMERR_CPU_MMUESRA_PAGE_OFFSET);
	}
	return count;
}

static ssize_t xgene_edac_pmd_l2_inject_ctrl_write(struct file *file,
						   const char __user *data,
						   size_t count, loff_t *ppos)
{
	struct edac_device_ctl_info *edac_dev = file->private_data;
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE;

	writel(MEMERR_L2C_L2ESR_MULTUCERR_MASK |
	       MEMERR_L2C_L2ESR_MULTICERR_MASK |
	       MEMERR_L2C_L2ESR_UCERR_MASK |
	       MEMERR_L2C_L2ESR_ERR_MASK,
	       pg_e + MEMERR_L2C_L2ESRA_PAGE_OFFSET);
	return count;
}

static const struct file_operations xgene_edac_pmd_debug_inject_fops[] = {
	{
	.open = simple_open,
	.write = xgene_edac_pmd_l1_inject_ctrl_write,
	.llseek = generic_file_llseek, },
	{
	.open = simple_open,
	.write = xgene_edac_pmd_l2_inject_ctrl_write,
	.llseek = generic_file_llseek, },
	{ }
};

static void
xgene_edac_pmd_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info;
	struct dentry *dbgfs_dir;
	char name[10];

	if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
		return;

	snprintf(name, sizeof(name), "PMD%d", ctx->pmd);
	dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
	if (!dbgfs_dir)
		return;

	edac_debugfs_create_file("l1_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
				 &xgene_edac_pmd_debug_inject_fops[0]);
	edac_debugfs_create_file("l2_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
				 &xgene_edac_pmd_debug_inject_fops[1]);
}

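/* A set efuse bit means the corresponding PMD has been fused off */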
static int xgene_edac_pmd_available(u32 efuse, int pmd)
{
	return (efuse & (1 << pmd)) ? 0 : 1;
}

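/*
 * Probe one PMD node: skip PMDs fused off in the efuse, allocate an
 * edac_device for the L2C, map the PMD CSRs and enable error reporting.
 */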
static int xgene_edac_pmd_add(struct xgene_edac *edac, struct device_node *np,
			      int version)
{
	struct edac_device_ctl_info *edac_dev;
	struct xgene_edac_pmd_ctx *ctx;
	struct resource res;
	char edac_name[10];
	u32 pmd;
	int rc;
	u32 val;

	if (!devres_open_group(edac->dev, xgene_edac_pmd_add, GFP_KERNEL))
		return -ENOMEM;

	/* Determine if this PMD is disabled */
	if (of_property_read_u32(np, "pmd-controller", &pmd)) {
		dev_err(edac->dev, "no pmd-controller property\n");
		rc = -ENODEV;
		goto err_group;
	}
	rc = regmap_read(edac->efuse_map, 0, &val);
	if (rc)
		goto err_group;
	if (!xgene_edac_pmd_available(val, pmd)) {
		rc = -ENODEV;
		goto err_group;
	}

	snprintf(edac_name, sizeof(edac_name), "l2c%d", pmd);
	edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
					      edac_name, 1, "l2c", 1, 2, NULL,
					      0, edac_device_alloc_index());
	if (!edac_dev) {
		rc = -ENOMEM;
		goto err_group;
	}

	ctx = edac_dev->pvt_info;
	ctx->name = "xgene_pmd_err";
	ctx->pmd = pmd;
	ctx->edac = edac;
	ctx->edac_dev = edac_dev;
	ctx->ddev = *edac->dev;
	ctx->version = version;
	edac_dev->dev = &ctx->ddev;
	edac_dev->ctl_name = ctx->name;
	edac_dev->dev_name = ctx->name;
	edac_dev->mod_name = EDAC_MOD_STR;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no PMD resource address\n");
		goto err_free;
	}
	ctx->pmd_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(ctx->pmd_csr)) {
		dev_err(edac->dev,
			"devm_ioremap_resource failed for PMD resource address\n");
		rc = PTR_ERR(ctx->pmd_csr);
		goto err_free;
	}

	if (edac_op_state == EDAC_OPSTATE_POLL)
		edac_dev->edac_check = xgene_edac_pmd_check;

	xgene_edac_pmd_create_debugfs_nodes(edac_dev);

	rc = edac_device_add_device(edac_dev);
	if (rc > 0) {
		dev_err(edac->dev, "edac_device_add_device failed\n");
		rc = -ENOMEM;
		goto err_free;
	}

	if (edac_op_state == EDAC_OPSTATE_INT)
		edac_dev->op_state = OP_RUNNING_INTERRUPT;

	list_add(&ctx->next, &edac->pmds);

	xgene_edac_pmd_hw_ctl(edac_dev, 1);

	devres_remove_group(edac->dev, xgene_edac_pmd_add);

	dev_info(edac->dev, "X-Gene EDAC PMD%d registered\n", ctx->pmd);
	return 0;

err_free:
	edac_device_free_ctl_info(edac_dev);
err_group:
	devres_release_group(edac->dev, xgene_edac_pmd_add);
	return rc;
}

static int xgene_edac_pmd_remove(struct xgene_edac_pmd_ctx *pmd)
{
	struct edac_device_ctl_info *edac_dev = pmd->edac_dev;

	xgene_edac_pmd_hw_ctl(edac_dev, 0);
	edac_device_del_device(edac_dev->dev);
	edac_device_free_ctl_info(edac_dev);
	return 0;
}

/* L3 Error device */
#define L3C_ESR				(0x0A * 4)
#define  L3C_ESR_DATATAG_MASK		BIT(9)
#define  L3C_ESR_MULTIHIT_MASK		BIT(8)
#define  L3C_ESR_UCEVICT_MASK		BIT(6)
#define  L3C_ESR_MULTIUCERR_MASK	BIT(5)
#define  L3C_ESR_MULTICERR_MASK		BIT(4)
#define  L3C_ESR_UCERR_MASK		BIT(3)
#define  L3C_ESR_CERR_MASK		BIT(2)
#define  L3C_ESR_UCERRINTR_MASK		BIT(1)
#define  L3C_ESR_CERRINTR_MASK		BIT(0)
#define L3C_ECR				(0x0B * 4)
#define  L3C_ECR_UCINTREN		BIT(3)
#define  L3C_ECR_CINTREN		BIT(2)
#define  L3C_UCERREN			BIT(1)
#define  L3C_CERREN			BIT(0)
#define L3C_ELR				(0x0C * 4)
#define  L3C_ELR_ERRSYN(src)		((src & 0xFF800000) >> 23)
#define  L3C_ELR_ERRWAY(src)		((src & 0x007E0000) >> 17)
#define  L3C_ELR_AGENTID(src)		((src & 0x0001E000) >> 13)
#define  L3C_ELR_ERRGRP(src)		((src & 0x00000F00) >> 8)
#define  L3C_ELR_OPTYPE(src)		((src & 0x000000F0) >> 4)
#define  L3C_ELR_PADDRHIGH(src)		(src & 0x0000000F)
#define L3C_AELR			(0x0D * 4)
#define L3C_BELR			(0x0E * 4)
#define  L3C_BELR_BANK(src)		(src & 0x0000000F)

struct xgene_edac_dev_ctx {
	struct list_head	next;
	struct device		ddev;
	char			*name;
	struct xgene_edac	*edac;
	struct edac_device_ctl_info *edac_dev;
	int			edac_idx;
	void __iomem		*dev_csr;
	int			version;
};

/*
 * Version 1 of the L3 controller has broken single-bit correctable-error
 * logic for certain error syndromes. Log those errors as uncorrectable
 * instead.
 */
static bool xgene_edac_l3_promote_to_uc_err(u32 l3cesr, u32 l3celr)
{
	if (l3cesr & L3C_ESR_DATATAG_MASK) {
		switch (L3C_ELR_ERRSYN(l3celr)) {
		case 0x13C:
		case 0x0B4:
		case 0x007:
		case 0x00D:
		case 0x00E:
		case 0x019:
		case 0x01A:
		case 0x01C:
		case 0x04E:
		case 0x041:
			return true;
		}
	} else if (L3C_ELR_ERRWAY(l3celr) == 9)
		return true;

	return false;
}

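/*
 * Check the L3 cache: decode the error syndrome, log the failing
 * address and bank, clear the interrupt, and account the event as CE
 * or UE (promoting known-bad v1 syndromes to UE).
 */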
static void xgene_edac_l3_check(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 l3cesr;
	u32 l3celr;
	u32 l3caelr;
	u32 l3cbelr;

	l3cesr = readl(ctx->dev_csr + L3C_ESR);
	if (!(l3cesr & (L3C_ESR_UCERR_MASK | L3C_ESR_CERR_MASK)))
		return;

	if (l3cesr & L3C_ESR_UCERR_MASK)
		dev_err(edac_dev->dev, "L3C uncorrectable error\n");
	if (l3cesr & L3C_ESR_CERR_MASK)
		dev_warn(edac_dev->dev, "L3C correctable error\n");

	l3celr = readl(ctx->dev_csr + L3C_ELR);
	l3caelr = readl(ctx->dev_csr + L3C_AELR);
	l3cbelr = readl(ctx->dev_csr + L3C_BELR);
	if (l3cesr & L3C_ESR_MULTIHIT_MASK)
		dev_err(edac_dev->dev, "L3C multiple hit error\n");
	if (l3cesr & L3C_ESR_UCEVICT_MASK)
		dev_err(edac_dev->dev,
			"L3C dropped eviction of line with error\n");
	if (l3cesr & L3C_ESR_MULTIUCERR_MASK)
		dev_err(edac_dev->dev, "L3C multiple uncorrectable error\n");
	if (l3cesr & L3C_ESR_DATATAG_MASK)
		dev_err(edac_dev->dev,
			"L3C data error syndrome 0x%X group 0x%X\n",
			L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRGRP(l3celr));
	else
		dev_err(edac_dev->dev,
			"L3C tag error syndrome 0x%X Way of Tag 0x%X Agent ID 0x%X Operation type 0x%X\n",
			L3C_ELR_ERRSYN(l3celr), L3C_ELR_ERRWAY(l3celr),
			L3C_ELR_AGENTID(l3celr), L3C_ELR_OPTYPE(l3celr));
	/*
	 * NOTE: Address [41:38] in L3C_ELR_PADDRHIGH(l3celr).
	 *       Address [37:6] in l3caelr. Lower 6 bits are zero.
	 */
	dev_err(edac_dev->dev, "L3C error address 0x%08X.%08X bank %d\n",
		L3C_ELR_PADDRHIGH(l3celr) << 6 | (l3caelr >> 26),
		(l3caelr & 0x3FFFFFFF) << 6, L3C_BELR_BANK(l3cbelr));
	dev_err(edac_dev->dev,
		"L3C error status register value 0x%X\n", l3cesr);

	/* Clear L3C error interrupt */
	writel(0, ctx->dev_csr + L3C_ESR);

	if (ctx->version <= 1 &&
	    xgene_edac_l3_promote_to_uc_err(l3cesr, l3celr)) {
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
		return;
	}
	if (l3cesr & L3C_ESR_CERR_MASK)
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
	if (l3cesr & L3C_ESR_UCERR_MASK)
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
}

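/*
 * Enable or disable L3 error reporting. Error detection itself stays
 * enabled; only the L3 and top-level PCP interrupt enables are toggled.
 */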
static void xgene_edac_l3_hw_init(struct edac_device_ctl_info *edac_dev,
				  bool enable)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 val;

	val = readl(ctx->dev_csr + L3C_ECR);
	val |= L3C_UCERREN | L3C_CERREN;
	/* On disable, only mask the interrupt; error detection stays on */
	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		if (enable)
			val |= L3C_ECR_UCINTREN | L3C_ECR_CINTREN;
		else
			val &= ~(L3C_ECR_UCINTREN | L3C_ECR_CINTREN);
	}
	writel(val, ctx->dev_csr + L3C_ECR);

	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		/* Enable/disable L3 error top level interrupt */
		if (enable) {
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       L3C_UNCORR_ERR_MASK);
			xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
					       L3C_CORR_ERR_MASK);
		} else {
			xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
					       L3C_UNCORR_ERR_MASK);
			xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
					       L3C_CORR_ERR_MASK);
		}
	}
}

static ssize_t xgene_edac_l3_inject_ctrl_write(struct file *file,
					       const char __user *data,
					       size_t count, loff_t *ppos)
{
	struct edac_device_ctl_info *edac_dev = file->private_data;
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;

	/* Generate all errors */
	writel(0xFFFFFFFF, ctx->dev_csr + L3C_ESR);
	return count;
}

static const struct file_operations xgene_edac_l3_debug_inject_fops = {
	.open = simple_open,
	.write = xgene_edac_l3_inject_ctrl_write,
	.llseek = generic_file_llseek
};

static void
xgene_edac_l3_create_debugfs_nodes(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	struct dentry *dbgfs_dir;
	char name[10];

	if (!IS_ENABLED(CONFIG_EDAC_DEBUG) || !ctx->edac->dfs)
		return;

	snprintf(name, sizeof(name), "l3c%d", ctx->edac_idx);
	dbgfs_dir = edac_debugfs_create_dir_at(name, ctx->edac->dfs);
	if (!dbgfs_dir)
		return;

	debugfs_create_file("l3_inject_ctrl", S_IWUSR, dbgfs_dir, edac_dev,
			    &xgene_edac_l3_debug_inject_fops);
}

static int xgene_edac_l3_add(struct xgene_edac *edac, struct device_node *np,
			     int version)
{
	struct edac_device_ctl_info *edac_dev;
	struct xgene_edac_dev_ctx *ctx;
	struct resource res;
	void __iomem *dev_csr;
	int edac_idx;
	int rc = 0;

	if (!devres_open_group(edac->dev, xgene_edac_l3_add, GFP_KERNEL))
		return -ENOMEM;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no L3 resource address\n");
		goto err_release_group;
	}
	dev_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(dev_csr)) {
		dev_err(edac->dev,
			"devm_ioremap_resource failed for L3 resource address\n");
		rc = PTR_ERR(dev_csr);
		goto err_release_group;
	}

	edac_idx = edac_device_alloc_index();
	edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
					      "l3c", 1, "l3c", 1, 0, NULL, 0,
					      edac_idx);
	if (!edac_dev) {
		rc = -ENOMEM;
		goto err_release_group;
	}

	ctx = edac_dev->pvt_info;
	ctx->dev_csr = dev_csr;
	ctx->name = "xgene_l3_err";
	ctx->edac_idx = edac_idx;
	ctx->edac = edac;
	ctx->edac_dev = edac_dev;
	ctx->ddev = *edac->dev;
	ctx->version = version;
	edac_dev->dev = &ctx->ddev;
	edac_dev->ctl_name = ctx->name;
	edac_dev->dev_name = ctx->name;
	edac_dev->mod_name = EDAC_MOD_STR;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		edac_dev->edac_check = xgene_edac_l3_check;

	xgene_edac_l3_create_debugfs_nodes(edac_dev);

	rc = edac_device_add_device(edac_dev);
	if (rc > 0) {
		dev_err(edac->dev, "failed edac_device_add_device()\n");
		rc = -ENOMEM;
		goto err_ctl_free;
	}

	if (edac_op_state == EDAC_OPSTATE_INT)
		edac_dev->op_state = OP_RUNNING_INTERRUPT;

	list_add(&ctx->next, &edac->l3s);

	xgene_edac_l3_hw_init(edac_dev, 1);

	devres_remove_group(edac->dev, xgene_edac_l3_add);

	dev_info(edac->dev, "X-Gene EDAC L3 registered\n");
	return 0;

err_ctl_free:
	edac_device_free_ctl_info(edac_dev);
err_release_group:
	devres_release_group(edac->dev, xgene_edac_l3_add);
	return rc;
}

static int xgene_edac_l3_remove(struct xgene_edac_dev_ctx *l3)
{
	struct edac_device_ctl_info *edac_dev = l3->edac_dev;

	xgene_edac_l3_hw_init(edac_dev, 0);
	edac_device_del_device(l3->edac->dev);
	edac_device_free_ctl_info(edac_dev);
	return 0;
}

/* SoC error device */
#define IOBAXIS0TRANSERRINTSTS		0x0000
#define  IOBAXIS0_M_ILLEGAL_ACCESS_MASK	BIT(1)
#define  IOBAXIS0_ILLEGAL_ACCESS_MASK	BIT(0)
#define IOBAXIS0TRANSERRINTMSK		0x0004
#define IOBAXIS0TRANSERRREQINFOL	0x0008
#define IOBAXIS0TRANSERRREQINFOH	0x000c
#define  REQTYPE_RD(src)		(((src) & BIT(0)))
#define  ERRADDRH_RD(src)		(((src) & 0xffc00000) >> 22)
#define IOBAXIS1TRANSERRINTSTS		0x0010
#define IOBAXIS1TRANSERRINTMSK		0x0014
#define IOBAXIS1TRANSERRREQINFOL	0x0018
#define IOBAXIS1TRANSERRREQINFOH	0x001c
#define IOBPATRANSERRINTSTS		0x0020
#define  IOBPA_M_REQIDRAM_CORRUPT_MASK	BIT(7)
#define  IOBPA_REQIDRAM_CORRUPT_MASK	BIT(6)
#define  IOBPA_M_TRANS_CORRUPT_MASK	BIT(5)
#define  IOBPA_TRANS_CORRUPT_MASK	BIT(4)
#define  IOBPA_M_WDATA_CORRUPT_MASK	BIT(3)
#define  IOBPA_WDATA_CORRUPT_MASK	BIT(2)
#define  IOBPA_M_RDATA_CORRUPT_MASK	BIT(1)
#define  IOBPA_RDATA_CORRUPT_MASK	BIT(0)
#define IOBBATRANSERRINTSTS		0x0030
#define  M_ILLEGAL_ACCESS_MASK		BIT(15)
#define  ILLEGAL_ACCESS_MASK		BIT(14)
#define  M_WIDRAM_CORRUPT_MASK		BIT(13)
#define  WIDRAM_CORRUPT_MASK		BIT(12)
#define  M_RIDRAM_CORRUPT_MASK		BIT(11)
#define  RIDRAM_CORRUPT_MASK		BIT(10)
#define  M_TRANS_CORRUPT_MASK		BIT(9)
#define  TRANS_CORRUPT_MASK		BIT(8)
#define  M_WDATA_CORRUPT_MASK		BIT(7)
#define  WDATA_CORRUPT_MASK		BIT(6)
#define  M_RBM_POISONED_REQ_MASK	BIT(5)
#define  RBM_POISONED_REQ_MASK		BIT(4)
#define  M_XGIC_POISONED_REQ_MASK	BIT(3)
#define  XGIC_POISONED_REQ_MASK		BIT(2)
#define  M_WRERR_RESP_MASK		BIT(1)
#define  WRERR_RESP_MASK		BIT(0)
#define IOBBATRANSERRREQINFOL		0x0038
#define IOBBATRANSERRREQINFOH		0x003c
#define  REQTYPE_F2_RD(src)		((src) & BIT(0))
#define  ERRADDRH_F2_RD(src)		(((src) & 0xffc00000) >> 22)
#define IOBBATRANSERRCSWREQID		0x0040
#define XGICTRANSERRINTSTS		0x0050
#define  M_WR_ACCESS_ERR_MASK		BIT(3)
#define  WR_ACCESS_ERR_MASK		BIT(2)
#define  M_RD_ACCESS_ERR_MASK		BIT(1)
#define  RD_ACCESS_ERR_MASK		BIT(0)
#define XGICTRANSERRINTMSK		0x0054
#define XGICTRANSERRREQINFO		0x0058
#define  REQTYPE_MASK			BIT(26)
#define  ERRADDR_RD(src)		((src) & 0x03ffffff)
#define GLBL_ERR_STS			0x0800
#define  MDED_ERR_MASK			BIT(3)
#define  DED_ERR_MASK			BIT(2)
#define  MSEC_ERR_MASK			BIT(1)
#define  SEC_ERR_MASK			BIT(0)
#define GLBL_SEC_ERRL			0x0810
#define GLBL_SEC_ERRH			0x0818
#define GLBL_MSEC_ERRL			0x0820
#define GLBL_MSEC_ERRH			0x0828
#define GLBL_DED_ERRL			0x0830
#define GLBL_DED_ERRLMASK		0x0834
#define GLBL_DED_ERRH			0x0838
#define GLBL_DED_ERRHMASK		0x083c
#define GLBL_MDED_ERRL			0x0840
#define GLBL_MDED_ERRLMASK		0x0844
#define GLBL_MDED_ERRH			0x0848
#define GLBL_MDED_ERRHMASK		0x084c

/* IO Bus Registers */
#define RBCSR				0x0000
#define STICKYERR_MASK			BIT(0)
#define RBEIR				0x0008
#define AGENT_OFFLINE_ERR_MASK		BIT(30)
#define UNIMPL_RBPAGE_ERR_MASK		BIT(29)
#define WORD_ALIGNED_ERR_MASK		BIT(28)
#define PAGE_ACCESS_ERR_MASK		BIT(27)
#define WRITE_ACCESS_MASK		BIT(26)
#define RBERRADDR_RD(src)		((src) & 0x03FFFFFF)

static const char * const soc_mem_err_v1[] = {
	"10GbE0",
	"10GbE1",
	"Security",
	"SATA45",
	"SATA23/ETH23",
	"SATA01/ETH01",
	"USB1",
	"USB0",
	"QML",
	"QM0",
	"QM1 (XGbE01)",
	"PCIE4",
	"PCIE3",
	"PCIE2",
	"PCIE1",
	"PCIE0",
	"CTX Manager",
	"OCM",
	"1GbE",
	"CLE",
	"AHBC",
	"PktDMA",
	"GFC",
	"MSLIM",
	"10GbE2",
	"10GbE3",
	"QM2 (XGbE23)",
	"IOB",
	"unknown",
	"unknown",
	"unknown",
	"unknown",
};

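/*
 * Report XGIC transaction errors and IOB global memory errors
 * (single-bit SEC as CE, double-bit DED as UE), clearing each status
 * register after logging it.
 */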
static void xgene_edac_iob_gic_report(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 err_addr_lo;
	u32 err_addr_hi;
	u32 reg;
	u32 info;

	/* GIC transaction error interrupt */
	reg = readl(ctx->dev_csr + XGICTRANSERRINTSTS);
	if (!reg)
		goto chk_iob_err;
	dev_err(edac_dev->dev, "XGIC transaction error\n");
	if (reg & RD_ACCESS_ERR_MASK)
		dev_err(edac_dev->dev, "XGIC read size error\n");
	if (reg & M_RD_ACCESS_ERR_MASK)
		dev_err(edac_dev->dev, "Multiple XGIC read size error\n");
	if (reg & WR_ACCESS_ERR_MASK)
		dev_err(edac_dev->dev, "XGIC write size error\n");
	if (reg & M_WR_ACCESS_ERR_MASK)
		dev_err(edac_dev->dev, "Multiple XGIC write size error\n");
	info = readl(ctx->dev_csr + XGICTRANSERRREQINFO);
	dev_err(edac_dev->dev, "XGIC %s access @ 0x%08X (0x%08X)\n",
		info & REQTYPE_MASK ? "read" : "write", ERRADDR_RD(info),
		info);
	writel(reg, ctx->dev_csr + XGICTRANSERRINTSTS);

chk_iob_err:
	/* IOB memory error */
	reg = readl(ctx->dev_csr + GLBL_ERR_STS);
	if (!reg)
		return;
	if (reg & SEC_ERR_MASK) {
		err_addr_lo = readl(ctx->dev_csr + GLBL_SEC_ERRL);
		err_addr_hi = readl(ctx->dev_csr + GLBL_SEC_ERRH);
		dev_err(edac_dev->dev,
			"IOB single-bit correctable memory error at 0x%08X.%08X\n",
			err_addr_lo, err_addr_hi);
		writel(err_addr_lo, ctx->dev_csr + GLBL_SEC_ERRL);
		writel(err_addr_hi, ctx->dev_csr + GLBL_SEC_ERRH);
	}
	if (reg & MSEC_ERR_MASK) {
		err_addr_lo = readl(ctx->dev_csr + GLBL_MSEC_ERRL);
		err_addr_hi = readl(ctx->dev_csr + GLBL_MSEC_ERRH);
		dev_err(edac_dev->dev,
			"Multiple IOB single-bit correctable memory errors at 0x%08X.%08X\n",
			err_addr_lo, err_addr_hi);
		writel(err_addr_lo, ctx->dev_csr + GLBL_MSEC_ERRL);
		writel(err_addr_hi, ctx->dev_csr + GLBL_MSEC_ERRH);
	}
	if (reg & (SEC_ERR_MASK | MSEC_ERR_MASK))
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);

	if (reg & DED_ERR_MASK) {
		err_addr_lo = readl(ctx->dev_csr + GLBL_DED_ERRL);
		err_addr_hi = readl(ctx->dev_csr + GLBL_DED_ERRH);
		dev_err(edac_dev->dev,
			"IOB double-bit uncorrectable memory error at 0x%08X.%08X\n",
			err_addr_lo, err_addr_hi);
		writel(err_addr_lo, ctx->dev_csr + GLBL_DED_ERRL);
		writel(err_addr_hi, ctx->dev_csr + GLBL_DED_ERRH);
	}
	if (reg & MDED_ERR_MASK) {
		err_addr_lo = readl(ctx->dev_csr + GLBL_MDED_ERRL);
		err_addr_hi = readl(ctx->dev_csr + GLBL_MDED_ERRH);
		dev_err(edac_dev->dev,
			"Multiple IOB double-bit uncorrectable memory errors at 0x%08X.%08X\n",
			err_addr_lo, err_addr_hi);
		writel(err_addr_lo, ctx->dev_csr + GLBL_MDED_ERRL);
		writel(err_addr_hi, ctx->dev_csr + GLBL_MDED_ERRH);
	}
	if (reg & (DED_ERR_MASK | MDED_ERR_MASK))
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
}

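/*
 * Report register-bus access errors (via the RB regmap, when present)
 * and IOB bridge-agent (BA) transaction errors, clearing the sticky
 * status bits after logging them.
 */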
static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 err_addr_lo;
	u32 err_addr_hi;
	u32 reg;

	/* If the register bus resource isn't available, just skip it */
	if (!ctx->edac->rb_map)
		goto rb_skip;

	/*
	 * Check RB access errors
	 * 1. Out of range
	 * 2. Un-implemented page
	 * 3. Un-aligned access
	 * 4. Offline slave IP
	 */
	if (regmap_read(ctx->edac->rb_map, RBCSR, &reg))
		return;
	if (reg & STICKYERR_MASK) {
		bool write;
		u32 address;

		dev_err(edac_dev->dev, "IOB bus access error(s)\n");
		if (regmap_read(ctx->edac->rb_map, RBEIR, &reg))
			return;
		write = reg & WRITE_ACCESS_MASK ? 1 : 0;
		address = RBERRADDR_RD(reg);
		if (reg & AGENT_OFFLINE_ERR_MASK)
			dev_err(edac_dev->dev,
				"IOB bus %s access to offline agent error\n",
				write ? "write" : "read");
		if (reg & UNIMPL_RBPAGE_ERR_MASK)
			dev_err(edac_dev->dev,
				"IOB bus %s access to unimplemented page error\n",
				write ? "write" : "read");
		if (reg & WORD_ALIGNED_ERR_MASK)
			dev_err(edac_dev->dev,
				"IOB bus %s word aligned access error\n",
				write ? "write" : "read");
		if (reg & PAGE_ACCESS_ERR_MASK)
			dev_err(edac_dev->dev,
				"IOB bus %s access to out-of-range page error\n",
				write ? "write" : "read");
		if (regmap_write(ctx->edac->rb_map, RBEIR, 0))
			return;
		if (regmap_write(ctx->edac->rb_map, RBCSR, 0))
			return;
	}
rb_skip:

	/* IOB Bridge agent transaction error interrupt */
	reg = readl(ctx->dev_csr + IOBBATRANSERRINTSTS);
	if (!reg)
		return;

	dev_err(edac_dev->dev, "IOB bridge agent (BA) transaction error\n");
	if (reg & WRERR_RESP_MASK)
		dev_err(edac_dev->dev, "IOB BA write response error\n");
	if (reg & M_WRERR_RESP_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA write response error\n");
	if (reg & XGIC_POISONED_REQ_MASK)
		dev_err(edac_dev->dev, "IOB BA XGIC poisoned write error\n");
	if (reg & M_XGIC_POISONED_REQ_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA XGIC poisoned write error\n");
	if (reg & RBM_POISONED_REQ_MASK)
		dev_err(edac_dev->dev, "IOB BA RBM poisoned write error\n");
	if (reg & M_RBM_POISONED_REQ_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA RBM poisoned write error\n");
	if (reg & WDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB BA write error\n");
	if (reg & M_WDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev, "Multiple IOB BA write error\n");
	if (reg & TRANS_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB BA transaction error\n");
	if (reg & M_TRANS_CORRUPT_MASK)
		dev_err(edac_dev->dev, "Multiple IOB BA transaction error\n");
	if (reg & RIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"IOB BA RDIDRAM read transaction ID error\n");
	if (reg & M_RIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA RDIDRAM read transaction ID error\n");
	if (reg & WIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"IOB BA RDIDRAM write transaction ID error\n");
	if (reg & M_WIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA RDIDRAM write transaction ID error\n");
	if (reg & ILLEGAL_ACCESS_MASK)
		dev_err(edac_dev->dev,
			"IOB BA XGIC/RB illegal access error\n");
	if (reg & M_ILLEGAL_ACCESS_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB BA XGIC/RB illegal access error\n");

	err_addr_lo = readl(ctx->dev_csr + IOBBATRANSERRREQINFOL);
	err_addr_hi = readl(ctx->dev_csr + IOBBATRANSERRREQINFOH);
	dev_err(edac_dev->dev, "IOB BA %s access at 0x%02X.%08X (0x%08X)\n",
		REQTYPE_F2_RD(err_addr_hi) ? "read" : "write",
		ERRADDRH_F2_RD(err_addr_hi), err_addr_lo, err_addr_hi);
	if (reg & WRERR_RESP_MASK)
		dev_err(edac_dev->dev, "IOB BA requestor ID 0x%08X\n",
			readl(ctx->dev_csr + IOBBATRANSERRCSWREQID));
	writel(reg, ctx->dev_csr + IOBBATRANSERRINTSTS);
}

static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	u32 err_addr_lo;
	u32 err_addr_hi;
	u32 reg;

	/* IOB Processing agent transaction error interrupt */
	reg = readl(ctx->dev_csr + IOBPATRANSERRINTSTS);
	if (!reg)
		goto chk_iob_axi0;
	dev_err(edac_dev->dev, "IOB processing agent (PA) transaction error\n");
	if (reg & IOBPA_RDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB PA read data RAM error\n");
	if (reg & IOBPA_M_RDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB PA read data RAM error\n");
	if (reg & IOBPA_WDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB PA write data RAM error\n");
	if (reg & IOBPA_M_WDATA_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB PA write data RAM error\n");
	if (reg & IOBPA_TRANS_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB PA transaction error\n");
	if (reg & IOBPA_M_TRANS_CORRUPT_MASK)
		dev_err(edac_dev->dev, "Multiple IOB PA transaction error\n");
	if (reg & IOBPA_REQIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev, "IOB PA transaction ID RAM error\n");
	if (reg & IOBPA_M_REQIDRAM_CORRUPT_MASK)
		dev_err(edac_dev->dev,
			"Multiple IOB PA transaction ID RAM error\n");
	writel(reg, ctx->dev_csr + IOBPATRANSERRINTSTS);

chk_iob_axi0:
	/* IOB AXI0 Error */
	reg = readl(ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);
	if (!reg)
		goto chk_iob_axi1;
	err_addr_lo = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOL);
	err_addr_hi = readl(ctx->dev_csr + IOBAXIS0TRANSERRREQINFOH);
	dev_err(edac_dev->dev,
		"%sAXI slave 0 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
		reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
		REQTYPE_RD(err_addr_hi) ? "read" : "write",
		ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
	writel(reg, ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);

chk_iob_axi1:
	/* IOB AXI1 Error */
	reg = readl(ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
	if (!reg)
		return;
	err_addr_lo = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOL);
	err_addr_hi = readl(ctx->dev_csr + IOBAXIS1TRANSERRREQINFOH);
	dev_err(edac_dev->dev,
		"%sAXI slave 1 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
		reg & IOBAXIS1_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
		REQTYPE_RD(err_addr_hi) ? "read" : "write",
		ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
	writel(reg, ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
}

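/*
 * Top-level SoC error check: read the PCP high/low-priority interrupt
 * status and the memory error status, then dispatch to the XGIC, register
 * bus, bridge agent and processing agent reporters above.
 */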
static void xgene_edac_soc_check(struct edac_device_ctl_info *edac_dev)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;
	const char * const *soc_mem_err = NULL;
	u32 pcp_hp_stat;
	u32 pcp_lp_stat;
	u32 reg;
	int i;

	xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat);
	xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat);
	xgene_edac_pcp_rd(ctx->edac, MEMERRINTSTS, &reg);
	if (!((pcp_hp_stat & (IOB_PA_ERR_MASK | IOB_BA_ERR_MASK |
			      IOB_XGIC_ERR_MASK | IOB_RB_ERR_MASK)) ||
	      (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) || reg))
		return;

	if (pcp_hp_stat & IOB_XGIC_ERR_MASK)
		xgene_edac_iob_gic_report(edac_dev);

	if (pcp_hp_stat & (IOB_RB_ERR_MASK | IOB_BA_ERR_MASK))
		xgene_edac_rb_report(edac_dev);

	if (pcp_hp_stat & IOB_PA_ERR_MASK)
		xgene_edac_pa_report(edac_dev);

	if (pcp_lp_stat & CSW_SWITCH_TRACE_ERR_MASK) {
		dev_info(edac_dev->dev,
			 "CSW switch trace correctable memory parity error\n");
		edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
	}

	if (!reg)
		return;
	if (ctx->version == 1)
		soc_mem_err = soc_mem_err_v1;
	if (!soc_mem_err) {
		dev_err(edac_dev->dev, "SoC memory parity error 0x%08X\n",
			reg);
		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
		return;
	}
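	/*
	 * Walk bits 0-30 of the memory error status: each set bit names one
	 * SoC IP block in the soc_mem_err[] table.
	 */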
	for (i = 0; i < 31; i++) {
		if (reg & (1 << i)) {
			dev_err(edac_dev->dev, "%s memory parity error\n",
				soc_mem_err[i]);
			edac_device_handle_ue(edac_dev, 0, 0,
					      edac_dev->ctl_name);
		}
	}
}

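/*
 * Mask or unmask the SoC error interrupt sources. Note the inverted sense
 * of the mask registers: clearing a bit unmasks (enables) the interrupt,
 * setting it masks the interrupt off.
 */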
static void xgene_edac_soc_hw_init(struct edac_device_ctl_info *edac_dev,
				   bool enable)
{
	struct xgene_edac_dev_ctx *ctx = edac_dev->pvt_info;

	/* Enable or disable the SoC IP error interrupts */
	if (edac_dev->op_state == OP_RUNNING_INTERRUPT) {
		if (enable) {
			xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK,
					       IOB_PA_ERR_MASK |
					       IOB_BA_ERR_MASK |
					       IOB_XGIC_ERR_MASK |
					       IOB_RB_ERR_MASK);
			xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK,
					       CSW_SWITCH_TRACE_ERR_MASK);
		} else {
			xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK,
					       IOB_PA_ERR_MASK |
					       IOB_BA_ERR_MASK |
					       IOB_XGIC_ERR_MASK |
					       IOB_RB_ERR_MASK);
			xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK,
					       CSW_SWITCH_TRACE_ERR_MASK);
		}

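		/*
		 * Writing 0 unmasks every error source in these registers;
		 * writing all-ones masks them all off.
		 */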
		writel(enable ? 0x0 : 0xFFFFFFFF,
		       ctx->dev_csr + IOBAXIS0TRANSERRINTMSK);
		writel(enable ? 0x0 : 0xFFFFFFFF,
		       ctx->dev_csr + IOBAXIS1TRANSERRINTMSK);
		writel(enable ? 0x0 : 0xFFFFFFFF,
		       ctx->dev_csr + XGICTRANSERRINTMSK);

		xgene_edac_pcp_setbits(ctx->edac, MEMERRINTMSK,
				       enable ? 0x0 : 0xFFFFFFFF);
	}
}

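/*
 * Allocate and register an EDAC device for the SoC error block. Resources
 * are tracked in a devres group so any failure below unwinds cleanly.
 */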
static int xgene_edac_soc_add(struct xgene_edac *edac, struct device_node *np,
			      int version)
{
	struct edac_device_ctl_info *edac_dev;
	struct xgene_edac_dev_ctx *ctx;
	void __iomem *dev_csr;
	struct resource res;
	int edac_idx;
	int rc;

	if (!devres_open_group(edac->dev, xgene_edac_soc_add, GFP_KERNEL))
		return -ENOMEM;

	rc = of_address_to_resource(np, 0, &res);
	if (rc < 0) {
		dev_err(edac->dev, "no SoC resource address\n");
		goto err_release_group;
	}
	dev_csr = devm_ioremap_resource(edac->dev, &res);
	if (IS_ERR(dev_csr)) {
		dev_err(edac->dev,
			"devm_ioremap_resource failed for SoC resource address\n");
		rc = PTR_ERR(dev_csr);
		goto err_release_group;
	}

	edac_idx = edac_device_alloc_index();
	edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx),
					      "SOC", 1, "SOC", 1, 2, NULL, 0,
					      edac_idx);
	if (!edac_dev) {
		rc = -ENOMEM;
		goto err_release_group;
	}

	ctx = edac_dev->pvt_info;
	ctx->dev_csr = dev_csr;
	ctx->name = "xgene_soc_err";
	ctx->edac_idx = edac_idx;
	ctx->edac = edac;
	ctx->edac_dev = edac_dev;
	ctx->ddev = *edac->dev;
	ctx->version = version;
	edac_dev->dev = &ctx->ddev;
	edac_dev->ctl_name = ctx->name;
	edac_dev->dev_name = ctx->name;
	edac_dev->mod_name = EDAC_MOD_STR;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		edac_dev->edac_check = xgene_edac_soc_check;

	rc = edac_device_add_device(edac_dev);
	if (rc > 0) {
		dev_err(edac->dev, "failed edac_device_add_device()\n");
		rc = -ENOMEM;
		goto err_ctl_free;
	}

	if (edac_op_state == EDAC_OPSTATE_INT)
		edac_dev->op_state = OP_RUNNING_INTERRUPT;

	list_add(&ctx->next, &edac->socs);

	xgene_edac_soc_hw_init(edac_dev, true);

	devres_remove_group(edac->dev, xgene_edac_soc_add);

	dev_info(edac->dev, "X-Gene EDAC SoC registered\n");

	return 0;

err_ctl_free:
	edac_device_free_ctl_info(edac_dev);
err_release_group:
	devres_release_group(edac->dev, xgene_edac_soc_add);
	return rc;
}

static int xgene_edac_soc_remove(struct xgene_edac_dev_ctx *soc)
{
	struct edac_device_ctl_info *edac_dev = soc->edac_dev;

	xgene_edac_soc_hw_init(edac_dev, false);
	edac_device_del_device(soc->edac->dev);
	edac_device_free_ctl_info(edac_dev);
	return 0;
}

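/*
 * All PCP error interrupt lines are requested with this single shared
 * handler: it samples the high- and low-priority status registers and fans
 * out to every registered MCU, PMD, L3 and SoC block.
 */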
static irqreturn_t xgene_edac_isr(int irq, void *dev_id)
{
	struct xgene_edac *ctx = dev_id;
	struct xgene_edac_pmd_ctx *pmd;
	struct xgene_edac_dev_ctx *node;
	unsigned int pcp_hp_stat;
	unsigned int pcp_lp_stat;

	xgene_edac_pcp_rd(ctx, PCPHPERRINTSTS, &pcp_hp_stat);
	xgene_edac_pcp_rd(ctx, PCPLPERRINTSTS, &pcp_lp_stat);
	if ((MCU_UNCORR_ERR_MASK & pcp_hp_stat) ||
	    (MCU_CTL_ERR_MASK & pcp_hp_stat) ||
	    (MCU_CORR_ERR_MASK & pcp_lp_stat)) {
		struct xgene_edac_mc_ctx *mcu;

		list_for_each_entry(mcu, &ctx->mcus, next)
			xgene_edac_mc_check(mcu->mci);
	}

	list_for_each_entry(pmd, &ctx->pmds, next) {
		if ((PMD0_MERR_MASK << pmd->pmd) & pcp_hp_stat)
			xgene_edac_pmd_check(pmd->edac_dev);
	}

	list_for_each_entry(node, &ctx->l3s, next)
		xgene_edac_l3_check(node->edac_dev);

	list_for_each_entry(node, &ctx->socs, next)
		xgene_edac_soc_check(node->edac_dev);

	return IRQ_HANDLED;
}

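/*
 * Probe: map the PCP CSR region, look up the syscon regmaps shared by the
 * error blocks, hook the error interrupts when running in interrupt mode,
 * then scan the child device-tree nodes and register a handler for each
 * enabled error block.
 */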
static int xgene_edac_probe(struct platform_device *pdev)
{
	struct xgene_edac *edac;
	struct device_node *child;
	struct resource *res;
	int rc;

	edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL);
	if (!edac)
		return -ENOMEM;

	edac->dev = &pdev->dev;
	platform_set_drvdata(pdev, edac);
	INIT_LIST_HEAD(&edac->mcus);
	INIT_LIST_HEAD(&edac->pmds);
	INIT_LIST_HEAD(&edac->l3s);
	INIT_LIST_HEAD(&edac->socs);
	spin_lock_init(&edac->lock);
	mutex_init(&edac->mc_lock);

	edac->csw_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
							"regmap-csw");
	if (IS_ERR(edac->csw_map)) {
		dev_err(edac->dev, "unable to get syscon regmap csw\n");
		rc = PTR_ERR(edac->csw_map);
		goto out_err;
	}

	edac->mcba_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
							 "regmap-mcba");
	if (IS_ERR(edac->mcba_map)) {
		dev_err(edac->dev, "unable to get syscon regmap mcba\n");
		rc = PTR_ERR(edac->mcba_map);
		goto out_err;
	}

	edac->mcbb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
							 "regmap-mcbb");
	if (IS_ERR(edac->mcbb_map)) {
		dev_err(edac->dev, "unable to get syscon regmap mcbb\n");
		rc = PTR_ERR(edac->mcbb_map);
		goto out_err;
	}
	edac->efuse_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
							  "regmap-efuse");
	if (IS_ERR(edac->efuse_map)) {
		dev_err(edac->dev, "unable to get syscon regmap efuse\n");
		rc = PTR_ERR(edac->efuse_map);
		goto out_err;
	}

	/*
	 * NOTE: The register bus resource is optional for backwards
	 * compatibility.
	 */
	edac->rb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
						       "regmap-rb");
	if (IS_ERR(edac->rb_map)) {
		dev_warn(edac->dev, "missing syscon regmap rb\n");
		edac->rb_map = NULL;
	}

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	edac->pcp_csr = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(edac->pcp_csr)) {
		dev_err(&pdev->dev, "failed to map PCP resource\n");
		rc = PTR_ERR(edac->pcp_csr);
		goto out_err;
	}

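	/*
	 * In interrupt mode the platform provides three PCP error interrupt
	 * lines; all of them are routed to the same shared handler.
	 */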
	if (edac_op_state == EDAC_OPSTATE_INT) {
		int irq;
		int i;

		for (i = 0; i < 3; i++) {
			irq = platform_get_irq(pdev, i);
			if (irq < 0) {
				dev_err(&pdev->dev, "No IRQ resource\n");
				rc = -EINVAL;
				goto out_err;
			}
			rc = devm_request_irq(&pdev->dev, irq,
					      xgene_edac_isr, IRQF_SHARED,
					      dev_name(&pdev->dev), edac);
			if (rc) {
				dev_err(&pdev->dev,
					"Could not request IRQ %d\n", irq);
				goto out_err;
			}
		}
	}

	edac->dfs = edac_debugfs_create_dir(pdev->dev.kobj.name);

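	/*
	 * Register one handler per enabled child node. A minimal,
	 * hypothetical sketch of a matching device-tree fragment (addresses
	 * are examples; property names follow the syscon lookups above and
	 * the compatibles below):
	 *
	 *	edac@78800000 {
	 *		compatible = "apm,xgene-edac";
	 *		regmap-csw = <&csw>;
	 *		regmap-mcba = <&mcba>;
	 *		regmap-mcbb = <&mcbb>;
	 *		regmap-efuse = <&efuse>;
	 *		reg = <0x0 0x78800000 0x0 0x100>;
	 *
	 *		edacmc@7e800000 {
	 *			compatible = "apm,xgene-edac-mc";
	 *			reg = <0x0 0x7e800000 0x0 0x1000>;
	 *		};
	 *	};
	 */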
	for_each_child_of_node(pdev->dev.of_node, child) {
		if (!of_device_is_available(child))
			continue;
		if (of_device_is_compatible(child, "apm,xgene-edac-mc"))
			xgene_edac_mc_add(edac, child);
		if (of_device_is_compatible(child, "apm,xgene-edac-pmd"))
			xgene_edac_pmd_add(edac, child, 1);
		if (of_device_is_compatible(child, "apm,xgene-edac-pmd-v2"))
			xgene_edac_pmd_add(edac, child, 2);
		if (of_device_is_compatible(child, "apm,xgene-edac-l3"))
			xgene_edac_l3_add(edac, child, 1);
		if (of_device_is_compatible(child, "apm,xgene-edac-l3-v2"))
			xgene_edac_l3_add(edac, child, 2);
		if (of_device_is_compatible(child, "apm,xgene-edac-soc"))
			xgene_edac_soc_add(edac, child, 0);
		if (of_device_is_compatible(child, "apm,xgene-edac-soc-v1"))
			xgene_edac_soc_add(edac, child, 1);
	}

	return 0;

out_err:
	return rc;
}

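/*
 * Unregister every error block. The _safe iterators matter here: each
 * remove call frees the context that embeds the list node being walked.
 */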
static int xgene_edac_remove(struct platform_device *pdev)
{
	struct xgene_edac *edac = dev_get_drvdata(&pdev->dev);
	struct xgene_edac_mc_ctx *mcu;
	struct xgene_edac_mc_ctx *temp_mcu;
	struct xgene_edac_pmd_ctx *pmd;
	struct xgene_edac_pmd_ctx *temp_pmd;
	struct xgene_edac_dev_ctx *node;
	struct xgene_edac_dev_ctx *temp_node;

	list_for_each_entry_safe(mcu, temp_mcu, &edac->mcus, next)
		xgene_edac_mc_remove(mcu);

	list_for_each_entry_safe(pmd, temp_pmd, &edac->pmds, next)
		xgene_edac_pmd_remove(pmd);

	list_for_each_entry_safe(node, temp_node, &edac->l3s, next)
		xgene_edac_l3_remove(node);

	list_for_each_entry_safe(node, temp_node, &edac->socs, next)
		xgene_edac_soc_remove(node);

	return 0;
}

static const struct of_device_id xgene_edac_of_match[] = {
	{ .compatible = "apm,xgene-edac" },
	{},
};
MODULE_DEVICE_TABLE(of, xgene_edac_of_match);

static struct platform_driver xgene_edac_driver = {
	.probe = xgene_edac_probe,
	.remove = xgene_edac_remove,
	.driver = {
		.name = "xgene-edac",
		.of_match_table = xgene_edac_of_match,
	},
};

static int __init xgene_edac_init(void)
{
	int rc;

	/* Make sure the error reporting method is sane */
	switch (edac_op_state) {
	case EDAC_OPSTATE_POLL:
	case EDAC_OPSTATE_INT:
		break;
	default:
		edac_op_state = EDAC_OPSTATE_INT;
		break;
	}

	rc = platform_driver_register(&xgene_edac_driver);
	if (rc)
		edac_printk(KERN_ERR, EDAC_MOD_STR,
			    "EDAC driver registration failed\n");

	return rc;
}
module_init(xgene_edac_init);

static void __exit xgene_edac_exit(void)
{
	platform_driver_unregister(&xgene_edac_driver);
}
module_exit(xgene_edac_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Feng Kan <fkan@apm.com>");
MODULE_DESCRIPTION("APM X-Gene EDAC driver");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state,
		 "EDAC error reporting state: 0=Poll, 2=Interrupt");
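
/*
 * Example (hypothetical invocation): force polled mode at load time with
 *
 *	modprobe xgene_edac edac_op_state=0
 */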