1 /*
2  * Copyright 2013 Tilera Corporation. All Rights Reserved.
3  *
4  *   This program is free software; you can redistribute it and/or
5  *   modify it under the terms of the GNU General Public License
6  *   as published by the Free Software Foundation, version 2.
7  *
8  *   This program is distributed in the hope that it will be useful, but
9  *   WITHOUT ANY WARRANTY; without even the implied warranty of
10  *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11  *   NON INFRINGEMENT.  See the GNU General Public License for
12  *   more details.
13  *
14  * A code-rewriter that handles unaligned exceptions.
15  */
16 
17 #include <linux/smp.h>
18 #include <linux/ptrace.h>
19 #include <linux/slab.h>
20 #include <linux/sched/debug.h>
21 #include <linux/sched/task.h>
22 #include <linux/thread_info.h>
23 #include <linux/uaccess.h>
24 #include <linux/mman.h>
25 #include <linux/types.h>
26 #include <linux/err.h>
27 #include <linux/extable.h>
28 #include <linux/compat.h>
29 #include <linux/prctl.h>
30 #include <asm/cacheflush.h>
31 #include <asm/traps.h>
32 #include <linux/uaccess.h>
33 #include <asm/unaligned.h>
34 #include <arch/abi.h>
35 #include <arch/spr_def.h>
36 #include <arch/opcode.h>
37 
38 
39 /*
40  * This file handles unaligned exceptions for tile-Gx. The tilepro's unaligned
41  * exceptions are handled in single_step.c.
42  */
43 
44 int unaligned_printk;
45 
46 static int __init setup_unaligned_printk(char *str)
47 {
48 	long val;
49 	if (kstrtol(str, 0, &val) != 0)
50 		return 0;
51 	unaligned_printk = val;
52 	pr_info("Printk for each unaligned data access is %s\n",
53 		unaligned_printk ? "enabled" : "disabled");
54 	return 1;
55 }
56 __setup("unaligned_printk=", setup_unaligned_printk);
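
/*
 * Usage note: the __setup() hook above means the per-access logging can be
 * requested from the kernel command line, e.g. by booting with
 * "unaligned_printk=1"; unaligned_fixup_count below simply counts how many
 * unaligned-access fixups have been performed.
 */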
57 
58 unsigned int unaligned_fixup_count;
59 
60 #ifdef __tilegx__
61 
62 /*
63  * Unaligned-data JIT fixup code fragment. Reserved space is 128 bytes.
64  * The 1st 64-bit word saves the fault PC address, the 2nd word is the fault
65  * instruction bundle, followed by 14 JIT bundles.
66  */
67 
68 struct unaligned_jit_fragment {
69 	unsigned long       pc;
70 	tilegx_bundle_bits  bundle;
71 	tilegx_bundle_bits  insn[14];
72 };
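
/*
 * Layout sketch: 8-byte pc + 8-byte fault bundle + 14 * 8-byte JIT bundles
 * = 128 bytes, matching the reserved space noted above; each fragment thus
 * occupies one 128-byte slot of the per-task JIT page set up in
 * do_unaligned().
 */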
73 
74 /*
75  * Check if a nop or fnop is at the bundle's X0 pipeline.
76  */
77 
78 static bool is_bundle_x0_nop(tilegx_bundle_bits bundle)
79 {
80 	return (((get_UnaryOpcodeExtension_X0(bundle) ==
81 		  NOP_UNARY_OPCODE_X0) &&
82 		 (get_RRROpcodeExtension_X0(bundle) ==
83 		  UNARY_RRR_0_OPCODE_X0) &&
84 		 (get_Opcode_X0(bundle) ==
85 		  RRR_0_OPCODE_X0)) ||
86 		((get_UnaryOpcodeExtension_X0(bundle) ==
87 		  FNOP_UNARY_OPCODE_X0) &&
88 		 (get_RRROpcodeExtension_X0(bundle) ==
89 		  UNARY_RRR_0_OPCODE_X0) &&
90 		 (get_Opcode_X0(bundle) ==
91 		  RRR_0_OPCODE_X0)));
92 }
93 
94 /*
95  * Check if a nop or fnop is at the bundle's X1 pipeline.
96  */
97 
98 static bool is_bundle_x1_nop(tilegx_bundle_bits bundle)
99 {
100 	return (((get_UnaryOpcodeExtension_X1(bundle) ==
101 		  NOP_UNARY_OPCODE_X1) &&
102 		 (get_RRROpcodeExtension_X1(bundle) ==
103 		  UNARY_RRR_0_OPCODE_X1) &&
104 		 (get_Opcode_X1(bundle) ==
105 		  RRR_0_OPCODE_X1)) ||
106 		((get_UnaryOpcodeExtension_X1(bundle) ==
107 		  FNOP_UNARY_OPCODE_X1) &&
108 		 (get_RRROpcodeExtension_X1(bundle) ==
109 		  UNARY_RRR_0_OPCODE_X1) &&
110 		 (get_Opcode_X1(bundle) ==
111 		  RRR_0_OPCODE_X1)));
112 }
113 
114 /*
115  * Check if a nop or fnop is at the bundle's Y0 pipeline.
116  */
117 
118 static bool is_bundle_y0_nop(tilegx_bundle_bits bundle)
119 {
120 	return (((get_UnaryOpcodeExtension_Y0(bundle) ==
121 		  NOP_UNARY_OPCODE_Y0) &&
122 		 (get_RRROpcodeExtension_Y0(bundle) ==
123 		  UNARY_RRR_1_OPCODE_Y0) &&
124 		 (get_Opcode_Y0(bundle) ==
125 		  RRR_1_OPCODE_Y0)) ||
126 		((get_UnaryOpcodeExtension_Y0(bundle) ==
127 		  FNOP_UNARY_OPCODE_Y0) &&
128 		 (get_RRROpcodeExtension_Y0(bundle) ==
129 		  UNARY_RRR_1_OPCODE_Y0) &&
130 		 (get_Opcode_Y0(bundle) ==
131 		  RRR_1_OPCODE_Y0)));
132 }
133 
134 /*
135  * Check if a nop or fnop is at the bundle's Y1 pipeline.
136  */
137 
138 static bool is_bundle_y1_nop(tilegx_bundle_bits bundle)
139 {
140 	return (((get_UnaryOpcodeExtension_Y1(bundle) ==
141 		  NOP_UNARY_OPCODE_Y1) &&
142 		 (get_RRROpcodeExtension_Y1(bundle) ==
143 		  UNARY_RRR_1_OPCODE_Y1) &&
144 		 (get_Opcode_Y1(bundle) ==
145 		  RRR_1_OPCODE_Y1)) ||
146 		((get_UnaryOpcodeExtension_Y1(bundle) ==
147 		  FNOP_UNARY_OPCODE_Y1) &&
148 		 (get_RRROpcodeExtension_Y1(bundle) ==
149 		  UNARY_RRR_1_OPCODE_Y1) &&
150 		 (get_Opcode_Y1(bundle) ==
151 		  RRR_1_OPCODE_Y1)));
152 }
153 
154 /*
155  * Test if a bundle's y0 and y1 pipelines are both nop or fnop.
156  */
157 
158 static bool is_y0_y1_nop(tilegx_bundle_bits bundle)
159 {
160 	return is_bundle_y0_nop(bundle) && is_bundle_y1_nop(bundle);
161 }
162 
163 /*
164  * Test if a bundle's x0 and x1 pipelines are both nop or fnop.
165  */
166 
167 static bool is_x0_x1_nop(tilegx_bundle_bits bundle)
168 {
169 	return is_bundle_x0_nop(bundle) && is_bundle_x1_nop(bundle);
170 }
171 
172 /*
173  * Find the destination and source registers of the faulting unaligned access
174  * instruction at X1 or Y2. Also allocate up to 3 scratch registers clob1,
175  * clob2 and clob3, which are guaranteed to differ from any register used in
176  * the fault bundle. r_alias returns whether instructions other than the
177  * unaligned load/store share a register with ra, rb or rd.
178  */
179 
180 static void find_regs(tilegx_bundle_bits bundle, uint64_t *rd, uint64_t *ra,
181 		      uint64_t *rb, uint64_t *clob1, uint64_t *clob2,
182 		      uint64_t *clob3, bool *r_alias)
183 {
184 	int i;
185 	uint64_t reg;
186 	uint64_t reg_map = 0, alias_reg_map = 0, map;
187 	bool alias = false;
188 
189 	/*
190 	 * Parse the fault bundle, find the potentially used registers and mark
191 	 * the corresponding bits in reg_map and alias_reg_map. These two bitmaps
192 	 * are used to find the scratch registers and to determine whether there
193 	 * is a register alias.
194 	 */
195 	if (bundle & TILEGX_BUNDLE_MODE_MASK) {  /* Y Mode Bundle. */
196 
197 		reg = get_SrcA_Y2(bundle);
198 		reg_map |= 1ULL << reg;
199 		*ra = reg;
200 		reg = get_SrcBDest_Y2(bundle);
201 		reg_map |= 1ULL << reg;
202 
203 		if (rd) {
204 			/* Load. */
205 			*rd = reg;
206 			alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
207 		} else {
208 			/* Store. */
209 			*rb = reg;
210 			alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
211 		}
212 
213 		if (!is_bundle_y1_nop(bundle)) {
214 			reg = get_SrcA_Y1(bundle);
215 			reg_map |= (1ULL << reg);
216 			map = (1ULL << reg);
217 
218 			reg = get_SrcB_Y1(bundle);
219 			reg_map |= (1ULL << reg);
220 			map |= (1ULL << reg);
221 
222 			reg = get_Dest_Y1(bundle);
223 			reg_map |= (1ULL << reg);
224 			map |= (1ULL << reg);
225 
226 			if (map & alias_reg_map)
227 				alias = true;
228 		}
229 
230 		if (!is_bundle_y0_nop(bundle)) {
231 			reg = get_SrcA_Y0(bundle);
232 			reg_map |= (1ULL << reg);
233 			map = (1ULL << reg);
234 
235 			reg = get_SrcB_Y0(bundle);
236 			reg_map |= (1ULL << reg);
237 			map |= (1ULL << reg);
238 
239 			reg = get_Dest_Y0(bundle);
240 			reg_map |= (1ULL << reg);
241 			map |= (1ULL << reg);
242 
243 			if (map & alias_reg_map)
244 				alias = true;
245 		}
246 	} else	{ /* X Mode Bundle. */
247 
248 		reg = get_SrcA_X1(bundle);
249 		reg_map |= (1ULL << reg);
250 		*ra = reg;
251 		if (rd)	{
252 			/* Load. */
253 			reg = get_Dest_X1(bundle);
254 			reg_map |= (1ULL << reg);
255 			*rd = reg;
256 			alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
257 		} else {
258 			/* Store. */
259 			reg = get_SrcB_X1(bundle);
260 			reg_map |= (1ULL << reg);
261 			*rb = reg;
262 			alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
263 		}
264 
265 		if (!is_bundle_x0_nop(bundle)) {
266 			reg = get_SrcA_X0(bundle);
267 			reg_map |= (1ULL << reg);
268 			map = (1ULL << reg);
269 
270 			reg = get_SrcB_X0(bundle);
271 			reg_map |= (1ULL << reg);
272 			map |= (1ULL << reg);
273 
274 			reg = get_Dest_X0(bundle);
275 			reg_map |= (1ULL << reg);
276 			map |= (1ULL << reg);
277 
278 			if (map & alias_reg_map)
279 				alias = true;
280 		}
281 	}
282 
283 	/*
284 	 * "alias" indicates if the unalign access registers have collision
285 	 * with others in the same bundle. We jsut simply test all register
286 	 * operands case (RRR), ignored the case with immidate. If a bundle
287 	 * has no register alias, we may do fixup in a simple or fast manner.
288 	 * So if an immidata field happens to hit with a register, we may end
289 	 * up fall back to the generic handling.
290 	 */
291 
292 	*r_alias = alias;
293 
294 	/* Flip bits on reg_map. */
295 	reg_map ^= -1ULL;
296 
297 	/* Scan the lower 54 (TREG_SP) bits of reg_map to find 3 set bits. */
298 	for (i = 0; i < TREG_SP; i++) {
299 		if (reg_map & (0x1ULL << i)) {
300 			if (*clob1 == -1) {
301 				*clob1 = i;
302 			} else if (*clob2 == -1) {
303 				*clob2 = i;
304 			} else if (*clob3 == -1) {
305 				*clob3 = i;
306 				return;
307 			}
308 		}
309 	}
310 }
311 
312 /*
313  * Sanity check for register ra, rb, rd, clob1/2/3. Return true if any of them
314  * is unexpected.
315  */
316 
317 static bool check_regs(uint64_t rd, uint64_t ra, uint64_t rb,
318 		       uint64_t clob1, uint64_t clob2,  uint64_t clob3)
319 {
320 	bool unexpected = false;
321 	if ((ra >= 56) && (ra != TREG_ZERO))
322 		unexpected = true;
323 
324 	if ((clob1 >= 56) || (clob2 >= 56) || (clob3 >= 56))
325 		unexpected = true;
326 
327 	if (rd != -1) {
328 		if ((rd >= 56) && (rd != TREG_ZERO))
329 			unexpected = true;
330 	} else {
331 		if ((rb >= 56) && (rb != TREG_ZERO))
332 			unexpected = true;
333 	}
334 	return unexpected;
335 }
336 
337 
338 #define  GX_INSN_X0_MASK   ((1ULL << 31) - 1)
339 #define  GX_INSN_X1_MASK   (((1ULL << 31) - 1) << 31)
340 #define  GX_INSN_Y0_MASK   ((0xFULL << 27) | (0xFFFFFULL))
341 #define  GX_INSN_Y1_MASK   (GX_INSN_Y0_MASK << 31)
342 #define  GX_INSN_Y2_MASK   ((0x7FULL << 51) | (0x7FULL << 20))
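
/*
 * As the masks above spell out, the X0 slot of a 64-bit bundle occupies bits
 * [30:0] and the X1 slot bits [61:31]; the Y-mode masks likewise pick out the
 * (non-contiguous) bit ranges belonging to the Y0, Y1 and Y2 slots.
 */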
343 
344 #ifdef __LITTLE_ENDIAN
345 #define  GX_INSN_BSWAP(_bundle_)    (_bundle_)
346 #else
347 #define  GX_INSN_BSWAP(_bundle_)    swab64(_bundle_)
348 #endif /* __LITTLE_ENDIAN */
349 
350 /*
351  * __JIT_CODE(.) creates template bundles in the .rodata.unalign_data section.
352  * The corresponding static function jit_x#_###(.) generates a partial or
353  * whole bundle based on the template and the given arguments.
354  */
355 
356 #define __JIT_CODE(_X_)						\
357 	asm (".pushsection .rodata.unalign_data, \"a\"\n"	\
358 	     _X_"\n"						\
359 	     ".popsection\n")
360 
361 __JIT_CODE("__unalign_jit_x1_mtspr:   {mtspr 0,  r0}");
362 static tilegx_bundle_bits jit_x1_mtspr(int spr, int reg)
363 {
364 	extern  tilegx_bundle_bits __unalign_jit_x1_mtspr;
365 	return (GX_INSN_BSWAP(__unalign_jit_x1_mtspr) & GX_INSN_X1_MASK) |
366 		create_MT_Imm14_X1(spr) | create_SrcA_X1(reg);
367 }
368 
369 __JIT_CODE("__unalign_jit_x1_mfspr:   {mfspr r0, 0}");
370 static tilegx_bundle_bits  jit_x1_mfspr(int reg, int spr)
371 {
372 	extern  tilegx_bundle_bits __unalign_jit_x1_mfspr;
373 	return (GX_INSN_BSWAP(__unalign_jit_x1_mfspr) & GX_INSN_X1_MASK) |
374 		create_MF_Imm14_X1(spr) | create_Dest_X1(reg);
375 }
376 
377 __JIT_CODE("__unalign_jit_x0_addi:   {addi  r0, r0, 0; iret}");
378 static tilegx_bundle_bits  jit_x0_addi(int rd, int ra, int imm8)
379 {
380 	extern  tilegx_bundle_bits __unalign_jit_x0_addi;
381 	return (GX_INSN_BSWAP(__unalign_jit_x0_addi) & GX_INSN_X0_MASK) |
382 		create_Dest_X0(rd) | create_SrcA_X0(ra) |
383 		create_Imm8_X0(imm8);
384 }
385 
386 __JIT_CODE("__unalign_jit_x1_ldna:   {ldna  r0, r0}");
387 static tilegx_bundle_bits  jit_x1_ldna(int rd, int ra)
388 {
389 	extern  tilegx_bundle_bits __unalign_jit_x1_ldna;
390 	return (GX_INSN_BSWAP(__unalign_jit_x1_ldna) &  GX_INSN_X1_MASK) |
391 		create_Dest_X1(rd) | create_SrcA_X1(ra);
392 }
393 
394 __JIT_CODE("__unalign_jit_x0_dblalign:   {dblalign r0, r0 ,r0}");
395 static tilegx_bundle_bits  jit_x0_dblalign(int rd, int ra, int rb)
396 {
397 	extern  tilegx_bundle_bits __unalign_jit_x0_dblalign;
398 	return (GX_INSN_BSWAP(__unalign_jit_x0_dblalign) & GX_INSN_X0_MASK) |
399 		create_Dest_X0(rd) | create_SrcA_X0(ra) |
400 		create_SrcB_X0(rb);
401 }
402 
403 __JIT_CODE("__unalign_jit_x1_iret:   {iret}");
404 static tilegx_bundle_bits  jit_x1_iret(void)
405 {
406 	extern  tilegx_bundle_bits __unalign_jit_x1_iret;
407 	return GX_INSN_BSWAP(__unalign_jit_x1_iret) & GX_INSN_X1_MASK;
408 }
409 
410 __JIT_CODE("__unalign_jit_x01_fnop:   {fnop;fnop}");
411 static tilegx_bundle_bits  jit_x0_fnop(void)
412 {
413 	extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
414 	return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X0_MASK;
415 }
416 
417 static tilegx_bundle_bits  jit_x1_fnop(void)
418 {
419 	extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
420 	return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X1_MASK;
421 }
422 
423 __JIT_CODE("__unalign_jit_y2_dummy:   {fnop; fnop; ld zero, sp}");
424 static tilegx_bundle_bits  jit_y2_dummy(void)
425 {
426 	extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
427 	return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y2_MASK;
428 }
429 
430 static tilegx_bundle_bits  jit_y1_fnop(void)
431 {
432 	extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
433 	return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y1_MASK;
434 }
435 
436 __JIT_CODE("__unalign_jit_x1_st1_add:  {st1_add r1, r0, 0}");
437 static tilegx_bundle_bits  jit_x1_st1_add(int ra, int rb, int imm8)
438 {
439 	extern  tilegx_bundle_bits __unalign_jit_x1_st1_add;
440 	return (GX_INSN_BSWAP(__unalign_jit_x1_st1_add) &
441 		(~create_SrcA_X1(-1)) &
442 		GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
443 		create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
444 }
445 
446 __JIT_CODE("__unalign_jit_x1_st:  {crc32_8 r1, r0, r0; st  r0, r0}");
447 static tilegx_bundle_bits  jit_x1_st(int ra, int rb)
448 {
449 	extern  tilegx_bundle_bits __unalign_jit_x1_st;
450 	return (GX_INSN_BSWAP(__unalign_jit_x1_st) & GX_INSN_X1_MASK) |
451 		create_SrcA_X1(ra) | create_SrcB_X1(rb);
452 }
453 
454 __JIT_CODE("__unalign_jit_x1_st_add:  {st_add  r1, r0, 0}");
455 static tilegx_bundle_bits  jit_x1_st_add(int ra, int rb, int imm8)
456 {
457 	extern  tilegx_bundle_bits __unalign_jit_x1_st_add;
458 	return (GX_INSN_BSWAP(__unalign_jit_x1_st_add) &
459 		(~create_SrcA_X1(-1)) &
460 		GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
461 		create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
462 }
463 
464 __JIT_CODE("__unalign_jit_x1_ld:  {crc32_8 r1, r0, r0; ld  r0, r0}");
465 static tilegx_bundle_bits  jit_x1_ld(int rd, int ra)
466 {
467 	extern  tilegx_bundle_bits __unalign_jit_x1_ld;
468 	return (GX_INSN_BSWAP(__unalign_jit_x1_ld) & GX_INSN_X1_MASK) |
469 		create_Dest_X1(rd) | create_SrcA_X1(ra);
470 }
471 
472 __JIT_CODE("__unalign_jit_x1_ld_add:  {ld_add  r1, r0, 0}");
473 static tilegx_bundle_bits  jit_x1_ld_add(int rd, int ra, int imm8)
474 {
475 	extern  tilegx_bundle_bits __unalign_jit_x1_ld_add;
476 	return (GX_INSN_BSWAP(__unalign_jit_x1_ld_add) &
477 		(~create_Dest_X1(-1)) &
478 		GX_INSN_X1_MASK) | create_Dest_X1(rd) |
479 		create_SrcA_X1(ra) | create_Imm8_X1(imm8);
480 }
481 
482 __JIT_CODE("__unalign_jit_x0_bfexts:  {bfexts r0, r0, 0, 0}");
483 static tilegx_bundle_bits  jit_x0_bfexts(int rd, int ra, int bfs, int bfe)
484 {
485 	extern  tilegx_bundle_bits __unalign_jit_x0_bfexts;
486 	return (GX_INSN_BSWAP(__unalign_jit_x0_bfexts) &
487 		GX_INSN_X0_MASK) |
488 		create_Dest_X0(rd) | create_SrcA_X0(ra) |
489 		create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
490 }
491 
492 __JIT_CODE("__unalign_jit_x0_bfextu:  {bfextu r0, r0, 0, 0}");
493 static tilegx_bundle_bits  jit_x0_bfextu(int rd, int ra, int bfs, int bfe)
494 {
495 	extern  tilegx_bundle_bits __unalign_jit_x0_bfextu;
496 	return (GX_INSN_BSWAP(__unalign_jit_x0_bfextu) &
497 		GX_INSN_X0_MASK) |
498 		create_Dest_X0(rd) | create_SrcA_X0(ra) |
499 		create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
500 }
501 
502 __JIT_CODE("__unalign_jit_x1_addi:  {bfextu r1, r1, 0, 0; addi r0, r0, 0}");
503 static tilegx_bundle_bits  jit_x1_addi(int rd, int ra, int imm8)
504 {
505 	extern  tilegx_bundle_bits __unalign_jit_x1_addi;
506 	return (GX_INSN_BSWAP(__unalign_jit_x1_addi) & GX_INSN_X1_MASK) |
507 		create_Dest_X1(rd) | create_SrcA_X1(ra) |
508 		create_Imm8_X1(imm8);
509 }
510 
511 __JIT_CODE("__unalign_jit_x0_shrui:  {shrui r0, r0, 0; iret}");
512 static tilegx_bundle_bits  jit_x0_shrui(int rd, int ra, int imm6)
513 {
514 	extern  tilegx_bundle_bits __unalign_jit_x0_shrui;
515 	return (GX_INSN_BSWAP(__unalign_jit_x0_shrui) &
516 		GX_INSN_X0_MASK) |
517 		create_Dest_X0(rd) | create_SrcA_X0(ra) |
518 		create_ShAmt_X0(imm6);
519 }
520 
521 __JIT_CODE("__unalign_jit_x0_rotli:  {rotli r0, r0, 0; iret}");
522 static tilegx_bundle_bits  jit_x0_rotli(int rd, int ra, int imm6)
523 {
524 	extern  tilegx_bundle_bits __unalign_jit_x0_rotli;
525 	return (GX_INSN_BSWAP(__unalign_jit_x0_rotli) &
526 		GX_INSN_X0_MASK) |
527 		create_Dest_X0(rd) | create_SrcA_X0(ra) |
528 		create_ShAmt_X0(imm6);
529 }
530 
531 __JIT_CODE("__unalign_jit_x1_bnezt:  {bnezt r0, __unalign_jit_x1_bnezt}");
532 static tilegx_bundle_bits  jit_x1_bnezt(int ra, int broff)
533 {
534 	extern  tilegx_bundle_bits __unalign_jit_x1_bnezt;
535 	return (GX_INSN_BSWAP(__unalign_jit_x1_bnezt) &
536 		GX_INSN_X1_MASK) |
537 		create_SrcA_X1(ra) | create_BrOff_X1(broff);
538 }
539 
540 #undef __JIT_CODE
541 
542 /*
543  * This function generates unalign fixup JIT.
544  *
545  * We first find the unaligned load/store instruction's destination and
546  * source registers (ra, rb and rd) plus 3 scratch registers by calling
547  * find_regs(). The 3 scratch clobbers must not alias any register used in
548  * the fault bundle. Then we analyze the fault bundle to determine whether
549  * it is a load or store, its operand width, branch or address increment, etc.
550  * Finally, the generated JIT is copied into the JIT code area in user space.
551  */
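
/*
 * As a rough sketch of the output, the "simple store" path below emits
 * something like:
 *   { rotli rb, rb, 56 ; st1_add ra, rb, 1 }    once per byte,
 *   { rotli rb, rb, ... ; addi ra, ra, -size }  to restore rb and ra,
 *   the rest of the original bundle (if any), and finally
 *   { fnop ; iret }                             to return from the JIT.
 * The other paths follow the same pattern with extra scratch registers.
 */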
552 
553 static
554 void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
555 		    int align_ctl)
556 {
557 	struct thread_info *info = current_thread_info();
558 	struct unaligned_jit_fragment frag;
559 	struct unaligned_jit_fragment *jit_code_area;
560 	tilegx_bundle_bits bundle_2 = 0;
561 	/* If bundle_2_enable = false, bundle_2 is fnop/nop operation. */
562 	bool     bundle_2_enable = true;
563 	uint64_t ra = -1, rb = -1, rd = -1, clob1 = -1, clob2 = -1, clob3 = -1;
564 	/*
565 	 * Indicates whether the unaligned access
566 	 * instruction's registers collide with
567 	 * others in the same bundle.
568 	 */
569 	bool     alias = false;
570 	bool     load_n_store = true;
571 	bool     load_store_signed = false;
572 	unsigned int  load_store_size = 8;
573 	bool     y1_br = false;  /* True, for a branch in same bundle at Y1.*/
574 	int      y1_br_reg = 0;
575 	/* True for a link operation, i.e. jalr or lnk at Y1. */
576 	bool     y1_lr = false;
577 	int      y1_lr_reg = 0;
578 	bool     x1_add = false;/* True, for load/store ADD instruction at X1*/
579 	int      x1_add_imm8 = 0;
580 	bool     unexpected = false;
581 	int      n = 0, k;
582 
583 	jit_code_area =
584 		(struct unaligned_jit_fragment *)(info->unalign_jit_base);
585 
586 	memset((void *)&frag, 0, sizeof(frag));
587 
588 	/* 0: X mode, Otherwise: Y mode. */
589 	if (bundle & TILEGX_BUNDLE_MODE_MASK) {
590 		unsigned int mod, opcode;
591 
592 		if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 &&
593 		    get_RRROpcodeExtension_Y1(bundle) ==
594 		    UNARY_RRR_1_OPCODE_Y1) {
595 
596 			opcode = get_UnaryOpcodeExtension_Y1(bundle);
597 
598 			/*
599 			 * Test "jalr", "jalrp", "jr", "jrp" instruction at Y1
600 			 * pipeline.
601 			 */
602 			switch (opcode) {
603 			case JALR_UNARY_OPCODE_Y1:
604 			case JALRP_UNARY_OPCODE_Y1:
605 				y1_lr = true;
606 				y1_lr_reg = 55; /* Link register. */
607 				/* FALLTHROUGH */
608 			case JR_UNARY_OPCODE_Y1:
609 			case JRP_UNARY_OPCODE_Y1:
610 				y1_br = true;
611 				y1_br_reg = get_SrcA_Y1(bundle);
612 				break;
613 			case LNK_UNARY_OPCODE_Y1:
614 				/* "lnk" at Y1 pipeline. */
615 				y1_lr = true;
616 				y1_lr_reg = get_Dest_Y1(bundle);
617 				break;
618 			}
619 		}
620 
621 		opcode = get_Opcode_Y2(bundle);
622 		mod = get_Mode(bundle);
623 
624 		/*
625 		 *  bundle_2 is bundle after making Y2 as a dummy operation
626 		 *  - ld zero, sp
627 		 */
628 		bundle_2 = (bundle & (~GX_INSN_Y2_MASK)) | jit_y2_dummy();
629 
630 		/* Make Y1 as fnop if Y1 is a branch or lnk operation. */
631 		if (y1_br || y1_lr) {
632 			bundle_2 &= ~(GX_INSN_Y1_MASK);
633 			bundle_2 |= jit_y1_fnop();
634 		}
635 
636 		if (is_y0_y1_nop(bundle_2))
637 			bundle_2_enable = false;
638 
639 		if (mod == MODE_OPCODE_YC2) {
640 			/* Store. */
641 			load_n_store = false;
642 			load_store_size = 1 << opcode;
643 			load_store_signed = false;
644 			find_regs(bundle, 0, &ra, &rb, &clob1, &clob2,
645 				  &clob3, &alias);
646 			if (load_store_size > 8)
647 				unexpected = true;
648 		} else {
649 			/* Load. */
650 			load_n_store = true;
651 			if (mod == MODE_OPCODE_YB2) {
652 				switch (opcode) {
653 				case LD_OPCODE_Y2:
654 					load_store_signed = false;
655 					load_store_size = 8;
656 					break;
657 				case LD4S_OPCODE_Y2:
658 					load_store_signed = true;
659 					load_store_size = 4;
660 					break;
661 				case LD4U_OPCODE_Y2:
662 					load_store_signed = false;
663 					load_store_size = 4;
664 					break;
665 				default:
666 					unexpected = true;
667 				}
668 			} else if (mod == MODE_OPCODE_YA2) {
669 				if (opcode == LD2S_OPCODE_Y2) {
670 					load_store_signed = true;
671 					load_store_size = 2;
672 				} else if (opcode == LD2U_OPCODE_Y2) {
673 					load_store_signed = false;
674 					load_store_size = 2;
675 				} else
676 					unexpected = true;
677 			} else
678 				unexpected = true;
679 			find_regs(bundle, &rd, &ra, &rb, &clob1, &clob2,
680 				  &clob3, &alias);
681 		}
682 	} else {
683 		unsigned int opcode;
684 
685 		/* bundle_2 is bundle after making X1 as "fnop". */
686 		bundle_2 = (bundle & (~GX_INSN_X1_MASK)) | jit_x1_fnop();
687 
688 		if (is_x0_x1_nop(bundle_2))
689 			bundle_2_enable = false;
690 
691 		if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) {
692 			opcode = get_UnaryOpcodeExtension_X1(bundle);
693 
694 			if (get_RRROpcodeExtension_X1(bundle) ==
695 			    UNARY_RRR_0_OPCODE_X1) {
696 				load_n_store = true;
697 				find_regs(bundle, &rd, &ra, &rb, &clob1,
698 					  &clob2, &clob3, &alias);
699 
700 				switch (opcode) {
701 				case LD_UNARY_OPCODE_X1:
702 					load_store_signed = false;
703 					load_store_size = 8;
704 					break;
705 				case LD4S_UNARY_OPCODE_X1:
706 					load_store_signed = true;
707 					/* FALLTHROUGH */
708 				case LD4U_UNARY_OPCODE_X1:
709 					load_store_size = 4;
710 					break;
711 
712 				case LD2S_UNARY_OPCODE_X1:
713 					load_store_signed = true;
714 					/* FALLTHROUGH */
715 				case LD2U_UNARY_OPCODE_X1:
716 					load_store_size = 2;
717 					break;
718 				default:
719 					unexpected = true;
720 				}
721 			} else {
722 				load_n_store = false;
723 				load_store_signed = false;
724 				find_regs(bundle, 0, &ra, &rb,
725 					  &clob1, &clob2, &clob3,
726 					  &alias);
727 
728 				opcode = get_RRROpcodeExtension_X1(bundle);
729 				switch (opcode)	{
730 				case ST_RRR_0_OPCODE_X1:
731 					load_store_size = 8;
732 					break;
733 				case ST4_RRR_0_OPCODE_X1:
734 					load_store_size = 4;
735 					break;
736 				case ST2_RRR_0_OPCODE_X1:
737 					load_store_size = 2;
738 					break;
739 				default:
740 					unexpected = true;
741 				}
742 			}
743 		} else if (get_Opcode_X1(bundle) == IMM8_OPCODE_X1) {
744 			load_n_store = true;
745 			opcode = get_Imm8OpcodeExtension_X1(bundle);
746 			switch (opcode)	{
747 			case LD_ADD_IMM8_OPCODE_X1:
748 				load_store_size = 8;
749 				break;
750 
751 			case LD4S_ADD_IMM8_OPCODE_X1:
752 				load_store_signed = true;
753 				/* FALLTHROUGH */
754 			case LD4U_ADD_IMM8_OPCODE_X1:
755 				load_store_size = 4;
756 				break;
757 
758 			case LD2S_ADD_IMM8_OPCODE_X1:
759 				load_store_signed = true;
760 				/* FALLTHROUGH */
761 			case LD2U_ADD_IMM8_OPCODE_X1:
762 				load_store_size = 2;
763 				break;
764 
765 			case ST_ADD_IMM8_OPCODE_X1:
766 				load_n_store = false;
767 				load_store_size = 8;
768 				break;
769 			case ST4_ADD_IMM8_OPCODE_X1:
770 				load_n_store = false;
771 				load_store_size = 4;
772 				break;
773 			case ST2_ADD_IMM8_OPCODE_X1:
774 				load_n_store = false;
775 				load_store_size = 2;
776 				break;
777 			default:
778 				unexpected = true;
779 			}
780 
781 			if (!unexpected) {
782 				x1_add = true;
783 				if (load_n_store)
784 					x1_add_imm8 = get_Imm8_X1(bundle);
785 				else
786 					x1_add_imm8 = get_Dest_Imm8_X1(bundle);
787 			}
788 
789 			find_regs(bundle, load_n_store ? (&rd) : NULL,
790 				  &ra, &rb, &clob1, &clob2, &clob3, &alias);
791 		} else
792 			unexpected = true;
793 	}
794 
795 	/*
796 	 * Some sanity check for register numbers extracted from fault bundle.
797 	 */
798 	if (check_regs(rd, ra, rb, clob1, clob2, clob3) == true)
799 		unexpected = true;
800 
801 	/* Give warning if register ra has an aligned address. */
802 	if (!unexpected)
803 		WARN_ON(!((load_store_size - 1) & (regs->regs[ra])));
804 
805 
806 	/*
807 	 * If the fault came from kernel space, we only need to take care of the
808 	 * unaligned "get_user/put_user" macros defined in "uaccess.h".
809 	 * Basically, we will handle bundles like these:
810 	 * {ld/2u/4s rd, ra; movei rx, 0} or {st/2/4 ra, rb; movei rx, 0}
811 	 * (refer to "arch/tile/include/asm/uaccess.h" for details).
812 	 * For either a load or a store, the byte-wise operation is performed by
813 	 * calling get_user() or put_user(). If the macro returns a non-zero
814 	 * value, set that value in rx, otherwise set rx to zero. Finally make pc
815 	 * point to the next bundle and return.
816 	 */
817 
818 	if (EX1_PL(regs->ex1) != USER_PL) {
819 
820 		unsigned long rx = 0;
821 		unsigned long x = 0, ret = 0;
822 
823 		if (y1_br || y1_lr || x1_add ||
824 		    (load_store_signed !=
825 		     (load_n_store && load_store_size == 4))) {
826 			/* Branch, link, load/store add, or wrong sign-ext: not handled here. */
827 			unexpected = true;
828 		} else if (!unexpected) {
829 			if (bundle & TILEGX_BUNDLE_MODE_MASK) {
830 				/*
831 				 * Fault bundle is Y mode.
832 				 * Check if Y1 and Y0 are of the form
833 				 * { movei rx, 0; nop/fnop }; if yes,
834 				 * find the rx.
835 				 */
836 
837 				if ((get_Opcode_Y1(bundle) == ADDI_OPCODE_Y1)
838 				    && (get_SrcA_Y1(bundle) == TREG_ZERO) &&
839 				    (get_Imm8_Y1(bundle) == 0) &&
840 				    is_bundle_y0_nop(bundle)) {
841 					rx = get_Dest_Y1(bundle);
842 				} else if ((get_Opcode_Y0(bundle) ==
843 					    ADDI_OPCODE_Y0) &&
844 					   (get_SrcA_Y0(bundle) == TREG_ZERO) &&
845 					   (get_Imm8_Y0(bundle) == 0) &&
846 					   is_bundle_y1_nop(bundle)) {
847 					rx = get_Dest_Y0(bundle);
848 				} else {
849 					unexpected = true;
850 				}
851 			} else {
852 				/*
853 				 * Fault bundle is X mode.
854 				 * Check if the X0 is 'movei rx, 0',
855 				 * if yes, find the rx.
856 				 */
857 
858 				if ((get_Opcode_X0(bundle) == IMM8_OPCODE_X0)
859 				    && (get_Imm8OpcodeExtension_X0(bundle) ==
860 					ADDI_IMM8_OPCODE_X0) &&
861 				    (get_SrcA_X0(bundle) == TREG_ZERO) &&
862 				    (get_Imm8_X0(bundle) == 0)) {
863 					rx = get_Dest_X0(bundle);
864 				} else {
865 					unexpected = true;
866 				}
867 			}
868 
869 			/* rx should be less than 56. */
870 			if (!unexpected && (rx >= 56))
871 				unexpected = true;
872 		}
873 
874 		if (!search_exception_tables(regs->pc)) {
875 			/* No fixup in the exception tables for the pc. */
876 			unexpected = true;
877 		}
878 
879 		if (unexpected) {
880 			/* Unexpected unalign kernel fault. */
881 			struct task_struct *tsk = validate_current();
882 
883 			bust_spinlocks(1);
884 
885 			show_regs(regs);
886 
887 			if (unlikely(tsk->pid < 2)) {
888 				panic("Kernel unalign fault running %s!",
889 				      tsk->pid ? "init" : "the idle task");
890 			}
891 #ifdef SUPPORT_DIE
892 			die("Oops", regs);
893 #endif
894 			bust_spinlocks(1);
895 
896 			do_group_exit(SIGKILL);
897 
898 		} else {
899 			unsigned long i, b = 0;
900 			unsigned char *ptr =
901 				(unsigned char *)regs->regs[ra];
902 			if (load_n_store) {
903 				/* handle get_user(x, ptr) */
904 				for (i = 0; i < load_store_size; i++) {
905 					ret = get_user(b, ptr++);
906 					if (!ret) {
907 						/* Success! update x. */
908 #ifdef __LITTLE_ENDIAN
909 						x |= (b << (8 * i));
910 #else
911 						x <<= 8;
912 						x |= b;
913 #endif /* __LITTLE_ENDIAN */
914 					} else {
915 						x = 0;
916 						break;
917 					}
918 				}
919 
920 				/* Sign-extend 4-byte loads. */
921 				if (load_store_size == 4)
922 					x = (long)(int)x;
923 
924 				/* Set register rd. */
925 				regs->regs[rd] = x;
926 
927 				/* Set register rx. */
928 				regs->regs[rx] = ret;
929 
930 				/* Bump pc. */
931 				regs->pc += 8;
932 
933 			} else {
934 				/* Handle put_user(x, ptr) */
935 				x = regs->regs[rb];
936 #ifdef __LITTLE_ENDIAN
937 				b = x;
938 #else
939 				/*
940 				 * Swap x so that it is stored from low to
941 				 * high memory, the same as in the
942 				 * little-endian case.
943 				 */
944 				switch (load_store_size) {
945 				case 8:
946 					b = swab64(x);
947 					break;
948 				case 4:
949 					b = swab32(x);
950 					break;
951 				case 2:
952 					b = swab16(x);
953 					break;
954 				}
955 #endif /* __LITTLE_ENDIAN */
956 				for (i = 0; i < load_store_size; i++) {
957 					ret = put_user(b, ptr++);
958 					if (ret)
959 						break;
960 					/* Success! shift 1 byte. */
961 					b >>= 8;
962 				}
963 				/* Set register rx. */
964 				regs->regs[rx] = ret;
965 
966 				/* Bump pc. */
967 				regs->pc += 8;
968 			}
969 		}
970 
971 		unaligned_fixup_count++;
972 
973 		if (unaligned_printk) {
974 			pr_info("%s/%d - Unalign fixup for kernel access to userspace %lx\n",
975 				current->comm, current->pid, regs->regs[ra]);
976 		}
977 
978 		/* Done! Return to the exception handler. */
979 		return;
980 	}
981 
982 	if ((align_ctl == 0) || unexpected) {
983 		siginfo_t info = {
984 			.si_signo = SIGBUS,
985 			.si_code = BUS_ADRALN,
986 			.si_addr = (unsigned char __user *)0
987 		};
988 		if (unaligned_printk)
989 			pr_info("Unalign bundle: unexp @%llx, %llx\n",
990 				(unsigned long long)regs->pc,
991 				(unsigned long long)bundle);
992 
993 		if (ra < 56) {
994 			unsigned long uaa = (unsigned long)regs->regs[ra];
995 			/* Set bus Address. */
996 			info.si_addr = (unsigned char __user *)uaa;
997 		}
998 
999 		unaligned_fixup_count++;
1000 
1001 		trace_unhandled_signal("unaligned fixup trap", regs,
1002 				       (unsigned long)info.si_addr, SIGBUS);
1003 		force_sig_info(info.si_signo, &info, current);
1004 		return;
1005 	}
1006 
1007 #ifdef __LITTLE_ENDIAN
1008 #define UA_FIXUP_ADDR_DELTA          1
1009 #define UA_FIXUP_BFEXT_START(_B_)    0
1010 #define UA_FIXUP_BFEXT_END(_B_)     (8 * (_B_) - 1)
1011 #else /* __BIG_ENDIAN */
1012 #define UA_FIXUP_ADDR_DELTA          -1
1013 #define UA_FIXUP_BFEXT_START(_B_)   (64 - 8 * (_B_))
1014 #define UA_FIXUP_BFEXT_END(_B_)      63
1015 #endif /* __LITTLE_ENDIAN */
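
/*
 * Worked example: on little-endian, a 4-byte load uses the bit-field range
 * [UA_FIXUP_BFEXT_START(4), UA_FIXUP_BFEXT_END(4)] = [0, 31], i.e. the
 * bfexts/bfextu below keeps the low 32 bits of rd and sign- or zero-extends
 * them; a 2-byte load uses [0, 15]. On big-endian the ranges are [32, 63]
 * and [48, 63] instead, selecting the high-order bytes.
 */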
1016 
1017 
1018 
1019 	if ((ra != rb) && (rd != TREG_SP) && !alias &&
1020 	    !y1_br && !y1_lr && !x1_add) {
1021 		/*
1022 		 * Simple case: ra != rb, no register alias found,
1023 		 * and no branch or link. This will be the majority.
1024 		 * We can do a little better in this simple case than the
1025 		 * generic scheme below.
1026 		 */
1027 		if (!load_n_store) {
1028 			/*
1029 			 * Simple store: ra != rb, no need for a scratch register.
1030 			 * Just store and rotate right one byte at a time.
1031 			 */
1032 #ifdef __BIG_ENDIAN
1033 			frag.insn[n++] =
1034 				jit_x0_addi(ra, ra, load_store_size - 1) |
1035 				jit_x1_fnop();
1036 #endif /* __BIG_ENDIAN */
1037 			for (k = 0; k < load_store_size; k++) {
1038 				/* Store a byte. */
1039 				frag.insn[n++] =
1040 					jit_x0_rotli(rb, rb, 56) |
1041 					jit_x1_st1_add(ra, rb,
1042 						       UA_FIXUP_ADDR_DELTA);
1043 			}
1044 #ifdef __BIG_ENDIAN
1045 			frag.insn[n] = jit_x1_addi(ra, ra, 1);
1046 #else
1047 			frag.insn[n] = jit_x1_addi(ra, ra,
1048 						   -1 * load_store_size);
1049 #endif /* __LITTLE_ENDIAN */
1050 
1051 			if (load_store_size == 8) {
1052 				frag.insn[n] |= jit_x0_fnop();
1053 			} else if (load_store_size == 4) {
1054 				frag.insn[n] |= jit_x0_rotli(rb, rb, 32);
1055 			} else { /* = 2 */
1056 				frag.insn[n] |= jit_x0_rotli(rb, rb, 16);
1057 			}
1058 			n++;
1059 			if (bundle_2_enable)
1060 				frag.insn[n++] = bundle_2;
1061 			frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1062 		} else {
1063 			if (rd == ra) {
1064 				/* Use two clobber registers: clob1/2. */
1065 				frag.insn[n++] =
1066 					jit_x0_addi(TREG_SP, TREG_SP, -16) |
1067 					jit_x1_fnop();
1068 				frag.insn[n++] =
1069 					jit_x0_addi(clob1, ra, 7) |
1070 					jit_x1_st_add(TREG_SP, clob1, -8);
1071 				frag.insn[n++] =
1072 					jit_x0_addi(clob2, ra, 0) |
1073 					jit_x1_st(TREG_SP, clob2);
1074 				frag.insn[n++] =
1075 					jit_x0_fnop() |
1076 					jit_x1_ldna(rd, ra);
1077 				frag.insn[n++] =
1078 					jit_x0_fnop() |
1079 					jit_x1_ldna(clob1, clob1);
1080 				/*
1081 				 * Note: we must make sure that rd is not
1082 				 * sp. Recover clob1/2 from the stack.
1083 				 */
1084 				frag.insn[n++] =
1085 					jit_x0_dblalign(rd, clob1, clob2) |
1086 					jit_x1_ld_add(clob2, TREG_SP, 8);
1087 				frag.insn[n++] =
1088 					jit_x0_fnop() |
1089 					jit_x1_ld_add(clob1, TREG_SP, 16);
1090 			} else {
1091 				/* Use one clobber register: clob1 only. */
1092 				frag.insn[n++] =
1093 					jit_x0_addi(TREG_SP, TREG_SP, -16) |
1094 					jit_x1_fnop();
1095 				frag.insn[n++] =
1096 					jit_x0_addi(clob1, ra, 7) |
1097 					jit_x1_st(TREG_SP, clob1);
1098 				frag.insn[n++] =
1099 					jit_x0_fnop() |
1100 					jit_x1_ldna(rd, ra);
1101 				frag.insn[n++] =
1102 					jit_x0_fnop() |
1103 					jit_x1_ldna(clob1, clob1);
1104 				/*
1105 				 * Note: we must make sure that rd is not
1106 				 * sp. Recover clob1 from the stack.
1107 				 */
1108 				frag.insn[n++] =
1109 					jit_x0_dblalign(rd, clob1, ra) |
1110 					jit_x1_ld_add(clob1, TREG_SP, 16);
1111 			}
1112 
1113 			if (bundle_2_enable)
1114 				frag.insn[n++] = bundle_2;
1115 			/*
1116 			 * For a non-8-byte load, extract the corresponding
1117 			 * bytes and sign- or zero-extend them.
1118 			 */
1119 			if (load_store_size == 4) {
1120 				if (load_store_signed)
1121 					frag.insn[n++] =
1122 						jit_x0_bfexts(
1123 							rd, rd,
1124 							UA_FIXUP_BFEXT_START(4),
1125 							UA_FIXUP_BFEXT_END(4)) |
1126 						jit_x1_fnop();
1127 				else
1128 					frag.insn[n++] =
1129 						jit_x0_bfextu(
1130 							rd, rd,
1131 							UA_FIXUP_BFEXT_START(4),
1132 							UA_FIXUP_BFEXT_END(4)) |
1133 						jit_x1_fnop();
1134 			} else if (load_store_size == 2) {
1135 				if (load_store_signed)
1136 					frag.insn[n++] =
1137 						jit_x0_bfexts(
1138 							rd, rd,
1139 							UA_FIXUP_BFEXT_START(2),
1140 							UA_FIXUP_BFEXT_END(2)) |
1141 						jit_x1_fnop();
1142 				else
1143 					frag.insn[n++] =
1144 						jit_x0_bfextu(
1145 							rd, rd,
1146 							UA_FIXUP_BFEXT_START(2),
1147 							UA_FIXUP_BFEXT_END(2)) |
1148 						jit_x1_fnop();
1149 			}
1150 
1151 			frag.insn[n++] =
1152 				jit_x0_fnop()  |
1153 				jit_x1_iret();
1154 		}
1155 	} else if (!load_n_store) {
1156 
1157 		/*
1158 		 * Generic memory store cases: use 3 clobber registers.
1159 		 *
1160 		 * Allocate space for saving clob2, clob1 and clob3 on the user's
1161 		 * stack. Register clob3 points to where clob2 is saved, followed
1162 		 * by clob1 and clob3 from high to low memory.
1163 		 */
1164 		frag.insn[n++] =
1165 			jit_x0_addi(TREG_SP, TREG_SP, -32)    |
1166 			jit_x1_fnop();
1167 		frag.insn[n++] =
1168 			jit_x0_addi(clob3, TREG_SP, 16)  |
1169 			jit_x1_st_add(TREG_SP, clob3, 8);
1170 #ifdef __LITTLE_ENDIAN
1171 		frag.insn[n++] =
1172 			jit_x0_addi(clob1, ra, 0)   |
1173 			jit_x1_st_add(TREG_SP, clob1, 8);
1174 #else
1175 		frag.insn[n++] =
1176 			jit_x0_addi(clob1, ra, load_store_size - 1)   |
1177 			jit_x1_st_add(TREG_SP, clob1, 8);
1178 #endif
1179 		if (load_store_size == 8) {
1180 			/*
1181 			 * We store one byte at a time, not for speed but for
1182 			 * compact code. After each store, the data source register
1183 			 * is rotated right one byte, so it is unchanged after 8 stores.
1184 			 */
1185 			frag.insn[n++] =
1186 				jit_x0_addi(clob2, TREG_ZERO, 7)     |
1187 				jit_x1_st_add(TREG_SP, clob2, 16);
1188 			frag.insn[n++] =
1189 				jit_x0_rotli(rb, rb, 56)      |
1190 				jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1191 			frag.insn[n++] =
1192 				jit_x0_addi(clob2, clob2, -1) |
1193 				jit_x1_bnezt(clob2, -1);
1194 			frag.insn[n++] =
1195 				jit_x0_fnop()                 |
1196 				jit_x1_addi(clob2, y1_br_reg, 0);
1197 		} else if (load_store_size == 4) {
1198 			frag.insn[n++] =
1199 				jit_x0_addi(clob2, TREG_ZERO, 3)     |
1200 				jit_x1_st_add(TREG_SP, clob2, 16);
1201 			frag.insn[n++] =
1202 				jit_x0_rotli(rb, rb, 56)      |
1203 				jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1204 			frag.insn[n++] =
1205 				jit_x0_addi(clob2, clob2, -1) |
1206 				jit_x1_bnezt(clob2, -1);
1207 			/*
1208 			 * Same as the 8-byte case, but we need to rotate another
1209 			 * 4 bytes to recover rb for a 4-byte store.
1210 			 */
1211 			frag.insn[n++] = jit_x0_rotli(rb, rb, 32)      |
1212 				jit_x1_addi(clob2, y1_br_reg, 0);
1213 		} else { /* =2 */
1214 			frag.insn[n++] =
1215 				jit_x0_addi(clob2, rb, 0)     |
1216 				jit_x1_st_add(TREG_SP, clob2, 16);
1217 			for (k = 0; k < 2; k++) {
1218 				frag.insn[n++] =
1219 					jit_x0_shrui(rb, rb, 8)  |
1220 					jit_x1_st1_add(clob1, rb,
1221 						       UA_FIXUP_ADDR_DELTA);
1222 			}
1223 			frag.insn[n++] =
1224 				jit_x0_addi(rb, clob2, 0)       |
1225 				jit_x1_addi(clob2, y1_br_reg, 0);
1226 		}
1227 
1228 		if (bundle_2_enable)
1229 			frag.insn[n++] = bundle_2;
1230 
1231 		if (y1_lr) {
1232 			frag.insn[n++] =
1233 				jit_x0_fnop()                    |
1234 				jit_x1_mfspr(y1_lr_reg,
1235 					     SPR_EX_CONTEXT_0_0);
1236 		}
1237 		if (y1_br) {
1238 			frag.insn[n++] =
1239 				jit_x0_fnop()                    |
1240 				jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1241 					     clob2);
1242 		}
1243 		if (x1_add) {
1244 			frag.insn[n++] =
1245 				jit_x0_addi(ra, ra, x1_add_imm8) |
1246 				jit_x1_ld_add(clob2, clob3, -8);
1247 		} else {
1248 			frag.insn[n++] =
1249 				jit_x0_fnop()                    |
1250 				jit_x1_ld_add(clob2, clob3, -8);
1251 		}
1252 		frag.insn[n++] =
1253 			jit_x0_fnop()   |
1254 			jit_x1_ld_add(clob1, clob3, -8);
1255 		frag.insn[n++] = jit_x0_fnop()   | jit_x1_ld(clob3, clob3);
1256 		frag.insn[n++] = jit_x0_fnop()   | jit_x1_iret();
1257 
1258 	} else {
1259 		/*
1260 		 * Generic memory load cases.
1261 		 *
1262 		 * Allocate space for saving clob1, clob2 and clob3 on the user's
1263 		 * stack. Register clob3 points to where clob1 is saved, followed
1264 		 * by clob2 and clob3 from high to low memory.
1265 		 */
1266 
1267 		frag.insn[n++] =
1268 			jit_x0_addi(TREG_SP, TREG_SP, -32) |
1269 			jit_x1_fnop();
1270 		frag.insn[n++] =
1271 			jit_x0_addi(clob3, TREG_SP, 16) |
1272 			jit_x1_st_add(TREG_SP, clob3, 8);
1273 		frag.insn[n++] =
1274 			jit_x0_addi(clob2, ra, 0) |
1275 			jit_x1_st_add(TREG_SP, clob2, 8);
1276 
1277 		if (y1_br) {
1278 			frag.insn[n++] =
1279 				jit_x0_addi(clob1, y1_br_reg, 0) |
1280 				jit_x1_st_add(TREG_SP, clob1, 16);
1281 		} else {
1282 			frag.insn[n++] =
1283 				jit_x0_fnop() |
1284 				jit_x1_st_add(TREG_SP, clob1, 16);
1285 		}
1286 
1287 		if (bundle_2_enable)
1288 			frag.insn[n++] = bundle_2;
1289 
1290 		if (y1_lr) {
1291 			frag.insn[n++] =
1292 				jit_x0_fnop()  |
1293 				jit_x1_mfspr(y1_lr_reg,
1294 					     SPR_EX_CONTEXT_0_0);
1295 		}
1296 
1297 		if (y1_br) {
1298 			frag.insn[n++] =
1299 				jit_x0_fnop() |
1300 				jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1301 					     clob1);
1302 		}
1303 
1304 		frag.insn[n++] =
1305 			jit_x0_addi(clob1, clob2, 7)      |
1306 			jit_x1_ldna(rd, clob2);
1307 		frag.insn[n++] =
1308 			jit_x0_fnop()                     |
1309 			jit_x1_ldna(clob1, clob1);
1310 		frag.insn[n++] =
1311 			jit_x0_dblalign(rd, clob1, clob2) |
1312 			jit_x1_ld_add(clob1, clob3, -8);
1313 		if (x1_add) {
1314 			frag.insn[n++] =
1315 				jit_x0_addi(ra, ra, x1_add_imm8) |
1316 				jit_x1_ld_add(clob2, clob3, -8);
1317 		} else {
1318 			frag.insn[n++] =
1319 				jit_x0_fnop()  |
1320 				jit_x1_ld_add(clob2, clob3, -8);
1321 		}
1322 
1323 		frag.insn[n++] =
1324 			jit_x0_fnop() |
1325 			jit_x1_ld(clob3, clob3);
1326 
1327 		if (load_store_size == 4) {
1328 			if (load_store_signed)
1329 				frag.insn[n++] =
1330 					jit_x0_bfexts(
1331 						rd, rd,
1332 						UA_FIXUP_BFEXT_START(4),
1333 						UA_FIXUP_BFEXT_END(4)) |
1334 					jit_x1_fnop();
1335 			else
1336 				frag.insn[n++] =
1337 					jit_x0_bfextu(
1338 						rd, rd,
1339 						UA_FIXUP_BFEXT_START(4),
1340 						UA_FIXUP_BFEXT_END(4)) |
1341 					jit_x1_fnop();
1342 		} else if (load_store_size == 2) {
1343 			if (load_store_signed)
1344 				frag.insn[n++] =
1345 					jit_x0_bfexts(
1346 						rd, rd,
1347 						UA_FIXUP_BFEXT_START(2),
1348 						UA_FIXUP_BFEXT_END(2)) |
1349 					jit_x1_fnop();
1350 			else
1351 				frag.insn[n++] =
1352 					jit_x0_bfextu(
1353 						rd, rd,
1354 						UA_FIXUP_BFEXT_START(2),
1355 						UA_FIXUP_BFEXT_END(2)) |
1356 					jit_x1_fnop();
1357 		}
1358 
1359 		frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1360 	}
1361 
1362 	/* Max JIT bundle count is 14. */
1363 	WARN_ON(n > 14);
1364 
1365 	if (!unexpected) {
1366 		int status = 0;
1367 		int idx = (regs->pc >> 3) &
1368 			((1ULL << (PAGE_SHIFT - UNALIGN_JIT_SHIFT)) - 1);
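		/*
		 * The fragment slot is the faulting bundle index (pc >> 3)
		 * taken modulo the number of slots in the JIT page. Assuming
		 * UNALIGN_JIT_SHIFT is log2 of the 128-byte fragment size, the
		 * page holds PAGE_SIZE / 128 slots, so distinct fault pcs that
		 * map to the same slot simply overwrite each other's fragment.
		 */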
1369 
1370 		frag.pc = regs->pc;
1371 		frag.bundle = bundle;
1372 
1373 		if (unaligned_printk) {
1374 			pr_info("%s/%d, Unalign fixup: pc=%lx bundle=%lx %d %d %d %d %d %d %d %d\n",
1375 				current->comm, current->pid,
1376 				(unsigned long)frag.pc,
1377 				(unsigned long)frag.bundle,
1378 				(int)alias, (int)rd, (int)ra,
1379 				(int)rb, (int)bundle_2_enable,
1380 				(int)y1_lr, (int)y1_br, (int)x1_add);
1381 
1382 			for (k = 0; k < n; k += 2)
1383 				pr_info("[%d] %016llx %016llx\n",
1384 					k, (unsigned long long)frag.insn[k],
1385 					(unsigned long long)frag.insn[k+1]);
1386 		}
1387 
1388 		/* Swap bundle byte order for big endian sys. */
1389 #ifdef __BIG_ENDIAN
1390 		frag.bundle = GX_INSN_BSWAP(frag.bundle);
1391 		for (k = 0; k < n; k++)
1392 			frag.insn[k] = GX_INSN_BSWAP(frag.insn[k]);
1393 #endif /* __BIG_ENDIAN */
1394 
1395 		status = copy_to_user((void __user *)&jit_code_area[idx],
1396 				      &frag, sizeof(frag));
1397 		if (status) {
1398 			/* Failed to copy the JIT into userland; send SIGSEGV. */
1399 			siginfo_t info = {
1400 				.si_signo = SIGSEGV,
1401 				.si_code = SEGV_MAPERR,
1402 				.si_addr = (void __user *)&jit_code_area[idx]
1403 			};
1404 
1405 			pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx\n",
1406 				current->pid, current->comm,
1407 				(unsigned long long)&jit_code_area[idx]);
1408 
1409 			trace_unhandled_signal("segfault in unalign fixup",
1410 					       regs,
1411 					       (unsigned long)info.si_addr,
1412 					       SIGSEGV);
1413 			force_sig_info(info.si_signo, &info, current);
1414 			return;
1415 		}
1416 
1417 
1418 		/* Do a cheap, non-atomic increment; the count need not be exact. */
1419 		unaligned_fixup_count++;
1420 		__flush_icache_range((unsigned long)&jit_code_area[idx],
1421 				     (unsigned long)&jit_code_area[idx] +
1422 				     sizeof(frag));
1423 
1424 		/* Setup SPR_EX_CONTEXT_0_0/1 for returning to user program.*/
1425 		__insn_mtspr(SPR_EX_CONTEXT_0_0, regs->pc + 8);
1426 		__insn_mtspr(SPR_EX_CONTEXT_0_1, PL_ICS_EX1(USER_PL, 0));
1427 
1428 		/* Modify pc at the start of new JIT. */
1429 		regs->pc = (unsigned long)&jit_code_area[idx].insn[0];
1430 		/* Set ICS in SPR_EX_CONTEXT_K_1. */
1431 		regs->ex1 = PL_ICS_EX1(USER_PL, 1);
1432 	}
1433 }
1434 
1435 
1436 /*
1437  * C function to generate the unaligned-data JIT. Called from the unaligned
1438  * data interrupt handler.
1439  *
1440  * First check whether the unaligned fixup is disabled, the exception did not
1441  * come from user space, or the sp register points to an unaligned address;
1442  * if so, generate a SIGBUS. Then map a page into user space as the JIT area
1443  * if it is not mapped yet. Generate the JIT code by calling jit_bundle_gen(),
1444  * then return to the exception handler.
1445  *
1446  * The exception handler will "iret" to the newly generated JIT code after
1447  * restoring the caller-saved registers. In turn, the JIT code will perform
1448  * another "iret" to resume the user's program.
1449  */
1450 
1451 void do_unaligned(struct pt_regs *regs, int vecnum)
1452 {
1453 	tilegx_bundle_bits __user  *pc;
1454 	tilegx_bundle_bits bundle;
1455 	struct thread_info *info = current_thread_info();
1456 	int align_ctl;
1457 
1458 	/* Checks the per-process unaligned JIT flags */
1459 	align_ctl = unaligned_fixup;
1460 	switch (task_thread_info(current)->align_ctl) {
1461 	case PR_UNALIGN_NOPRINT:
1462 		align_ctl = 1;
1463 		break;
1464 	case PR_UNALIGN_SIGBUS:
1465 		align_ctl = 0;
1466 		break;
1467 	}
1468 
1469 	/* Enable interrupts in order to access userland. */
1470 	local_irq_enable();
1471 
1472 	/*
1473 	 * If the fault came from kernel space, there are two choices:
1474 	 * (a) unaligned_fixup < 1: first apply the get/put_user exception-table
1475 	 *     fixup to return -EFAULT. If there is no fixup, simply panic.
1476 	 * (b) unaligned_fixup >= 1: try to fix the unaligned access if it was
1477 	 *     triggered by the get_user/put_user() macros. Panic the kernel if
1478 	 *     it is not fixable.
1479 	 */
1480 
1481 	if (EX1_PL(regs->ex1) != USER_PL) {
1482 
1483 		if (align_ctl < 1) {
1484 			unaligned_fixup_count++;
1485 			/* If exception came from kernel, try fix it up. */
1486 			if (fixup_exception(regs)) {
1487 				if (unaligned_printk)
1488 					pr_info("Unalign fixup: %d %llx @%llx\n",
1489 						(int)unaligned_fixup,
1490 						(unsigned long long)regs->ex1,
1491 						(unsigned long long)regs->pc);
1492 			} else {
1493 				/* Not fixable. Go panic. */
1494 				panic("Unalign exception in Kernel. pc=%lx",
1495 				      regs->pc);
1496 			}
1497 		} else {
1498 			/*
1499 			 * Try to fix the exception. If we can't, panic the
1500 			 * kernel.
1501 			 */
1502 			bundle = GX_INSN_BSWAP(
1503 				*((tilegx_bundle_bits *)(regs->pc)));
1504 			jit_bundle_gen(regs, bundle, align_ctl);
1505 		}
1506 		return;
1507 	}
1508 
1509 	/*
1510 	 * If the fault came from user space with ICS set, or the stack is
1511 	 * not aligned, trigger a SIGBUS.
1512 	 */
1513 	if ((regs->sp & 0x7) || (regs->ex1) || (align_ctl < 0)) {
1514 		siginfo_t info = {
1515 			.si_signo = SIGBUS,
1516 			.si_code = BUS_ADRALN,
1517 			.si_addr = (unsigned char __user *)0
1518 		};
1519 
1520 		if (unaligned_printk)
1521 			pr_info("Unalign fixup: %d %llx @%llx\n",
1522 				(int)unaligned_fixup,
1523 				(unsigned long long)regs->ex1,
1524 				(unsigned long long)regs->pc);
1525 
1526 		unaligned_fixup_count++;
1527 
1528 		trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS);
1529 		force_sig_info(info.si_signo, &info, current);
1530 		return;
1531 	}
1532 
1533 
1534 	/* Read the bundle that caused the exception. */
1535 	pc = (tilegx_bundle_bits __user *)(regs->pc);
1536 	if (get_user(bundle, pc) != 0) {
1537 		/* We should probably never get here, since pc is a valid user address. */
1538 		siginfo_t info = {
1539 			.si_signo = SIGSEGV,
1540 			.si_code = SEGV_MAPERR,
1541 			.si_addr = (void __user *)pc
1542 		};
1543 		pr_err("Couldn't read instruction at %p trying to step\n", pc);
1544 		trace_unhandled_signal("segfault in unalign fixup", regs,
1545 				       (unsigned long)info.si_addr, SIGSEGV);
1546 		force_sig_info(info.si_signo, &info, current);
1547 		return;
1548 	}
1549 
1550 	if (!info->unalign_jit_base) {
1551 		void __user *user_page;
1552 
1553 		/*
1554 		 * Allocate a page in userland.
1555 		 * For 64-bit processes we try to place the mapping far
1556 		 * from anything else that might be going on (specifically
1557 		 * 64 GB below the top of the user address space).  If it
1558 		 * happens not to be possible to put it there, it's OK;
1559 		 * the kernel will choose another location and we'll
1560 		 * remember it for later.
1561 		 */
1562 		if (is_compat_task())
1563 			user_page = NULL;
1564 		else
1565 			user_page = (void __user *)(TASK_SIZE - (1UL << 36)) +
1566 				(current->pid << PAGE_SHIFT);
1567 
1568 		user_page = (void __user *) vm_mmap(NULL,
1569 						    (unsigned long)user_page,
1570 						    PAGE_SIZE,
1571 						    PROT_EXEC | PROT_READ |
1572 						    PROT_WRITE,
1573 #ifdef CONFIG_HOMECACHE
1574 						    MAP_CACHE_HOME_TASK |
1575 #endif
1576 						    MAP_PRIVATE |
1577 						    MAP_ANONYMOUS,
1578 						    0);
1579 
1580 		if (IS_ERR((void __force *)user_page)) {
1581 			pr_err("Out of kernel pages trying do_mmap\n");
1582 			return;
1583 		}
1584 
1585 		/* Save the address in the thread_info struct */
1586 		info->unalign_jit_base = user_page;
1587 		if (unaligned_printk)
1588 			pr_info("Unalign bundle: %d:%d, allocate page @%llx\n",
1589 				raw_smp_processor_id(), current->pid,
1590 				(unsigned long long)user_page);
1591 	}
1592 
1593 	/* Generate unalign JIT */
1594 	jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl);
1595 }
1596 
1597 #endif /* __tilegx__ */
1598