1 /*
2  * Copyright 2013 Tilera Corporation. All Rights Reserved.
3  *
4  *   This program is free software; you can redistribute it and/or
5  *   modify it under the terms of the GNU General Public License
6  *   as published by the Free Software Foundation, version 2.
7  *
8  *   This program is distributed in the hope that it will be useful, but
9  *   WITHOUT ANY WARRANTY; without even the implied warranty of
10  *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11  *   NON INFRINGEMENT.  See the GNU General Public License for
12  *   more details.
13  *
14  * A code-rewriter that handles unaligned exceptions.
15  */
16 
17 #include <linux/smp.h>
18 #include <linux/ptrace.h>
19 #include <linux/slab.h>
20 #include <linux/thread_info.h>
21 #include <linux/uaccess.h>
22 #include <linux/mman.h>
23 #include <linux/types.h>
24 #include <linux/err.h>
25 #include <linux/module.h>
26 #include <linux/compat.h>
27 #include <linux/prctl.h>
28 #include <linux/context_tracking.h>
29 #include <asm/cacheflush.h>
30 #include <asm/traps.h>
31 #include <asm/uaccess.h>
32 #include <asm/unaligned.h>
33 #include <arch/abi.h>
34 #include <arch/spr_def.h>
35 #include <arch/opcode.h>
36 
37 
38 /*
39  * This file handles unaligned exceptions for tile-Gx. The tilepro's unaligned
40  * exceptions are handled in single_step.c.
41  */
42 
43 int unaligned_printk;
44 
45 static int __init setup_unaligned_printk(char *str)
46 {
47 	long val;
48 	if (kstrtol(str, 0, &val) != 0)
49 		return 0;
50 	unaligned_printk = val;
51 	pr_info("Printk for each unaligned data access is %s\n",
52 		unaligned_printk ? "enabled" : "disabled");
53 	return 1;
54 }
55 __setup("unaligned_printk=", setup_unaligned_printk);
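/*
 * Editor's note: a brief usage sketch (not part of the original source).
 * Because setup_unaligned_printk() is registered with __setup(), the flag
 * is controlled from the kernel command line, e.g.:
 *
 *     unaligned_printk=1      enable a printk for each unaligned fixup
 *     unaligned_printk=0      stay quiet (the default)
 *
 * Any value kstrtol() can parse is accepted; non-zero enables logging.
 */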
56 
57 unsigned int unaligned_fixup_count;
58 
59 #ifdef __tilegx__
60 
61 /*
62  * Unaligned data JIT fixup code fragment. Reserved space is 128 bytes.
63  * The 1st 64-bit word saves the fault PC address, the 2nd word is the fault
64  * instruction bundle, followed by 14 JIT bundles.
65  */
66 
67 struct unaligned_jit_fragment {
68 	unsigned long       pc;
69 	tilegx_bundle_bits  bundle;
70 	tilegx_bundle_bits  insn[14];
71 };
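/*
 * Editor's note (illustrative, not part of the original source): the
 * reserved 128 bytes break down as
 *
 *     8 (pc) + 8 (bundle) + 14 * 8 (insn[14]) = 128 bytes,
 *
 * which presumably matches the per-fragment slot size (1 << UNALIGN_JIT_SHIFT)
 * used to index the per-process JIT page in jit_bundle_gen() below.
 */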
72 
73 /*
74  * Check if a nop or fnop is at the bundle's X0 pipeline.
75  */
76 
77 static bool is_bundle_x0_nop(tilegx_bundle_bits bundle)
78 {
79 	return (((get_UnaryOpcodeExtension_X0(bundle) ==
80 		  NOP_UNARY_OPCODE_X0) &&
81 		 (get_RRROpcodeExtension_X0(bundle) ==
82 		  UNARY_RRR_0_OPCODE_X0) &&
83 		 (get_Opcode_X0(bundle) ==
84 		  RRR_0_OPCODE_X0)) ||
85 		((get_UnaryOpcodeExtension_X0(bundle) ==
86 		  FNOP_UNARY_OPCODE_X0) &&
87 		 (get_RRROpcodeExtension_X0(bundle) ==
88 		  UNARY_RRR_0_OPCODE_X0) &&
89 		 (get_Opcode_X0(bundle) ==
90 		  RRR_0_OPCODE_X0)));
91 }
92 
93 /*
94  * Check if a nop or fnop is at the bundle's X1 pipeline.
95  */
96 
97 static bool is_bundle_x1_nop(tilegx_bundle_bits bundle)
98 {
99 	return (((get_UnaryOpcodeExtension_X1(bundle) ==
100 		  NOP_UNARY_OPCODE_X1) &&
101 		 (get_RRROpcodeExtension_X1(bundle) ==
102 		  UNARY_RRR_0_OPCODE_X1) &&
103 		 (get_Opcode_X1(bundle) ==
104 		  RRR_0_OPCODE_X1)) ||
105 		((get_UnaryOpcodeExtension_X1(bundle) ==
106 		  FNOP_UNARY_OPCODE_X1) &&
107 		 (get_RRROpcodeExtension_X1(bundle) ==
108 		  UNARY_RRR_0_OPCODE_X1) &&
109 		 (get_Opcode_X1(bundle) ==
110 		  RRR_0_OPCODE_X1)));
111 }
112 
113 /*
114  * Check if a nop or fnop is at the bundle's Y0 pipeline.
115  */
116 
117 static bool is_bundle_y0_nop(tilegx_bundle_bits bundle)
118 {
119 	return (((get_UnaryOpcodeExtension_Y0(bundle) ==
120 		  NOP_UNARY_OPCODE_Y0) &&
121 		 (get_RRROpcodeExtension_Y0(bundle) ==
122 		  UNARY_RRR_1_OPCODE_Y0) &&
123 		 (get_Opcode_Y0(bundle) ==
124 		  RRR_1_OPCODE_Y0)) ||
125 		((get_UnaryOpcodeExtension_Y0(bundle) ==
126 		  FNOP_UNARY_OPCODE_Y0) &&
127 		 (get_RRROpcodeExtension_Y0(bundle) ==
128 		  UNARY_RRR_1_OPCODE_Y0) &&
129 		 (get_Opcode_Y0(bundle) ==
130 		  RRR_1_OPCODE_Y0)));
131 }
132 
133 /*
134  * Check if a nop or fnop is at the bundle's Y1 pipeline.
135  */
136 
137 static bool is_bundle_y1_nop(tilegx_bundle_bits bundle)
138 {
139 	return (((get_UnaryOpcodeExtension_Y1(bundle) ==
140 		  NOP_UNARY_OPCODE_Y1) &&
141 		 (get_RRROpcodeExtension_Y1(bundle) ==
142 		  UNARY_RRR_1_OPCODE_Y1) &&
143 		 (get_Opcode_Y1(bundle) ==
144 		  RRR_1_OPCODE_Y1)) ||
145 		((get_UnaryOpcodeExtension_Y1(bundle) ==
146 		  FNOP_UNARY_OPCODE_Y1) &&
147 		 (get_RRROpcodeExtension_Y1(bundle) ==
148 		  UNARY_RRR_1_OPCODE_Y1) &&
149 		 (get_Opcode_Y1(bundle) ==
150 		  RRR_1_OPCODE_Y1)));
151 }
152 
153 /*
154  * Test if a bundle's y0 and y1 pipelines are both nop or fnop.
155  */
156 
157 static bool is_y0_y1_nop(tilegx_bundle_bits bundle)
158 {
159 	return is_bundle_y0_nop(bundle) && is_bundle_y1_nop(bundle);
160 }
161 
162 /*
163  * Test if a bundle's x0 and x1 pipelines are both nop or fnop.
164  */
165 
166 static bool is_x0_x1_nop(tilegx_bundle_bits bundle)
167 {
168 	return is_bundle_x0_nop(bundle) && is_bundle_x1_nop(bundle);
169 }
170 
171 /*
172  * Find the destination and source registers of the faulting unaligned access
173  * instruction at X1 or Y2. Also, allocate up to 3 scratch registers clob1,
174  * clob2 and clob3, which are guaranteed to differ from any register used in
175  * the fault bundle. r_alias is used to return whether instructions other than
176  * the unaligned load/store share a register with ra, rb or rd.
177  */
178 
179 static void find_regs(tilegx_bundle_bits bundle, uint64_t *rd, uint64_t *ra,
180 		      uint64_t *rb, uint64_t *clob1, uint64_t *clob2,
181 		      uint64_t *clob3, bool *r_alias)
182 {
183 	int i;
184 	uint64_t reg;
185 	uint64_t reg_map = 0, alias_reg_map = 0, map;
186 	bool alias = false;
187 
188 	/*
189 	 * Parse the fault bundle, find potentially used registers and mark
190 	 * the corresponding bits in reg_map and alias_reg_map. These 2 bit maps
191 	 * are used to find the scratch registers and determine if there
192 	 * is register aliasing.
193 	 */
194 	if (bundle & TILEGX_BUNDLE_MODE_MASK) {  /* Y Mode Bundle. */
195 
196 		reg = get_SrcA_Y2(bundle);
197 		reg_map |= 1ULL << reg;
198 		*ra = reg;
199 		reg = get_SrcBDest_Y2(bundle);
200 		reg_map |= 1ULL << reg;
201 
202 		if (rd) {
203 			/* Load. */
204 			*rd = reg;
205 			alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
206 		} else {
207 			/* Store. */
208 			*rb = reg;
209 			alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
210 		}
211 
212 		if (!is_bundle_y1_nop(bundle)) {
213 			reg = get_SrcA_Y1(bundle);
214 			reg_map |= (1ULL << reg);
215 			map = (1ULL << reg);
216 
217 			reg = get_SrcB_Y1(bundle);
218 			reg_map |= (1ULL << reg);
219 			map |= (1ULL << reg);
220 
221 			reg = get_Dest_Y1(bundle);
222 			reg_map |= (1ULL << reg);
223 			map |= (1ULL << reg);
224 
225 			if (map & alias_reg_map)
226 				alias = true;
227 		}
228 
229 		if (!is_bundle_y0_nop(bundle)) {
230 			reg = get_SrcA_Y0(bundle);
231 			reg_map |= (1ULL << reg);
232 			map = (1ULL << reg);
233 
234 			reg = get_SrcB_Y0(bundle);
235 			reg_map |= (1ULL << reg);
236 			map |= (1ULL << reg);
237 
238 			reg = get_Dest_Y0(bundle);
239 			reg_map |= (1ULL << reg);
240 			map |= (1ULL << reg);
241 
242 			if (map & alias_reg_map)
243 				alias = true;
244 		}
245 	} else	{ /* X Mode Bundle. */
246 
247 		reg = get_SrcA_X1(bundle);
248 		reg_map |= (1ULL << reg);
249 		*ra = reg;
250 		if (rd)	{
251 			/* Load. */
252 			reg = get_Dest_X1(bundle);
253 			reg_map |= (1ULL << reg);
254 			*rd = reg;
255 			alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
256 		} else {
257 			/* Store. */
258 			reg = get_SrcB_X1(bundle);
259 			reg_map |= (1ULL << reg);
260 			*rb = reg;
261 			alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
262 		}
263 
264 		if (!is_bundle_x0_nop(bundle)) {
265 			reg = get_SrcA_X0(bundle);
266 			reg_map |= (1ULL << reg);
267 			map = (1ULL << reg);
268 
269 			reg = get_SrcB_X0(bundle);
270 			reg_map |= (1ULL << reg);
271 			map |= (1ULL << reg);
272 
273 			reg = get_Dest_X0(bundle);
274 			reg_map |= (1ULL << reg);
275 			map |= (1ULL << reg);
276 
277 			if (map & alias_reg_map)
278 				alias = true;
279 		}
280 	}
281 
282 	/*
283 	 * "alias" indicates whether the unaligned access registers collide
284 	 * with other registers in the same bundle. We simply test the
285 	 * all-register-operand case (RRR) and ignore the immediate case. If a
286 	 * bundle has no register alias, we may do the fixup in a simpler and
287 	 * faster manner. So if an immediate field happens to match a register
288 	 * number, we may end up falling back to the generic handling.
289 	 */
290 
291 	*r_alias = alias;
292 
293 	/* Flip bits on reg_map. */
294 	reg_map ^= -1ULL;
295 
296 	/* Scan the lower 54 (TREG_SP) bits of reg_map to find 3 set bits. */
297 	for (i = 0; i < TREG_SP; i++) {
298 		if (reg_map & (0x1ULL << i)) {
299 			if (*clob1 == -1) {
300 				*clob1 = i;
301 			} else if (*clob2 == -1) {
302 				*clob2 = i;
303 			} else if (*clob3 == -1) {
304 				*clob3 = i;
305 				return;
306 			}
307 		}
308 	}
309 }
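/*
 * Editor's worked example for find_regs() (illustrative only): suppose the
 * fault bundle is an X-mode "ld r3, r5" whose X0 slot is a non-nop RRR
 * operation using r7 and r9.  Then reg_map has bits 3, 5, 7 and 9 set;
 * after reg_map ^= -1ULL the scan over bits 0..TREG_SP-1 picks the first
 * three registers not used by the bundle, so clob1 = 0, clob2 = 1 and
 * clob3 = 2.  Since {r7, r9} does not overlap {r3, r5}, map & alias_reg_map
 * is 0 and *r_alias stays false.
 */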
310 
311 /*
312  * Sanity check for registers ra, rb, rd and clob1/2/3. Return true if any
313  * of them is unexpected.
314  */
315 
316 static bool check_regs(uint64_t rd, uint64_t ra, uint64_t rb,
317 		       uint64_t clob1, uint64_t clob2,  uint64_t clob3)
318 {
319 	bool unexpected = false;
320 	if ((ra >= 56) && (ra != TREG_ZERO))
321 		unexpected = true;
322 
323 	if ((clob1 >= 56) || (clob2 >= 56) || (clob3 >= 56))
324 		unexpected = true;
325 
326 	if (rd != -1) {
327 		if ((rd >= 56) && (rd != TREG_ZERO))
328 			unexpected = true;
329 	} else {
330 		if ((rb >= 56) && (rb != TREG_ZERO))
331 			unexpected = true;
332 	}
333 	return unexpected;
334 }
335 
336 
337 #define  GX_INSN_X0_MASK   ((1ULL << 31) - 1)
338 #define  GX_INSN_X1_MASK   (((1ULL << 31) - 1) << 31)
339 #define  GX_INSN_Y0_MASK   ((0xFULL << 27) | (0xFFFFFULL))
340 #define  GX_INSN_Y1_MASK   (GX_INSN_Y0_MASK << 31)
341 #define  GX_INSN_Y2_MASK   ((0x7FULL << 51) | (0x7FULL << 20))
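/*
 * Editor's note (an illustrative reading of the masks above): a tilegx
 * bundle is 64 bits wide and these masks carve out the per-pipeline fields:
 *
 *     X0: bits  0..30             X1: bits 31..61
 *     Y0: bits  0..19 and 27..30  Y1: bits 31..50 and 58..61
 *     Y2: bits 20..26 and 51..57
 *
 * Masking a template bundle with one of these keeps only that pipeline's
 * encoding, so the jit_x*_*() / jit_y*_*() helpers below can be OR-ed
 * together to form complete bundles.
 */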
342 
343 #ifdef __LITTLE_ENDIAN
344 #define  GX_INSN_BSWAP(_bundle_)    (_bundle_)
345 #else
346 #define  GX_INSN_BSWAP(_bundle_)    swab64(_bundle_)
347 #endif /* __LITTLE_ENDIAN */
348 
349 /*
350  * __JIT_CODE(.) creates template bundles in the .rodata.unalign_data section.
351  * The corresponding static function jit_x#_###(.) generates a partial or
352  * whole bundle based on the template and the given arguments.
353  */
354 
355 #define __JIT_CODE(_X_)						\
356 	asm (".pushsection .rodata.unalign_data, \"a\"\n"	\
357 	     _X_"\n"						\
358 	     ".popsection\n")
359 
360 __JIT_CODE("__unalign_jit_x1_mtspr:   {mtspr 0,  r0}");
361 static tilegx_bundle_bits jit_x1_mtspr(int spr, int reg)
362 {
363 	extern  tilegx_bundle_bits __unalign_jit_x1_mtspr;
364 	return (GX_INSN_BSWAP(__unalign_jit_x1_mtspr) & GX_INSN_X1_MASK) |
365 		create_MT_Imm14_X1(spr) | create_SrcA_X1(reg);
366 }
367 
368 __JIT_CODE("__unalign_jit_x1_mfspr:   {mfspr r0, 0}");
369 static tilegx_bundle_bits  jit_x1_mfspr(int reg, int spr)
370 {
371 	extern  tilegx_bundle_bits __unalign_jit_x1_mfspr;
372 	return (GX_INSN_BSWAP(__unalign_jit_x1_mfspr) & GX_INSN_X1_MASK) |
373 		create_MF_Imm14_X1(spr) | create_Dest_X1(reg);
374 }
375 
376 __JIT_CODE("__unalign_jit_x0_addi:   {addi  r0, r0, 0; iret}");
377 static tilegx_bundle_bits  jit_x0_addi(int rd, int ra, int imm8)
378 {
379 	extern  tilegx_bundle_bits __unalign_jit_x0_addi;
380 	return (GX_INSN_BSWAP(__unalign_jit_x0_addi) & GX_INSN_X0_MASK) |
381 		create_Dest_X0(rd) | create_SrcA_X0(ra) |
382 		create_Imm8_X0(imm8);
383 }
384 
385 __JIT_CODE("__unalign_jit_x1_ldna:   {ldna  r0, r0}");
386 static tilegx_bundle_bits  jit_x1_ldna(int rd, int ra)
387 {
388 	extern  tilegx_bundle_bits __unalign_jit_x1_ldna;
389 	return (GX_INSN_BSWAP(__unalign_jit_x1_ldna) &  GX_INSN_X1_MASK) |
390 		create_Dest_X1(rd) | create_SrcA_X1(ra);
391 }
392 
393 __JIT_CODE("__unalign_jit_x0_dblalign:   {dblalign r0, r0 ,r0}");
394 static tilegx_bundle_bits  jit_x0_dblalign(int rd, int ra, int rb)
395 {
396 	extern  tilegx_bundle_bits __unalign_jit_x0_dblalign;
397 	return (GX_INSN_BSWAP(__unalign_jit_x0_dblalign) & GX_INSN_X0_MASK) |
398 		create_Dest_X0(rd) | create_SrcA_X0(ra) |
399 		create_SrcB_X0(rb);
400 }
401 
402 __JIT_CODE("__unalign_jit_x1_iret:   {iret}");
403 static tilegx_bundle_bits  jit_x1_iret(void)
404 {
405 	extern  tilegx_bundle_bits __unalign_jit_x1_iret;
406 	return GX_INSN_BSWAP(__unalign_jit_x1_iret) & GX_INSN_X1_MASK;
407 }
408 
409 __JIT_CODE("__unalign_jit_x01_fnop:   {fnop;fnop}");
410 static tilegx_bundle_bits  jit_x0_fnop(void)
411 {
412 	extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
413 	return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X0_MASK;
414 }
415 
416 static tilegx_bundle_bits  jit_x1_fnop(void)
417 {
418 	extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
419 	return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X1_MASK;
420 }
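/*
 * Editor's note: a minimal sketch of how these generators combine
 * (illustrative only; the register numbers are arbitrary).  Each helper
 * returns only its own pipeline's bits, so a complete bundle is built by
 * OR-ing an X0 part with an X1 part:
 */
#if 0
	/* "{ addi r10, r11, 4 ; fnop }" as one bundle: */
	tilegx_bundle_bits b = jit_x0_addi(10, 11, 4) | jit_x1_fnop();
#endif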
421 
422 __JIT_CODE("__unalign_jit_y2_dummy:   {fnop; fnop; ld zero, sp}");
423 static tilegx_bundle_bits  jit_y2_dummy(void)
424 {
425 	extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
426 	return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y2_MASK;
427 }
428 
429 static tilegx_bundle_bits  jit_y1_fnop(void)
430 {
431 	extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
432 	return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y1_MASK;
433 }
434 
435 __JIT_CODE("__unalign_jit_x1_st1_add:  {st1_add r1, r0, 0}");
436 static tilegx_bundle_bits  jit_x1_st1_add(int ra, int rb, int imm8)
437 {
438 	extern  tilegx_bundle_bits __unalign_jit_x1_st1_add;
439 	return (GX_INSN_BSWAP(__unalign_jit_x1_st1_add) &
440 		(~create_SrcA_X1(-1)) &
441 		GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
442 		create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
443 }
444 
445 __JIT_CODE("__unalign_jit_x1_st:  {crc32_8 r1, r0, r0; st  r0, r0}");
446 static tilegx_bundle_bits  jit_x1_st(int ra, int rb)
447 {
448 	extern  tilegx_bundle_bits __unalign_jit_x1_st;
449 	return (GX_INSN_BSWAP(__unalign_jit_x1_st) & GX_INSN_X1_MASK) |
450 		create_SrcA_X1(ra) | create_SrcB_X1(rb);
451 }
452 
453 __JIT_CODE("__unalign_jit_x1_st_add:  {st_add  r1, r0, 0}");
454 static tilegx_bundle_bits  jit_x1_st_add(int ra, int rb, int imm8)
455 {
456 	extern  tilegx_bundle_bits __unalign_jit_x1_st_add;
457 	return (GX_INSN_BSWAP(__unalign_jit_x1_st_add) &
458 		(~create_SrcA_X1(-1)) &
459 		GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
460 		create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
461 }
462 
463 __JIT_CODE("__unalign_jit_x1_ld:  {crc32_8 r1, r0, r0; ld  r0, r0}");
464 static tilegx_bundle_bits  jit_x1_ld(int rd, int ra)
465 {
466 	extern  tilegx_bundle_bits __unalign_jit_x1_ld;
467 	return (GX_INSN_BSWAP(__unalign_jit_x1_ld) & GX_INSN_X1_MASK) |
468 		create_Dest_X1(rd) | create_SrcA_X1(ra);
469 }
470 
471 __JIT_CODE("__unalign_jit_x1_ld_add:  {ld_add  r1, r0, 0}");
472 static tilegx_bundle_bits  jit_x1_ld_add(int rd, int ra, int imm8)
473 {
474 	extern  tilegx_bundle_bits __unalign_jit_x1_ld_add;
475 	return (GX_INSN_BSWAP(__unalign_jit_x1_ld_add) &
476 		(~create_Dest_X1(-1)) &
477 		GX_INSN_X1_MASK) | create_Dest_X1(rd) |
478 		create_SrcA_X1(ra) | create_Imm8_X1(imm8);
479 }
480 
481 __JIT_CODE("__unalign_jit_x0_bfexts:  {bfexts r0, r0, 0, 0}");
482 static tilegx_bundle_bits  jit_x0_bfexts(int rd, int ra, int bfs, int bfe)
483 {
484 	extern  tilegx_bundle_bits __unalign_jit_x0_bfexts;
485 	return (GX_INSN_BSWAP(__unalign_jit_x0_bfexts) &
486 		GX_INSN_X0_MASK) |
487 		create_Dest_X0(rd) | create_SrcA_X0(ra) |
488 		create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
489 }
490 
491 __JIT_CODE("__unalign_jit_x0_bfextu:  {bfextu r0, r0, 0, 0}");
492 static tilegx_bundle_bits  jit_x0_bfextu(int rd, int ra, int bfs, int bfe)
493 {
494 	extern  tilegx_bundle_bits __unalign_jit_x0_bfextu;
495 	return (GX_INSN_BSWAP(__unalign_jit_x0_bfextu) &
496 		GX_INSN_X0_MASK) |
497 		create_Dest_X0(rd) | create_SrcA_X0(ra) |
498 		create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
499 }
500 
501 __JIT_CODE("__unalign_jit_x1_addi:  {bfextu r1, r1, 0, 0; addi r0, r0, 0}");
502 static tilegx_bundle_bits  jit_x1_addi(int rd, int ra, int imm8)
503 {
504 	extern  tilegx_bundle_bits __unalign_jit_x1_addi;
505 	return (GX_INSN_BSWAP(__unalign_jit_x1_addi) & GX_INSN_X1_MASK) |
506 		create_Dest_X1(rd) | create_SrcA_X1(ra) |
507 		create_Imm8_X1(imm8);
508 }
509 
510 __JIT_CODE("__unalign_jit_x0_shrui:  {shrui r0, r0, 0; iret}");
511 static tilegx_bundle_bits  jit_x0_shrui(int rd, int ra, int imm6)
512 {
513 	extern  tilegx_bundle_bits __unalign_jit_x0_shrui;
514 	return (GX_INSN_BSWAP(__unalign_jit_x0_shrui) &
515 		GX_INSN_X0_MASK) |
516 		create_Dest_X0(rd) | create_SrcA_X0(ra) |
517 		create_ShAmt_X0(imm6);
518 }
519 
520 __JIT_CODE("__unalign_jit_x0_rotli:  {rotli r0, r0, 0; iret}");
521 static tilegx_bundle_bits  jit_x0_rotli(int rd, int ra, int imm6)
522 {
523 	extern  tilegx_bundle_bits __unalign_jit_x0_rotli;
524 	return (GX_INSN_BSWAP(__unalign_jit_x0_rotli) &
525 		GX_INSN_X0_MASK) |
526 		create_Dest_X0(rd) | create_SrcA_X0(ra) |
527 		create_ShAmt_X0(imm6);
528 }
529 
530 __JIT_CODE("__unalign_jit_x1_bnezt:  {bnezt r0, __unalign_jit_x1_bnezt}");
531 static tilegx_bundle_bits  jit_x1_bnezt(int ra, int broff)
532 {
533 	extern  tilegx_bundle_bits __unalign_jit_x1_bnezt;
534 	return (GX_INSN_BSWAP(__unalign_jit_x1_bnezt) &
535 		GX_INSN_X1_MASK) |
536 		create_SrcA_X1(ra) | create_BrOff_X1(broff);
537 }
538 
539 #undef __JIT_CODE
540 
541 /*
542  * This function generates the unaligned fixup JIT.
543  *
544  * We first find the unaligned load/store instruction's destination and
545  * source registers (ra, rb and rd) plus 3 scratch registers by calling
546  * find_regs(...). The 3 scratch clobbers must not alias any register used
547  * in the fault bundle. Then we analyze the fault bundle to determine if it
548  * is a load or store, its operand width, and any branch or address increment.
549  * Finally the generated JIT is copied into the JIT code area in user space.
550  */
551 
552 static
553 void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
554 		    int align_ctl)
555 {
556 	struct thread_info *info = current_thread_info();
557 	struct unaligned_jit_fragment frag;
558 	struct unaligned_jit_fragment *jit_code_area;
559 	tilegx_bundle_bits bundle_2 = 0;
560 	/* If bundle_2_enable == false, bundle_2 is a fnop/nop operation. */
561 	bool     bundle_2_enable = true;
562 	uint64_t ra = -1, rb = -1, rd = -1, clob1 = -1, clob2 = -1, clob3 = -1;
563 	/*
564 	 * Indicates whether the unaligned access
565 	 * instruction's registers collide with
566 	 * others in the same bundle.
567 	 */
568 	bool     alias = false;
569 	bool     load_n_store = true;
570 	bool     load_store_signed = false;
571 	unsigned int  load_store_size = 8;
572 	bool     y1_br = false;  /* True for a branch in the same bundle at Y1. */
573 	int      y1_br_reg = 0;
574 	/* True for a link operation, i.e. jalr or lnk at Y1. */
575 	bool     y1_lr = false;
576 	int      y1_lr_reg = 0;
577 	bool     x1_add = false; /* True for a load/store ADD instruction at X1. */
578 	int      x1_add_imm8 = 0;
579 	bool     unexpected = false;
580 	int      n = 0, k;
581 
582 	jit_code_area =
583 		(struct unaligned_jit_fragment *)(info->unalign_jit_base);
584 
585 	memset((void *)&frag, 0, sizeof(frag));
586 
587 	/* 0: X mode, Otherwise: Y mode. */
588 	if (bundle & TILEGX_BUNDLE_MODE_MASK) {
589 		unsigned int mod, opcode;
590 
591 		if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 &&
592 		    get_RRROpcodeExtension_Y1(bundle) ==
593 		    UNARY_RRR_1_OPCODE_Y1) {
594 
595 			opcode = get_UnaryOpcodeExtension_Y1(bundle);
596 
597 			/*
598 			 * Test "jalr", "jalrp", "jr", "jrp" instruction at Y1
599 			 * pipeline.
600 			 */
601 			switch (opcode) {
602 			case JALR_UNARY_OPCODE_Y1:
603 			case JALRP_UNARY_OPCODE_Y1:
604 				y1_lr = true;
605 				y1_lr_reg = 55; /* Link register. */
606 				/* FALLTHROUGH */
607 			case JR_UNARY_OPCODE_Y1:
608 			case JRP_UNARY_OPCODE_Y1:
609 				y1_br = true;
610 				y1_br_reg = get_SrcA_Y1(bundle);
611 				break;
612 			case LNK_UNARY_OPCODE_Y1:
613 				/* "lnk" at Y1 pipeline. */
614 				y1_lr = true;
615 				y1_lr_reg = get_Dest_Y1(bundle);
616 				break;
617 			}
618 		}
619 
620 		opcode = get_Opcode_Y2(bundle);
621 		mod = get_Mode(bundle);
622 
623 		/*
624 		 *  bundle_2 is the bundle after turning Y2 into a dummy operation
625 		 *  - ld zero, sp
626 		 */
627 		bundle_2 = (bundle & (~GX_INSN_Y2_MASK)) | jit_y2_dummy();
628 
629 		/* Make Y1 a fnop if Y1 is a branch or lnk operation. */
630 		if (y1_br || y1_lr) {
631 			bundle_2 &= ~(GX_INSN_Y1_MASK);
632 			bundle_2 |= jit_y1_fnop();
633 		}
634 
635 		if (is_y0_y1_nop(bundle_2))
636 			bundle_2_enable = false;
637 
638 		if (mod == MODE_OPCODE_YC2) {
639 			/* Store. */
640 			load_n_store = false;
641 			load_store_size = 1 << opcode;
642 			load_store_signed = false;
643 			find_regs(bundle, 0, &ra, &rb, &clob1, &clob2,
644 				  &clob3, &alias);
645 			if (load_store_size > 8)
646 				unexpected = true;
647 		} else {
648 			/* Load. */
649 			load_n_store = true;
650 			if (mod == MODE_OPCODE_YB2) {
651 				switch (opcode) {
652 				case LD_OPCODE_Y2:
653 					load_store_signed = false;
654 					load_store_size = 8;
655 					break;
656 				case LD4S_OPCODE_Y2:
657 					load_store_signed = true;
658 					load_store_size = 4;
659 					break;
660 				case LD4U_OPCODE_Y2:
661 					load_store_signed = false;
662 					load_store_size = 4;
663 					break;
664 				default:
665 					unexpected = true;
666 				}
667 			} else if (mod == MODE_OPCODE_YA2) {
668 				if (opcode == LD2S_OPCODE_Y2) {
669 					load_store_signed = true;
670 					load_store_size = 2;
671 				} else if (opcode == LD2U_OPCODE_Y2) {
672 					load_store_signed = false;
673 					load_store_size = 2;
674 				} else
675 					unexpected = true;
676 			} else
677 				unexpected = true;
678 			find_regs(bundle, &rd, &ra, &rb, &clob1, &clob2,
679 				  &clob3, &alias);
680 		}
681 	} else {
682 		unsigned int opcode;
683 
684 		/* bundle_2 is the bundle after making X1 a "fnop". */
685 		bundle_2 = (bundle & (~GX_INSN_X1_MASK)) | jit_x1_fnop();
686 
687 		if (is_x0_x1_nop(bundle_2))
688 			bundle_2_enable = false;
689 
690 		if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) {
691 			opcode = get_UnaryOpcodeExtension_X1(bundle);
692 
693 			if (get_RRROpcodeExtension_X1(bundle) ==
694 			    UNARY_RRR_0_OPCODE_X1) {
695 				load_n_store = true;
696 				find_regs(bundle, &rd, &ra, &rb, &clob1,
697 					  &clob2, &clob3, &alias);
698 
699 				switch (opcode) {
700 				case LD_UNARY_OPCODE_X1:
701 					load_store_signed = false;
702 					load_store_size = 8;
703 					break;
704 				case LD4S_UNARY_OPCODE_X1:
705 					load_store_signed = true;
706 					/* FALLTHROUGH */
707 				case LD4U_UNARY_OPCODE_X1:
708 					load_store_size = 4;
709 					break;
710 
711 				case LD2S_UNARY_OPCODE_X1:
712 					load_store_signed = true;
713 					/* FALLTHROUGH */
714 				case LD2U_UNARY_OPCODE_X1:
715 					load_store_size = 2;
716 					break;
717 				default:
718 					unexpected = true;
719 				}
720 			} else {
721 				load_n_store = false;
722 				load_store_signed = false;
723 				find_regs(bundle, 0, &ra, &rb,
724 					  &clob1, &clob2, &clob3,
725 					  &alias);
726 
727 				opcode = get_RRROpcodeExtension_X1(bundle);
728 				switch (opcode)	{
729 				case ST_RRR_0_OPCODE_X1:
730 					load_store_size = 8;
731 					break;
732 				case ST4_RRR_0_OPCODE_X1:
733 					load_store_size = 4;
734 					break;
735 				case ST2_RRR_0_OPCODE_X1:
736 					load_store_size = 2;
737 					break;
738 				default:
739 					unexpected = true;
740 				}
741 			}
742 		} else if (get_Opcode_X1(bundle) == IMM8_OPCODE_X1) {
743 			load_n_store = true;
744 			opcode = get_Imm8OpcodeExtension_X1(bundle);
745 			switch (opcode)	{
746 			case LD_ADD_IMM8_OPCODE_X1:
747 				load_store_size = 8;
748 				break;
749 
750 			case LD4S_ADD_IMM8_OPCODE_X1:
751 				load_store_signed = true;
752 				/* FALLTHROUGH */
753 			case LD4U_ADD_IMM8_OPCODE_X1:
754 				load_store_size = 4;
755 				break;
756 
757 			case LD2S_ADD_IMM8_OPCODE_X1:
758 				load_store_signed = true;
759 				/* FALLTHROUGH */
760 			case LD2U_ADD_IMM8_OPCODE_X1:
761 				load_store_size = 2;
762 				break;
763 
764 			case ST_ADD_IMM8_OPCODE_X1:
765 				load_n_store = false;
766 				load_store_size = 8;
767 				break;
768 			case ST4_ADD_IMM8_OPCODE_X1:
769 				load_n_store = false;
770 				load_store_size = 4;
771 				break;
772 			case ST2_ADD_IMM8_OPCODE_X1:
773 				load_n_store = false;
774 				load_store_size = 2;
775 				break;
776 			default:
777 				unexpected = true;
778 			}
779 
780 			if (!unexpected) {
781 				x1_add = true;
782 				if (load_n_store)
783 					x1_add_imm8 = get_Imm8_X1(bundle);
784 				else
785 					x1_add_imm8 = get_Dest_Imm8_X1(bundle);
786 			}
787 
788 			find_regs(bundle, load_n_store ? (&rd) : NULL,
789 				  &ra, &rb, &clob1, &clob2, &clob3, &alias);
790 		} else
791 			unexpected = true;
792 	}
793 
794 	/*
795 	 * Some sanity checks for the register numbers extracted from the fault bundle.
796 	 */
797 	if (check_regs(rd, ra, rb, clob1, clob2, clob3) == true)
798 		unexpected = true;
799 
800 	/* Give warning if register ra has an aligned address. */
801 	if (!unexpected)
802 		WARN_ON(!((load_store_size - 1) & (regs->regs[ra])));
803 
804 
805 	/*
806 	 * If the fault came from kernel space, we only need to take care of the
807 	 * unaligned "get_user/put_user" macros defined in "uaccess.h".
808 	 * Basically, we will handle bundles like:
809 	 * {ld/2u/4s rd, ra; movei rx, 0} or {st/2/4 ra, rb; movei rx, 0}
810 	 * (refer to "arch/tile/include/asm/uaccess.h" for details).
811 	 * For either load or store, a byte-wise operation is performed by calling
812 	 * get_user() or put_user(). If the macro returns a non-zero value,
813 	 * that value is placed in rx, otherwise rx is set to zero. Finally pc is
814 	 * made to point to the next bundle and we return.
815 	 */
816 
817 	if (EX1_PL(regs->ex1) != USER_PL) {
818 
819 		unsigned long rx = 0;
820 		unsigned long x = 0, ret = 0;
821 
822 		if (y1_br || y1_lr || x1_add ||
823 		    (load_store_signed !=
824 		     (load_n_store && load_store_size == 4))) {
825 			/* We do not expect a branch, link, load/store add or wrong sign-extension here. */
826 			unexpected = true;
827 		} else if (!unexpected) {
828 			if (bundle & TILEGX_BUNDLE_MODE_MASK) {
829 				/*
830 				 * Fault bundle is Y mode.
831 				 * Check if Y1 and Y0 are of the form
832 				 * { movei rx, 0; nop/fnop }; if yes,
833 				 * find the rx.
834 				 */
835 
836 				if ((get_Opcode_Y1(bundle) == ADDI_OPCODE_Y1)
837 				    && (get_SrcA_Y1(bundle) == TREG_ZERO) &&
838 				    (get_Imm8_Y1(bundle) == 0) &&
839 				    is_bundle_y0_nop(bundle)) {
840 					rx = get_Dest_Y1(bundle);
841 				} else if ((get_Opcode_Y0(bundle) ==
842 					    ADDI_OPCODE_Y0) &&
843 					   (get_SrcA_Y0(bundle) == TREG_ZERO) &&
844 					   (get_Imm8_Y0(bundle) == 0) &&
845 					   is_bundle_y1_nop(bundle)) {
846 					rx = get_Dest_Y0(bundle);
847 				} else {
848 					unexpected = true;
849 				}
850 			} else {
851 				/*
852 				 * Fault bundle is X mode.
853 				 * Check if X0 is 'movei rx, 0';
854 				 * if yes, find the rx.
855 				 */
856 
857 				if ((get_Opcode_X0(bundle) == IMM8_OPCODE_X0)
858 				    && (get_Imm8OpcodeExtension_X0(bundle) ==
859 					ADDI_IMM8_OPCODE_X0) &&
860 				    (get_SrcA_X0(bundle) == TREG_ZERO) &&
861 				    (get_Imm8_X0(bundle) == 0)) {
862 					rx = get_Dest_X0(bundle);
863 				} else {
864 					unexpected = true;
865 				}
866 			}
867 
868 			/* rx should be less than 56. */
869 			if (!unexpected && (rx >= 56))
870 				unexpected = true;
871 		}
872 
873 		if (!search_exception_tables(regs->pc)) {
874 			/* No fixup in the exception tables for the pc. */
875 			unexpected = true;
876 		}
877 
878 		if (unexpected) {
879 			/* Unexpected unalign kernel fault. */
880 			struct task_struct *tsk = validate_current();
881 
882 			bust_spinlocks(1);
883 
884 			show_regs(regs);
885 
886 			if (unlikely(tsk->pid < 2)) {
887 				panic("Kernel unalign fault running %s!",
888 				      tsk->pid ? "init" : "the idle task");
889 			}
890 #ifdef SUPPORT_DIE
891 			die("Oops", regs);
892 #endif
893 			bust_spinlocks(1);
894 
895 			do_group_exit(SIGKILL);
896 
897 		} else {
898 			unsigned long i, b = 0;
899 			unsigned char *ptr =
900 				(unsigned char *)regs->regs[ra];
901 			if (load_n_store) {
902 				/* handle get_user(x, ptr) */
903 				for (i = 0; i < load_store_size; i++) {
904 					ret = get_user(b, ptr++);
905 					if (!ret) {
906 						/* Success! update x. */
907 #ifdef __LITTLE_ENDIAN
908 						x |= (b << (8 * i));
909 #else
910 						x <<= 8;
911 						x |= b;
912 #endif /* __LITTLE_ENDIAN */
913 					} else {
914 						x = 0;
915 						break;
916 					}
917 				}
918 
919 				/* Sign-extend 4-byte loads. */
920 				if (load_store_size == 4)
921 					x = (long)(int)x;
922 
923 				/* Set register rd. */
924 				regs->regs[rd] = x;
925 
926 				/* Set register rx. */
927 				regs->regs[rx] = ret;
928 
929 				/* Bump pc. */
930 				regs->pc += 8;
931 
932 			} else {
933 				/* Handle put_user(x, ptr) */
934 				x = regs->regs[rb];
935 #ifdef __LITTLE_ENDIAN
936 				b = x;
937 #else
938 				/*
939 				 * Swap x in order to store x from low
940 				 * to high memory same as the
941 				 * little-endian case.
942 				 */
943 				switch (load_store_size) {
944 				case 8:
945 					b = swab64(x);
946 					break;
947 				case 4:
948 					b = swab32(x);
949 					break;
950 				case 2:
951 					b = swab16(x);
952 					break;
953 				}
954 #endif /* __LITTLE_ENDIAN */
955 				for (i = 0; i < load_store_size; i++) {
956 					ret = put_user(b, ptr++);
957 					if (ret)
958 						break;
959 					/* Success! shift 1 byte. */
960 					b >>= 8;
961 				}
962 				/* Set register rx. */
963 				regs->regs[rx] = ret;
964 
965 				/* Bump pc. */
966 				regs->pc += 8;
967 			}
968 		}
969 
970 		unaligned_fixup_count++;
971 
972 		if (unaligned_printk) {
973 			pr_info("%s/%d - Unalign fixup for kernel access to userspace %lx\n",
974 				current->comm, current->pid, regs->regs[ra]);
975 		}
976 
977 		/* Done! Return to the exception handler. */
978 		return;
979 	}
980 
981 	if ((align_ctl == 0) || unexpected) {
982 		siginfo_t info = {
983 			.si_signo = SIGBUS,
984 			.si_code = BUS_ADRALN,
985 			.si_addr = (unsigned char __user *)0
986 		};
987 		if (unaligned_printk)
988 			pr_info("Unalign bundle: unexp @%llx, %llx\n",
989 				(unsigned long long)regs->pc,
990 				(unsigned long long)bundle);
991 
992 		if (ra < 56) {
993 			unsigned long uaa = (unsigned long)regs->regs[ra];
994 			/* Set bus Address. */
995 			info.si_addr = (unsigned char __user *)uaa;
996 		}
997 
998 		unaligned_fixup_count++;
999 
1000 		trace_unhandled_signal("unaligned fixup trap", regs,
1001 				       (unsigned long)info.si_addr, SIGBUS);
1002 		force_sig_info(info.si_signo, &info, current);
1003 		return;
1004 	}
1005 
1006 #ifdef __LITTLE_ENDIAN
1007 #define UA_FIXUP_ADDR_DELTA          1
1008 #define UA_FIXUP_BFEXT_START(_B_)    0
1009 #define UA_FIXUP_BFEXT_END(_B_)     (8 * (_B_) - 1)
1010 #else /* __BIG_ENDIAN */
1011 #define UA_FIXUP_ADDR_DELTA          -1
1012 #define UA_FIXUP_BFEXT_START(_B_)   (64 - 8 * (_B_))
1013 #define UA_FIXUP_BFEXT_END(_B_)      63
1014 #endif /* __LITTLE_ENDIAN */
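/*
 * Editor's worked example (illustrative only): for a 4-byte access on a
 * little-endian kernel, UA_FIXUP_BFEXT_START(4) is 0 and
 * UA_FIXUP_BFEXT_END(4) is 8 * 4 - 1 = 31, so the bfexts/bfextu bundles
 * below extract bits 0..31 of rd; on big-endian the same access extracts
 * bits 32..63.  UA_FIXUP_ADDR_DELTA is the per-byte step used with
 * st1_add: +1 when bytes are walked low-to-high (LE), -1 when they are
 * walked high-to-low (BE).
 */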
1015 
1016 
1017 
1018 	if ((ra != rb) && (rd != TREG_SP) && !alias &&
1019 	    !y1_br && !y1_lr && !x1_add) {
1020 		/*
1021 		 * Simple case: ra != rb, no register alias found,
1022 		 * and no branch or link. This covers the majority of cases.
1023 		 * We can do a little better for this simple case than the
1024 		 * generic scheme below.
1025 		 */
1026 		if (!load_n_store) {
1027 			/*
1028 			 * Simple store: ra != rb, no need for a scratch register.
1029 			 * Just store and rotate right bytewise.
1030 			 */
1031 #ifdef __BIG_ENDIAN
1032 			frag.insn[n++] =
1033 				jit_x0_addi(ra, ra, load_store_size - 1) |
1034 				jit_x1_fnop();
1035 #endif /* __BIG_ENDIAN */
1036 			for (k = 0; k < load_store_size; k++) {
1037 				/* Store a byte. */
1038 				frag.insn[n++] =
1039 					jit_x0_rotli(rb, rb, 56) |
1040 					jit_x1_st1_add(ra, rb,
1041 						       UA_FIXUP_ADDR_DELTA);
1042 			}
1043 #ifdef __BIG_ENDIAN
1044 			frag.insn[n] = jit_x1_addi(ra, ra, 1);
1045 #else
1046 			frag.insn[n] = jit_x1_addi(ra, ra,
1047 						   -1 * load_store_size);
1048 #endif /* __LITTLE_ENDIAN */
1049 
1050 			if (load_store_size == 8) {
1051 				frag.insn[n] |= jit_x0_fnop();
1052 			} else if (load_store_size == 4) {
1053 				frag.insn[n] |= jit_x0_rotli(rb, rb, 32);
1054 			} else { /* = 2 */
1055 				frag.insn[n] |= jit_x0_rotli(rb, rb, 16);
1056 			}
1057 			n++;
1058 			if (bundle_2_enable)
1059 				frag.insn[n++] = bundle_2;
1060 			frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1061 		} else {
1062 			if (rd == ra) {
1063 				/* Use two clobber registers: clob1/2. */
1064 				frag.insn[n++] =
1065 					jit_x0_addi(TREG_SP, TREG_SP, -16) |
1066 					jit_x1_fnop();
1067 				frag.insn[n++] =
1068 					jit_x0_addi(clob1, ra, 7) |
1069 					jit_x1_st_add(TREG_SP, clob1, -8);
1070 				frag.insn[n++] =
1071 					jit_x0_addi(clob2, ra, 0) |
1072 					jit_x1_st(TREG_SP, clob2);
1073 				frag.insn[n++] =
1074 					jit_x0_fnop() |
1075 					jit_x1_ldna(rd, ra);
1076 				frag.insn[n++] =
1077 					jit_x0_fnop() |
1078 					jit_x1_ldna(clob1, clob1);
1079 				/*
1080 				 * Note: we must make sure that rd is not
1081 				 * sp. Recover clob1/2 from the stack.
1082 				 */
1083 				frag.insn[n++] =
1084 					jit_x0_dblalign(rd, clob1, clob2) |
1085 					jit_x1_ld_add(clob2, TREG_SP, 8);
1086 				frag.insn[n++] =
1087 					jit_x0_fnop() |
1088 					jit_x1_ld_add(clob1, TREG_SP, 16);
1089 			} else {
1090 				/* Use one clobber register: clob1 only. */
1091 				frag.insn[n++] =
1092 					jit_x0_addi(TREG_SP, TREG_SP, -16) |
1093 					jit_x1_fnop();
1094 				frag.insn[n++] =
1095 					jit_x0_addi(clob1, ra, 7) |
1096 					jit_x1_st(TREG_SP, clob1);
1097 				frag.insn[n++] =
1098 					jit_x0_fnop() |
1099 					jit_x1_ldna(rd, ra);
1100 				frag.insn[n++] =
1101 					jit_x0_fnop() |
1102 					jit_x1_ldna(clob1, clob1);
1103 				/*
1104 				 * Note: we must make sure that rd is not
1105 				 * sp. Recover clob1 from the stack.
1106 				 */
1107 				frag.insn[n++] =
1108 					jit_x0_dblalign(rd, clob1, ra) |
1109 					jit_x1_ld_add(clob1, TREG_SP, 16);
1110 			}
1111 
1112 			if (bundle_2_enable)
1113 				frag.insn[n++] = bundle_2;
1114 			/*
1115 			 * For a non-8-byte load, extract the corresponding
1116 			 * bytes and sign-extend.
1117 			 */
1118 			if (load_store_size == 4) {
1119 				if (load_store_signed)
1120 					frag.insn[n++] =
1121 						jit_x0_bfexts(
1122 							rd, rd,
1123 							UA_FIXUP_BFEXT_START(4),
1124 							UA_FIXUP_BFEXT_END(4)) |
1125 						jit_x1_fnop();
1126 				else
1127 					frag.insn[n++] =
1128 						jit_x0_bfextu(
1129 							rd, rd,
1130 							UA_FIXUP_BFEXT_START(4),
1131 							UA_FIXUP_BFEXT_END(4)) |
1132 						jit_x1_fnop();
1133 			} else if (load_store_size == 2) {
1134 				if (load_store_signed)
1135 					frag.insn[n++] =
1136 						jit_x0_bfexts(
1137 							rd, rd,
1138 							UA_FIXUP_BFEXT_START(2),
1139 							UA_FIXUP_BFEXT_END(2)) |
1140 						jit_x1_fnop();
1141 				else
1142 					frag.insn[n++] =
1143 						jit_x0_bfextu(
1144 							rd, rd,
1145 							UA_FIXUP_BFEXT_START(2),
1146 							UA_FIXUP_BFEXT_END(2)) |
1147 						jit_x1_fnop();
1148 			}
1149 
1150 			frag.insn[n++] =
1151 				jit_x0_fnop()  |
1152 				jit_x1_iret();
1153 		}
1154 	} else if (!load_n_store) {
1155 
1156 		/*
1157 		 * Generic memory store cases: use 3 clobber registers.
1158 		 *
1159 		 * Allocate space for saving clob2, clob1 and clob3 on the user's
1160 		 * stack. Register clob3 points to where clob2 is saved, followed
1161 		 * by clob1 and clob3 from high to low memory.
1162 		 */
1163 		frag.insn[n++] =
1164 			jit_x0_addi(TREG_SP, TREG_SP, -32)    |
1165 			jit_x1_fnop();
1166 		frag.insn[n++] =
1167 			jit_x0_addi(clob3, TREG_SP, 16)  |
1168 			jit_x1_st_add(TREG_SP, clob3, 8);
1169 #ifdef __LITTLE_ENDIAN
1170 		frag.insn[n++] =
1171 			jit_x0_addi(clob1, ra, 0)   |
1172 			jit_x1_st_add(TREG_SP, clob1, 8);
1173 #else
1174 		frag.insn[n++] =
1175 			jit_x0_addi(clob1, ra, load_store_size - 1)   |
1176 			jit_x1_st_add(TREG_SP, clob1, 8);
1177 #endif
1178 		if (load_store_size == 8) {
1179 			/*
1180 			 * We save one byte at a time, not for speed but for compact
1181 			 * code. After each store, the data source register rotates
1182 			 * right one byte, so it is unchanged after 8 stores.
1183 			 */
1184 			frag.insn[n++] =
1185 				jit_x0_addi(clob2, TREG_ZERO, 7)     |
1186 				jit_x1_st_add(TREG_SP, clob2, 16);
1187 			frag.insn[n++] =
1188 				jit_x0_rotli(rb, rb, 56)      |
1189 				jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1190 			frag.insn[n++] =
1191 				jit_x0_addi(clob2, clob2, -1) |
1192 				jit_x1_bnezt(clob2, -1);
1193 			frag.insn[n++] =
1194 				jit_x0_fnop()                 |
1195 				jit_x1_addi(clob2, y1_br_reg, 0);
1196 		} else if (load_store_size == 4) {
1197 			frag.insn[n++] =
1198 				jit_x0_addi(clob2, TREG_ZERO, 3)     |
1199 				jit_x1_st_add(TREG_SP, clob2, 16);
1200 			frag.insn[n++] =
1201 				jit_x0_rotli(rb, rb, 56)      |
1202 				jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1203 			frag.insn[n++] =
1204 				jit_x0_addi(clob2, clob2, -1) |
1205 				jit_x1_bnezt(clob2, -1);
1206 			/*
1207 			 * Same as the 8-byte case, but we need to rotate another
1208 			 * 4 bytes to recover rb for the 4-byte store.
1209 			 */
1210 			frag.insn[n++] = jit_x0_rotli(rb, rb, 32)      |
1211 				jit_x1_addi(clob2, y1_br_reg, 0);
1212 		} else { /* =2 */
1213 			frag.insn[n++] =
1214 				jit_x0_addi(clob2, rb, 0)     |
1215 				jit_x1_st_add(TREG_SP, clob2, 16);
1216 			for (k = 0; k < 2; k++) {
1217 				frag.insn[n++] =
1218 					jit_x0_shrui(rb, rb, 8)  |
1219 					jit_x1_st1_add(clob1, rb,
1220 						       UA_FIXUP_ADDR_DELTA);
1221 			}
1222 			frag.insn[n++] =
1223 				jit_x0_addi(rb, clob2, 0)       |
1224 				jit_x1_addi(clob2, y1_br_reg, 0);
1225 		}
1226 
1227 		if (bundle_2_enable)
1228 			frag.insn[n++] = bundle_2;
1229 
1230 		if (y1_lr) {
1231 			frag.insn[n++] =
1232 				jit_x0_fnop()                    |
1233 				jit_x1_mfspr(y1_lr_reg,
1234 					     SPR_EX_CONTEXT_0_0);
1235 		}
1236 		if (y1_br) {
1237 			frag.insn[n++] =
1238 				jit_x0_fnop()                    |
1239 				jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1240 					     clob2);
1241 		}
1242 		if (x1_add) {
1243 			frag.insn[n++] =
1244 				jit_x0_addi(ra, ra, x1_add_imm8) |
1245 				jit_x1_ld_add(clob2, clob3, -8);
1246 		} else {
1247 			frag.insn[n++] =
1248 				jit_x0_fnop()                    |
1249 				jit_x1_ld_add(clob2, clob3, -8);
1250 		}
1251 		frag.insn[n++] =
1252 			jit_x0_fnop()   |
1253 			jit_x1_ld_add(clob1, clob3, -8);
1254 		frag.insn[n++] = jit_x0_fnop()   | jit_x1_ld(clob3, clob3);
1255 		frag.insn[n++] = jit_x0_fnop()   | jit_x1_iret();
1256 
1257 	} else {
1258 		/*
1259 		 * Generic memory load cases.
1260 		 *
1261 		 * Allocate space for saving clob1, clob2 and clob3 on the user's
1262 		 * stack. Register clob3 points to where clob1 is saved, followed
1263 		 * by clob2 and clob3 from high to low memory.
1264 		 */
1265 
1266 		frag.insn[n++] =
1267 			jit_x0_addi(TREG_SP, TREG_SP, -32) |
1268 			jit_x1_fnop();
1269 		frag.insn[n++] =
1270 			jit_x0_addi(clob3, TREG_SP, 16) |
1271 			jit_x1_st_add(TREG_SP, clob3, 8);
1272 		frag.insn[n++] =
1273 			jit_x0_addi(clob2, ra, 0) |
1274 			jit_x1_st_add(TREG_SP, clob2, 8);
1275 
1276 		if (y1_br) {
1277 			frag.insn[n++] =
1278 				jit_x0_addi(clob1, y1_br_reg, 0) |
1279 				jit_x1_st_add(TREG_SP, clob1, 16);
1280 		} else {
1281 			frag.insn[n++] =
1282 				jit_x0_fnop() |
1283 				jit_x1_st_add(TREG_SP, clob1, 16);
1284 		}
1285 
1286 		if (bundle_2_enable)
1287 			frag.insn[n++] = bundle_2;
1288 
1289 		if (y1_lr) {
1290 			frag.insn[n++] =
1291 				jit_x0_fnop()  |
1292 				jit_x1_mfspr(y1_lr_reg,
1293 					     SPR_EX_CONTEXT_0_0);
1294 		}
1295 
1296 		if (y1_br) {
1297 			frag.insn[n++] =
1298 				jit_x0_fnop() |
1299 				jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1300 					     clob1);
1301 		}
1302 
1303 		frag.insn[n++] =
1304 			jit_x0_addi(clob1, clob2, 7)      |
1305 			jit_x1_ldna(rd, clob2);
1306 		frag.insn[n++] =
1307 			jit_x0_fnop()                     |
1308 			jit_x1_ldna(clob1, clob1);
1309 		frag.insn[n++] =
1310 			jit_x0_dblalign(rd, clob1, clob2) |
1311 			jit_x1_ld_add(clob1, clob3, -8);
1312 		if (x1_add) {
1313 			frag.insn[n++] =
1314 				jit_x0_addi(ra, ra, x1_add_imm8) |
1315 				jit_x1_ld_add(clob2, clob3, -8);
1316 		} else {
1317 			frag.insn[n++] =
1318 				jit_x0_fnop()  |
1319 				jit_x1_ld_add(clob2, clob3, -8);
1320 		}
1321 
1322 		frag.insn[n++] =
1323 			jit_x0_fnop() |
1324 			jit_x1_ld(clob3, clob3);
1325 
1326 		if (load_store_size == 4) {
1327 			if (load_store_signed)
1328 				frag.insn[n++] =
1329 					jit_x0_bfexts(
1330 						rd, rd,
1331 						UA_FIXUP_BFEXT_START(4),
1332 						UA_FIXUP_BFEXT_END(4)) |
1333 					jit_x1_fnop();
1334 			else
1335 				frag.insn[n++] =
1336 					jit_x0_bfextu(
1337 						rd, rd,
1338 						UA_FIXUP_BFEXT_START(4),
1339 						UA_FIXUP_BFEXT_END(4)) |
1340 					jit_x1_fnop();
1341 		} else if (load_store_size == 2) {
1342 			if (load_store_signed)
1343 				frag.insn[n++] =
1344 					jit_x0_bfexts(
1345 						rd, rd,
1346 						UA_FIXUP_BFEXT_START(2),
1347 						UA_FIXUP_BFEXT_END(2)) |
1348 					jit_x1_fnop();
1349 			else
1350 				frag.insn[n++] =
1351 					jit_x0_bfextu(
1352 						rd, rd,
1353 						UA_FIXUP_BFEXT_START(2),
1354 						UA_FIXUP_BFEXT_END(2)) |
1355 					jit_x1_fnop();
1356 		}
1357 
1358 		frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1359 	}
1360 
1361 	/* Max JIT bundle count is 14. */
1362 	WARN_ON(n > 14);
1363 
1364 	if (!unexpected) {
1365 		int status = 0;
1366 		int idx = (regs->pc >> 3) &
1367 			((1ULL << (PAGE_SHIFT - UNALIGN_JIT_SHIFT)) - 1);
1368 
1369 		frag.pc = regs->pc;
1370 		frag.bundle = bundle;
1371 
1372 		if (unaligned_printk) {
1373 			pr_info("%s/%d, Unalign fixup: pc=%lx bundle=%lx %d %d %d %d %d %d %d %d\n",
1374 				current->comm, current->pid,
1375 				(unsigned long)frag.pc,
1376 				(unsigned long)frag.bundle,
1377 				(int)alias, (int)rd, (int)ra,
1378 				(int)rb, (int)bundle_2_enable,
1379 				(int)y1_lr, (int)y1_br, (int)x1_add);
1380 
1381 			for (k = 0; k < n; k += 2)
1382 				pr_info("[%d] %016llx %016llx\n",
1383 					k, (unsigned long long)frag.insn[k],
1384 					(unsigned long long)frag.insn[k+1]);
1385 		}
1386 
1387 		/* Swap bundle byte order for big-endian systems. */
1388 #ifdef __BIG_ENDIAN
1389 		frag.bundle = GX_INSN_BSWAP(frag.bundle);
1390 		for (k = 0; k < n; k++)
1391 			frag.insn[k] = GX_INSN_BSWAP(frag.insn[k]);
1392 #endif /* __BIG_ENDIAN */
1393 
1394 		status = copy_to_user((void __user *)&jit_code_area[idx],
1395 				      &frag, sizeof(frag));
1396 		if (status) {
1397 			/* Failed to copy the JIT into user land; send SIGSEGV. */
1398 			siginfo_t info = {
1399 				.si_signo = SIGSEGV,
1400 				.si_code = SEGV_MAPERR,
1401 				.si_addr = (void __user *)&jit_code_area[idx]
1402 			};
1403 
1404 			pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx\n",
1405 				current->pid, current->comm,
1406 				(unsigned long long)&jit_code_area[idx]);
1407 
1408 			trace_unhandled_signal("segfault in unalign fixup",
1409 					       regs,
1410 					       (unsigned long)info.si_addr,
1411 					       SIGSEGV);
1412 			force_sig_info(info.si_signo, &info, current);
1413 			return;
1414 		}
1415 
1416 
1417 		/* Do a cheaper increment, not accurate. */
1418 		unaligned_fixup_count++;
1419 		__flush_icache_range((unsigned long)&jit_code_area[idx],
1420 				     (unsigned long)&jit_code_area[idx] +
1421 				     sizeof(frag));
1422 
1423 		/* Set up SPR_EX_CONTEXT_0_0/1 for returning to the user program. */
1424 		__insn_mtspr(SPR_EX_CONTEXT_0_0, regs->pc + 8);
1425 		__insn_mtspr(SPR_EX_CONTEXT_0_1, PL_ICS_EX1(USER_PL, 0));
1426 
1427 		/* Point pc at the start of the new JIT. */
1428 		regs->pc = (unsigned long)&jit_code_area[idx].insn[0];
1429 		/* Set ICS in SPR_EX_CONTEXT_K_1. */
1430 		regs->ex1 = PL_ICS_EX1(USER_PL, 1);
1431 	}
1432 }
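/*
 * Editor's note: an illustrative sketch (derived from the simple-load path
 * above: rd != ra, no alias, no branch/link/add) of the fragment generated
 * for an unaligned 8-byte load "ld rd, ra":
 *
 *     { addi sp, sp, -16       ; fnop                  }
 *     { addi clob1, ra, 7      ; st  sp, clob1         }
 *     { fnop                   ; ldna rd, ra           }
 *     { fnop                   ; ldna clob1, clob1     }
 *     { dblalign rd, clob1, ra ; ld_add clob1, sp, 16  }
 *     [ rest of the original bundle, if bundle_2_enable ]
 *     { fnop                   ; iret                  }
 *
 * The two ldna loads fetch the aligned doublewords on either side of the
 * unaligned address and dblalign merges them into rd.
 */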
1433 
1434 
1435 /*
1436  * C function to generate the unaligned-data JIT. Called from the
1437  * unaligned-data interrupt handler.
1438  *
1439  * First check whether the unaligned fixup is disabled, the exception did not
1440  * come from user space, or the sp register points to an unaligned address;
1441  * if so, generate a SIGBUS. Then map a page into user space as the JIT area
1442  * if it is not mapped yet. Generate the JIT code by calling jit_bundle_gen()
1443  * and then return to the exception handler.
1444  *
1445  * The exception handler will "iret" to the newly generated JIT code after
1446  * restoring the caller-saved registers. In turn, the JIT code will perform
1447  * another "iret" to resume the user's program.
1448  */
1449 
1450 void do_unaligned(struct pt_regs *regs, int vecnum)
1451 {
1452 	enum ctx_state prev_state = exception_enter();
1453 	tilegx_bundle_bits __user  *pc;
1454 	tilegx_bundle_bits bundle;
1455 	struct thread_info *info = current_thread_info();
1456 	int align_ctl;
1457 
1458 	/* Check the per-process unaligned JIT flags. */
1459 	align_ctl = unaligned_fixup;
1460 	switch (task_thread_info(current)->align_ctl) {
1461 	case PR_UNALIGN_NOPRINT:
1462 		align_ctl = 1;
1463 		break;
1464 	case PR_UNALIGN_SIGBUS:
1465 		align_ctl = 0;
1466 		break;
1467 	}
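	/*
	 * Editor's note (illustrative only): the per-process override above is
	 * the standard prctl(2) unaligned-access control, e.g. from userspace:
	 *
	 *     prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS);    always SIGBUS
	 *     prctl(PR_SET_UNALIGN, PR_UNALIGN_NOPRINT);   fix up, don't log
	 */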
1468 
1469 	/* Enable interrupts in order to access user land. */
1470 	local_irq_enable();
1471 
1472 	/*
1473 	 * If the fault came from kernel space, there are two choices:
1474 	 * (a) unaligned_fixup < 1: we first call the get/put_user fixup
1475 	 *     to return -EFAULT. If there is no fixup, simply panic the kernel.
1476 	 * (b) unaligned_fixup >= 1: we try to fix the unaligned access
1477 	 *     if it was triggered by the get_user/put_user() macros. Panic the
1478 	 *     kernel if it is not fixable.
1479 	 */
1480 
1481 	if (EX1_PL(regs->ex1) != USER_PL) {
1482 
1483 		if (align_ctl < 1) {
1484 			unaligned_fixup_count++;
1485 			/* The exception came from the kernel; try to fix it up. */
1486 			if (fixup_exception(regs)) {
1487 				if (unaligned_printk)
1488 					pr_info("Unalign fixup: %d %llx @%llx\n",
1489 						(int)unaligned_fixup,
1490 						(unsigned long long)regs->ex1,
1491 						(unsigned long long)regs->pc);
1492 			} else {
1493 				/* Not fixable. Go panic. */
1494 				panic("Unalign exception in Kernel. pc=%lx",
1495 				      regs->pc);
1496 			}
1497 		} else {
1498 			/*
1499 			 * Try to fix the exception. If we can't, panic the
1500 			 * kernel.
1501 			 */
1502 			bundle = GX_INSN_BSWAP(
1503 				*((tilegx_bundle_bits *)(regs->pc)));
1504 			jit_bundle_gen(regs, bundle, align_ctl);
1505 		}
1506 		goto done;
1507 	}
1508 
1509 	/*
1510 	 * The fault came from user space with ICS set, or the stack is not
1511 	 * aligned. If so, we will trigger a SIGBUS.
1512 	 */
1513 	if ((regs->sp & 0x7) || (regs->ex1) || (align_ctl < 0)) {
1514 		siginfo_t info = {
1515 			.si_signo = SIGBUS,
1516 			.si_code = BUS_ADRALN,
1517 			.si_addr = (unsigned char __user *)0
1518 		};
1519 
1520 		if (unaligned_printk)
1521 			pr_info("Unalign fixup: %d %llx @%llx\n",
1522 				(int)unaligned_fixup,
1523 				(unsigned long long)regs->ex1,
1524 				(unsigned long long)regs->pc);
1525 
1526 		unaligned_fixup_count++;
1527 
1528 		trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS);
1529 		force_sig_info(info.si_signo, &info, current);
1530 		goto done;
1531 	}
1532 
1533 
1534 	/* Read the bundle that caused the exception. */
1535 	pc = (tilegx_bundle_bits __user *)(regs->pc);
1536 	if (get_user(bundle, pc) != 0) {
1537 		/* We should probably never get here, since pc is a valid user address. */
1538 		siginfo_t info = {
1539 			.si_signo = SIGSEGV,
1540 			.si_code = SEGV_MAPERR,
1541 			.si_addr = (void __user *)pc
1542 		};
1543 		pr_err("Couldn't read instruction at %p trying to step\n", pc);
1544 		trace_unhandled_signal("segfault in unalign fixup", regs,
1545 				       (unsigned long)info.si_addr, SIGSEGV);
1546 		force_sig_info(info.si_signo, &info, current);
1547 		goto done;
1548 	}
1549 
1550 	if (!info->unalign_jit_base) {
1551 		void __user *user_page;
1552 
1553 		/*
1554 		 * Allocate a page in userland.
1555 		 * For 64-bit processes we try to place the mapping far
1556 		 * from anything else that might be going on (specifically
1557 		 * 64 GB below the top of the user address space).  If it
1558 		 * happens not to be possible to put it there, it's OK;
1559 		 * the kernel will choose another location and we'll
1560 		 * remember it for later.
1561 		 */
1562 		if (is_compat_task())
1563 			user_page = NULL;
1564 		else
1565 			user_page = (void __user *)(TASK_SIZE - (1UL << 36)) +
1566 				(current->pid << PAGE_SHIFT);
1567 
1568 		user_page = (void __user *) vm_mmap(NULL,
1569 						    (unsigned long)user_page,
1570 						    PAGE_SIZE,
1571 						    PROT_EXEC | PROT_READ |
1572 						    PROT_WRITE,
1573 #ifdef CONFIG_HOMECACHE
1574 						    MAP_CACHE_HOME_TASK |
1575 #endif
1576 						    MAP_PRIVATE |
1577 						    MAP_ANONYMOUS,
1578 						    0);
1579 
1580 		if (IS_ERR((void __force *)user_page)) {
1581 			pr_err("Out of kernel pages trying do_mmap\n");
1582 			goto done;
1583 		}
1584 
1585 		/* Save the address in the thread_info struct */
1586 		info->unalign_jit_base = user_page;
1587 		if (unaligned_printk)
1588 			pr_info("Unalign bundle: %d:%d, allocate page @%llx\n",
1589 				raw_smp_processor_id(), current->pid,
1590 				(unsigned long long)user_page);
1591 	}
1592 
1593 	/* Generate unalign JIT */
1594 	jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl);
1595 
1596 done:
1597 	exception_exit(prev_state);
1598 }
1599 
1600 #endif /* __tilegx__ */
1601