/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 *
 * Support routines for atomic operations.  Each function takes:
 *
 * r0: address to manipulate
 * r1: pointer to atomic lock guarding this operation (for ATOMIC_LOCK_REG)
 * r2: new value to write, or for cmpxchg/add_unless, value to compare against
 * r3: (cmpxchg/xchg_add_unless) new value to write or add;
 *     (atomic64 ops) high word of value to write
 * r4/r5: (cmpxchg64/add_unless64) new value to write or add
 *
 * The 32-bit routines return a "struct __get_user" so that the futex code
 * has an opportunity to return -EFAULT to the user if needed.
 * The 64-bit routines just return a "long long" with the value,
 * since they are only used from kernel space and don't expect to fault.
 * Support for 16-bit ops is included in the framework but we don't provide any.
 *
 * Note that the caller is advised to issue a suitable L1 or L2
 * prefetch on the address being manipulated to avoid extra stalls.
 * In addition, the hot path is on two icache lines, and we start with
 * a jump to the second line to make sure they are both in cache so
 * that we never stall waiting on icache fill while holding the lock.
 * (This doesn't work out with most 64-bit ops, since they consume
 * too many bundles, so may take an extra i-cache stall.)
 *
 * These routines set the INTERRUPT_CRITICAL_SECTION bit, just
 * like sys_cmpxchg(), so that NMIs like PERF_COUNT will not interrupt
 * the code, just page faults.
 *
 * If the load or store faults in a way that can be directly fixed in
 * the do_page_fault_ics() handler (e.g. a vmalloc reference) we fix it
 * directly, return to the instruction that faulted, and retry it.
 *
 * If the load or store faults in a way that potentially requires us
 * to release the atomic lock, then retry (e.g. a migrating PTE), we
 * reset the PC in do_page_fault_ics() to the "tns" instruction so
 * that on return we will reacquire the lock and restart the op.  We
 * are somewhat overloading the exception_table_entry notion by doing
 * this, since those entries are not normally used for migrating PTEs.
 *
 * If the main page fault handler discovers a bad address, it will see
 * the PC pointing to the "tns" instruction (due to the earlier
 * exception_table_entry processing in do_page_fault_ics), and
 * re-reset the PC to the fault handler, atomic_bad_address(), which
 * effectively takes over from the atomic op and can either return a
 * bad "struct __get_user" (for user addresses) or can just panic (for
 * bad kernel addresses).
 *
 * Note that if the value we would store is the same as what we
 * loaded, we bypass the store.  Other platforms with true atomics can
 * make the guarantee that a non-atomic __clear_bit(), for example,
 * can safely race with an atomic test_and_set_bit(); this example is
 * from bit_spinlock.h in slub_lock() / slub_unlock().  We can't do
 * that on Tile since the "atomic" op is really just a
 * read/modify/write, and can race with the non-atomic
 * read/modify/write.  However, if we can short-circuit the write when
 * it is not needed, in the atomic case, we avoid the race.
 */
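/*
 * For orientation, the fast path of each 32-bit routine below
 * corresponds roughly to the C-style sketch that follows.  This is
 * illustrative only: compute_new() stands in for the per-op "body"
 * passed to the atomic_op macro and is not a real kernel function.
 *
 *      set INTERRUPT_CRITICAL_SECTION
 *      acquire the lock at r1 with "tns", backing off exponentially
 *      old = *(int *)r0;                       // "lw r22, r0"
 *      new = compute_new(old, r2, r3);         // \body computes r24
 *      if (new != old) {                       // skip redundant stores
 *              *(int *)r0 = new;
 *              memory fence;                   // "mf"
 *      }
 *      *(int *)r1 = 0;                         // release the atomic lock
 *      clear INTERRUPT_CRITICAL_SECTION
 *      return old in r0 (r1 is cleared for the "struct __get_user")
 */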
#include <linux/linkage.h>
#include <asm/atomic_32.h>
#include <asm/page.h>
#include <asm/processor.h>

        .section .text.atomic,"ax"
ENTRY(__start_atomic_asm_code)

        .macro  atomic_op, name, bitwidth, body
        .align  64
STD_ENTRY_SECTION(__atomic\name, .text.atomic)
        {
         movei  r24, 1
         j      4f              /* branch to second cache line */
        }
1:      {
         .ifc \bitwidth,16
         lh     r22, r0
         .else
         lw     r22, r0
         addi   r28, r0, 4
         .endif
        }
        .ifc \bitwidth,64
        lw      r23, r28
        .endif
        \body   /* set r24, and r25 if 64-bit */
        {
         seq    r26, r22, r24
         seq    r27, r23, r25
        }
        .ifc \bitwidth,64
        bbnst   r27, 2f
        .endif
        bbs     r26, 3f         /* skip write-back if it's the same value */
2:      {
         .ifc \bitwidth,16
         sh     r0, r24
         .else
         sw     r0, r24
         .endif
        }
        .ifc \bitwidth,64
        sw      r28, r25
        .endif
        mf
3:      {
         move   r0, r22
         .ifc \bitwidth,64
         move   r1, r23
         .else
         move   r1, zero
         .endif
         sw     ATOMIC_LOCK_REG_NAME, zero
        }
        mtspr   INTERRUPT_CRITICAL_SECTION, zero
        jrp     lr
4:      {
         move   ATOMIC_LOCK_REG_NAME, r1
         mtspr  INTERRUPT_CRITICAL_SECTION, r24
        }
#ifndef CONFIG_SMP
        j       1b              /* no atomic locks */
#else
        {
         tns    r21, ATOMIC_LOCK_REG_NAME
         moveli r23, 2048       /* maximum backoff time in cycles */
        }
        {
         bzt    r21, 1b         /* branch if lock acquired */
         moveli r25, 32         /* starting backoff time in cycles */
        }
5:      mtspr   INTERRUPT_CRITICAL_SECTION, zero
        mfspr   r26, CYCLE_LOW  /* get start point for this backoff */
6:      mfspr   r22, CYCLE_LOW  /* test to see if we've backed off enough */
        sub     r22, r22, r26
        slt     r22, r22, r25
        bbst    r22, 6b
        {
         mtspr  INTERRUPT_CRITICAL_SECTION, r24
         shli   r25, r25, 1     /* double the backoff; retry the tns */
        }
        {
         tns    r21, ATOMIC_LOCK_REG_NAME
         slt    r26, r23, r25   /* is the proposed backoff too big? */
        }
        {
         bzt    r21, 1b         /* branch if lock acquired */
         mvnz   r25, r26, r23
        }
        j       5b
#endif
        STD_ENDPROC(__atomic\name)
        .ifc \bitwidth,32
        .pushsection __ex_table,"a"
        .align  4
        .word   1b, __atomic\name
        .word   2b, __atomic\name
        .word   __atomic\name, __atomic_bad_address
        .popsection
        .endif
        .endm
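/*
 * The SMP contention path above (labels 4:, 5:, and 6:) is a bounded
 * exponential backoff.  As a rough C-style sketch (illustrative only;
 * tns() and cycle_count() are shorthand for the "tns" instruction and
 * the CYCLE_LOW SPR, not real kernel interfaces):
 *
 *      backoff = 32;                           // starting backoff, in cycles
 *      while (tns(lock) != 0) {                // nonzero: someone else owns it
 *              clear INTERRUPT_CRITICAL_SECTION while waiting
 *              start = cycle_count();
 *              while (cycle_count() - start < backoff)
 *                      ;                       // spin without touching the lock
 *              backoff = min(backoff * 2, 2048); // double, capped at the max
 *              set INTERRUPT_CRITICAL_SECTION again before retrying the tns
 *      }
 */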
/*
 * Use __atomic32 prefix to avoid collisions with GCC builtin
 * __atomic functions.
 */

atomic_op 32_cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"
atomic_op 32_xchg, 32, "move r24, r2"
atomic_op 32_xchg_add, 32, "add r24, r22, r2"
atomic_op 32_xchg_add_unless, 32, \
        "sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"
atomic_op 32_fetch_or, 32, "or r24, r22, r2"
atomic_op 32_fetch_and, 32, "and r24, r22, r2"
atomic_op 32_fetch_andn, 32, "nor r2, r2, zero; and r24, r22, r2"
atomic_op 32_fetch_xor, 32, "xor r24, r22, r2"

atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \
        { bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }"
atomic_op 64_xchg, 64, "{ move r24, r2; move r25, r3 }"
atomic_op 64_xchg_add, 64, "{ add r24, r22, r2; add r25, r23, r3 }; \
        slt_u r26, r24, r22; add r25, r25, r26"
atomic_op 64_xchg_add_unless, 64, \
        "{ sne r26, r22, r2; sne r27, r23, r3 }; \
        { bbns r26, 3f; add r24, r22, r4 }; \
        { bbns r27, 3f; add r25, r23, r5 }; \
        slt_u r26, r24, r22; add r25, r25, r26"
atomic_op 64_fetch_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }"
atomic_op 64_fetch_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }"
atomic_op 64_fetch_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }"

        jrp     lr              /* happy backtracer */

ENTRY(__end_atomic_asm_code)