/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation, version 2.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 *   NON INFRINGEMENT.  See the GNU General Public License for
 *   more details.
 */

#include <linux/string.h>
#include <linux/smp.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <asm/fixmap.h>
#include <asm/kmap_types.h>
#include <asm/tlbflush.h>
#include <hv/hypervisor.h>
#include <arch/chip.h>


#if !CHIP_HAS_COHERENT_LOCAL_CACHE()

/* Defined in memcpy.S */
extern unsigned long __memcpy_asm(void *to, const void *from, unsigned long n);
extern unsigned long __copy_to_user_inatomic_asm(
	void __user *to, const void *from, unsigned long n);
extern unsigned long __copy_from_user_inatomic_asm(
	void *to, const void __user *from, unsigned long n);
extern unsigned long __copy_from_user_zeroing_asm(
	void *to, const void __user *from, unsigned long n);

typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long);

/* Size above which to consider TLB games for performance */
#define LARGE_COPY_CUTOFF 2048
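/*
 * (2048 is presumably the point at which the cost of installing and
 * flushing two temporary PTEs is repaid by the faster copy; the exact
 * crossover is empirical.)
 */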

/* Communicate to the simulator what we are trying to do. */
#define sim_allow_multiple_caching(b) \
  __insn_mtspr(SPR_SIM_CONTROL, \
   SIM_CONTROL_ALLOW_MULTIPLE_CACHING | ((b) << _SIM_CONTROL_OPERATOR_BITS))
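/*
 * (Writes to SPR_SIM_CONTROL are interpreted by the Tilera simulator;
 * on real hardware the mtspr is expected to be harmless, so the macro
 * can be used unconditionally.)
 */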

/*
 * Copy memory by briefly enabling incoherent cacheline-at-a-time mode.
 *
 * We set up our own source and destination PTEs that we fully control.
 * This is the only way to guarantee that we don't race with another
 * thread that is modifying the PTE; we can't afford to try the
 * copy_{to,from}_user() technique of catching the interrupt, since
 * we must run with interrupts disabled to avoid the risk of some
 * other code seeing the incoherent data in our cache.  (Recall that
 * our cache is indexed by PA, so even if the other code doesn't use
 * our kmap_atomic virtual addresses, they'll still hit in cache using
 * the normal VAs that aren't supposed to hit in cache.)
 */
static void memcpy_multicache(void *dest, const void *source,
			      pte_t dst_pte, pte_t src_pte, int len)
{
	int idx;
	unsigned long flags, newsrc, newdst;
	pmd_t *pmdp;
	pte_t *ptep;
	int type0, type1;
	int cpu = get_cpu();

	/*
	 * Disable interrupts so that we don't recurse into memcpy()
	 * in an interrupt handler, nor accidentally reference
	 * the PA of the source from an interrupt routine.  Also
	 * notify the simulator that we're playing games so we don't
	 * generate spurious coherency warnings.
	 */
	local_irq_save(flags);
	sim_allow_multiple_caching(1);

	/* Set up the new dest mapping */
	type0 = kmap_atomic_idx_push();
	idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0;
	newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1));
	pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst);
	ptep = pte_offset_kernel(pmdp, newdst);
	if (pte_val(*ptep) != pte_val(dst_pte)) {
		set_pte(ptep, dst_pte);
		local_flush_tlb_page(NULL, newdst, PAGE_SIZE);
	}

	/* Set up the new source mapping */
	type1 = kmap_atomic_idx_push();
	idx += (type0 - type1);
	src_pte = hv_pte_set_nc(src_pte);
	src_pte = hv_pte_clear_writable(src_pte);  /* be paranoid */
	newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1));
	pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc);
	ptep = pte_offset_kernel(pmdp, newsrc);
	__set_pte(ptep, src_pte);   /* set_pte() would be confused by this */
	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);

	/* Actually move the data. */
	__memcpy_asm((void *)newdst, (const void *)newsrc, len);

	/*
	 * Remap the source as locally-cached and not OLOC'ed so that
	 * we can inval without also invaling the remote cpu's cache.
	 * This also avoids known errata with inv'ing cacheable oloc data.
	 */
	src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3);
	src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */
	__set_pte(ptep, src_pte);   /* set_pte() would be confused by this */
	local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);

	/*
	 * Do the actual invalidation, covering the full L2 cache line
	 * at the end since __memcpy_asm() is somewhat aggressive.
	 */
	__inv_buffer((void *)newsrc, len);

	/*
	 * We're done: notify the simulator that all is back to normal,
	 * and re-enable interrupts and pre-emption.
	 */
	kmap_atomic_idx_pop();
	kmap_atomic_idx_pop();
	sim_allow_multiple_caching(0);
	local_irq_restore(flags);
	put_cpu();
}
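
/*
 * Note that memcpy_multicache() relies on its caller to pin both pages
 * and to clamp len so that neither mapping crosses a page boundary;
 * fast_copy() below arranges both.
 */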

/*
 * Identify large copies from remotely-cached memory, and copy them
 * via memcpy_multicache() if they look good, otherwise fall back
 * to the particular kind of copying passed as the memcpy_t function.
 */
static unsigned long fast_copy(void *dest, const void *source, int len,
			       memcpy_t func)
{
	/*
	 * Check if it's big enough to bother with.  We may end up doing a
	 * small copy via TLB manipulation if we're near a page boundary,
	 * but presumably we'll make it up when we hit the second page.
	 */
	while (len >= LARGE_COPY_CUTOFF) {
		int copy_size, bytes_left_on_page;
		pte_t *src_ptep, *dst_ptep;
		pte_t src_pte, dst_pte;
		struct page *src_page, *dst_page;

		/* Is the source page oloc'ed to a remote cpu? */
retry_source:
		src_ptep = virt_to_pte(current->mm, (unsigned long)source);
		if (src_ptep == NULL)
			break;
		src_pte = *src_ptep;
		if (!hv_pte_get_present(src_pte) ||
		    !hv_pte_get_readable(src_pte) ||
		    hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3)
			break;
		if (get_remote_cache_cpu(src_pte) == smp_processor_id())
			break;
		src_page = pfn_to_page(hv_pte_get_pfn(src_pte));
		get_page(src_page);
		if (pte_val(src_pte) != pte_val(*src_ptep)) {
			put_page(src_page);
			goto retry_source;
		}
		if (pte_huge(src_pte)) {
			/* Adjust the PTE to correspond to a small page */
			int pfn = hv_pte_get_pfn(src_pte);
			pfn += (((unsigned long)source & (HPAGE_SIZE-1))
				>> PAGE_SHIFT);
			src_pte = pfn_pte(pfn, src_pte);
			src_pte = pte_mksmall(src_pte);
		}
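		/*
		 * (The adjustment above re-targets the PTE at the small page
		 * within the huge page that contains "source", e.g. with
		 * 64KB base pages and 16MB huge pages it picks one of the
		 * 256 constituent 64KB pages, so memcpy_multicache() can
		 * install it as an ordinary fixmap PTE.)
		 */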

		/* Is the destination page writable? */
retry_dest:
		dst_ptep = virt_to_pte(current->mm, (unsigned long)dest);
		if (dst_ptep == NULL) {
			put_page(src_page);
			break;
		}
		dst_pte = *dst_ptep;
		if (!hv_pte_get_present(dst_pte) ||
		    !hv_pte_get_writable(dst_pte)) {
			put_page(src_page);
			break;
		}
		dst_page = pfn_to_page(hv_pte_get_pfn(dst_pte));
		if (dst_page == src_page) {
			/*
			 * Source and dest are on the same page; this
			 * potentially exposes us to incoherence if any
			 * part of src and dest overlap on a cache line.
			 * Just give up rather than trying to be precise.
			 */
			put_page(src_page);
			break;
		}
		get_page(dst_page);
		if (pte_val(dst_pte) != pte_val(*dst_ptep)) {
			put_page(dst_page);
			goto retry_dest;
		}
		if (pte_huge(dst_pte)) {
			/* Adjust the PTE to correspond to a small page */
			int pfn = hv_pte_get_pfn(dst_pte);
			pfn += (((unsigned long)dest & (HPAGE_SIZE-1))
				>> PAGE_SHIFT);
			dst_pte = pfn_pte(pfn, dst_pte);
			dst_pte = pte_mksmall(dst_pte);
		}

		/* All looks good: create a cachable PTE and copy from it */
		copy_size = len;
		bytes_left_on_page =
			PAGE_SIZE - (((int)source) & (PAGE_SIZE-1));
		if (copy_size > bytes_left_on_page)
			copy_size = bytes_left_on_page;
		bytes_left_on_page =
			PAGE_SIZE - (((int)dest) & (PAGE_SIZE-1));
		if (copy_size > bytes_left_on_page)
			copy_size = bytes_left_on_page;
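		/*
		 * copy_size is now clamped so that neither the source nor
		 * the dest mapping crosses a page boundary; any remainder
		 * is picked up by the next loop iteration or the final
		 * func() call below.
		 */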
		memcpy_multicache(dest, source, dst_pte, src_pte, copy_size);

		/* Release the pages */
		put_page(dst_page);
		put_page(src_page);

		/* Continue on the next page */
		dest += copy_size;
		source += copy_size;
		len -= copy_size;
	}

	return func(dest, source, len);
}

void *memcpy(void *to, const void *from, __kernel_size_t n)
{
	if (n < LARGE_COPY_CUTOFF)
		return (void *)__memcpy_asm(to, from, n);
	else
		return (void *)fast_copy(to, from, n, __memcpy_asm);
}
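
/*
 * So, for example, a kernel memcpy(dst, src, 8192) now takes the
 * fast_copy() path above, while memcpy(dst, src, 128) still goes
 * straight to __memcpy_asm().
 */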

unsigned long __copy_to_user_inatomic(void __user *to, const void *from,
				      unsigned long n)
{
	if (n < LARGE_COPY_CUTOFF)
		return __copy_to_user_inatomic_asm(to, from, n);
	else
		return fast_copy(to, from, n, __copy_to_user_inatomic_asm);
}

unsigned long __copy_from_user_inatomic(void *to, const void __user *from,
					unsigned long n)
{
	if (n < LARGE_COPY_CUTOFF)
		return __copy_from_user_inatomic_asm(to, from, n);
	else
		return fast_copy(to, from, n, __copy_from_user_inatomic_asm);
}

unsigned long __copy_from_user_zeroing(void *to, const void __user *from,
				       unsigned long n)
{
	if (n < LARGE_COPY_CUTOFF)
		return __copy_from_user_zeroing_asm(to, from, n);
	else
		return fast_copy(to, from, n, __copy_from_user_zeroing_asm);
}

#endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */