// Copyright 2012 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#if V8_TARGET_ARCH_MIPS

#include <memory>

#include "src/codegen.h"
#include "src/isolate.h"
#include "src/macro-assembler.h"
#include "src/mips/simulator-mips.h"

namespace v8 {
namespace internal {

#define __ masm.

#if defined(V8_HOST_ARCH_MIPS)

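// Generates a hand-tuned memcpy for byte buffers. On the simulator and on
// MIPS32R6/MIPS32RX builds the generic |stub| is returned unchanged;
// otherwise a specialized copy routine is assembled into a fresh page.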
MemCopyUint8Function CreateMemCopyUint8Function(Isolate* isolate,
                                                MemCopyUint8Function stub) {
#if defined(USE_SIMULATOR) || defined(_MIPS_ARCH_MIPS32R6) || \
    defined(_MIPS_ARCH_MIPS32RX)
  return stub;
#else
  size_t allocated = 0;
  byte* buffer = AllocatePage(isolate->heap()->GetRandomMmapAddr(), &allocated);
  if (buffer == nullptr) return nullptr;

  MacroAssembler masm(isolate, buffer, static_cast<int>(allocated),
                      CodeObjectRequired::kNo);

  // This code assumes that cache lines are 32 bytes wide; it will not work
  // correctly with larger cache lines.
  {
    Label lastb, unaligned, aligned, chkw,
          loop16w, chk1w, wordCopy_loop, skip_pref, lastbloop,
          leave, ua_chk16w, ua_loop16w, ua_skip_pref, ua_chkw,
          ua_chk1w, ua_wordCopy_loop, ua_smallCopy, ua_smallCopy_loop;

    // The size of each prefetch.
    uint32_t pref_chunk = 32;
    // The maximum size of a prefetch; it must not be less than pref_chunk.
    // If the real size of a prefetch is greater than max_pref_size and the
    // kPrefHintPrepareForStore hint is used, the code will not work correctly.
    uint32_t max_pref_size = 128;
    DCHECK(pref_chunk < max_pref_size);

    // pref_limit is set based on the fact that we never use an offset greater
    // than 5 on a store pref and that a single pref can never be larger than
    // max_pref_size.
    uint32_t pref_limit = (5 * pref_chunk) + max_pref_size;
    int32_t pref_hint_load = kPrefHintLoadStreamed;
    int32_t pref_hint_store = kPrefHintPrepareForStore;
    uint32_t loadstore_chunk = 4;

    // The initial prefetches may fetch bytes that are before the buffer being
    // copied. Start copies with an offset of 4 to avoid this situation when
    // using kPrefHintPrepareForStore.
    DCHECK(pref_hint_store != kPrefHintPrepareForStore ||
           pref_chunk * 4 >= max_pref_size);

    // If the size is less than 8, go to lastb. Regardless of size, copy the
    // dst pointer to v0 for the return value.
    __ slti(t2, a2, 2 * loadstore_chunk);
    __ bne(t2, zero_reg, &lastb);
    __ mov(v0, a0);  // In delay slot.

    // If src and dst have different alignments, go to unaligned. If they
    // have the same alignment (but are not actually aligned) do a partial
    // load/store to make them aligned. If they are both already aligned
    // we can start copying at aligned.
    __ xor_(t8, a1, a0);
    __ andi(t8, t8, loadstore_chunk - 1);  // t8 is a0/a1 word-displacement.
    __ bne(t8, zero_reg, &unaligned);
    __ subu(a3, zero_reg, a0);  // In delay slot.

    __ andi(a3, a3, loadstore_chunk - 1);  // Copy a3 bytes to align a0/a1.
    __ beq(a3, zero_reg, &aligned);  // Already aligned.
    __ subu(a2, a2, a3);  // In delay slot. a2 is the remaining bytes count.

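    // lwr/swr (little-endian) and lwl/swl (big-endian) touch only the bytes
    // between the unaligned address and the next word boundary, so this pair
    // copies exactly the a3 bytes needed to bring a0/a1 up to word alignment.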
    if (kArchEndian == kLittle) {
      __ lwr(t8, MemOperand(a1));
      __ addu(a1, a1, a3);
      __ swr(t8, MemOperand(a0));
      __ addu(a0, a0, a3);
    } else {
      __ lwl(t8, MemOperand(a1));
      __ addu(a1, a1, a3);
      __ swl(t8, MemOperand(a0));
      __ addu(a0, a0, a3);
    }
    // Now dst/src are both word-aligned. Set a2 to count how many bytes we
    // have to copy after all the 64-byte chunks are copied and a3 to the dst
    // pointer after all the 64-byte chunks have been copied. We will loop,
    // incrementing a0 and a1 until a0 equals a3.
    __ bind(&aligned);
    __ andi(t8, a2, 0x3F);
    __ beq(a2, t8, &chkw);  // Less than 64?
    __ subu(a3, a2, t8);  // In delay slot.
    __ addu(a3, a0, a3);  // Now a3 is the final dst after loop.

    // When in the loop we prefetch with the kPrefHintPrepareForStore hint,
    // in which case a0+x should be past the "t0-32" address. This means:
    // for x=128 the last "safe" a0 address is "t0-160". Alternatively, for
    // x=64 the last "safe" a0 address is "t0-96". In the current version we
    // will use "pref hint, 128(a0)", so "t0-160" is the limit.
    if (pref_hint_store == kPrefHintPrepareForStore) {
      __ addu(t0, a0, a2);  // t0 is the "past the end" address.
      __ Subu(t9, t0, pref_limit);  // t9 is the "last safe pref" address.
    }

    __ Pref(pref_hint_load, MemOperand(a1, 0 * pref_chunk));
    __ Pref(pref_hint_load, MemOperand(a1, 1 * pref_chunk));
    __ Pref(pref_hint_load, MemOperand(a1, 2 * pref_chunk));
    __ Pref(pref_hint_load, MemOperand(a1, 3 * pref_chunk));

    if (pref_hint_store != kPrefHintPrepareForStore) {
      __ Pref(pref_hint_store, MemOperand(a0, 1 * pref_chunk));
      __ Pref(pref_hint_store, MemOperand(a0, 2 * pref_chunk));
      __ Pref(pref_hint_store, MemOperand(a0, 3 * pref_chunk));
    }
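    // Main aligned loop: each iteration copies 64 bytes (16 words) with eight
    // loads followed by eight stores, twice, while prefetching ahead on both
    // the source and the destination.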
    __ bind(&loop16w);
    __ lw(t0, MemOperand(a1));

    if (pref_hint_store == kPrefHintPrepareForStore) {
      __ sltu(v1, t9, a0);  // If a0 > t9, don't use next prefetch.
      __ Branch(USE_DELAY_SLOT, &skip_pref, gt, v1, Operand(zero_reg));
    }
    __ lw(t1, MemOperand(a1, 1, loadstore_chunk));  // Maybe in delay slot.

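    // Prefetch destination cache lines further ahead. With the
    // kPrefHintPrepareForStore hint these two prefetches are skipped (via
    // skip_pref) once a0 is past the last safe prefetch address held in t9.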
    __ Pref(pref_hint_store, MemOperand(a0, 4 * pref_chunk));
    __ Pref(pref_hint_store, MemOperand(a0, 5 * pref_chunk));

    __ bind(&skip_pref);
    __ lw(t2, MemOperand(a1, 2, loadstore_chunk));
    __ lw(t3, MemOperand(a1, 3, loadstore_chunk));
    __ lw(t4, MemOperand(a1, 4, loadstore_chunk));
    __ lw(t5, MemOperand(a1, 5, loadstore_chunk));
    __ lw(t6, MemOperand(a1, 6, loadstore_chunk));
    __ lw(t7, MemOperand(a1, 7, loadstore_chunk));
    __ Pref(pref_hint_load, MemOperand(a1, 4 * pref_chunk));

    __ sw(t0, MemOperand(a0));
    __ sw(t1, MemOperand(a0, 1, loadstore_chunk));
    __ sw(t2, MemOperand(a0, 2, loadstore_chunk));
    __ sw(t3, MemOperand(a0, 3, loadstore_chunk));
    __ sw(t4, MemOperand(a0, 4, loadstore_chunk));
    __ sw(t5, MemOperand(a0, 5, loadstore_chunk));
    __ sw(t6, MemOperand(a0, 6, loadstore_chunk));
    __ sw(t7, MemOperand(a0, 7, loadstore_chunk));

    __ lw(t0, MemOperand(a1, 8, loadstore_chunk));
    __ lw(t1, MemOperand(a1, 9, loadstore_chunk));
    __ lw(t2, MemOperand(a1, 10, loadstore_chunk));
    __ lw(t3, MemOperand(a1, 11, loadstore_chunk));
    __ lw(t4, MemOperand(a1, 12, loadstore_chunk));
    __ lw(t5, MemOperand(a1, 13, loadstore_chunk));
    __ lw(t6, MemOperand(a1, 14, loadstore_chunk));
    __ lw(t7, MemOperand(a1, 15, loadstore_chunk));
    __ Pref(pref_hint_load, MemOperand(a1, 5 * pref_chunk));

    __ sw(t0, MemOperand(a0, 8, loadstore_chunk));
    __ sw(t1, MemOperand(a0, 9, loadstore_chunk));
    __ sw(t2, MemOperand(a0, 10, loadstore_chunk));
    __ sw(t3, MemOperand(a0, 11, loadstore_chunk));
    __ sw(t4, MemOperand(a0, 12, loadstore_chunk));
    __ sw(t5, MemOperand(a0, 13, loadstore_chunk));
    __ sw(t6, MemOperand(a0, 14, loadstore_chunk));
    __ sw(t7, MemOperand(a0, 15, loadstore_chunk));
    __ addiu(a0, a0, 16 * loadstore_chunk);
    __ bne(a0, a3, &loop16w);
    __ addiu(a1, a1, 16 * loadstore_chunk);  // In delay slot.
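    // On loop exit, move the sub-64-byte remainder computed into t8 above
    // back into a2.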
    __ mov(a2, t8);

    // Here we have src and dest word-aligned but fewer than 64 bytes to go.
    // Check for a 32-byte chunk and copy it if there is one. Otherwise jump
    // down to chk1w to handle the tail end of the copy.
    __ bind(&chkw);
    __ Pref(pref_hint_load, MemOperand(a1, 0 * pref_chunk));
    __ andi(t8, a2, 0x1F);
    __ beq(a2, t8, &chk1w);  // Less than 32?
    __ nop();  // In delay slot.
    __ lw(t0, MemOperand(a1));
    __ lw(t1, MemOperand(a1, 1, loadstore_chunk));
    __ lw(t2, MemOperand(a1, 2, loadstore_chunk));
    __ lw(t3, MemOperand(a1, 3, loadstore_chunk));
    __ lw(t4, MemOperand(a1, 4, loadstore_chunk));
    __ lw(t5, MemOperand(a1, 5, loadstore_chunk));
    __ lw(t6, MemOperand(a1, 6, loadstore_chunk));
    __ lw(t7, MemOperand(a1, 7, loadstore_chunk));
    __ addiu(a1, a1, 8 * loadstore_chunk);
    __ sw(t0, MemOperand(a0));
    __ sw(t1, MemOperand(a0, 1, loadstore_chunk));
    __ sw(t2, MemOperand(a0, 2, loadstore_chunk));
    __ sw(t3, MemOperand(a0, 3, loadstore_chunk));
    __ sw(t4, MemOperand(a0, 4, loadstore_chunk));
    __ sw(t5, MemOperand(a0, 5, loadstore_chunk));
    __ sw(t6, MemOperand(a0, 6, loadstore_chunk));
    __ sw(t7, MemOperand(a0, 7, loadstore_chunk));
    __ addiu(a0, a0, 8 * loadstore_chunk);

    // Here we have less than 32 bytes to copy. Set up for a loop to copy
    // one word at a time. Set a2 to count how many bytes we have to copy
    // after all the word chunks are copied and a3 to the dst pointer after
    // all the word chunks have been copied. We will loop, incrementing a0
    // and a1 until a0 equals a3.
    __ bind(&chk1w);
    __ andi(a2, t8, loadstore_chunk - 1);
    __ beq(a2, t8, &lastb);
    __ subu(a3, t8, a2);  // In delay slot.
    __ addu(a3, a0, a3);

    __ bind(&wordCopy_loop);
    __ lw(t3, MemOperand(a1));
    __ addiu(a0, a0, loadstore_chunk);
    __ addiu(a1, a1, loadstore_chunk);
    __ bne(a0, a3, &wordCopy_loop);
    __ sw(t3, MemOperand(a0, -1, loadstore_chunk));  // In delay slot.

    __ bind(&lastb);
    __ Branch(&leave, le, a2, Operand(zero_reg));
    __ addu(a3, a0, a2);

    __ bind(&lastbloop);
    __ lb(v1, MemOperand(a1));
    __ addiu(a0, a0, 1);
    __ addiu(a1, a1, 1);
    __ bne(a0, a3, &lastbloop);
    __ sb(v1, MemOperand(a0, -1));  // In delay slot.

    __ bind(&leave);
    __ jr(ra);
    __ nop();

    // Unaligned case. Only the dst gets aligned so we need to do partial
    // loads of the source followed by normal stores to the dst (once we
    // have aligned the destination).
    __ bind(&unaligned);
    __ andi(a3, a3, loadstore_chunk - 1);  // Copy a3 bytes to align a0/a1.
    __ beq(a3, zero_reg, &ua_chk16w);
    __ subu(a2, a2, a3);  // In delay slot.

    if (kArchEndian == kLittle) {
      __ lwr(v1, MemOperand(a1));
      __ lwl(v1,
             MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one));
      __ addu(a1, a1, a3);
      __ swr(v1, MemOperand(a0));
      __ addu(a0, a0, a3);
    } else {
      __ lwl(v1, MemOperand(a1));
      __ lwr(v1,
             MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one));
      __ addu(a1, a1, a3);
      __ swl(v1, MemOperand(a0));
      __ addu(a0, a0, a3);
    }

    // Now the dst (but not the source) is aligned. Set a2 to count how many
    // bytes we have to copy after all the 64-byte chunks are copied and a3 to
    // the dst pointer after all the 64-byte chunks have been copied. We will
    // loop, incrementing a0 and a1 until a0 equals a3.
    __ bind(&ua_chk16w);
    __ andi(t8, a2, 0x3F);
    __ beq(a2, t8, &ua_chkw);
    __ subu(a3, a2, t8);  // In delay slot.
    __ addu(a3, a0, a3);

    if (pref_hint_store == kPrefHintPrepareForStore) {
      __ addu(t0, a0, a2);
      __ Subu(t9, t0, pref_limit);
    }

    __ Pref(pref_hint_load, MemOperand(a1, 0 * pref_chunk));
    __ Pref(pref_hint_load, MemOperand(a1, 1 * pref_chunk));
    __ Pref(pref_hint_load, MemOperand(a1, 2 * pref_chunk));

    if (pref_hint_store != kPrefHintPrepareForStore) {
      __ Pref(pref_hint_store, MemOperand(a0, 1 * pref_chunk));
      __ Pref(pref_hint_store, MemOperand(a0, 2 * pref_chunk));
      __ Pref(pref_hint_store, MemOperand(a0, 3 * pref_chunk));
    }

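    // Unaligned main loop: 64 bytes per iteration. Each source word is
    // assembled from two partial loads (lwr/lwl or lwl/lwr depending on
    // endianness) and then written to the now word-aligned destination with a
    // normal sw.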
    __ bind(&ua_loop16w);
    __ Pref(pref_hint_load, MemOperand(a1, 3 * pref_chunk));
    if (kArchEndian == kLittle) {
      __ lwr(t0, MemOperand(a1));
      __ lwr(t1, MemOperand(a1, 1, loadstore_chunk));
      __ lwr(t2, MemOperand(a1, 2, loadstore_chunk));

      if (pref_hint_store == kPrefHintPrepareForStore) {
        __ sltu(v1, t9, a0);
        __ Branch(USE_DELAY_SLOT, &ua_skip_pref, gt, v1, Operand(zero_reg));
      }
      __ lwr(t3, MemOperand(a1, 3, loadstore_chunk));  // Maybe in delay slot.

      __ Pref(pref_hint_store, MemOperand(a0, 4 * pref_chunk));
      __ Pref(pref_hint_store, MemOperand(a0, 5 * pref_chunk));

      __ bind(&ua_skip_pref);
      __ lwr(t4, MemOperand(a1, 4, loadstore_chunk));
      __ lwr(t5, MemOperand(a1, 5, loadstore_chunk));
      __ lwr(t6, MemOperand(a1, 6, loadstore_chunk));
      __ lwr(t7, MemOperand(a1, 7, loadstore_chunk));
      __ lwl(t0,
             MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwl(t1,
             MemOperand(a1, 2, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwl(t2,
             MemOperand(a1, 3, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwl(t3,
             MemOperand(a1, 4, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwl(t4,
             MemOperand(a1, 5, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwl(t5,
             MemOperand(a1, 6, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwl(t6,
             MemOperand(a1, 7, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwl(t7,
             MemOperand(a1, 8, loadstore_chunk, MemOperand::offset_minus_one));
    } else {
      __ lwl(t0, MemOperand(a1));
      __ lwl(t1, MemOperand(a1, 1, loadstore_chunk));
      __ lwl(t2, MemOperand(a1, 2, loadstore_chunk));

      if (pref_hint_store == kPrefHintPrepareForStore) {
        __ sltu(v1, t9, a0);
        __ Branch(USE_DELAY_SLOT, &ua_skip_pref, gt, v1, Operand(zero_reg));
      }
      __ lwl(t3, MemOperand(a1, 3, loadstore_chunk));  // Maybe in delay slot.

      __ Pref(pref_hint_store, MemOperand(a0, 4 * pref_chunk));
      __ Pref(pref_hint_store, MemOperand(a0, 5 * pref_chunk));

      __ bind(&ua_skip_pref);
      __ lwl(t4, MemOperand(a1, 4, loadstore_chunk));
      __ lwl(t5, MemOperand(a1, 5, loadstore_chunk));
      __ lwl(t6, MemOperand(a1, 6, loadstore_chunk));
      __ lwl(t7, MemOperand(a1, 7, loadstore_chunk));
      __ lwr(t0,
             MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwr(t1,
             MemOperand(a1, 2, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwr(t2,
             MemOperand(a1, 3, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwr(t3,
             MemOperand(a1, 4, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwr(t4,
             MemOperand(a1, 5, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwr(t5,
             MemOperand(a1, 6, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwr(t6,
             MemOperand(a1, 7, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwr(t7,
             MemOperand(a1, 8, loadstore_chunk, MemOperand::offset_minus_one));
    }
    __ Pref(pref_hint_load, MemOperand(a1, 4 * pref_chunk));
    __ sw(t0, MemOperand(a0));
    __ sw(t1, MemOperand(a0, 1, loadstore_chunk));
    __ sw(t2, MemOperand(a0, 2, loadstore_chunk));
    __ sw(t3, MemOperand(a0, 3, loadstore_chunk));
    __ sw(t4, MemOperand(a0, 4, loadstore_chunk));
    __ sw(t5, MemOperand(a0, 5, loadstore_chunk));
    __ sw(t6, MemOperand(a0, 6, loadstore_chunk));
    __ sw(t7, MemOperand(a0, 7, loadstore_chunk));
    if (kArchEndian == kLittle) {
      __ lwr(t0, MemOperand(a1, 8, loadstore_chunk));
      __ lwr(t1, MemOperand(a1, 9, loadstore_chunk));
      __ lwr(t2, MemOperand(a1, 10, loadstore_chunk));
      __ lwr(t3, MemOperand(a1, 11, loadstore_chunk));
      __ lwr(t4, MemOperand(a1, 12, loadstore_chunk));
      __ lwr(t5, MemOperand(a1, 13, loadstore_chunk));
      __ lwr(t6, MemOperand(a1, 14, loadstore_chunk));
      __ lwr(t7, MemOperand(a1, 15, loadstore_chunk));
      __ lwl(t0,
             MemOperand(a1, 9, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwl(t1,
             MemOperand(a1, 10, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwl(t2,
             MemOperand(a1, 11, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwl(t3,
             MemOperand(a1, 12, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwl(t4,
             MemOperand(a1, 13, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwl(t5,
             MemOperand(a1, 14, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwl(t6,
             MemOperand(a1, 15, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwl(t7,
             MemOperand(a1, 16, loadstore_chunk, MemOperand::offset_minus_one));
    } else {
      __ lwl(t0, MemOperand(a1, 8, loadstore_chunk));
      __ lwl(t1, MemOperand(a1, 9, loadstore_chunk));
      __ lwl(t2, MemOperand(a1, 10, loadstore_chunk));
      __ lwl(t3, MemOperand(a1, 11, loadstore_chunk));
      __ lwl(t4, MemOperand(a1, 12, loadstore_chunk));
      __ lwl(t5, MemOperand(a1, 13, loadstore_chunk));
      __ lwl(t6, MemOperand(a1, 14, loadstore_chunk));
      __ lwl(t7, MemOperand(a1, 15, loadstore_chunk));
      __ lwr(t0,
             MemOperand(a1, 9, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwr(t1,
             MemOperand(a1, 10, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwr(t2,
             MemOperand(a1, 11, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwr(t3,
             MemOperand(a1, 12, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwr(t4,
             MemOperand(a1, 13, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwr(t5,
             MemOperand(a1, 14, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwr(t6,
             MemOperand(a1, 15, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwr(t7,
             MemOperand(a1, 16, loadstore_chunk, MemOperand::offset_minus_one));
    }
    __ Pref(pref_hint_load, MemOperand(a1, 5 * pref_chunk));
    __ sw(t0, MemOperand(a0, 8, loadstore_chunk));
    __ sw(t1, MemOperand(a0, 9, loadstore_chunk));
    __ sw(t2, MemOperand(a0, 10, loadstore_chunk));
    __ sw(t3, MemOperand(a0, 11, loadstore_chunk));
    __ sw(t4, MemOperand(a0, 12, loadstore_chunk));
    __ sw(t5, MemOperand(a0, 13, loadstore_chunk));
    __ sw(t6, MemOperand(a0, 14, loadstore_chunk));
    __ sw(t7, MemOperand(a0, 15, loadstore_chunk));
    __ addiu(a0, a0, 16 * loadstore_chunk);
    __ bne(a0, a3, &ua_loop16w);
    __ addiu(a1, a1, 16 * loadstore_chunk);  // In delay slot.
    __ mov(a2, t8);

    // Here we have fewer than 64 bytes to go. Check for a 32-byte chunk and
    // copy it if there is one. Otherwise jump down to ua_chk1w to handle the
    // tail end of the copy.
    __ bind(&ua_chkw);
    __ Pref(pref_hint_load, MemOperand(a1));
    __ andi(t8, a2, 0x1F);

    __ beq(a2, t8, &ua_chk1w);
    __ nop();  // In delay slot.
    if (kArchEndian == kLittle) {
      __ lwr(t0, MemOperand(a1));
      __ lwr(t1, MemOperand(a1, 1, loadstore_chunk));
      __ lwr(t2, MemOperand(a1, 2, loadstore_chunk));
      __ lwr(t3, MemOperand(a1, 3, loadstore_chunk));
      __ lwr(t4, MemOperand(a1, 4, loadstore_chunk));
      __ lwr(t5, MemOperand(a1, 5, loadstore_chunk));
      __ lwr(t6, MemOperand(a1, 6, loadstore_chunk));
      __ lwr(t7, MemOperand(a1, 7, loadstore_chunk));
      __ lwl(t0,
             MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwl(t1,
             MemOperand(a1, 2, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwl(t2,
             MemOperand(a1, 3, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwl(t3,
             MemOperand(a1, 4, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwl(t4,
             MemOperand(a1, 5, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwl(t5,
             MemOperand(a1, 6, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwl(t6,
             MemOperand(a1, 7, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwl(t7,
             MemOperand(a1, 8, loadstore_chunk, MemOperand::offset_minus_one));
    } else {
      __ lwl(t0, MemOperand(a1));
      __ lwl(t1, MemOperand(a1, 1, loadstore_chunk));
      __ lwl(t2, MemOperand(a1, 2, loadstore_chunk));
      __ lwl(t3, MemOperand(a1, 3, loadstore_chunk));
      __ lwl(t4, MemOperand(a1, 4, loadstore_chunk));
      __ lwl(t5, MemOperand(a1, 5, loadstore_chunk));
      __ lwl(t6, MemOperand(a1, 6, loadstore_chunk));
      __ lwl(t7, MemOperand(a1, 7, loadstore_chunk));
      __ lwr(t0,
             MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwr(t1,
             MemOperand(a1, 2, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwr(t2,
             MemOperand(a1, 3, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwr(t3,
             MemOperand(a1, 4, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwr(t4,
             MemOperand(a1, 5, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwr(t5,
             MemOperand(a1, 6, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwr(t6,
             MemOperand(a1, 7, loadstore_chunk, MemOperand::offset_minus_one));
      __ lwr(t7,
             MemOperand(a1, 8, loadstore_chunk, MemOperand::offset_minus_one));
    }
    __ addiu(a1, a1, 8 * loadstore_chunk);
    __ sw(t0, MemOperand(a0));
    __ sw(t1, MemOperand(a0, 1, loadstore_chunk));
    __ sw(t2, MemOperand(a0, 2, loadstore_chunk));
    __ sw(t3, MemOperand(a0, 3, loadstore_chunk));
    __ sw(t4, MemOperand(a0, 4, loadstore_chunk));
    __ sw(t5, MemOperand(a0, 5, loadstore_chunk));
    __ sw(t6, MemOperand(a0, 6, loadstore_chunk));
    __ sw(t7, MemOperand(a0, 7, loadstore_chunk));
    __ addiu(a0, a0, 8 * loadstore_chunk);

    // Less than 32 bytes to copy. Set up for a loop to copy one word at a
    // time.
    __ bind(&ua_chk1w);
    __ andi(a2, t8, loadstore_chunk - 1);
    __ beq(a2, t8, &ua_smallCopy);
    __ subu(a3, t8, a2);  // In delay slot.
    __ addu(a3, a0, a3);

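    // Copy one word per iteration, assembling each word from the unaligned
    // source with a partial-load pair before storing it with an aligned sw.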
    __ bind(&ua_wordCopy_loop);
    if (kArchEndian == kLittle) {
      __ lwr(v1, MemOperand(a1));
      __ lwl(v1,
             MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one));
    } else {
      __ lwl(v1, MemOperand(a1));
      __ lwr(v1,
             MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one));
    }
    __ addiu(a0, a0, loadstore_chunk);
    __ addiu(a1, a1, loadstore_chunk);
    __ bne(a0, a3, &ua_wordCopy_loop);
    __ sw(v1, MemOperand(a0, -1, loadstore_chunk));  // In delay slot.

    // Copy the remaining tail bytes one at a time.
    __ bind(&ua_smallCopy);
    __ beq(a2, zero_reg, &leave);
    __ addu(a3, a0, a2);  // In delay slot.

    __ bind(&ua_smallCopy_loop);
    __ lb(v1, MemOperand(a1));
    __ addiu(a0, a0, 1);
    __ addiu(a1, a1, 1);
    __ bne(a0, a3, &ua_smallCopy_loop);
    __ sb(v1, MemOperand(a0, -1));  // In delay slot.

    __ jr(ra);
    __ nop();
  }
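  // Finalize: extract the code descriptor, flush the instruction cache and
  // switch the page from writable to read+execute before exposing it as a
  // callable function.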
  CodeDesc desc;
  masm.GetCode(isolate, &desc);
  DCHECK(!RelocInfo::RequiresRelocationAfterCodegen(desc));

  Assembler::FlushICache(buffer, allocated);
  CHECK(SetPermissions(buffer, allocated, PageAllocator::kReadExecute));
  return FUNCTION_CAST<MemCopyUint8Function>(buffer);
#endif
}
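// Usage sketch (hypothetical caller). The MemCopyUint8Function typedef is
// declared in src/codegen.h; a (dest, src, size) parameter order is assumed
// here for illustration only:
//
//   MemCopyUint8Function memcopy =
//       CreateMemCopyUint8Function(isolate, fallback_stub);
//   if (memcopy != nullptr) memcopy(dest, src, size_in_bytes);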
#endif

UnaryMathFunctionWithIsolate CreateSqrtFunction(Isolate* isolate) {
#if defined(USE_SIMULATOR)
  return nullptr;
#else
  size_t allocated = 0;
  byte* buffer = AllocatePage(isolate->heap()->GetRandomMmapAddr(), &allocated);
  if (buffer == nullptr) return nullptr;

  MacroAssembler masm(isolate, buffer, static_cast<int>(allocated),
                      CodeObjectRequired::kNo);

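  // Generated code: load the incoming double parameter into f12, take its
  // square root in hardware, and return it through the float result register.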
  __ MovFromFloatParameter(f12);
  __ sqrt_d(f0, f12);
  __ MovToFloatResult(f0);
  __ Ret();

  CodeDesc desc;
  masm.GetCode(isolate, &desc);
  DCHECK(!RelocInfo::RequiresRelocationAfterCodegen(desc));

  Assembler::FlushICache(buffer, allocated);
  CHECK(SetPermissions(buffer, allocated, PageAllocator::kReadExecute));
  return FUNCTION_CAST<UnaryMathFunctionWithIsolate>(buffer);
#endif
}

#undef __

}  // namespace internal
}  // namespace v8

#endif  // V8_TARGET_ARCH_MIPS