/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * Contributed by: Intel Corporation
 */

/*
 * Syntax: GNU as, AT&T operand order, 32-bit x86, run through the C
 * preprocessor.  Every helper macro below is guarded by #ifndef so an
 * including file may supply its own definition first.
 */

/* Local (non-exported) label: L(foo) -> .Lfoo */
#ifndef L
# define L(label)       .L##label
#endif

#ifndef ALIGN
# define ALIGN(n)       .p2align n
#endif

#ifndef cfi_startproc
# define cfi_startproc                  .cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc                    .cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)       .cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)               .cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)     .cfi_adjust_cfa_offset off
#endif

/* Open a global, 16-byte aligned function and start its CFI region. */
#ifndef ENTRY
# define ENTRY(name)                    \
        .type name, @function;          \
        .globl name;                    \
        .p2align 4;                     \
name:                                   \
        cfi_startproc
#endif

/* Close the CFI region and record the function size. */
#ifndef END
# define END(name)                      \
        cfi_endproc;                    \
        .size name, .-name
#endif

/* Unwind bookkeeping for a 4-byte push / pop of REG. */
#define CFI_PUSH(REG)                   \
        cfi_adjust_cfa_offset (4);      \
        cfi_rel_offset (REG, 0)

#define CFI_POP(REG)                    \
        cfi_adjust_cfa_offset (-4);     \
        cfi_restore (REG)

#define PUSH(REG)       pushl REG; CFI_PUSH (REG)
#define POP(REG)        popl REG; CFI_POP (REG)

/* Stack offsets of the arguments, relative to %esp after ENTRANCE. */
#ifdef USE_AS_BZERO16
# define DEST           PARMS
# define LEN            DEST+4
#else
# define DEST           PARMS
# define CHR            DEST+4
# define LEN            CHR+4
#endif

/* The return value is deliberately left unset: the #if 1 arm is empty. */
#if 1
# define SETRTNVAL
#else
# define SETRTNVAL      movl DEST(%esp), %eax
#endif

#if (defined SHARED || defined __PIC__)
# define ENTRANCE       PUSH (%ebx);
# define RETURN_END     POP (%ebx); ret
/* After a mid-function ret, re-assert the "EBX pushed" unwind state for
   the code that follows.  */
# define RETURN         RETURN_END; CFI_PUSH (%ebx)
# define PARMS          8               /* Preserve EBX.  */
# define JMPTBL(I, B)   I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  ECX is the entry index.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)                          \
    /* We first load PC into EBX.  */                           \
    call        __i686.get_pc_thunk.bx;                         \
    /* Get the address of the jump table.  */                   \
    add         $(TABLE - .), %ebx;                             \
    /* Get the entry and convert the relative offset to the     \
       absolute address.  */                                    \
    add         (%ebx,%ecx,4), %ebx;                            \
    /* EBX now holds the absolute target address.  Go.  */      \
    jmp         *%ebx

        .section        .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
        .globl  __i686.get_pc_thunk.bx
        .hidden __i686.get_pc_thunk.bx
        ALIGN (4)
        .type   __i686.get_pc_thunk.bx,@function
/* Returns the caller's return address (its PC after the call) in EBX. */
__i686.get_pc_thunk.bx:
        movl    (%esp), %ebx
        ret
#else
# define ENTRANCE
# define RETURN_END     ret
# define RETURN         RETURN_END
# define PARMS          4
# define JMPTBL(I, B)   I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  ECX is the entry index.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)                          \
    jmp         *TABLE(,%ecx,4)
#endif

/*
 * sse2_memset16_atom
 *
 * Fills a buffer with a repeated 16-bit value (with zero when built with
 * USE_AS_BZERO16).
 *
 * Arguments are read from the stack:
 *   DEST(%esp)  destination pointer
 *   CHR(%esp)   16-bit fill value (not present with USE_AS_BZERO16)
 *   LEN(%esp)   length.  It is used as a count of 16-bit words; when
 *               USE_AS_ANDROID is defined it is halved first
 *               (presumably the caller passes a byte count - confirm
 *               against the C prototype).
 *
 * Register roles inside the routine:
 *   %eax   fill value replicated into both 16-bit halves
 *   %ecx   remaining length: words on the short path, bytes once
 *          L(32wordsormore) has been entered; jump-table index right
 *          before each BRANCH_TO_JMPTBL_ENTRY
 *   %edx   write cursor; every tail stub expects it to point just past
 *          the end of the region that is still to be written
 *   %ebx   scratch (PIC base / cache-size threshold), saved on the stack
 *          before use
 *   %xmm0  fill value replicated across all 128 bits
 *
 * Return value: none is set (SETRTNVAL expands to nothing).
 */
        .section .text.sse2,"ax",@progbits
        ALIGN (4)
ENTRY (sse2_memset16_atom)
        ENTRANCE

        movl    LEN(%esp), %ecx
#ifdef USE_AS_ANDROID
        shr     $1, %ecx
#endif
#ifdef USE_AS_BZERO16
        xor     %eax, %eax
#else
        /* eax = value | (value << 16) */
        movzwl  CHR(%esp), %eax
        mov     %eax, %edx
        shl     $16, %eax
        or      %edx, %eax
#endif
        movl    DEST(%esp), %edx
        cmp     $32, %ecx
        jae     L(32wordsormore)

/* Fewer than 32 words: point EDX past the end and dispatch on the word
   count to a stub that stores backwards from EDX.  */
L(write_less32words):
        lea     (%edx, %ecx, 2), %edx
        BRANCH_TO_JMPTBL_ENTRY (L(table_less32words))


        .pushsection .rodata.sse2,"a",@progbits
        ALIGN (2)
L(table_less32words):
        .int    JMPTBL (L(write_0words), L(table_less32words))
        .int    JMPTBL (L(write_1words), L(table_less32words))
        .int    JMPTBL (L(write_2words), L(table_less32words))
        .int    JMPTBL (L(write_3words), L(table_less32words))
        .int    JMPTBL (L(write_4words), L(table_less32words))
        .int    JMPTBL (L(write_5words), L(table_less32words))
        .int    JMPTBL (L(write_6words), L(table_less32words))
        .int    JMPTBL (L(write_7words), L(table_less32words))
        .int    JMPTBL (L(write_8words), L(table_less32words))
        .int    JMPTBL (L(write_9words), L(table_less32words))
        .int    JMPTBL (L(write_10words), L(table_less32words))
        .int    JMPTBL (L(write_11words), L(table_less32words))
        .int    JMPTBL (L(write_12words), L(table_less32words))
        .int    JMPTBL (L(write_13words), L(table_less32words))
        .int    JMPTBL (L(write_14words), L(table_less32words))
        .int    JMPTBL (L(write_15words), L(table_less32words))
        .int    JMPTBL (L(write_16words), L(table_less32words))
        .int    JMPTBL (L(write_17words), L(table_less32words))
        .int    JMPTBL (L(write_18words), L(table_less32words))
        .int    JMPTBL (L(write_19words), L(table_less32words))
        .int    JMPTBL (L(write_20words), L(table_less32words))
        .int    JMPTBL (L(write_21words), L(table_less32words))
        .int    JMPTBL (L(write_22words), L(table_less32words))
        .int    JMPTBL (L(write_23words), L(table_less32words))
        .int    JMPTBL (L(write_24words), L(table_less32words))
        .int    JMPTBL (L(write_25words), L(table_less32words))
        .int    JMPTBL (L(write_26words), L(table_less32words))
        .int    JMPTBL (L(write_27words), L(table_less32words))
        .int    JMPTBL (L(write_28words), L(table_less32words))
        .int    JMPTBL (L(write_29words), L(table_less32words))
        .int    JMPTBL (L(write_30words), L(table_less32words))
        .int    JMPTBL (L(write_31words), L(table_less32words))
        .popsection

/* Short-path stubs.  Each label falls through into the next smaller
   count of the same (count mod 4) family; offsets are relative to the
   end pointer in EDX.  */
        ALIGN (4)
L(write_28words):
        movl    %eax, -56(%edx)
        movl    %eax, -52(%edx)
L(write_24words):
        movl    %eax, -48(%edx)
        movl    %eax, -44(%edx)
L(write_20words):
        movl    %eax, -40(%edx)
        movl    %eax, -36(%edx)
L(write_16words):
        movl    %eax, -32(%edx)
        movl    %eax, -28(%edx)
L(write_12words):
        movl    %eax, -24(%edx)
        movl    %eax, -20(%edx)
L(write_8words):
        movl    %eax, -16(%edx)
        movl    %eax, -12(%edx)
L(write_4words):
        movl    %eax, -8(%edx)
        movl    %eax, -4(%edx)
L(write_0words):
        SETRTNVAL
        RETURN

        ALIGN (4)
L(write_29words):
        movl    %eax, -58(%edx)
        movl    %eax, -54(%edx)
L(write_25words):
        movl    %eax, -50(%edx)
        movl    %eax, -46(%edx)
L(write_21words):
        movl    %eax, -42(%edx)
        movl    %eax, -38(%edx)
L(write_17words):
        movl    %eax, -34(%edx)
        movl    %eax, -30(%edx)
L(write_13words):
        movl    %eax, -26(%edx)
        movl    %eax, -22(%edx)
L(write_9words):
        movl    %eax, -18(%edx)
        movl    %eax, -14(%edx)
L(write_5words):
        movl    %eax, -10(%edx)
        movl    %eax, -6(%edx)
L(write_1words):
        mov     %ax, -2(%edx)
        SETRTNVAL
        RETURN

        ALIGN (4)
L(write_30words):
        movl    %eax, -60(%edx)
        movl    %eax, -56(%edx)
L(write_26words):
        movl    %eax, -52(%edx)
        movl    %eax, -48(%edx)
L(write_22words):
        movl    %eax, -44(%edx)
        movl    %eax, -40(%edx)
L(write_18words):
        movl    %eax, -36(%edx)
        movl    %eax, -32(%edx)
L(write_14words):
        movl    %eax, -28(%edx)
        movl    %eax, -24(%edx)
L(write_10words):
        movl    %eax, -20(%edx)
        movl    %eax, -16(%edx)
L(write_6words):
        movl    %eax, -12(%edx)
        movl    %eax, -8(%edx)
L(write_2words):
        movl    %eax, -4(%edx)
        SETRTNVAL
        RETURN

        ALIGN (4)
L(write_31words):
        movl    %eax, -62(%edx)
        movl    %eax, -58(%edx)
L(write_27words):
        movl    %eax, -54(%edx)
        movl    %eax, -50(%edx)
L(write_23words):
        movl    %eax, -46(%edx)
        movl    %eax, -42(%edx)
L(write_19words):
        movl    %eax, -38(%edx)
        movl    %eax, -34(%edx)
L(write_15words):
        movl    %eax, -30(%edx)
        movl    %eax, -26(%edx)
L(write_11words):
        movl    %eax, -22(%edx)
        movl    %eax, -18(%edx)
L(write_7words):
        movl    %eax, -14(%edx)
        movl    %eax, -10(%edx)
L(write_3words):
        movl    %eax, -6(%edx)
        movw    %ax, -2(%edx)
        SETRTNVAL
        RETURN

        ALIGN (4)

/* 32 words or more.  From here on ECX counts bytes.  */
L(32wordsormore):
        shl     $1, %ecx
        test    $0x01, %edx
        jz      L(aligned2bytes)
        /* Odd destination address: store the first and the last four
           bytes, then step one byte forward, drop two bytes from the
           count and rotate the pattern by one byte so that the remaining
           stores continue the same byte sequence.  */
        mov     %eax, (%edx)
        mov     %eax, -4(%edx, %ecx)
        sub     $2, %ecx
        add     $1, %edx
        rol     $8, %eax
L(aligned2bytes):
#ifdef USE_AS_BZERO16
        pxor    %xmm0, %xmm0
#else
        /* Broadcast EAX to all four dwords of XMM0.  */
        movd    %eax, %xmm0
        pshufd  $0, %xmm0, %xmm0
#endif
        testl   $0xf, %edx
        jz      L(aligned_16)
/* EDX is not 16-byte aligned: store 16 unaligned bytes, round EDX up to
   the next 16-byte boundary and shrink ECX by the bytes skipped.  */
L(not_aligned_16):
        movdqu  %xmm0, (%edx)
        movl    %edx, %eax
        and     $-16, %edx
        add     $16, %edx
        sub     %edx, %eax              /* eax = old EDX - new EDX (<= 0) */
        add     %eax, %ecx
        movd    %xmm0, %eax             /* reload the pattern into EAX */

        ALIGN (4)
L(aligned_16):
        cmp     $128, %ecx
        jae     L(128bytesormore)

/* Fewer than 128 bytes left: point EDX past the end and dispatch on the
   remaining word count.  */
L(aligned_16_less128bytes):
        add     %ecx, %edx
        shr     $1, %ecx
        BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

        ALIGN (4)
L(128bytesormore):
/* EBX is pushed below exactly when this condition holds.  Both matching
   pops are keyed to the same condition so the stack stays balanced even
   when only one of SHARED_CACHE_SIZE / DATA_CACHE_SIZE is defined.  */
#if defined SHARED_CACHE_SIZE || !(defined SHARED || defined __PIC__)
# define EBX_SAVED_FOR_CACHE_SIZE 1
#endif
        /* EBX = shared cache size.  */
#ifdef SHARED_CACHE_SIZE
        PUSH (%ebx)
        mov     $SHARED_CACHE_SIZE, %ebx
#else
# if (defined SHARED || defined __PIC__)
        call    __i686.get_pc_thunk.bx
        add     $_GLOBAL_OFFSET_TABLE_, %ebx
        mov     __x86_shared_cache_size@GOTOFF(%ebx), %ebx
# else
        PUSH (%ebx)
        mov     __x86_shared_cache_size, %ebx
# endif
#endif
        cmp     %ebx, %ecx
        jae     L(128bytesormore_nt_start)

        /* Below the shared cache size: EBX is no longer needed.  */
#ifdef EBX_SAVED_FOR_CACHE_SIZE
        POP (%ebx)
# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
#else
# define RESTORE_EBX_STATE
#endif
        /* Compare the length against the data cache size.  */
#ifdef DATA_CACHE_SIZE
        cmp     $DATA_CACHE_SIZE, %ecx
#else
# if (defined SHARED || defined __PIC__)
        call    __i686.get_pc_thunk.bx
        add     $_GLOBAL_OFFSET_TABLE_, %ebx
        cmp     __x86_data_cache_size@GOTOFF(%ebx), %ecx
# else
        cmp     __x86_data_cache_size, %ecx
# endif
#endif

        jae     L(128bytes_L2_normal)
        /* Bias ECX by -128 so that the borrow from the loop's own sub
           tells whether another full 128-byte chunk follows the one
           being written.  movdqa and lea leave the flags untouched.  */
        subl    $128, %ecx
L(128bytesormore_normal):
        sub     $128, %ecx
        movdqa  %xmm0, (%edx)
        movdqa  %xmm0, 0x10(%edx)
        movdqa  %xmm0, 0x20(%edx)
        movdqa  %xmm0, 0x30(%edx)
        movdqa  %xmm0, 0x40(%edx)
        movdqa  %xmm0, 0x50(%edx)
        movdqa  %xmm0, 0x60(%edx)
        movdqa  %xmm0, 0x70(%edx)
        lea     128(%edx), %edx
        jb      L(128bytesless_normal)


        sub     $128, %ecx
        movdqa  %xmm0, (%edx)
        movdqa  %xmm0, 0x10(%edx)
        movdqa  %xmm0, 0x20(%edx)
        movdqa  %xmm0, 0x30(%edx)
        movdqa  %xmm0, 0x40(%edx)
        movdqa  %xmm0, 0x50(%edx)
        movdqa  %xmm0, 0x60(%edx)
        movdqa  %xmm0, 0x70(%edx)
        lea     128(%edx), %edx
        jae     L(128bytesormore_normal)

L(128bytesless_normal):
        lea     128(%ecx), %ecx         /* undo the bias: ECX = bytes left */
        add     %ecx, %edx
        shr     $1, %ecx
        BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

/* Length is at least the data cache size but below the shared cache
   size: 128 bytes per iteration with prefetcht0 ahead of the cursor.  */
        ALIGN (4)
L(128bytes_L2_normal):
        prefetcht0      0x380(%edx)
        prefetcht0      0x3c0(%edx)
        sub     $128, %ecx
        movdqa  %xmm0, (%edx)
        movaps  %xmm0, 0x10(%edx)
        movaps  %xmm0, 0x20(%edx)
        movaps  %xmm0, 0x30(%edx)
        movaps  %xmm0, 0x40(%edx)
        movaps  %xmm0, 0x50(%edx)
        movaps  %xmm0, 0x60(%edx)
        movaps  %xmm0, 0x70(%edx)
        add     $128, %edx
        cmp     $128, %ecx
        jae     L(128bytes_L2_normal)

L(128bytesless_L2_normal):
        add     %ecx, %edx
        shr     $1, %ecx
        BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

        /* Reached only through the jae above, i.e. with EBX still saved
           (when it was pushed): re-assert that unwind state.  */
        RESTORE_EBX_STATE
/* Length is at least the shared cache size (EBX).  The loop below writes
   EBX bytes, in 128-byte chunks, with regular stores; ECX is set to the
   rest (length - EBX plus EBX's low seven bits), which is then written
   with non-temporal stores while at least 128 bytes remain.  */
L(128bytesormore_nt_start):
        sub     %ebx, %ecx
        mov     %ebx, %eax
        and     $0x7f, %eax
        add     %eax, %ecx
        movd    %xmm0, %eax             /* reload the pattern into EAX */
        ALIGN (4)
L(128bytesormore_shared_cache_loop):
        prefetcht0      0x3c0(%edx)
        prefetcht0      0x380(%edx)
        sub     $0x80, %ebx
        movdqa  %xmm0, (%edx)
        movdqa  %xmm0, 0x10(%edx)
        movdqa  %xmm0, 0x20(%edx)
        movdqa  %xmm0, 0x30(%edx)
        movdqa  %xmm0, 0x40(%edx)
        movdqa  %xmm0, 0x50(%edx)
        movdqa  %xmm0, 0x60(%edx)
        movdqa  %xmm0, 0x70(%edx)
        add     $0x80, %edx
        cmp     $0x80, %ebx
        jae     L(128bytesormore_shared_cache_loop)
        cmp     $0x80, %ecx
        jb      L(shared_cache_loop_end)
        ALIGN (4)
L(128bytesormore_nt):
        sub     $0x80, %ecx
        movntdq %xmm0, (%edx)
        movntdq %xmm0, 0x10(%edx)
        movntdq %xmm0, 0x20(%edx)
        movntdq %xmm0, 0x30(%edx)
        movntdq %xmm0, 0x40(%edx)
        movntdq %xmm0, 0x50(%edx)
        movntdq %xmm0, 0x60(%edx)
        movntdq %xmm0, 0x70(%edx)
        add     $0x80, %edx
        cmp     $0x80, %ecx
        jae     L(128bytesormore_nt)
        sfence
L(shared_cache_loop_end):
        /* Undo the push done at L(128bytesormore), if there was one.  */
#ifdef EBX_SAVED_FOR_CACHE_SIZE
        POP (%ebx)
#endif
        add     %ecx, %edx
        shr     $1, %ecx
        BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))


        .pushsection .rodata.sse2,"a",@progbits
        ALIGN (2)
L(table_16_128bytes):
        .int    JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
        .int    JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
        .popsection

/* Tail stubs for 0..126 remaining bytes.  Each label falls through into
   the stub for 16 bytes fewer; the last stub of each family writes the
   sub-16-byte remainder.  Offsets are relative to the end pointer in
   EDX.  */

        ALIGN (4)
L(aligned_16_112bytes):
        movdqa  %xmm0, -112(%edx)
L(aligned_16_96bytes):
        movdqa  %xmm0, -96(%edx)
L(aligned_16_80bytes):
        movdqa  %xmm0, -80(%edx)
L(aligned_16_64bytes):
        movdqa  %xmm0, -64(%edx)
L(aligned_16_48bytes):
        movdqa  %xmm0, -48(%edx)
L(aligned_16_32bytes):
        movdqa  %xmm0, -32(%edx)
L(aligned_16_16bytes):
        movdqa  %xmm0, -16(%edx)
L(aligned_16_0bytes):
        SETRTNVAL
        RETURN


        ALIGN (4)
L(aligned_16_114bytes):
        movdqa  %xmm0, -114(%edx)
L(aligned_16_98bytes):
        movdqa  %xmm0, -98(%edx)
L(aligned_16_82bytes):
        movdqa  %xmm0, -82(%edx)
L(aligned_16_66bytes):
        movdqa  %xmm0, -66(%edx)
L(aligned_16_50bytes):
        movdqa  %xmm0, -50(%edx)
L(aligned_16_34bytes):
        movdqa  %xmm0, -34(%edx)
L(aligned_16_18bytes):
        movdqa  %xmm0, -18(%edx)
L(aligned_16_2bytes):
        movw    %ax, -2(%edx)
        SETRTNVAL
        RETURN

        ALIGN (4)
L(aligned_16_116bytes):
        movdqa  %xmm0, -116(%edx)
L(aligned_16_100bytes):
        movdqa  %xmm0, -100(%edx)
L(aligned_16_84bytes):
        movdqa  %xmm0, -84(%edx)
L(aligned_16_68bytes):
        movdqa  %xmm0, -68(%edx)
L(aligned_16_52bytes):
        movdqa  %xmm0, -52(%edx)
L(aligned_16_36bytes):
        movdqa  %xmm0, -36(%edx)
L(aligned_16_20bytes):
        movdqa  %xmm0, -20(%edx)
L(aligned_16_4bytes):
        movl    %eax, -4(%edx)
        SETRTNVAL
        RETURN


        ALIGN (4)
L(aligned_16_118bytes):
        movdqa  %xmm0, -118(%edx)
L(aligned_16_102bytes):
        movdqa  %xmm0, -102(%edx)
L(aligned_16_86bytes):
        movdqa  %xmm0, -86(%edx)
L(aligned_16_70bytes):
        movdqa  %xmm0, -70(%edx)
L(aligned_16_54bytes):
        movdqa  %xmm0, -54(%edx)
L(aligned_16_38bytes):
        movdqa  %xmm0, -38(%edx)
L(aligned_16_22bytes):
        movdqa  %xmm0, -22(%edx)
L(aligned_16_6bytes):
        movl    %eax, -6(%edx)
        movw    %ax, -2(%edx)
        SETRTNVAL
        RETURN


        ALIGN (4)
L(aligned_16_120bytes):
        movdqa  %xmm0, -120(%edx)
L(aligned_16_104bytes):
        movdqa  %xmm0, -104(%edx)
L(aligned_16_88bytes):
        movdqa  %xmm0, -88(%edx)
L(aligned_16_72bytes):
        movdqa  %xmm0, -72(%edx)
L(aligned_16_56bytes):
        movdqa  %xmm0, -56(%edx)
L(aligned_16_40bytes):
        movdqa  %xmm0, -40(%edx)
L(aligned_16_24bytes):
        movdqa  %xmm0, -24(%edx)
L(aligned_16_8bytes):
        movq    %xmm0, -8(%edx)
        SETRTNVAL
        RETURN


        ALIGN (4)
L(aligned_16_122bytes):
        movdqa  %xmm0, -122(%edx)
L(aligned_16_106bytes):
        movdqa  %xmm0, -106(%edx)
L(aligned_16_90bytes):
        movdqa  %xmm0, -90(%edx)
L(aligned_16_74bytes):
        movdqa  %xmm0, -74(%edx)
L(aligned_16_58bytes):
        movdqa  %xmm0, -58(%edx)
L(aligned_16_42bytes):
        movdqa  %xmm0, -42(%edx)
L(aligned_16_26bytes):
        movdqa  %xmm0, -26(%edx)
L(aligned_16_10bytes):
        movq    %xmm0, -10(%edx)
        movw    %ax, -2(%edx)
        SETRTNVAL
        RETURN


        ALIGN (4)
L(aligned_16_124bytes):
        movdqa  %xmm0, -124(%edx)
L(aligned_16_108bytes):
        movdqa  %xmm0, -108(%edx)
L(aligned_16_92bytes):
        movdqa  %xmm0, -92(%edx)
L(aligned_16_76bytes):
        movdqa  %xmm0, -76(%edx)
L(aligned_16_60bytes):
        movdqa  %xmm0, -60(%edx)
L(aligned_16_44bytes):
        movdqa  %xmm0, -44(%edx)
L(aligned_16_28bytes):
        movdqa  %xmm0, -28(%edx)
L(aligned_16_12bytes):
        movq    %xmm0, -12(%edx)
        movl    %eax, -4(%edx)
        SETRTNVAL
        RETURN


        ALIGN (4)
L(aligned_16_126bytes):
        movdqa  %xmm0, -126(%edx)
L(aligned_16_110bytes):
        movdqa  %xmm0, -110(%edx)
L(aligned_16_94bytes):
        movdqa  %xmm0, -94(%edx)
L(aligned_16_78bytes):
        movdqa  %xmm0, -78(%edx)
L(aligned_16_62bytes):
        movdqa  %xmm0, -62(%edx)
L(aligned_16_46bytes):
        movdqa  %xmm0, -46(%edx)
L(aligned_16_30bytes):
        movdqa  %xmm0, -30(%edx)
L(aligned_16_14bytes):
        movq    %xmm0, -14(%edx)
        movl    %eax, -6(%edx)
        movw    %ax, -2(%edx)
        SETRTNVAL
        RETURN

END (sse2_memset16_atom)