/*
Copyright (c) 2010, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Syntax: GNU as, AT&T operand order, run through the C preprocessor.
   Target: 32-bit x86 with SSE2; arguments are read from the stack.  */

/* L(name) produces an assembler-local (.L-prefixed) label.  */
#ifndef L
# define L(label)	.L##label
#endif

/* ALIGN(n) aligns to a 2^n byte boundary.  */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef ENTRY
# define ENTRY(name)			\
	.type name,  @function;		\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

/* Unwind annotations that accompany a 4-byte push/pop of REG.  */
#define CFI_PUSH(REG)						\
  cfi_adjust_cfa_offset (4);					\
  cfi_rel_offset (REG, 0)

#define CFI_POP(REG)						\
  cfi_adjust_cfa_offset (-4);					\
  cfi_restore (REG)

#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Stack offsets of the arguments, relative to ESP after ENTRANCE.
   The bzero flavour takes (dest, len) and sets no return value; the
   memset flavour takes (dest, chr, len) and returns dest in EAX.  */
#ifdef USE_AS_BZERO
# define DEST		PARMS
# define LEN		DEST+4
# define SETRTNVAL
#else
# define DEST		PARMS
# define CHR		DEST+4
# define LEN		CHR+4
# define SETRTNVAL	movl DEST(%esp), %eax
#endif

#ifdef SHARED
/* Position-independent build: EBX is used to compute addresses, so it is
   saved on entry and restored on every return.  RETURN re-asserts the
   "EBX pushed" unwind state for the code that follows the ret.  */
# define ENTRANCE	PUSH (%ebx);
# define RETURN_END	POP (%ebx); ret
# define RETURN		RETURN_END; CFI_PUSH (%ebx)
# define PARMS		8		/* Preserve EBX.  */
# define JMPTBL(I, B)	I - B

/* In this build the extra PUSH (%ebx) in L(128bytesormore) is emitted only
   when SHARED_CACHE_SIZE is defined, while the matching POP (%ebx) is
   emitted only when DATA_CACHE_SIZE is defined.  Defining exactly one of
   them leaves the stack off by one slot for every length >= 128, so
   reject that configuration at build time.  */
# if (defined SHARED_CACHE_SIZE && !defined DATA_CACHE_SIZE) \
     || (!defined SHARED_CACHE_SIZE && defined DATA_CACHE_SIZE)
#  error "SHARED build: define both SHARED_CACHE_SIZE and DATA_CACHE_SIZE, or neither"
# endif

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  ECX is the table index (bytes left
   to store); EDX is advanced by ECX so the target can store at negative
   offsets from the end of the region.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    /* We first load PC into EBX.  */				\
    call	__i686.get_pc_thunk.bx;				\
    /* Get the address of the jump table.  */			\
    add		$(TABLE - .), %ebx;				\
    /* Get the entry and convert the relative offset to the	\
       absolute address.  */					\
    add		(%ebx,%ecx,4), %ebx;				\
    add		%ecx, %edx;					\
    /* We loaded the jump table and adjusted EDX.  Go.  */	\
    jmp		*%ebx

/* Returns the address of the instruction after the call in EBX.  */
	.section	.gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
	.globl	__i686.get_pc_thunk.bx
	.hidden	__i686.get_pc_thunk.bx
	ALIGN (4)
	.type	__i686.get_pc_thunk.bx,@function
__i686.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret
#else
# define ENTRANCE
# define RETURN_END	ret
# define RETURN		RETURN_END
# define PARMS		4
# define JMPTBL(I, B)	I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  ECX is the table index (bytes left to store); EDX
   is advanced by ECX so the target can store at negative offsets from
   the end of the region.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    add		%ecx, %edx;					\
    jmp		*TABLE(,%ecx,4)
#endif

/*----------------------------------------------------------------------
   sse2_memset5_atom

   Fills LEN bytes at DEST with the low byte of CHR (or with zero when
   built with USE_AS_BZERO).  All arguments are read from the stack at
   DEST(%esp), CHR(%esp) and LEN(%esp).

   Out:   EAX = DEST (memset flavour only; SETRTNVAL is empty for bzero).
   Regs:  EAX  = fill byte replicated into all four bytes
	  ECX  = number of bytes still to store
	  EDX  = store cursor
	  XMM0 = fill byte replicated into all sixteen bytes
	  EBX  = scratch; saved and restored around every use.

   Strategy:
     LEN < 32	dispatch through L(table_less_32bytes).
     LEN >= 32	one unaligned 16-byte store if needed, then EDX is
		rounded up to a 16-byte boundary;
       < 128	dispatch through L(table_16_128bytes);
       >= 128	128-byte unrolled loops; which loop runs depends on how
		the remaining length compares with the shared and data
		cache sizes.  The leftover (< 128 bytes) is dispatched
		through L(table_16_128bytes).
  ----------------------------------------------------------------------*/
	.section .text.sse2,"ax",@progbits
	ALIGN (4)
ENTRY (sse2_memset5_atom)
	ENTRANCE

	movl	LEN(%esp), %ecx
#ifdef USE_AS_BZERO
	xor	%eax, %eax
#else
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	/* Fill the whole EAX with pattern.  */
	movl	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
#endif
	movl	DEST(%esp), %edx
	cmp	$32, %ecx
	jae	L(32bytesormore)

/* ECX < 32: one table entry per length.  */
L(write_less32bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))


	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_less_32bytes):
	.int	JMPTBL (L(write_0bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_1bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_2bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_3bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_4bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_5bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_6bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_7bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_8bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_9bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_10bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_11bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_12bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_13bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_14bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_15bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_16bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_17bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_18bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_19bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_20bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_21bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_22bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_23bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_24bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_25bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_26bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_27bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_28bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_29bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_30bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_31bytes), L(table_less_32bytes))
	.popsection

/* Tail handlers for fewer than 32 bytes.  On entry EDX points one past
   the last byte to store.  Each chain falls through 4 bytes at a time;
   the four chains differ in the 0..3 byte remainder stored last.  */
	ALIGN (4)
L(write_28bytes):
	movl	%eax, -28(%edx)
L(write_24bytes):
	movl	%eax, -24(%edx)
L(write_20bytes):
	movl	%eax, -20(%edx)
L(write_16bytes):
	movl	%eax, -16(%edx)
L(write_12bytes):
	movl	%eax, -12(%edx)
L(write_8bytes):
	movl	%eax, -8(%edx)
L(write_4bytes):
	movl	%eax, -4(%edx)
L(write_0bytes):
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_29bytes):
	movl	%eax, -29(%edx)
L(write_25bytes):
	movl	%eax, -25(%edx)
L(write_21bytes):
	movl	%eax, -21(%edx)
L(write_17bytes):
	movl	%eax, -17(%edx)
L(write_13bytes):
	movl	%eax, -13(%edx)
L(write_9bytes):
	movl	%eax, -9(%edx)
L(write_5bytes):
	movl	%eax, -5(%edx)
L(write_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_30bytes):
	movl	%eax, -30(%edx)
L(write_26bytes):
	movl	%eax, -26(%edx)
L(write_22bytes):
	movl	%eax, -22(%edx)
L(write_18bytes):
	movl	%eax, -18(%edx)
L(write_14bytes):
	movl	%eax, -14(%edx)
L(write_10bytes):
	movl	%eax, -10(%edx)
L(write_6bytes):
	movl	%eax, -6(%edx)
L(write_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_31bytes):
	movl	%eax, -31(%edx)
L(write_27bytes):
	movl	%eax, -27(%edx)
L(write_23bytes):
	movl	%eax, -23(%edx)
L(write_19bytes):
	movl	%eax, -19(%edx)
L(write_15bytes):
	movl	%eax, -15(%edx)
L(write_11bytes):
	movl	%eax, -11(%edx)
L(write_7bytes):
	movl	%eax, -7(%edx)
L(write_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
/* ECX >= 32.  The alignment of EDX has not been checked yet.  */
L(32bytesormore):
	/* Fill xmm0 with the pattern.  */
#ifdef USE_AS_BZERO
	pxor	%xmm0, %xmm0
#else
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0
#endif
	testl	$0xf, %edx
	jz	L(aligned_16)
/* ECX >= 32 and EDX is not 16 byte aligned.  */
L(not_aligned_16):
	/* Cover the unaligned head with one 16-byte store, then round EDX
	   up to the next 16-byte boundary and shrink ECX by the distance
	   advanced.  EAX is used as scratch and reloaded from XMM0.  */
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	sub	%edx, %eax		/* EAX = old EDX - new EDX (negative).  */
	add	%eax, %ecx
	movd	%xmm0, %eax

	ALIGN (4)
/* EDX is 16 byte aligned from here on.  */
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
L(128bytesormore):
	/* EBX = shared cache size.  Where EBX is not already saved by
	   ENTRANCE it is pushed here.  */
#ifdef SHARED_CACHE_SIZE
	PUSH (%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
#else
# ifdef SHARED
	call	__i686.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
# else
	PUSH (%ebx)
	mov	__x86_shared_cache_size, %ebx
# endif
#endif
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)


	/* Below the shared cache size: compare against the data cache size.
	   RESTORE_EBX_STATE records the unwind state that must be
	   re-asserted at L(128bytesormore_nt_start), which is reached with
	   EBX still pushed.  */
#ifdef DATA_CACHE_SIZE
	POP (%ebx)
# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
	cmp	$DATA_CACHE_SIZE, %ecx
#else
# ifdef SHARED
#  define RESTORE_EBX_STATE
	call	__i686.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
# else
	POP (%ebx)
#  define RESTORE_EBX_STATE CFI_PUSH (%ebx)
	cmp	__x86_data_cache_size, %ecx
# endif
#endif

	jae	L(128bytes_L2_normal)
	/* Bias ECX by -128 so the borrow from each "sub $128" below tells
	   whether another full 128-byte block remains.  LEA is used to
	   advance EDX because it leaves those flags intact.  */
	subl	$128, %ecx
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Undo the bias: ECX = bytes left, in 0..127.  */
	add	$128, %ecx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
/* At least the data cache size but below the shared cache size:
   128 bytes per iteration with prefetching ahead of the cursor.  */
L(128bytes_L2_normal):
	prefetcht0	0x380(%edx)
	prefetcht0	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	RESTORE_EBX_STATE
/* At least the shared cache size (EBX).  The first EBX bytes, rounded
   down to a multiple of 128, are written with ordinary stores; whatever
   remains beyond that is written with non-temporal stores.  */
L(128bytesormore_nt_start):
	sub	%ebx, %ecx
	mov	%ebx, %eax
	and	$0x7f, %eax
	add	%eax, %ecx		/* ECX = LEN left after the first loop.  */
	movd	%xmm0, %eax		/* Reload the pattern clobbered above.  */
	ALIGN (4)
L(128bytesormore_shared_cache_loop):
	prefetcht0	0x3c0(%edx)
	prefetcht0	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN (4)
L(128bytesormore_nt):
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	/* Order the non-temporal stores before any later stores.  */
	sfence
L(shared_cache_loop_end):
#if defined DATA_CACHE_SIZE || !defined SHARED
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))


	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_16_128bytes):
	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection

/* Tail handlers for 0..127 bytes starting at a 16-byte aligned address.
   On entry EDX points one past the last byte to store, so the start is
   EDX - N.  Each chain falls through 16 bytes at a time; the MOVDQA
   addresses stay 16-byte aligned because every step subtracts a multiple
   of 16 from N.  The 0..15 byte remainder is stored last with
   MOVQ/MOVL/MOVW/MOVB.  */
	ALIGN (4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN_END

END (sse2_memset5_atom)