/*
Copyright (c) 2010, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* SSE2 memset / bzero for 32-bit x86.  GNU as, AT&T syntax, run through
   the C preprocessor.

   Arguments are read from the stack in the order DEST, CHR, LEN (DEST, LEN
   when USE_AS_BZERO is defined).  Unless USE_AS_BZERO is defined, DEST is
   returned in EAX.

   Register roles inside MEMSET:
     EAX   fill byte replicated into all four bytes (0 for bzero)
     ECX   number of bytes still to be written
     EDX   current destination pointer
     XMM0  fill byte replicated into all sixteen bytes
     EBX   scratch (PC thunk / cache size); always restored before return

   Clobbers EAX, ECX, EDX, XMM0 and the flags.  */

#include "cache.h"
/* The PC thunk below is named __i686.get_pc_thunk.bx; drop any __i686
   macro so the preprocessor does not rewrite that symbol name.  */
#undef __i686

#ifndef L
# define L(label)	.L##label
#endif

#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef ENTRY
# define ENTRY(name)	\
	.type name, @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
#endif

#ifndef END
# define END(name)	\
	cfi_endproc;	\
	.size name, .-name
#endif

/* Unwind bookkeeping for a 4-byte push / pop of REG.  */
#define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Stack offsets of the arguments, relative to ESP after ENTRANCE.
   SETRTNVAL loads the return value; it is empty for bzero.  */
#ifdef USE_AS_BZERO
# define DEST		PARMS
# define LEN		DEST+4
# define SETRTNVAL
#else
# define DEST		PARMS
# define CHR		DEST+4
# define LEN		CHR+4
# define SETRTNVAL	movl DEST(%esp), %eax
#endif

#if (defined SHARED || defined __PIC__)
# define ENTRANCE	PUSH (%ebx);
# define RETURN_END	POP (%ebx); ret
/* RETURN is used in the middle of the function: after the ret, re-declare
   the pushed EBX so the unwind info stays right for the code that follows.  */
# define RETURN		RETURN_END; CFI_PUSH (%ebx)
# define PARMS		8		/* Preserve EBX.  */
# define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  ECX is the table index; it is also
   added to EDX, so every jump target addresses its stores backwards from
   the end of the region.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)	\
	/* We first load PC into EBX.  */	\
	call	__i686.get_pc_thunk.bx;	\
	/* Get the address of the jump table.  */	\
	add	$(TABLE - .), %ebx;	\
	/* Get the entry and convert the relative offset to the	\
	   absolute address.  */	\
	add	(%ebx,%ecx,4), %ebx;	\
	add	%ecx, %edx;	\
	/* We loaded the jump table and adjusted EDX.  Go.  */	\
	jmp	*%ebx

	.section	.gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
	.globl	__i686.get_pc_thunk.bx
	.hidden	__i686.get_pc_thunk.bx
	ALIGN (4)
	.type	__i686.get_pc_thunk.bx,@function
/* Return the caller's return address (its current PC) in EBX.  */
__i686.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret
#else
# define ENTRANCE
# define RETURN_END	ret
# define RETURN		RETURN_END
# define PARMS		4
# define JMPTBL(I, B)	I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  ECX is the table index; it is also added to EDX, so
   every jump target addresses its stores backwards from the end of the
   region.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)	\
	add	%ecx, %edx;	\
	jmp	*TABLE(,%ecx,4)
#endif

#ifndef MEMSET
# define MEMSET memset
#endif

/* EBX is pushed around the shared-cache-size comparison whenever that
   size is obtained without the PC thunk (see L(128bytesormore)).  Both
   matching POPs are keyed off this one macro so that pushes and pops stay
   balanced for every combination of SHARED_CACHE_SIZE, DATA_CACHE_SIZE
   and PIC.  */
#if defined SHARED_CACHE_SIZE || !(defined SHARED || defined __PIC__)
# define EBX_PUSHED_FOR_CACHE_SIZE 1
#endif

	.section .text.sse2,"ax",@progbits
	ALIGN (4)
ENTRY (MEMSET)
	ENTRANCE

	movl	LEN(%esp), %ecx
#ifdef USE_AS_BZERO
	xor	%eax, %eax
#else
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	/* Fill the whole EAX with pattern.  */
	movl	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
#endif
	movl	DEST(%esp), %edx
	cmp	$32, %ecx
	jae	L(32bytesormore)

/* ECX < 32: dispatch on the exact length.  */
L(write_less32bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))


	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_less_32bytes):
	.int	JMPTBL (L(write_0bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_1bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_2bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_3bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_4bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_5bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_6bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_7bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_8bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_9bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_10bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_11bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_12bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_13bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_14bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_15bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_16bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_17bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_18bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_19bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_20bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_21bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_22bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_23bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_24bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_25bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_26bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_27bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_28bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_29bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_30bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_31bytes), L(table_less_32bytes))
	.popsection

/* Tails for lengths below 32.  EDX points one past the last byte to
   write; each chain falls through 4-byte stores and ends with the stores
   for length mod 4.  */
	ALIGN (4)
L(write_28bytes):
	movl	%eax, -28(%edx)
L(write_24bytes):
	movl	%eax, -24(%edx)
L(write_20bytes):
	movl	%eax, -20(%edx)
L(write_16bytes):
	movl	%eax, -16(%edx)
L(write_12bytes):
	movl	%eax, -12(%edx)
L(write_8bytes):
	movl	%eax, -8(%edx)
L(write_4bytes):
	movl	%eax, -4(%edx)
L(write_0bytes):
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_29bytes):
	movl	%eax, -29(%edx)
L(write_25bytes):
	movl	%eax, -25(%edx)
L(write_21bytes):
	movl	%eax, -21(%edx)
L(write_17bytes):
	movl	%eax, -17(%edx)
L(write_13bytes):
	movl	%eax, -13(%edx)
L(write_9bytes):
	movl	%eax, -9(%edx)
L(write_5bytes):
	movl	%eax, -5(%edx)
L(write_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_30bytes):
	movl	%eax, -30(%edx)
L(write_26bytes):
	movl	%eax, -26(%edx)
L(write_22bytes):
	movl	%eax, -22(%edx)
L(write_18bytes):
	movl	%eax, -18(%edx)
L(write_14bytes):
	movl	%eax, -14(%edx)
L(write_10bytes):
	movl	%eax, -10(%edx)
L(write_6bytes):
	movl	%eax, -6(%edx)
L(write_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_31bytes):
	movl	%eax, -31(%edx)
L(write_27bytes):
	movl	%eax, -27(%edx)
L(write_23bytes):
	movl	%eax, -23(%edx)
L(write_19bytes):
	movl	%eax, -19(%edx)
L(write_15bytes):
	movl	%eax, -15(%edx)
L(write_11bytes):
	movl	%eax, -11(%edx)
L(write_7bytes):
	movl	%eax, -7(%edx)
L(write_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
/* ECX >= 32.  Nothing is known about the alignment of EDX yet; it is
   tested below.  */
L(32bytesormore):
	/* Fill xmm0 with the pattern.  */
#ifdef USE_AS_BZERO
	pxor	%xmm0, %xmm0
#else
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0
#endif
	testl	$0xf, %edx
	jz	L(aligned_16)
/* ECX >= 32 and EDX is not 16 byte aligned.  */
L(not_aligned_16):
	/* Cover the unaligned head with one unaligned 16-byte store, then
	   round EDX up to the next 16-byte boundary.  EAX becomes
	   old EDX - new EDX (negative), which shrinks ECX by the number of
	   bytes skipped.  Bytes past the boundary that the store already
	   covered are simply written again with the same pattern.  */
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	sub	%edx, %eax
	add	%eax, %ecx
	/* EAX was used as scratch; reload the 4-byte pattern.  */
	movd	%xmm0, %eax

	ALIGN (4)
/* EDX is 16 byte aligned from here on.  */
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
/* ECX >= 128.  Pick a store strategy by comparing the length with the
   shared and data cache sizes.  */
L(128bytesormore):
#ifdef SHARED_CACHE_SIZE
	PUSH (%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
#else
# if (defined SHARED || defined __PIC__)
	/* EBX was already saved by ENTRANCE, so no extra push is needed.  */
	call	__i686.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
# else
	PUSH (%ebx)
	mov	__x86_shared_cache_size, %ebx
# endif
#endif
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)

	/* Below the shared cache size: EBX is no longer needed as the
	   cache size, so undo the push above (if there was one).
	   RESTORE_EBX_STATE re-declares that push in the unwind info for
	   L(128bytesormore_nt_start), which is entered with EBX still on
	   the stack.  */
#ifdef EBX_PUSHED_FOR_CACHE_SIZE
	POP (%ebx)
# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
#else
# define RESTORE_EBX_STATE
#endif

#ifdef DATA_CACHE_SIZE
	cmp	$DATA_CACHE_SIZE, %ecx
#else
# if (defined SHARED || defined __PIC__)
	call	__i686.get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
# else
	cmp	__x86_data_cache_size, %ecx
# endif
#endif

	jae	L(128bytes_L2_normal)
	/* Length is below the data cache size.  ECX is biased by -128 so
	   that the borrow of each "sub $128" in the loop tells whether
	   another full 128-byte chunk remains.  movdqa and lea leave the
	   flags untouched, so jb/jae test that sub.  */
	subl	$128, %ecx
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Remove the bias: ECX is now the remaining length, below 128.  */
	add	$128, %ecx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
/* Length is at least the data cache size but below the shared cache
   size: 128 bytes per iteration with prefetching ahead.  */
L(128bytes_L2_normal):
	prefetcht0	0x380(%edx)
	prefetcht0	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	RESTORE_EBX_STATE
/* Length is at least the shared cache size (held in EBX).  The first
   EBX bytes, rounded down to a multiple of 128, are written with regular
   stores; whatever remains is written with non-temporal stores.
   NOTE(review): the first loop runs its body before testing EBX, so it
   relies on the shared cache size being at least 128 - confirm for
   __x86_shared_cache_size.  */
L(128bytesormore_nt_start):
	/* ECX = length - EBX + (EBX mod 128), i.e. what will be left after
	   the regular-store loop.  */
	sub	%ebx, %ecx
	mov	%ebx, %eax
	and	$0x7f, %eax
	add	%eax, %ecx
	/* EAX was used as scratch; reload the 4-byte pattern.  */
	movd	%xmm0, %eax
	ALIGN (4)
L(128bytesormore_shared_cache_loop):
	prefetcht0	0x3c0(%edx)
	prefetcht0	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN (4)
L(128bytesormore_nt):
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	/* Order the non-temporal stores before any later stores.  */
	sfence
L(shared_cache_loop_end):
	/* Undo the push done in L(128bytesormore), if there was one.  */
#ifdef EBX_PUSHED_FOR_CACHE_SIZE
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))


	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_16_128bytes):
	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection

/* Tails for 0..127 remaining bytes.  On entry EDX points one past the
   last byte to write and (EDX - remaining length) is 16 byte aligned, so
   every movdqa below hits an aligned address.  Each chain falls through
   16-byte stores and ends with the stores for length mod 16.  */
	ALIGN (4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	/* Last return in the function: no unwind state to re-establish.  */
	RETURN_END

END (MEMSET)