/*
Copyright (c) 2010, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
 * memset and __memset_chk for 32-bit x86 using SSE2 stores.
 *
 * Syntax: GNU as, AT&T operand order (source, destination), run through the
 * C preprocessor.
 *
 * Arguments are read from the stack.  Both entry points push EBX first, so
 * the first argument lives at 8(%esp) (see PARMS below).
 *
 * Register roles inside memset:
 *   EAX   fill byte replicated into all four bytes (also used as scratch,
 *         and reloaded from XMM0 afterwards)
 *   ECX   number of bytes still to be written
 *   EDX   destination cursor; after BRANCH_TO_JMPTBL_ENTRY it points one
 *         past the last byte to write
 *   EBX   scratch for the position-independent jump-table dispatch and for
 *         the shared-cache threshold; saved on entry and restored on return
 *   XMM0  fill byte replicated into all sixteen bytes
 *
 * ENTRY/END are presumably supplied by private/bionic_asm.h, and
 * SHARED_CACHE_SIZE/DATA_CACHE_SIZE by cache.h; neither is defined in this
 * file.
 */

#include <private/bionic_asm.h>

#include "cache.h"

#ifndef L
# define L(label)	.L##label
#endif

#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

/* Call-frame information matching a 4-byte push / pop of REG.  */
#define CFI_PUSH(REG)						\
  .cfi_adjust_cfa_offset 4;					\
  .cfi_rel_offset REG, 0

#define CFI_POP(REG)						\
  .cfi_adjust_cfa_offset -4;					\
  .cfi_restore REG

#define PUSH(REG)	pushl REG; CFI_PUSH(REG)
#define POP(REG)	popl REG; CFI_POP(REG)

/* Stack offsets of the arguments once EBX has been pushed by ENTRANCE.  */
#define PARMS		8	/* Preserve EBX.  */
#define DST		PARMS
#define CHR		(DST+4)
#define LEN		(CHR+4)
#define CHK_DST_LEN	(LEN+4)
/* The return value is the destination pointer as passed in.  */
#define SETRTNVAL	movl DST(%esp), %eax

#define ENTRANCE	PUSH(%ebx);
#define RETURN_END	POP(%ebx); ret
/* RETURN is for returns in the middle of the function: the code that
   follows the ret still runs with EBX pushed, so the CFI state is put
   back after the pop has been described.  */
#define RETURN		RETURN_END; CFI_PUSH(%ebx)
#define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.
   In:      ECX = table index (the remaining byte count), EDX = cursor.
   Out:     EDX = EDX + ECX, i.e. the end of the region; the handlers
            store at negative offsets from it.
   Clobbers EBX and the flags.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    /* We first load PC into EBX.  */				\
    call	__x86.get_pc_thunk.bx;				\
    /* Get the address of the jump table.  */			\
    add		$(TABLE - .), %ebx;				\
    /* Get the entry and convert the relative offset to the	\
       absolute address.  */					\
    add		(%ebx,%ecx,4), %ebx;				\
    add		%ecx, %edx;					\
    /* We loaded the jump table and adjusted EDX. Go.  */	\
    jmp		*%ebx

/* Returns the caller return address (the address of the instruction
   after the call) in EBX.  */
	.section	.gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
	.globl	__x86.get_pc_thunk.bx
	.hidden	__x86.get_pc_thunk.bx
	ALIGN(4)
	.type	__x86.get_pc_thunk.bx,@function
__x86.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret

/* __memset_chk: same stack arguments as memset plus a fourth one,
   CHK_DST_LEN.  When LEN <= CHK_DST_LEN (unsigned) it continues inside
   memset with EBX already pushed and ECX = LEN.  Otherwise it undoes the
   push and jumps to __memset_chk_fail, which is not defined in this
   file.  */
ENTRY(__memset_chk)
	ENTRANCE

	movl	LEN(%esp), %ecx
	cmpl	CHK_DST_LEN(%esp), %ecx
	jna	L(memset_length_loaded)

	POP(%ebx) // Undo ENTRANCE without returning.
	jmp	__memset_chk_fail
END(__memset_chk)

/* memset: stack arguments DST, CHR, LEN.  Writes the low byte of CHR to
   LEN bytes starting at DST and returns DST in EAX.  */
	.section	.text.sse2,"ax",@progbits
	ALIGN(4)
ENTRY(memset)
	ENTRANCE

	movl	LEN(%esp), %ecx
L(memset_length_loaded):
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	/* Fill the whole EAX with pattern.  */
	movl	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
	movl	DST(%esp), %edx
	cmp	$32, %ecx
	jae	L(32bytesormore)

/* ECX < 32: dispatch on the exact byte count.  */
L(write_less32bytes):
	BRANCH_TO_JMPTBL_ENTRY(L(table_less_32bytes))


	.pushsection	.rodata.sse2,"a",@progbits
	ALIGN(2)
L(table_less_32bytes):
	.int	JMPTBL(L(write_0bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_1bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_2bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_3bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_4bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_5bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_6bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_7bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_8bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_9bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_10bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_11bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_12bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_13bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_14bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_15bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_16bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_17bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_18bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_19bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_20bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_21bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_22bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_23bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_24bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_25bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_26bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_27bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_28bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_29bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_30bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_31bytes), L(table_less_32bytes))
	.popsection

/* Handlers for 0..31 bytes.  EDX points one past the last byte to write.
   Each label falls through into the next smaller size of the same
   remainder modulo 4.  */
	ALIGN(4)
L(write_28bytes):
	movl	%eax, -28(%edx)
L(write_24bytes):
	movl	%eax, -24(%edx)
L(write_20bytes):
	movl	%eax, -20(%edx)
L(write_16bytes):
	movl	%eax, -16(%edx)
L(write_12bytes):
	movl	%eax, -12(%edx)
L(write_8bytes):
	movl	%eax, -8(%edx)
L(write_4bytes):
	movl	%eax, -4(%edx)
L(write_0bytes):
	SETRTNVAL
	RETURN

	ALIGN(4)
L(write_29bytes):
	movl	%eax, -29(%edx)
L(write_25bytes):
	movl	%eax, -25(%edx)
L(write_21bytes):
	movl	%eax, -21(%edx)
L(write_17bytes):
	movl	%eax, -17(%edx)
L(write_13bytes):
	movl	%eax, -13(%edx)
L(write_9bytes):
	movl	%eax, -9(%edx)
L(write_5bytes):
	movl	%eax, -5(%edx)
L(write_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(write_30bytes):
	movl	%eax, -30(%edx)
L(write_26bytes):
	movl	%eax, -26(%edx)
L(write_22bytes):
	movl	%eax, -22(%edx)
L(write_18bytes):
	movl	%eax, -18(%edx)
L(write_14bytes):
	movl	%eax, -14(%edx)
L(write_10bytes):
	movl	%eax, -10(%edx)
L(write_6bytes):
	movl	%eax, -6(%edx)
L(write_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(write_31bytes):
	movl	%eax, -31(%edx)
L(write_27bytes):
	movl	%eax, -27(%edx)
L(write_23bytes):
	movl	%eax, -23(%edx)
L(write_19bytes):
	movl	%eax, -19(%edx)
L(write_15bytes):
	movl	%eax, -15(%edx)
L(write_11bytes):
	movl	%eax, -11(%edx)
L(write_7bytes):
	movl	%eax, -7(%edx)
L(write_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
/* ECX >= 32.  No alignment of EDX has been established yet; it is
   tested just below.  */
L(32bytesormore):
	/* Fill xmm0 with the pattern.  */
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0
	testl	$0xf, %edx
	jz	L(aligned_16)
/* ECX >= 32 and EDX is not 16 byte aligned.  */
L(not_aligned_16):
	/* Cover the unaligned head with one 16-byte store, then round EDX
	   up to the next 16-byte boundary.  The bytes between the new EDX
	   and the end of this store are simply written again later.  */
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	/* EAX = old EDX - new EDX, a negative count of the bytes skipped;
	   adding it shrinks the remaining length accordingly.  */
	sub	%edx, %eax
	add	%eax, %ecx
	/* EAX was used as scratch; reload the 4-byte pattern.  */
	movd	%xmm0, %eax

	ALIGN(4)
/* EDX is 16 byte aligned from here on.  */
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

	ALIGN(4)
/* ECX >= 128 and EDX is 16 byte aligned.  Pick a store loop by size.  */
L(128bytesormore):
	/* EBX is pushed a second time so it can hold the threshold.  */
	PUSH(%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)


	POP(%ebx)
# define RESTORE_EBX_STATE CFI_PUSH(%ebx)
	cmp	$DATA_CACHE_SIZE, %ecx

	jae	L(128bytes_L2_normal)
	/* 128 <= ECX < DATA_CACHE_SIZE.  ECX is biased by -128 so that the
	   borrow of each sub below tells whether fewer than 128 bytes remain
	   after the block stored right after it.  movdqa and lea leave the
	   flags alone, so jb/jae still see the result of that sub.  */
	subl	$128, %ecx
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Remove the bias: ECX is again the true remaining count (< 128).  */
	add	$128, %ecx
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

	ALIGN(4)
/* DATA_CACHE_SIZE <= ECX < SHARED_CACHE_SIZE on first entry.  Stores 128
   bytes per iteration with prefetches ahead of the cursor, while at
   least 128 bytes remain.  */
L(128bytes_L2_normal):
	prefetcht0	0x380(%edx)
	prefetcht0	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

	/* CFI only: the code below is reached by the jae above, with the
	   second push of EBX still on the stack.  */
	RESTORE_EBX_STATE
/* ECX >= SHARED_CACHE_SIZE, EBX = SHARED_CACHE_SIZE.  */
L(128bytesormore_nt_start):
	/* The cached loop below writes SHARED_CACHE_SIZE rounded down to a
	   multiple of 128 bytes, so what is left for the later stages is
	   ECX - SHARED_CACHE_SIZE + (SHARED_CACHE_SIZE & 0x7f).  */
	sub	%ebx, %ecx
	mov	%ebx, %eax
	and	$0x7f, %eax
	add	%eax, %ecx
	/* EAX was used as scratch; reload the 4-byte pattern.  */
	movd	%xmm0, %eax
	ALIGN(4)
L(128bytesormore_shared_cache_loop):
	prefetcht0	0x3c0(%edx)
	prefetcht0	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN(4)
/* Whatever is left in whole 128-byte blocks is written with non-temporal
   stores, followed by one sfence.  */
L(128bytesormore_nt):
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	sfence
L(shared_cache_loop_end):
	/* Drop the second push of EBX; ECX < 128 bytes remain.  */
	POP(%ebx)
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))


	.pushsection	.rodata.sse2,"a",@progbits
	ALIGN(2)
L(table_16_128bytes):
	.int	JMPTBL(L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection

/* Handlers for 0..127 remaining bytes.  EDX points one past the last
   byte to write, and EDX minus the byte count is 16 byte aligned, so in
   every chain the 16-byte stores land on aligned addresses.  Each label
   falls through into the size that is 16 bytes smaller; the final label
   of a chain writes the 0..15 byte tail.  */
	ALIGN(4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	/* Last return in the function: no CFI state to re-establish.  */
	RETURN_END

END(memset)