/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
 * SSE2 memset for 32-bit x86.  GNU as, AT&T syntax, preprocessed by cpp.
 *
 * Entry points:
 *   memset_generic        - stack args DST, CHR, LEN; returns DST in EAX.
 *   __memset_chk_generic  - same, plus a fourth stack arg CHK_DST_LEN; if
 *                           LEN > CHK_DST_LEN (unsigned) it tail-jumps to
 *                           __memset_chk_fail, otherwise it joins
 *                           memset_generic after the length load.
 *
 * Register roles once the fill has started:
 *   ECX  - byte count still to be written
 *   EDX  - destination cursor
 *   EAX  - fill byte replicated into all 4 bytes
 *   XMM0 - fill byte replicated into all 16 bytes
 *   EBX  - scratch (saved on entry, restored by RETURN / RETURN_END)
 *
 * ENTRY/END are expected to come from <private/bionic_asm.h>, and
 * SHARED_CACHE_SIZE / DATA_CACHE_SIZE from "cache.h"; neither header is
 * visible from this file.
 */

#include <private/bionic_asm.h>

#include "cache.h"

#ifndef L
# define L(label)	.L##label
#endif

#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

/* Unwind-info bookkeeping that accompanies a 4-byte push/pop of REG.  */
#define CFI_PUSH(REG)						\
	.cfi_adjust_cfa_offset 4;				\
	.cfi_rel_offset REG, 0

#define CFI_POP(REG)						\
	.cfi_adjust_cfa_offset -4;				\
	.cfi_restore REG

#define PUSH(REG)	pushl REG; CFI_PUSH(REG)
#define POP(REG)	popl REG; CFI_POP(REG)

/* Stack offsets of the arguments after ENTRANCE has pushed EBX
   (4 bytes of return address + 4 bytes of saved EBX).  */
#define PARMS		8	/* Preserve EBX.  */
#define DST		PARMS
#define CHR		(DST+4)
#define LEN		(CHR+4)
#define CHK_DST_LEN	(LEN+4)

/* The function result is always the original destination pointer.  */
#define SETRTNVAL	movl DST(%esp), %eax

/* ENTRANCE saves EBX.  RETURN_END restores it and returns; RETURN does the
   same but then re-asserts the "EBX pushed" CFI state for the code that
   follows the ret, so it is the form used for every return except the
   last one in the function.  */
# define ENTRANCE	PUSH(%ebx);
# define RETURN_END	POP(%ebx); ret
# define RETURN		RETURN_END; CFI_PUSH(%ebx)
# define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets, indexed by ECX.  EDX is advanced by
   ECX on the way, so the branch target sees EDX pointing one past the
   last byte to be written.  Clobbers EBX and the flags.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
	/* We first load PC into EBX.  */			\
	call	__x86.get_pc_thunk.bx;				\
	/* Get the address of the jump table.  */		\
	add	$(TABLE - .), %ebx;				\
	/* Get the entry and convert the relative offset to the	\
	   absolute address.  */				\
	add	(%ebx,%ecx,4), %ebx;				\
	add	%ecx, %edx;					\
	/* We loaded the jump table and adjusted EDX. Go.  */	\
	jmp	*%ebx

/* Write 128 bytes of XMM0 at the 16-byte aligned address in EDX with
   ordinary aligned stores.  These stores do not modify EFLAGS, which the
   unrolled loop below depends on.  */
#define FILL_128_CACHED						\
	movdqa	%xmm0, (%edx);					\
	movaps	%xmm0, 0x10(%edx);				\
	movaps	%xmm0, 0x20(%edx);				\
	movaps	%xmm0, 0x30(%edx);				\
	movaps	%xmm0, 0x40(%edx);				\
	movaps	%xmm0, 0x50(%edx);				\
	movaps	%xmm0, 0x60(%edx);				\
	movaps	%xmm0, 0x70(%edx)

/* Same 128-byte fill, using non-temporal stores.  */
#define FILL_128_NT						\
	movntdq	%xmm0, (%edx);					\
	movntdq	%xmm0, 0x10(%edx);				\
	movntdq	%xmm0, 0x20(%edx);				\
	movntdq	%xmm0, 0x30(%edx);				\
	movntdq	%xmm0, 0x40(%edx);				\
	movntdq	%xmm0, 0x50(%edx);				\
	movntdq	%xmm0, 0x60(%edx);				\
	movntdq	%xmm0, 0x70(%edx)

/* PC thunk: returns the caller's return address (i.e. the address of the
   instruction after the call) in EBX.  */
	.section	.gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
	.globl	__x86.get_pc_thunk.bx
	.hidden	__x86.get_pc_thunk.bx
	ALIGN(4)
	.type	__x86.get_pc_thunk.bx,@function
__x86.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret

/* Checked entry point: validate LEN against CHK_DST_LEN, then share the
   body of memset_generic.  */
ENTRY(__memset_chk_generic)
	ENTRANCE

	movl	LEN(%esp), %ecx
	cmpl	CHK_DST_LEN(%esp), %ecx
	jna	L(memset_length_loaded)		/* LEN <= CHK_DST_LEN: proceed.  */

	POP(%ebx)	/* Undo ENTRANCE without returning.  */
	jmp	__memset_chk_fail
END(__memset_chk_generic)

	.section .text.sse2,"ax",@progbits
	ALIGN(4)
ENTRY(memset_generic)
	ENTRANCE

	movl	LEN(%esp), %ecx
L(memset_length_loaded):
	/* Nothing to write when LEN == 0.  */
	cmp	$0, %ecx
	ja	L(1byteormore)
	SETRTNVAL
	RETURN

L(1byteormore):
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	/* Fill the whole EAX with pattern.  */
	movl	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
	movl	DST(%esp), %edx
	cmp	$1, %ecx
	je	L(1byte)
	cmp	$16, %ecx
	jae	L(16bytesormore)

	/* 2..15 bytes: one store anchored at the start and one anchored at
	   the end; the two may overlap.  */
	cmp	$4, %ecx
	jb	L(4bytesless)
	movl	%eax, (%edx)
	movl	%eax, -4(%edx, %ecx)
	cmp	$8, %ecx
	jb	L(8bytesless)
	movl	%eax, 4(%edx)
	movl	%eax, -8(%edx, %ecx)
L(8bytesless):
	SETRTNVAL
	RETURN

L(4bytesless):
	/* 2 or 3 bytes.  */
	movw	%ax, (%edx)
	movw	%ax, -2(%edx, %ecx)
	SETRTNVAL
	RETURN

L(1byte):
	movb	%al, (%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(16bytesormore):
	/* Broadcast the 4-byte pattern to all of XMM0.  */
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0

	cmp	$64, %ecx
	ja	L(64bytesmore)
	/* 16..64 bytes: unaligned 16-byte stores from both ends.  */
	movdqu	%xmm0, (%edx)
	movdqu	%xmm0, -16(%edx, %ecx)
	cmp	$32, %ecx
	jbe	L(32bytesless)
	movdqu	%xmm0, 16(%edx)
	movdqu	%xmm0, -32(%edx, %ecx)
L(32bytesless):
	SETRTNVAL
	RETURN

L(64bytesmore):
	testl	$0xf, %edx
	jz	L(aligned_16)
L(not_aligned_16):
	/* Cover the unaligned head with one unaligned store, then round EDX
	   up to the next 16-byte boundary and shrink ECX by the number of
	   bytes skipped.  EAX is borrowed as a temporary and reloaded with
	   the pattern from XMM0 afterwards.  */
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	sub	%edx, %eax
	add	%eax, %ecx
	movd	%xmm0, %eax

	ALIGN(4)
L(aligned_16):
	/* EDX is 16-byte aligned from here on.  */
	cmp	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

	ALIGN(4)
L(128bytesormore):
	/* Choose a bulk loop by comparing ECX with the two cache-size
	   constants.  */
	PUSH(%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)

	POP(%ebx)

	PUSH(%ebx)
	mov	$DATA_CACHE_SIZE, %ebx

	cmp	%ebx, %ecx
	jae	L(128bytes_L2_normal)

	/* ECX < DATA_CACHE_SIZE.  The loop is unrolled twice and keeps ECX
	   biased by -128 so the borrow/no-borrow result of each SUB can be
	   tested after the stores; the SSE stores and LEA leave the flags
	   untouched.  */
	subl	$128, %ecx
L(128bytesormore_normal):
	sub	$128, %ecx
	FILL_128_CACHED
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	FILL_128_CACHED
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Remove the -128 bias; ECX is now the 0..127 byte tail.  */
	lea	128(%ecx), %ecx
	POP(%ebx)
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

	ALIGN(4)
L(128bytes_L2_normal):
	/* DATA_CACHE_SIZE <= ECX < SHARED_CACHE_SIZE: 128 bytes per
	   iteration with prefetch hints ahead of the cursor.  */
	prefetchnta	0x380(%edx)
	prefetchnta	0x3c0(%edx)
	sub	$128, %ecx
	FILL_128_CACHED
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
	POP(%ebx)
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

L(128bytesormore_nt_start):
	/* ECX >= SHARED_CACHE_SIZE (held in EBX).  Split the work: EBX counts
	   down the bytes written with cached stores, ECX keeps the rest.  */
	sub	%ebx, %ecx
	ALIGN(4)
L(128bytesormore_shared_cache_loop):
	prefetchnta	0x3c0(%edx)
	prefetchnta	0x380(%edx)
	sub	$0x80, %ebx
	FILL_128_CACHED
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN(4)
L(128bytesormore_nt):
	/* Remaining full 128-byte chunks go out with non-temporal stores,
	   followed by a store fence.  */
	sub	$0x80, %ecx
	FILL_128_NT
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	sfence
L(shared_cache_loop_end):
	POP(%ebx)
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))


	/* Tail dispatch table: entry N is the table-relative offset of the
	   code that writes the final N bytes (N = 0..127).  */
	.pushsection .rodata.sse2,"a",@progbits
	ALIGN(2)
L(table_16_128bytes):
	.int	JMPTBL(L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection

	/* Tail writers.  On entry EDX points one past the last byte to fill.
	   The sixteen chains below are grouped by (N mod 16): each label
	   stores one aligned 16-byte block and falls through to the next
	   smaller size, and the final 0..15 bytes are finished with
	   8/4/2/1-byte stores from XMM0/EAX.  */

	/* N mod 16 == 0 */
	ALIGN(4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	/* N mod 16 == 1 */
	ALIGN(4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 2 */
	ALIGN(4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 3 */
	ALIGN(4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 4 */
	ALIGN(4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 5 */
	ALIGN(4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 6 */
	ALIGN(4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 7 */
	ALIGN(4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 8 */
	ALIGN(4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 9 */
	ALIGN(4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 10 */
	ALIGN(4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 11 */
	ALIGN(4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 12 */
	ALIGN(4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 13 */
	ALIGN(4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 14 */
	ALIGN(4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* N mod 16 == 15 */
	ALIGN(4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN_END

END(memset_generic)