/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "cache.h"

#ifndef MEMSET
# define MEMSET		android_memset16
#endif

#ifndef L
# define L(label)	.L##label
#endif

#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef ENTRY
# define ENTRY(name)		\
	.type name, @function;	\
	.globl name;		\
	.p2align 4;		\
name:				\
	cfi_startproc
#endif

#ifndef END
# define END(name)		\
	cfi_endproc;		\
	.size name, .-name
#endif

/* A jump table entry: the offset of label I relative to table base B.  */
#define JMPTBL(I, B)	I - B

/* Branch to an entry in a jump table.  TABLE is a jump table with
   relative offsets.  INDEX is a register that contains the index into
   the jump table.  SCALE is the scale of INDEX.
   Clobbers INDEX and %r11.  */
#define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)	\
	lea	TABLE(%rip), %r11;			\
	movslq	(%r11, INDEX, SCALE), INDEX;		\
	lea	(%r11, INDEX), INDEX;			\
	jmp	*INDEX

/*
 * MEMSET (android_memset16 unless overridden): fill a buffer with a
 * repeated 16-bit pattern.
 *
 * NOTE(review): presumably declared in C as
 *     void android_memset16(uint16_t *dst, uint16_t value, size_t size);
 * confirm against the declaring header.
 *
 * Syntax: AT&T, preprocessed with cpp.
 * In:     rdi = destination address (an odd address is handled below)
 *         si  = 16-bit pattern
 *         rdx = size in bytes; it is halved into a word count first, so
 *               an odd trailing byte is never written
 *         (rdi/rsi/rdx is the System V AMD64 argument order)
 * Out:    none (rax is not written)
 * Clobb:  rcx, rdx, rsi, rdi, r11, xmm0, flags
 *
 * Strategy:
 *   - Fewer than 32 words: advance rdi to the end of the buffer and jump
 *     into a chain of unrolled stores that use negative offsets.
 *   - Otherwise: fix up an odd destination, broadcast the pattern into
 *     xmm0, align rdi to 16 bytes, store 128-byte blocks (non-temporal
 *     stores when the remaining size exceeds SHARED_CACHE_SIZE), then
 *     finish the tail of fewer than 128 bytes through a second jump
 *     table.
 *
 * All byte-count comparisons are unsigned (jae/jb/ja), matching the
 * entry check.  A signed comparison would send a size with bit 63 set
 * into the short tail dispatch with an out-of-range jump table index.
 */
	.section .text.sse2,"ax",@progbits
	ALIGN (4)
ENTRY (MEMSET)
	shr	$1, %rdx		/* rdx = count of 16-bit words */
	movzwl	%si, %ecx
	/* Fill the whole ECX with pattern.  */
	shl	$16, %esi
	or	%esi, %ecx		/* ecx = pattern:pattern */

	cmp	$32, %rdx
	jae	L(32wordsormore)

L(write_less32words):
	/* rdi = end of buffer; the chains below store at negative offsets.  */
	lea	(%rdi, %rdx, 2), %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(table_less32words), %rdx, 4)

	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_less32words):
	.int	JMPTBL (L(write_0words), L(table_less32words))
	.int	JMPTBL (L(write_1words), L(table_less32words))
	.int	JMPTBL (L(write_2words), L(table_less32words))
	.int	JMPTBL (L(write_3words), L(table_less32words))
	.int	JMPTBL (L(write_4words), L(table_less32words))
	.int	JMPTBL (L(write_5words), L(table_less32words))
	.int	JMPTBL (L(write_6words), L(table_less32words))
	.int	JMPTBL (L(write_7words), L(table_less32words))
	.int	JMPTBL (L(write_8words), L(table_less32words))
	.int	JMPTBL (L(write_9words), L(table_less32words))
	.int	JMPTBL (L(write_10words), L(table_less32words))
	.int	JMPTBL (L(write_11words), L(table_less32words))
	.int	JMPTBL (L(write_12words), L(table_less32words))
	.int	JMPTBL (L(write_13words), L(table_less32words))
	.int	JMPTBL (L(write_14words), L(table_less32words))
	.int	JMPTBL (L(write_15words), L(table_less32words))
	.int	JMPTBL (L(write_16words), L(table_less32words))
	.int	JMPTBL (L(write_17words), L(table_less32words))
	.int	JMPTBL (L(write_18words), L(table_less32words))
	.int	JMPTBL (L(write_19words), L(table_less32words))
	.int	JMPTBL (L(write_20words), L(table_less32words))
	.int	JMPTBL (L(write_21words), L(table_less32words))
	.int	JMPTBL (L(write_22words), L(table_less32words))
	.int	JMPTBL (L(write_23words), L(table_less32words))
	.int	JMPTBL (L(write_24words), L(table_less32words))
	.int	JMPTBL (L(write_25words), L(table_less32words))
	.int	JMPTBL (L(write_26words), L(table_less32words))
	.int	JMPTBL (L(write_27words), L(table_less32words))
	.int	JMPTBL (L(write_28words), L(table_less32words))
	.int	JMPTBL (L(write_29words), L(table_less32words))
	.int	JMPTBL (L(write_30words), L(table_less32words))
	.int	JMPTBL (L(write_31words), L(table_less32words))
	.popsection

	/* Word counts that are multiples of 4: 8-byte steps, nothing left.  */
	ALIGN (4)
L(write_28words):
	movl	%ecx, -56(%rdi)
	movl	%ecx, -52(%rdi)
L(write_24words):
	movl	%ecx, -48(%rdi)
	movl	%ecx, -44(%rdi)
L(write_20words):
	movl	%ecx, -40(%rdi)
	movl	%ecx, -36(%rdi)
L(write_16words):
	movl	%ecx, -32(%rdi)
	movl	%ecx, -28(%rdi)
L(write_12words):
	movl	%ecx, -24(%rdi)
	movl	%ecx, -20(%rdi)
L(write_8words):
	movl	%ecx, -16(%rdi)
	movl	%ecx, -12(%rdi)
L(write_4words):
	movl	%ecx, -8(%rdi)
	movl	%ecx, -4(%rdi)
L(write_0words):
	ret

	/* Word counts of the form 4k + 1: one trailing 16-bit store.  */
	ALIGN (4)
L(write_29words):
	movl	%ecx, -58(%rdi)
	movl	%ecx, -54(%rdi)
L(write_25words):
	movl	%ecx, -50(%rdi)
	movl	%ecx, -46(%rdi)
L(write_21words):
	movl	%ecx, -42(%rdi)
	movl	%ecx, -38(%rdi)
L(write_17words):
	movl	%ecx, -34(%rdi)
	movl	%ecx, -30(%rdi)
L(write_13words):
	movl	%ecx, -26(%rdi)
	movl	%ecx, -22(%rdi)
L(write_9words):
	movl	%ecx, -18(%rdi)
	movl	%ecx, -14(%rdi)
L(write_5words):
	movl	%ecx, -10(%rdi)
	movl	%ecx, -6(%rdi)
L(write_1words):
	movw	%cx, -2(%rdi)
	ret

	/* Word counts of the form 4k + 2: one trailing 32-bit store.  */
	ALIGN (4)
L(write_30words):
	movl	%ecx, -60(%rdi)
	movl	%ecx, -56(%rdi)
L(write_26words):
	movl	%ecx, -52(%rdi)
	movl	%ecx, -48(%rdi)
L(write_22words):
	movl	%ecx, -44(%rdi)
	movl	%ecx, -40(%rdi)
L(write_18words):
	movl	%ecx, -36(%rdi)
	movl	%ecx, -32(%rdi)
L(write_14words):
	movl	%ecx, -28(%rdi)
	movl	%ecx, -24(%rdi)
L(write_10words):
	movl	%ecx, -20(%rdi)
	movl	%ecx, -16(%rdi)
L(write_6words):
	movl	%ecx, -12(%rdi)
	movl	%ecx, -8(%rdi)
L(write_2words):
	movl	%ecx, -4(%rdi)
	ret

	/* Word counts of the form 4k + 3: trailing 32-bit + 16-bit stores.  */
	ALIGN (4)
L(write_31words):
	movl	%ecx, -62(%rdi)
	movl	%ecx, -58(%rdi)
L(write_27words):
	movl	%ecx, -54(%rdi)
	movl	%ecx, -50(%rdi)
L(write_23words):
	movl	%ecx, -46(%rdi)
	movl	%ecx, -42(%rdi)
L(write_19words):
	movl	%ecx, -38(%rdi)
	movl	%ecx, -34(%rdi)
L(write_15words):
	movl	%ecx, -30(%rdi)
	movl	%ecx, -26(%rdi)
L(write_11words):
	movl	%ecx, -22(%rdi)
	movl	%ecx, -18(%rdi)
L(write_7words):
	movl	%ecx, -14(%rdi)
	movl	%ecx, -10(%rdi)
L(write_3words):
	movl	%ecx, -6(%rdi)
	movw	%cx, -2(%rdi)
	ret

	ALIGN (4)
L(32wordsormore):
	shl	$1, %rdx		/* rdx = size in bytes (even, >= 64) */
	test	$0x01, %edi
	jz	L(aligned2bytes)
	/* Odd destination: store the first and the last 4 bytes with the
	   unrotated pattern, then step one byte forward and rotate the
	   pattern by one byte so that the 2-byte aligned stores which
	   follow continue the same byte sequence.  */
	movl	%ecx, (%rdi)
	movl	%ecx, -4(%rdi, %rdx)
	sub	$2, %rdx
	add	$1, %rdi
	rol	$8, %ecx
L(aligned2bytes):
	/* Fill xmm0 with the pattern.  */
	movd	%ecx, %xmm0
	pshufd	$0, %xmm0, %xmm0

	testl	$0xf, %edi
	jz	L(aligned_16)
	/* RDX (bytes) is at least 62 here and RDI is not 16-byte aligned.
	   Cover the head with one unaligned 16-byte store, round RDI up to
	   the next 16-byte boundary and shrink RDX by the bytes skipped.  */
	movdqu	%xmm0, (%rdi)
	mov	%rdi, %rsi
	and	$-16, %rdi
	add	$16, %rdi
	sub	%rdi, %rsi		/* rsi = old rdi - new rdi (negative) */
	add	%rsi, %rdx

	ALIGN (4)
L(aligned_16):
	/* rdi is 16-byte aligned; rdx = bytes remaining (even).  */
	cmp	$128, %rdx
	jae	L(128bytesormore)	/* unsigned: rdx is a byte count */

L(aligned_16_less128bytes):
	add	%rdx, %rdi		/* rdi = end of buffer */
	shr	$1, %rdx		/* table index = remaining words */
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes), %rdx, 4)

	ALIGN (4)
L(128bytesormore):
	/* Use non-temporal stores when the remaining size exceeds
	   SHARED_CACHE_SIZE (defined outside this file; assumed to be a
	   positive constant that fits a sign-extended 32-bit immediate;
	   TODO confirm in cache.h).  */
	cmp	$SHARED_CACHE_SIZE, %rdx
	ja	L(128bytesormore_nt)

	/* Loop unrolled four times; each step stores 128 aligned bytes and
	   leaves the loop as soon as fewer than 128 bytes remain.  */
L(128bytesormore_normal):
	sub	$128, %rdx
	movdqa	%xmm0, (%rdi)
	movdqa	%xmm0, 0x10(%rdi)
	movdqa	%xmm0, 0x20(%rdi)
	movdqa	%xmm0, 0x30(%rdi)
	movdqa	%xmm0, 0x40(%rdi)
	movdqa	%xmm0, 0x50(%rdi)
	movdqa	%xmm0, 0x60(%rdi)
	movdqa	%xmm0, 0x70(%rdi)
	lea	128(%rdi), %rdi
	cmp	$128, %rdx
	jb	L(128bytesless_normal)

	sub	$128, %rdx
	movdqa	%xmm0, (%rdi)
	movdqa	%xmm0, 0x10(%rdi)
	movdqa	%xmm0, 0x20(%rdi)
	movdqa	%xmm0, 0x30(%rdi)
	movdqa	%xmm0, 0x40(%rdi)
	movdqa	%xmm0, 0x50(%rdi)
	movdqa	%xmm0, 0x60(%rdi)
	movdqa	%xmm0, 0x70(%rdi)
	lea	128(%rdi), %rdi
	cmp	$128, %rdx
	jb	L(128bytesless_normal)

	sub	$128, %rdx
	movdqa	%xmm0, (%rdi)
	movdqa	%xmm0, 0x10(%rdi)
	movdqa	%xmm0, 0x20(%rdi)
	movdqa	%xmm0, 0x30(%rdi)
	movdqa	%xmm0, 0x40(%rdi)
	movdqa	%xmm0, 0x50(%rdi)
	movdqa	%xmm0, 0x60(%rdi)
	movdqa	%xmm0, 0x70(%rdi)
	lea	128(%rdi), %rdi
	cmp	$128, %rdx
	jb	L(128bytesless_normal)

	sub	$128, %rdx
	movdqa	%xmm0, (%rdi)
	movdqa	%xmm0, 0x10(%rdi)
	movdqa	%xmm0, 0x20(%rdi)
	movdqa	%xmm0, 0x30(%rdi)
	movdqa	%xmm0, 0x40(%rdi)
	movdqa	%xmm0, 0x50(%rdi)
	movdqa	%xmm0, 0x60(%rdi)
	movdqa	%xmm0, 0x70(%rdi)
	lea	128(%rdi), %rdi
	cmp	$128, %rdx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	add	%rdx, %rdi		/* rdi = end of buffer */
	shr	$1, %rdx		/* table index = remaining words */
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes), %rdx, 4)

	ALIGN (4)
L(128bytesormore_nt):
	sub	$128, %rdx
	movntdq	%xmm0, (%rdi)
	movntdq	%xmm0, 0x10(%rdi)
	movntdq	%xmm0, 0x20(%rdi)
	movntdq	%xmm0, 0x30(%rdi)
	movntdq	%xmm0, 0x40(%rdi)
	movntdq	%xmm0, 0x50(%rdi)
	movntdq	%xmm0, 0x60(%rdi)
	movntdq	%xmm0, 0x70(%rdi)
	lea	128(%rdi), %rdi
	cmp	$128, %rdx
	jae	L(128bytesormore_nt)

	/* Order the non-temporal stores before the ordinary tail stores.  */
	sfence
	add	%rdx, %rdi		/* rdi = end of buffer */
	shr	$1, %rdx		/* table index = remaining words */
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes), %rdx, 4)

	/* One entry per even byte count from 0 to 126, indexed by words.  */
	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_16_128bytes):
	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
	.popsection

	/* Tail chains.  On entry rdi = 16-byte aligned address + remaining
	   byte count, so every movdqa at -remaining, -remaining + 16, ...
	   is aligned; the final 0 to 14 bytes use narrower stores.  */
	ALIGN (4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%rdi)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%rdi)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%rdi)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%rdi)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%rdi)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%rdi)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%rdi)
L(aligned_16_0bytes):
	ret

	ALIGN (4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%rdi)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%rdi)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%rdi)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%rdi)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%rdi)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%rdi)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%rdi)
L(aligned_16_2bytes):
	movw	%cx, -2(%rdi)
	ret

	ALIGN (4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%rdi)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%rdi)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%rdi)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%rdi)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%rdi)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%rdi)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%rdi)
L(aligned_16_4bytes):
	movl	%ecx, -4(%rdi)
	ret

	ALIGN (4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%rdi)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%rdi)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%rdi)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%rdi)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%rdi)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%rdi)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%rdi)
L(aligned_16_6bytes):
	movl	%ecx, -6(%rdi)
	movw	%cx, -2(%rdi)
	ret

	ALIGN (4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%rdi)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%rdi)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%rdi)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%rdi)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%rdi)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%rdi)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%rdi)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%rdi)
	ret

	ALIGN (4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%rdi)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%rdi)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%rdi)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%rdi)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%rdi)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%rdi)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%rdi)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%rdi)
	movw	%cx, -2(%rdi)
	ret

	ALIGN (4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%rdi)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%rdi)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%rdi)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%rdi)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%rdi)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%rdi)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%rdi)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%rdi)
	movl	%ecx, -4(%rdi)
	ret

	ALIGN (4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%rdi)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%rdi)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%rdi)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%rdi)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%rdi)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%rdi)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%rdi)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%rdi)
	movl	%ecx, -6(%rdi)
	movw	%cx, -2(%rdi)
	ret

END (MEMSET)