• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1;------------------------------------------------------------------------------
2;
3; Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>
4; This program and the accompanying materials
5; are licensed and made available under the terms and conditions of the BSD License
6; which accompanies this distribution.  The full text of the license may be found at
7; http://opensource.org/licenses/bsd-license.php.
8;
9; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
10; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
11;
12; Abstract:
13;
14;   Provide macro for register save/restore using SSE registers
15;
16;------------------------------------------------------------------------------
17
18;
19; Define SSE instruction set
20;
21%ifdef USE_SSE41_FLAG
22;
23; Define SSE macros using SSE 4.1 instructions
24; args 1:XMM, 2:IDX, 3:REG
; SXMMN xmm, idx, reg32                                  (SSE4.1 build)
;   %1 = XMM register that receives the value
;   %2 = dword lane index; only its low two bits are used
;   %3 = 32-bit general-purpose register to store
; One PINSRD writes %3 into the selected lane of %1; the other three
; lanes of %1 keep their contents and %3 itself is not modified.
%macro SXMMN 3
             pinsrd  %1, %3, (%2 & 3)
%endmacro
28
29;
30;args 1:XMM, 2:REG, 3:IDX
31;
; LXMMN xmm, reg32, idx                                  (SSE4.1 build)
;   %1 = XMM register to read from
;   %2 = 32-bit general-purpose register that receives the value
;   %3 = dword lane index; only its low two bits are used
; One PEXTRD copies the selected lane of %1 into %2; %1 is not modified.
%macro LXMMN 3
             pextrd  %2, %1, (%3 & 3)
%endmacro
35%else
36;
37; Define SSE macros using SSE 2 instructions
38; args 1:XMM, 2:IDX, 3:REG
; SXMMN xmm, idx, reg32                                  (SSE2 build)
;   %1 = XMM register that receives the value
;   %2 = dword lane index; only its low two bits are used
;   %3 = 32-bit general-purpose register to store
; Without PINSRD the dword is written as two 16-bit halves with PINSRW.
; %3 is rotated by 16 bits between the two inserts and rotated back at
; the end, so it leaves the macro with its original value; the rotates
; do modify the flags.
%macro SXMMN 3
             pinsrw  %1, %3, (%2 & 3) * 2       ; low  half of %3 -> word slot 2*idx
             ror     %3, 16                     ; swap halves: old high half is now low
             pinsrw  %1, %3, (%2 & 3) * 2 + 1   ; high half of %3 -> word slot 2*idx+1
             rol     %3, 16                     ; swap back: %3 is restored
%endmacro
45
46;
47;args 1:XMM, 2:REG,  3:IDX
48;
; LXMMN xmm, reg32, idx                                  (SSE2 build)
;   %1 = XMM register to read from
;   %2 = 32-bit general-purpose register that receives the value
;   %3 = dword lane index; only its low two bits are used (same rule as
;        the SSE4.1 variant and as SXMMN)
;
; Without PEXTRD the wanted lane is rotated down to lane 0, read with
; MOVD, and the register is then rotated the rest of the way round so
; that %1 ends up with its original lane order.
;
; 0E4h (11 10 01 00b) is the identity PSHUFD selector.  Shifting the
; repeated pattern 0E4E4E4h right by 2*k bits and keeping the low byte
; gives the selector that moves lane (i + k) mod 4 into lane i, i.e. a
; rotation by k lanes.  Rotating by k and then by (4 - k) mod 4 is the
; identity, which is what the second PSHUFD relies on.
;
; The index is masked and parenthesised so that an out-of-range value or
; an expression argument (e.g. "1+1") cannot yield a wrong selector.
%macro LXMMN 3
             pshufd  %1, %1, ((0E4E4E4h >> (((%3) & 3) * 2)) & 0FFh)              ; lane idx -> lane 0
             movd    %2, %1                                                       ; read lane 0
             pshufd  %1, %1, ((0E4E4E4h >> (((4 - ((%3) & 3)) & 3) * 2)) & 0FFh)  ; rotate by 4-idx: lanes restored
%endmacro
54%endif
55
56;
57; XMM7 to save/restore EBP, EBX, ESI, EDI
58;
; SAVE_REGS
;   Stores EBP, EBX, ESI and EDI into dword lanes 0, 1, 2 and 3 of XMM7
;   (in that order) and then invokes SAVE_ESP to store ESP as well.
;   Counterpart of LOAD_REGS.
%macro SAVE_REGS 0
             SXMMN   xmm7, 0, ebp       ; lane 0 <- EBP
             SXMMN   xmm7, 1, ebx       ; lane 1 <- EBX
             SXMMN   xmm7, 2, esi       ; lane 2 <- ESI
             SXMMN   xmm7, 3, edi       ; lane 3 <- EDI
             SAVE_ESP                   ; ESP is kept separately, see SAVE_ESP
%endmacro
66
; LOAD_REGS
;   Reloads EBP, EBX, ESI and EDI from dword lanes 0, 1, 2 and 3 of XMM7
;   (the layout written by SAVE_REGS) and then invokes LOAD_ESP to
;   reload ESP.
%macro LOAD_REGS 0
             LXMMN   xmm7, ebp, 0       ; EBP <- lane 0
             LXMMN   xmm7, ebx, 1       ; EBX <- lane 1
             LXMMN   xmm7, esi, 2       ; ESI <- lane 2
             LXMMN   xmm7, edi, 3       ; EDI <- lane 3
             LOAD_ESP                   ; ESP is kept separately, see LOAD_ESP
%endmacro
74
75;
76; XMM6 to save/restore EAX, EDX, ECX, ESP
77;
; LOAD_EAX - reload EAX from dword lane 1 of XMM6 (the lane SAVE_EAX writes).
%macro LOAD_EAX 0
             LXMMN   xmm6, eax, 1
%endmacro
81
; SAVE_EAX - store EAX into dword lane 1 of XMM6 (the lane LOAD_EAX reads).
%macro SAVE_EAX 0
             SXMMN   xmm6, 1, eax
%endmacro
85
; LOAD_EDX - reload EDX from dword lane 2 of XMM6 (the lane SAVE_EDX writes).
%macro LOAD_EDX 0
             LXMMN   xmm6, edx, 2
%endmacro
89
; SAVE_EDX - store EDX into dword lane 2 of XMM6 (the lane LOAD_EDX reads).
%macro SAVE_EDX 0
             SXMMN   xmm6, 2, edx
%endmacro
93
; SAVE_ECX - store ECX into dword lane 3 of XMM6 (the lane LOAD_ECX reads).
%macro SAVE_ECX 0
             SXMMN   xmm6, 3, ecx
%endmacro
97
; LOAD_ECX - reload ECX from dword lane 3 of XMM6 (the lane SAVE_ECX writes).
%macro LOAD_ECX 0
             LXMMN   xmm6, ecx, 3
%endmacro
101
; SAVE_ESP - store ESP into dword lane 0 of XMM6 (the lane LOAD_ESP reads).
;   When the SSE2 flavour of SXMMN is in effect, ESP is rotated by 16
;   bits and back while it is being stored, so it briefly holds a
;   rotated value inside this macro.
%macro SAVE_ESP 0
             SXMMN   xmm6, 0, esp
%endmacro
105
; LOAD_ESP - reload ESP from dword lane 0 of XMM6 (the lane SAVE_ESP writes).
;   Lane 0 is what MOVD reads, so no LXMMN lane shuffling is needed.
%macro LOAD_ESP 0
             movd    esp, xmm6
%endmacro
109;
110; XMM5 for calling stack
111; arg 1:Entry
; CALL_XMM Entry
;   %1 = address to transfer control to (anything valid as a MOV source)
;
; Call that keeps its return address in XMM5 instead of in memory.
; XMM5 is treated as a stack of four dword return addresses with the
; newest one in lane 0: the existing entries are shifted up by one lane
; (the entry in lane 3 is shifted out), the address of the instruction
; following this macro is written to lane 0, and control jumps to %1.
; The callee returns with RET_XMM.
;
; ESI is used as scratch and is overwritten; in the non-SSE4.1 build the
; ROR also modifies the flags.
%macro CALL_XMM 1
             pslldq  xmm5, 4                    ; push: make room in lane 0
             mov     esi, %%ReturnAddress       ; address to come back to
%ifdef USE_SSE41_FLAG
             pinsrd  xmm5, esi, 0               ; lane 0 <- return address
%else
             pinsrw  xmm5, esi, 0               ; lane 0, low  half
             ror     esi,  16
             pinsrw  xmm5, esi, 1               ; lane 0, high half
%endif
             mov     esi,  %1                   ; ESI is reloaded, so no ROL is needed above
             jmp     esi
%%ReturnAddress:
%endmacro
126
; RET_XMM
;   Return for code entered through CALL_XMM: takes the newest return
;   address from dword lane 0 of XMM5, shifts the remaining entries down
;   by one lane, and jumps to that address.  ESI is overwritten.
%macro RET_XMM 0
             movd    esi, xmm5          ; pop: lane 0 holds the newest return address
             psrldq  xmm5, 4            ; discard it; older entries move down one lane
             jmp     esi
%endmacro
132
; ENABLE_SSE
;   Initializes the x87 FPU, verifies through CPUID that the processor
;   supports SSE (and SSE4.1 when USE_SSE41_FLAG is defined), sets
;   CR4.OSFXSR and CR4.OSXMMEXCPT, and loads MXCSR.
;
;   If a required feature is missing, execution spins forever at the
;   error label inside the macro.
;
;   Overwrites EAX, EBX, ECX, EDX (CPUID outputs) and the flags.
;
;   The two control-word constants are emitted inline in the code stream
;   and jumped over.  All labels are macro-local (%%), so the macro can
;   be expanded more than once in a module and its label names cannot
;   collide with labels of the including file.
%macro ENABLE_SSE   0
            ;
            ; Initialize floating point units
            ;
            jmp     %%NextAddress
align 4
            ;
            ; Float control word initial value:
            ; all exceptions masked, double-precision, round-to-nearest
            ;
%%FpuControlWord:
            dw      027Fh
            ;
            ; MXCSR initial value:
            ; all exceptions masked (bits 7-12 set), round-to-nearest;
            ; flush-to-zero (bit 15) and denormals-are-zero (bit 6) are
            ; left clear
            ;
%%MmxControlWord:
            dd      01F80h
%%SseError:
            ;
            ; Processor has to support SSE
            ;
            jmp     %%SseError
%%NextAddress:
            finit
            fldcw   [%%FpuControlWord]

            ;
            ; Use the CPUID instruction (CPUID.01H:EDX.SSE[bit 25] = 1) to test
            ; whether the processor supports SSE instructions.
            ;
            ; NOTE(review): the non-SSE4.1 macros in this file are described as
            ; using SSE 2 instructions, but SSE2 (CPUID.01H:EDX[bit 26]) is not
            ; tested here - confirm that every target processor has SSE2.
            ;
            mov     eax, 1
            cpuid
            bt      edx, 25
            jnc     %%SseError

%ifdef USE_SSE41_FLAG
            ;
            ; SSE 4.1 support (CPUID.01H:ECX[bit 19])
            ;
            bt      ecx, 19
            jnc     %%SseError
%endif

            ;
            ; Set OSFXSR bit (bit #9) & OSXMMEXCPT bit (bit #10)
            ;
            mov     eax, cr4
            or      eax, 00000600h
            mov     cr4, eax

            ;
            ; The processor supports SSE instructions, so the ldmxcsr
            ; instruction can be used
            ;
            ldmxcsr [%%MmxControlWord]
%endmacro
188