1 // Copyright 2017, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #include "examples.h"
28
29 #define __ masm->
30
GenerateMandelBrot(MacroAssembler * masm)31 void GenerateMandelBrot(MacroAssembler* masm) {
32 const QRegister kCReal = q0;
33 const QRegister kCImag = q1;
34
35 const QRegister kCRealStep = q13;
36 const QRegister kCImagStep = q14;
37
38 const QRegister kModSqLimit = q15;
39
40 // Save register values.
41 __ Push(RegisterList(r4, r5, r6));
42
43 __ Vmov(F32, kCRealStep, 0.125);
44 __ Vmov(F32, kCImagStep, 0.0625);
45
46 const Register kZero = r2;
47 __ Mov(kZero, 0);
48
49 const DRegister kStars = d6;
50 const DRegister kSpaces = d7;
51 // Output characters - packed 4 characters into 32 bits.
52 __ Vmov(I8, kStars, '*');
53 __ Vmov(I8, kSpaces, ' ');
54
55 const DRegisterLane kNegTwo = DRegisterLane(d7, 1);
56 __ Vmov(s15, -2.0);
57
58 // Imaginary part of c.
59 __ Vdup(Untyped32, kCImag, kNegTwo);
60
61 // Max modulus squared.
62 __ Vmov(F32, kModSqLimit, 4.0);
63
64 // Height of output in characters.
65 __ Mov(r4, 64);
66
67 // String length will be 129, so need 132 bytes of space.
68 const uint32_t kStringLength = 132;
69
70 // Make space for our string.
71 __ Sub(sp, sp, kStringLength);
72
73 // Set up a starting pointer for the string.
74 const Register kStringPtr = r6;
75 __ Mov(kStringPtr, sp);
76
77 // Loop over imaginary values of c from -2 to 2, taking
78 // 64 equally spaced values in the range.
79 {
80 Label c_imag_loop;
81
82 __ Bind(&c_imag_loop);
83
84 // Real part of c.
85 // Store 4 equally spaced values in q0 (kCReal) to use SIMD.
86 __ Vmov(s0, -2.0);
87 __ Vmov(s1, -1.96875);
88 __ Vmov(s2, -1.9375);
89 __ Vmov(s3, -1.90625);
90
91 // Width of output in terms of sets of 4 characters - twice that
92 // of height to compensate for ratio of character height to width.
93 __ Mov(r5, 32);
94
95 const Register kWriteCursor = r3;
96 // Set a cursor ready to write the next line.
97 __ Mov(kWriteCursor, kStringPtr);
98
99 // Loop over real values of c from -2 to 2, processing
100 // 4 different values simultaneously using SIMD.
101 {
102 const QRegister kFlags = q2;
103 const DRegister kLowerFlags = d4;
104
105 Label c_real_loop;
106 __ Bind(&c_real_loop);
107
108 // Get number of iterations.
109 __ Add(r1, r0, 1);
110
111 // Perform the iterations of z(n+1) = zn^2 + c using SIMD.
112 // If the result is that c is in the set, the element of
113 // kFlags will be 0, else ~0.
114 {
115 const QRegister kZReal = q8;
116 const QRegister kZImag = q9;
117
118 // Real part of z.
119 __ Vmov(F32, kZReal, 0.0);
120
121 // Imaginary part of z.
122 __ Vmov(F32, kZImag, 0.0);
123
124 __ Vmov(F32, kFlags, 0.0);
125
126 Label iterative_formula_start, iterative_formula_end;
127 __ Bind(&iterative_formula_start);
128 __ Subs(r1, r1, 1);
129 __ B(le, &iterative_formula_end);
130
131 // z(n+1) = zn^2 + c.
132 // re(z(n+1)) = re(c) + re(zn)^2 - im(zn)^2.
133 // im(z(n+1)) = im(c) + 2 * re(zn) * im(zn)
134
135 __ Vmul(F32, q10, kZReal, kZImag); // re(zn) * im(zn)
136
137 __ Vmul(F32, kZReal, kZReal, kZReal); // re(zn)^2
138 __ Vadd(F32, kZReal, kCReal, kZReal); // re(c) + re(zn)^2
139 __ Vmls(F32, kZReal, kZImag, kZImag); // re(c) + re(zn)^2 - im(zn)^2
140
141 __ Vmov(F32, kZImag, kCImag); // im(c)
142 __ Vmls(F32, kZImag, q10, kNegTwo); // im(c) + 2 * re(zn) * im(zn)
143
144 __ Vmul(F32, q10, kZReal, kZReal); // re(z(n+1))^2
145 __ Vmla(F32, q10, kZImag, kZImag); // re(z(n+1))^2 + im(z(n+1))^2
146 __ Vcgt(F32, q10, q10, kModSqLimit); // |z(n+1)|^2 > 4 ? ~0 : 0
147 __ Vorr(F32, kFlags, kFlags, q10); // (~0/0) | above result
148
149 __ B(&iterative_formula_start);
150 __ Bind(&iterative_formula_end);
151 }
152
153 // Narrow twice so that each mask is 8 bits, packed into
154 // a single 32 bit register s4.
155 // kLowerFlags is the lower half of kFlags, so the second narrow will
156 // be working on the results of the first to halve the size of each
157 // representation again.
158 __ Vmovn(I32, kLowerFlags, kFlags);
159 __ Vmovn(I16, kLowerFlags, kFlags);
160
161 // '*' if in set, ' ' if not.
162 __ Vbsl(Untyped32, kLowerFlags, kSpaces, kStars);
163
164 // Add this to the string.
165 __ Vst1(Untyped32,
166 NeonRegisterList(kLowerFlags, 0),
167 AlignedMemOperand(kWriteCursor, k32BitAlign, PostIndex));
168
169 // Increase real part of c.
170 __ Vadd(F32, kCReal, kCReal, kCRealStep);
171
172 __ Subs(r5, r5, 1);
173 __ B(ne, &c_real_loop);
174 }
175
176 // Put terminating character.
177 __ Strb(kZero, MemOperand(kWriteCursor));
178
179 // Print the string.
180 __ Printf("%s\n", kStringPtr);
181
182 // Increase imaginary part of c.
183 __ Vadd(F32, kCImag, kCImag, kCImagStep);
184
185 __ Subs(r4, r4, 1);
186 __ B(ne, &c_imag_loop);
187 }
188 // Restore stack pointer.
189 __ Add(sp, sp, kStringLength);
190 // Restore register values.
191 __ Pop(RegisterList(r4, r5, r6));
192 __ Bx(lr);
193 }
194
195 #ifndef TEST_EXAMPLES
main()196 int main() {
197 MacroAssembler masm;
198 // Generate the code for the example function.
199 Label mandelbrot;
200 masm.Bind(&mandelbrot);
201 GenerateMandelBrot(&masm);
202 masm.FinalizeCode();
203 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH32
204 // There is no simulator defined for VIXL AArch32.
205 printf("This example cannot be simulated\n");
206 #else
207 byte* code = masm.GetBuffer()->GetStartAddress<byte*>();
208 uint32_t code_size = masm.GetSizeOfCodeGenerated();
209 ExecutableMemory memory(code, code_size);
210 // Run the example function.
211 double (*mandelbrot_func)(uint32_t) =
212 memory.GetEntryPoint<double (*)(uint32_t)>(mandelbrot,
213 masm.GetInstructionSetInUse());
214 uint32_t iterations = 1000;
215 (*mandelbrot_func)(iterations);
216 #endif
217 return 0;
218 }
219 #endif // TEST_EXAMPLES
220