• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #include "examples.h"
28 
29 using namespace vixl;
30 using namespace vixl::aarch32;
31 
32 #define __ masm->
33 
GenerateMandelBrot(MacroAssembler * masm)34 void GenerateMandelBrot(MacroAssembler* masm) {
35   const QRegister kCReal = q0;
36   const QRegister kCImag = q1;
37 
38   const QRegister kCRealStep = q13;
39   const QRegister kCImagStep = q14;
40 
41   const QRegister kModSqLimit = q15;
42 
43   // Save register values.
44   __ Push(RegisterList(r4, r5, r6));
45 
46   __ Vmov(F32, kCRealStep, 0.125);
47   __ Vmov(F32, kCImagStep, 0.0625);
48 
49   const Register kZero = r2;
50   __ Mov(kZero, 0);
51 
52   const DRegister kStars = d6;
53   const DRegister kSpaces = d7;
54   // Output characters - packed 4 characters into 32 bits.
55   __ Vmov(I8, kStars, '*');
56   __ Vmov(I8, kSpaces, ' ');
57 
58   const DRegisterLane kNegTwo = DRegisterLane(d7, 1);
59   __ Vmov(s15, -2.0);
60 
61   // Imaginary part of c.
62   __ Vdup(Untyped32, kCImag, kNegTwo);
63 
64   // Max modulus squared.
65   __ Vmov(F32, kModSqLimit, 4.0);
66 
67   // Height of output in characters.
68   __ Mov(r4, 64);
69 
70   // String length will be 129, so need 132 bytes of space.
71   const uint32_t kStringLength = 132;
72 
73   // Make space for our string.
74   __ Sub(sp, sp, kStringLength);
75 
76   // Set up a starting pointer for the string.
77   const Register kStringPtr = r6;
78   __ Mov(kStringPtr, sp);
79 
80   // Loop over imaginary values of c from -2 to 2, taking
81   // 64 equally spaced values in the range.
82   {
83     Label c_imag_loop;
84 
85     __ Bind(&c_imag_loop);
86 
87     // Real part of c.
88     // Store 4 equally spaced values in q0 (kCReal) to use SIMD.
89     __ Vmov(s0, -2.0);
90     __ Vmov(s1, -1.96875);
91     __ Vmov(s2, -1.9375);
92     __ Vmov(s3, -1.90625);
93 
94     // Width of output in terms of sets of 4 characters - twice that
95     // of height to compensate for ratio of character height to width.
96     __ Mov(r5, 32);
97 
98     const Register kWriteCursor = r3;
99     // Set a cursor ready to write the next line.
100     __ Mov(kWriteCursor, kStringPtr);
101 
102     // Loop over real values of c from -2 to 2, processing
103     // 4 different values simultaneously using SIMD.
104     {
105       const QRegister kFlags = q2;
106       const DRegister kLowerFlags = d4;
107 
108       Label c_real_loop;
109       __ Bind(&c_real_loop);
110 
111       // Get number of iterations.
112       __ Add(r1, r0, 1);
113 
114       // Perform the iterations of z(n+1) = zn^2 + c using SIMD.
115       // If the result is that c is in the set, the element of
116       // kFlags will be 0, else ~0.
117       {
118         const QRegister kZReal = q8;
119         const QRegister kZImag = q9;
120 
121         // Real part of z.
122         __ Vmov(F32, kZReal, 0.0);
123 
124         // Imaginary part of z.
125         __ Vmov(F32, kZImag, 0.0);
126 
127         __ Vmov(F32, kFlags, 0.0);
128 
129         Label iterative_formula_start, iterative_formula_end;
130         __ Bind(&iterative_formula_start);
131         __ Subs(r1, r1, 1);
132         __ B(le, &iterative_formula_end);
133 
134         // z(n+1) = zn^2 + c.
135         // re(z(n+1)) = re(c) + re(zn)^2 - im(zn)^2.
136         // im(z(n+1)) = im(c) + 2 * re(zn) * im(zn)
137 
138         __ Vmul(F32, q10, kZReal, kZImag);  // re(zn) * im(zn)
139 
140         __ Vmul(F32, kZReal, kZReal, kZReal);  // re(zn)^2
141         __ Vadd(F32, kZReal, kCReal, kZReal);  // re(c) + re(zn)^2
142         __ Vmls(F32, kZReal, kZImag, kZImag);  // re(c) + re(zn)^2 - im(zn)^2
143 
144         __ Vmov(F32, kZImag, kCImag);        // im(c)
145         __ Vmls(F32, kZImag, q10, kNegTwo);  // im(c) + 2 * re(zn) * im(zn)
146 
147         __ Vmul(F32, q10, kZReal, kZReal);    // re(z(n+1))^2
148         __ Vmla(F32, q10, kZImag, kZImag);    // re(z(n+1))^2 + im(z(n+1))^2
149         __ Vcgt(F32, q10, q10, kModSqLimit);  // |z(n+1)|^2 > 4 ? ~0 : 0
150         __ Vorr(F32, kFlags, kFlags, q10);    // (~0/0) | above result
151 
152         __ B(&iterative_formula_start);
153         __ Bind(&iterative_formula_end);
154       }
155 
156       // Narrow twice so that each mask is 8 bits, packed into
157       // a single 32 bit register s4.
158       // kLowerFlags is the lower half of kFlags, so the second narrow will
159       // be working on the results of the first to halve the size of each
160       // representation again.
161       __ Vmovn(I32, kLowerFlags, kFlags);
162       __ Vmovn(I16, kLowerFlags, kFlags);
163 
164       // '*' if in set, ' ' if not.
165       __ Vbsl(Untyped32, kLowerFlags, kSpaces, kStars);
166 
167       // Add this to the string.
168       __ Vst1(Untyped32,
169               NeonRegisterList(kLowerFlags, 0),
170               AlignedMemOperand(kWriteCursor, k32BitAlign, PostIndex));
171 
172       // Increase real part of c.
173       __ Vadd(F32, kCReal, kCReal, kCRealStep);
174 
175       __ Subs(r5, r5, 1);
176       __ B(ne, &c_real_loop);
177     }
178 
179     // Put terminating character.
180     __ Strb(kZero, MemOperand(kWriteCursor));
181 
182     // Print the string.
183     __ Printf("%s\n", kStringPtr);
184 
185     // Increase imaginary part of c.
186     __ Vadd(F32, kCImag, kCImag, kCImagStep);
187 
188     __ Subs(r4, r4, 1);
189     __ B(ne, &c_imag_loop);
190   }
191   // Restore stack pointer.
192   __ Add(sp, sp, kStringLength);
193   // Restore register values.
194   __ Pop(RegisterList(r4, r5, r6));
195   __ Bx(lr);
196 }
197 
198 #ifndef TEST_EXAMPLES
main()199 int main() {
200   MacroAssembler masm;
201   // Generate the code for the example function.
202   Label mandelbrot;
203   masm.Bind(&mandelbrot);
204   GenerateMandelBrot(&masm);
205   masm.FinalizeCode();
206 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH32
207   // There is no simulator defined for VIXL AArch32.
208   printf("This example cannot be simulated\n");
209 #else
210   byte* code = masm.GetBuffer()->GetStartAddress<byte*>();
211   uint32_t code_size = masm.GetSizeOfCodeGenerated();
212   ExecutableMemory memory(code, code_size);
213   // Run the example function.
214   double (*mandelbrot_func)(uint32_t) =
215       memory.GetEntryPoint<double (*)(uint32_t)>(mandelbrot,
216                                                  masm.GetInstructionSetInUse());
217   uint32_t iterations = 1000;
218   (*mandelbrot_func)(iterations);
219 #endif
220   return 0;
221 }
222 #endif  // TEST_EXAMPLES
223