1 | // |
---|
2 | // $Id$ |
---|
3 | // |
---|
4 | // satan.sa 3.3 12/19/90 |
---|
5 | // |
---|
6 | // The entry point satan computes the arctangent of an |
---|
7 | // input value. satand does the same except the input value is a |
---|
8 | // denormalized number. |
---|
9 | // |
---|
10 | // Input: Double-extended value in memory location pointed to by address |
---|
11 | // register a0. |
---|
12 | // |
---|
13 | // Output: Arctan(X) returned in floating-point register Fp0. |
---|
14 | // |
---|
15 | // Accuracy and Monotonicity: The returned result is within 2 ulps in |
---|
16 | // 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the |
---|
17 | // result is subsequently rounded to double precision. The |
---|
18 | // result is provably monotonic in double precision. |
---|
19 | // |
---|
20 | // Speed: The program satan takes approximately 160 cycles for input |
---|
21 | // argument X such that 1/16 < |X| < 16. For the other arguments, |
---|
22 | // the program will run no worse than 10% slower. |
---|
23 | // |
---|
24 | // Algorithm: |
---|
25 | // Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. |
---|
26 | // |
---|
27 | // Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3. |
---|
28 | // Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits |
---|
29 | // of X with a bit-1 attached at the 6-th bit position. Define u |
---|
30 | // to be u = (X-F) / (1 + X*F). |
---|
31 | // |
---|
32 | // Step 3. Approximate arctan(u) by a polynomial poly. |
---|
33 | // |
---|
34 | // Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values |
---|
35 | // calculated beforehand. Exit. |
---|
36 | // |
---|
37 | // Step 5. If |X| >= 16, go to Step 7. |
---|
38 | // |
---|
39 | // Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. |
---|
40 | // |
---|
41 | // Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'. |
---|
42 | // Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. |
---|
43 | // |
---|
44 | |
---|
45 | // Copyright (C) Motorola, Inc. 1990 |
---|
46 | // All Rights Reserved |
---|
47 | // |
---|
48 | // THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA |
---|
49 | // The copyright notice above does not evidence any |
---|
50 | // actual or intended publication of such source code. |
---|
51 | |
---|
52 | //satan idnt 2,1 | Motorola 040 Floating Point Software Package |
---|
53 | |
---|
54 | |section 8 |
---|
55 | |
---|
56 | #include "fpsp.defs" |
---|
57 | |
---|
58 | BOUNDS1: .long 0x3FFB8000,0x4002FFFF |
---|
59 | |
---|
60 | ONE: .long 0x3F800000 |
---|
61 | |
---|
62 | .long 0x00000000 |
---|
63 | |
---|
64 | ATANA3: .long 0xBFF6687E,0x314987D8 |
---|
65 | ATANA2: .long 0x4002AC69,0x34A26DB3 |
---|
66 | |
---|
67 | ATANA1: .long 0xBFC2476F,0x4E1DA28E |
---|
68 | ATANB6: .long 0x3FB34444,0x7F876989 |
---|
69 | |
---|
70 | ATANB5: .long 0xBFB744EE,0x7FAF45DB |
---|
71 | ATANB4: .long 0x3FBC71C6,0x46940220 |
---|
72 | |
---|
73 | ATANB3: .long 0xBFC24924,0x921872F9 |
---|
74 | ATANB2: .long 0x3FC99999,0x99998FA9 |
---|
75 | |
---|
76 | ATANB1: .long 0xBFD55555,0x55555555 |
---|
77 | ATANC5: .long 0xBFB70BF3,0x98539E6A |
---|
78 | |
---|
79 | ATANC4: .long 0x3FBC7187,0x962D1D7D |
---|
80 | ATANC3: .long 0xBFC24924,0x827107B8 |
---|
81 | |
---|
82 | ATANC2: .long 0x3FC99999,0x9996263E |
---|
83 | ATANC1: .long 0xBFD55555,0x55555536 |
---|
84 | |
---|
85 | PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 |
---|
86 | NPIBY2: .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000 |
---|
87 | PTINY: .long 0x00010000,0x80000000,0x00000000,0x00000000 |
---|
88 | NTINY: .long 0x80010000,0x80000000,0x00000000,0x00000000 |
---|
89 | |
---|
90 | ATANTBL: |
---|
91 | .long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000 |
---|
92 | .long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000 |
---|
93 | .long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000 |
---|
94 | .long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000 |
---|
95 | .long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000 |
---|
96 | .long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000 |
---|
97 | .long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000 |
---|
98 | .long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000 |
---|
99 | .long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000 |
---|
100 | .long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000 |
---|
101 | .long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000 |
---|
102 | .long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000 |
---|
103 | .long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000 |
---|
104 | .long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000 |
---|
105 | .long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000 |
---|
106 | .long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000 |
---|
107 | .long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000 |
---|
108 | .long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000 |
---|
109 | .long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000 |
---|
110 | .long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000 |
---|
111 | .long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000 |
---|
112 | .long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000 |
---|
113 | .long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000 |
---|
114 | .long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000 |
---|
115 | .long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000 |
---|
116 | .long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000 |
---|
117 | .long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000 |
---|
118 | .long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000 |
---|
119 | .long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000 |
---|
120 | .long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000 |
---|
121 | .long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000 |
---|
122 | .long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000 |
---|
123 | .long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000 |
---|
124 | .long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000 |
---|
125 | .long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000 |
---|
126 | .long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000 |
---|
127 | .long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000 |
---|
128 | .long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000 |
---|
129 | .long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000 |
---|
130 | .long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000 |
---|
131 | .long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000 |
---|
132 | .long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000 |
---|
133 | .long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000 |
---|
134 | .long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000 |
---|
135 | .long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000 |
---|
136 | .long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000 |
---|
137 | .long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000 |
---|
138 | .long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000 |
---|
139 | .long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000 |
---|
140 | .long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000 |
---|
141 | .long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000 |
---|
142 | .long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000 |
---|
143 | .long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000 |
---|
144 | .long 0x3FFE0000,0x97731420,0x365E538C,0x00000000 |
---|
145 | .long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000 |
---|
146 | .long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000 |
---|
147 | .long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000 |
---|
148 | .long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000 |
---|
149 | .long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000 |
---|
150 | .long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000 |
---|
151 | .long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000 |
---|
152 | .long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000 |
---|
153 | .long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000 |
---|
154 | .long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000 |
---|
155 | .long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000 |
---|
156 | .long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000 |
---|
157 | .long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000 |
---|
158 | .long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000 |
---|
159 | .long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000 |
---|
160 | .long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000 |
---|
161 | .long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000 |
---|
162 | .long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000 |
---|
163 | .long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000 |
---|
164 | .long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000 |
---|
165 | .long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000 |
---|
166 | .long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000 |
---|
167 | .long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000 |
---|
168 | .long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000 |
---|
169 | .long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000 |
---|
170 | .long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000 |
---|
171 | .long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000 |
---|
172 | .long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000 |
---|
173 | .long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000 |
---|
174 | .long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000 |
---|
175 | .long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000 |
---|
176 | .long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000 |
---|
177 | .long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000 |
---|
178 | .long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000 |
---|
179 | .long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000 |
---|
180 | .long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000 |
---|
181 | .long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000 |
---|
182 | .long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000 |
---|
183 | .long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000 |
---|
184 | .long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000 |
---|
185 | .long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000 |
---|
186 | .long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000 |
---|
187 | .long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000 |
---|
188 | .long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000 |
---|
189 | .long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000 |
---|
190 | .long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000 |
---|
191 | .long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000 |
---|
192 | .long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000 |
---|
193 | .long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000 |
---|
194 | .long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000 |
---|
195 | .long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000 |
---|
196 | .long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000 |
---|
197 | .long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000 |
---|
198 | .long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000 |
---|
199 | .long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000 |
---|
200 | .long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000 |
---|
201 | .long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000 |
---|
202 | .long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000 |
---|
203 | .long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000 |
---|
204 | .long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000 |
---|
205 | .long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000 |
---|
206 | .long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000 |
---|
207 | .long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000 |
---|
208 | .long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000 |
---|
209 | .long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000 |
---|
210 | .long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000 |
---|
211 | .long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000 |
---|
212 | .long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000 |
---|
213 | .long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000 |
---|
214 | .long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000 |
---|
215 | .long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000 |
---|
216 | .long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000 |
---|
217 | .long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000 |
---|
218 | .long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000 |
---|
219 | |
---|
220 | .set X,FP_SCR1 |
---|
221 | .set XDCARE,X+2 |
---|
222 | .set XFRAC,X+4 |
---|
223 | .set XFRACLO,X+8 |
---|
224 | |
---|
225 | .set ATANF,FP_SCR2 |
---|
226 | .set ATANFHI,ATANF+4 |
---|
227 | .set ATANFLO,ATANF+8 |
---|
228 | |
---|
229 | |
---|
230 | | xref t_frcinx |
---|
231 | |xref t_extdnrm |
---|
232 | |
---|
233 | .global satand |
---|
234 | satand: |
---|
235 | //--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT |
---|
236 | |
---|
237 | bra t_extdnrm |
---|
238 | |
---|
239 | .global satan |
---|
240 | satan: |
---|
241 | //--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S |
---|
242 | |
---|
243 | fmovex (%a0),%fp0 // ...LOAD INPUT |
---|
244 | |
---|
245 | movel (%a0),%d0 |
---|
246 | movew 4(%a0),%d0 |
---|
247 | fmovex %fp0,X(%a6) |
---|
248 | andil #0x7FFFFFFF,%d0 |
---|
249 | |
---|
250 | cmpil #0x3FFB8000,%d0 // ...|X| >= 1/16? |
---|
251 | bges ATANOK1 |
---|
252 | bra ATANSM |
---|
253 | |
---|
254 | ATANOK1: |
---|
255 | cmpil #0x4002FFFF,%d0 // ...|X| < 16 ? |
---|
256 | bles ATANMAIN |
---|
257 | bra ATANBIG |
---|
258 | |
---|
259 | |
---|
260 | //--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE |
---|
261 | //--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ). |
---|
262 | //--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN |
---|
263 | //--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE |
---|
264 | //--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS |
---|
265 | //--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR |
---|
266 | //--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO |
---|
267 | //--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDED UNDER THE |
---|
268 | //--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL |
---|
269 | //--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATE |
---|
270 | //--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION |
---|
271 | //--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION |
---|
272 | //--WILL INVOLVE A VERY LONG POLYNOMIAL. |
---|
273 | |
---|
274 | //--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS |
---|
275 | //--WE CHOSE F TO BE +-2^K * 1.BBBB1 |
---|
276 | //--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE |
---|
277 | //--SIXTH BITS IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE |
---|
278 | //--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS |
---|
279 | //-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|). |
---|
280 | |
---|
281 | ATANMAIN: |
---|
282 | |
---|
283 | movew #0x0000,XDCARE(%a6) // ...CLEAN UP X JUST IN CASE |
---|
284 | andil #0xF8000000,XFRAC(%a6) // ...FIRST 5 BITS |
---|
285 | oril #0x04000000,XFRAC(%a6) // ...SET 6-TH BIT TO 1 |
---|
286 | movel #0x00000000,XFRACLO(%a6) // ...LOCATION OF X IS NOW F |
---|
287 | |
---|
288 | fmovex %fp0,%fp1 // ...FP1 IS X |
---|
289 | fmulx X(%a6),%fp1 // ...FP1 IS X*F, NOTE THAT X*F > 0 |
---|
290 | fsubx X(%a6),%fp0 // ...FP0 IS X-F |
---|
291 | fadds #0x3F800000,%fp1 // ...FP1 IS 1 + X*F |
---|
292 | fdivx %fp1,%fp0 // ...FP0 IS U = (X-F)/(1+X*F) |
---|
293 | |
---|
294 | //--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|) |
---|
295 | //--CREATE ATAN(F) AND STORE IT IN ATANF, AND |
---|
296 | //--SAVE REGISTERS FP2. |
---|
297 | |
---|
298 | movel %d2,-(%a7) // ...SAVE d2 TEMPORARILY |
---|
299 | movel %d0,%d2 // ...THE EXPO AND 16 BITS OF X |
---|
300 | andil #0x00007800,%d0 // ...4 VARYING BITS OF F'S FRACTION |
---|
301 | andil #0x7FFF0000,%d2 // ...EXPONENT OF F |
---|
302 | subil #0x3FFB0000,%d2 // ...K+4 |
---|
303 | asrl #1,%d2 |
---|
304 | addl %d2,%d0 // ...THE 7 BITS IDENTIFYING F |
---|
305 | asrl #7,%d0 // ...INDEX INTO TBL OF ATAN(|F|) |
---|
306 | lea ATANTBL,%a1 |
---|
307 | addal %d0,%a1 // ...ADDRESS OF ATAN(|F|) |
---|
308 | movel (%a1)+,ATANF(%a6) |
---|
309 | movel (%a1)+,ATANFHI(%a6) |
---|
310 | movel (%a1)+,ATANFLO(%a6) // ...ATANF IS NOW ATAN(|F|) |
---|
311 | movel X(%a6),%d0 // ...LOAD SIGN AND EXPO. AGAIN |
---|
312 | andil #0x80000000,%d0 // ...SIGN(F) |
---|
313 | orl %d0,ATANF(%a6) // ...ATANF IS NOW SIGN(F)*ATAN(|F|) |
---|
314 | movel (%a7)+,%d2 // ...RESTORE d2 |
---|
315 | |
---|
316 | //--THAT'S ALL I HAVE TO DO FOR NOW, |
---|
317 | //--BUT ALAS, THE DIVIDE IS STILL CRANKING! |
---|
318 | |
---|
319 | //--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS |
---|
320 | //--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U |
---|
321 | //--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT. |
---|
322 | //--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3)) |
---|
323 | //--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3. |
---|
324 | //--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT |
---|
325 | //--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED |
---|
326 | |
---|
327 | |
---|
328 | fmovex %fp0,%fp1 |
---|
329 | fmulx %fp1,%fp1 |
---|
330 | fmoved ATANA3,%fp2 |
---|
331 | faddx %fp1,%fp2 // ...A3+V |
---|
332 | fmulx %fp1,%fp2 // ...V*(A3+V) |
---|
333 | fmulx %fp0,%fp1 // ...U*V |
---|
334 | faddd ATANA2,%fp2 // ...A2+V*(A3+V) |
---|
335 | fmuld ATANA1,%fp1 // ...A1*U*V |
---|
336 | fmulx %fp2,%fp1 // ...A1*U*V*(A2+V*(A3+V)) |
---|
337 | |
---|
338 | faddx %fp1,%fp0 // ...ATAN(U), FP1 RELEASED |
---|
339 | fmovel %d1,%FPCR //restore users exceptions |
---|
340 | faddx ATANF(%a6),%fp0 // ...ATAN(X) |
---|
341 | bra t_frcinx |
---|
342 | |
---|
343 | ATANBORS: |
---|
344 | //--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED. |
---|
345 | //--FP0 IS X AND |X| <= 1/16 OR |X| >= 16. |
---|
346 | cmpil #0x3FFF8000,%d0 |
---|
347 | bgt ATANBIG // ...I.E. |X| >= 16 |
---|
348 | |
---|
349 | ATANSM: |
---|
350 | //--|X| <= 1/16 |
---|
351 | //--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE |
---|
352 | //--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6))))) |
---|
353 | //--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] ) |
---|
354 | //--WHERE Y = X*X, AND Z = Y*Y. |
---|
355 | |
---|
356 | cmpil #0x3FD78000,%d0 |
---|
357 | blt ATANTINY |
---|
358 | //--COMPUTE POLYNOMIAL |
---|
359 | fmulx %fp0,%fp0 // ...FP0 IS Y = X*X |
---|
360 | |
---|
361 | |
---|
362 | movew #0x0000,XDCARE(%a6) |
---|
363 | |
---|
364 | fmovex %fp0,%fp1 |
---|
365 | fmulx %fp1,%fp1 // ...FP1 IS Z = Y*Y |
---|
366 | |
---|
367 | fmoved ATANB6,%fp2 |
---|
368 | fmoved ATANB5,%fp3 |
---|
369 | |
---|
370 | fmulx %fp1,%fp2 // ...Z*B6 |
---|
371 | fmulx %fp1,%fp3 // ...Z*B5 |
---|
372 | |
---|
373 | faddd ATANB4,%fp2 // ...B4+Z*B6 |
---|
374 | faddd ATANB3,%fp3 // ...B3+Z*B5 |
---|
375 | |
---|
376 | fmulx %fp1,%fp2 // ...Z*(B4+Z*B6) |
---|
377 | fmulx %fp3,%fp1 // ...Z*(B3+Z*B5) |
---|
378 | |
---|
379 | faddd ATANB2,%fp2 // ...B2+Z*(B4+Z*B6) |
---|
380 | faddd ATANB1,%fp1 // ...B1+Z*(B3+Z*B5) |
---|
381 | |
---|
382 | fmulx %fp0,%fp2 // ...Y*(B2+Z*(B4+Z*B6)) |
---|
383 | fmulx X(%a6),%fp0 // ...X*Y |
---|
384 | |
---|
385 | faddx %fp2,%fp1 // ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] |
---|
386 | |
---|
387 | |
---|
388 | fmulx %fp1,%fp0 // ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) |
---|
389 | |
---|
390 | fmovel %d1,%FPCR //restore users exceptions |
---|
391 | faddx X(%a6),%fp0 |
---|
392 | |
---|
393 | bra t_frcinx |
---|
394 | |
---|
395 | ATANTINY: |
---|
396 | //--|X| < 2^(-40), ATAN(X) = X |
---|
397 | movew #0x0000,XDCARE(%a6) |
---|
398 | |
---|
399 | fmovel %d1,%FPCR //restore users exceptions |
---|
400 | fmovex X(%a6),%fp0 //last inst - possible exception set |
---|
401 | |
---|
402 | bra t_frcinx |
---|
403 | |
---|
404 | ATANBIG: |
---|
405 | //--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE, |
---|
406 | //--RETURN SIGN(X)*PI/2 + ATAN(-1/X). |
---|
407 | cmpil #0x40638000,%d0 |
---|
408 | bgt ATANHUGE |
---|
409 | |
---|
410 | //--APPROXIMATE ATAN(-1/X) BY |
---|
411 | //--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X' |
---|
412 | //--THIS CAN BE RE-WRITTEN AS |
---|
413 | //--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y. |
---|
414 | |
---|
415 | fmoves #0xBF800000,%fp1 // ...LOAD -1 |
---|
416 | fdivx %fp0,%fp1 // ...FP1 IS -1/X |
---|
417 | |
---|
418 | |
---|
419 | //--DIVIDE IS STILL CRANKING |
---|
420 | |
---|
421 | fmovex %fp1,%fp0 // ...FP0 IS X' |
---|
422 | fmulx %fp0,%fp0 // ...FP0 IS Y = X'*X' |
---|
423 | fmovex %fp1,X(%a6) // ...X IS REALLY X' |
---|
424 | |
---|
425 | fmovex %fp0,%fp1 |
---|
426 | fmulx %fp1,%fp1 // ...FP1 IS Z = Y*Y |
---|
427 | |
---|
428 | fmoved ATANC5,%fp3 |
---|
429 | fmoved ATANC4,%fp2 |
---|
430 | |
---|
431 | fmulx %fp1,%fp3 // ...Z*C5 |
---|
432 | fmulx %fp1,%fp2 // ...Z*B4 |
---|
433 | |
---|
434 | faddd ATANC3,%fp3 // ...C3+Z*C5 |
---|
435 | faddd ATANC2,%fp2 // ...C2+Z*C4 |
---|
436 | |
---|
437 | fmulx %fp3,%fp1 // ...Z*(C3+Z*C5), FP3 RELEASED |
---|
438 | fmulx %fp0,%fp2 // ...Y*(C2+Z*C4) |
---|
439 | |
---|
440 | faddd ATANC1,%fp1 // ...C1+Z*(C3+Z*C5) |
---|
441 | fmulx X(%a6),%fp0 // ...X'*Y |
---|
442 | |
---|
443 | faddx %fp2,%fp1 // ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] |
---|
444 | |
---|
445 | |
---|
446 | fmulx %fp1,%fp0 // ...X'*Y*([B1+Z*(B3+Z*B5)] |
---|
447 | // ... +[Y*(B2+Z*(B4+Z*B6))]) |
---|
448 | faddx X(%a6),%fp0 |
---|
449 | |
---|
450 | fmovel %d1,%FPCR //restore users exceptions |
---|
451 | |
---|
452 | btstb #7,(%a0) |
---|
453 | beqs pos_big |
---|
454 | |
---|
455 | neg_big: |
---|
456 | faddx NPIBY2,%fp0 |
---|
457 | bra t_frcinx |
---|
458 | |
---|
459 | pos_big: |
---|
460 | faddx PPIBY2,%fp0 |
---|
461 | bra t_frcinx |
---|
462 | |
---|
463 | ATANHUGE: |
---|
464 | //--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY |
---|
465 | btstb #7,(%a0) |
---|
466 | beqs pos_huge |
---|
467 | |
---|
468 | neg_huge: |
---|
469 | fmovex NPIBY2,%fp0 |
---|
470 | fmovel %d1,%fpcr |
---|
471 | fsubx NTINY,%fp0 |
---|
472 | bra t_frcinx |
---|
473 | |
---|
474 | pos_huge: |
---|
475 | fmovex PPIBY2,%fp0 |
---|
476 | fmovel %d1,%fpcr |
---|
477 | fsubx PTINY,%fp0 |
---|
478 | bra t_frcinx |
---|
479 | |
---|
480 | |end |
---|