1 | // |
---|
2 | // ssin.sa 3.3 7/29/91 |
---|
3 | // |
---|
4 | // The entry point sSIN computes the sine of an input argument |
---|
5 | // sCOS computes the cosine, and sSINCOS computes both. The |
---|
6 | // corresponding entry points with a "d" computes the same |
---|
7 | // corresponding function values for denormalized inputs. |
---|
8 | // |
---|
9 | // Input: Double-extended number X in location pointed to |
---|
10 | // by address register a0. |
---|
11 | // |
---|
12 | // Output: The function value sin(X) or cos(X) returned in Fp0 if SIN or |
---|
13 | // COS is requested. Otherwise, for SINCOS, sin(X) is returned |
---|
14 | // in Fp0, and cos(X) is returned in Fp1. |
---|
15 | // |
---|
16 | // Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS. |
---|
17 | // |
---|
18 | // Accuracy and Monotonicity: The returned result is within 1 ulp in |
---|
19 | // 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the |
---|
20 | // result is subsequently rounded to double precision. The |
---|
21 | // result is provably monotonic in double precision. |
---|
22 | // |
---|
23 | // Speed: The programs sSIN and sCOS take approximately 150 cycles for |
---|
24 | // input argument X such that |X| < 15Pi, which is the the usual |
---|
25 | // situation. The speed for sSINCOS is approximately 190 cycles. |
---|
26 | // |
---|
27 | // Algorithm: |
---|
28 | // |
---|
29 | // SIN and COS: |
---|
30 | // 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. |
---|
31 | // |
---|
32 | // 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. |
---|
33 | // |
---|
34 | // 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let |
---|
35 | // k = N mod 4, so in particular, k = 0,1,2,or 3. Overwrite |
---|
36 | // k by k := k + AdjN. |
---|
37 | // |
---|
38 | // 4. If k is even, go to 6. |
---|
39 | // |
---|
40 | // 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r) |
---|
41 | // where cos(r) is approximated by an even polynomial in r, |
---|
42 | // 1 + r*r*(B1+s*(B2+ ... + s*B8)), s = r*r. |
---|
43 | // Exit. |
---|
44 | // |
---|
45 | // 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) |
---|
46 | // where sin(r) is approximated by an odd polynomial in r |
---|
47 | // r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. |
---|
48 | // Exit. |
---|
49 | // |
---|
50 | // 7. If |X| > 1, go to 9. |
---|
51 | // |
---|
52 | // 8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1. |
---|
53 | // |
---|
54 | // 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3. |
---|
55 | // |
---|
56 | // SINCOS: |
---|
57 | // 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. |
---|
58 | // |
---|
59 | // 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let |
---|
60 | // k = N mod 4, so in particular, k = 0,1,2,or 3. |
---|
61 | // |
---|
62 | // 3. If k is even, go to 5. |
---|
63 | // |
---|
64 | // 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e. |
---|
65 | // j1 exclusive or with the l.s.b. of k. |
---|
66 | // sgn1 := (-1)**j1, sgn2 := (-1)**j2. |
---|
67 | // SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where |
---|
68 | // sin(r) and cos(r) are computed as odd and even polynomials |
---|
69 | // in r, respectively. Exit |
---|
70 | // |
---|
71 | // 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. |
---|
72 | // SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where |
---|
73 | // sin(r) and cos(r) are computed as odd and even polynomials |
---|
74 | // in r, respectively. Exit |
---|
75 | // |
---|
76 | // 6. If |X| > 1, go to 8. |
---|
77 | // |
---|
78 | // 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. |
---|
79 | // |
---|
80 | // 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2. |
---|
81 | // |
---|
82 | |
---|
83 | // Copyright (C) Motorola, Inc. 1990 |
---|
84 | // All Rights Reserved |
---|
85 | // |
---|
86 | // THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA |
---|
87 | // The copyright notice above does not evidence any |
---|
88 | // actual or intended publication of such source code. |
---|
89 | |
---|
90 | //SSIN idnt 2,1 | Motorola 040 Floating Point Software Package |
---|
91 | |
---|
92 | |section 8 |
---|
93 | |
---|
94 | .include "fpsp.defs" |
---|
95 | |
---|
96 | BOUNDS1: .long 0x3FD78000,0x4004BC7E |
---|
97 | TWOBYPI: .long 0x3FE45F30,0x6DC9C883 |
---|
98 | |
---|
99 | SINA7: .long 0xBD6AAA77,0xCCC994F5 |
---|
100 | SINA6: .long 0x3DE61209,0x7AAE8DA1 |
---|
101 | |
---|
102 | SINA5: .long 0xBE5AE645,0x2A118AE4 |
---|
103 | SINA4: .long 0x3EC71DE3,0xA5341531 |
---|
104 | |
---|
105 | SINA3: .long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000 |
---|
106 | |
---|
107 | SINA2: .long 0x3FF80000,0x88888888,0x888859AF,0x00000000 |
---|
108 | |
---|
109 | SINA1: .long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000 |
---|
110 | |
---|
111 | COSB8: .long 0x3D2AC4D0,0xD6011EE3 |
---|
112 | COSB7: .long 0xBDA9396F,0x9F45AC19 |
---|
113 | |
---|
114 | COSB6: .long 0x3E21EED9,0x0612C972 |
---|
115 | COSB5: .long 0xBE927E4F,0xB79D9FCF |
---|
116 | |
---|
117 | COSB4: .long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000 |
---|
118 | |
---|
119 | COSB3: .long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000 |
---|
120 | |
---|
121 | COSB2: .long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E |
---|
122 | COSB1: .long 0xBF000000 |
---|
123 | |
---|
124 | INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A |
---|
125 | |
---|
126 | TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000 |
---|
127 | TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000 |
---|
128 | |
---|
129 | |xref PITBL |
---|
130 | |
---|
131 | .set INARG,FP_SCR4 |
---|
132 | |
---|
133 | .set X,FP_SCR5 |
---|
134 | .set XDCARE,X+2 |
---|
135 | .set XFRAC,X+4 |
---|
136 | |
---|
137 | .set RPRIME,FP_SCR1 |
---|
138 | .set SPRIME,FP_SCR2 |
---|
139 | |
---|
140 | .set POSNEG1,L_SCR1 |
---|
141 | .set TWOTO63,L_SCR1 |
---|
142 | |
---|
143 | .set ENDFLAG,L_SCR2 |
---|
144 | .set N,L_SCR2 |
---|
145 | |
---|
146 | .set ADJN,L_SCR3 |
---|
147 | |
---|
148 | | xref t_frcinx |
---|
149 | |xref t_extdnrm |
---|
150 | |xref sto_cos |
---|
151 | |
---|
152 | .global ssind |
---|
153 | ssind: |
---|
154 | //--SIN(X) = X FOR DENORMALIZED X |
---|
155 | bra t_extdnrm |
---|
156 | |
---|
157 | .global scosd |
---|
158 | scosd: |
---|
159 | //--COS(X) = 1 FOR DENORMALIZED X |
---|
160 | |
---|
161 | fmoves #0x3F800000,%fp0 |
---|
162 | // |
---|
163 | // 9D25B Fix: Sometimes the previous fmove.s sets fpsr bits |
---|
164 | // |
---|
165 | fmovel #0,%fpsr |
---|
166 | // |
---|
167 | bra t_frcinx |
---|
168 | |
---|
169 | .global ssin |
---|
170 | ssin: |
---|
171 | //--SET ADJN TO 0 |
---|
172 | movel #0,ADJN(%a6) |
---|
173 | bras SINBGN |
---|
174 | |
---|
175 | .global scos |
---|
176 | scos: |
---|
177 | //--SET ADJN TO 1 |
---|
178 | movel #1,ADJN(%a6) |
---|
179 | |
---|
180 | SINBGN: |
---|
181 | //--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE |
---|
182 | |
---|
183 | fmovex (%a0),%fp0 // ...LOAD INPUT |
---|
184 | |
---|
185 | movel (%a0),%d0 |
---|
186 | movew 4(%a0),%d0 |
---|
187 | fmovex %fp0,X(%a6) |
---|
188 | andil #0x7FFFFFFF,%d0 // ...COMPACTIFY X |
---|
189 | |
---|
190 | cmpil #0x3FD78000,%d0 // ...|X| >= 2**(-40)? |
---|
191 | bges SOK1 |
---|
192 | bra SINSM |
---|
193 | |
---|
194 | SOK1: |
---|
195 | cmpil #0x4004BC7E,%d0 // ...|X| < 15 PI? |
---|
196 | blts SINMAIN |
---|
197 | bra REDUCEX |
---|
198 | |
---|
199 | SINMAIN: |
---|
200 | //--THIS IS THE USUAL CASE, |X| <= 15 PI. |
---|
201 | //--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. |
---|
202 | fmovex %fp0,%fp1 |
---|
203 | fmuld TWOBYPI,%fp1 // ...X*2/PI |
---|
204 | |
---|
205 | //--HIDE THE NEXT THREE INSTRUCTIONS |
---|
206 | lea PITBL+0x200,%a1 // ...TABLE OF N*PI/2, N = -32,...,32 |
---|
207 | |
---|
208 | |
---|
209 | //--FP1 IS NOW READY |
---|
210 | fmovel %fp1,N(%a6) // ...CONVERT TO INTEGER |
---|
211 | |
---|
212 | movel N(%a6),%d0 |
---|
213 | asll #4,%d0 |
---|
214 | addal %d0,%a1 // ...A1 IS THE ADDRESS OF N*PIBY2 |
---|
215 | // ...WHICH IS IN TWO PIECES Y1 & Y2 |
---|
216 | |
---|
217 | fsubx (%a1)+,%fp0 // ...X-Y1 |
---|
218 | //--HIDE THE NEXT ONE |
---|
219 | fsubs (%a1),%fp0 // ...FP0 IS R = (X-Y1)-Y2 |
---|
220 | |
---|
221 | SINCONT: |
---|
222 | //--continuation from REDUCEX |
---|
223 | |
---|
224 | //--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED |
---|
225 | movel N(%a6),%d0 |
---|
226 | addl ADJN(%a6),%d0 // ...SEE IF D0 IS ODD OR EVEN |
---|
227 | rorl #1,%d0 // ...D0 WAS ODD IFF D0 IS NEGATIVE |
---|
228 | cmpil #0,%d0 |
---|
229 | blt COSPOLY |
---|
230 | |
---|
231 | SINPOLY: |
---|
232 | //--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J. |
---|
233 | //--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY |
---|
234 | //--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE |
---|
235 | //--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS |
---|
236 | //--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))]) |
---|
237 | //--WHERE T=S*S. |
---|
238 | //--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION |
---|
239 | //--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT. |
---|
240 | fmovex %fp0,X(%a6) // ...X IS R |
---|
241 | fmulx %fp0,%fp0 // ...FP0 IS S |
---|
242 | //---HIDE THE NEXT TWO WHILE WAITING FOR FP0 |
---|
243 | fmoved SINA7,%fp3 |
---|
244 | fmoved SINA6,%fp2 |
---|
245 | //--FP0 IS NOW READY |
---|
246 | fmovex %fp0,%fp1 |
---|
247 | fmulx %fp1,%fp1 // ...FP1 IS T |
---|
248 | //--HIDE THE NEXT TWO WHILE WAITING FOR FP1 |
---|
249 | |
---|
250 | rorl #1,%d0 |
---|
251 | andil #0x80000000,%d0 |
---|
252 | // ...LEAST SIG. BIT OF D0 IN SIGN POSITION |
---|
253 | eorl %d0,X(%a6) // ...X IS NOW R'= SGN*R |
---|
254 | |
---|
255 | fmulx %fp1,%fp3 // ...TA7 |
---|
256 | fmulx %fp1,%fp2 // ...TA6 |
---|
257 | |
---|
258 | faddd SINA5,%fp3 // ...A5+TA7 |
---|
259 | faddd SINA4,%fp2 // ...A4+TA6 |
---|
260 | |
---|
261 | fmulx %fp1,%fp3 // ...T(A5+TA7) |
---|
262 | fmulx %fp1,%fp2 // ...T(A4+TA6) |
---|
263 | |
---|
264 | faddd SINA3,%fp3 // ...A3+T(A5+TA7) |
---|
265 | faddx SINA2,%fp2 // ...A2+T(A4+TA6) |
---|
266 | |
---|
267 | fmulx %fp3,%fp1 // ...T(A3+T(A5+TA7)) |
---|
268 | |
---|
269 | fmulx %fp0,%fp2 // ...S(A2+T(A4+TA6)) |
---|
270 | faddx SINA1,%fp1 // ...A1+T(A3+T(A5+TA7)) |
---|
271 | fmulx X(%a6),%fp0 // ...R'*S |
---|
272 | |
---|
273 | faddx %fp2,%fp1 // ...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] |
---|
274 | //--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING |
---|
275 | //--FP2 RELEASED, RESTORE NOW AND TAKE FULL ADVANTAGE OF HIDING |
---|
276 | |
---|
277 | |
---|
278 | fmulx %fp1,%fp0 // ...SIN(R')-R' |
---|
279 | //--FP1 RELEASED. |
---|
280 | |
---|
281 | fmovel %d1,%FPCR //restore users exceptions |
---|
282 | faddx X(%a6),%fp0 //last inst - possible exception set |
---|
283 | bra t_frcinx |
---|
284 | |
---|
285 | |
---|
286 | COSPOLY: |
---|
287 | //--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J. |
---|
288 | //--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY |
---|
289 | //--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE |
---|
290 | //--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS |
---|
291 | //--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))]) |
---|
292 | //--WHERE T=S*S. |
---|
293 | //--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION |
---|
294 | //--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2 |
---|
295 | //--AND IS THEREFORE STORED AS SINGLE PRECISION. |
---|
296 | |
---|
297 | fmulx %fp0,%fp0 // ...FP0 IS S |
---|
298 | //---HIDE THE NEXT TWO WHILE WAITING FOR FP0 |
---|
299 | fmoved COSB8,%fp2 |
---|
300 | fmoved COSB7,%fp3 |
---|
301 | //--FP0 IS NOW READY |
---|
302 | fmovex %fp0,%fp1 |
---|
303 | fmulx %fp1,%fp1 // ...FP1 IS T |
---|
304 | //--HIDE THE NEXT TWO WHILE WAITING FOR FP1 |
---|
305 | fmovex %fp0,X(%a6) // ...X IS S |
---|
306 | rorl #1,%d0 |
---|
307 | andil #0x80000000,%d0 |
---|
308 | // ...LEAST SIG. BIT OF D0 IN SIGN POSITION |
---|
309 | |
---|
310 | fmulx %fp1,%fp2 // ...TB8 |
---|
311 | //--HIDE THE NEXT TWO WHILE WAITING FOR THE XU |
---|
312 | eorl %d0,X(%a6) // ...X IS NOW S'= SGN*S |
---|
313 | andil #0x80000000,%d0 |
---|
314 | |
---|
315 | fmulx %fp1,%fp3 // ...TB7 |
---|
316 | //--HIDE THE NEXT TWO WHILE WAITING FOR THE XU |
---|
317 | oril #0x3F800000,%d0 // ...D0 IS SGN IN SINGLE |
---|
318 | movel %d0,POSNEG1(%a6) |
---|
319 | |
---|
320 | faddd COSB6,%fp2 // ...B6+TB8 |
---|
321 | faddd COSB5,%fp3 // ...B5+TB7 |
---|
322 | |
---|
323 | fmulx %fp1,%fp2 // ...T(B6+TB8) |
---|
324 | fmulx %fp1,%fp3 // ...T(B5+TB7) |
---|
325 | |
---|
326 | faddd COSB4,%fp2 // ...B4+T(B6+TB8) |
---|
327 | faddx COSB3,%fp3 // ...B3+T(B5+TB7) |
---|
328 | |
---|
329 | fmulx %fp1,%fp2 // ...T(B4+T(B6+TB8)) |
---|
330 | fmulx %fp3,%fp1 // ...T(B3+T(B5+TB7)) |
---|
331 | |
---|
332 | faddx COSB2,%fp2 // ...B2+T(B4+T(B6+TB8)) |
---|
333 | fadds COSB1,%fp1 // ...B1+T(B3+T(B5+TB7)) |
---|
334 | |
---|
335 | fmulx %fp2,%fp0 // ...S(B2+T(B4+T(B6+TB8))) |
---|
336 | //--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING |
---|
337 | //--FP2 RELEASED. |
---|
338 | |
---|
339 | |
---|
340 | faddx %fp1,%fp0 |
---|
341 | //--FP1 RELEASED |
---|
342 | |
---|
343 | fmulx X(%a6),%fp0 |
---|
344 | |
---|
345 | fmovel %d1,%FPCR //restore users exceptions |
---|
346 | fadds POSNEG1(%a6),%fp0 //last inst - possible exception set |
---|
347 | bra t_frcinx |
---|
348 | |
---|
349 | |
---|
350 | SINBORS: |
---|
351 | //--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. |
---|
352 | //--IF |X| < 2**(-40), RETURN X OR 1. |
---|
353 | cmpil #0x3FFF8000,%d0 |
---|
354 | bgts REDUCEX |
---|
355 | |
---|
356 | |
---|
357 | SINSM: |
---|
358 | movel ADJN(%a6),%d0 |
---|
359 | cmpil #0,%d0 |
---|
360 | bgts COSTINY |
---|
361 | |
---|
362 | SINTINY: |
---|
363 | movew #0x0000,XDCARE(%a6) // ...JUST IN CASE |
---|
364 | fmovel %d1,%FPCR //restore users exceptions |
---|
365 | fmovex X(%a6),%fp0 //last inst - possible exception set |
---|
366 | bra t_frcinx |
---|
367 | |
---|
368 | |
---|
369 | COSTINY: |
---|
370 | fmoves #0x3F800000,%fp0 |
---|
371 | |
---|
372 | fmovel %d1,%FPCR //restore users exceptions |
---|
373 | fsubs #0x00800000,%fp0 //last inst - possible exception set |
---|
374 | bra t_frcinx |
---|
375 | |
---|
376 | |
---|
377 | REDUCEX: |
---|
378 | //--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW. |
---|
379 | //--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING |
---|
380 | //--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE. |
---|
381 | |
---|
382 | fmovemx %fp2-%fp5,-(%a7) // ...save FP2 through FP5 |
---|
383 | movel %d2,-(%a7) |
---|
384 | fmoves #0x00000000,%fp1 |
---|
385 | //--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that |
---|
386 | //--there is a danger of unwanted overflow in first LOOP iteration. In this |
---|
387 | //--case, reduce argument by one remainder step to make subsequent reduction |
---|
388 | //--safe. |
---|
389 | cmpil #0x7ffeffff,%d0 //is argument dangerously large? |
---|
390 | bnes LOOP |
---|
391 | movel #0x7ffe0000,FP_SCR2(%a6) //yes |
---|
392 | // ;create 2**16383*PI/2 |
---|
393 | movel #0xc90fdaa2,FP_SCR2+4(%a6) |
---|
394 | clrl FP_SCR2+8(%a6) |
---|
395 | ftstx %fp0 //test sign of argument |
---|
396 | movel #0x7fdc0000,FP_SCR3(%a6) //create low half of 2**16383* |
---|
397 | // ;PI/2 at FP_SCR3 |
---|
398 | movel #0x85a308d3,FP_SCR3+4(%a6) |
---|
399 | clrl FP_SCR3+8(%a6) |
---|
400 | fblt red_neg |
---|
401 | orw #0x8000,FP_SCR2(%a6) //positive arg |
---|
402 | orw #0x8000,FP_SCR3(%a6) |
---|
403 | red_neg: |
---|
404 | faddx FP_SCR2(%a6),%fp0 //high part of reduction is exact |
---|
405 | fmovex %fp0,%fp1 //save high result in fp1 |
---|
406 | faddx FP_SCR3(%a6),%fp0 //low part of reduction |
---|
407 | fsubx %fp0,%fp1 //determine low component of result |
---|
408 | faddx FP_SCR3(%a6),%fp1 //fp0/fp1 are reduced argument. |
---|
409 | |
---|
410 | //--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4. |
---|
411 | //--integer quotient will be stored in N |
---|
412 | //--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1) |
---|
413 | |
---|
414 | LOOP: |
---|
415 | fmovex %fp0,INARG(%a6) // ...+-2**K * F, 1 <= F < 2 |
---|
416 | movew INARG(%a6),%d0 |
---|
417 | movel %d0,%a1 // ...save a copy of D0 |
---|
418 | andil #0x00007FFF,%d0 |
---|
419 | subil #0x00003FFF,%d0 // ...D0 IS K |
---|
420 | cmpil #28,%d0 |
---|
421 | bles LASTLOOP |
---|
422 | CONTLOOP: |
---|
423 | subil #27,%d0 // ...D0 IS L := K-27 |
---|
424 | movel #0,ENDFLAG(%a6) |
---|
425 | bras WORK |
---|
426 | LASTLOOP: |
---|
427 | clrl %d0 // ...D0 IS L := 0 |
---|
428 | movel #1,ENDFLAG(%a6) |
---|
429 | |
---|
430 | WORK: |
---|
431 | //--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN |
---|
432 | //--THAT INT( X * (2/PI) / 2**(L) ) < 2**29. |
---|
433 | |
---|
434 | //--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63), |
---|
435 | //--2**L * (PIby2_1), 2**L * (PIby2_2) |
---|
436 | |
---|
437 | movel #0x00003FFE,%d2 // ...BIASED EXPO OF 2/PI |
---|
438 | subl %d0,%d2 // ...BIASED EXPO OF 2**(-L)*(2/PI) |
---|
439 | |
---|
440 | movel #0xA2F9836E,FP_SCR1+4(%a6) |
---|
441 | movel #0x4E44152A,FP_SCR1+8(%a6) |
---|
442 | movew %d2,FP_SCR1(%a6) // ...FP_SCR1 is 2**(-L)*(2/PI) |
---|
443 | |
---|
444 | fmovex %fp0,%fp2 |
---|
445 | fmulx FP_SCR1(%a6),%fp2 |
---|
446 | //--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN |
---|
447 | //--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N |
---|
448 | //--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT |
---|
449 | //--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE |
---|
450 | //--US THE DESIRED VALUE IN FLOATING POINT. |
---|
451 | |
---|
452 | //--HIDE SIX CYCLES OF INSTRUCTION |
---|
453 | movel %a1,%d2 |
---|
454 | swap %d2 |
---|
455 | andil #0x80000000,%d2 |
---|
456 | oril #0x5F000000,%d2 // ...D2 IS SIGN(INARG)*2**63 IN SGL |
---|
457 | movel %d2,TWOTO63(%a6) |
---|
458 | |
---|
459 | movel %d0,%d2 |
---|
460 | addil #0x00003FFF,%d2 // ...BIASED EXPO OF 2**L * (PI/2) |
---|
461 | |
---|
462 | //--FP2 IS READY |
---|
463 | fadds TWOTO63(%a6),%fp2 // ...THE FRACTIONAL PART OF FP1 IS ROUNDED |
---|
464 | |
---|
465 | //--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2 |
---|
466 | movew %d2,FP_SCR2(%a6) |
---|
467 | clrw FP_SCR2+2(%a6) |
---|
468 | movel #0xC90FDAA2,FP_SCR2+4(%a6) |
---|
469 | clrl FP_SCR2+8(%a6) // ...FP_SCR2 is 2**(L) * Piby2_1 |
---|
470 | |
---|
471 | //--FP2 IS READY |
---|
472 | fsubs TWOTO63(%a6),%fp2 // ...FP2 is N |
---|
473 | |
---|
474 | addil #0x00003FDD,%d0 |
---|
475 | movew %d0,FP_SCR3(%a6) |
---|
476 | clrw FP_SCR3+2(%a6) |
---|
477 | movel #0x85A308D3,FP_SCR3+4(%a6) |
---|
478 | clrl FP_SCR3+8(%a6) // ...FP_SCR3 is 2**(L) * Piby2_2 |
---|
479 | |
---|
480 | movel ENDFLAG(%a6),%d0 |
---|
481 | |
---|
482 | //--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and |
---|
483 | //--P2 = 2**(L) * Piby2_2 |
---|
484 | fmovex %fp2,%fp4 |
---|
485 | fmulx FP_SCR2(%a6),%fp4 // ...W = N*P1 |
---|
486 | fmovex %fp2,%fp5 |
---|
487 | fmulx FP_SCR3(%a6),%fp5 // ...w = N*P2 |
---|
488 | fmovex %fp4,%fp3 |
---|
489 | //--we want P+p = W+w but |p| <= half ulp of P |
---|
490 | //--Then, we need to compute A := R-P and a := r-p |
---|
491 | faddx %fp5,%fp3 // ...FP3 is P |
---|
492 | fsubx %fp3,%fp4 // ...W-P |
---|
493 | |
---|
494 | fsubx %fp3,%fp0 // ...FP0 is A := R - P |
---|
495 | faddx %fp5,%fp4 // ...FP4 is p = (W-P)+w |
---|
496 | |
---|
497 | fmovex %fp0,%fp3 // ...FP3 A |
---|
498 | fsubx %fp4,%fp1 // ...FP1 is a := r - p |
---|
499 | |
---|
500 | //--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but |
---|
501 | //--|r| <= half ulp of R. |
---|
502 | faddx %fp1,%fp0 // ...FP0 is R := A+a |
---|
503 | //--No need to calculate r if this is the last loop |
---|
504 | cmpil #0,%d0 |
---|
505 | bgt RESTORE |
---|
506 | |
---|
507 | //--Need to calculate r |
---|
508 | fsubx %fp0,%fp3 // ...A-R |
---|
509 | faddx %fp3,%fp1 // ...FP1 is r := (A-R)+a |
---|
510 | bra LOOP |
---|
511 | |
---|
512 | RESTORE: |
---|
513 | fmovel %fp2,N(%a6) |
---|
514 | movel (%a7)+,%d2 |
---|
515 | fmovemx (%a7)+,%fp2-%fp5 |
---|
516 | |
---|
517 | |
---|
518 | movel ADJN(%a6),%d0 |
---|
519 | cmpil #4,%d0 |
---|
520 | |
---|
521 | blt SINCONT |
---|
522 | bras SCCONT |
---|
523 | |
---|
524 | .global ssincosd |
---|
525 | ssincosd: |
---|
526 | //--SIN AND COS OF X FOR DENORMALIZED X |
---|
527 | |
---|
528 | fmoves #0x3F800000,%fp1 |
---|
529 | bsr sto_cos //store cosine result |
---|
530 | bra t_extdnrm |
---|
531 | |
---|
532 | .global ssincos |
---|
533 | ssincos: |
---|
534 | //--SET ADJN TO 4 |
---|
535 | movel #4,ADJN(%a6) |
---|
536 | |
---|
537 | fmovex (%a0),%fp0 // ...LOAD INPUT |
---|
538 | |
---|
539 | movel (%a0),%d0 |
---|
540 | movew 4(%a0),%d0 |
---|
541 | fmovex %fp0,X(%a6) |
---|
542 | andil #0x7FFFFFFF,%d0 // ...COMPACTIFY X |
---|
543 | |
---|
544 | cmpil #0x3FD78000,%d0 // ...|X| >= 2**(-40)? |
---|
545 | bges SCOK1 |
---|
546 | bra SCSM |
---|
547 | |
---|
548 | SCOK1: |
---|
549 | cmpil #0x4004BC7E,%d0 // ...|X| < 15 PI? |
---|
550 | blts SCMAIN |
---|
551 | bra REDUCEX |
---|
552 | |
---|
553 | |
---|
554 | SCMAIN: |
---|
555 | //--THIS IS THE USUAL CASE, |X| <= 15 PI. |
---|
556 | //--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. |
---|
557 | fmovex %fp0,%fp1 |
---|
558 | fmuld TWOBYPI,%fp1 // ...X*2/PI |
---|
559 | |
---|
560 | //--HIDE THE NEXT THREE INSTRUCTIONS |
---|
561 | lea PITBL+0x200,%a1 // ...TABLE OF N*PI/2, N = -32,...,32 |
---|
562 | |
---|
563 | |
---|
564 | //--FP1 IS NOW READY |
---|
565 | fmovel %fp1,N(%a6) // ...CONVERT TO INTEGER |
---|
566 | |
---|
567 | movel N(%a6),%d0 |
---|
568 | asll #4,%d0 |
---|
569 | addal %d0,%a1 // ...ADDRESS OF N*PIBY2, IN Y1, Y2 |
---|
570 | |
---|
571 | fsubx (%a1)+,%fp0 // ...X-Y1 |
---|
572 | fsubs (%a1),%fp0 // ...FP0 IS R = (X-Y1)-Y2 |
---|
573 | |
---|
574 | SCCONT: |
---|
575 | //--continuation point from REDUCEX |
---|
576 | |
---|
577 | //--HIDE THE NEXT TWO |
---|
578 | movel N(%a6),%d0 |
---|
579 | rorl #1,%d0 |
---|
580 | |
---|
581 | cmpil #0,%d0 // ...D0 < 0 IFF N IS ODD |
---|
582 | bge NEVEN |
---|
583 | |
---|
584 | NODD: |
---|
585 | //--REGISTERS SAVED SO FAR: D0, A0, FP2. |
---|
586 | |
---|
587 | fmovex %fp0,RPRIME(%a6) |
---|
588 | fmulx %fp0,%fp0 // ...FP0 IS S = R*R |
---|
589 | fmoved SINA7,%fp1 // ...A7 |
---|
590 | fmoved COSB8,%fp2 // ...B8 |
---|
591 | fmulx %fp0,%fp1 // ...SA7 |
---|
592 | movel %d2,-(%a7) |
---|
593 | movel %d0,%d2 |
---|
594 | fmulx %fp0,%fp2 // ...SB8 |
---|
595 | rorl #1,%d2 |
---|
596 | andil #0x80000000,%d2 |
---|
597 | |
---|
598 | faddd SINA6,%fp1 // ...A6+SA7 |
---|
599 | eorl %d0,%d2 |
---|
600 | andil #0x80000000,%d2 |
---|
601 | faddd COSB7,%fp2 // ...B7+SB8 |
---|
602 | |
---|
603 | fmulx %fp0,%fp1 // ...S(A6+SA7) |
---|
604 | eorl %d2,RPRIME(%a6) |
---|
605 | movel (%a7)+,%d2 |
---|
606 | fmulx %fp0,%fp2 // ...S(B7+SB8) |
---|
607 | rorl #1,%d0 |
---|
608 | andil #0x80000000,%d0 |
---|
609 | |
---|
610 | faddd SINA5,%fp1 // ...A5+S(A6+SA7) |
---|
611 | movel #0x3F800000,POSNEG1(%a6) |
---|
612 | eorl %d0,POSNEG1(%a6) |
---|
613 | faddd COSB6,%fp2 // ...B6+S(B7+SB8) |
---|
614 | |
---|
615 | fmulx %fp0,%fp1 // ...S(A5+S(A6+SA7)) |
---|
616 | fmulx %fp0,%fp2 // ...S(B6+S(B7+SB8)) |
---|
617 | fmovex %fp0,SPRIME(%a6) |
---|
618 | |
---|
619 | faddd SINA4,%fp1 // ...A4+S(A5+S(A6+SA7)) |
---|
620 | eorl %d0,SPRIME(%a6) |
---|
621 | faddd COSB5,%fp2 // ...B5+S(B6+S(B7+SB8)) |
---|
622 | |
---|
623 | fmulx %fp0,%fp1 // ...S(A4+...) |
---|
624 | fmulx %fp0,%fp2 // ...S(B5+...) |
---|
625 | |
---|
626 | faddd SINA3,%fp1 // ...A3+S(A4+...) |
---|
627 | faddd COSB4,%fp2 // ...B4+S(B5+...) |
---|
628 | |
---|
629 | fmulx %fp0,%fp1 // ...S(A3+...) |
---|
630 | fmulx %fp0,%fp2 // ...S(B4+...) |
---|
631 | |
---|
632 | faddx SINA2,%fp1 // ...A2+S(A3+...) |
---|
633 | faddx COSB3,%fp2 // ...B3+S(B4+...) |
---|
634 | |
---|
635 | fmulx %fp0,%fp1 // ...S(A2+...) |
---|
636 | fmulx %fp0,%fp2 // ...S(B3+...) |
---|
637 | |
---|
638 | faddx SINA1,%fp1 // ...A1+S(A2+...) |
---|
639 | faddx COSB2,%fp2 // ...B2+S(B3+...) |
---|
640 | |
---|
641 | fmulx %fp0,%fp1 // ...S(A1+...) |
---|
642 | fmulx %fp2,%fp0 // ...S(B2+...) |
---|
643 | |
---|
644 | |
---|
645 | |
---|
646 | fmulx RPRIME(%a6),%fp1 // ...R'S(A1+...) |
---|
647 | fadds COSB1,%fp0 // ...B1+S(B2...) |
---|
648 | fmulx SPRIME(%a6),%fp0 // ...S'(B1+S(B2+...)) |
---|
649 | |
---|
650 | movel %d1,-(%sp) //restore users mode & precision |
---|
651 | andil #0xff,%d1 //mask off all exceptions |
---|
652 | fmovel %d1,%FPCR |
---|
653 | faddx RPRIME(%a6),%fp1 // ...COS(X) |
---|
654 | bsr sto_cos //store cosine result |
---|
655 | fmovel (%sp)+,%FPCR //restore users exceptions |
---|
656 | fadds POSNEG1(%a6),%fp0 // ...SIN(X) |
---|
657 | |
---|
658 | bra t_frcinx |
---|
659 | |
---|
660 | |
---|
661 | NEVEN: |
---|
662 | //--REGISTERS SAVED SO FAR: FP2. |
---|
663 | |
---|
664 | fmovex %fp0,RPRIME(%a6) |
---|
665 | fmulx %fp0,%fp0 // ...FP0 IS S = R*R |
---|
666 | fmoved COSB8,%fp1 // ...B8 |
---|
667 | fmoved SINA7,%fp2 // ...A7 |
---|
668 | fmulx %fp0,%fp1 // ...SB8 |
---|
669 | fmovex %fp0,SPRIME(%a6) |
---|
670 | fmulx %fp0,%fp2 // ...SA7 |
---|
671 | rorl #1,%d0 |
---|
672 | andil #0x80000000,%d0 |
---|
673 | faddd COSB7,%fp1 // ...B7+SB8 |
---|
674 | faddd SINA6,%fp2 // ...A6+SA7 |
---|
675 | eorl %d0,RPRIME(%a6) |
---|
676 | eorl %d0,SPRIME(%a6) |
---|
677 | fmulx %fp0,%fp1 // ...S(B7+SB8) |
---|
678 | oril #0x3F800000,%d0 |
---|
679 | movel %d0,POSNEG1(%a6) |
---|
680 | fmulx %fp0,%fp2 // ...S(A6+SA7) |
---|
681 | |
---|
682 | faddd COSB6,%fp1 // ...B6+S(B7+SB8) |
---|
683 | faddd SINA5,%fp2 // ...A5+S(A6+SA7) |
---|
684 | |
---|
685 | fmulx %fp0,%fp1 // ...S(B6+S(B7+SB8)) |
---|
686 | fmulx %fp0,%fp2 // ...S(A5+S(A6+SA7)) |
---|
687 | |
---|
688 | faddd COSB5,%fp1 // ...B5+S(B6+S(B7+SB8)) |
---|
689 | faddd SINA4,%fp2 // ...A4+S(A5+S(A6+SA7)) |
---|
690 | |
---|
691 | fmulx %fp0,%fp1 // ...S(B5+...) |
---|
692 | fmulx %fp0,%fp2 // ...S(A4+...) |
---|
693 | |
---|
694 | faddd COSB4,%fp1 // ...B4+S(B5+...) |
---|
695 | faddd SINA3,%fp2 // ...A3+S(A4+...) |
---|
696 | |
---|
697 | fmulx %fp0,%fp1 // ...S(B4+...) |
---|
698 | fmulx %fp0,%fp2 // ...S(A3+...) |
---|
699 | |
---|
700 | faddx COSB3,%fp1 // ...B3+S(B4+...) |
---|
701 | faddx SINA2,%fp2 // ...A2+S(A3+...) |
---|
702 | |
---|
703 | fmulx %fp0,%fp1 // ...S(B3+...) |
---|
704 | fmulx %fp0,%fp2 // ...S(A2+...) |
---|
705 | |
---|
706 | faddx COSB2,%fp1 // ...B2+S(B3+...) |
---|
707 | faddx SINA1,%fp2 // ...A1+S(A2+...) |
---|
708 | |
---|
709 | fmulx %fp0,%fp1 // ...S(B2+...) |
---|
710 | fmulx %fp2,%fp0 // ...s(a1+...) |
---|
711 | |
---|
712 | |
---|
713 | |
---|
714 | fadds COSB1,%fp1 // ...B1+S(B2...) |
---|
715 | fmulx RPRIME(%a6),%fp0 // ...R'S(A1+...) |
---|
716 | fmulx SPRIME(%a6),%fp1 // ...S'(B1+S(B2+...)) |
---|
717 | |
---|
718 | movel %d1,-(%sp) //save users mode & precision |
---|
719 | andil #0xff,%d1 //mask off all exceptions |
---|
720 | fmovel %d1,%FPCR |
---|
721 | fadds POSNEG1(%a6),%fp1 // ...COS(X) |
---|
722 | bsr sto_cos //store cosine result |
---|
723 | fmovel (%sp)+,%FPCR //restore users exceptions |
---|
724 | faddx RPRIME(%a6),%fp0 // ...SIN(X) |
---|
725 | |
---|
726 | bra t_frcinx |
---|
727 | |
---|
728 | SCBORS: |
---|
729 | cmpil #0x3FFF8000,%d0 |
---|
730 | bgt REDUCEX |
---|
731 | |
---|
732 | |
---|
733 | SCSM: |
---|
734 | movew #0x0000,XDCARE(%a6) |
---|
735 | fmoves #0x3F800000,%fp1 |
---|
736 | |
---|
737 | movel %d1,-(%sp) //save users mode & precision |
---|
738 | andil #0xff,%d1 //mask off all exceptions |
---|
739 | fmovel %d1,%FPCR |
---|
740 | fsubs #0x00800000,%fp1 |
---|
741 | bsr sto_cos //store cosine result |
---|
742 | fmovel (%sp)+,%FPCR //restore users exceptions |
---|
743 | fmovex X(%a6),%fp0 |
---|
744 | bra t_frcinx |
---|
745 | |
---|
746 | |end |
---|