source: rtems/c/src/lib/libcpu/m68k/m68040/fpsp/stan.s @ f9b93da

4.104.114.84.95
Last change on this file since f9b93da was f9b93da, checked in by Joel Sherrill <joel.sherrill@…>, on 04/16/97 at 17:33:04

Added the MC68040 Floating Point Support Package. This was ported
to RTEMS by Eric Norum. It is freely distributable and was acquired
from the Motorola WWW site. More info is in the FPSP README.

  • Property mode set to 100644
File size: 13.3 KB
Line 
1//
2//      stan.sa 3.3 7/29/91
3//
4//      The entry point stan computes the tangent of
5//      an input argument;
6//      stand does the same except for denormalized input.
7//
8//      Input: Double-extended number X in location pointed to
9//              by address register a0.
10//
11//      Output: The value tan(X) returned in floating-point register Fp0.
12//
13//      Accuracy and Monotonicity: The returned result is within 3 ulp in
14//              64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
15//              result is subsequently rounded to double precision. The
16//              result is provably monotonic in double precision.
17//
18//      Speed: The program sTAN takes approximately 170 cycles for
19//              input argument X such that |X| < 15Pi, which is the the usual
20//              situation.
21//
22//      Algorithm:
23//
24//      1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
25//
26//      2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
27//              k = N mod 2, so in particular, k = 0 or 1.
28//
29//      3. If k is odd, go to 5.
30//
31//      4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a
32//              rational function U/V where
33//              U = r + r*s*(P1 + s*(P2 + s*P3)), and
34//              V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))),  s = r*r.
35//              Exit.
36//
37//      4. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a
38//              rational function U/V where
39//              U = r + r*s*(P1 + s*(P2 + s*P3)), and
40//              V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,
41//              -Cot(r) = -V/U. Exit.
42//
43//      6. If |X| > 1, go to 8.
44//
45//      7. (|X|<2**(-40)) Tan(X) = X. Exit.
46//
47//      8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
48//
49
50//              Copyright (C) Motorola, Inc. 1990
51//                      All Rights Reserved
52//
53//      THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
54//      The copyright notice above does not evidence any 
55//      actual or intended publication of such source code.
56
57//STAN  idnt    2,1 | Motorola 040 Floating Point Software Package
58
59        |section        8
60
61        .include "fpsp.defs"
62
63BOUNDS1:        .long 0x3FD78000,0x4004BC7E
64TWOBYPI:        .long 0x3FE45F30,0x6DC9C883
65
66TANQ4:  .long 0x3EA0B759,0xF50F8688
67TANP3:  .long 0xBEF2BAA5,0xA8924F04
68
69TANQ3:  .long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000
70
71TANP2:  .long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
72
73TANQ2:  .long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
74
75TANP1:  .long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
76
77TANQ1:  .long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
78
79INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
80
81TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
82TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
83
84//--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
85//--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
86//--MOST 69 BITS LONG.
87        .global PITBL
88PITBL:
89  .long  0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
90  .long  0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
91  .long  0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
92  .long  0xC0040000,0xB6365E22,0xEE46F000,0x21480000
93  .long  0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
94  .long  0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
95  .long  0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
96  .long  0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
97  .long  0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
98  .long  0xC0040000,0x90836524,0x88034B96,0x20B00000
99  .long  0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
100  .long  0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
101  .long  0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
102  .long  0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
103  .long  0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
104  .long  0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
105  .long  0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
106  .long  0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
107  .long  0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
108  .long  0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
109  .long  0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
110  .long  0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
111  .long  0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
112  .long  0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
113  .long  0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
114  .long  0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
115  .long  0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
116  .long  0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
117  .long  0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
118  .long  0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
119  .long  0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
120  .long  0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
121  .long  0x00000000,0x00000000,0x00000000,0x00000000
122  .long  0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
123  .long  0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
124  .long  0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
125  .long  0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
126  .long  0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
127  .long  0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
128  .long  0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
129  .long  0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
130  .long  0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
131  .long  0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
132  .long  0x40030000,0x8A3AE64F,0x76F80584,0x21080000
133  .long  0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
134  .long  0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
135  .long  0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
136  .long  0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
137  .long  0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
138  .long  0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
139  .long  0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
140  .long  0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
141  .long  0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
142  .long  0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
143  .long  0x40040000,0x8A3AE64F,0x76F80584,0x21880000
144  .long  0x40040000,0x90836524,0x88034B96,0xA0B00000
145  .long  0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
146  .long  0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
147  .long  0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
148  .long  0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
149  .long  0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
150  .long  0x40040000,0xB6365E22,0xEE46F000,0xA1480000
151  .long  0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
152  .long  0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
153  .long  0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
154
155        .set    INARG,FP_SCR4
156
157        .set    TWOTO63,L_SCR1
158        .set    ENDFLAG,L_SCR2
159        .set    N,L_SCR3
160
161        | xref  t_frcinx
162        |xref   t_extdnrm
163
164        .global stand
165stand:
166//--TAN(X) = X FOR DENORMALIZED X
167
168        bra             t_extdnrm
169
170        .global stan
171stan:
172        fmovex          (%a0),%fp0      // ...LOAD INPUT
173
174        movel           (%a0),%d0
175        movew           4(%a0),%d0
176        andil           #0x7FFFFFFF,%d0
177
178        cmpil           #0x3FD78000,%d0         // ...|X| >= 2**(-40)?
179        bges            TANOK1
180        bra             TANSM
181TANOK1:
182        cmpil           #0x4004BC7E,%d0         // ...|X| < 15 PI?
183        blts            TANMAIN
184        bra             REDUCEX
185
186
187TANMAIN:
188//--THIS IS THE USUAL CASE, |X| <= 15 PI.
189//--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
190        fmovex          %fp0,%fp1
191        fmuld           TWOBYPI,%fp1    // ...X*2/PI
192
193//--HIDE THE NEXT TWO INSTRUCTIONS
194        leal            PITBL+0x200,%a1 // ...TABLE OF N*PI/2, N = -32,...,32
195
196//--FP1 IS NOW READY
197        fmovel          %fp1,%d0                // ...CONVERT TO INTEGER
198
199        asll            #4,%d0
200        addal           %d0,%a1         // ...ADDRESS N*PIBY2 IN Y1, Y2
201
202        fsubx           (%a1)+,%fp0     // ...X-Y1
203//--HIDE THE NEXT ONE
204
205        fsubs           (%a1),%fp0      // ...FP0 IS R = (X-Y1)-Y2
206
207        rorl            #5,%d0
208        andil           #0x80000000,%d0 // ...D0 WAS ODD IFF D0 < 0
209
210TANCONT:
211
212        cmpil           #0,%d0
213        blt             NODD
214
215        fmovex          %fp0,%fp1
216        fmulx           %fp1,%fp1               // ...S = R*R
217
218        fmoved          TANQ4,%fp3
219        fmoved          TANP3,%fp2
220
221        fmulx           %fp1,%fp3               // ...SQ4
222        fmulx           %fp1,%fp2               // ...SP3
223
224        faddd           TANQ3,%fp3      // ...Q3+SQ4
225        faddx           TANP2,%fp2      // ...P2+SP3
226
227        fmulx           %fp1,%fp3               // ...S(Q3+SQ4)
228        fmulx           %fp1,%fp2               // ...S(P2+SP3)
229
230        faddx           TANQ2,%fp3      // ...Q2+S(Q3+SQ4)
231        faddx           TANP1,%fp2      // ...P1+S(P2+SP3)
232
233        fmulx           %fp1,%fp3               // ...S(Q2+S(Q3+SQ4))
234        fmulx           %fp1,%fp2               // ...S(P1+S(P2+SP3))
235
236        faddx           TANQ1,%fp3      // ...Q1+S(Q2+S(Q3+SQ4))
237        fmulx           %fp0,%fp2               // ...RS(P1+S(P2+SP3))
238
239        fmulx           %fp3,%fp1               // ...S(Q1+S(Q2+S(Q3+SQ4)))
240       
241
242        faddx           %fp2,%fp0               // ...R+RS(P1+S(P2+SP3))
243       
244
245        fadds           #0x3F800000,%fp1        // ...1+S(Q1+...)
246
247        fmovel          %d1,%fpcr               //restore users exceptions
248        fdivx           %fp1,%fp0               //last inst - possible exception set
249
250        bra             t_frcinx
251
252NODD:
253        fmovex          %fp0,%fp1
254        fmulx           %fp0,%fp0               // ...S = R*R
255
256        fmoved          TANQ4,%fp3
257        fmoved          TANP3,%fp2
258
259        fmulx           %fp0,%fp3               // ...SQ4
260        fmulx           %fp0,%fp2               // ...SP3
261
262        faddd           TANQ3,%fp3      // ...Q3+SQ4
263        faddx           TANP2,%fp2      // ...P2+SP3
264
265        fmulx           %fp0,%fp3               // ...S(Q3+SQ4)
266        fmulx           %fp0,%fp2               // ...S(P2+SP3)
267
268        faddx           TANQ2,%fp3      // ...Q2+S(Q3+SQ4)
269        faddx           TANP1,%fp2      // ...P1+S(P2+SP3)
270
271        fmulx           %fp0,%fp3               // ...S(Q2+S(Q3+SQ4))
272        fmulx           %fp0,%fp2               // ...S(P1+S(P2+SP3))
273
274        faddx           TANQ1,%fp3      // ...Q1+S(Q2+S(Q3+SQ4))
275        fmulx           %fp1,%fp2               // ...RS(P1+S(P2+SP3))
276
277        fmulx           %fp3,%fp0               // ...S(Q1+S(Q2+S(Q3+SQ4)))
278       
279
280        faddx           %fp2,%fp1               // ...R+RS(P1+S(P2+SP3))
281        fadds           #0x3F800000,%fp0        // ...1+S(Q1+...)
282       
283
284        fmovex          %fp1,-(%sp)
285        eoril           #0x80000000,(%sp)
286
287        fmovel          %d1,%fpcr               //restore users exceptions
288        fdivx           (%sp)+,%fp0     //last inst - possible exception set
289
290        bra             t_frcinx
291
292TANBORS:
293//--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
294//--IF |X| < 2**(-40), RETURN X OR 1.
295        cmpil           #0x3FFF8000,%d0
296        bgts            REDUCEX
297
298TANSM:
299
300        fmovex          %fp0,-(%sp)
301        fmovel          %d1,%fpcr                //restore users exceptions
302        fmovex          (%sp)+,%fp0     //last inst - possible exception set
303
304        bra             t_frcinx
305
306
307REDUCEX:
308//--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
309//--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
310//--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
311
312        fmovemx %fp2-%fp5,-(%a7)        // ...save FP2 through FP5
313        movel           %d2,-(%a7)
314        fmoves         #0x00000000,%fp1
315
316//--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
317//--there is a danger of unwanted overflow in first LOOP iteration.  In this
318//--case, reduce argument by one remainder step to make subsequent reduction
319//--safe.
320        cmpil   #0x7ffeffff,%d0         //is argument dangerously large?
321        bnes    LOOP
322        movel   #0x7ffe0000,FP_SCR2(%a6)        //yes
323//                                      ;create 2**16383*PI/2
324        movel   #0xc90fdaa2,FP_SCR2+4(%a6)
325        clrl    FP_SCR2+8(%a6)
326        ftstx   %fp0                    //test sign of argument
327        movel   #0x7fdc0000,FP_SCR3(%a6)        //create low half of 2**16383*
328//                                      ;PI/2 at FP_SCR3
329        movel   #0x85a308d3,FP_SCR3+4(%a6)
330        clrl   FP_SCR3+8(%a6)
331        fblt    red_neg
332        orw     #0x8000,FP_SCR2(%a6)    //positive arg
333        orw     #0x8000,FP_SCR3(%a6)
334red_neg:
335        faddx  FP_SCR2(%a6),%fp0                //high part of reduction is exact
336        fmovex  %fp0,%fp1               //save high result in fp1
337        faddx  FP_SCR3(%a6),%fp0                //low part of reduction
338        fsubx  %fp0,%fp1                        //determine low component of result
339        faddx  FP_SCR3(%a6),%fp1                //fp0/fp1 are reduced argument.
340
341//--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
342//--integer quotient will be stored in N
343//--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
344
345LOOP:
346        fmovex          %fp0,INARG(%a6) // ...+-2**K * F, 1 <= F < 2
347        movew           INARG(%a6),%d0
348        movel          %d0,%a1          // ...save a copy of D0
349        andil           #0x00007FFF,%d0
350        subil           #0x00003FFF,%d0 // ...D0 IS K
351        cmpil           #28,%d0
352        bles            LASTLOOP
353CONTLOOP:
354        subil           #27,%d0  // ...D0 IS L := K-27
355        movel           #0,ENDFLAG(%a6)
356        bras            WORK
357LASTLOOP:
358        clrl            %d0             // ...D0 IS L := 0
359        movel           #1,ENDFLAG(%a6)
360
361WORK:
362//--FIND THE REMAINDER OF (R,r) W.R.T.  2**L * (PI/2). L IS SO CHOSEN
363//--THAT        INT( X * (2/PI) / 2**(L) ) < 2**29.
364
365//--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
366//--2**L * (PIby2_1), 2**L * (PIby2_2)
367
368        movel           #0x00003FFE,%d2 // ...BIASED EXPO OF 2/PI
369        subl            %d0,%d2         // ...BIASED EXPO OF 2**(-L)*(2/PI)
370
371        movel           #0xA2F9836E,FP_SCR1+4(%a6)
372        movel           #0x4E44152A,FP_SCR1+8(%a6)
373        movew           %d2,FP_SCR1(%a6)        // ...FP_SCR1 is 2**(-L)*(2/PI)
374
375        fmovex          %fp0,%fp2
376        fmulx           FP_SCR1(%a6),%fp2
377//--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
378//--FLOATING POINT FORMAT, THE TWO FMOVE'S      FMOVE.L FP <--> N
379//--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
380//--(SIGN(INARG)*2**63  +       FP2) - SIGN(INARG)*2**63 WILL GIVE
381//--US THE DESIRED VALUE IN FLOATING POINT.
382
383//--HIDE SIX CYCLES OF INSTRUCTION
384        movel           %a1,%d2
385        swap            %d2
386        andil           #0x80000000,%d2
387        oril            #0x5F000000,%d2 // ...D2 IS SIGN(INARG)*2**63 IN SGL
388        movel           %d2,TWOTO63(%a6)
389
390        movel           %d0,%d2
391        addil           #0x00003FFF,%d2 // ...BIASED EXPO OF 2**L * (PI/2)
392
393//--FP2 IS READY
394        fadds           TWOTO63(%a6),%fp2       // ...THE FRACTIONAL PART OF FP1 IS ROUNDED
395
396//--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1  and  2**(L)*Piby2_2
397        movew           %d2,FP_SCR2(%a6)
398        clrw           FP_SCR2+2(%a6)
399        movel           #0xC90FDAA2,FP_SCR2+4(%a6)
400        clrl            FP_SCR2+8(%a6)          // ...FP_SCR2 is  2**(L) * Piby2_1     
401
402//--FP2 IS READY
403        fsubs           TWOTO63(%a6),%fp2               // ...FP2 is N
404
405        addil           #0x00003FDD,%d0
406        movew           %d0,FP_SCR3(%a6)
407        clrw           FP_SCR3+2(%a6)
408        movel           #0x85A308D3,FP_SCR3+4(%a6)
409        clrl            FP_SCR3+8(%a6)          // ...FP_SCR3 is 2**(L) * Piby2_2
410
411        movel           ENDFLAG(%a6),%d0
412
413//--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
414//--P2 = 2**(L) * Piby2_2
415        fmovex          %fp2,%fp4
416        fmulx           FP_SCR2(%a6),%fp4               // ...W = N*P1
417        fmovex          %fp2,%fp5
418        fmulx           FP_SCR3(%a6),%fp5               // ...w = N*P2
419        fmovex          %fp4,%fp3
420//--we want P+p = W+w  but  |p| <= half ulp of P
421//--Then, we need to compute  A := R-P   and  a := r-p
422        faddx           %fp5,%fp3                       // ...FP3 is P
423        fsubx           %fp3,%fp4                       // ...W-P
424
425        fsubx           %fp3,%fp0                       // ...FP0 is A := R - P
426        faddx           %fp5,%fp4                       // ...FP4 is p = (W-P)+w
427
428        fmovex          %fp0,%fp3                       // ...FP3 A
429        fsubx           %fp4,%fp1                       // ...FP1 is a := r - p
430
431//--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
432//--|r| <= half ulp of R.
433        faddx           %fp1,%fp0                       // ...FP0 is R := A+a
434//--No need to calculate r if this is the last loop
435        cmpil           #0,%d0
436        bgt             RESTORE
437
438//--Need to calculate r
439        fsubx           %fp0,%fp3                       // ...A-R
440        faddx           %fp3,%fp1                       // ...FP1 is r := (A-R)+a
441        bra             LOOP
442
443RESTORE:
444        fmovel          %fp2,N(%a6)
445        movel           (%a7)+,%d2
446        fmovemx (%a7)+,%fp2-%fp5
447
448       
449        movel           N(%a6),%d0
450        rorl            #1,%d0
451
452
453        bra             TANCONT
454
455        |end
Note: See TracBrowser for help on using the repository browser.