source: rtems/bsps/m68k/shared/fpsp/stan.S @ d584269

5
Last change on this file since d584269 was 3cf2bf63, checked in by Sebastian Huber <sebastian.huber@…>, on 03/26/18 at 10:17:06

bsps/m68k: Move fpsp support to bsps

This patch is a part of the BSP source reorganization.

Update #3285.

  • Property mode set to 100644
File size: 13.3 KB
Line 
1#include "fpsp-namespace.h"
2//
3//
4//      stan.sa 3.3 7/29/91
5//
6//      The entry point stan computes the tangent of
7//      an input argument;
8//      stand does the same except for denormalized input.
9//
10//      Input: Double-extended number X in location pointed to
11//              by address register a0.
12//
13//      Output: The value tan(X) returned in floating-point register Fp0.
14//
15//      Accuracy and Monotonicity: The returned result is within 3 ulp in
16//              64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
17//              result is subsequently rounded to double precision. The
18//              result is provably monotonic in double precision.
19//
20//      Speed: The program sTAN takes approximately 170 cycles for
21//              input argument X such that |X| < 15Pi, which is the the usual
22//              situation.
23//
24//      Algorithm:
25//
26//      1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
27//
28//      2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
29//              k = N mod 2, so in particular, k = 0 or 1.
30//
31//      3. If k is odd, go to 5.
32//
33//      4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a
34//              rational function U/V where
35//              U = r + r*s*(P1 + s*(P2 + s*P3)), and
36//              V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))),  s = r*r.
37//              Exit.
38//
39//      4. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a
40//              rational function U/V where
41//              U = r + r*s*(P1 + s*(P2 + s*P3)), and
42//              V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,
43//              -Cot(r) = -V/U. Exit.
44//
45//      6. If |X| > 1, go to 8.
46//
47//      7. (|X|<2**(-40)) Tan(X) = X. Exit.
48//
49//      8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
50//
51
52//              Copyright (C) Motorola, Inc. 1990
53//                      All Rights Reserved
54//
55//      THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
56//      The copyright notice above does not evidence any
57//      actual or intended publication of such source code.
58
59//STAN  idnt    2,1 | Motorola 040 Floating Point Software Package
60
61        |section        8
62
63#include "fpsp.defs"
64
65BOUNDS1:        .long 0x3FD78000,0x4004BC7E
66TWOBYPI:        .long 0x3FE45F30,0x6DC9C883
67
68TANQ4:  .long 0x3EA0B759,0xF50F8688
69TANP3:  .long 0xBEF2BAA5,0xA8924F04
70
71TANQ3:  .long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000
72
73TANP2:  .long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
74
75TANQ2:  .long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
76
77TANP1:  .long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
78
79TANQ1:  .long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
80
81INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
82
83TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
84TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
85
86//--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
87//--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
88//--MOST 69 BITS LONG.
89        .global PITBL
90PITBL:
91  .long  0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
92  .long  0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
93  .long  0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
94  .long  0xC0040000,0xB6365E22,0xEE46F000,0x21480000
95  .long  0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
96  .long  0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
97  .long  0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
98  .long  0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
99  .long  0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
100  .long  0xC0040000,0x90836524,0x88034B96,0x20B00000
101  .long  0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
102  .long  0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
103  .long  0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
104  .long  0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
105  .long  0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
106  .long  0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
107  .long  0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
108  .long  0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
109  .long  0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
110  .long  0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
111  .long  0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
112  .long  0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
113  .long  0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
114  .long  0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
115  .long  0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
116  .long  0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
117  .long  0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
118  .long  0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
119  .long  0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
120  .long  0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
121  .long  0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
122  .long  0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
123  .long  0x00000000,0x00000000,0x00000000,0x00000000
124  .long  0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
125  .long  0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
126  .long  0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
127  .long  0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
128  .long  0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
129  .long  0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
130  .long  0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
131  .long  0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
132  .long  0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
133  .long  0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
134  .long  0x40030000,0x8A3AE64F,0x76F80584,0x21080000
135  .long  0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
136  .long  0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
137  .long  0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
138  .long  0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
139  .long  0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
140  .long  0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
141  .long  0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
142  .long  0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
143  .long  0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
144  .long  0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
145  .long  0x40040000,0x8A3AE64F,0x76F80584,0x21880000
146  .long  0x40040000,0x90836524,0x88034B96,0xA0B00000
147  .long  0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
148  .long  0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
149  .long  0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
150  .long  0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
151  .long  0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
152  .long  0x40040000,0xB6365E22,0xEE46F000,0xA1480000
153  .long  0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
154  .long  0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
155  .long  0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
156
157        .set    INARG,FP_SCR4
158
159        .set    TWOTO63,L_SCR1
160        .set    ENDFLAG,L_SCR2
161        .set    N,L_SCR3
162
163        | xref  t_frcinx
164        |xref   t_extdnrm
165
166        .global stand
167stand:
168//--TAN(X) = X FOR DENORMALIZED X
169
170        bra             t_extdnrm
171
172        .global stan
173stan:
174        fmovex          (%a0),%fp0      // ...LOAD INPUT
175
176        movel           (%a0),%d0
177        movew           4(%a0),%d0
178        andil           #0x7FFFFFFF,%d0
179
180        cmpil           #0x3FD78000,%d0         // ...|X| >= 2**(-40)?
181        bges            TANOK1
182        bra             TANSM
183TANOK1:
184        cmpil           #0x4004BC7E,%d0         // ...|X| < 15 PI?
185        blts            TANMAIN
186        bra             REDUCEX
187
188
189TANMAIN:
190//--THIS IS THE USUAL CASE, |X| <= 15 PI.
191//--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
192        fmovex          %fp0,%fp1
193        fmuld           TWOBYPI,%fp1    // ...X*2/PI
194
195//--HIDE THE NEXT TWO INSTRUCTIONS
196        leal            PITBL+0x200,%a1 // ...TABLE OF N*PI/2, N = -32,...,32
197
198//--FP1 IS NOW READY
199        fmovel          %fp1,%d0                // ...CONVERT TO INTEGER
200
201        asll            #4,%d0
202        addal           %d0,%a1         // ...ADDRESS N*PIBY2 IN Y1, Y2
203
204        fsubx           (%a1)+,%fp0     // ...X-Y1
205//--HIDE THE NEXT ONE
206
207        fsubs           (%a1),%fp0      // ...FP0 IS R = (X-Y1)-Y2
208
209        rorl            #5,%d0
210        andil           #0x80000000,%d0 // ...D0 WAS ODD IFF D0 < 0
211
212TANCONT:
213
214        cmpil           #0,%d0
215        blt             NODD
216
217        fmovex          %fp0,%fp1
218        fmulx           %fp1,%fp1               // ...S = R*R
219
220        fmoved          TANQ4,%fp3
221        fmoved          TANP3,%fp2
222
223        fmulx           %fp1,%fp3               // ...SQ4
224        fmulx           %fp1,%fp2               // ...SP3
225
226        faddd           TANQ3,%fp3      // ...Q3+SQ4
227        faddx           TANP2,%fp2      // ...P2+SP3
228
229        fmulx           %fp1,%fp3               // ...S(Q3+SQ4)
230        fmulx           %fp1,%fp2               // ...S(P2+SP3)
231
232        faddx           TANQ2,%fp3      // ...Q2+S(Q3+SQ4)
233        faddx           TANP1,%fp2      // ...P1+S(P2+SP3)
234
235        fmulx           %fp1,%fp3               // ...S(Q2+S(Q3+SQ4))
236        fmulx           %fp1,%fp2               // ...S(P1+S(P2+SP3))
237
238        faddx           TANQ1,%fp3      // ...Q1+S(Q2+S(Q3+SQ4))
239        fmulx           %fp0,%fp2               // ...RS(P1+S(P2+SP3))
240
241        fmulx           %fp3,%fp1               // ...S(Q1+S(Q2+S(Q3+SQ4)))
242
243
244        faddx           %fp2,%fp0               // ...R+RS(P1+S(P2+SP3))
245
246
247        fadds           #0x3F800000,%fp1        // ...1+S(Q1+...)
248
249        fmovel          %d1,%fpcr               //restore users exceptions
250        fdivx           %fp1,%fp0               //last inst - possible exception set
251
252        bra             t_frcinx
253
254NODD:
255        fmovex          %fp0,%fp1
256        fmulx           %fp0,%fp0               // ...S = R*R
257
258        fmoved          TANQ4,%fp3
259        fmoved          TANP3,%fp2
260
261        fmulx           %fp0,%fp3               // ...SQ4
262        fmulx           %fp0,%fp2               // ...SP3
263
264        faddd           TANQ3,%fp3      // ...Q3+SQ4
265        faddx           TANP2,%fp2      // ...P2+SP3
266
267        fmulx           %fp0,%fp3               // ...S(Q3+SQ4)
268        fmulx           %fp0,%fp2               // ...S(P2+SP3)
269
270        faddx           TANQ2,%fp3      // ...Q2+S(Q3+SQ4)
271        faddx           TANP1,%fp2      // ...P1+S(P2+SP3)
272
273        fmulx           %fp0,%fp3               // ...S(Q2+S(Q3+SQ4))
274        fmulx           %fp0,%fp2               // ...S(P1+S(P2+SP3))
275
276        faddx           TANQ1,%fp3      // ...Q1+S(Q2+S(Q3+SQ4))
277        fmulx           %fp1,%fp2               // ...RS(P1+S(P2+SP3))
278
279        fmulx           %fp3,%fp0               // ...S(Q1+S(Q2+S(Q3+SQ4)))
280
281
282        faddx           %fp2,%fp1               // ...R+RS(P1+S(P2+SP3))
283        fadds           #0x3F800000,%fp0        // ...1+S(Q1+...)
284
285
286        fmovex          %fp1,-(%sp)
287        eoril           #0x80000000,(%sp)
288
289        fmovel          %d1,%fpcr               //restore users exceptions
290        fdivx           (%sp)+,%fp0     //last inst - possible exception set
291
292        bra             t_frcinx
293
294TANBORS:
295//--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
296//--IF |X| < 2**(-40), RETURN X OR 1.
297        cmpil           #0x3FFF8000,%d0
298        bgts            REDUCEX
299
300TANSM:
301
302        fmovex          %fp0,-(%sp)
303        fmovel          %d1,%fpcr                //restore users exceptions
304        fmovex          (%sp)+,%fp0     //last inst - possible exception set
305
306        bra             t_frcinx
307
308
309REDUCEX:
310//--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
311//--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
312//--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
313
314        fmovemx %fp2-%fp5,-(%a7)        // ...save FP2 through FP5
315        movel           %d2,-(%a7)
316        fmoves         #0x00000000,%fp1
317
318//--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
319//--there is a danger of unwanted overflow in first LOOP iteration.  In this
320//--case, reduce argument by one remainder step to make subsequent reduction
321//--safe.
322        cmpil   #0x7ffeffff,%d0         //is argument dangerously large?
323        bnes    LOOP
324        movel   #0x7ffe0000,FP_SCR2(%a6)        //yes
325//                                      ;create 2**16383*PI/2
326        movel   #0xc90fdaa2,FP_SCR2+4(%a6)
327        clrl    FP_SCR2+8(%a6)
328        ftstx   %fp0                    //test sign of argument
329        movel   #0x7fdc0000,FP_SCR3(%a6)        //create low half of 2**16383*
330//                                      ;PI/2 at FP_SCR3
331        movel   #0x85a308d3,FP_SCR3+4(%a6)
332        clrl   FP_SCR3+8(%a6)
333        fblt    red_neg
334        orw     #0x8000,FP_SCR2(%a6)    //positive arg
335        orw     #0x8000,FP_SCR3(%a6)
336red_neg:
337        faddx  FP_SCR2(%a6),%fp0                //high part of reduction is exact
338        fmovex  %fp0,%fp1               //save high result in fp1
339        faddx  FP_SCR3(%a6),%fp0                //low part of reduction
340        fsubx  %fp0,%fp1                        //determine low component of result
341        faddx  FP_SCR3(%a6),%fp1                //fp0/fp1 are reduced argument.
342
343//--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
344//--integer quotient will be stored in N
345//--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
346
347LOOP:
348        fmovex          %fp0,INARG(%a6) // ...+-2**K * F, 1 <= F < 2
349        movew           INARG(%a6),%d0
350        movel          %d0,%a1          // ...save a copy of D0
351        andil           #0x00007FFF,%d0
352        subil           #0x00003FFF,%d0 // ...D0 IS K
353        cmpil           #28,%d0
354        bles            LASTLOOP
355CONTLOOP:
356        subil           #27,%d0  // ...D0 IS L := K-27
357        movel           #0,ENDFLAG(%a6)
358        bras            WORK
359LASTLOOP:
360        clrl            %d0             // ...D0 IS L := 0
361        movel           #1,ENDFLAG(%a6)
362
363WORK:
364//--FIND THE REMAINDER OF (R,r) W.R.T.  2**L * (PI/2). L IS SO CHOSEN
365//--THAT        INT( X * (2/PI) / 2**(L) ) < 2**29.
366
367//--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
368//--2**L * (PIby2_1), 2**L * (PIby2_2)
369
370        movel           #0x00003FFE,%d2 // ...BIASED EXPO OF 2/PI
371        subl            %d0,%d2         // ...BIASED EXPO OF 2**(-L)*(2/PI)
372
373        movel           #0xA2F9836E,FP_SCR1+4(%a6)
374        movel           #0x4E44152A,FP_SCR1+8(%a6)
375        movew           %d2,FP_SCR1(%a6)        // ...FP_SCR1 is 2**(-L)*(2/PI)
376
377        fmovex          %fp0,%fp2
378        fmulx           FP_SCR1(%a6),%fp2
379//--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
380//--FLOATING POINT FORMAT, THE TWO FMOVE'S      FMOVE.L FP <--> N
381//--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
382//--(SIGN(INARG)*2**63  +       FP2) - SIGN(INARG)*2**63 WILL GIVE
383//--US THE DESIRED VALUE IN FLOATING POINT.
384
385//--HIDE SIX CYCLES OF INSTRUCTION
386        movel           %a1,%d2
387        swap            %d2
388        andil           #0x80000000,%d2
389        oril            #0x5F000000,%d2 // ...D2 IS SIGN(INARG)*2**63 IN SGL
390        movel           %d2,TWOTO63(%a6)
391
392        movel           %d0,%d2
393        addil           #0x00003FFF,%d2 // ...BIASED EXPO OF 2**L * (PI/2)
394
395//--FP2 IS READY
396        fadds           TWOTO63(%a6),%fp2       // ...THE FRACTIONAL PART OF FP1 IS ROUNDED
397
398//--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1  and  2**(L)*Piby2_2
399        movew           %d2,FP_SCR2(%a6)
400        clrw           FP_SCR2+2(%a6)
401        movel           #0xC90FDAA2,FP_SCR2+4(%a6)
402        clrl            FP_SCR2+8(%a6)          // ...FP_SCR2 is  2**(L) * Piby2_1
403
404//--FP2 IS READY
405        fsubs           TWOTO63(%a6),%fp2               // ...FP2 is N
406
407        addil           #0x00003FDD,%d0
408        movew           %d0,FP_SCR3(%a6)
409        clrw           FP_SCR3+2(%a6)
410        movel           #0x85A308D3,FP_SCR3+4(%a6)
411        clrl            FP_SCR3+8(%a6)          // ...FP_SCR3 is 2**(L) * Piby2_2
412
413        movel           ENDFLAG(%a6),%d0
414
415//--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
416//--P2 = 2**(L) * Piby2_2
417        fmovex          %fp2,%fp4
418        fmulx           FP_SCR2(%a6),%fp4               // ...W = N*P1
419        fmovex          %fp2,%fp5
420        fmulx           FP_SCR3(%a6),%fp5               // ...w = N*P2
421        fmovex          %fp4,%fp3
422//--we want P+p = W+w  but  |p| <= half ulp of P
423//--Then, we need to compute  A := R-P   and  a := r-p
424        faddx           %fp5,%fp3                       // ...FP3 is P
425        fsubx           %fp3,%fp4                       // ...W-P
426
427        fsubx           %fp3,%fp0                       // ...FP0 is A := R - P
428        faddx           %fp5,%fp4                       // ...FP4 is p = (W-P)+w
429
430        fmovex          %fp0,%fp3                       // ...FP3 A
431        fsubx           %fp4,%fp1                       // ...FP1 is a := r - p
432
433//--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
434//--|r| <= half ulp of R.
435        faddx           %fp1,%fp0                       // ...FP0 is R := A+a
436//--No need to calculate r if this is the last loop
437        cmpil           #0,%d0
438        bgt             RESTORE
439
440//--Need to calculate r
441        fsubx           %fp0,%fp3                       // ...A-R
442        faddx           %fp3,%fp1                       // ...FP1 is r := (A-R)+a
443        bra             LOOP
444
445RESTORE:
446        fmovel          %fp2,N(%a6)
447        movel           (%a7)+,%d2
448        fmovemx (%a7)+,%fp2-%fp5
449
450
451        movel           N(%a6),%d0
452        rorl            #1,%d0
453
454
455        bra             TANCONT
456
457        |end
Note: See TracBrowser for help on using the repository browser.