source: rtems/bsps/m68k/shared/fpsp/slogn.S @ d7d66d7

5
Last change on this file since d7d66d7 was 3cf2bf63, checked in by Sebastian Huber <sebastian.huber@…>, on 03/26/18 at 10:17:06

bsps/m68k: Move fpsp support to bsps

This patch is a part of the BSP source reorganization.

Update #3285.

  • Property mode set to 100644
File size: 19.3 KB
Line 
1#include "fpsp-namespace.h"
2//
3//
4//      slogn.sa 3.1 12/10/90
5//
6//      slogn computes the natural logarithm of an
7//      input value. slognd does the same except the input value is a
8//      denormalized number. slognp1 computes log(1+X), and slognp1d
9//      computes log(1+X) for denormalized X.
10//
11//      Input: Double-extended value in memory location pointed to by address
12//              register a0.
13//
14//      Output: log(X) or log(1+X) returned in floating-point register Fp0.
15//
16//      Accuracy and Monotonicity: The returned result is within 2 ulps in
17//              64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
18//              result is subsequently rounded to double precision. The
19//              result is provably monotonic in double precision.
20//
21//      Speed: The program slogn takes approximately 190 cycles for input
22//              argument X such that |X-1| >= 1/16, which is the the usual
23//              situation. For those arguments, slognp1 takes approximately
24//               210 cycles. For the less common arguments, the program will
25//               run no worse than 10% slower.
26//
27//      Algorithm:
28//      LOGN:
29//      Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
30//              u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
31//
32//      Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
33//              significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
34//              2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
35//
36//      Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
37//              log(1+u) = poly.
38//
39//      Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
40//              by k*log(2) + (log(F) + poly). The values of log(F) are calculated
41//              beforehand and stored in the program.
42//
43//      lognp1:
44//      Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
45//              u where u = 2X/(2+X). Otherwise, move on to Step 2.
46//
47//      Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
48//              of the algorithm for LOGN and compute log(1+X) as
49//              k*log(2) + log(F) + poly where poly approximates log(1+u),
50//              u = (Y-F)/F.
51//
52//      Implementation Notes:
53//      Note 1. There are 64 different possible values for F, thus 64 log(F)'s
54//              need to be tabulated. Moreover, the values of 1/F are also
55//              tabulated so that the division in (Y-F)/F can be performed by a
56//              multiplication.
57//
58//      Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value
59//              Y-F has to be calculated carefully when 1/2 <= X < 3/2.
60//
61//      Note 3. To fully exploit the pipeline, polynomials are usually separated
62//              into two parts evaluated independently before being added up.
63//
64
65//              Copyright (C) Motorola, Inc. 1990
66//                      All Rights Reserved
67//
68//      THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
69//      The copyright notice above does not evidence any
70//      actual or intended publication of such source code.
71
72//slogn idnt    2,1 | Motorola 040 Floating Point Software Package
73
74        |section        8
75
76#include "fpsp.defs"
77
78BOUNDS1:  .long 0x3FFEF07D,0x3FFF8841
79BOUNDS2:  .long 0x3FFE8000,0x3FFFC000
80
81LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
82
83one:    .long 0x3F800000
84zero:   .long 0x00000000
85infty:  .long 0x7F800000
86negone: .long 0xBF800000
87
88LOGA6:  .long 0x3FC2499A,0xB5E4040B
89LOGA5:  .long 0xBFC555B5,0x848CB7DB
90
91LOGA4:  .long 0x3FC99999,0x987D8730
92LOGA3:  .long 0xBFCFFFFF,0xFF6F7E97
93
94LOGA2:  .long 0x3FD55555,0x555555a4
95LOGA1:  .long 0xBFE00000,0x00000008
96
97LOGB5:  .long 0x3F175496,0xADD7DAD6
98LOGB4:  .long 0x3F3C71C2,0xFE80C7E0
99
100LOGB3:  .long 0x3F624924,0x928BCCFF
101LOGB2:  .long 0x3F899999,0x999995EC
102
103LOGB1:  .long 0x3FB55555,0x55555555
104TWO:    .long 0x40000000,0x00000000
105
106LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000
107
108LOGTBL:
109        .long  0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
110        .long  0x3FF70000,0xFF015358,0x833C47E2,0x00000000
111        .long  0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
112        .long  0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
113        .long  0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
114        .long  0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
115        .long  0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
116        .long  0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
117        .long  0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
118        .long  0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
119        .long  0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
120        .long  0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
121        .long  0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
122        .long  0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
123        .long  0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
124        .long  0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
125        .long  0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
126        .long  0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
127        .long  0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
128        .long  0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
129        .long  0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
130        .long  0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
131        .long  0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
132        .long  0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
133        .long  0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
134        .long  0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
135        .long  0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
136        .long  0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
137        .long  0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
138        .long  0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
139        .long  0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
140        .long  0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
141        .long  0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
142        .long  0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
143        .long  0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
144        .long  0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
145        .long  0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
146        .long  0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
147        .long  0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
148        .long  0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
149        .long  0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
150        .long  0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
151        .long  0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
152        .long  0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
153        .long  0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
154        .long  0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
155        .long  0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
156        .long  0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
157        .long  0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
158        .long  0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
159        .long  0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
160        .long  0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
161        .long  0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
162        .long  0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
163        .long  0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
164        .long  0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
165        .long  0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
166        .long  0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
167        .long  0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
168        .long  0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
169        .long  0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
170        .long  0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
171        .long  0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
172        .long  0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
173        .long  0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
174        .long  0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
175        .long  0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
176        .long  0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
177        .long  0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
178        .long  0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
179        .long  0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
180        .long  0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
181        .long  0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
182        .long  0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
183        .long  0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
184        .long  0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
185        .long  0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
186        .long  0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
187        .long  0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
188        .long  0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
189        .long  0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
190        .long  0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
191        .long  0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
192        .long  0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
193        .long  0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
194        .long  0x3FFE0000,0x825EFCED,0x49369330,0x00000000
195        .long  0x3FFE0000,0x9868C809,0x868C8098,0x00000000
196        .long  0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
197        .long  0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
198        .long  0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
199        .long  0x3FFE0000,0x95A02568,0x095A0257,0x00000000
200        .long  0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
201        .long  0x3FFE0000,0x94458094,0x45809446,0x00000000
202        .long  0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
203        .long  0x3FFE0000,0x92F11384,0x0497889C,0x00000000
204        .long  0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
205        .long  0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
206        .long  0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
207        .long  0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
208        .long  0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
209        .long  0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
210        .long  0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
211        .long  0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
212        .long  0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
213        .long  0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
214        .long  0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
215        .long  0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
216        .long  0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
217        .long  0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
218        .long  0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
219        .long  0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
220        .long  0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
221        .long  0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
222        .long  0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
223        .long  0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
224        .long  0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
225        .long  0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
226        .long  0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
227        .long  0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
228        .long  0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
229        .long  0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
230        .long  0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
231        .long  0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
232        .long  0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
233        .long  0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
234        .long  0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
235        .long  0x3FFE0000,0x80808080,0x80808081,0x00000000
236        .long  0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
237
238        .set    ADJK,L_SCR1
239
240        .set    X,FP_SCR1
241        .set    XDCARE,X+2
242        .set    XFRAC,X+4
243
244        .set    F,FP_SCR2
245        .set    FFRAC,F+4
246
247        .set    KLOG2,FP_SCR3
248
249        .set    SAVEU,FP_SCR4
250
251        | xref  t_frcinx
252        |xref   t_extdnrm
253        |xref   t_operr
254        |xref   t_dz
255
256        .global slognd
257slognd:
258//--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
259
260        movel           #-100,ADJK(%a6) // ...INPUT = 2^(ADJK) * FP0
261
262//----normalize the input value by left shifting k bits (k to be determined
263//----below), adjusting exponent and storing -k to  ADJK
264//----the value TWOTO100 is no longer needed.
265//----Note that this code assumes the denormalized input is NON-ZERO.
266
267     moveml     %d2-%d7,-(%a7)          // ...save some registers
268     movel      #0x00000000,%d3         // ...D3 is exponent of smallest norm. #
269     movel      4(%a0),%d4
270     movel      8(%a0),%d5              // ...(D4,D5) is (Hi_X,Lo_X)
271     clrl       %d2                     // ...D2 used for holding K
272
273     tstl       %d4
274     bnes       HiX_not0
275
276HiX_0:
277     movel      %d5,%d4
278     clrl       %d5
279     movel      #32,%d2
280     clrl       %d6
281     bfffo      %d4{#0:#32},%d6
282     lsll      %d6,%d4
283     addl       %d6,%d2                 // ...(D3,D4,D5) is normalized
284
285     movel      %d3,X(%a6)
286     movel      %d4,XFRAC(%a6)
287     movel      %d5,XFRAC+4(%a6)
288     negl       %d2
289     movel      %d2,ADJK(%a6)
290     fmovex     X(%a6),%fp0
291     moveml     (%a7)+,%d2-%d7          // ...restore registers
292     lea        X(%a6),%a0
293     bras       LOGBGN                  // ...begin regular log(X)
294
295
296HiX_not0:
297     clrl       %d6
298     bfffo      %d4{#0:#32},%d6         // ...find first 1
299     movel      %d6,%d2                 // ...get k
300     lsll       %d6,%d4
301     movel      %d5,%d7                 // ...a copy of D5
302     lsll       %d6,%d5
303     negl       %d6
304     addil      #32,%d6
305     lsrl       %d6,%d7
306     orl        %d7,%d4                 // ...(D3,D4,D5) normalized
307
308     movel      %d3,X(%a6)
309     movel      %d4,XFRAC(%a6)
310     movel      %d5,XFRAC+4(%a6)
311     negl       %d2
312     movel      %d2,ADJK(%a6)
313     fmovex     X(%a6),%fp0
314     moveml     (%a7)+,%d2-%d7          // ...restore registers
315     lea        X(%a6),%a0
316     bras       LOGBGN                  // ...begin regular log(X)
317
318
319        .global slogn
320slogn:
321//--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
322
323        fmovex          (%a0),%fp0      // ...LOAD INPUT
324        movel           #0x00000000,ADJK(%a6)
325
326LOGBGN:
327//--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
328//--A FINITE, NON-ZERO, NORMALIZED NUMBER.
329
330        movel   (%a0),%d0
331        movew   4(%a0),%d0
332
333        movel   (%a0),X(%a6)
334        movel   4(%a0),X+4(%a6)
335        movel   8(%a0),X+8(%a6)
336
337        cmpil   #0,%d0          // ...CHECK IF X IS NEGATIVE
338        blt     LOGNEG          // ...LOG OF NEGATIVE ARGUMENT IS INVALID
339        cmp2l   BOUNDS1,%d0     // ...X IS POSITIVE, CHECK IF X IS NEAR 1
340        bcc     LOGNEAR1        // ...BOUNDS IS ROUGHLY [15/16, 17/16]
341
342LOGMAIN:
343//--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
344
345//--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
346//--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
347//--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
348//--                     = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
349//--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
350//--LOG(1+U) CAN BE VERY EFFICIENT.
351//--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
352//--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
353
354//--GET K, Y, F, AND ADDRESS OF 1/F.
355        asrl    #8,%d0
356        asrl    #8,%d0          // ...SHIFTED 16 BITS, BIASED EXPO. OF X
357        subil   #0x3FFF,%d0     // ...THIS IS K
358        addl    ADJK(%a6),%d0   // ...ADJUST K, ORIGINAL INPUT MAY BE  DENORM.
359        lea     LOGTBL,%a0      // ...BASE ADDRESS OF 1/F AND LOG(F)
360        fmovel  %d0,%fp1                // ...CONVERT K TO FLOATING-POINT FORMAT
361
362//--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
363        movel   #0x3FFF0000,X(%a6)      // ...X IS NOW Y, I.E. 2^(-K)*X
364        movel   XFRAC(%a6),FFRAC(%a6)
365        andil   #0xFE000000,FFRAC(%a6) // ...FIRST 7 BITS OF Y
366        oril    #0x01000000,FFRAC(%a6) // ...GET F: ATTACH A 1 AT THE EIGHTH BIT
367        movel   FFRAC(%a6),%d0  // ...READY TO GET ADDRESS OF 1/F
368        andil   #0x7E000000,%d0
369        asrl    #8,%d0
370        asrl    #8,%d0
371        asrl    #4,%d0          // ...SHIFTED 20, D0 IS THE DISPLACEMENT
372        addal   %d0,%a0         // ...A0 IS THE ADDRESS FOR 1/F
373
374        fmovex  X(%a6),%fp0
375        movel   #0x3fff0000,F(%a6)
376        clrl    F+8(%a6)
377        fsubx   F(%a6),%fp0             // ...Y-F
378        fmovemx %fp2-%fp2/%fp3,-(%sp)   // ...SAVE FP2 WHILE FP0 IS NOT READY
379//--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
380//--REGISTERS SAVED: FPCR, FP1, FP2
381
382LP1CONT1:
383//--AN RE-ENTRY POINT FOR LOGNP1
384        fmulx   (%a0),%fp0      // ...FP0 IS U = (Y-F)/F
385        fmulx   LOGOF2,%fp1     // ...GET K*LOG2 WHILE FP0 IS NOT READY
386        fmovex  %fp0,%fp2
387        fmulx   %fp2,%fp2               // ...FP2 IS V=U*U
388        fmovex  %fp1,KLOG2(%a6) // ...PUT K*LOG2 IN MEMORY, FREE FP1
389
390//--LOG(1+U) IS APPROXIMATED BY
391//--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
392//--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]
393
394        fmovex  %fp2,%fp3
395        fmovex  %fp2,%fp1
396
397        fmuld   LOGA6,%fp1      // ...V*A6
398        fmuld   LOGA5,%fp2      // ...V*A5
399
400        faddd   LOGA4,%fp1      // ...A4+V*A6
401        faddd   LOGA3,%fp2      // ...A3+V*A5
402
403        fmulx   %fp3,%fp1               // ...V*(A4+V*A6)
404        fmulx   %fp3,%fp2               // ...V*(A3+V*A5)
405
406        faddd   LOGA2,%fp1      // ...A2+V*(A4+V*A6)
407        faddd   LOGA1,%fp2      // ...A1+V*(A3+V*A5)
408
409        fmulx   %fp3,%fp1               // ...V*(A2+V*(A4+V*A6))
410        addal   #16,%a0         // ...ADDRESS OF LOG(F)
411        fmulx   %fp3,%fp2               // ...V*(A1+V*(A3+V*A5)), FP3 RELEASED
412
413        fmulx   %fp0,%fp1               // ...U*V*(A2+V*(A4+V*A6))
414        faddx   %fp2,%fp0               // ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED
415
416        faddx   (%a0),%fp1      // ...LOG(F)+U*V*(A2+V*(A4+V*A6))
417        fmovemx  (%sp)+,%fp2-%fp2/%fp3  // ...RESTORE FP2
418        faddx   %fp1,%fp0               // ...FP0 IS LOG(F) + LOG(1+U)
419
420        fmovel  %d1,%fpcr
421        faddx   KLOG2(%a6),%fp0 // ...FINAL ADD
422        bra     t_frcinx
423
424
425LOGNEAR1:
426//--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
427        fmovex  %fp0,%fp1
428        fsubs   one,%fp1                // ...FP1 IS X-1
429        fadds   one,%fp0                // ...FP0 IS X+1
430        faddx   %fp1,%fp1               // ...FP1 IS 2(X-1)
431//--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
432//--IN U, U = 2(X-1)/(X+1) = FP1/FP0
433
434LP1CONT2:
435//--THIS IS AN RE-ENTRY POINT FOR LOGNP1
436        fdivx   %fp0,%fp1               // ...FP1 IS U
437        fmovemx %fp2-%fp2/%fp3,-(%sp)    // ...SAVE FP2
438//--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
439//--LET V=U*U, W=V*V, CALCULATE
440//--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
441//--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
442        fmovex  %fp1,%fp0
443        fmulx   %fp0,%fp0       // ...FP0 IS V
444        fmovex  %fp1,SAVEU(%a6) // ...STORE U IN MEMORY, FREE FP1
445        fmovex  %fp0,%fp1
446        fmulx   %fp1,%fp1       // ...FP1 IS W
447
448        fmoved  LOGB5,%fp3
449        fmoved  LOGB4,%fp2
450
451        fmulx   %fp1,%fp3       // ...W*B5
452        fmulx   %fp1,%fp2       // ...W*B4
453
454        faddd   LOGB3,%fp3 // ...B3+W*B5
455        faddd   LOGB2,%fp2 // ...B2+W*B4
456
457        fmulx   %fp3,%fp1       // ...W*(B3+W*B5), FP3 RELEASED
458
459        fmulx   %fp0,%fp2       // ...V*(B2+W*B4)
460
461        faddd   LOGB1,%fp1 // ...B1+W*(B3+W*B5)
462        fmulx   SAVEU(%a6),%fp0 // ...FP0 IS U*V
463
464        faddx   %fp2,%fp1       // ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
465        fmovemx (%sp)+,%fp2-%fp2/%fp3 // ...FP2 RESTORED
466
467        fmulx   %fp1,%fp0       // ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
468
469        fmovel  %d1,%fpcr
470        faddx   SAVEU(%a6),%fp0
471        bra     t_frcinx
472        rts
473
474LOGNEG:
475//--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
476        bra     t_operr
477
478        .global slognp1d
479slognp1d:
480//--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
481// Simply return the denorm
482
483        bra     t_extdnrm
484
485        .global slognp1
486slognp1:
487//--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
488
489        fmovex  (%a0),%fp0      // ...LOAD INPUT
490        fabsx   %fp0            //test magnitude
491        fcmpx   LTHOLD,%fp0     //compare with min threshold
492        fbgt    LP1REAL         //if greater, continue
493        fmovel  #0,%fpsr                //clr N flag from compare
494        fmovel  %d1,%fpcr
495        fmovex  (%a0),%fp0      //return signed argument
496        bra     t_frcinx
497
498LP1REAL:
499        fmovex  (%a0),%fp0      // ...LOAD INPUT
500        movel   #0x00000000,ADJK(%a6)
501        fmovex  %fp0,%fp1       // ...FP1 IS INPUT Z
502        fadds   one,%fp0        // ...X := ROUND(1+Z)
503        fmovex  %fp0,X(%a6)
504        movew   XFRAC(%a6),XDCARE(%a6)
505        movel   X(%a6),%d0
506        cmpil   #0,%d0
507        ble     LP1NEG0 // ...LOG OF ZERO OR -VE
508        cmp2l   BOUNDS2,%d0
509        bcs     LOGMAIN // ...BOUNDS2 IS [1/2,3/2]
510//--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
511//--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
512//--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
513
514LP1NEAR1:
515//--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
516        cmp2l   BOUNDS1,%d0
517        bcss    LP1CARE
518
519LP1ONE16:
520//--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
521//--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
522        faddx   %fp1,%fp1       // ...FP1 IS 2Z
523        fadds   one,%fp0        // ...FP0 IS 1+X
524//--U = FP1/FP0
525        bra     LP1CONT2
526
527LP1CARE:
528//--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
529//--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
530//--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
531//--THERE ARE ONLY TWO CASES.
532//--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
533//--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
534//--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
535//--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
536
537        movel   XFRAC(%a6),FFRAC(%a6)
538        andil   #0xFE000000,FFRAC(%a6)
539        oril    #0x01000000,FFRAC(%a6)  // ...F OBTAINED
540        cmpil   #0x3FFF8000,%d0 // ...SEE IF 1+Z > 1
541        bges    KISZERO
542
543KISNEG1:
544        fmoves  TWO,%fp0
545        movel   #0x3fff0000,F(%a6)
546        clrl    F+8(%a6)
547        fsubx   F(%a6),%fp0     // ...2-F
548        movel   FFRAC(%a6),%d0
549        andil   #0x7E000000,%d0
550        asrl    #8,%d0
551        asrl    #8,%d0
552        asrl    #4,%d0          // ...D0 CONTAINS DISPLACEMENT FOR 1/F
553        faddx   %fp1,%fp1               // ...GET 2Z
554        fmovemx %fp2-%fp2/%fp3,-(%sp)   // ...SAVE FP2
555        faddx   %fp1,%fp0               // ...FP0 IS Y-F = (2-F)+2Z
556        lea     LOGTBL,%a0      // ...A0 IS ADDRESS OF 1/F
557        addal   %d0,%a0
558        fmoves  negone,%fp1     // ...FP1 IS K = -1
559        bra     LP1CONT1
560
561KISZERO:
562        fmoves  one,%fp0
563        movel   #0x3fff0000,F(%a6)
564        clrl    F+8(%a6)
565        fsubx   F(%a6),%fp0             // ...1-F
566        movel   FFRAC(%a6),%d0
567        andil   #0x7E000000,%d0
568        asrl    #8,%d0
569        asrl    #8,%d0
570        asrl    #4,%d0
571        faddx   %fp1,%fp0               // ...FP0 IS Y-F
572        fmovemx %fp2-%fp2/%fp3,-(%sp)   // ...FP2 SAVED
573        lea     LOGTBL,%a0
574        addal   %d0,%a0         // ...A0 IS ADDRESS OF 1/F
575        fmoves  zero,%fp1       // ...FP1 IS K = 0
576        bra     LP1CONT1
577
578LP1NEG0:
579//--FPCR SAVED. D0 IS X IN COMPACT FORM.
580        cmpil   #0,%d0
581        blts    LP1NEG
582LP1ZERO:
583        fmoves  negone,%fp0
584
585        fmovel  %d1,%fpcr
586        bra t_dz
587
588LP1NEG:
589        fmoves  zero,%fp0
590
591        fmovel  %d1,%fpcr
592        bra     t_operr
593
594        |end
Note: See TracBrowser for help on using the repository browser.