1 | #include "fpsp-namespace.h" |
---|
2 | // |
---|
3 | // |
---|
4 | // slogn.sa 3.1 12/10/90 |
---|
5 | // |
---|
6 | // slogn computes the natural logarithm of an |
---|
7 | // input value. slognd does the same except the input value is a |
---|
8 | // denormalized number. slognp1 computes log(1+X), and slognp1d |
---|
9 | // computes log(1+X) for denormalized X. |
---|
10 | // |
---|
11 | // Input: Double-extended value in memory location pointed to by address |
---|
12 | // register a0. |
---|
13 | // |
---|
14 | // Output: log(X) or log(1+X) returned in floating-point register Fp0. |
---|
15 | // |
---|
16 | // Accuracy and Monotonicity: The returned result is within 2 ulps in |
---|
17 | // 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the |
---|
18 | // result is subsequently rounded to double precision. The |
---|
19 | // result is provably monotonic in double precision. |
---|
20 | // |
---|
21 | // Speed: The program slogn takes approximately 190 cycles for input |
---|
22 | // argument X such that |X-1| >= 1/16, which is the the usual |
---|
23 | // situation. For those arguments, slognp1 takes approximately |
---|
24 | // 210 cycles. For the less common arguments, the program will |
---|
25 | // run no worse than 10% slower. |
---|
26 | // |
---|
27 | // Algorithm: |
---|
28 | // LOGN: |
---|
29 | // Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in |
---|
30 | // u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2. |
---|
31 | // |
---|
32 | // Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven |
---|
33 | // significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base |
---|
34 | // 2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7). |
---|
35 | // |
---|
36 | // Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u, |
---|
37 | // log(1+u) = poly. |
---|
38 | // |
---|
39 | // Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) |
---|
40 | // by k*log(2) + (log(F) + poly). The values of log(F) are calculated |
---|
41 | // beforehand and stored in the program. |
---|
42 | // |
---|
43 | // lognp1: |
---|
44 | // Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in |
---|
45 | // u where u = 2X/(2+X). Otherwise, move on to Step 2. |
---|
46 | // |
---|
47 | // Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2 |
---|
48 | // of the algorithm for LOGN and compute log(1+X) as |
---|
49 | // k*log(2) + log(F) + poly where poly approximates log(1+u), |
---|
50 | // u = (Y-F)/F. |
---|
51 | // |
---|
52 | // Implementation Notes: |
---|
53 | // Note 1. There are 64 different possible values for F, thus 64 log(F)'s |
---|
54 | // need to be tabulated. Moreover, the values of 1/F are also |
---|
55 | // tabulated so that the division in (Y-F)/F can be performed by a |
---|
56 | // multiplication. |
---|
57 | // |
---|
58 | // Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value |
---|
59 | // Y-F has to be calculated carefully when 1/2 <= X < 3/2. |
---|
60 | // |
---|
61 | // Note 3. To fully exploit the pipeline, polynomials are usually separated |
---|
62 | // into two parts evaluated independently before being added up. |
---|
63 | // |
---|
64 | |
---|
65 | // Copyright (C) Motorola, Inc. 1990 |
---|
66 | // All Rights Reserved |
---|
67 | // |
---|
68 | // THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA |
---|
69 | // The copyright notice above does not evidence any |
---|
70 | // actual or intended publication of such source code. |
---|
71 | |
---|
72 | //slogn idnt 2,1 | Motorola 040 Floating Point Software Package |
---|
73 | |
---|
74 | |section 8 |
---|
75 | |
---|
76 | #include "fpsp.defs" |
---|
77 | |
---|
78 | BOUNDS1: .long 0x3FFEF07D,0x3FFF8841 |
---|
79 | BOUNDS2: .long 0x3FFE8000,0x3FFFC000 |
---|
80 | |
---|
81 | LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 |
---|
82 | |
---|
83 | one: .long 0x3F800000 |
---|
84 | zero: .long 0x00000000 |
---|
85 | infty: .long 0x7F800000 |
---|
86 | negone: .long 0xBF800000 |
---|
87 | |
---|
88 | LOGA6: .long 0x3FC2499A,0xB5E4040B |
---|
89 | LOGA5: .long 0xBFC555B5,0x848CB7DB |
---|
90 | |
---|
91 | LOGA4: .long 0x3FC99999,0x987D8730 |
---|
92 | LOGA3: .long 0xBFCFFFFF,0xFF6F7E97 |
---|
93 | |
---|
94 | LOGA2: .long 0x3FD55555,0x555555a4 |
---|
95 | LOGA1: .long 0xBFE00000,0x00000008 |
---|
96 | |
---|
97 | LOGB5: .long 0x3F175496,0xADD7DAD6 |
---|
98 | LOGB4: .long 0x3F3C71C2,0xFE80C7E0 |
---|
99 | |
---|
100 | LOGB3: .long 0x3F624924,0x928BCCFF |
---|
101 | LOGB2: .long 0x3F899999,0x999995EC |
---|
102 | |
---|
103 | LOGB1: .long 0x3FB55555,0x55555555 |
---|
104 | TWO: .long 0x40000000,0x00000000 |
---|
105 | |
---|
106 | LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000 |
---|
107 | |
---|
108 | LOGTBL: |
---|
109 | .long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000 |
---|
110 | .long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000 |
---|
111 | .long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000 |
---|
112 | .long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000 |
---|
113 | .long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000 |
---|
114 | .long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000 |
---|
115 | .long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000 |
---|
116 | .long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000 |
---|
117 | .long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000 |
---|
118 | .long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000 |
---|
119 | .long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000 |
---|
120 | .long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000 |
---|
121 | .long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000 |
---|
122 | .long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000 |
---|
123 | .long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000 |
---|
124 | .long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000 |
---|
125 | .long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000 |
---|
126 | .long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000 |
---|
127 | .long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000 |
---|
128 | .long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000 |
---|
129 | .long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000 |
---|
130 | .long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000 |
---|
131 | .long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000 |
---|
132 | .long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000 |
---|
133 | .long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000 |
---|
134 | .long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000 |
---|
135 | .long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000 |
---|
136 | .long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000 |
---|
137 | .long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000 |
---|
138 | .long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000 |
---|
139 | .long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000 |
---|
140 | .long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000 |
---|
141 | .long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000 |
---|
142 | .long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000 |
---|
143 | .long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000 |
---|
144 | .long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000 |
---|
145 | .long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000 |
---|
146 | .long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000 |
---|
147 | .long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000 |
---|
148 | .long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000 |
---|
149 | .long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000 |
---|
150 | .long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000 |
---|
151 | .long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000 |
---|
152 | .long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000 |
---|
153 | .long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000 |
---|
154 | .long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000 |
---|
155 | .long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000 |
---|
156 | .long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000 |
---|
157 | .long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000 |
---|
158 | .long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000 |
---|
159 | .long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000 |
---|
160 | .long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000 |
---|
161 | .long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000 |
---|
162 | .long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000 |
---|
163 | .long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000 |
---|
164 | .long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000 |
---|
165 | .long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000 |
---|
166 | .long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000 |
---|
167 | .long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000 |
---|
168 | .long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000 |
---|
169 | .long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000 |
---|
170 | .long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000 |
---|
171 | .long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000 |
---|
172 | .long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000 |
---|
173 | .long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000 |
---|
174 | .long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000 |
---|
175 | .long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000 |
---|
176 | .long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000 |
---|
177 | .long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000 |
---|
178 | .long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000 |
---|
179 | .long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000 |
---|
180 | .long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000 |
---|
181 | .long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000 |
---|
182 | .long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000 |
---|
183 | .long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000 |
---|
184 | .long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000 |
---|
185 | .long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000 |
---|
186 | .long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000 |
---|
187 | .long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000 |
---|
188 | .long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000 |
---|
189 | .long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000 |
---|
190 | .long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000 |
---|
191 | .long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000 |
---|
192 | .long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000 |
---|
193 | .long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000 |
---|
194 | .long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000 |
---|
195 | .long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000 |
---|
196 | .long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000 |
---|
197 | .long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000 |
---|
198 | .long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000 |
---|
199 | .long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000 |
---|
200 | .long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000 |
---|
201 | .long 0x3FFE0000,0x94458094,0x45809446,0x00000000 |
---|
202 | .long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000 |
---|
203 | .long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000 |
---|
204 | .long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000 |
---|
205 | .long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000 |
---|
206 | .long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000 |
---|
207 | .long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000 |
---|
208 | .long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000 |
---|
209 | .long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000 |
---|
210 | .long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000 |
---|
211 | .long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000 |
---|
212 | .long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000 |
---|
213 | .long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000 |
---|
214 | .long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000 |
---|
215 | .long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000 |
---|
216 | .long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000 |
---|
217 | .long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000 |
---|
218 | .long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000 |
---|
219 | .long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000 |
---|
220 | .long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000 |
---|
221 | .long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000 |
---|
222 | .long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000 |
---|
223 | .long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000 |
---|
224 | .long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000 |
---|
225 | .long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000 |
---|
226 | .long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000 |
---|
227 | .long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000 |
---|
228 | .long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000 |
---|
229 | .long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000 |
---|
230 | .long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000 |
---|
231 | .long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000 |
---|
232 | .long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000 |
---|
233 | .long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000 |
---|
234 | .long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000 |
---|
235 | .long 0x3FFE0000,0x80808080,0x80808081,0x00000000 |
---|
236 | .long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000 |
---|
237 | |
---|
238 | .set ADJK,L_SCR1 |
---|
239 | |
---|
240 | .set X,FP_SCR1 |
---|
241 | .set XDCARE,X+2 |
---|
242 | .set XFRAC,X+4 |
---|
243 | |
---|
244 | .set F,FP_SCR2 |
---|
245 | .set FFRAC,F+4 |
---|
246 | |
---|
247 | .set KLOG2,FP_SCR3 |
---|
248 | |
---|
249 | .set SAVEU,FP_SCR4 |
---|
250 | |
---|
251 | | xref t_frcinx |
---|
252 | |xref t_extdnrm |
---|
253 | |xref t_operr |
---|
254 | |xref t_dz |
---|
255 | |
---|
256 | .global slognd |
---|
257 | slognd: |
---|
258 | //--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT |
---|
259 | |
---|
260 | movel #-100,ADJK(%a6) // ...INPUT = 2^(ADJK) * FP0 |
---|
261 | |
---|
262 | //----normalize the input value by left shifting k bits (k to be determined |
---|
263 | //----below), adjusting exponent and storing -k to ADJK |
---|
264 | //----the value TWOTO100 is no longer needed. |
---|
265 | //----Note that this code assumes the denormalized input is NON-ZERO. |
---|
266 | |
---|
267 | moveml %d2-%d7,-(%a7) // ...save some registers |
---|
268 | movel #0x00000000,%d3 // ...D3 is exponent of smallest norm. # |
---|
269 | movel 4(%a0),%d4 |
---|
270 | movel 8(%a0),%d5 // ...(D4,D5) is (Hi_X,Lo_X) |
---|
271 | clrl %d2 // ...D2 used for holding K |
---|
272 | |
---|
273 | tstl %d4 |
---|
274 | bnes HiX_not0 |
---|
275 | |
---|
276 | HiX_0: |
---|
277 | movel %d5,%d4 |
---|
278 | clrl %d5 |
---|
279 | movel #32,%d2 |
---|
280 | clrl %d6 |
---|
281 | bfffo %d4{#0:#32},%d6 |
---|
282 | lsll %d6,%d4 |
---|
283 | addl %d6,%d2 // ...(D3,D4,D5) is normalized |
---|
284 | |
---|
285 | movel %d3,X(%a6) |
---|
286 | movel %d4,XFRAC(%a6) |
---|
287 | movel %d5,XFRAC+4(%a6) |
---|
288 | negl %d2 |
---|
289 | movel %d2,ADJK(%a6) |
---|
290 | fmovex X(%a6),%fp0 |
---|
291 | moveml (%a7)+,%d2-%d7 // ...restore registers |
---|
292 | lea X(%a6),%a0 |
---|
293 | bras LOGBGN // ...begin regular log(X) |
---|
294 | |
---|
295 | |
---|
296 | HiX_not0: |
---|
297 | clrl %d6 |
---|
298 | bfffo %d4{#0:#32},%d6 // ...find first 1 |
---|
299 | movel %d6,%d2 // ...get k |
---|
300 | lsll %d6,%d4 |
---|
301 | movel %d5,%d7 // ...a copy of D5 |
---|
302 | lsll %d6,%d5 |
---|
303 | negl %d6 |
---|
304 | addil #32,%d6 |
---|
305 | lsrl %d6,%d7 |
---|
306 | orl %d7,%d4 // ...(D3,D4,D5) normalized |
---|
307 | |
---|
308 | movel %d3,X(%a6) |
---|
309 | movel %d4,XFRAC(%a6) |
---|
310 | movel %d5,XFRAC+4(%a6) |
---|
311 | negl %d2 |
---|
312 | movel %d2,ADJK(%a6) |
---|
313 | fmovex X(%a6),%fp0 |
---|
314 | moveml (%a7)+,%d2-%d7 // ...restore registers |
---|
315 | lea X(%a6),%a0 |
---|
316 | bras LOGBGN // ...begin regular log(X) |
---|
317 | |
---|
318 | |
---|
319 | .global slogn |
---|
320 | slogn: |
---|
321 | //--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S |
---|
322 | |
---|
323 | fmovex (%a0),%fp0 // ...LOAD INPUT |
---|
324 | movel #0x00000000,ADJK(%a6) |
---|
325 | |
---|
326 | LOGBGN: |
---|
327 | //--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS |
---|
328 | //--A FINITE, NON-ZERO, NORMALIZED NUMBER. |
---|
329 | |
---|
330 | movel (%a0),%d0 |
---|
331 | movew 4(%a0),%d0 |
---|
332 | |
---|
333 | movel (%a0),X(%a6) |
---|
334 | movel 4(%a0),X+4(%a6) |
---|
335 | movel 8(%a0),X+8(%a6) |
---|
336 | |
---|
337 | cmpil #0,%d0 // ...CHECK IF X IS NEGATIVE |
---|
338 | blt LOGNEG // ...LOG OF NEGATIVE ARGUMENT IS INVALID |
---|
339 | cmp2l BOUNDS1,%d0 // ...X IS POSITIVE, CHECK IF X IS NEAR 1 |
---|
340 | bcc LOGNEAR1 // ...BOUNDS IS ROUGHLY [15/16, 17/16] |
---|
341 | |
---|
342 | LOGMAIN: |
---|
343 | //--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1 |
---|
344 | |
---|
345 | //--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY. |
---|
346 | //--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1. |
---|
347 | //--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y) |
---|
348 | //-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F). |
---|
349 | //--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING |
---|
350 | //--LOG(1+U) CAN BE VERY EFFICIENT. |
---|
351 | //--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO |
---|
352 | //--DIVISION IS NEEDED TO CALCULATE (Y-F)/F. |
---|
353 | |
---|
354 | //--GET K, Y, F, AND ADDRESS OF 1/F. |
---|
355 | asrl #8,%d0 |
---|
356 | asrl #8,%d0 // ...SHIFTED 16 BITS, BIASED EXPO. OF X |
---|
357 | subil #0x3FFF,%d0 // ...THIS IS K |
---|
358 | addl ADJK(%a6),%d0 // ...ADJUST K, ORIGINAL INPUT MAY BE DENORM. |
---|
359 | lea LOGTBL,%a0 // ...BASE ADDRESS OF 1/F AND LOG(F) |
---|
360 | fmovel %d0,%fp1 // ...CONVERT K TO FLOATING-POINT FORMAT |
---|
361 | |
---|
362 | //--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F |
---|
363 | movel #0x3FFF0000,X(%a6) // ...X IS NOW Y, I.E. 2^(-K)*X |
---|
364 | movel XFRAC(%a6),FFRAC(%a6) |
---|
365 | andil #0xFE000000,FFRAC(%a6) // ...FIRST 7 BITS OF Y |
---|
366 | oril #0x01000000,FFRAC(%a6) // ...GET F: ATTACH A 1 AT THE EIGHTH BIT |
---|
367 | movel FFRAC(%a6),%d0 // ...READY TO GET ADDRESS OF 1/F |
---|
368 | andil #0x7E000000,%d0 |
---|
369 | asrl #8,%d0 |
---|
370 | asrl #8,%d0 |
---|
371 | asrl #4,%d0 // ...SHIFTED 20, D0 IS THE DISPLACEMENT |
---|
372 | addal %d0,%a0 // ...A0 IS THE ADDRESS FOR 1/F |
---|
373 | |
---|
374 | fmovex X(%a6),%fp0 |
---|
375 | movel #0x3fff0000,F(%a6) |
---|
376 | clrl F+8(%a6) |
---|
377 | fsubx F(%a6),%fp0 // ...Y-F |
---|
378 | fmovemx %fp2-%fp2/%fp3,-(%sp) // ...SAVE FP2 WHILE FP0 IS NOT READY |
---|
379 | //--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K |
---|
380 | //--REGISTERS SAVED: FPCR, FP1, FP2 |
---|
381 | |
---|
382 | LP1CONT1: |
---|
383 | //--AN RE-ENTRY POINT FOR LOGNP1 |
---|
384 | fmulx (%a0),%fp0 // ...FP0 IS U = (Y-F)/F |
---|
385 | fmulx LOGOF2,%fp1 // ...GET K*LOG2 WHILE FP0 IS NOT READY |
---|
386 | fmovex %fp0,%fp2 |
---|
387 | fmulx %fp2,%fp2 // ...FP2 IS V=U*U |
---|
388 | fmovex %fp1,KLOG2(%a6) // ...PUT K*LOG2 IN MEMORY, FREE FP1 |
---|
389 | |
---|
390 | //--LOG(1+U) IS APPROXIMATED BY |
---|
391 | //--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS |
---|
392 | //--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))] |
---|
393 | |
---|
394 | fmovex %fp2,%fp3 |
---|
395 | fmovex %fp2,%fp1 |
---|
396 | |
---|
397 | fmuld LOGA6,%fp1 // ...V*A6 |
---|
398 | fmuld LOGA5,%fp2 // ...V*A5 |
---|
399 | |
---|
400 | faddd LOGA4,%fp1 // ...A4+V*A6 |
---|
401 | faddd LOGA3,%fp2 // ...A3+V*A5 |
---|
402 | |
---|
403 | fmulx %fp3,%fp1 // ...V*(A4+V*A6) |
---|
404 | fmulx %fp3,%fp2 // ...V*(A3+V*A5) |
---|
405 | |
---|
406 | faddd LOGA2,%fp1 // ...A2+V*(A4+V*A6) |
---|
407 | faddd LOGA1,%fp2 // ...A1+V*(A3+V*A5) |
---|
408 | |
---|
409 | fmulx %fp3,%fp1 // ...V*(A2+V*(A4+V*A6)) |
---|
410 | addal #16,%a0 // ...ADDRESS OF LOG(F) |
---|
411 | fmulx %fp3,%fp2 // ...V*(A1+V*(A3+V*A5)), FP3 RELEASED |
---|
412 | |
---|
413 | fmulx %fp0,%fp1 // ...U*V*(A2+V*(A4+V*A6)) |
---|
414 | faddx %fp2,%fp0 // ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED |
---|
415 | |
---|
416 | faddx (%a0),%fp1 // ...LOG(F)+U*V*(A2+V*(A4+V*A6)) |
---|
417 | fmovemx (%sp)+,%fp2-%fp2/%fp3 // ...RESTORE FP2 |
---|
418 | faddx %fp1,%fp0 // ...FP0 IS LOG(F) + LOG(1+U) |
---|
419 | |
---|
420 | fmovel %d1,%fpcr |
---|
421 | faddx KLOG2(%a6),%fp0 // ...FINAL ADD |
---|
422 | bra t_frcinx |
---|
423 | |
---|
424 | |
---|
425 | LOGNEAR1: |
---|
426 | //--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT. |
---|
427 | fmovex %fp0,%fp1 |
---|
428 | fsubs one,%fp1 // ...FP1 IS X-1 |
---|
429 | fadds one,%fp0 // ...FP0 IS X+1 |
---|
430 | faddx %fp1,%fp1 // ...FP1 IS 2(X-1) |
---|
431 | //--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL |
---|
432 | //--IN U, U = 2(X-1)/(X+1) = FP1/FP0 |
---|
433 | |
---|
434 | LP1CONT2: |
---|
435 | //--THIS IS AN RE-ENTRY POINT FOR LOGNP1 |
---|
436 | fdivx %fp0,%fp1 // ...FP1 IS U |
---|
437 | fmovemx %fp2-%fp2/%fp3,-(%sp) // ...SAVE FP2 |
---|
438 | //--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3 |
---|
439 | //--LET V=U*U, W=V*V, CALCULATE |
---|
440 | //--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY |
---|
441 | //--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] ) |
---|
442 | fmovex %fp1,%fp0 |
---|
443 | fmulx %fp0,%fp0 // ...FP0 IS V |
---|
444 | fmovex %fp1,SAVEU(%a6) // ...STORE U IN MEMORY, FREE FP1 |
---|
445 | fmovex %fp0,%fp1 |
---|
446 | fmulx %fp1,%fp1 // ...FP1 IS W |
---|
447 | |
---|
448 | fmoved LOGB5,%fp3 |
---|
449 | fmoved LOGB4,%fp2 |
---|
450 | |
---|
451 | fmulx %fp1,%fp3 // ...W*B5 |
---|
452 | fmulx %fp1,%fp2 // ...W*B4 |
---|
453 | |
---|
454 | faddd LOGB3,%fp3 // ...B3+W*B5 |
---|
455 | faddd LOGB2,%fp2 // ...B2+W*B4 |
---|
456 | |
---|
457 | fmulx %fp3,%fp1 // ...W*(B3+W*B5), FP3 RELEASED |
---|
458 | |
---|
459 | fmulx %fp0,%fp2 // ...V*(B2+W*B4) |
---|
460 | |
---|
461 | faddd LOGB1,%fp1 // ...B1+W*(B3+W*B5) |
---|
462 | fmulx SAVEU(%a6),%fp0 // ...FP0 IS U*V |
---|
463 | |
---|
464 | faddx %fp2,%fp1 // ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED |
---|
465 | fmovemx (%sp)+,%fp2-%fp2/%fp3 // ...FP2 RESTORED |
---|
466 | |
---|
467 | fmulx %fp1,%fp0 // ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] ) |
---|
468 | |
---|
469 | fmovel %d1,%fpcr |
---|
470 | faddx SAVEU(%a6),%fp0 |
---|
471 | bra t_frcinx |
---|
472 | rts |
---|
473 | |
---|
474 | LOGNEG: |
---|
475 | //--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID |
---|
476 | bra t_operr |
---|
477 | |
---|
478 | .global slognp1d |
---|
479 | slognp1d: |
---|
480 | //--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT |
---|
481 | // Simply return the denorm |
---|
482 | |
---|
483 | bra t_extdnrm |
---|
484 | |
---|
485 | .global slognp1 |
---|
486 | slognp1: |
---|
487 | //--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S |
---|
488 | |
---|
489 | fmovex (%a0),%fp0 // ...LOAD INPUT |
---|
490 | fabsx %fp0 //test magnitude |
---|
491 | fcmpx LTHOLD,%fp0 //compare with min threshold |
---|
492 | fbgt LP1REAL //if greater, continue |
---|
493 | fmovel #0,%fpsr //clr N flag from compare |
---|
494 | fmovel %d1,%fpcr |
---|
495 | fmovex (%a0),%fp0 //return signed argument |
---|
496 | bra t_frcinx |
---|
497 | |
---|
498 | LP1REAL: |
---|
499 | fmovex (%a0),%fp0 // ...LOAD INPUT |
---|
500 | movel #0x00000000,ADJK(%a6) |
---|
501 | fmovex %fp0,%fp1 // ...FP1 IS INPUT Z |
---|
502 | fadds one,%fp0 // ...X := ROUND(1+Z) |
---|
503 | fmovex %fp0,X(%a6) |
---|
504 | movew XFRAC(%a6),XDCARE(%a6) |
---|
505 | movel X(%a6),%d0 |
---|
506 | cmpil #0,%d0 |
---|
507 | ble LP1NEG0 // ...LOG OF ZERO OR -VE |
---|
508 | cmp2l BOUNDS2,%d0 |
---|
509 | bcs LOGMAIN // ...BOUNDS2 IS [1/2,3/2] |
---|
510 | //--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z, |
---|
511 | //--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE, |
---|
512 | //--SIMPLY INVOKE LOG(X) FOR LOG(1+Z). |
---|
513 | |
---|
514 | LP1NEAR1: |
---|
515 | //--NEXT SEE IF EXP(-1/16) < X < EXP(1/16) |
---|
516 | cmp2l BOUNDS1,%d0 |
---|
517 | bcss LP1CARE |
---|
518 | |
---|
519 | LP1ONE16: |
---|
520 | //--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2) |
---|
521 | //--WHERE U = 2Z/(2+Z) = 2Z/(1+X). |
---|
522 | faddx %fp1,%fp1 // ...FP1 IS 2Z |
---|
523 | fadds one,%fp0 // ...FP0 IS 1+X |
---|
524 | //--U = FP1/FP0 |
---|
525 | bra LP1CONT2 |
---|
526 | |
---|
527 | LP1CARE: |
---|
528 | //--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE |
---|
529 | //--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST |
---|
530 | //--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2], |
---|
531 | //--THERE ARE ONLY TWO CASES. |
---|
532 | //--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z |
---|
533 | //--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z |
---|
534 | //--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF |
---|
535 | //--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED. |
---|
536 | |
---|
537 | movel XFRAC(%a6),FFRAC(%a6) |
---|
538 | andil #0xFE000000,FFRAC(%a6) |
---|
539 | oril #0x01000000,FFRAC(%a6) // ...F OBTAINED |
---|
540 | cmpil #0x3FFF8000,%d0 // ...SEE IF 1+Z > 1 |
---|
541 | bges KISZERO |
---|
542 | |
---|
543 | KISNEG1: |
---|
544 | fmoves TWO,%fp0 |
---|
545 | movel #0x3fff0000,F(%a6) |
---|
546 | clrl F+8(%a6) |
---|
547 | fsubx F(%a6),%fp0 // ...2-F |
---|
548 | movel FFRAC(%a6),%d0 |
---|
549 | andil #0x7E000000,%d0 |
---|
550 | asrl #8,%d0 |
---|
551 | asrl #8,%d0 |
---|
552 | asrl #4,%d0 // ...D0 CONTAINS DISPLACEMENT FOR 1/F |
---|
553 | faddx %fp1,%fp1 // ...GET 2Z |
---|
554 | fmovemx %fp2-%fp2/%fp3,-(%sp) // ...SAVE FP2 |
---|
555 | faddx %fp1,%fp0 // ...FP0 IS Y-F = (2-F)+2Z |
---|
556 | lea LOGTBL,%a0 // ...A0 IS ADDRESS OF 1/F |
---|
557 | addal %d0,%a0 |
---|
558 | fmoves negone,%fp1 // ...FP1 IS K = -1 |
---|
559 | bra LP1CONT1 |
---|
560 | |
---|
561 | KISZERO: |
---|
562 | fmoves one,%fp0 |
---|
563 | movel #0x3fff0000,F(%a6) |
---|
564 | clrl F+8(%a6) |
---|
565 | fsubx F(%a6),%fp0 // ...1-F |
---|
566 | movel FFRAC(%a6),%d0 |
---|
567 | andil #0x7E000000,%d0 |
---|
568 | asrl #8,%d0 |
---|
569 | asrl #8,%d0 |
---|
570 | asrl #4,%d0 |
---|
571 | faddx %fp1,%fp0 // ...FP0 IS Y-F |
---|
572 | fmovemx %fp2-%fp2/%fp3,-(%sp) // ...FP2 SAVED |
---|
573 | lea LOGTBL,%a0 |
---|
574 | addal %d0,%a0 // ...A0 IS ADDRESS OF 1/F |
---|
575 | fmoves zero,%fp1 // ...FP1 IS K = 0 |
---|
576 | bra LP1CONT1 |
---|
577 | |
---|
578 | LP1NEG0: |
---|
579 | //--FPCR SAVED. D0 IS X IN COMPACT FORM. |
---|
580 | cmpil #0,%d0 |
---|
581 | blts LP1NEG |
---|
582 | LP1ZERO: |
---|
583 | fmoves negone,%fp0 |
---|
584 | |
---|
585 | fmovel %d1,%fpcr |
---|
586 | bra t_dz |
---|
587 | |
---|
588 | LP1NEG: |
---|
589 | fmoves zero,%fp0 |
---|
590 | |
---|
591 | fmovel %d1,%fpcr |
---|
592 | bra t_operr |
---|
593 | |
---|
594 | |end |
---|