1 | // |
---|
2 | // $Id$ |
---|
3 | // |
---|
4 | // round.sa 3.4 7/29/91 |
---|
5 | // |
---|
6 | // handle rounding and normalization tasks |
---|
7 | // |
---|
8 | // |
---|
9 | // |
---|
10 | // Copyright (C) Motorola, Inc. 1990 |
---|
11 | // All Rights Reserved |
---|
12 | // |
---|
13 | // THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA |
---|
14 | // The copyright notice above does not evidence any |
---|
15 | // actual or intended publication of such source code. |
---|
16 | |
---|
17 | //ROUND idnt 2,1 | Motorola 040 Floating Point Software Package |
---|
18 | |
---|
19 | |section 8 |
---|
20 | |
---|
21 | #include "fpsp.defs" |
---|
22 | |
---|
23 | // |
---|
24 | // round --- round result according to precision/mode |
---|
25 | // |
---|
26 | // a0 points to the input operand in the internal extended format |
---|
27 | // d1(high word) contains rounding precision: |
---|
28 | // ext = $0000xxxx |
---|
29 | // sgl = $0001xxxx |
---|
30 | // dbl = $0002xxxx |
---|
31 | // d1(low word) contains rounding mode: |
---|
32 | // RN = $xxxx0000 |
---|
33 | // RZ = $xxxx0001 |
---|
34 | // RM = $xxxx0002 |
---|
35 | // RP = $xxxx0003 |
---|
36 | // d0{31:29} contains the g,r,s bits (extended) |
---|
37 | // |
---|
38 | // On return the value pointed to by a0 is correctly rounded, |
---|
39 | // a0 is preserved and the g-r-s bits in d0 are cleared. |
---|
40 | // The result is not typed - the tag field is invalid. The |
---|
41 | // result is still in the internal extended format. |
---|
42 | // |
---|
43 | // The INEX bit of USER_FPSR will be set if the rounded result was |
---|
44 | // inexact (i.e. if any of the g-r-s bits were set). |
---|
45 | // |
---|
46 | |
---|
//
// round --- round the operand at (a0) in place
//
// In:	a0	 -> operand in the internal extended format
//	d1 (hi)	 = rounding precision: 0 ext, 1 sgl, 2 dbl
//	d1 (lo)	 = rounding mode: 0 RN, 1 RZ, 2 RM, 3 RP
//	d0	 = extended precision g,r,s in bits 31:29
// Out:	operand rounded to the selected precision; inex2/ainex are
//	or-ed into USER_FPSR(a6) when the precision-adjusted g,r,s
//	are not all zero.
// Uses:	a1 as jump table scratch.  Every path swaps d1 exactly once
//	after ext_grs returns, so d1 comes back with its words exchanged.
//
	.global	round
round:
	bsrs	ext_grs			//d0 = g,r,s for the selected precision
	tstl	%d0			//anything below the l-bit?
	beq	rnd_zero		//no: exact, just clear the unused
//					 low-order bits (same path as RZ)

// Inexact result: record it, then dispatch on the rounding mode.
	orl	#inx2a_mask,USER_FPSR(%a6) //set inex2/ainex
	lea	mode_tab,%a1
	movel	(%a1,%d1.w*4),%a1	//handler for mode in d1.w
	jmp	(%a1)

//
// Handlers indexed by rounding mode.  All of them are entered with
// grs != 0, except rnd_zero which also serves the exact case.
//
mode_tab:
	.long	rnd_near		//0: round to nearest
	.long	rnd_zero		//1: round toward zero
	.long	rnd_mnus		//2: round toward minus infinity
	.long	rnd_plus		//3: round toward plus infinity

//
// ROUND PLUS INFINITY: a positive value gets 1 added at the l-bit,
// a negative value is truncated.
//
rnd_plus:
	swap	%d1			//d1.w = rounding precision
	tstb	LOCAL_SGN(%a0)		//sign of the operand
	bmi	truncate		//negative: truncate
	bras	rnd_away

//
// ROUND MINUS INFINITY: a negative value gets 1 added at the l-bit,
// a positive value is truncated.
//
rnd_mnus:
	swap	%d1			//d1.w = rounding precision
	tstb	LOCAL_SGN(%a0)		//sign of the operand
	bpl	truncate		//positive: truncate

// Directed rounding away from zero: make d0 nonzero so that the add
// routines never take their "rs = 0, clear the l-bit" branch.
rnd_away:
	moveql	#-1,%d0			//d0 = 0xffffffff
	bras	rnd_add_l

//
// ROUND ZERO: always truncate.
//
rnd_zero:
	swap	%d1			//d1.w = rounding precision
	bra	truncate

//
// ROUND NEAREST: if g = 1 add 1 at the l-bit; the add routines then
// clear the l-bit when r = s = 0, which gives round-to-even on a tie.
//
rnd_near:
	swap	%d1			//d1.w = rounding precision
	asll	#1,%d0			//g -> carry, d0 keeps r and s
	bcc	truncate		//g = 0: truncate

// Dispatch to the add-1-to-l routine for the precision in d1.w.
rnd_add_l:
	lea	add_to_l,%a1
	movel	(%a1,%d1.w*4),%a1
	jmp	(%a1)
---|
125 | |
---|
126 | // |
---|
127 | // ext_grs --- extract guard, round and sticky bits |
---|
128 | // |
---|
129 | // Input: d1 = PREC:ROUND |
---|
130 | // Output: d0{31:29}= guard, round, sticky |
---|
131 | // |
---|
132 | // ext_grs extracts the guard/round/sticky bits according to the |
---|
133 | // selected rounding precision. It is called by the round subroutine |
---|
134 | // only. All registers except d0 are kept intact. d0 becomes an |
---|
135 | // updated guard,round,sticky in d0{31:29} |
---|
136 | // |
---|
137 | // Notes: the ext_grs uses the round PREC, and therefore has to swap d1 |
---|
138 | // prior to usage, and needs to restore d1 to original. |
---|
139 | // |
---|
//
// ext_grs --- derive g,r,s for the rounding precision held in d1 (hi)
//
// In:	a0 -> operand, d0 = extended precision g,r,s, d1 = PREC:ROUND
// Out:	d0 = g,r in bits 31:30 plus the sticky bit (rnd_stky_bit).
//	For extended precision d0 is returned unchanged.
//	d1 is swapped on entry and swapped back before returning;
//	d2/d3 are saved and restored around their use.
//
ext_grs:
	swap	%d1			//d1.w = rounding precision
	tstw	%d1
	beqs	end_ext_grs		//extended: incoming d0 is the answer

	moveml	%d2/%d3,-(%a7)		//scratch registers
	cmpiw	#1,%d1
	bnes	grs_dbl			//not single: treat as double

// Single: g,r are bits 7:6 of LOCAL_HI, sticky is the or of LOCAL_HI
// bits 5:0, all of LOCAL_LO and the incoming g,r,s.
	bfextu	LOCAL_HI(%a0){#24:#2},%d3
	moveql	#30,%d2
	lsll	%d2,%d3			//g,r -> bits 31:30 of d3
	movel	LOCAL_HI(%a0),%d2
	andil	#0x0000003f,%d2		//bits to the right of g,r
	bnes	st_stky
	tstl	LOCAL_LO(%a0)		//whole lower mantissa
	bnes	st_stky
	bras	chk_in_grs

// Double: g,r are bits 10:9 of LOCAL_LO, sticky is the or of LOCAL_LO
// bits 8:0 and the incoming g,r,s.
grs_dbl:
	bfextu	LOCAL_LO(%a0){#21:#2},%d3
	moveql	#30,%d2
	lsll	%d2,%d3			//g,r -> bits 31:30 of d3
	movel	LOCAL_LO(%a0),%d2
	andil	#0x000001ff,%d2		//bits to the right of g,r
	bnes	st_stky

// Common to both: the incoming extended g,r,s also feed the sticky bit.
chk_in_grs:
	tstl	%d0
	beqs	end_sd			//nothing set: sticky stays clear
st_stky:
	bset	#rnd_stky_bit,%d3
end_sd:
	movel	%d3,%d0			//return g,r,s in d0
	moveml	(%a7)+,%d2/%d3		//restore scratch registers
end_ext_grs:
	swap	%d1			//d1 back to PREC:ROUND
	rts
---|
180 | |
---|
181 | //******************* Local Equates |
---|
// Increments that add 1 at the l-bit (least significant kept bit):
// ad_1_sgl is applied to LOCAL_HI by add_sgl, ad_1_dbl to LOCAL_LO
// by add_dbl.
	.set	ad_1_sgl,0x100		//bit 8
	.set	ad_1_dbl,0x800		//bit 11

//
// add_to_l --- add-1-to-l handlers, indexed by rounding precision
//
add_to_l:
	.long	add_ext			//0: extended
	.long	add_sgl			//1: single
	.long	add_dbl			//2: double
	.long	add_dbl			//3: also routed to double
---|
193 | // |
---|
194 | // ADD SINGLE |
---|
195 | // |
---|
//
// add_sgl --- add 1 at the single precision l-bit of the operand at (a0)
//
// d0 = 0 on entry means r = s = 0 (a tie): the l-bit is then cleared
// so the result is even.  sgl_done is also the truncate entry point
// for single precision: it zeroes everything below the l-bit.
//
add_sgl:
	addl	#ad_1_sgl,LOCAL_HI(%a0)
	bccs	1f			//no carry out of the mantissa
	roxrw	LOCAL_HI(%a0)		//carry out: rotate it back in
	roxrw	LOCAL_HI+2(%a0)		//through both words of LOCAL_HI
	addqw	#1,LOCAL_EX(%a0)	//and bump the exponent
1:
	tstl	%d0			//r = s = 0 ?
	bnes	sgl_done
	andiw	#0xfe00,LOCAL_HI+2(%a0)	//tie: clear the l-bit
sgl_done:
	andil	#0xffffff00,LOCAL_HI(%a0) //drop bits below the sgl l-bit
	clrl	LOCAL_LO(%a0)		//lower mantissa is all zero
	rts
---|
210 | |
---|
211 | // |
---|
212 | // ADD EXTENDED |
---|
213 | // |
---|
//
// add_ext --- add 1 at the extended precision l-bit (bit 0 of LOCAL_LO)
//
// d0 = 0 on entry means r = s = 0 (a tie): the l-bit is then cleared
// so the result is even.
//
add_ext:
	addql	#1,LOCAL_LO(%a0)	//add 1 at the l-bit
	bccs	1f			//no carry into LOCAL_HI
	addql	#1,LOCAL_HI(%a0)	//propagate the carry
	bccs	1f			//no carry out of the mantissa
	roxrw	LOCAL_HI(%a0)		//carry out: rotate it back in
	roxrw	LOCAL_HI+2(%a0)		//through all four mantissa words
	roxrw	LOCAL_LO(%a0)
	roxrw	LOCAL_LO+2(%a0)
	addqw	#1,LOCAL_EX(%a0)	//and bump the exponent
1:
	tstl	%d0			//r = s = 0 ?
	bnes	2f
	andib	#0xfe,LOCAL_LO+3(%a0)	//tie: clear the l-bit
2:
	rts
---|
230 | // |
---|
231 | // ADD DOUBLE |
---|
232 | // |
---|
//
// add_dbl --- add 1 at the double precision l-bit of the operand at (a0)
//
// d0 = 0 on entry means r = s = 0 (a tie): the l-bit is then cleared
// so the result is even.  dbl_done is also the truncate entry point
// for double precision: it zeroes everything below the l-bit.
//
add_dbl:
	addl	#ad_1_dbl,LOCAL_LO(%a0)
	bccs	1f			//no carry into LOCAL_HI
	addql	#1,LOCAL_HI(%a0)	//propagate the carry
	bccs	1f			//no carry out of the mantissa
	roxrw	LOCAL_HI(%a0)		//carry out: rotate it back in
	roxrw	LOCAL_HI+2(%a0)		//through all four mantissa words
	roxrw	LOCAL_LO(%a0)
	roxrw	LOCAL_LO+2(%a0)
	addqw	#1,LOCAL_EX(%a0)	//and bump the exponent
1:
	tstl	%d0			//r = s = 0 ?
	bnes	dbl_done
	andiw	#0xf000,LOCAL_LO+2(%a0)	//tie: clear the l-bit
dbl_done:
	andil	#0xfffff800,LOCAL_LO(%a0) //drop bits below the dbl l-bit
	rts
---|
251 | |
---|
//
// truncate --- clear the mantissa bits below the l-bit
//
// In:	a0 -> operand, d1.w = rounding precision (used as table index)
// Extended precision needs no masking and returns at once; single
// and double reuse the tails of add_sgl and add_dbl.
//
truncate:
	lea	trunct,%a1
	movel	(%a1,%d1.w*4),%a1	//handler for precision in d1.w
	jmp	(%a1)

trunct:
	.long	end_rnd			//0: extended, nothing to clear
	.long	sgl_done		//1: single
	.long	dbl_done		//2: double
	.long	dbl_done		//3: also routed to double

// Both labels are a bare return.  Nothing in this file refers to
// "error"; it is kept so the symbol stays defined.
error:
end_rnd:
	rts
---|
270 | |
---|
271 | // |
---|
272 | // NORMALIZE |
---|
273 | // |
---|
274 | // These routines (nrm_zero & nrm_set) normalize the unnorm. This |
---|
275 | // is done by shifting the mantissa left while decrementing the |
---|
276 | // exponent. |
---|
277 | // |
---|
278 | // NRM_SET shifts and decrements until there is a 1 set in the integer |
---|
279 | // bit of the mantissa (msb in d1). |
---|
280 | // |
---|
281 | // NRM_ZERO shifts and decrements until there is a 1 set in the integer |
---|
282 | // bit of the mantissa (msb in d1) unless this would mean the exponent |
---|
283 | // would go less than 0. In that case the number becomes a denorm - the |
---|
284 | // exponent (d0) is set to 0 and the mantissa (d1 & d2) is not |
---|
285 | // normalized. |
---|
286 | // |
---|
287 | // Note that both routines have been optimized (for the worst case) and |
---|
288 | // therefore do not have the easy to follow decrement/shift loop. |
---|
289 | // |
---|
290 | // NRM_ZERO |
---|
291 | // |
---|
292 | // Distance to first 1 bit in mantissa = X |
---|
293 | // Distance to 0 from exponent = Y |
---|
294 | // If X < Y |
---|
295 | // Then |
---|
296 | // nrm_set |
---|
297 | // Else |
---|
298 | // shift mantissa by Y |
---|
299 | // set exponent = 0 |
---|
300 | // |
---|
301 | //input: |
---|
302 | // FP_SCR1 = exponent, ms mantissa part, ls mantissa part |
---|
303 | //output: |
---|
304 | // L_SCR1{4} = fpte15 or ete15 bit |
---|
305 | // |
---|
//
// nrm_zero --- normalize the operand at (a0) without letting the
//		exponent go below zero
//
// In:	a0 -> operand in the internal extended format
// Out:	operand normalized, or turned into a denorm (exponent 0,
//	mantissa shifted left by the old exponent) when the distance
//	X to the first 1 bit exceeds the exponent Y.
// Uses:	d0 and d1 are clobbered on every path; d2/d3/d5/d6 are saved
//	and restored.
//
// Fix: 68k register shift counts are taken modulo 64, so the old
// shift/merge sequence lost the whole mantissa when Y was 33..62
// (possible only when the upper mantissa word is zero).  Counts of
// 32 and above are now handled explicitly.
//
	.global	nrm_zero
nrm_zero:
	movew	LOCAL_EX(%a0),%d0	//d0.w = Y
	cmpw	#64,%d0
	bmis	d0_less			//Y < 64: must compare against X
	bsr	nrm_set			//Y >= 64: exponent cannot reach 0
	rts

d0_less:
	moveml	%d2/%d3/%d5/%d6,-(%a7)
	movel	LOCAL_HI(%a0),%d1	//d1 = ms mantissa
	movel	LOCAL_LO(%a0),%d2	//d2 = ls mantissa

	bfffo	%d1{#0:#32},%d3		//d3 = X if ms mantissa has a 1
	bnes	cmp_xy
ms_clr:
	bfffo	%d2{#0:#32},%d3		//ms mantissa is zero: look in ls
	beqs	all_clr			//no bit set anywhere
	addw	#32,%d3			//X counts the 32 zero ms bits too
cmp_xy:
	cmpw	%d3,%d0			//Y - X
	bmis	greater			//X > Y: plain shift would pass 0
	bsr	nrm_set			//X <= Y: safe to fully normalize
	bras	nz_exit

// X > Y: shift the 64-bit mantissa left by Y and force exponent 0.
greater:
	cmpw	#32,%d0
	blts	shift_short
// Y >= 32 implies X > 32, i.e. the ms mantissa is zero: the result is
// the ls mantissa moved up one word and shifted by the remainder.
	subw	#32,%d0			//remaining count, 0..30
	movel	%d2,%d1
	lsll	%d0,%d1			//new ms mantissa
	clrl	%d2			//new ls mantissa
	bras	store_dnrm
shift_short:
	movel	%d2,%d6			//keep ls mantissa for the carry-over
	lsll	%d0,%d2			//shift ls mantissa by Y
	lsll	%d0,%d1			//shift ms mantissa by Y
	moveql	#32,%d5
	subl	%d0,%d5			//32 - Y (count is used modulo 64)
	lsrl	%d5,%d6			//bits that cross from ls into ms
	orl	%d6,%d1
store_dnrm:
	moveql	#0,%d0			//exponent of a denorm
	movew	%d0,LOCAL_EX(%a0)
	movel	%d1,LOCAL_HI(%a0)
	movel	%d2,LOCAL_LO(%a0)
nz_exit:
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts

all_clr:
	clrw	LOCAL_EX(%a0)		//mantissa is zero: exponent = 0
	bras	nz_exit
---|
356 | // |
---|
357 | // NRM_SET |
---|
358 | // |
---|
//
// nrm_set --- normalize the operand at (a0): shift the mantissa left
//	       until the integer bit is set, lowering the exponent by
//	       the shift count.  No check is made on the exponent.
//
// In:	a0 -> operand in the internal extended format
// Uses:	d0 and d1 are clobbered; d6/d7 are saved and restored.
//
	.global	nrm_set
nrm_set:
	movel	%d7,-(%a7)
	bfffo	LOCAL_HI(%a0){#0:#32},%d7 //d7 = offset of first 1 in ms mant
	beqs	lower			//ms mantissa is all zeros

	movel	%d6,-(%a7)

	subw	%d7,LOCAL_EX(%a0)	//exponent -= shift count
	movel	LOCAL_HI(%a0),%d0	//d0 = ms mantissa
	movel	LOCAL_LO(%a0),%d1	//d1 = ls mantissa

	lsll	%d7,%d0			//first 1 moves to the integer bit
	movel	%d1,%d6
	lsll	%d7,%d6			//shifted ls mantissa
	movel	%d6,LOCAL_LO(%a0)
	moveql	#32,%d6
	subl	%d7,%d6			//32 - count
	lsrl	%d6,%d1			//ls bits that cross into ms mantissa
	orl	%d1,%d0
	movel	%d0,LOCAL_HI(%a0)
	moveml	(%a7)+,%d6/%d7		//restore registers
	rts

//
// The ms mantissa is zero.  The ls mantissa is assumed to hold at
// least one set bit (otherwise the operand would have been tagged a
// zero, not a denorm).  It becomes the new ms mantissa.
//
lower:
	movew	LOCAL_EX(%a0),%d0	//d0 = exponent
	movel	LOCAL_LO(%a0),%d1	//d1 = ls mantissa
	subw	#32,%d0			//account for the 32 zero ms bits
	bfffo	%d1{#0:#32},%d7		//d7 = offset of first 1 in ls mant
	subw	%d7,%d0			//exponent -= shift count
	lsll	%d7,%d1			//first 1 moves to the integer bit
	movew	%d0,LOCAL_EX(%a0)	//store exponent
	movel	%d1,LOCAL_HI(%a0)	//store ms mantissa
	clrl	LOCAL_LO(%a0)		//ls mantissa is now empty
	movel	(%a7)+,%d7
	rts
---|
400 | // |
---|
401 | // denorm --- denormalize an intermediate result |
---|
402 | // |
---|
403 | // Used by underflow. |
---|
404 | // |
---|
405 | // Input: |
---|
406 | // a0 points to the operand to be denormalized |
---|
407 | // (in the internal extended format) |
---|
408 | // |
---|
409 | // d0: rounding precision |
---|
410 | // Output: |
---|
411 | // a0 points to the denormalized result |
---|
412 | // (in the internal extended format) |
---|
413 | // |
---|
414 | // d0 is guard,round,sticky |
---|
415 | // |
---|
416 | // d0 comes into this routine with the rounding precision. It |
---|
417 | // is then loaded with the denormalized exponent threshold for the |
---|
418 | // rounding precision. |
---|
419 | // |
---|
420 | |
---|
//
// denorm --- denormalize the operand at (a0) for the given precision
//
// In:	a0 -> operand in the internal extended format
//	d0 =  rounding precision: 0 ext, 1 sgl, anything else dbl
// Out:	operand denormalized to the precision threshold (0 for ext,
//	sgl_thresh or dbl_thresh otherwise); d0 = g,r,s from dnrm_lp,
//	or just the sticky bit when every bit was shifted out.
//	inex2/ainex are or-ed into USER_FPSR(a6) when bits were lost.
// NOTE(review): the ext test looks at d0.b while the sgl test compares
// all of d0 -- callers presumably pass a clean longword; confirm.
//
	.global	denorm
denorm:
	btstb	#6,LOCAL_EX(%a0)	//exponent in the $4000-$7fff range?
	beqs	1f
	bsetb	#7,LOCAL_EX(%a0)	//yes: sign extend it
1:
	tstb	%d0			//0 = extended precision
	bnes	not_ext
	clrl	%d1			//extended threshold is 0
	bras	dnrm_go

not_ext:
	cmpil	#1,%d0			//1 = single, otherwise double
	beqs	load_sgl
	movew	#dbl_thresh,%d1		//d1.w = double threshold
	bras	dnrm_range
load_sgl:
	movew	#sgl_thresh,%d1		//d1.w = single threshold

// Single and double: when threshold - exponent reaches 67 (mantissa
// plus g,r,s) every bit would be shifted out, so skip the shifter.
dnrm_range:
	movel	%d1,%d0
	subw	LOCAL_EX(%a0),%d0	//d0.w = threshold - exponent
	cmpw	#67,%d0
	bpls	chk_stky

dnrm_go:
	clrl	%d0			//start with g,r,s clear
	bsr	dnrm_lp			//shift down to the threshold
	tstb	%d1			//dnrm_lp inexact flag
	beqs	no_inex
dnrm_inex:
	orl	#inx2a_mask,USER_FPSR(%a6) //set inex2/ainex
no_inex:
	rts

// Everything is shifted out: the result is zero at the threshold
// exponent, inexact if the mantissa held any bit.
chk_stky:
	tstl	LOCAL_HI(%a0)
	bnes	set_stky
	tstl	LOCAL_LO(%a0)
	beqs	clr_mant		//mantissa already zero: exact
set_stky:
	orl	#inx2a_mask,USER_FPSR(%a6) //set inex2/ainex
	movel	#0x20000000,%d0		//return the sticky bit only
clr_mant:
	movew	%d1,LOCAL_EX(%a0)	//exponent = threshold
	clrl	LOCAL_HI(%a0)		//ms mantissa = 0
	clrl	LOCAL_LO(%a0)		//ls mantissa = 0
	rts
---|
486 | |
---|
487 | // |
---|
488 | // dnrm_lp --- denormalize exponent/mantissa to specified threshold |
---|
489 | // |
---|
490 | // Input: |
---|
491 | // a0 points to the operand to be denormalized |
---|
492 | // d0{31:29} initial guard,round,sticky |
---|
493 | // d1{15:0} denormalization threshold |
---|
494 | // Output: |
---|
495 | // a0 points to the denormalized operand |
---|
496 | // d0{31:29} final guard,round,sticky |
---|
497 | // d1.b inexact flag: all ones means inexact result |
---|
498 | // |
---|
499 | // The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2 |
---|
500 | // so that bfext can be used to extract the new low part of the mantissa. |
---|
501 | // Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there |
---|
502 | // is no LOCAL_GRS scratch word following it on the fsave frame. |
---|
503 | // |
---|
//
// dnrm_lp --- shift the mantissa at (a0) right until the exponent
//	       equals the threshold, collecting guard, round and sticky
//
// In:	a0 -> operand, d0 = initial g,r,s (bits 31:29),
//	d1.w = denormalization threshold
// Out:	operand updated, d0 = final g,r,s,
//	d1.b = $ff if bits were lost (inexact), 0 otherwise
// For an E3 exception the initial g,r,s are taken from WBTEMP_GRS(a6)
// instead of d0.  FP_SCR2(a6) is used as scratch for the original
// LOCAL_LO and g,r,s; d2 is saved and restored wherever it is used.
//
	.global	dnrm_lp
dnrm_lp:
	movel	%d2,-(%sp)		//d2 is a temporary here
	btstb	#E3,E_BYTE(%a6)		//E3 exception?
	beqs	1f
	bfextu	WBTEMP_GRS(%a6){#6:#3},%d2 //yes: fetch g,r,s from WBTEMP
	moveql	#29,%d0
	lsll	%d0,%d2			//move them up to bits 31:29
	movel	%d2,%d0
1:
	movel	(%sp)+,%d2
	movel	LOCAL_LO(%a0),FP_SCR2+LOCAL_LO(%a6) //scratch copy of ls mant
	movel	%d0,FP_SCR2+LOCAL_GRS(%a6)	//scratch copy of g,r,s
	movel	%d1,%d0			//d0.w = threshold
	subw	LOCAL_EX(%a0),%d1	//d1.w = threshold - exponent
	bles	no_lp			//<= 0: nothing to shift
	cmpw	#32,%d1
	blts	case_1			//0 < d1 < 32
	cmpw	#64,%d1
	blts	case_2			//32 <= d1 < 64
	bra	case_3			//d1 >= 64

//
// No denormalization necessary
//
no_lp:
	clrb	%d1			//exact
	movel	FP_SCR2+LOCAL_GRS(%a6),%d0 //hand back the original g,r,s
	rts

//
// case (0 < d1 < 32): bits move within and between the two words
//
case_1:
	movel	%d2,-(%sp)
	movew	%d0,LOCAL_EX(%a0)	//exponent = threshold
	moveql	#32,%d0
	subw	%d1,%d0			//d0 = 32 - d1
	bfextu	LOCAL_EX(%a0){%d0:#32},%d2
	bfextu	%d2{%d1:%d0},%d2	//d2 = new LOCAL_HI
	bfextu	LOCAL_HI(%a0){%d0:#32},%d1 //d1 = new LOCAL_LO
	bfextu	FP_SCR2+LOCAL_LO(%a6){%d0:#32},%d0 //d0 = new g,r,s + rest
	movel	%d2,LOCAL_HI(%a0)
	movel	%d1,LOCAL_LO(%a0)
	clrb	%d1			//assume exact
	bftst	%d0{#2:#30}		//anything below g,r ?
	beqs	dnrm_fin
	bsetl	#rnd_stky_bit,%d0	//yes: sticky and inexact
	st	%d1

// Shared exit of case_1 and case_2: a set bit among the original
// g,r,s becomes sticky, then d0 is reduced to g,r,s and d2 restored.
dnrm_fin:
	movel	FP_SCR2+LOCAL_GRS(%a6),%d2 //original g,r,s
	andil	#0xe0000000,%d2		//Z set when all three are clear
	beqs	2f
	orl	#0x20000000,%d0		//set sticky bit in d0
2:
	andil	#0xe0000000,%d0		//keep only g,r,s
	movel	(%sp)+,%d2
	rts

//
// case (32 <= d1 < 64): LOCAL_HI drops into LOCAL_LO and g,r,s
//
case_2:
	movel	%d2,-(%sp)
	movew	%d0,LOCAL_EX(%a0)	//exponent = threshold
	subw	#32,%d1			//d1 now in 0..31
	moveql	#32,%d0
	subw	%d1,%d0			//d0 = 32 - d1
	bfextu	LOCAL_EX(%a0){%d0:#32},%d2
	bfextu	%d2{%d1:%d0},%d2	//d2 = new LOCAL_LO
	bfextu	LOCAL_HI(%a0){%d0:#32},%d1 //d1 = new g,r,s + rest
	bftst	%d1{#2:#30}		//anything below g,r ?
	bnes	c2_sstky
	bftst	FP_SCR2+LOCAL_LO(%a6){%d0:#32} //or in the bits shifted out
	bnes	c2_sstky
	movel	%d1,%d0
	clrb	%d1			//exact
	bras	end_c2
c2_sstky:
	movel	%d1,%d0
	bsetl	#rnd_stky_bit,%d0	//sticky and inexact
	st	%d1
end_c2:
	clrl	LOCAL_HI(%a0)		//new LOCAL_HI = 0
	movel	%d2,LOCAL_LO(%a0)	//store new LOCAL_LO
	bra	dnrm_fin

//
// case (d1 >= 64): the mantissa becomes zero; only shifts of 64 and
// 65 still leave bits in g,r.  The exponent is forced to the
// threshold, with the top bit of the LOCAL_EX longword set when the
// word at LOCAL_SGN tests negative.
//
case_3:
	movew	%d0,LOCAL_EX(%a0)
	tstw	LOCAL_SGN(%a0)
	bges	3f
	orl	#0x80000000,LOCAL_EX(%a0)
3:
	cmpw	#64,%d1
	beqs	sixty_four
	cmpw	#65,%d1
	beqs	sixty_five
//
// Shift count above 65: return zero with only the sticky bit set and
// flag the result inexact.
//
	clrl	LOCAL_HI(%a0)
	clrl	LOCAL_LO(%a0)
	movel	#0x20000000,%d0
	st	%d1
	rts

sixty_four:
	movel	LOCAL_HI(%a0),%d0
	bfextu	%d0{#2:#30},%d1		//d1 = bits that fall below g,r
	andil	#0xc0000000,%d0		//top two bits become g,r
	bras	c3com

sixty_five:
	movel	LOCAL_HI(%a0),%d0
	bfextu	%d0{#1:#31},%d1		//d1 = bits that fall below r
	andil	#0x80000000,%d0
	lsrl	#1,%d0			//top bit becomes r, g = 0

c3com:
	tstl	%d1			//leftover bits of LOCAL_HI
	bnes	c3ssticky
	tstl	LOCAL_LO(%a0)		//any bit of LOCAL_LO
	bnes	c3ssticky
	tstb	FP_SCR2+LOCAL_GRS(%a6)	//original g,r,s byte
	bnes	c3ssticky
	clrb	%d1			//exact
	bras	c3end

c3ssticky:
	bsetl	#rnd_stky_bit,%d0	//sticky and inexact
	st	%d1
c3end:
	clrl	LOCAL_HI(%a0)
	clrl	LOCAL_LO(%a0)
	rts
---|
650 | |
---|
651 | |end |
---|