source: rtems/c/src/lib/libcpu/m68k/m68040/fpsp/bindec.S @ 54f440d

4.104.114.84.95
Last change on this file since 54f440d was 54f440d, checked in by Joel Sherrill <joel.sherrill@…>, on 07/26/99 at 22:11:02

Patch from Charles-Antoine Gauthier <charles.gauthier@…>.
to address m68k-rtemself for the MVME167.

Here is the rtems patch I promissed you a long time ago to enable ELF
with m68k. The target name I selected is m68k-rtemself. It preserves the
m68k-rtems COFF target, and is parterned after the other ELF/COFF dual
targets.

The mvme167.cfg file causes the -qelf flag to be used during compilation
if the name of the compiler contains rtemself. This flag is used in the
bsp_specs file to select the elflinkcmds file rather than the linkcmds
file. The former is for ELF, the latter for COFF.

Some patches are required to the mc68040 FPSP code. Some of the
assembler files contain instructions that were rejected by the
m68k-rtemself-as assembler. This is a minor bug in the m68k ELF
assembler, I think.

  • Property mode set to 100644
File size: 28.3 KB
Line 
1//
2//      $Id$
3//
4//      bindec.sa 3.4 1/3/91
5//
6//      bindec
7//
8//      Description:
9//              Converts an input in extended precision format
10//              to bcd format.
11//
12//      Input:
13//              a0 points to the input extended precision value
14//              value in memory; d0 contains the k-factor sign-extended
15//              to 32-bits.  The input may be either normalized,
16//              unnormalized, or denormalized.
17//
18//      Output: result in the FP_SCR1 space on the stack.
19//
20//      Saves and Modifies: D2-D7,A2,FP2
21//
22//      Algorithm:
23//
24//      A1.     Set RM and size ext;  Set SIGMA = sign of input. 
25//              The k-factor is saved for use in d7. Clear the
26//              BINDEC_FLG for separating normalized/denormalized
27//              input.  If input is unnormalized or denormalized,
28//              normalize it.
29//
30//      A2.     Set X = abs(input).
31//
32//      A3.     Compute ILOG.
33//              ILOG is the log base 10 of the input value.  It is
34//              approximated by adding e + 0.f when the original
35//              value is viewed as 2^^e * 1.f in extended precision. 
36//              This value is stored in d6.
37//
38//      A4.     Clr INEX bit.
39//              The operation in A3 above may have set INEX2. 
40//
41//      A5.     Set ICTR = 0;
42//              ICTR is a flag used in A13.  It must be set before the
43//              loop entry A6.
44//
45//      A6.     Calculate LEN.
46//              LEN is the number of digits to be displayed.  The
47//              k-factor can dictate either the total number of digits,
48//              if it is a positive number, or the number of digits
49//              after the decimal point which are to be included as
50//              significant.  See the 68882 manual for examples.
51//              If LEN is computed to be greater than 17, set OPERR in
52//              USER_FPSR.  LEN is stored in d4.
53//
54//      A7.     Calculate SCALE.
55//              SCALE is equal to 10^ISCALE, where ISCALE is the number
56//              of decimal places needed to insure LEN integer digits
57//              in the output before conversion to bcd. LAMBDA is the
58//              sign of ISCALE, used in A9. Fp1 contains
59//              10^^(abs(ISCALE)) using a rounding mode which is a
60//              function of the original rounding mode and the signs
61//              of ISCALE and X.  A table is given in the code.
62//
63//      A8.     Clr INEX; Force RZ.
64//              The operation in A3 above may have set INEX2. 
65//              RZ mode is forced for the scaling operation to insure
66//              only one rounding error.  The grs bits are collected in
67//              the INEX flag for use in A10.
68//
69//      A9.     Scale X -> Y.
70//              The mantissa is scaled to the desired number of
71//              significant digits.  The excess digits are collected
72//              in INEX2.
73//
74//      A10.    Or in INEX.
75//              If INEX is set, round error occurred.  This is
76//              compensated for by 'or-ing' in the INEX2 flag to
77//              the lsb of Y.
78//
79//      A11.    Restore original FPCR; set size ext.
80//              Perform FINT operation in the user's rounding mode.
81//              Keep the size to extended.
82//
83//      A12.    Calculate YINT = FINT(Y) according to user's rounding
84//              mode.  The FPSP routine sintd0 is used.  The output
85//              is in fp0.
86//
87//      A13.    Check for LEN digits.
88//              If the int operation results in more than LEN digits,
89//              or less than LEN -1 digits, adjust ILOG and repeat from
90//              A6.  This test occurs only on the first pass.  If the
91//              result is exactly 10^LEN, decrement ILOG and divide
92//              the mantissa by 10.
93//
94//      A14.    Convert the mantissa to bcd.
95//              The binstr routine is used to convert the LEN digit
96//              mantissa to bcd in memory.  The input to binstr is
97//              to be a fraction; i.e. (mantissa)/10^LEN and adjusted
98//              such that the decimal point is to the left of bit 63.
99//              The bcd digits are stored in the correct position in
100//              the final string area in memory.
101//
102//      A15.    Convert the exponent to bcd.
103//              As in A14 above, the exp is converted to bcd and the
104//              digits are stored in the final string.
105//              Test the length of the final exponent string.  If the
106//              length is 4, set operr.
107//
108//      A16.    Write sign bits to final string.
109//
110//      Implementation Notes:
111//
112//      The registers are used as follows:
113//
114//              d0: scratch; LEN input to binstr
115//              d1: scratch
116//              d2: upper 32-bits of mantissa for binstr
117//              d3: scratch;lower 32-bits of mantissa for binstr
118//              d4: LEN
119//                      d5: LAMBDA/ICTR
120//              d6: ILOG
121//              d7: k-factor
122//              a0: ptr for original operand/final result
123//              a1: scratch pointer
124//              a2: pointer to FP_X; abs(original value) in ext
125//              fp0: scratch
126//              fp1: scratch
127//              fp2: scratch
128//              F_SCR1:
129//              F_SCR2:
130//              L_SCR1:
131//              L_SCR2:
132
133//              Copyright (C) Motorola, Inc. 1990
134//                      All Rights Reserved
135//
136//      THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
137//      The copyright notice above does not evidence any 
138//      actual or intended publication of such source code.
139
140//BINDEC    idnt    2,1 | Motorola 040 Floating Point Software Package
141
142#include "fpsp.defs"
143
144        |section        8
145
146// Constants in extended precision
147LOG2:   .long   0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
148LOG2UP1:        .long   0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
149
150// Constants in single precision
151FONE:   .long   0x3F800000,0x00000000,0x00000000,0x00000000
152FTWO:   .long   0x40000000,0x00000000,0x00000000,0x00000000
153FTEN:   .long   0x41200000,0x00000000,0x00000000,0x00000000
154F4933:  .long   0x459A2800,0x00000000,0x00000000,0x00000000
155
156RBDTBL:         .byte   0,0,0,0
157        .byte   3,3,2,2
158        .byte   3,2,2,3
159        .byte   2,3,3,2
160
161        |xref   binstr
162        |xref   sintdo
163        |xref   ptenrn,ptenrm,ptenrp
164
165        .global bindec
166        .global sc_mul
167bindec:
168        moveml  %d2-%d7/%a2,-(%a7)
169        fmovemx %fp0-%fp2,-(%a7)
170
171// A1. Set RM and size ext. Set SIGMA = sign input;
172//     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
173//     separating  normalized/denormalized input.  If the input
174//     is a denormalized number, set the BINDEC_FLG memory word
175//     to signal denorm.  If the input is unnormalized, normalize
176//     the input and test for denormalized result. 
177//
178        fmovel  #rm_mode,%FPCR  //set RM and ext
179        movel   (%a0),L_SCR2(%a6)       //save exponent for sign check
180        movel   %d0,%d7         //move k-factor to d7
181        clrb    BINDEC_FLG(%a6) //clr norm/denorm flag
182        movew   STAG(%a6),%d0   //get stag
183        andiw   #0xe000,%d0     //isolate stag bits
184        beq     A2_str          //if zero, input is norm
185//
186// Normalize the denorm
187//
188un_de_norm:
189        movew   (%a0),%d0
190        andiw   #0x7fff,%d0     //strip sign of normalized exp
191        movel   4(%a0),%d1
192        movel   8(%a0),%d2
193norm_loop:
194        subw    #1,%d0
195        lsll    #1,%d2
196        roxll   #1,%d1
197        tstl    %d1
198        bges    norm_loop
199//
200// Test if the normalized input is denormalized
201//
202        tstw    %d0
203        bgts    pos_exp         //if greater than zero, it is a norm
204        st      BINDEC_FLG(%a6) //set flag for denorm
205pos_exp:
206        andiw   #0x7fff,%d0     //strip sign of normalized exp
207        movew   %d0,(%a0)
208        movel   %d1,4(%a0)
209        movel   %d2,8(%a0)
210
211// A2. Set X = abs(input).
212//
213A2_str:
214        movel   (%a0),FP_SCR2(%a6) // move input to work space
215        movel   4(%a0),FP_SCR2+4(%a6) // move input to work space
216        movel   8(%a0),FP_SCR2+8(%a6) // move input to work space
217        andil   #0x7fffffff,FP_SCR2(%a6) //create abs(X)
218
219// A3. Compute ILOG.
220//     ILOG is the log base 10 of the input value.  It is approx-
221//     imated by adding e + 0.f when the original value is viewed
222//     as 2^^e * 1.f in extended precision.  This value is stored
223//     in d6.
224//
225// Register usage:
226//      Input/Output
227//      d0: k-factor/exponent
228//      d2: x/x
229//      d3: x/x
230//      d4: x/x
231//      d5: x/x
232//      d6: x/ILOG
233//      d7: k-factor/Unchanged
234//      a0: ptr for original operand/final result
235//      a1: x/x
236//      a2: x/x
237//      fp0: x/float(ILOG)
238//      fp1: x/x
239//      fp2: x/x
240//      F_SCR1:x/x
241//      F_SCR2:Abs(X)/Abs(X) with $3fff exponent
242//      L_SCR1:x/x
243//      L_SCR2:first word of X packed/Unchanged
244
245        tstb    BINDEC_FLG(%a6) //check for denorm
246        beqs    A3_cont         //if clr, continue with norm
247        movel   #-4933,%d6      //force ILOG = -4933
248        bras    A4_str
249A3_cont:
250        movew   FP_SCR2(%a6),%d0        //move exp to d0
251        movew   #0x3fff,FP_SCR2(%a6) //replace exponent with 0x3fff
252        fmovex  FP_SCR2(%a6),%fp0       //now fp0 has 1.f
253        subw    #0x3fff,%d0     //strip off bias
254        faddw   %d0,%fp0                //add in exp
255        fsubs   FONE,%fp0       //subtract off 1.0
256        fbge    pos_res         //if pos, branch
257        fmulx   LOG2UP1,%fp0    //if neg, mul by LOG2UP1
258        fmovel  %fp0,%d6                //put ILOG in d6 as a lword
259        bras    A4_str          //go move out ILOG
260pos_res:
261        fmulx   LOG2,%fp0       //if pos, mul by LOG2
262        fmovel  %fp0,%d6                //put ILOG in d6 as a lword
263
264
265// A4. Clr INEX bit.
266//     The operation in A3 above may have set INEX2. 
267
268A4_str:
269        fmovel  #0,%FPSR                //zero all of fpsr - nothing needed
270
271
272// A5. Set ICTR = 0;
273//     ICTR is a flag used in A13.  It must be set before the
274//     loop entry A6. The lower word of d5 is used for ICTR.
275
276        clrw    %d5             //clear ICTR
277
278
279// A6. Calculate LEN.
280//     LEN is the number of digits to be displayed.  The k-factor
281//     can dictate either the total number of digits, if it is
282//     a positive number, or the number of digits after the
283//     original decimal point which are to be included as
284//     significant.  See the 68882 manual for examples.
285//     If LEN is computed to be greater than 17, set OPERR in
286//     USER_FPSR.  LEN is stored in d4.
287//
288// Register usage:
289//      Input/Output
290//      d0: exponent/Unchanged
291//      d2: x/x/scratch
292//      d3: x/x
293//      d4: exc picture/LEN
294//      d5: ICTR/Unchanged
295//      d6: ILOG/Unchanged
296//      d7: k-factor/Unchanged
297//      a0: ptr for original operand/final result
298//      a1: x/x
299//      a2: x/x
300//      fp0: float(ILOG)/Unchanged
301//      fp1: x/x
302//      fp2: x/x
303//      F_SCR1:x/x
304//      F_SCR2:Abs(X) with $3fff exponent/Unchanged
305//      L_SCR1:x/x
306//      L_SCR2:first word of X packed/Unchanged
307
308A6_str:
309        tstl    %d7             //branch on sign of k
310        bles    k_neg           //if k <= 0, LEN = ILOG + 1 - k
311        movel   %d7,%d4         //if k > 0, LEN = k
312        bras    len_ck          //skip to LEN check
313k_neg:
314        movel   %d6,%d4         //first load ILOG to d4
315        subl    %d7,%d4         //subtract off k
316        addql   #1,%d4          //add in the 1
317len_ck:
318        tstl    %d4             //LEN check: branch on sign of LEN
319        bles    LEN_ng          //if neg, set LEN = 1
320        cmpl    #17,%d4         //test if LEN > 17
321        bles    A7_str          //if not, forget it
322        movel   #17,%d4         //set max LEN = 17
323        tstl    %d7             //if negative, never set OPERR
324        bles    A7_str          //if positive, continue
325        orl     #opaop_mask,USER_FPSR(%a6) //set OPERR & AIOP in USER_FPSR
326        bras    A7_str          //finished here
327LEN_ng:
328        moveql  #1,%d4          //min LEN is 1
329
330
331// A7. Calculate SCALE.
332//     SCALE is equal to 10^ISCALE, where ISCALE is the number
333//     of decimal places needed to insure LEN integer digits
334//     in the output before conversion to bcd. LAMBDA is the sign
335//     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
336//     the rounding mode as given in the following table (see
337//     Coonen, p. 7.23 as ref.; however, the SCALE variable is
338//     of opposite sign in bindec.sa from Coonen).
339//
340//      Initial                                 USE
341//      FPCR[6:5]       LAMBDA  SIGN(X)         FPCR[6:5]
342//      ----------------------------------------------
343//       RN     00         0       0            00/0    RN
344//       RN     00         0       1            00/0    RN
345//       RN     00         1       0            00/0    RN
346//       RN     00         1       1            00/0    RN
347//       RZ     01         0       0            11/3    RP
348//       RZ     01         0       1            11/3    RP
349//       RZ     01         1       0            10/2    RM
350//       RZ     01         1       1            10/2    RM
351//       RM     10         0       0            11/3    RP
352//       RM     10         0       1            10/2    RM
353//       RM     10         1       0            10/2    RM
354//       RM     10         1       1            11/3    RP
355//       RP     11         0       0            10/2    RM
356//       RP     11         0       1            11/3    RP
357//       RP     11         1       0            11/3    RP
358//       RP     11         1       1            10/2    RM
359//
360// Register usage:
361//      Input/Output
362//      d0: exponent/scratch - final is 0
363//      d2: x/0 or 24 for A9
364//      d3: x/scratch - offset ptr into PTENRM array
365//      d4: LEN/Unchanged
366//      d5: 0/ICTR:LAMBDA
367//      d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
368//      d7: k-factor/Unchanged
369//      a0: ptr for original operand/final result
370//      a1: x/ptr to PTENRM array
371//      a2: x/x
372//      fp0: float(ILOG)/Unchanged
373//      fp1: x/10^ISCALE
374//      fp2: x/x
375//      F_SCR1:x/x
376//      F_SCR2:Abs(X) with $3fff exponent/Unchanged
377//      L_SCR1:x/x
378//      L_SCR2:first word of X packed/Unchanged
379
380A7_str:
381        tstl    %d7             //test sign of k
382        bgts    k_pos           //if pos and > 0, skip this
383        cmpl    %d6,%d7         //test k - ILOG
384        blts    k_pos           //if ILOG >= k, skip this
385        movel   %d7,%d6         //if ((k<0) & (ILOG < k)) ILOG = k
386k_pos: 
387        movel   %d6,%d0         //calc ILOG + 1 - LEN in d0
388        addql   #1,%d0          //add the 1
389        subl    %d4,%d0         //sub off LEN
390        swap    %d5             //use upper word of d5 for LAMBDA
391        clrw    %d5             //set it zero initially
392        clrw    %d2             //set up d2 for very small case
393        tstl    %d0             //test sign of ISCALE
394        bges    iscale          //if pos, skip next inst
395        addqw   #1,%d5          //if neg, set LAMBDA true
396        cmpl    #0xffffecd4,%d0 //test iscale <= -4908
397        bgts    no_inf          //if false, skip rest
398        addil   #24,%d0         //add in 24 to iscale
399        movel   #24,%d2         //put 24 in d2 for A9
400no_inf:
401        negl    %d0             //and take abs of ISCALE
402iscale:
403        fmoves  FONE,%fp1       //init fp1 to 1
404        bfextu  USER_FPCR(%a6){#26:#2},%d1 //get initial rmode bits
405        lslw    #1,%d1          //put them in bits 2:1
406        addw    %d5,%d1         //add in LAMBDA
407        lslw    #1,%d1          //put them in bits 3:1
408        tstl    L_SCR2(%a6)     //test sign of original x
409        bges    x_pos           //if pos, don't set bit 0
410        addql   #1,%d1          //if neg, set bit 0
411x_pos:
412        leal    RBDTBL,%a2      //load rbdtbl base
413        moveb   (%a2,%d1),%d3   //load d3 with new rmode
414        lsll    #4,%d3          //put bits in proper position
415        fmovel  %d3,%fpcr               //load bits into fpu
416        lsrl    #4,%d3          //put bits in proper position
417        tstb    %d3             //decode new rmode for pten table
418        bnes    not_rn          //if zero, it is RN
419        leal    PTENRN,%a1      //load a1 with RN table base
420        bras    rmode           //exit decode
421not_rn:
422        lsrb    #1,%d3          //get lsb in carry
423        bccs    not_rp          //if carry clear, it is RM
424        leal    PTENRP,%a1      //load a1 with RP table base
425        bras    rmode           //exit decode
426not_rp:
427        leal    PTENRM,%a1      //load a1 with RM table base
428rmode:
429        clrl    %d3             //clr table index
430e_loop:
431        lsrl    #1,%d0          //shift next bit into carry
432        bccs    e_next          //if zero, skip the mul
433        fmulx   (%a1,%d3),%fp1  //mul by 10**(d3_bit_no)
434e_next:
435        addl    #12,%d3         //inc d3 to next pwrten table entry
436        tstl    %d0             //test if ISCALE is zero
437        bnes    e_loop          //if not, loop
438
439
440// A8. Clr INEX; Force RZ.
441//     The operation in A3 above may have set INEX2. 
442//     RZ mode is forced for the scaling operation to insure
443//     only one rounding error.  The grs bits are collected in
444//     the INEX flag for use in A10.
445//
446// Register usage:
447//      Input/Output
448
449        fmovel  #0,%FPSR                //clr INEX
450        fmovel  #rz_mode,%FPCR  //set RZ rounding mode
451
452
453// A9. Scale X -> Y.
454//     The mantissa is scaled to the desired number of significant
455//     digits.  The excess digits are collected in INEX2. If mul,
456//     Check d2 for excess 10 exponential value.  If not zero,
457//     the iscale value would have caused the pwrten calculation
458//     to overflow.  Only a negative iscale can cause this, so
459//     multiply by 10^(d2), which is now only allowed to be 24,
460//     with a multiply by 10^8 and 10^16, which is exact since
461//     10^24 is exact.  If the input was denormalized, we must
462//     create a busy stack frame with the mul command and the
463//     two operands, and allow the fpu to complete the multiply.
464//
465// Register usage:
466//      Input/Output
467//      d0: FPCR with RZ mode/Unchanged
468//      d2: 0 or 24/unchanged
469//      d3: x/x
470//      d4: LEN/Unchanged
471//      d5: ICTR:LAMBDA
472//      d6: ILOG/Unchanged
473//      d7: k-factor/Unchanged
474//      a0: ptr for original operand/final result
475//      a1: ptr to PTENRM array/Unchanged
476//      a2: x/x
477//      fp0: float(ILOG)/X adjusted for SCALE (Y)
478//      fp1: 10^ISCALE/Unchanged
479//      fp2: x/x
480//      F_SCR1:x/x
481//      F_SCR2:Abs(X) with $3fff exponent/Unchanged
482//      L_SCR1:x/x
483//      L_SCR2:first word of X packed/Unchanged
484
485A9_str:
486        fmovex  (%a0),%fp0      //load X from memory
487        fabsx   %fp0            //use abs(X)
488        tstw    %d5             //LAMBDA is in lower word of d5
489        bne     sc_mul          //if neg (LAMBDA = 1), scale by mul
490        fdivx   %fp1,%fp0               //calculate X / SCALE -> Y to fp0
491        bras    A10_st          //branch to A10
492
493sc_mul:
494        tstb    BINDEC_FLG(%a6) //check for denorm
495        beqs    A9_norm         //if norm, continue with mul
496        fmovemx %fp1-%fp1,-(%a7)        //load ETEMP with 10^ISCALE
497        movel   8(%a0),-(%a7)   //load FPTEMP with input arg
498        movel   4(%a0),-(%a7)
499        movel   (%a0),-(%a7)
500        movel   #18,%d3         //load count for busy stack
501A9_loop:
502        clrl    -(%a7)          //clear lword on stack
503        dbf     %d3,A9_loop     
504        moveb   VER_TMP(%a6),(%a7) //write current version number
505        moveb   #BUSY_SIZE-4,1(%a7) //write current busy size
506        moveb   #0x10,0x44(%a7) //set fcefpte[15] bit
507        movew   #0x0023,0x40(%a7)       //load cmdreg1b with mul command
508        moveb   #0xfe,0x8(%a7)  //load all 1s to cu savepc
509        frestore (%a7)+         //restore frame to fpu for completion
510        fmulx   36(%a1),%fp0    //multiply fp0 by 10^8
511        fmulx   48(%a1),%fp0    //multiply fp0 by 10^16
512        bras    A10_st
513A9_norm:
514        tstw    %d2             //test for small exp case
515        beqs    A9_con          //if zero, continue as normal
516        fmulx   36(%a1),%fp0    //multiply fp0 by 10^8
517        fmulx   48(%a1),%fp0    //multiply fp0 by 10^16
518A9_con:
519        fmulx   %fp1,%fp0               //calculate X * SCALE -> Y to fp0
520
521
522// A10. Or in INEX.
523//      If INEX is set, round error occurred.  This is compensated
524//      for by 'or-ing' in the INEX2 flag to the lsb of Y.
525//
526// Register usage:
527//      Input/Output
528//      d0: FPCR with RZ mode/FPSR with INEX2 isolated
529//      d2: x/x
530//      d3: x/x
531//      d4: LEN/Unchanged
532//      d5: ICTR:LAMBDA
533//      d6: ILOG/Unchanged
534//      d7: k-factor/Unchanged
535//      a0: ptr for original operand/final result
536//      a1: ptr to PTENxx array/Unchanged
537//      a2: x/ptr to FP_SCR2(a6)
538//      fp0: Y/Y with lsb adjusted
539//      fp1: 10^ISCALE/Unchanged
540//      fp2: x/x
541
542A10_st:
543        fmovel  %FPSR,%d0               //get FPSR
544        fmovex  %fp0,FP_SCR2(%a6)       //move Y to memory
545        leal    FP_SCR2(%a6),%a2        //load a2 with ptr to FP_SCR2
546        btstl   #9,%d0          //check if INEX2 set
547        beqs    A11_st          //if clear, skip rest
548        oril    #1,8(%a2)       //or in 1 to lsb of mantissa
549        fmovex  FP_SCR2(%a6),%fp0       //write adjusted Y back to fpu
550
551
552// A11. Restore original FPCR; set size ext.
553//      Perform FINT operation in the user's rounding mode.  Keep
554//      the size to extended.  The sintdo entry point in the sint
555//      routine expects the FPCR value to be in USER_FPCR for
556//      mode and precision.  The original FPCR is saved in L_SCR1.
557
558A11_st:
559        movel   USER_FPCR(%a6),L_SCR1(%a6) //save it for later
560        andil   #0x00000030,USER_FPCR(%a6) //set size to ext,
561//                                      ;block exceptions
562
563
564// A12. Calculate YINT = FINT(Y) according to user's rounding mode.
565//      The FPSP routine sintd0 is used.  The output is in fp0.
566//
567// Register usage:
568//      Input/Output
569//      d0: FPSR with AINEX cleared/FPCR with size set to ext
570//      d2: x/x/scratch
571//      d3: x/x
572//      d4: LEN/Unchanged
573//      d5: ICTR:LAMBDA/Unchanged
574//      d6: ILOG/Unchanged
575//      d7: k-factor/Unchanged
576//      a0: ptr for original operand/src ptr for sintdo
577//      a1: ptr to PTENxx array/Unchanged
578//      a2: ptr to FP_SCR2(a6)/Unchanged
579//      a6: temp pointer to FP_SCR2(a6) - orig value saved and restored
580//      fp0: Y/YINT
581//      fp1: 10^ISCALE/Unchanged
582//      fp2: x/x
583//      F_SCR1:x/x
584//      F_SCR2:Y adjusted for inex/Y with original exponent
585//      L_SCR1:x/original USER_FPCR
586//      L_SCR2:first word of X packed/Unchanged
587
588A12_st:
589        moveml  %d0-%d1/%a0-%a1,-(%a7)  //save regs used by sintd0     
590        movel   L_SCR1(%a6),-(%a7)
591        movel   L_SCR2(%a6),-(%a7)
592        leal    FP_SCR2(%a6),%a0                //a0 is ptr to F_SCR2(a6)
593        fmovex  %fp0,(%a0)              //move Y to memory at FP_SCR2(a6)
594        tstl    L_SCR2(%a6)             //test sign of original operand
595        bges    do_fint                 //if pos, use Y
596        orl     #0x80000000,(%a0)               //if neg, use -Y
597do_fint:
598        movel   USER_FPSR(%a6),-(%a7)
599        bsr     sintdo                  //sint routine returns int in fp0
600        moveb   (%a7),USER_FPSR(%a6)
601        addl    #4,%a7
602        movel   (%a7)+,L_SCR2(%a6)
603        movel   (%a7)+,L_SCR1(%a6)
604        moveml  (%a7)+,%d0-%d1/%a0-%a1  //restore regs used by sint     
605        movel   L_SCR2(%a6),FP_SCR2(%a6)        //restore original exponent
606        movel   L_SCR1(%a6),USER_FPCR(%a6) //restore user's FPCR
607
608
609// A13. Check for LEN digits.
610//      If the int operation results in more than LEN digits,
611//      or less than LEN -1 digits, adjust ILOG and repeat from
612//      A6.  This test occurs only on the first pass.  If the
613//      result is exactly 10^LEN, decrement ILOG and divide
614//      the mantissa by 10.  The calculation of 10^LEN cannot
615//      be inexact, since all powers of ten upto 10^27 are exact
616//      in extended precision, so the use of a previous power-of-ten
617//      table will introduce no error.
618//
619//
620// Register usage:
621//      Input/Output
622//      d0: FPCR with size set to ext/scratch final = 0
623//      d2: x/x
624//      d3: x/scratch final = x
625//      d4: LEN/LEN adjusted
626//      d5: ICTR:LAMBDA/LAMBDA:ICTR
627//      d6: ILOG/ILOG adjusted
628//      d7: k-factor/Unchanged
629//      a0: pointer into memory for packed bcd string formation
630//      a1: ptr to PTENxx array/Unchanged
631//      a2: ptr to FP_SCR2(a6)/Unchanged
632//      fp0: int portion of Y/abs(YINT) adjusted
633//      fp1: 10^ISCALE/Unchanged
634//      fp2: x/10^LEN
635//      F_SCR1:x/x
636//      F_SCR2:Y with original exponent/Unchanged
637//      L_SCR1:original USER_FPCR/Unchanged
638//      L_SCR2:first word of X packed/Unchanged
639
640A13_st:
641        swap    %d5             //put ICTR in lower word of d5
642        tstw    %d5             //check if ICTR = 0
643        bne     not_zr          //if non-zero, go to second test
644//
645// Compute 10^(LEN-1)
646//
647        fmoves  FONE,%fp2       //init fp2 to 1.0
648        movel   %d4,%d0         //put LEN in d0
649        subql   #1,%d0          //d0 = LEN -1
650        clrl    %d3             //clr table index
651l_loop:
652        lsrl    #1,%d0          //shift next bit into carry
653        bccs    l_next          //if zero, skip the mul
654        fmulx   (%a1,%d3),%fp2  //mul by 10**(d3_bit_no)
655l_next:
656        addl    #12,%d3         //inc d3 to next pwrten table entry
657        tstl    %d0             //test if LEN is zero
658        bnes    l_loop          //if not, loop
659//
660// 10^LEN-1 is computed for this test and A14.  If the input was
661// denormalized, check only the case in which YINT > 10^LEN.
662//
663        tstb    BINDEC_FLG(%a6) //check if input was norm
664        beqs    A13_con         //if norm, continue with checking
665        fabsx   %fp0            //take abs of YINT
666        bra     test_2
667//
668// Compare abs(YINT) to 10^(LEN-1) and 10^LEN
669//
670A13_con:
671        fabsx   %fp0            //take abs of YINT
672        fcmpx   %fp2,%fp0               //compare abs(YINT) with 10^(LEN-1)
673        fbge    test_2          //if greater, do next test
674        subql   #1,%d6          //subtract 1 from ILOG
675        movew   #1,%d5          //set ICTR
676        fmovel  #rm_mode,%FPCR  //set rmode to RM
677        fmuls   FTEN,%fp2       //compute 10^LEN
678        bra     A6_str          //return to A6 and recompute YINT
679test_2:
680        fmuls   FTEN,%fp2       //compute 10^LEN
681        fcmpx   %fp2,%fp0               //compare abs(YINT) with 10^LEN
682        fblt    A14_st          //if less, all is ok, go to A14
683        fbgt    fix_ex          //if greater, fix and redo
684        fdivs   FTEN,%fp0       //if equal, divide by 10
685        addql   #1,%d6          // and inc ILOG
686        bras    A14_st          // and continue elsewhere
687fix_ex:
688        addql   #1,%d6          //increment ILOG by 1
689        movew   #1,%d5          //set ICTR
690        fmovel  #rm_mode,%FPCR  //set rmode to RM
691        bra     A6_str          //return to A6 and recompute YINT
692//
693// Since ICTR <> 0, we have already been through one adjustment,
694// and shouldn't have another; this is to check if abs(YINT) = 10^LEN
695// 10^LEN is again computed using whatever table is in a1 since the
696// value calculated cannot be inexact.
697//
698not_zr:
699        fmoves  FONE,%fp2       //init fp2 to 1.0
700        movel   %d4,%d0         //put LEN in d0
701        clrl    %d3             //clr table index
702z_loop:
703        lsrl    #1,%d0          //shift next bit into carry
704        bccs    z_next          //if zero, skip the mul
705        fmulx   (%a1,%d3),%fp2  //mul by 10**(d3_bit_no)
706z_next:
707        addl    #12,%d3         //inc d3 to next pwrten table entry
708        tstl    %d0             //test if LEN is zero
709        bnes    z_loop          //if not, loop
710        fabsx   %fp0            //get abs(YINT)
711        fcmpx   %fp2,%fp0               //check if abs(YINT) = 10^LEN
712        fbne    A14_st          //if not, skip this
713        fdivs   FTEN,%fp0       //divide abs(YINT) by 10
714        addql   #1,%d6          //and inc ILOG by 1
715        addql   #1,%d4          // and inc LEN
716        fmuls   FTEN,%fp2       // if LEN++, the get 10^^LEN
717
718
719// A14. Convert the mantissa to bcd.
720//      The binstr routine is used to convert the LEN digit
721//      mantissa to bcd in memory.  The input to binstr is
722//      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
723//      such that the decimal point is to the left of bit 63.
724//      The bcd digits are stored in the correct position in
725//      the final string area in memory.
726//
727//
728// Register usage:
729//      Input/Output
730//      d0: x/LEN call to binstr - final is 0
731//      d1: x/0
732//      d2: x/ms 32-bits of mant of abs(YINT)
733//      d3: x/ls 32-bits of mant of abs(YINT)
734//      d4: LEN/Unchanged
735//      d5: ICTR:LAMBDA/LAMBDA:ICTR
736//      d6: ILOG
737//      d7: k-factor/Unchanged
738//      a0: pointer into memory for packed bcd string formation
739//          /ptr to first mantissa byte in result string
740//      a1: ptr to PTENxx array/Unchanged
741//      a2: ptr to FP_SCR2(a6)/Unchanged
742//      fp0: int portion of Y/abs(YINT) adjusted
743//      fp1: 10^ISCALE/Unchanged
744//      fp2: 10^LEN/Unchanged
745//      F_SCR1:x/Work area for final result
746//      F_SCR2:Y with original exponent/Unchanged
747//      L_SCR1:original USER_FPCR/Unchanged
748//      L_SCR2:first word of X packed/Unchanged
749
750A14_st:
751        fmovel  #rz_mode,%FPCR  //force rz for conversion
752        fdivx   %fp2,%fp0               //divide abs(YINT) by 10^LEN
753        leal    FP_SCR1(%a6),%a0
754        fmovex  %fp0,(%a0)      //move abs(YINT)/10^LEN to memory
755        movel   4(%a0),%d2      //move 2nd word of FP_RES to d2
756        movel   8(%a0),%d3      //move 3rd word of FP_RES to d3
757        clrl    4(%a0)          //zero word 2 of FP_RES
758        clrl    8(%a0)          //zero word 3 of FP_RES
759        movel   (%a0),%d0               //move exponent to d0
760        swap    %d0             //put exponent in lower word
761        beqs    no_sft          //if zero, don't shift
762        subil   #0x3ffd,%d0     //sub bias less 2 to make fract
763        tstl    %d0             //check if > 1
764        bgts    no_sft          //if so, don't shift
765        negl    %d0             //make exp positive
766m_loop:
767        lsrl    #1,%d2          //shift d2:d3 right, add 0s
768        roxrl   #1,%d3          //the number of places
769        dbf     %d0,m_loop      //given in d0
770no_sft:
771        tstl    %d2             //check for mantissa of zero
772        bnes    no_zr           //if not, go on
773        tstl    %d3             //continue zero check
774        beqs    zer_m           //if zero, go directly to binstr
775no_zr:
776        clrl    %d1             //put zero in d1 for addx
777        addil   #0x00000080,%d3 //inc at bit 7
778        addxl   %d1,%d2         //continue inc
779        andil   #0xffffff80,%d3 //strip off lsb not used by 882
780zer_m:
781        movel   %d4,%d0         //put LEN in d0 for binstr call
782        addql   #3,%a0          //a0 points to M16 byte in result
783        bsr     binstr          //call binstr to convert mant
784
785
786// A15. Convert the exponent to bcd.
787//      As in A14 above, the exp is converted to bcd and the
788//      digits are stored in the final string.
789//
790//      Digits are stored in L_SCR1(a6) on return from BINDEC as:
791//
792//       32               16 15                0
793//      -----------------------------------------
794//      |  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
795//      -----------------------------------------
796//
797// And are moved into their proper places in FP_SCR1.  If digit e4
798// is non-zero, OPERR is signaled.  In all cases, all 4 digits are
799// written as specified in the 881/882 manual for packed decimal.
800//
801// Register usage:
802//      Input/Output
803//      d0: x/LEN call to binstr - final is 0
804//      d1: x/scratch (0);shift count for final exponent packing
805//      d2: x/ms 32-bits of exp fraction/scratch
806//      d3: x/ls 32-bits of exp fraction
807//      d4: LEN/Unchanged
808//      d5: ICTR:LAMBDA/LAMBDA:ICTR
809//      d6: ILOG
810//      d7: k-factor/Unchanged
811//      a0: ptr to result string/ptr to L_SCR1(a6)
812//      a1: ptr to PTENxx array/Unchanged
813//      a2: ptr to FP_SCR2(a6)/Unchanged
814//      fp0: abs(YINT) adjusted/float(ILOG)
815//      fp1: 10^ISCALE/Unchanged
816//      fp2: 10^LEN/Unchanged
817//      F_SCR1:Work area for final result/BCD result
818//      F_SCR2:Y with original exponent/ILOG/10^4
819//      L_SCR1:original USER_FPCR/Exponent digits on return from binstr
820//      L_SCR2:first word of X packed/Unchanged
821
822A15_st:
823        tstb    BINDEC_FLG(%a6) //check for denorm
824        beqs    not_denorm
825        ftstx   %fp0            //test for zero
826        fbeq    den_zero        //if zero, use k-factor or 4933
827        fmovel  %d6,%fp0                //float ILOG
828        fabsx   %fp0            //get abs of ILOG
829        bras    convrt
830den_zero:
831        tstl    %d7             //check sign of the k-factor
832        blts    use_ilog        //if negative, use ILOG
833        fmoves  F4933,%fp0      //force exponent to 4933
834        bras    convrt          //do it
835use_ilog:
836        fmovel  %d6,%fp0                //float ILOG
837        fabsx   %fp0            //get abs of ILOG
838        bras    convrt
839not_denorm:
840        ftstx   %fp0            //test for zero
841        fbne    not_zero        //if zero, force exponent
842        fmoves  FONE,%fp0       //force exponent to 1
843        bras    convrt          //do it
844not_zero:       
845        fmovel  %d6,%fp0                //float ILOG
846        fabsx   %fp0            //get abs of ILOG
847convrt:
848        fdivx   24(%a1),%fp0    //compute ILOG/10^4
849        fmovex  %fp0,FP_SCR2(%a6)       //store fp0 in memory
850        movel   4(%a2),%d2      //move word 2 to d2
851        movel   8(%a2),%d3      //move word 3 to d3
852        movew   (%a2),%d0               //move exp to d0
853        beqs    x_loop_fin      //if zero, skip the shift
854        subiw   #0x3ffd,%d0     //subtract off bias
855        negw    %d0             //make exp positive
856x_loop:
857        lsrl    #1,%d2          //shift d2:d3 right
858        roxrl   #1,%d3          //the number of places
859        dbf     %d0,x_loop      //given in d0
860x_loop_fin:
861        clrl    %d1             //put zero in d1 for addx
862        addil   #0x00000080,%d3 //inc at bit 6
863        addxl   %d1,%d2         //continue inc
864        andil   #0xffffff80,%d3 //strip off lsb not used by 882
865        movel   #4,%d0          //put 4 in d0 for binstr call
866        leal    L_SCR1(%a6),%a0 //a0 is ptr to L_SCR1 for exp digits
867        bsr     binstr          //call binstr to convert exp
868        movel   L_SCR1(%a6),%d0 //load L_SCR1 lword to d0
869        movel   #12,%d1         //use d1 for shift count
870        lsrl    %d1,%d0         //shift d0 right by 12
871        bfins   %d0,FP_SCR1(%a6){#4:#12} //put e3:e2:e1 in FP_SCR1
872        lsrl    %d1,%d0         //shift d0 right by 12
873        bfins   %d0,FP_SCR1(%a6){#16:#4} //put e4 in FP_SCR1
874        tstb    %d0             //check if e4 is zero
875        beqs    A16_st          //if zero, skip rest
876        orl     #opaop_mask,USER_FPSR(%a6) //set OPERR & AIOP in USER_FPSR
877
878
879// A16. Write sign bits to final string.
880//         Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
881//
882// Register usage:
883//      Input/Output
884//      d0: x/scratch - final is x
885//      d2: x/x
886//      d3: x/x
887//      d4: LEN/Unchanged
888//      d5: ICTR:LAMBDA/LAMBDA:ICTR
889//      d6: ILOG/ILOG adjusted
890//      d7: k-factor/Unchanged
891//      a0: ptr to L_SCR1(a6)/Unchanged
892//      a1: ptr to PTENxx array/Unchanged
893//      a2: ptr to FP_SCR2(a6)/Unchanged
894//      fp0: float(ILOG)/Unchanged
895//      fp1: 10^ISCALE/Unchanged
896//      fp2: 10^LEN/Unchanged
897//      F_SCR1:BCD result with correct signs
898//      F_SCR2:ILOG/10^4
899//      L_SCR1:Exponent digits on return from binstr
900//      L_SCR2:first word of X packed/Unchanged
901
902A16_st:
903        clrl    %d0             //clr d0 for collection of signs
904        andib   #0x0f,FP_SCR1(%a6) //clear first nibble of FP_SCR1
905        tstl    L_SCR2(%a6)     //check sign of original mantissa
906        bges    mant_p          //if pos, don't set SM
907        moveql  #2,%d0          //move 2 in to d0 for SM
908mant_p:
909        tstl    %d6             //check sign of ILOG
910        bges    wr_sgn          //if pos, don't set SE
911        addql   #1,%d0          //set bit 0 in d0 for SE
912wr_sgn:
913        bfins   %d0,FP_SCR1(%a6){#0:#2} //insert SM and SE into FP_SCR1
914
915// Clean up and restore all registers used.
916
917        fmovel  #0,%FPSR                //clear possible inex2/ainex bits
918        fmovemx (%a7)+,%fp0-%fp2
919        moveml  (%a7)+,%d2-%d7/%a2
920        rts
921
922        |end
Note: See TracBrowser for help on using the repository browser.