
/*
 * fpuset.c -- $Id$
 * set up FPU to trap floating point exceptions
 * - this is very non-portable, not covered by ANSI C, POSIX, or even C9X
 * - if you port to a new platform (eg- Ultrix) please contact the author

  Copyright (c) 1996.  The Regents of the University of California.
			 All rights reserved.

Permission to use, copy, modify, and distribute this software for any
purpose without fee is hereby granted, provided that this entire
notice is included in all copies of any software which is or includes
a copy or modification of this software and in all copies of the
supporting documentation for such software.

This work was produced at the University of California, Lawrence
Livermore National Laboratory under contract no. W-7405-ENG-48 between
the U.S. Department of Energy and The Regents of the University of
California for the operation of UC LLNL.


			      DISCLAIMER

This software was prepared as an account of work sponsored by an
agency of the United States Government.  Neither the United States
Government nor the University of California nor any of their
employees, makes any warranty, express or implied, or assumes any
liability or responsibility for the accuracy, completeness, or
usefulness of any information, apparatus, product, or process
disclosed, or represents that its use would not infringe
privately-owned rights.  Reference herein to any specific commercial
products, process, or service by trade name, trademark, manufacturer,
or otherwise, does not necessarily constitute or imply its
endorsement, recommendation, or favoring by the United States
Government or the University of California.  The views and opinions of
authors expressed herein do not necessarily state or reflect those of
the United States Government or the University of California, and
shall not be used for advertising or product endorsement purposes.


				AUTHOR

                             David H. Munro

*/


#include "runtime.h"


/* Record of the floating point exceptions reported by query_fpu().
 * Each exception class gets a single-bit flag; show_exceptions() zeroes
 * all of them before asking the hardware. */
static struct {
    unsigned int invalid   : 1;   /* reported as "Invalid operand" */
    unsigned int denormal  : 1;   /* reported as "Denormal number" */
    unsigned int zerodiv   : 1;   /* reported as "Division by Zero" */
    unsigned int overflow  : 1;   /* reported as "Numeric overflow" */
    unsigned int underflow : 1;   /* reported as "Numeric underflow" */
    unsigned int inexact   : 1;   /* reported as "Precision loss (inexact)" */
} exception;





#if defined(FPU_DIGITAL) || defined(FPU_ALPHA_LINUX)

/* FPU_ALPHA_LINUX: see README.fpu */
/* man pages: exception_intro, ieee */
# ifdef FPU_DIGITAL
#  include <machine/fpu.h>
# else
   /* <machine/fpu.h> is not included in this configuration, so the
    * control routine and the bits passed to it are declared by hand. */
   extern void ieee_set_fp_control(long);
#  define IEEE_TRAP_ENABLE_INV 0x000002   /* used for options.fpu_invalid */
#  define IEEE_TRAP_ENABLE_DZE 0x000004   /* used for options.fpu_zerodiv */
#  define IEEE_TRAP_ENABLE_OVF 0x000008   /* used for options.fpu_overflow */
#  define IEEE_TRAP_ENABLE_UNF 0x000010   /* used for options.fpu_underflow */
#  define IEEE_TRAP_ENABLE_INE 0x000020   /* used for options.fpu_inexact */
#  define IEEE_TRAP_ENABLE_DNO 0x000040   /* used for options.fpu_denormal */

   /* The two mapping bits below are only mentioned in a comment inside
    * set_fpu(); no code in this branch passes them on. */
#  define IEEE_MAP_DMZ         (1UL<<12)
#  define IEEE_MAP_UMZ         (1UL<<13)
# endif

/* set_fpu()-- Collect the IEEE trap-enable bits requested in 'options'
 * and pass the resulting mask to ieee_set_fp_control(). */

static void set_fpu(void) {
long cw;   /* same type as the ieee_set_fp_control() parameter */

  /* possibly should include IEEE_MAP_DMZ and IEEE_MAP_UMZ
   * to map denorm inputs and underflowed outputs to zero
   * --however, these apparently only have an effect for software
   * completed operations (the hardware always maps underflows to zero)
   */

    cw = 0;
    if (options.fpu_invalid)   cw |= IEEE_TRAP_ENABLE_INV;
    if (options.fpu_denormal)  cw |= IEEE_TRAP_ENABLE_DNO;
    if (options.fpu_zerodiv)   cw |= IEEE_TRAP_ENABLE_DZE;
    if (options.fpu_overflow)  cw |= IEEE_TRAP_ENABLE_OVF;
    if (options.fpu_underflow) cw |= IEEE_TRAP_ENABLE_UNF;
    if (options.fpu_inexact)   cw |= IEEE_TRAP_ENABLE_INE;

    ieee_set_fp_control(cw);
}


/* query_fpu()-- Exception flags are not read back in this configuration.
 * show_exceptions() calls query_fpu() unconditionally, so a definition
 * must exist for the file to build; it leaves 'exception' untouched. */

static void query_fpu(void) { }

#elif defined(FPU_AIX)

/* man pages: fp_trap, fp_enable */
#include <fptrap.h>
/* set_fpu()-- Collect the trap bits requested in 'options', then call
 * fp_trap(FP_TRAP_FASTMODE) followed by fp_enable() with that mask. */

static void set_fpu(void) {
int cw;

    cw = 0;

    /* options.fpu_denormal contributes no bit in this configuration */
    if (options.fpu_invalid)   cw |= TRP_INVALID;
    if (options.fpu_zerodiv)   cw |= TRP_DIV_BY_ZERO;
    if (options.fpu_overflow)  cw |= TRP_OVERFLOW;
    if (options.fpu_underflow) cw |= TRP_UNDERFLOW;
    if (options.fpu_inexact)   cw |= TRP_INEXACT;

    fp_trap(FP_TRAP_FASTMODE);
    fp_enable(cw);
}


/* query_fpu()-- Exception flags are not read back in this configuration.
 * show_exceptions() calls query_fpu() unconditionally, so a definition
 * must exist for the file to build; it leaves 'exception' untouched. */

static void query_fpu(void) { }

#elif defined(FPU_HPPA)

/* Memory image of the floating point status register, used by both
 * set_fpu() and query_fpu() as the operand of fstws/fldws. */
static unsigned int fsr;

/* set_fpu()-- Read the status register, rebuild the trap-enable bits and
 * the rounding mode from 'options', and load the result back. */

static void set_fpu(void) {
const unsigned int round_shift = 9;
const unsigned int round_mask  = 3u << round_shift;

    asm("fstws %%fr0L, %0\n" : "=m" (fsr));

    /* Drop the top six bits (query_fpu() reads the exception flags from
     * bits 27-31) and the low five bits, which are rebuilt below. */
    fsr &= 0x03ffffe0;

    /* NOTE(review): the purpose of bit 25 is not documented in this
     * file -- confirm against the PA-RISC architecture manual. */
    fsr |= 1u << 25;

    if (options.fpu_inexact)   fsr |= 0x01;
    if (options.fpu_underflow) fsr |= 0x02;
    if (options.fpu_overflow)  fsr |= 0x04;
    if (options.fpu_zerodiv)   fsr |= 0x08;
    if (options.fpu_invalid)   fsr |= 0x10;

    /* The two-bit rounding field must be cleared before the new mode is
     * inserted; ORing onto a non-zero previous mode would select a
     * different mode than the one requested.  An unrecognised
     * fpu_round value leaves the field as it was. */
    switch(options.fpu_round) {
    case FP_ROUND_NEAREST:
	fsr = (fsr & ~round_mask) | (0u << round_shift);
	break;
    case FP_ROUND_ZERO:
	fsr = (fsr & ~round_mask) | (1u << round_shift);
	break;
    case FP_ROUND_UP:
	fsr = (fsr & ~round_mask) | (2u << round_shift);
	break;
    case FP_ROUND_DOWN:
	fsr = (fsr & ~round_mask) | (3u << round_shift);
	break;
    }

    asm("fldws %0, %%fr0L\n" : : "m" (fsr));
}


/* query_fpu()-- Store the status register into 'fsr' and copy the five
 * flag bits found at bits 27-31 into 'exception'.  No denormal flag is
 * reported here. */

static void query_fpu(void) {

    asm("fstws %%fr0L, %0\n" : "=m" (fsr));

    /* Unsigned constants: 0x10 << 27 does not fit in a signed int. */
    exception.invalid   = (fsr & (0x10u << 27)) != 0;
    exception.zerodiv   = (fsr & (0x08u << 27)) != 0;
    exception.overflow  = (fsr & (0x04u << 27)) != 0;
    exception.underflow = (fsr & (0x02u << 27)) != 0;
    exception.inexact   = (fsr & (0x01u << 27)) != 0;
}


#elif defined(FPU_MIPS)

/* man pages: handle_sigfpes, note lethal TRAP_FPE environment variable
 * library: -lfpe
 * note: earlier versions used get_fpc_csr/set_fpc_csr?, sys/fpu.h */

#if 0
extern void u_sigfpe(int sig);  /* from handler.c (or fputest.c) */

#include <sigfpe.h>
/* set_fpu()-- handle_sigfpes() based variant.  This definition sits
 * inside an '#if 0' region and is therefore never compiled; the empty
 * stub after the '#endif' is the one actually used. */
static void set_fpu(void) {
int cw;

    cw = 0;

    /* denormal and inexact contribute no bit to the mask here */
    if (options.fpu_invalid)   cw |= _EN_INVALID;
    if (options.fpu_denormal)  cw |= 0;
    if (options.fpu_zerodiv)   cw |= _EN_DIVZERO;
    if (options.fpu_overflow)  cw |= _EN_OVERFL;
    if (options.fpu_underflow) cw |= _EN_UNDERFL;
    if (options.fpu_inexact)   cw |= 0;

    /* NOTE(review): u_sigfpe is declared above but both handler
     * arguments passed here are null -- confirm before re-enabling. */
    handle_sigfpes(_ON, cw, (void (*)())0, _USER_HANDLER,
		   (void (*)()) NULL);
}
#endif

/* set_fpu()-- Nothing is programmed in this configuration; the
 * handle_sigfpes() variant above is compiled out. */
static void set_fpu(void)
{
}

/* query_fpu()-- Nothing is read back, so 'exception' keeps whatever
 * the caller stored in it. */
static void query_fpu(void)
{
}


/* Not sure how we got this for sparc, there is other code below. */

#elif 0 && defined(FPU_SPARC)

/* man pages: fpsetmask
 *    Sun's -fnonstd compiler switch switches between __fnonstd.o
 *      and __fstd.o under Solaris, as far as I can tell.  Use FPU_IGNORE
 *        if you do this.  */
#include <ieeefp.h>

/* set_fpu()-- fpsetmask() based variant.  The enclosing '#elif 0'
 * keeps this branch from ever being compiled. */
static void set_fpu(void) {
int cw;

    cw = 0;

    if (options.fpu_invalid)   cw |= FP_X_INV;
    if (options.fpu_denormal)  cw |= FP_X_DNML;
    if (options.fpu_zerodiv)   cw |= FP_X_DZ;
    if (options.fpu_overflow)  cw |= FP_X_OFL;
    if (options.fpu_underflow) cw |= FP_X_UFL;
    if (options.fpu_inexact)   cw |= FP_X_IMP;

    /* NOTE(review): 'cw' is built above but never used; fpsetmask() is
     * given a fixed mask instead -- confirm intent before re-enabling. */
    fpsetmask(FP_X_INV | FP_X_DZ | FP_X_OFL);
 
   /* this doesn't set the "nonstandard arithmetic" bit, which prevents
     * software emulation of IEEE gradual underflow
     * -- apparently no way to do this in libc (see FPU_GCC_SPARC) */
}


#elif defined(FPU_GNU_FENV) || defined(FPU_ALPHA)

/* The GNU C library extends the C99 <fenv.h> interface with feenableexcept() */
#include <fenv.h>

/* set_fpu()-- Clear the five standard exception flags, then enable
 * traps for the exceptions requested in 'options'. */

static void set_fpu(void) {
const int all_flags = FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW |
		      FE_UNDERFLOW | FE_INEXACT;
int traps = 0;

    /* options.fpu_denormal contributes no bit in this configuration */
    traps |= options.fpu_invalid   ? FE_INVALID   : 0;
    traps |= options.fpu_zerodiv   ? FE_DIVBYZERO : 0;
    traps |= options.fpu_overflow  ? FE_OVERFLOW  : 0;
    traps |= options.fpu_underflow ? FE_UNDERFLOW : 0;
    traps |= options.fpu_inexact   ? FE_INEXACT   : 0;

    feclearexcept(all_flags);

    feenableexcept(traps);
}


/* query_fpu()-- Ask fetestexcept() which of the five standard
 * exceptions are currently raised and mirror the answer into
 * 'exception'.  The denormal flag is not touched. */

static void query_fpu(void) {
const int raised = fetestexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW |
				FE_UNDERFLOW | FE_INEXACT);

    exception.invalid   = (raised & FE_INVALID)   != 0;
    exception.zerodiv   = (raised & FE_DIVBYZERO) != 0;
    exception.overflow  = (raised & FE_OVERFLOW)  != 0;
    exception.underflow = (raised & FE_UNDERFLOW) != 0;
    exception.inexact   = (raised & FE_INEXACT)   != 0;
}



#elif defined(FPU_387)

/* have_sse()-- Returns nonzero if the processor supports SSE instructions.
 * Runs cpuid with EAX = 1 and returns bit 25 of the resulting EDX as
 * 0 or 1.  Written as a GNU statement expression wrapping one asm.
 * EBX is a string macro defined outside this file (presumably the
 * register name suited to the target word size -- confirm there). */

static int have_sse(void) {

      return ({ int result;
      asm("push %" EBX "\n"           /* save the register named by EBX */
	  "mov $1, %%eax\n"           /* select cpuid leaf 1 */
	  "cpuid\n"
	  "pop %" EBX "\n"            /* restore it after cpuid */
	  "mov %%edx, %%eax\n"
	  "shr $25, %%eax\n"          /* move bit 25 down to bit 0 */
	  "and $1, %%eax\n" : "=a" (result) : : "%ecx", "%edx");
      result; });
}


/* Bit positions of the six exception classes in the low bits of the
 * 387 words handled below: set_fpu() applies them to the control word
 * obtained with fstcw, query_fpu() to the status word from fnstsw. */
#define FE_INVALID     0x0001
#define FE_DENORM      0x0002
#define FE_DIVBYZERO   0x0004
#define FE_OVERFLOW    0x0008
#define FE_UNDERFLOW   0x0010
#define FE_INEXACT     0x0020

static void set_fpu(void) {
int cw387, cw;

    if (have_sse()) {
	asm("stmxcsr %0" : : "m" (cw));

	cw &= 0xFFFF0000;

	if (!options.fpu_invalid)       cw |= 1 <<  7;
	if (!options.fpu_denormal)      cw |= 1 <<  8;
	if (!options.fpu_zerodiv)       cw |= 1 <<  9;
	if (!options.fpu_overflow)      cw |= 1 << 10;
	if (!options.fpu_underflow)     cw |= 1 << 11;
	if (!options.fpu_inexact)       cw |= 1 << 12;
	if (options.fpu_no_denormals)   cw |= 1 << 15;

	switch(options.fpu_round) {
	case FP_ROUND_NEAREST:     cw |= 0 << 13; break;
	case FP_ROUND_DOWN:        cw |= 1 << 13; break;
	case FP_ROUND_UP:          cw |= 2 << 13; break;
	case FP_ROUND_ZERO:        cw |= 3 << 13; break;
	}

	asm("ldmxcsr %0" : : "m" (cw));
    }

    /* 387 side */

    asm("fnclex");
    asm("fstcw %0" : : "m" (cw387));

    cw387 |= FE_INVALID | FE_DENORM | FE_DIVBYZERO | FE_OVERFLOW |
	FE_UNDERFLOW | FE_INEXACT;

    if (options.fpu_invalid)   cw387 &= ~FE_INVALID;
    if (options.fpu_denormal)  cw387 &= ~FE_DENORM;
    if (options.fpu_zerodiv)   cw387 &= ~FE_DIVBYZERO;
    if (options.fpu_overflow)  cw387 &= ~FE_OVERFLOW;
    if (options.fpu_underflow) cw387 &= ~FE_UNDERFLOW;
    if (options.fpu_inexact)   cw387 &= ~FE_INEXACT;

    cw387 &= 0xF0FF;

    switch(options.fpu_precision) {
    case 24:   cw387 |= 0;      break;
    case 53:   cw387 |= 0x0200; break;
    case 64:
    default:   cw387 |= 0x0300; break;  
    }

    switch(options.fpu_round) {
    case FP_ROUND_NEAREST:      cw387 |= 0x0000; break;
    case FP_ROUND_DOWN:         cw387 |= 0x0400; break;
    case FP_ROUND_UP:           cw387 |= 0x0800; break;
    case FP_ROUND_ZERO:         cw387 |= 0x0C00; break;
    }

    asm("fldcw %0" : : "m" (cw387));
}
  

void query_fpu(void) {
int cw387, cw;

    if (have_sse()) {
	asm("stmxcsr %0" : : "m" (cw));

	if (cw & (1 << 0)) exception.invalid   = 1;
	if (cw & (1 << 1)) exception.denormal  = 1;
	if (cw & (1 << 2)) exception.zerodiv   = 1;
	if (cw & (1 << 3)) exception.overflow  = 1;
	if (cw & (1 << 4)) exception.underflow = 1;
	if (cw & (1 << 5)) exception.inexact   = 1;

	asm("fnstsw %0" : : "m" (cw387));
    }

    if (cw387 & FE_INVALID)    exception.invalid   = 1;
    if (cw387 & FE_DENORM)     exception.denormal  = 1;
    if (cw387 & FE_DIVBYZERO)  exception.zerodiv   = 1;
    if (cw387 & FE_OVERFLOW)   exception.overflow  = 1;
    if (cw387 & FE_UNDERFLOW)  exception.underflow = 1;
    if (cw387 & FE_INEXACT)    exception.inexact   = 1;
}


#elif defined(FPU_PPC1) || defined(FPU_PPC2)

/* set_fpu()-- Program the FPSCR from 'options': select the rounding
 * mode, clear status bits 2-6 and set or clear enable bits 24-28. */

static void set_fpu(void) {

    /* Rounding mode goes first.  mtfsfi 7,n rewrites the whole of FPSCR
     * field 7, i.e. bits 28-31, and bit 28 is the inexact enable handled
     * at the end of this function.  Doing this after the enables would
     * silently switch the inexact trap off again. */
    switch(options.fpu_round) {
    case FP_ROUND_NEAREST:  asm("mtfsfi 7,0");  break;
    case FP_ROUND_ZERO:     asm("mtfsfi 7,1");  break;
    case FP_ROUND_UP:       asm("mtfsfi 7,2");  break;
    case FP_ROUND_DOWN:     asm("mtfsfi 7,3");  break;
    }

    /* invalid: status bit 2, enable bit 24 */
    asm("mtfsb0 2");
    if (options.fpu_invalid)
	asm("mtfsb1 24");
    else
	asm("mtfsb0 24");

    /* overflow: status bit 3, enable bit 25 */
    asm("mtfsb0 3");
    if (options.fpu_overflow)
	asm("mtfsb1 25");
    else
	asm("mtfsb0 25");

    /* underflow: status bit 4, enable bit 26 */
    asm("mtfsb0 4");
    if (options.fpu_underflow)
	asm("mtfsb1 26");
    else
	asm("mtfsb0 26");

    /* division by zero: status bit 5, enable bit 27 */
    asm("mtfsb0 5");
    if (options.fpu_zerodiv)
	asm("mtfsb1 27");
    else
	asm("mtfsb0 27");

    /* inexact: status bit 6, enable bit 28 */
    asm("mtfsb0 6");
    if (options.fpu_inexact)
	asm("mtfsb1 28");
    else
	asm("mtfsb0 28");
}


static void query_fpu(void) {
union {
    char buffer[8];
    G95_INT4 m[2];
} u;

#if defined(FPU_PPC1)
    asm("mffs 1");
    asm("stfd 1, %0" : "=m" (u.buffer) : : "1");
#else
    asm("mffs f1");
    asm("stfd f1, %0" : "=m" (u.buffer) : : "f1");
#endif

    exception.invalid   = !!(u.m[1] & 0x20000000);
    exception.overflow  = !!(u.m[1] & 0x10000000);
    exception.underflow = !!(u.m[1] & 0x08000000);
    exception.zerodiv   = !!(u.m[1] & 0x04000000);
    exception.inexact   = !!(u.m[1] & 0x02000000);
}


#elif defined(FPU_SPARC)

/* set_fpu()-- Fetch %fsr, strip the fields rebuilt here (mask
 * 0x307FFC00), add the trap-enable bits and rounding mode requested in
 * 'options' and write the register back. */

static void set_fpu(void) {
int status;

    asm("st %%fsr, %0" : "=m" (status) :);

    status &= 0x307FFC00;

    /* one enable bit per requested exception, bits 23-27 */
    if (options.fpu_inexact)   status |= 0x00800000;
    if (options.fpu_zerodiv)   status |= 0x01000000;
    if (options.fpu_underflow) status |= 0x02000000;
    if (options.fpu_overflow)  status |= 0x04000000;
    if (options.fpu_invalid)   status |= 0x08000000;

    /* rounding mode occupies the two top bits */
    if (options.fpu_round == FP_ROUND_NEAREST)
	status |= 0x00000000;
    else if (options.fpu_round == FP_ROUND_ZERO)
	status |= 0x40000000;
    else if (options.fpu_round == FP_ROUND_UP)
	status |= 0x80000000;
    else if (options.fpu_round == FP_ROUND_DOWN)
	status |= 0xC0000000;

    asm("ld %0, %%fsr" : : "m" (status));
}


/* query_fpu()-- Fetch %fsr and copy bits 5-9 into the matching
 * 'exception' flags.  The denormal flag is not touched. */

static void query_fpu(void) {
int status;

    asm("st %%fsr, %0" : "=m" (status) :);

    exception.inexact   = (status & 0x020) != 0;
    exception.zerodiv   = (status & 0x040) != 0;
    exception.underflow = (status & 0x080) != 0;
    exception.overflow  = (status & 0x100) != 0;
    exception.invalid   = (status & 0x200) != 0;
}


#else

/* set_fpu()-- Fallback used when none of the FPU_* configurations
 * above is selected: nothing is programmed. */
static void set_fpu(void)
{
}

/* query_fpu()-- Fallback: nothing is read back, so 'exception' keeps
 * whatever the caller stored in it. */
static void query_fpu(void)
{
}

#endif





/* show_exceptions()-- Print the floating point exceptions currently
 * recorded.  Every flag in 'exception' is reset, query_fpu() fills in
 * what the hardware reports, and one line is printed per flag that is
 * set -- or a single "(No exceptions)" line when none is. */

void show_exceptions(void) {
int reported = 0;

    exception.invalid  = exception.denormal  = exception.zerodiv = 0;
    exception.overflow = exception.underflow = exception.inexact = 0;

    query_fpu();

    st_printf("Floating point exceptions recorded:\n");

    if (exception.invalid) {
	st_printf("  Invalid operand\n");
	reported = 1;
    }

    if (exception.denormal) {
	st_printf("  Denormal number\n");
	reported = 1;
    }

    if (exception.zerodiv) {
	st_printf("  Division by Zero\n");
	reported = 1;
    }

    if (exception.underflow) {
	st_printf("  Numeric underflow\n");
	reported = 1;
    }

    if (exception.overflow) {
	st_printf("  Numeric overflow\n");
	reported = 1;
    }

    if (exception.inexact) {
	st_printf("  Precision loss (inexact)\n");
	reported = 1;
    }

    if (!reported)
	st_printf("  (No exceptions)\n");
}


/* init_fpu()-- Set up the floating point unit by calling whichever
 * set_fpu() variant was selected at compile time. */

void init_fpu(void)
{
    set_fpu();
}
