/* * Mesa 3-D graphics library * Version: 3.4 * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * Faster arithmetic functions. If the FAST_MATH preprocessor symbol is * defined on the command line (-DFAST_MATH) then we'll use some (hopefully) * faster functions for sqrt(), etc. */ #ifndef MMATH_H #define MMATH_H #include "glheader.h" /* * Set the x86 FPU control word to guarentee only 32 bits of presision * are stored in registers. Allowing the FPU to store more introduces * differences between situations where numbers are pulled out of memory * vs. situations where the compiler is able to optimize register usage. * * In the worst case, we force the compiler to use a memory access to * truncate the float, by specifying the 'volatile' keyword. */ #if defined(__linux__) && defined(__i386__) #include #if !defined(_FPU_SETCW) #define _FPU_SETCW __setfpucw typedef unsigned short fpu_control_t; #endif #if !defined(_FPU_GETCW) #define _FPU_GETCW(a) (a) = __fpu_control; #endif /* Set it up how we want it. */ #if !defined(NO_FAST_MATH) #define START_FAST_MATH(x) \ { \ static fpu_control_t mask = _FPU_SINGLE | _FPU_MASK_IM \ | _FPU_MASK_DM | _FPU_MASK_ZM | _FPU_MASK_OM \ | _FPU_MASK_UM | _FPU_MASK_PM; \ _FPU_GETCW( x ); \ _FPU_SETCW( mask ); \ } #else #define START_FAST_MATH(x) \ { \ static fpu_control_t mask = _FPU_DEFAULT; \ _FPU_GETCW( x ); \ _FPU_SETCW( mask ); \ } #endif /* Put it back how the application had it. */ #define END_FAST_MATH(x) \ { \ _FPU_SETCW( x ); \ } #define HAVE_FAST_MATH #elif defined(__WATCOMC__) && !defined(NO_FAST_MATH) /* This is the watcom specific inline assembly version of setcw and getcw */ void START_FAST_MATH2(unsigned short *x); #pragma aux START_FAST_MATH2 = \ "fstcw word ptr [esi]" \ "or word ptr [esi], 0x3f" \ "fldcw word ptr [esi]" \ parm [esi] \ modify exact []; void END_FAST_MATH2(unsigned short *x); #pragma aux END_FAST_MATH2 = \ "fldcw word ptr [esi]" \ parm [esi] \ modify exact []; #define START_FAST_MATH(x) START_FAST_MATH2(& x) #define END_FAST_MATH(x) END_FAST_MATH2(& x) /* __inline START_FAST_MATH(unsigned short x) { _asm { fstcw ax mov x , ax or ax, 0x3f fldcw ax } } __inline END_FAST_MATH(unsigned short x) { _asm { fldcw x } } */ #define HAVE_FAST_MATH #else #define START_FAST_MATH(x) (void)(x) #define END_FAST_MATH(x) (void)(x) /* The mac float really is a float, with the same precision as a * single precision 387 float. */ #if defined(macintosh) #define HAVE_FAST_MATH #endif #endif /* * Float -> Int conversion */ #if defined(USE_X86_ASM) #if defined(__GNUC__) && defined(__i386__) static __inline__ int FloatToInt(float f) { int r; __asm__ ("fistpl %0" : "=m" (r) : "t" (f) : "st"); return r; } #elif defined(__MSC__) && defined(__WIN32__) && !defined(__CYGWIN__) static __inline int FloatToInt(float f) { int r; _asm { fld f fistp r } return r; } #elif defined(__WATCOMC__) long FloatToInt(float f); #pragma aux FloatToInt = \ "push eax" \ "fistp dword ptr [esp]" \ "pop eax" \ parm [8087] \ value [eax] \ modify exact [eax]; float asm_sqrt (float x); #pragma aux asm_sqrt = \ "fsqrt" \ parm [8087] \ value [8087] \ modify exact []; #else #define FloatToInt(F) ((int) (F)) #endif #else #define FloatToInt(F) ((int) (F)) #endif /* * Square root */ extern float gl_sqrt(float x); #ifdef FAST_MATH #if defined (__WATCOMC__) && defined(USE_X86_ASM) # define GL_SQRT(X) asm_sqrt(X) #else # define GL_SQRT(X) gl_sqrt(X) #endif #else # define GL_SQRT(X) sqrt(X) #endif /* * Normalize a 3-element vector to unit length. */ #define NORMALIZE_3FV( V ) \ do { \ GLdouble len = LEN_SQUARED_3FV(V); \ if (len > 1e-50) { \ len = 1.0 / GL_SQRT(len); \ V[0] = (GLfloat) (V[0] * len); \ V[1] = (GLfloat) (V[1] * len); \ V[2] = (GLfloat) (V[2] * len); \ } \ } while(0) #define LEN_3FV( V ) (GL_SQRT(V[0]*V[0]+V[1]*V[1]+V[2]*V[2])) #define LEN_SQUARED_3FV( V ) (V[0]*V[0]+V[1]*V[1]+V[2]*V[2]) /* * Optimization for: * GLfloat f; * GLubyte b = FloatToInt(CLAMP(f, 0, 1) * 255) */ #if defined(__i386__) || defined(__sparc__) || defined(__s390x__) || \ ( defined(__alpha__) && ( defined( __IEEE_FLOAT ) || !defined( VMS ) ) ) #define USE_IEEE #define IEEE_ONE 0x3f7f0000 #endif #if defined(USE_IEEE) && !defined(DEBUG) #define CLAMP_FLOAT_COLOR(f) \ do { \ if (*(GLuint *)&f >= IEEE_ONE) \ f = (*(GLint *)&f < 0) ? 0 : 1; \ } while(0) #define CLAMP_FLOAT_COLOR_VALUE(f) \ ( (*(GLuint *)&f >= IEEE_ONE) \ ? ((*(GLint *)&f < 0) ? 0 : 1) \ : f ) /* * This function/macro is sensitive to precision. Test carefully * if you change it. */ #define FLOAT_COLOR_TO_UBYTE_COLOR(b, f) \ do { \ union { GLfloat r; GLuint i; } tmp; \ tmp.r = f; \ b = ((tmp.i >= IEEE_ONE) \ ? ((GLint)tmp.i < 0) ? (GLubyte)0 : (GLubyte)255 \ : (tmp.r = tmp.r*(255.0F/256.0F) + 32768.0F, \ (GLubyte)tmp.i)); \ } while (0) #define CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(b,f) \ FLOAT_COLOR_TO_UBYTE_COLOR(b, f) #else #define CLAMP_FLOAT_COLOR(f) \ (void) CLAMP_SELF(f,0,1) #define CLAMP_FLOAT_COLOR_VALUE(f) \ CLAMP(f,0,1) #define FLOAT_COLOR_TO_UBYTE_COLOR(b, f) \ b = ((GLubyte) FloatToInt(CLAMP(f, 0.0F, 1.0F) * 255.0F)) #define CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(b,f) \ b = ((GLubyte) FloatToInt(f * 255.0F)) #endif extern float gl_ubyte_to_float_color_tab[256]; extern float gl_ubyte_to_float_255_color_tab[256]; #define UBYTE_COLOR_TO_FLOAT_COLOR(c) gl_ubyte_to_float_color_tab[c] #define UBYTE_COLOR_TO_FLOAT_255_COLOR(c) gl_ubyte_to_float_255_color_tab[c] #define UBYTE_COLOR_TO_FLOAT_255_COLOR2(f,c) \ (*(int *)&(f)) = ((int *)gl_ubyte_to_float_255_color_tab)[c] #define UBYTE_RGBA_TO_FLOAT_RGBA(f,b) \ do { \ f[0] = UBYTE_COLOR_TO_FLOAT_COLOR(b[0]); \ f[1] = UBYTE_COLOR_TO_FLOAT_COLOR(b[1]); \ f[2] = UBYTE_COLOR_TO_FLOAT_COLOR(b[2]); \ f[3] = UBYTE_COLOR_TO_FLOAT_COLOR(b[3]); \ } while(0) #define UBYTE_RGBA_TO_FLOAT_255_RGBA(f,b) \ do { \ f[0] = UBYTE_COLOR_TO_FLOAT_255_COLOR(b[0]); \ f[1] = UBYTE_COLOR_TO_FLOAT_255_COLOR(b[1]); \ f[2] = UBYTE_COLOR_TO_FLOAT_255_COLOR(b[2]); \ f[3] = UBYTE_COLOR_TO_FLOAT_255_COLOR(b[3]); \ } while(0) #define FLOAT_RGBA_TO_UBYTE_RGBA(b,f) \ do { \ FLOAT_COLOR_TO_UBYTE_COLOR((b[0]),(f[0])); \ FLOAT_COLOR_TO_UBYTE_COLOR((b[1]),(f[1])); \ FLOAT_COLOR_TO_UBYTE_COLOR((b[2]),(f[2])); \ FLOAT_COLOR_TO_UBYTE_COLOR((b[3]),(f[3])); \ } while(0) #define FLOAT_RGB_TO_UBYTE_RGB(b,f) \ do { \ FLOAT_COLOR_TO_UBYTE_COLOR(b[0],f[0]); \ FLOAT_COLOR_TO_UBYTE_COLOR(b[1],f[1]); \ FLOAT_COLOR_TO_UBYTE_COLOR(b[2],f[2]); \ } while(0) extern void _mesa_init_math(void); extern GLuint _mesa_bitcount(GLuint n); #endif