vectormath_lib.h

/****************************  vectormath_lib.h   *****************************
* Author:        Agner Fog
* Date created:  2012-05-30
* Last modified: 2016-04-26
* Version:       1.22
* Project:       vector classes
* Description:
* Header file defining mathematical functions on floating point vectors
* May use Intel SVML library or AMD LIBM library
*
* Instructions:
* Define VECTORMATH to one of the following values:
*   0:  Use ordinary math library (slow)
*   1:  Use AMD LIBM library
*   2:  Use Intel SVML library with any compiler
*   3:  Use Intel SVML library with Intel compiler
*
* For detailed instructions, see VectorClass.pdf
*
* (c) Copyright 2012-2016 GNU General Public License http://www.gnu.org/licenses
\*****************************************************************************/

// check combination of header files
#ifndef VECTORMATH_LIB_H
#define VECTORMATH_LIB_H

#include "vectorf128.h"

#ifndef VECTORMATH
#ifdef __INTEL_COMPILER
#define VECTORMATH 3
#else
#define VECTORMATH 0
#endif // __INTEL_COMPILER
#endif // VECTORMATH

#include <math.h>

#ifdef VCL_NAMESPACE
namespace VCL_NAMESPACE {
#endif

/*****************************************************************************
*
*      VECTORMATH = 0. Use ordinary library (scalar)
*
*****************************************************************************/
#if VECTORMATH == 0

#ifndef VECTORMATH_COMMON_H
// exponential and power functions
static inline Vec4f exp (Vec4f const & x) {
    float xx[4];
    x.store(xx);
    return Vec4f(expf(xx[0]), expf(xx[1]), expf(xx[2]), expf(xx[3]));
}
static inline Vec2d exp (Vec2d const & x) {
    double xx[4];
    x.store(xx);
    return Vec2d(::exp(xx[0]), ::exp(xx[1]));
}

// There is no certain way to know which functions are available, but at least some (Gnu)
// compilers have defines to specify this
#ifdef HAVE_EXPM1
static inline Vec4f expm1 (Vec4f const & x) {
    float xx[4];
    x.store(xx);
    return Vec4f(expm1(xx[0]), expm1(xx[1]), expm1(xx[2]), expm1(xx[3]));
}
static inline Vec2d expm1 (Vec2d const & x) {
    double xx[4];
    x.store(xx);
    return Vec2d(expm1(xx[0]), expm1(xx[1]));
}
#endif

#ifdef HAVE_EXP2
static inline Vec4f exp2 (Vec4f const & x) {
    float xx[4];
    x.store(xx);
    return Vec4f(exp2(xx[0]), exp2(xx[1]), exp2(xx[2]), exp2(xx[3]));
}
static inline Vec2d exp2 (Vec2d const & x) {
    double xx[4];
    x.store(xx);
    return Vec2d(exp2(xx[0]), exp2(xx[1]));
}
#else
static inline Vec4f exp2 (Vec4f const & x) {
    return exp(x*Vec4f(0.693147180559945309417f /* log(2) */));
}
static inline Vec2d exp2 (Vec2d const & x) {
    return exp(x*Vec2d(0.693147180559945309417  /* log(2) */));
}
#endif

static inline Vec4f exp10 (Vec4f const & x) {
    return exp(x*Vec4f(2.30258509299404568402f /* log(10) */));
}
static inline Vec2d exp10 (Vec2d const & x) {
    return exp(x*Vec2d(2.30258509299404568402  /* log(10) */));
}

static inline Vec4f pow (Vec4f const & a, Vec4f const & b) {
    float aa[4], bb[4];
    a.store(aa);  b.store(bb);
    return Vec4f(powf(aa[0],bb[0]), powf(aa[1],bb[1]), powf(aa[2],bb[2]), powf(aa[3],bb[3]));
}
static inline Vec2d pow (Vec2d const & a, Vec2d const & b) {
    double aa[4], bb[4];
    a.store(aa);  b.store(bb);
    return Vec2d(::pow(aa[0],bb[0]), ::pow(aa[1],bb[1]));
}

static inline Vec4f log (Vec4f const & x) {
    float xx[4];
    x.store(xx);
    return Vec4f(logf(xx[0]), logf(xx[1]), logf(xx[2]), logf(xx[3]));
}
static inline Vec2d log (Vec2d const & x) {
    double xx[4];
    x.store(xx);
    return Vec2d(::log(xx[0]), ::log(xx[1]));
}

#ifdef HAVE_LOG1P
static inline Vec4f log1p (Vec4f const & x) {
    float xx[4];
    x.store(xx);
    return Vec4f(::log1p(xx[0]), ::log1p(xx[1]), ::log1p(xx[2]), ::log1p(xx[3]));
}
static inline Vec2d log1p (Vec2d const & x) {
    double xx[4];
    x.store(xx);
    return Vec2d(::log1p(xx[0]), ::log1p(xx[1]));
}
#endif

static inline Vec4f log2 (Vec4f const & x) {   // logarithm base 2
    return log(x)*Vec4f(1.44269504088896340736f/* log2(e) */);
}
static inline Vec2d log2 (Vec2d const & x) {   // logarithm base 2
    return log(x)*Vec2d(1.44269504088896340736 /* log2(e) */);
}

static inline Vec4f log10 (Vec4f const & x) {  // logarithm base 10
    float xx[4];
    x.store(xx);
    return Vec4f(log10f(xx[0]), log10f(xx[1]), log10f(xx[2]), log10f(xx[3]));
}
static inline Vec2d log10 (Vec2d const & x) {  // logarithm base 10
    double xx[4];
    x.store(xx);
    return Vec2d(::log10(xx[0]), ::log10(xx[1]));
}

// trigonometric functions
static inline Vec4f sin(Vec4f const & x) {
    float xx[4];
    x.store(xx);
    return Vec4f(sinf(xx[0]), sinf(xx[1]), sinf(xx[2]), sinf(xx[3]));
}
static inline Vec2d sin (Vec2d const & x) {
    double xx[4];
    x.store(xx);
    return Vec2d(::sin(xx[0]), ::sin(xx[1]));
}

static inline Vec4f cos(Vec4f const & x) {
    float xx[4];
    x.store(xx);
    return Vec4f(cosf(xx[0]), cosf(xx[1]), cosf(xx[2]), cosf(xx[3]));
}
static inline Vec2d cos (Vec2d const & x) {
    double xx[4];
    x.store(xx);
    return Vec2d(::cos(xx[0]), ::cos(xx[1]));
}

static inline Vec4f sincos (Vec4f * pcos, Vec4f const & x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
    *pcos = cos(x);
    return sin(x);
}
static inline Vec2d sincos (Vec2d * pcos, Vec2d const & x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
    *pcos = cos(x);
    return sin(x);
}

static inline Vec4f tan(Vec4f const & x) {
    float xx[4];
    x.store(xx);
    return Vec4f(tanf(xx[0]), tanf(xx[1]), tanf(xx[2]), tanf(xx[3]));
}
static inline Vec2d tan (Vec2d const & x) {
    double xx[4];
    x.store(xx);
    return Vec2d(::tan(xx[0]), ::tan(xx[1]));
}

// inverse trigonometric functions
static inline Vec4f asin(Vec4f const & x) {
    float xx[4];
    x.store(xx);
    return Vec4f(asinf(xx[0]), asinf(xx[1]), asinf(xx[2]), asinf(xx[3]));
}
static inline Vec2d asin (Vec2d const & x) {
    double xx[4];
    x.store(xx);
    return Vec2d(::asin(xx[0]), ::asin(xx[1]));
}

static inline Vec4f acos(Vec4f const & x) {
    float xx[4];
    x.store(xx);
    return Vec4f(acosf(xx[0]), acosf(xx[1]), acosf(xx[2]), acosf(xx[3]));
}
static inline Vec2d acos (Vec2d const & x) {
    double xx[4];
    x.store(xx);
    return Vec2d(::acos(xx[0]), ::acos(xx[1]));
}

static inline Vec4f atan(Vec4f const & x) {
    float xx[4];
    x.store(xx);
    return Vec4f(atanf(xx[0]), atanf(xx[1]), atanf(xx[2]), atanf(xx[3]));
}
static inline Vec2d atan (Vec2d const & x) {
    double xx[4];
    x.store(xx);
    return Vec2d(::atan(xx[0]), ::atan(xx[1]));
}

static inline Vec4f atan2 (Vec4f const & a, Vec4f const & b) {   // inverse tangent of a/b
    float aa[4], bb[4];
    a.store(aa);  b.store(bb);
    return Vec4f(atan2f(aa[0],bb[0]), atan2f(aa[1],bb[1]), atan2f(aa[2],bb[2]), atan2f(aa[3],bb[3]));
}
static inline Vec2d atan2 (Vec2d const & a, Vec2d const & b) {   // inverse tangent of a/b
    double aa[4], bb[4];
    a.store(aa);  b.store(bb);
    return Vec2d(::atan2(aa[0],bb[0]), ::atan2(aa[1],bb[1]));
}
#endif // VECTORMATH_COMMON_H

// hyperbolic functions
static inline Vec4f sinh(Vec4f const & x) {   // hyperbolic sine
    float xx[4];
    x.store(xx);
    return Vec4f(sinhf(xx[0]), sinhf(xx[1]), sinhf(xx[2]), sinhf(xx[3]));
}
static inline Vec2d sinh (Vec2d const & x) {
    double xx[4];
    x.store(xx);
    return Vec2d(::sinh(xx[0]), ::sinh(xx[1]));
}

static inline Vec4f cosh(Vec4f const & x) {   // hyperbolic cosine
    float xx[4];
    x.store(xx);
    return Vec4f(coshf(xx[0]), coshf(xx[1]), coshf(xx[2]), coshf(xx[3]));
}
static inline Vec2d cosh (Vec2d const & x) {
    double xx[4];
    x.store(xx);
    return Vec2d(::cosh(xx[0]), ::cosh(xx[1]));
}

static inline Vec4f tanh(Vec4f const & x) {   // hyperbolic tangent
    float xx[4];
    x.store(xx);
    return Vec4f(tanhf(xx[0]), tanhf(xx[1]), tanhf(xx[2]), tanhf(xx[3]));
}
static inline Vec2d tanh (Vec2d const & x) {
    double xx[4];
    x.store(xx);
    return Vec2d(::tanh(xx[0]), ::tanh(xx[1]));
}

// error function
#ifdef HAVE_ERF
static inline Vec4f erf(Vec4f const & x) {
    float xx[4];
    x.store(xx);
    return Vec4f(::erf(xx[0]), ::erf(xx[1]), ::erf(xx[2]), ::erf(xx[3]));
}
static inline Vec2d erf (Vec2d const & x) {
    double xx[4];
    x.store(xx);
    return Vec2d(::erf(xx[0]), ::erf(xx[1]));
}
#endif

#ifdef HAVE_ERFC
static inline Vec4f erfc(Vec4f const & x) {
    float xx[4];
    x.store(xx);
    return Vec4f(::erfc(xx[0]), ::erfc(xx[1]), ::erfc(xx[2]), ::erfc(xx[3]));
}
static inline Vec2d erfc (Vec2d const & x) {
    double xx[4];
    x.store(xx);
    return Vec2d(::erfc(xx[0]), ::erfc(xx[1]));
}
#endif

// complex exponential function (real part in even numbered elements, imaginary part in odd numbered elements)
static inline Vec4f cexp (Vec4f const & x) {   // complex exponential function
    float xx[4], ee[2];
    x.store(xx);
    Vec4f z(cosf(xx[1]),sinf(xx[1]),cosf(xx[3]),sinf(xx[3])); 
    ee[0] = expf(xx[0]);  ee[1] = expf(xx[2]);
    return z * Vec4f(ee[0],ee[0],ee[1],ee[1]);
}

static inline Vec2d cexp (Vec2d const & x) {   // complex exponential function
    double xx[2];
    x.store(xx);
    Vec2d z(::cos(xx[1]), ::sin(xx[1]));
    return z * ::exp(xx[0]);
}

#if defined (VECTORF256_H)  // 256 bit vectors defined

#ifndef VECTORMATH_COMMON_H

// exponential and power functions
static inline Vec8f exp (Vec8f const & x) {   // exponential function
    return Vec8f(exp(x.get_low()), exp(x.get_high()));
}
static inline Vec4d exp (Vec4d const & x) {   // exponential function
    return Vec4d(exp(x.get_low()), exp(x.get_high()));
}
#ifdef HAVE_EXPM1
static inline Vec8f expm1 (Vec8f const & x) {   // exp(x)-1
    return Vec8f(expm1(x.get_low()), expm1(x.get_high()));
}
static inline Vec4d expm1 (Vec4d const & x) {   // exp(x)-1
    return Vec4d(expm1(x.get_low()), expm1(x.get_high()));
}
#endif

static inline Vec8f exp2 (Vec8f const & x) {   // pow(2,x)
    return Vec8f(exp2(x.get_low()), exp2(x.get_high()));
}
static inline Vec4d exp2 (Vec4d const & x) {   // pow(2,x)
    return Vec4d(exp2(x.get_low()), exp2(x.get_high()));
}

static inline Vec8f exp10 (Vec8f const & x) {   // pow(10,x)
    return Vec8f(exp10(x.get_low()), exp10(x.get_high()));
}
static inline Vec4d exp10 (Vec4d const & x) {   // pow(10,x)
    return Vec4d(exp10(x.get_low()), exp10(x.get_high()));
}

static inline Vec8f pow (Vec8f const & a, Vec8f const & b) {   // pow(a,b) = a to the power of b
    return Vec8f(pow(a.get_low(),b.get_low()), pow(a.get_high(),b.get_high()));
}
static inline Vec4d pow (Vec4d const & a, Vec4d const & b) {   // pow(a,b) = a to the power of b
    return Vec4d(pow(a.get_low(),b.get_low()), pow(a.get_high(),b.get_high()));
}

// logarithms
static inline Vec8f log (Vec8f const & x) {   // natural logarithm
    return Vec8f(log(x.get_low()), log(x.get_high()));
}
static inline Vec4d log (Vec4d const & x) {   // natural logarithm
    return Vec4d(log(x.get_low()), log(x.get_high()));
}
#ifdef HAVE_LOG1P
static inline Vec8f log1p (Vec8f const & x) {   // log(1+x). Avoids loss of precision if 1+x is close to 1
    return Vec8f(log1p(x.get_low()), log1p(x.get_high()));
}
static inline Vec4d log1p (Vec4d const & x) {   // log(1+x). Avoids loss of precision if 1+x is close to 1
    return Vec4d(log1p(x.get_low()), log1p(x.get_high()));
}
#endif

static inline Vec8f log2 (Vec8f const & x) {   // logarithm base 2
    return Vec8f(log2(x.get_low()), log2(x.get_high()));
}
static inline Vec4d log2 (Vec4d const & x) {   // logarithm base 2
    return Vec4d(log2(x.get_low()), log2(x.get_high()));
}

static inline Vec8f log10 (Vec8f const & x) {   // logarithm base 10
    return Vec8f(log10(x.get_low()), log10(x.get_high()));
}
static inline Vec4d log10 (Vec4d const & x) {   // logarithm base 10
    return Vec4d(log10(x.get_low()), log10(x.get_high()));
}

// trigonometric functions (angles in radians)
static inline Vec8f sin (Vec8f const & x) {   // sine
    return Vec8f(sin(x.get_low()), sin(x.get_high()));
}
static inline Vec4d sin (Vec4d const & x) {   // sine
    return Vec4d(sin(x.get_low()), sin(x.get_high()));
}

static inline Vec8f cos (Vec8f const & x) {   // cosine
    return Vec8f(cos(x.get_low()), cos(x.get_high()));
}
static inline Vec4d cos (Vec4d const & x) {   // cosine
    return Vec4d(cos(x.get_low()), cos(x.get_high()));
}

static inline Vec8f sincos (Vec8f * pcos, Vec8f const & x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
    *pcos = Vec8f(cos(x.get_low()), cos(x.get_high())); 
    return Vec8f(sin(x.get_low()), sin(x.get_high()));
}
static inline Vec4d sincos (Vec4d * pcos, Vec4d const & x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
    *pcos = Vec4d(cos(x.get_low()), cos(x.get_high())); 
    return Vec4d(sin(x.get_low()), sin(x.get_high()));
}

static inline Vec8f tan (Vec8f const & x) {   // tangent
    return Vec8f(tan(x.get_low()), tan(x.get_high()));
}
static inline Vec4d tan (Vec4d const & x) {   // tangent
    return Vec4d(tan(x.get_low()), tan(x.get_high()));
}

// inverse trigonometric functions
static inline Vec8f asin (Vec8f const & x) {   // inverse sine
    return Vec8f(asin(x.get_low()), asin(x.get_high()));
}
static inline Vec4d asin (Vec4d const & x) {   // inverse sine
    return Vec4d(asin(x.get_low()), asin(x.get_high()));
}

static inline Vec8f acos (Vec8f const & x) {   // inverse cosine
    return Vec8f(acos(x.get_low()), acos(x.get_high()));
}
static inline Vec4d acos (Vec4d const & x) {   // inverse cosine
    return Vec4d(acos(x.get_low()), acos(x.get_high()));
}

static inline Vec8f atan (Vec8f const & x) {   // inverse tangent
    return Vec8f(atan(x.get_low()), atan(x.get_high()));
}
static inline Vec4d atan (Vec4d const & x) {   // inverse tangent
    return Vec4d(atan(x.get_low()), atan(x.get_high()));
}

static inline Vec8f atan2 (Vec8f const & a, Vec8f const & b) {   // inverse tangent of a/b
    return Vec8f(atan2(a.get_low(),b.get_low()), atan2(a.get_high(),b.get_high()));
}
static inline Vec4d atan2 (Vec4d const & a, Vec4d const & b) {   // inverse tangent of a/b
    return Vec4d(atan2(a.get_low(),b.get_low()), atan2(a.get_high(),b.get_high()));
}
#endif // VECTORMATH_COMMON_H

// hyperbolic functions and inverse hyperbolic functions
static inline Vec8f sinh (Vec8f const & x) {   // hyperbolic sine
    return Vec8f(sinh(x.get_low()), sinh(x.get_high()));
}
static inline Vec4d sinh (Vec4d const & x) {   // hyperbolic sine
    return Vec4d(sinh(x.get_low()), sinh(x.get_high()));
}

static inline Vec8f cosh (Vec8f const & x) {   // hyperbolic cosine
    return Vec8f(cosh(x.get_low()), cosh(x.get_high()));
}
static inline Vec4d cosh (Vec4d const & x) {   // hyperbolic cosine
    return Vec4d(cosh(x.get_low()), cosh(x.get_high()));
}

static inline Vec8f tanh (Vec8f const & x) {   // hyperbolic tangent
    return Vec8f(tanh(x.get_low()), tanh(x.get_high()));
}
static inline Vec4d tanh (Vec4d const & x) {   // hyperbolic tangent
    return Vec4d(tanh(x.get_low()), tanh(x.get_high()));
}

// error function
#ifdef HAVE_ERF
static inline Vec8f erf (Vec8f const & x) {   // error function
    return Vec8f(erf(x.get_low()), erf(x.get_high()));
}
static inline Vec4d erf (Vec4d const & x) {   // error function
    return Vec4d(erf(x.get_low()), erf(x.get_high()));
}
#endif
#ifdef HAVE_ERFC
static inline Vec8f erfc (Vec8f const & x) {   // error function complement
    return Vec8f(erfc(x.get_low()), erfc(x.get_high()));
}
static inline Vec4d erfc (Vec4d const & x) {   // error function complement
    return Vec4d(erfc(x.get_low()), erfc(x.get_high()));
}
#endif

// complex exponential function (real part in even numbered elements, imaginary part in odd numbered elements)
static inline Vec8f cexp (Vec8f const & x) {   // complex exponential function
    return Vec8f(cexp(x.get_low()), cexp(x.get_high()));
}
static inline Vec4d cexp (Vec4d const & x) {   // complex exponential function
    return Vec4d(cexp(x.get_low()), cexp(x.get_high()));
}

#endif // VECTORF256_H == 1


/*****************************************************************************
*
*      VECTORMATH = 1. Use AMD LIBM library
*
*****************************************************************************/
#elif VECTORMATH == 1
//#include <amdlibm.h> 
#include "amdlibm.h" // if header file is in current directory

#ifndef VECTORMATH_COMMON_H

// exponential and power functions
static inline Vec4f exp (Vec4f const & x) {   // exponential function
    return amd_vrs4_expf(x);
}
static inline Vec2d exp (Vec2d const & x) {   // exponential function
    return amd_vrd2_exp(x);
}

static inline Vec4f expm1 (Vec4f const & x) {   // exp(x)-1. Avoids loss of precision if x is close to 1
    return amd_vrs4_expm1f(x);
}
static inline Vec2d expm1 (Vec2d const & x) {   // exp(x)-1. Avoids loss of precision if x is close to 1
    return amd_vrd2_expm1(x);
}

static inline Vec4f exp2 (Vec4f const & x) {   // pow(2,x)
    return amd_vrs4_exp2f(x);
}
static inline Vec2d exp2 (Vec2d const & x) {   // pow(2,x)
    return amd_vrd2_exp2(x);
}

static inline Vec4f exp10 (Vec4f const & x) {   // pow(10,x)
    return amd_vrs4_exp10f(x);
}
static inline Vec2d exp10 (Vec2d const & x) {   // pow(10,x)
    return amd_vrd2_exp10(x);
}

static inline Vec4f pow (Vec4f const & a, Vec4f const & b) {   // pow(a,b) = a to the power of b
    return amd_vrs4_powf(a,b);
}
static inline Vec2d pow (Vec2d const & a, Vec2d const & b) {   // pow(a,b) = a to the power of b
    return amd_vrd2_pow(a,b);
}

static inline Vec4f cbrt (Vec4f const & x) {   // pow(x,1/3)
    return amd_vrs4_cbrtf(x);
}
static inline Vec2d cbrt (Vec2d const & x) {   // pow(x,1/3)
    return amd_vrd2_cbrt(x);
}

// logarithms
static inline Vec4f log (Vec4f const & x) {   // natural logarithm
    return amd_vrs4_logf(x);
}
static inline Vec2d log (Vec2d const & x) {   // natural logarithm
    return amd_vrd2_log(x);
}

static inline Vec4f log1p (Vec4f const & x) {   // log(1+x). Avoids loss of precision if 1+x is close to 1
    return amd_vrs4_log1pf(x);
}
static inline Vec2d log1p (Vec2d const & x) {   // log(1+x). Avoids loss of precision if 1+x is close to 1
    return amd_vrd2_log1p(x);
}

static inline Vec4f log2 (Vec4f const & x) {   // logarithm base 2
    return amd_vrs4_log2f(x);
}
static inline Vec2d log2 (Vec2d const & x) {   // logarithm base 2
    return amd_vrd2_log2(x);
}

static inline Vec4f log10 (Vec4f const & x) {   // logarithm base 10
    return amd_vrs4_log10f(x);
}
static inline Vec2d log10 (Vec2d const & x) {   // logarithm base 10
    return amd_vrd2_log10(x);
}

// trigonometric functions (angles in radians)
static inline Vec4f sin (Vec4f const & x) {   // sine
    return amd_vrs4_sinf(x);
}
static inline Vec2d sin (Vec2d const & x) {   // sine
    return amd_vrd2_sin(x);
}

static inline Vec4f cos (Vec4f const & x) {   // cosine
    return amd_vrs4_cosf(x);
}
static inline Vec2d cos (Vec2d const & x) {   // cosine
    return amd_vrd2_cos(x);
}

static inline Vec4f sincos (Vec4f * pcos, Vec4f const & x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
    __m128 r_sin;
    amd_vrs4_sincosf(x, &r_sin, (__m128*)pcos);
    return r_sin;
}
static inline Vec2d sincos (Vec2d * pcos, Vec2d const & x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
    __m128d r_sin;
    amd_vrd2_sincos(x, &r_sin, (__m128d*)pcos);
    return r_sin;
}

static inline Vec4f tan (Vec4f const & x) {   // tangent
    return amd_vrs4_tanf(x);
}
static inline Vec2d tan (Vec2d const & x) {   // tangent
    return amd_vrd2_tan(x);
}

// inverse trigonometric functions not supported

#endif // VECTORMATH_COMMON_H

// hyperbolic functions and inverse hyperbolic functions not supported

// error function not supported

// complex exponential function not supported

#ifdef VECTORF256_H

// Emulate 256 bit vector functions with two 128-bit vectors

#ifndef VECTORMATH_COMMON_H

// exponential and power functions
static inline Vec8f exp (Vec8f const & x) {   // exponential function
    return Vec8f(exp(x.get_low()), exp(x.get_high()));
}
static inline Vec4d exp (Vec4d const & x) {   // exponential function
    return Vec4d(exp(x.get_low()), exp(x.get_high()));
}

static inline Vec8f expm1 (Vec8f const & x) {   // exp(x)-1. Avoids loss of precision if x is close to 1
    return Vec8f(expm1(x.get_low()), expm1(x.get_high()));
}
static inline Vec4d expm1 (Vec4d const & x) {   // exp(x)-1. Avoids loss of precision if x is close to 1
    return Vec4d(expm1(x.get_low()), expm1(x.get_high()));
}

static inline Vec8f exp2 (Vec8f const & x) {   // pow(2,x)
    return Vec8f(exp2(x.get_low()), exp2(x.get_high()));
}
static inline Vec4d exp2 (Vec4d const & x) {   // pow(2,x)
    return Vec4d(exp2(x.get_low()), exp2(x.get_high()));
}

static inline Vec8f exp10 (Vec8f const & x) {   // pow(10,x)
    return Vec8f(exp10(x.get_low()), exp10(x.get_high()));
}
static inline Vec4d exp10 (Vec4d const & x) {   // pow(10,x)
    return Vec4d(exp10(x.get_low()), exp10(x.get_high()));
}

static inline Vec8f pow (Vec8f const & a, Vec8f const & b) {   // pow(a,b) = a to the power of b
    return Vec8f(pow(a.get_low(),b.get_low()), pow(a.get_high(),b.get_high()));
}
static inline Vec4d pow (Vec4d const & a, Vec4d const & b) {   // pow(a,b) = a to the power of b
    return Vec4d(pow(a.get_low(),b.get_low()), pow(a.get_high(),b.get_high()));
}

static inline Vec8f cbrt (Vec8f const & x) {   // pow(x,1/3)
    return Vec8f(cbrt(x.get_low()), cbrt(x.get_high()));
}
static inline Vec4d cbrt (Vec4d const & x) {   // pow(x,1/3)
    return Vec4d(cbrt(x.get_low()), cbrt(x.get_high()));
}


// logarithms
static inline Vec8f log (Vec8f const & x) {   // natural logarithm
    return Vec8f(log(x.get_low()), log(x.get_high()));
}
static inline Vec4d log (Vec4d const & x) {   // natural logarithm
    return Vec4d(log(x.get_low()), log(x.get_high()));
}

static inline Vec8f log1p (Vec8f const & x) {   // log(1+x). Avoids loss of precision if 1+x is close to 1
    return Vec8f(log1p(x.get_low()), log1p(x.get_high()));
}
static inline Vec4d log1p (Vec4d const & x) {   // log(1+x). Avoids loss of precision if 1+x is close to 1
    return Vec4d(log1p(x.get_low()), log1p(x.get_high()));
}

static inline Vec8f log2 (Vec8f const & x) {   // logarithm base 2
    return Vec8f(log2(x.get_low()), log2(x.get_high()));
}
static inline Vec4d log2 (Vec4d const & x) {   // logarithm base 2
    return Vec4d(log2(x.get_low()), log2(x.get_high()));
}

static inline Vec8f log10 (Vec8f const & x) {   // logarithm base 10
    return Vec8f(log10(x.get_low()), log10(x.get_high()));
}
static inline Vec4d log10 (Vec4d const & x) {   // logarithm base 10
    return Vec4d(log10(x.get_low()), log10(x.get_high()));
}

// trigonometric functions (angles in radians)
static inline Vec8f sin (Vec8f const & x) {   // sine
    return Vec8f(sin(x.get_low()), sin(x.get_high()));
}
static inline Vec4d sin (Vec4d const & x) {   // sine
    return Vec4d(sin(x.get_low()), sin(x.get_high()));
}

static inline Vec8f cos (Vec8f const & x) {   // cosine
    return Vec8f(cos(x.get_low()), cos(x.get_high()));
}
static inline Vec4d cos (Vec4d const & x) {   // cosine
    return Vec4d(cos(x.get_low()), cos(x.get_high()));
}

static inline Vec8f sincos (Vec8f * pcos, Vec8f const & x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
    Vec4f r_sin0, r_sin1, r_cos0, r_cos1;
    r_sin0 = sincos(&r_cos0, x.get_low()); 
    r_sin1 = sincos(&r_cos1, x.get_high());
    *pcos = Vec8f(r_cos0, r_cos1);
    return Vec8f(r_sin0, r_sin1); 
}
static inline Vec4d sincos (Vec4d * pcos, Vec4d const & x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
    Vec2d r_sin0, r_sin1, r_cos0, r_cos1;
    r_sin0 = sincos(&r_cos0, x.get_low()); 
    r_sin1 = sincos(&r_cos1, x.get_high());
    *pcos = Vec4d(r_cos0, r_cos1);
    return Vec4d(r_sin0, r_sin1); 
}

static inline Vec8f tan (Vec8f const & x) {   // tangent
    return Vec8f(tan(x.get_low()), tan(x.get_high()));
}
static inline Vec4d tan (Vec4d const & x) {   // tangent
    return Vec4d(tan(x.get_low()), tan(x.get_high()));
}

#endif // VECTORMATH_COMMON_H

#endif // VECTORF256_H == 1


/*****************************************************************************
*
*      VECTORMATH = 2. Use Intel SVML library with any compiler
*
*****************************************************************************/
#elif VECTORMATH == 2 

extern "C" {
extern __m128  __svml_expf4       (__m128);
extern __m128d __svml_exp2        (__m128d);
extern __m128  __svml_expm1f4     (__m128);
extern __m128d __svml_expm12      (__m128d);
extern __m128  __svml_exp2f4      (__m128);
extern __m128d __svml_exp22       (__m128d);
extern __m128  __svml_exp10f4     (__m128);
extern __m128d __svml_exp102      (__m128d);
extern __m128  __svml_powf4       (__m128,  __m128);
extern __m128d __svml_pow2        (__m128d, __m128d);
extern __m128  __svml_cbrtf4      (__m128);
extern __m128d __svml_cbrt2       (__m128d);
extern __m128  __svml_invsqrtf4   (__m128);
extern __m128d __svml_invsqrt2    (__m128d);
extern __m128  __svml_logf4       (__m128);
extern __m128d __svml_log2        (__m128d);
extern __m128  __svml_log1pf4     (__m128);
extern __m128d __svml_log1p2      (__m128d);
extern __m128  __svml_log2f4      (__m128);
extern __m128d __svml_log22       (__m128d);
extern __m128  __svml_log10f4     (__m128);
extern __m128d __svml_log102      (__m128d);
extern __m128  __svml_sinf4       (__m128);
extern __m128d __svml_sin2        (__m128d);
extern __m128  __svml_cosf4       (__m128);
extern __m128d __svml_cos2        (__m128d);
extern __m128  __svml_sincosf4    (__m128);  // cos returned in xmm1
extern __m128d __svml_sincos2     (__m128d); // cos returned in xmm1
extern __m128  __svml_tanf4       (__m128);
extern __m128d __svml_tan2        (__m128d);
extern __m128  __svml_asinf4      (__m128);
extern __m128d __svml_asin2       (__m128d);
extern __m128  __svml_acosf4      (__m128);
extern __m128d __svml_acos2       (__m128d);
extern __m128  __svml_atanf4      (__m128);
extern __m128d __svml_atan2       (__m128d);
extern __m128  __svml_atan2f4     (__m128,  __m128);
extern __m128d __svml_atan22      (__m128d, __m128d);
extern __m128  __svml_sinhf4      (__m128);
extern __m128d __svml_sinh2       (__m128d);
extern __m128  __svml_coshf4      (__m128);
extern __m128d __svml_cosh2       (__m128d);
extern __m128  __svml_tanhf4      (__m128);
extern __m128d __svml_tanh2       (__m128d);
extern __m128  __svml_asinhf4     (__m128);
extern __m128d __svml_asinh2      (__m128d);
extern __m128  __svml_acoshf4     (__m128);
extern __m128d __svml_acosh2      (__m128d);
extern __m128  __svml_atanhf4     (__m128);
extern __m128d __svml_atanh2      (__m128d);
extern __m128  __svml_erff4       (__m128);
extern __m128d __svml_erf2        (__m128d);
extern __m128  __svml_erfcf4      (__m128);
extern __m128d __svml_erfc2       (__m128d);
extern __m128  __svml_erfinvf4    (__m128);
extern __m128d __svml_erfinv2     (__m128d);
extern __m128  __svml_cdfnorminvf4(__m128);
extern __m128d __svml_cdfnorminv2 (__m128d);
extern __m128  __svml_cdfnormf4   (__m128);
extern __m128d __svml_cdfnorm2    (__m128d);
extern __m128  __svml_cexpf4      (__m128);
extern __m128d __svml_cexp2       (__m128d);
}

#ifndef VECTORMATH_COMMON_H

// exponential and power functions
static inline Vec4f exp (Vec4f const & x) {   // exponential function
    return __svml_expf4(x);
}
static inline Vec2d exp (Vec2d const & x) {   // exponential function
    return __svml_exp2(x);
}

static inline Vec4f expm1 (Vec4f const & x) {   // exp(x)-1. Avoids loss of precision if x is close to 1
    return __svml_expm1f4(x);
}
static inline Vec2d expm1 (Vec2d const & x) {   // exp(x)-1. Avoids loss of precision if x is close to 1
    return __svml_expm12(x);
}

static inline Vec4f exp2 (Vec4f const & x) {   // pow(2,x)
    return __svml_exp2f4(x);
}
static inline Vec2d exp2 (Vec2d const & x) {   // pow(2,x)
    return __svml_exp22(x);
}

static inline Vec4f exp10 (Vec4f const & x) {   // pow(10,x)
    return __svml_exp10f4(x);
}
static inline Vec2d exp10 (Vec2d const & x) {   // pow(10,x)
    return __svml_exp102(x);
}

static inline Vec4f pow (Vec4f const & a, Vec4f const & b) {   // pow(a,b) = a to the power of b
    return __svml_powf4(a,b);
}
static inline Vec2d pow (Vec2d const & a, Vec2d const & b) {   // pow(a,b) = a to the power of b
    return __svml_pow2(a,b);
}

static inline Vec4f cbrt (Vec4f const & x) {   // pow(x,1/3)
    return __svml_cbrtf4(x);
}
static inline Vec2d cbrt (Vec2d const & x) {   // pow(x,1/3)
    return __svml_cbrt2(x);
}

// logarithms
static inline Vec4f log (Vec4f const & x) {   // natural logarithm
    return __svml_logf4(x);
}
static inline Vec2d log (Vec2d const & x) {   // natural logarithm
    return __svml_log2(x);
}

static inline Vec4f log1p (Vec4f const & x) {   // log(1+x). Avoids loss of precision if 1+x is close to 1
    return __svml_log1pf4(x);
}
static inline Vec2d log1p (Vec2d const & x) {   // log(1+x). Avoids loss of precision if 1+x is close to 1
    return __svml_log1p2(x);
}

static inline Vec4f log2 (Vec4f const & x) {   // logarithm base 2
    return __svml_log2f4(x);
}
static inline Vec2d log2 (Vec2d const & x) {   // logarithm base 2
    return __svml_log22(x);
}

static inline Vec4f log10 (Vec4f const & x) {   // logarithm base 10
    return __svml_log10f4(x);
}
static inline Vec2d log10 (Vec2d const & x) {   // logarithm base 10
    return __svml_log102(x);
}

// trigonometric functions (angles in radians)
static inline Vec4f sin (Vec4f const & x) {   // sine
    return __svml_sinf4(x);
}
static inline Vec2d sin (Vec2d const & x) {   // sine
    return __svml_sin2(x);
}

static inline Vec4f cos (Vec4f const & x) {   // cosine
    return __svml_cosf4(x);
}
static inline Vec2d cos (Vec2d const & x) {   // cosine
    return __svml_cos2(x);
}

#if defined(__unix__) || defined(__INTEL_COMPILER) || !defined(__x86_64__) || !defined(_MSC_VER)
// no inline assembly in 64 bit MS compiler
static inline Vec4f sincos (Vec4f * pcos, Vec4f const & x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
    __m128 r_sin, r_cos;
    r_sin = __svml_sincosf4(x);
#if defined(__unix__) || defined(__GNUC__)
    //   __asm__ ( "call __svml_sincosf4 \n movaps %%xmm0, %0 \n movaps %%xmm1, %1" : "=m"(r_sin), "=m"(r_cos) : "xmm0"(x) );
     __asm__ __volatile__ ( "movaps %%xmm1, %0":"=m"(r_cos));
#else // Windows
    _asm movaps r_cos, xmm1;
#endif
    *pcos = r_cos;
    return r_sin;
}
static inline Vec2d sincos (Vec2d * pcos, Vec2d const & x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
    __m128d r_sin, r_cos;
    r_sin = __svml_sincos2(x);
#if defined(__unix__) || defined(__GNUC__)
     __asm__ __volatile__ ( "movaps %%xmm1, %0":"=m"(r_cos));
#else // Windows
    _asm movapd r_cos, xmm1;
#endif
    *pcos = r_cos;
    return r_sin;
}
#endif // inline assembly available

static inline Vec4f tan (Vec4f const & x) {   // tangent
    return __svml_tanf4(x);
}
static inline Vec2d tan (Vec2d const & x) {   // tangent
    return __svml_tan2(x);
}

// inverse trigonometric functions
static inline Vec4f asin (Vec4f const & x) {   // inverse sine
    return __svml_asinf4(x);
}
static inline Vec2d asin (Vec2d const & x) {   // inverse sine
    return __svml_asin2(x);
}

static inline Vec4f acos (Vec4f const & x) {   // inverse cosine
    return __svml_acosf4(x);
}
static inline Vec2d acos (Vec2d const & x) {   // inverse cosine
    return __svml_acos2(x);
}

static inline Vec4f atan (Vec4f const & x) {   // inverse tangent
    return __svml_atanf4(x);
}
static inline Vec2d atan (Vec2d const & x) {   // inverse tangent
    return __svml_atan2(x);
}

static inline Vec4f atan2 (Vec4f const & a, Vec4f const & b) {   // inverse tangent of a/b
    return __svml_atan2f4(a,b);
}
static inline Vec2d atan2 (Vec2d const & a, Vec2d const & b) {   // inverse tangent of a/b
    return __svml_atan22(a,b);
}

#endif // VECTORMATH_COMMON_H

// hyperbolic functions and inverse hyperbolic functions
static inline Vec4f sinh (Vec4f const & x) {   // hyperbolic sine
    return __svml_sinhf4(x);
}
static inline Vec2d sinh (Vec2d const & x) {   // hyperbolic sine
    return __svml_sinh2(x);
}

static inline Vec4f cosh (Vec4f const & x) {   // hyperbolic cosine
    return __svml_coshf4(x);
}
static inline Vec2d cosh (Vec2d const & x) {   // hyperbolic cosine
    return __svml_cosh2(x);
}

static inline Vec4f tanh (Vec4f const & x) {   // hyperbolic tangent
    return __svml_tanhf4(x);
}
static inline Vec2d tanh (Vec2d const & x) {   // hyperbolic tangent
    return __svml_tanh2(x);
}

static inline Vec4f asinh (Vec4f const & x) {   // inverse hyperbolic sine
    return __svml_asinhf4(x);
}
static inline Vec2d asinh (Vec2d const & x) {   // inverse hyperbolic sine
    return __svml_asinh2(x);
}

static inline Vec4f acosh (Vec4f const & x) {   // inverse hyperbolic cosine
    return __svml_acoshf4(x);
}
static inline Vec2d acosh (Vec2d const & x) {   // inverse hyperbolic cosine
    return __svml_acosh2(x);
}

static inline Vec4f atanh (Vec4f const & x) {   // inverse hyperbolic tangent
    return __svml_atanhf4(x);
}
static inline Vec2d atanh (Vec2d const & x) {   // inverse hyperbolic tangent
    return __svml_atanh2(x);
}

// error function
static inline Vec4f erf (Vec4f const & x) {   // error function
    return __svml_erff4(x);
}
static inline Vec2d erf (Vec2d const & x) {   // error function
    return __svml_erf2(x);
}

static inline Vec4f erfc (Vec4f const & x) {   // error function complement
    return __svml_erfcf4(x);
}
static inline Vec2d erfc (Vec2d const & x) {   // error function complement
    return __svml_erfc2(x);
}

static inline Vec4f erfinv (Vec4f const & x) {   // inverse error function
    return __svml_erfinvf4(x);
}
static inline Vec2d erfinv (Vec2d const & x) {   // inverse error function
    return __svml_erfinv2(x);
}

static inline Vec4f cdfnorm (Vec4f const & x) {   // cumulative normal distribution function
    return __svml_cdfnormf4(x);
}
static inline Vec2d cdfnorm (Vec2d const & x) {   // cumulative normal distribution function
    return __svml_cdfnorm2(x);
}

static inline Vec4f cdfnorminv (Vec4f const & x) {   // inverse cumulative normal distribution function
    return __svml_cdfnorminvf4(x);
}
static inline Vec2d cdfnorminv (Vec2d const & x) {   // inverse cumulative normal distribution function
    return __svml_cdfnorminv2(x);
}

// complex exponential function (real part in even numbered elements, imaginary part in odd numbered elements)
static inline Vec4f cexp (Vec4f const & x) {   // complex exponential function
    return __svml_cexpf4(x);
}
static inline Vec2d cexp (Vec2d const & x) {   // complex exponential function
    return __svml_cexp2(x);
}


#if defined (VECTORF256_H) && VECTORF256_H >= 2  
// AVX gives 256 bit vectors

extern "C" {
extern __m256  __svml_expf8       (__m256);
extern __m256d __svml_exp4        (__m256d);
extern __m256  __svml_expm1f8     (__m256);
extern __m256d __svml_expm14      (__m256d);
extern __m256  __svml_exp2f8      (__m256);
extern __m256d __svml_exp24       (__m256d);
extern __m256  __svml_exp10f8     (__m256);
extern __m256d __svml_exp104      (__m256d);
extern __m256  __svml_powf8       (__m256,  __m256);
extern __m256d __svml_pow4        (__m256d, __m256d);
extern __m256  __svml_cbrtf8      (__m256);
extern __m256d __svml_cbrt4       (__m256d);
extern __m256  __svml_invsqrtf8   (__m256);
extern __m256d __svml_invsqrt4    (__m256d);
extern __m256  __svml_logf8       (__m256);
extern __m256d __svml_log4        (__m256d);
extern __m256  __svml_log1pf8     (__m256);
extern __m256d __svml_log1p4      (__m256d);
extern __m256  __svml_log2f8      (__m256);
extern __m256d __svml_log24       (__m256d);
extern __m256  __svml_log10f8     (__m256);
extern __m256d __svml_log104      (__m256d);
extern __m256  __svml_sinf8       (__m256);
extern __m256d __svml_sin4        (__m256d);
extern __m256  __svml_cosf8       (__m256);
extern __m256d __svml_cos4        (__m256d);
extern __m256  __svml_sincosf8    (__m256);  // cos returned in ymm1
extern __m256d __svml_sincos4     (__m256d); // cos returned in ymm1
extern __m256  __svml_tanf8       (__m256);
extern __m256d __svml_tan4        (__m256d);
extern __m256  __svml_asinf8      (__m256);
extern __m256d __svml_asin4       (__m256d);
extern __m256  __svml_acosf8      (__m256);
extern __m256d __svml_acos4       (__m256d);
extern __m256  __svml_atanf8      (__m256);
extern __m256d __svml_atan4       (__m256d);
extern __m256  __svml_atan2f8     (__m256, __m256);
extern __m256d __svml_atan24      (__m256d, __m256d);
extern __m256  __svml_sinhf8      (__m256);
extern __m256d __svml_sinh4       (__m256d);
extern __m256  __svml_coshf8      (__m256);
extern __m256d __svml_cosh4       (__m256d);
extern __m256  __svml_tanhf8      (__m256);
extern __m256d __svml_tanh4       (__m256d);
extern __m256  __svml_asinhf8     (__m256);
extern __m256d __svml_asinh4      (__m256d);
extern __m256  __svml_acoshf8     (__m256);
extern __m256d __svml_acosh4      (__m256d);
extern __m256  __svml_atanhf8     (__m256);
extern __m256d __svml_atanh4      (__m256d);
extern __m256  __svml_erff8       (__m256);
extern __m256d __svml_erf4        (__m256d);
extern __m256  __svml_erfcf8      (__m256);
extern __m256d __svml_erfc4       (__m256d);
extern __m256  __svml_erfinvf8    (__m256);
extern __m256d __svml_erfinv4     (__m256d);
extern __m256  __svml_cdfnorminvf8(__m256);
extern __m256d __svml_cdfnorminv4 (__m256d);
extern __m256  __svml_cdfnormf8   (__m256);
extern __m256d __svml_cdfnorm4    (__m256d);
//extern __m256  __svml_cexpf8      (__m256); // missing in current version of SVML (jan 2012)
//extern __m256d __svml_cexp4       (__m256d);
}

#ifndef VECTORMATH_COMMON_H

// exponential and power functions
static inline Vec8f exp (Vec8f const & x) {   // exponential function
    return __svml_expf8(x);
}
static inline Vec4d exp (Vec4d const & x) {   // exponential function
    return __svml_exp4(x);
}

static inline Vec8f expm1 (Vec8f const & x) {   // exp(x)-1. Avoids loss of precision if x is close to 1
    return __svml_expm1f8(x);
}
static inline Vec4d expm1 (Vec4d const & x) {   // exp(x)-1. Avoids loss of precision if x is close to 1
    return __svml_expm14(x);
}

static inline Vec8f exp2 (Vec8f const & x) {   // pow(2,x)
    return __svml_exp2f8(x);
}
static inline Vec4d exp2 (Vec4d const & x) {   // pow(2,x)
    return __svml_exp24(x);
}

static inline Vec8f exp10 (Vec8f const & x) {   // pow(10,x)
    return __svml_exp10f8(x);
}
static inline Vec4d exp10 (Vec4d const & x) {   // pow(10,x)
    return __svml_exp104(x);
}

static inline Vec8f pow (Vec8f const & a, Vec8f const & b) {   // pow(a,b) = a to the power of b
    return __svml_powf8(a,b);
}
static inline Vec4d pow (Vec4d const & a, Vec4d const & b) {   // pow(a,b) = a to the power of b
    return __svml_pow4(a,b);
}

static inline Vec8f cbrt (Vec8f const & x) {   // pow(x,1/3)
    return __svml_cbrtf8(x);
}
static inline Vec4d cbrt (Vec4d const & x) {   // pow(x,1/3)
    return __svml_cbrt4(x);
}

// logarithms
static inline Vec8f log (Vec8f const & x) {   // natural logarithm
    return __svml_logf8(x);
}
static inline Vec4d log (Vec4d const & x) {   // natural logarithm
    return __svml_log4(x);
}

static inline Vec8f log1p (Vec8f const & x) {   // log(1+x). Avoids loss of precision if 1+x is close to 1
    return __svml_log1pf8(x);
}
static inline Vec4d log1p (Vec4d const & x) {   // log(1+x). Avoids loss of precision if 1+x is close to 1
    return __svml_log1p4(x);
}

static inline Vec8f log2 (Vec8f const & x) {   // logarithm base 2
    return __svml_log2f8(x);
}
static inline Vec4d log2 (Vec4d const & x) {   // logarithm base 2
    return __svml_log24(x);
}

static inline Vec8f log10 (Vec8f const & x) {   // logarithm base 10
    return __svml_log10f8(x);
}
static inline Vec4d log10 (Vec4d const & x) {   // logarithm base 10
    return __svml_log104(x);
}

// trigonometric functions (angles in radians)
static inline Vec8f sin (Vec8f const & x) {   // sine
    return __svml_sinf8(x);
}
static inline Vec4d sin (Vec4d const & x) {   // sine
    return __svml_sin4(x);
}

static inline Vec8f cos (Vec8f const & x) {   // cosine
    return __svml_cosf8(x);
}
static inline Vec4d cos (Vec4d const & x) {   // cosine
    return __svml_cos4(x);
}

#if defined(__unix__) || defined(__INTEL_COMPILER) || !defined(__x86_64__) || !defined(_MSC_VER)
// no inline assembly in 64 bit MS compiler
static inline Vec8f sincos (Vec8f * pcos, Vec8f const & x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
    __m256 r_sin, r_cos;
    r_sin = __svml_sincosf8(x);
#if defined(__unix__) || defined(__GNUC__)
    __asm__ __volatile__ ( "vmovaps %%ymm1, %0":"=m"(r_cos));
#else // Windows
    _asm vmovaps r_cos, ymm1;
#endif
    *pcos = r_cos;
    return r_sin;
}
static inline Vec4d sincos (Vec4d * pcos, Vec4d const & x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
    __m256d r_sin, r_cos;
    r_sin = __svml_sincos4(x);
#if defined(__unix__) || defined(__GNUC__)
     __asm__ __volatile__ ( "vmovaps %%ymm1, %0":"=m"(r_cos));
#else // Windows
    _asm vmovapd r_cos, ymm1;
#endif
    *pcos = r_cos;
    return r_sin;
}
#endif // inline assembly available

static inline Vec8f tan (Vec8f const & x) {   // tangent
    return __svml_tanf8(x);
}
static inline Vec4d tan (Vec4d const & x) {   // tangent
    return __svml_tan4(x);
}

// inverse trigonometric functions
static inline Vec8f asin (Vec8f const & x) {   // inverse sine
    return __svml_asinf8(x);
}
static inline Vec4d asin (Vec4d const & x) {   // inverse sine
    return __svml_asin4(x);
}

static inline Vec8f acos (Vec8f const & x) {   // inverse cosine
    return __svml_acosf8(x);
}
static inline Vec4d acos (Vec4d const & x) {   // inverse cosine
    return __svml_acos4(x);
}

static inline Vec8f atan (Vec8f const & x) {   // inverse tangent
    return __svml_atanf8(x);
}
static inline Vec4d atan (Vec4d const & x) {   // inverse tangent
    return __svml_atan4(x);
}

static inline Vec8f atan2 (Vec8f const & a, Vec8f const & b) {   // inverse tangent of a/b
    return __svml_atan2f8(a,b);
}
static inline Vec4d atan2 (Vec4d const & a, Vec4d const & b) {   // inverse tangent of a/b
    return __svml_atan24(a,b);
}

#endif // VECTORMATH_COMMON_H

// hyperbolic functions and inverse hyperbolic functions
static inline Vec8f sinh (Vec8f const & x) {   // hyperbolic sine
    return __svml_sinhf8(x);
}
static inline Vec4d sinh (Vec4d const & x) {   // hyperbolic sine
    return __svml_sinh4(x);
}

static inline Vec8f cosh (Vec8f const & x) {   // hyperbolic cosine
    return __svml_coshf8(x);
}
static inline Vec4d cosh (Vec4d const & x) {   // hyperbolic cosine
    return __svml_cosh4(x);
}

static inline Vec8f tanh (Vec8f const & x) {   // hyperbolic tangent
    return __svml_tanhf8(x);
}
static inline Vec4d tanh (Vec4d const & x) {   // hyperbolic tangent
    return __svml_tanh4(x);
}

static inline Vec8f asinh (Vec8f const & x) {   // inverse hyperbolic sine
    return __svml_asinhf8(x);
}
static inline Vec4d asinh (Vec4d const & x) {   // inverse hyperbolic sine
    return __svml_asinh4(x);
}

static inline Vec8f acosh (Vec8f const & x) {   // inverse hyperbolic cosine
    return __svml_acoshf8(x);
}
static inline Vec4d acosh (Vec4d const & x) {   // inverse hyperbolic cosine
    return __svml_acosh4(x);
}

static inline Vec8f atanh (Vec8f const & x) {   // inverse hyperbolic tangent
    return __svml_atanhf8(x);
}
static inline Vec4d atanh (Vec4d const & x) {   // inverse hyperbolic tangent
    return __svml_atanh4(x);
}

// error function
static inline Vec8f erf (Vec8f const & x) {   // error function
    return __svml_erff8(x);
}
static inline Vec4d erf (Vec4d const & x) {   // error function
    return __svml_erf4(x);
}

static inline Vec8f erfc (Vec8f const & x) {   // error function complement
    return __svml_erfcf8(x);
}
static inline Vec4d erfc (Vec4d const & x) {   // error function complement
    return __svml_erfc4(x);
}

static inline Vec8f erfinv (Vec8f const & x) {   // inverse error function
    return __svml_erfinvf8(x);
}
static inline Vec4d erfinv (Vec4d const & x) {   // inverse error function
    return __svml_erfinv4(x);
}

static inline Vec8f cdfnorm (Vec8f const & x) {   // cumulative normal distribution function
    return __svml_cdfnormf8(x);
}
static inline Vec4d cdfnorm (Vec4d const & x) {   // cumulative normal distribution function
    return __svml_cdfnorm4(x);
}

static inline Vec8f cdfnorminv (Vec8f const & x) {   // inverse cumulative normal distribution function
    return __svml_cdfnorminvf8(x);
}
static inline Vec4d cdfnorminv (Vec4d const & x) {   // inverse cumulative normal distribution function
    return __svml_cdfnorminv4(x);
}

// complex exponential function (real part in even numbered elements, imaginary part in odd numbered elements)
// 256-bit version missing in current version of SVML (jan 2012). Use 128 bit version
static inline Vec8f cexp (Vec8f const & x) {   // complex exponential function
    return Vec8f(cexp(x.get_low()), cexp(x.get_high()));
}
static inline Vec4d cexp (Vec4d const & x) {   // complex exponential function
    return Vec4d(cexp(x.get_low()), cexp(x.get_high()));
}

#endif // VECTORF256_H == 2


/*****************************************************************************
*
*      VECTORMATH = 3. Use Intel SVML library with Intel compiler
*
*****************************************************************************/
#elif VECTORMATH == 3 
#include <ia32intrin.h>    // intel svml functions defined in Intel version of immintrin.h

// 128 bit vectors

#ifndef VECTORMATH_COMMON_H

// exponential and power functions
static inline Vec4f exp (Vec4f const & x) {   // exponential function
    return _mm_exp_ps(x);
}
static inline Vec2d exp (Vec2d const & x) {   // exponential function
    return _mm_exp_pd(x);
}

static inline Vec4f expm1 (Vec4f const & x) {   // exp(x)-1. Avoids loss of precision if x is close to 1
    return _mm_expm1_ps(x);
}
static inline Vec2d expm1 (Vec2d const & x) {   // exp(x)-1. Avoids loss of precision if x is close to 1
    return _mm_expm1_pd(x);
}

static inline Vec4f exp2 (Vec4f const & x) {   // pow(2,x)
    return _mm_exp2_ps(x);
}
static inline Vec2d exp2 (Vec2d const & x) {   // pow(2,x)
    return _mm_exp2_pd(x);
}

static inline Vec4f exp10 (Vec4f const & x) {   // pow(10,x)
    return _mm_exp10_ps(x);
}
static inline Vec2d exp10 (Vec2d const & x) {   // pow(10,x)
    return _mm_exp10_pd(x);
}

static inline Vec4f pow (Vec4f const & a, Vec4f const & b) {   // pow(a,b) = a to the power of b
    return _mm_pow_ps(a,b);
}
static inline Vec2d pow (Vec2d const & a, Vec2d const & b) {   // pow(a,b) = a to the power of b
    return _mm_pow_pd(a,b);
}

static inline Vec4f cbrt (Vec4f const & x) {   // pow(x,1/3)
    return _mm_cbrt_ps(x);
}
static inline Vec2d cbrt (Vec2d const & x) {   // pow(x,1/3)
    return _mm_cbrt_pd(x);
}

// logarithms
static inline Vec4f log (Vec4f const & x) {   // natural logarithm
    return _mm_log_ps(x);
}
static inline Vec2d log (Vec2d const & x) {   // natural logarithm
    return _mm_log_pd(x);
}

static inline Vec4f log1p (Vec4f const & x) {   // log(1+x). Avoids loss of precision if 1+x is close to 1
    return _mm_log1p_ps(x);
}
static inline Vec2d log1p (Vec2d const & x) {   // log(1+x). Avoids loss of precision if 1+x is close to 1
    return _mm_log1p_pd(x);
}

static inline Vec4f log2 (Vec4f const & x) {   // logarithm base 2
    return _mm_log2_ps(x);
}
static inline Vec2d log2 (Vec2d const & x) {   // logarithm base 2
    return _mm_log2_pd(x);
}

static inline Vec4f log10 (Vec4f const & x) {   // logarithm base 10
    return _mm_log10_ps(x);
}
static inline Vec2d log10 (Vec2d const & x) {   // logarithm base 10
    return _mm_log10_pd(x);
}

// trigonometric functions
static inline Vec4f sin (Vec4f const & x) {   // sine
    return _mm_sin_ps(x);
}
static inline Vec2d sin (Vec2d const & x) {   // sine
    return _mm_sin_pd(x);
}

static inline Vec4f cos (Vec4f const & x) {   // cosine
    return _mm_cos_ps(x);
}
static inline Vec2d cos (Vec2d const & x) {   // cosine
    return _mm_cos_pd(x);
}

static inline Vec4f sincos (Vec4f * pcos, Vec4f const & x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
    __m128 r_sin, r_cos;
    r_sin = _mm_sincos_ps(&r_cos, x);
    *pcos = r_cos;
    return r_sin;
}
static inline Vec2d sincos (Vec2d * pcos, Vec2d const & x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
    __m128d r_sin, r_cos;
    r_sin = _mm_sincos_pd(&r_cos, x);
    *pcos = r_cos;
    return r_sin;
}

static inline Vec4f tan (Vec4f const & x) {   // tangent
    return _mm_tan_ps(x);
}
static inline Vec2d tan (Vec2d const & x) {   // tangent
    return _mm_tan_pd(x);
}

// inverse trigonometric functions
static inline Vec4f asin (Vec4f const & x) {   // inverse sine
    return _mm_asin_ps(x);
}
static inline Vec2d asin (Vec2d const & x) {   // inverse sine
    return _mm_asin_pd(x);
}

static inline Vec4f acos (Vec4f const & x) {   // inverse cosine
    return _mm_acos_ps(x);
}
static inline Vec2d acos (Vec2d const & x) {   // inverse cosine
    return _mm_acos_pd(x);
}

static inline Vec4f atan (Vec4f const & x) {   // inverse tangent
    return _mm_atan_ps(x);
}
static inline Vec2d atan (Vec2d const & x) {   // inverse tangent
    return _mm_atan_pd(x);
}

static inline Vec4f atan2 (Vec4f const & a, Vec4f const & b) {   // inverse tangent of a/b
    return _mm_atan2_ps(a,b);
}
static inline Vec2d atan2 (Vec2d const & a, Vec2d const & b) {   // inverse tangent of a/b
    return _mm_atan2_pd(a,b);
}

#endif // VECTORMATH_COMMON_H

// hyperbolic functions and inverse hyperbolic functions
static inline Vec4f sinh (Vec4f const & x) {   // hyperbolic sine
    return _mm_sinh_ps(x);
}
static inline Vec2d sinh (Vec2d const & x) {   // hyperbolic sine
    return _mm_sinh_pd(x);
}

static inline Vec4f cosh (Vec4f const & x) {   // hyperbolic cosine
    return _mm_cosh_ps(x);
}
static inline Vec2d cosh (Vec2d const & x) {   // hyperbolic cosine
    return _mm_cosh_pd(x);
}

static inline Vec4f tanh (Vec4f const & x) {   // hyperbolic tangent
    return _mm_tanh_ps(x);
}
static inline Vec2d tanh (Vec2d const & x) {   // hyperbolic tangent
    return _mm_tanh_pd(x);
}

static inline Vec4f asinh (Vec4f const & x) {   // inverse hyperbolic sine
    return _mm_asinh_ps(x);
}
static inline Vec2d asinh (Vec2d const & x) {   // inverse hyperbolic sine
    return _mm_asinh_pd(x);
}

static inline Vec4f acosh (Vec4f const & x) {   // inverse hyperbolic cosine
    return _mm_acosh_ps(x);
}
static inline Vec2d acosh (Vec2d const & x) {   // inverse hyperbolic cosine
    return _mm_acosh_pd(x);
}

static inline Vec4f atanh (Vec4f const & x) {   // inverse hyperbolic tangent
    return _mm_atanh_ps(x);
}
static inline Vec2d atanh (Vec2d const & x) {   // inverse hyperbolic tangent
    return _mm_atanh_pd(x);
}

// error function
static inline Vec4f erf (Vec4f const & x) {   // error function
    return _mm_erf_ps(x);
}
static inline Vec2d erf (Vec2d const & x) {   // error function
    return _mm_erf_pd(x);
}

static inline Vec4f erfc (Vec4f const & x) {   // error function complement
    return _mm_erfc_ps(x);
}
static inline Vec2d erfc (Vec2d const & x) {   // error function complement
    return _mm_erfc_pd(x);
}

static inline Vec4f erfinv (Vec4f const & x) {   // inverse error function
    return _mm_erfinv_ps(x);
}
static inline Vec2d erfinv (Vec2d const & x) {   // inverse error function
    return _mm_erfinv_pd(x);
}

extern "C" {
extern __m128 __svml_cdfnormf4(__m128);  // not in immintrin.h
extern __m128d __svml_cdfnorm2(__m128d); // not in immintrin.h
}

static inline Vec4f cdfnorm (Vec4f const & x) {   // cumulative normal distribution function
    return __svml_cdfnormf4(x);
}
static inline Vec2d cdfnorm (Vec2d const & x) {   // cumulative normal distribution function
    return __svml_cdfnorm2(x);
}

static inline Vec4f cdfnorminv (Vec4f const & x) {   // inverse cumulative normal distribution function
    return _mm_cdfnorminv_ps(x);
}
static inline Vec2d cdfnorminv (Vec2d const & x) {   // inverse cumulative normal distribution function
    return _mm_cdfnorminv_pd(x);
}

// complex functions
extern "C" {
extern __m128  __svml_cexpf2(__m128);   // not in immintrin.h
extern __m128  __svml_cexpf4(__m128);   // not in immintrin.h
extern __m128d __svml_cexp2(__m128d);   // not in immintrin.h
}

static inline Vec4f cexp (Vec4f const & x) {   // complex exponential function
    return __svml_cexpf4(x);
}
static inline Vec2d cexp (Vec2d const & x) {   // complex exponential function
    return __svml_cexp2(x);
}

#if defined (VECTORF256_H) && VECTORF256_H >= 2

// 256 bit vectors

#ifndef VECTORMATH_COMMON_H

// exponential and power functions
static inline Vec8f exp (Vec8f const & x) {   // exponential function
    return _mm256_exp_ps(x);
}
static inline Vec4d exp (Vec4d const & x) {   // exponential function
    return _mm256_exp_pd(x);
}

static inline Vec8f expm1 (Vec8f const & x) {   // exp(x)-1. Avoids loss of precision if x is close to 1
    return _mm256_expm1_ps(x);
}
static inline Vec4d expm1 (Vec4d const & x) {   // exp(x)-1. Avoids loss of precision if x is close to 1
    return _mm256_expm1_pd(x);
}

static inline Vec8f exp2 (Vec8f const & x) {   // pow(2,x)
    return _mm256_exp2_ps(x);
}
static inline Vec4d exp2 (Vec4d const & x) {   // pow(2,x)
    return _mm256_exp2_pd(x);
}

static inline Vec8f exp10 (Vec8f const & x) {   // pow(10,x)
    return _mm256_exp10_ps(x);
}
static inline Vec4d exp10 (Vec4d const & x) {   // pow(10,x)
    return _mm256_exp10_pd(x);
}

static inline Vec8f pow (Vec8f const & a, Vec8f const & b) {   // pow(a,b) = a to the power of b
    return _mm256_pow_ps(a,b);
}
static inline Vec4d pow (Vec4d const & a, Vec4d const & b) {   // pow(a,b) = a to the power of b
    return _mm256_pow_pd(a,b);
}

static inline Vec8f cbrt (Vec8f const & x) {   // pow(x,1/3)
    return _mm256_cbrt_ps(x);
}
static inline Vec4d cbrt (Vec4d const & x) {   // pow(x,1/3)
    return _mm256_cbrt_pd(x);
}

// logarithms
static inline Vec8f log (Vec8f const & x) {   // natural logarithm
    return _mm256_log_ps(x);
}
static inline Vec4d log (Vec4d const & x) {   // natural logarithm
    return _mm256_log_pd(x);
}

static inline Vec8f log1p (Vec8f const & x) {   // log(1+x). Avoids loss of precision if 1+x is close to 1
    return _mm256_log1p_ps(x);
}
static inline Vec4d log1p (Vec4d const & x) {   // log(1+x). Avoids loss of precision if 1+x is close to 1
    return _mm256_log1p_pd(x);
}

static inline Vec8f log2 (Vec8f const & x) {   // logarithm base 2
    return _mm256_log2_ps(x);
}
static inline Vec4d log2 (Vec4d const & x) {   // logarithm base 2
    return _mm256_log2_pd(x);
}

static inline Vec8f log10 (Vec8f const & x) {   // logarithm base 10
    return _mm256_log10_ps(x);
}
static inline Vec4d log10 (Vec4d const & x) {   // logarithm base 10
    return _mm256_log10_pd(x);
}

// trigonometric functions
static inline Vec8f sin (Vec8f const & x) {   // sine
    return _mm256_sin_ps(x);
}
static inline Vec4d sin (Vec4d const & x) {   // sine
    return _mm256_sin_pd(x);
}

static inline Vec8f cos (Vec8f const & x) {   // cosine
    return _mm256_cos_ps(x);
}
static inline Vec4d cos (Vec4d const & x) {   // cosine
    return _mm256_cos_pd(x);
}

static inline Vec8f sincos (Vec8f * pcos, Vec8f const & x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
    __m256 r_sin, r_cos;
    r_sin = _mm256_sincos_ps(&r_cos, x);
    *pcos = r_cos;
    return r_sin;
}
static inline Vec4d sincos (Vec4d * pcos, Vec4d const & x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
    __m256d r_sin, r_cos;
    r_sin = _mm256_sincos_pd(&r_cos, x);
    *pcos = r_cos;
    return r_sin;
}

static inline Vec8f tan (Vec8f const & x) {   // tangent
    return _mm256_tan_ps(x);
}
static inline Vec4d tan (Vec4d const & x) {   // tangent
    return _mm256_tan_pd(x);
}

// inverse trigonometric functions
static inline Vec8f asin (Vec8f const & x) {   // inverse sine
    return _mm256_asin_ps(x);
}
static inline Vec4d asin (Vec4d const & x) {   // inverse sine
    return _mm256_asin_pd(x);
}

static inline Vec8f acos (Vec8f const & x) {   // inverse cosine
    return _mm256_acos_ps(x);
}
static inline Vec4d acos (Vec4d const & x) {   // inverse cosine
    return _mm256_acos_pd(x);
}

static inline Vec8f atan (Vec8f const & x) {   // inverse tangent
    return _mm256_atan_ps(x);
}
static inline Vec4d atan (Vec4d const & x) {   // inverse tangent
    return _mm256_atan_pd(x);
}

static inline Vec8f atan2 (Vec8f const & a, Vec8f const & b) {   // inverse tangent of a/b
    return _mm256_atan2_ps(a,b);
}
static inline Vec4d atan2 (Vec4d const & a, Vec4d const & b) {   // inverse tangent of a/b
    return _mm256_atan2_pd(a,b);
}

#endif // VECTORMATH_COMMON_H

// hyperbolic functions and inverse hyperbolic functions
static inline Vec8f sinh (Vec8f const & x) {   // hyperbolic sine
    return _mm256_sinh_ps(x);
}
static inline Vec4d sinh (Vec4d const & x) {   // hyperbolic sine
    return _mm256_sinh_pd(x);
}

static inline Vec8f cosh (Vec8f const & x) {   // hyperbolic cosine
    return _mm256_cosh_ps(x);
}
static inline Vec4d cosh (Vec4d const & x) {   // hyperbolic cosine
    return _mm256_cosh_pd(x);
}

static inline Vec8f tanh (Vec8f const & x) {   // hyperbolic tangent
    return _mm256_tanh_ps(x);
}
static inline Vec4d tanh (Vec4d const & x) {   // hyperbolic tangent
    return _mm256_tanh_pd(x);
}

static inline Vec8f asinh (Vec8f const & x) {   // inverse hyperbolic sine
    return _mm256_asinh_ps(x);
}
static inline Vec4d asinh (Vec4d const & x) {   // inverse hyperbolic sine
    return _mm256_asinh_pd(x);
}

static inline Vec8f acosh (Vec8f const & x) {   // inverse hyperbolic cosine
    return _mm256_acosh_ps(x);
}
static inline Vec4d acosh (Vec4d const & x) {   // inverse hyperbolic cosine
    return _mm256_acosh_pd(x);
}

static inline Vec8f atanh (Vec8f const & x) {   // inverse hyperbolic tangent
    return _mm256_atanh_ps(x);
}
static inline Vec4d atanh (Vec4d const & x) {   // inverse hyperbolic tangent
    return _mm256_atanh_pd(x);
}

// error function
static inline Vec8f erf (Vec8f const & x) {   // error function
    return _mm256_erf_ps(x);
}
static inline Vec4d erf (Vec4d const & x) {   // error function
    return _mm256_erf_pd(x);
}

static inline Vec8f erfc (Vec8f const & x) {   // error function complement
    return _mm256_erfc_ps(x);
}
static inline Vec4d erfc (Vec4d const & x) {   // error function complement
    return _mm256_erfc_pd(x);
}

static inline Vec8f erfinv (Vec8f const & x) {   // inverse error function
    return _mm256_erfinv_ps(x);
}
static inline Vec4d erfinv (Vec4d const & x) {   // inverse error function
    return _mm256_erfinv_pd(x);
}

extern "C" {
extern __m256 __svml_cdfnormf8(__m256);  // not in immintrin.h
extern __m256d __svml_cdfnorm4(__m256d); // not in immintrin.h
}
static inline Vec8f cdfnorm (Vec8f const & x) {   // cumulative normal distribution function
    return __svml_cdfnormf8(x);
}
static inline Vec4d cdfnorm (Vec4d const & x) {   // cumulative normal distribution function
    return __svml_cdfnorm4(x);
}

static inline Vec8f cdfnorminv (Vec8f const & x) {   // inverse cumulative normal distribution function
    return _mm256_cdfnorminv_ps(x);
}
static inline Vec4d cdfnorminv (Vec4d const & x) {   // inverse cumulative normal distribution function
    return _mm256_cdfnorminv_pd(x);
}

// complex exponential function (real part in even numbered elements, imaginary part in odd numbered elements)
static inline Vec8f cexp (Vec8f const & x) {   // complex exponential function
    return Vec8f(cexp(x.get_low()), cexp(x.get_high()));
}
static inline Vec4d cexp (Vec4d const & x) {   // complex exponential function
    return Vec4d(cexp(x.get_low()), cexp(x.get_high()));
}

#endif // VECTORF256_H >= 2

#else
#error unknown value of VECTORMATH
#endif // VECTORMATH


#if defined (VECTORF256_H) && VECTORF256_H == 1 && (VECTORMATH == 2 || VECTORMATH == 3)
/*****************************************************************************
*
*      VECTORF256_H == 1. 256 bit vectors emulated as two 128-bit vectors,
*      SVML library
*
*****************************************************************************/

#ifndef VECTORMATH_COMMON_H

// exponential and power functions
static inline Vec8f exp (Vec8f const & x) {   // exponential function
    return Vec8f(exp(x.get_low()), exp(x.get_high()));
}
static inline Vec4d exp (Vec4d const & x) {   // exponential function
    return Vec4d(exp(x.get_low()), exp(x.get_high()));
}

static inline Vec8f expm1 (Vec8f const & x) {   // exp(x)-1. Avoids loss of precision if x is close to 1
    return Vec8f(expm1(x.get_low()), expm1(x.get_high()));
}
static inline Vec4d expm1 (Vec4d const & x) {   // exp(x)-1. Avoids loss of precision if x is close to 1
    return Vec4d(expm1(x.get_low()), expm1(x.get_high()));
}

static inline Vec8f exp2 (Vec8f const & x) {   // pow(2,x)
    return Vec8f(exp2(x.get_low()), exp2(x.get_high()));
}
static inline Vec4d exp2 (Vec4d const & x) {   // pow(2,x)
    return Vec4d(exp2(x.get_low()), exp2(x.get_high()));
}

static inline Vec8f exp10 (Vec8f const & x) {   // pow(10,x)
    return Vec8f(exp10(x.get_low()), exp10(x.get_high()));
}
static inline Vec4d exp10 (Vec4d const & x) {   // pow(10,x)
    return Vec4d(exp10(x.get_low()), exp10(x.get_high()));
}

static inline Vec8f pow (Vec8f const & a, Vec8f const & b) {   // pow(a,b) = a to the power of b
    return Vec8f(pow(a.get_low(),b.get_low()), pow(a.get_high(),b.get_high()));
}
static inline Vec4d pow (Vec4d const & a, Vec4d const & b) {   // pow(a,b) = a to the power of b
    return Vec4d(pow(a.get_low(),b.get_low()), pow(a.get_high(),b.get_high()));
}

static inline Vec8f cbrt (Vec8f const & x) {   // pow(x,1/3)
    return Vec8f(cbrt(x.get_low()), cbrt(x.get_high()));
}
static inline Vec4d cbrt (Vec4d const & x) {   // pow(x,1/3)
    return Vec4d(cbrt(x.get_low()), cbrt(x.get_high()));
}

// logarithms
static inline Vec8f log (Vec8f const & x) {   // natural logarithm
    return Vec8f(log(x.get_low()), log(x.get_high()));
}
static inline Vec4d log (Vec4d const & x) {   // natural logarithm
    return Vec4d(log(x.get_low()), log(x.get_high()));
}

static inline Vec8f log1p (Vec8f const & x) {   // log(1+x). Avoids loss of precision if 1+x is close to 1
    return Vec8f(log1p(x.get_low()), log1p(x.get_high()));
}
static inline Vec4d log1p (Vec4d const & x) {   // log(1+x). Avoids loss of precision if 1+x is close to 1
    return Vec4d(log1p(x.get_low()), log1p(x.get_high()));
}

static inline Vec8f log2 (Vec8f const & x) {   // logarithm base 2
    return Vec8f(log2(x.get_low()), log2(x.get_high()));
}
static inline Vec4d log2 (Vec4d const & x) {   // logarithm base 2
    return Vec4d(log2(x.get_low()), log2(x.get_high()));
}

static inline Vec8f log10 (Vec8f const & x) {   // logarithm base 10
    return Vec8f(log10(x.get_low()), log10(x.get_high()));
}
static inline Vec4d log10 (Vec4d const & x) {   // logarithm base 10
    return Vec4d(log10(x.get_low()), log10(x.get_high()));
}

// trigonometric functions (angles in radians)
static inline Vec8f sin (Vec8f const & x) {   // sine
    return Vec8f(sin(x.get_low()), sin(x.get_high()));
}
static inline Vec4d sin (Vec4d const & x) {   // sine
    return Vec4d(sin(x.get_low()), sin(x.get_high()));
}

static inline Vec8f cos (Vec8f const & x) {   // cosine
    return Vec8f(cos(x.get_low()), cos(x.get_high()));
}
static inline Vec4d cos (Vec4d const & x) {   // cosine
    return Vec4d(cos(x.get_low()), cos(x.get_high()));
}

#if defined(__unix__) || defined(__INTEL_COMPILER) || !defined(__x86_64__) || !defined(_MSC_VER)
// no inline assembly in 64 bit MS compiler
static inline Vec8f sincos (Vec8f * pcos, Vec8f const & x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
    Vec4f r_sin0, r_sin1, r_cos0, r_cos1;
    r_sin0 = sincos(&r_cos0, x.get_low()); 
    r_sin1 = sincos(&r_cos1, x.get_high());
    *pcos = Vec8f(r_cos0, r_cos1);
    return Vec8f(r_sin0, r_sin1); 
}
static inline Vec4d sincos (Vec4d * pcos, Vec4d const & x) {   // sine and cosine. sin(x) returned, cos(x) in pcos
    Vec2d r_sin0, r_sin1, r_cos0, r_cos1;
    r_sin0 = sincos(&r_cos0, x.get_low()); 
    r_sin1 = sincos(&r_cos1, x.get_high());
    *pcos = Vec4d(r_cos0, r_cos1);
    return Vec4d(r_sin0, r_sin1); 
}
#endif // inline assembly available

static inline Vec8f tan (Vec8f const & x) {   // tangent
    return Vec8f(tan(x.get_low()), tan(x.get_high()));
}
static inline Vec4d tan (Vec4d const & x) {   // tangent
    return Vec4d(tan(x.get_low()), tan(x.get_high()));
}

// inverse trigonometric functions
static inline Vec8f asin (Vec8f const & x) {   // inverse sine
    return Vec8f(asin(x.get_low()), asin(x.get_high()));
}
static inline Vec4d asin (Vec4d const & x) {   // inverse sine
    return Vec4d(asin(x.get_low()), asin(x.get_high()));
}

static inline Vec8f acos (Vec8f const & x) {   // inverse cosine
    return Vec8f(acos(x.get_low()), acos(x.get_high()));
}
static inline Vec4d acos (Vec4d const & x) {   // inverse cosine
    return Vec4d(acos(x.get_low()), acos(x.get_high()));
}

static inline Vec8f atan (Vec8f const & x) {   // inverse tangent
    return Vec8f(atan(x.get_low()), atan(x.get_high()));
}
static inline Vec4d atan (Vec4d const & x) {   // inverse tangent
    return Vec4d(atan(x.get_low()), atan(x.get_high()));
}

static inline Vec8f atan2 (Vec8f const & a, Vec8f const & b) {   // inverse tangent of a/b
    return Vec8f(atan2(a.get_low(),b.get_low()), atan2(a.get_high(),b.get_high()));
}
static inline Vec4d atan2 (Vec4d const & a, Vec4d const & b) {   // inverse tangent of a/b
    return Vec4d(atan2(a.get_low(),b.get_low()), atan2(a.get_high(),b.get_high()));
}

#endif // VECTORMATH_COMMON_H

// hyperbolic functions and inverse hyperbolic functions
static inline Vec8f sinh (Vec8f const & x) {   // hyperbolic sine
    return Vec8f(sinh(x.get_low()), sinh(x.get_high()));
}
static inline Vec4d sinh (Vec4d const & x) {   // hyperbolic sine
    return Vec4d(sinh(x.get_low()), sinh(x.get_high()));
}

static inline Vec8f cosh (Vec8f const & x) {   // hyperbolic cosine
    return Vec8f(cosh(x.get_low()), cosh(x.get_high()));
}
static inline Vec4d cosh (Vec4d const & x) {   // hyperbolic cosine
    return Vec4d(cosh(x.get_low()), cosh(x.get_high()));
}

static inline Vec8f tanh (Vec8f const & x) {   // hyperbolic tangent
    return Vec8f(tanh(x.get_low()), tanh(x.get_high()));
}
static inline Vec4d tanh (Vec4d const & x) {   // hyperbolic tangent
    return Vec4d(tanh(x.get_low()), tanh(x.get_high()));
}

static inline Vec8f asinh (Vec8f const & x) {   // inverse hyperbolic sine
    return Vec8f(asinh(x.get_low()), asinh(x.get_high()));
}
static inline Vec4d asinh (Vec4d const & x) {   // inverse hyperbolic sine
    return Vec4d(asinh(x.get_low()), asinh(x.get_high()));
}

static inline Vec8f acosh (Vec8f const & x) {   // inverse hyperbolic cosine
    return Vec8f(acosh(x.get_low()), acosh(x.get_high()));
}
static inline Vec4d acosh (Vec4d const & x) {   // inverse hyperbolic cosine
    return Vec4d(acosh(x.get_low()), acosh(x.get_high()));
}

static inline Vec8f atanh (Vec8f const & x) {   // inverse hyperbolic tangent
    return Vec8f(atanh(x.get_low()), atanh(x.get_high()));
}
static inline Vec4d atanh (Vec4d const & x) {   // inverse hyperbolic tangent
    return Vec4d(atanh(x.get_low()), atanh(x.get_high()));
}

// error function
static inline Vec8f erf (Vec8f const & x) {   // error function
    return Vec8f(erf(x.get_low()), erf(x.get_high()));
}
static inline Vec4d erf (Vec4d const & x) {   // error function
    return Vec4d(erf(x.get_low()), erf(x.get_high()));
}

static inline Vec8f erfc (Vec8f const & x) {   // error function complement
    return Vec8f(erfc(x.get_low()), erfc(x.get_high()));
}
static inline Vec4d erfc (Vec4d const & x) {   // error function complement
    return Vec4d(erfc(x.get_low()), erfc(x.get_high()));
}

static inline Vec8f erfinv (Vec8f const & x) {   // inverse error function
    return Vec8f(erfinv(x.get_low()), erfinv(x.get_high()));
}
static inline Vec4d erfinv (Vec4d const & x) {   // inverse error function
    return Vec4d(erfinv(x.get_low()), erfinv(x.get_high()));
}

static inline Vec8f cdfnorm (Vec8f const & x) {   // cumulative normal distribution function
    return Vec8f(cdfnorm(x.get_low()), cdfnorm(x.get_high()));
}
static inline Vec4d cdfnorm (Vec4d const & x) {   // cumulative normal distribution function
    return Vec4d(cdfnorm(x.get_low()), cdfnorm(x.get_high()));
}

static inline Vec8f cdfnorminv (Vec8f const & x) {   // inverse cumulative normal distribution function
    return Vec8f(cdfnorminv(x.get_low()), cdfnorminv(x.get_high()));
}
static inline Vec4d cdfnorminv (Vec4d const & x) {   // inverse cumulative normal distribution function
    return Vec4d(cdfnorminv(x.get_low()), cdfnorminv(x.get_high()));
}

// complex exponential function (real part in even numbered elements, imaginary part in odd numbered elements)
static inline Vec8f cexp (Vec8f const & x) {   // complex exponential function
    return Vec8f(cexp(x.get_low()), cexp(x.get_high()));
}
static inline Vec4d cexp (Vec4d const & x) {   // complex exponential function
    return Vec4d(cexp(x.get_low()), cexp(x.get_high()));
}

#endif // VECTORF256_H == 1

#ifdef VCL_NAMESPACE
}
#endif

#endif // VECTORMATH_LIB_H