Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Error completing request:07, 6.27it/s] #7

Open
Tobe2d opened this issue Dec 20, 2022 · 1 comment
Open

Error completing request:07, 6.27it/s] #7

Tobe2d opened this issue Dec 20, 2022 · 1 comment

Comments

@Tobe2d
Copy link

Tobe2d commented Dec 20, 2022

I have installed the extension, and when I try testing it I run into an error just after generating the image.

At the very end of the log it says this:
nvrtc: error: invalid value for --gpu-architecture (-arch)

My GPU is an Nvidia RTX 4090, paired with a 13th-gen Intel Core i9 CPU.

Below is the full log:

`Error completing request:07, 6.27it/s]
Arguments: ('A major', '', 'None', 'None', 20, 0, False, False, 1, 1, 7, -1.0, -1.0, 0, 0, 0, False, 512, 512, False, 0.7, 0, 0, 20, 0, 0, 0, 0, 0.25, '

\n

Combinations

\n\n Choose a number of terms from a list, in this case we choose two artists: \n {2$$artist1|artist2|artist3}
\n\n If $$ is not provided, then 1$$ is assumed.

\n\n If the chosen number of terms is greater than the available terms, then some terms will be duplicated, otherwise chosen terms will be unique. This is useful in the case of wildcards, e.g.\n {2$$artist} is equivalent to {2$$artist|artist}

\n\n A range can be provided:\n {1-3$$artist1|artist2|artist3}
\n In this case, a random number of artists between 1 and 3 is chosen.

\n\n Wildcards can be used and the joiner can also be specified:\n {{1-$$and$$adjective}}
\n\n Here, a random number between 1 and 3 words from adjective.txt will be chosen and joined together with the word 'and' instead of the default comma.\n\n

\n\n

Wildcards

\n Find and manage wildcards in the Wildcards Manager tab.\n\n

\n\n You can add more wildcards by creating a text file with one term per line and name is mywildcards.txt. Place it in E:\Ai__Project\stable-diffusion-webui\extensions\sd-dynamic-prompts\wildcards. <folder>/mywildcards will then become available.\n
\n\n', True, False, 1, False, False, False, 100, 0.7, False, False, False, False, False, False, False, False, True, False, '\n

Combinations

\n Choose a number of terms from a list, in this case we choose two artists\n {2$$artist1|artist2|artist3}\n If $$ is not provided, then 1$$ is assumed.\n
\n A range can be provided:\n {1-3$$artist1|artist2|artist3}\n In this case, a random number of artists between 1 and 3 is chosen.\n

\n\n

Wildcards

\n

Available wildcards

\n
    \n
  • adjective
  • artist
  • genre
  • site
  • style
\n
\n WILDCARD_DIR: scripts/wildcards
\n You can add more wildcards by creating a text file with one term per line and name is mywildcards.txt. Place it in scripts/wildcards. mywildcards will then become available.\n ', None, '', 'outputs', False, False, 1, 1, False, False, None, None, '', '', '', '', 'Auto rename', {'label': 'Upload avatars config'}, 'Open outputs directory', 'Export to WebUI style', True, {'label': 'Presets'}, {'label': 'QC preview'}, '', [], 'Select', 'QC scan', 'Show pics', None, '', 1, True, 100, False, False, '', 25, True, 5.0, False, False, '', 2, 10.0, True, 30.0, True, 'Illustration', 'svg', True, True, False, 0.5, True, 16, True, 16, False, 1, '', 0, '', True, False, False, '', 'None', 30, 4, 0, 0, False, 'None', '
', 'None', 30, 4, 0, 0, 4, 0.4, True, 32, '', 5, 24, 12.5, 1000, 'DDIM', 0, 64, 64, '', 64, 7.5, 0.42, 'DDIM', 64, 64, 1, 0, 92, True, True, True, False, False, False, '
Confused/new? View the README for usage instructions.

', False, True, False, True, True, '', '🔄', True, False, 'E:\Ai__Project\stable-diffusion-webui\extensions\sd-webui-riffusion\outputs', 'Refresh Inline Audio (Last Batch)', None, None, None, None, None, None, None, None, False, 4.0, '', 10.0, False, False, True, 30.0, True, False, False, 0, 0.0, 10.0, True, 30.0, True, 0, 4, 384, 384, False, False, True, True, True, False, True, 1, False, False, 6.4, 38.5, 'fixed', 1, 'linear', '30', 'grad_min', 0.01, 1, 'clip', 1.0, 1.0, 'mp4', 10.0, 0, '', True) {}
Traceback (most recent call last):
File "E:\Ai__Project\stable-diffusion-webui\modules\call_queue.py", line 45, in f
res = list(func(*args, **kwargs))
File "E:\Ai__Project\stable-diffusion-webui\modules\call_queue.py", line 28, in f
res = func(*args, **kwargs)
File "E:\Ai__Project\stable-diffusion-webui\modules\txt2img.py", line 46, in txt2img
processed = modules.scripts.scripts_txt2img.run(p, *args)
File "E:\Ai__Project\stable-diffusion-webui\modules\scripts.py", line 328, in run
processed = script.run(p, *script_args)
File "E:\Ai__Project\stable-diffusion-webui\extensions\sd-webui-riffusion\scripts\riffusion.py", line 133, in run
wav_bytes, duration_s = self.wav_bytes_from_spectrogram_image(
File "E:\Ai__Project\stable-diffusion-webui\extensions\sd-webui-riffusion\scripts\riffusion.py", line 184, in wav_bytes_from_spectrogram_image
samples = self.waveform_from_spectrogram(
File "E:\Ai__Project\stable-diffusion-webui\extensions\sd-webui-riffusion\scripts\riffusion.py", line 315, in waveform_from_spectrogram
waveform = griffin_lim(Sxx_torch).cpu().numpy()
File "E:\Ai__Project\stable-diffusion-webui\venv\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "E:\Ai__Project\stable-diffusion-webui\venv\lib\site-packages\torchaudio\transforms_transforms.py", line 280, in forward
return F.griffinlim(
File "E:\Ai__Project\stable-diffusion-webui\venv\lib\site-packages\torchaudio\functional\functional.py", line 306, in griffinlim
angles = angles.div(angles.abs().add(1e-16))
RuntimeError:
#define POS_INFINITY __int_as_float(0x7f800000)
#define INFINITY POS_INFINITY
#define NEG_INFINITY __int_as_float(0xff800000)
#define NAN __int_as_float(0x7fffffff)

typedef long long int int64_t;
typedef unsigned int uint32_t;
typedef signed char int8_t;
typedef unsigned char uint8_t; // NOTE: this MUST be "unsigned char"! "char" is equivalent to "signed char"
typedef short int16_t;
static_assert(sizeof(int64_t) == 8, "expected size does not match");
static_assert(sizeof(uint32_t) == 4, "expected size does not match");
static_assert(sizeof(int8_t) == 1, "expected size does not match");
constexpr int num_threads = 128;
constexpr int thread_work_size = 4; // TODO: make template substitution once we decide where those vars live
constexpr int block_work_size = thread_work_size * num_threads;
//TODO use _assert_fail, because assert is disabled in non-debug builds
#define ERROR_UNSUPPORTED_CAST assert(false);

namespace std {

template
_Tp&& __declval(int);
template
_Tp __declval(long);
template
decltype(__declval<_Tp>(0)) declval() noexcept;

template <class _Tp, _Tp __v>
struct integral_constant {
static const _Tp value = __v;
typedef _Tp value_type;
typedef integral_constant type;
};

typedef integral_constant<bool, true> true_type;
typedef integral_constant<bool, false> false_type;

// is_same, functional
template <class _Tp, class _Up> struct is_same : public false_type {};
template struct is_same<_Tp, _Tp> : public true_type {};

// is_integral, for some types.
template struct is_integral
: public integral_constant<bool, false> {};
template <> struct is_integral
: public integral_constant<bool, true> {};
template <> struct is_integral
: public integral_constant<bool, true> {};
template <> struct is_integral
: public integral_constant<bool, true> {};
template <> struct is_integral
: public integral_constant<bool, true> {};
template <> struct is_integral
: public integral_constant<bool, true> {};
template <> struct is_integral
: public integral_constant<bool, true> {};

// enable_if, functional
template <bool _C, typename _Tp> struct enable_if{};
template struct enable_if<true, _Tp>{
using type = _Tp;
};
template <bool b, class T=void>
using enable_if_t = typename enable_if<b,T>::type;

template struct remove_const {typedef _Tp type;};
template struct remove_const {typedef _Tp type;};
template using remove_const_t = typename remove_const<_Tp>::type;

template struct remove_volatile {typedef _Tp type;};
template struct remove_volatile {typedef _Tp type;};
template using remove_volatile_t = typename remove_volatile<_Tp>::type;

template struct remove_cv
{typedef typename remove_volatile<typename remove_const<_Tp>::type>::type type;};
template using remove_cv_t = typename remove_cv<_Tp>::type;

template struct __libcpp_is_floating_point : public false_type {};
template <> struct __libcpp_is_floating_point : public true_type {};
template <> struct __libcpp_is_floating_point : public true_type {};
template <> struct __libcpp_is_floating_point : public true_type {};

template struct is_floating_point
: public __libcpp_is_floating_point<typename remove_cv<_Tp>::type> {};

template struct is_arithmetic
: public integral_constant<bool, is_integral<_Tp>::value ||
is_floating_point<_Tp>::value> {};
template
inline constexpr bool is_arithmetic_v = is_arithmetic<_Tp>::value;

template
struct __numeric_type
{
static void __test(...);
static float __test(float);
static double __test(char);
static double __test(int);
static double __test(unsigned);
static double __test(long);
static double __test(unsigned long);
static double __test(long long);
static double __test(unsigned long long);
static double __test(double);
static long double __test(long double);

 typedef decltype(__test(declval<_Tp>())) type;
 static const bool value = !is_same<type, void>::value;

};

template <>
struct __numeric_type
{
static const bool value = true;
};

// __promote

template <class _A1, class _A2 = void, class _A3 = void,
bool = __numeric_type<_A1>::value &&
__numeric_type<_A2>::value &&
__numeric_type<_A3>::value>
class __promote_imp
{
public:
static const bool value = false;
};

template <class _A1, class _A2, class _A3>
class __promote_imp<_A1, _A2, _A3, true>
{
private:
typedef typename __promote_imp<_A1>::type __type1;
typedef typename __promote_imp<_A2>::type __type2;
typedef typename __promote_imp<_A3>::type __type3;
public:
typedef decltype(__type1() + __type2() + __type3()) type;
static const bool value = true;
};

template <class _A1, class _A2>
class __promote_imp<_A1, _A2, void, true>
{
private:
typedef typename __promote_imp<_A1>::type __type1;
typedef typename __promote_imp<_A2>::type __type2;
public:
typedef decltype(__type1() + __type2()) type;
static const bool value = true;
};

template
class __promote_imp<_A1, void, void, true>
{
public:
typedef typename __numeric_type<_A1>::type type;
static const bool value = true;
};

template <class _A1, class _A2 = void, class _A3 = void>
class __promote : public __promote_imp<_A1, _A2, _A3> {};

} // namespace std

namespace std {

using ::signbit;
using ::isfinite;
using ::isinf;
using ::isnan;

using ::abs;

using ::acos;
using ::acosf;
using ::asin;
using ::asinf;
using ::atan;
using ::atanf;
using ::atan2;
using ::atan2f;
using ::ceil;
using ::ceilf;
using ::cos;
using ::cosf;
using ::cosh;
using ::coshf;

using ::exp;
using ::expf;

using ::fabs;
using ::fabsf;
using ::floor;
using ::floorf;

using ::fmod;
using ::fmodf;

using ::frexp;
using ::frexpf;
using ::ldexp;
using ::ldexpf;

using ::log;
using ::logf;

using ::log10;
using ::log10f;
using ::modf;
using ::modff;

using ::pow;
using ::powf;

using ::sin;
using ::sinf;
using ::sinh;
using ::sinhf;

using ::sqrt;
using ::sqrtf;
using ::tan;
using ::tanf;

using ::tanh;
using ::tanhf;

using ::acosh;
using ::acoshf;
using ::asinh;
using ::asinhf;
using ::atanh;
using ::atanhf;
using ::cbrt;
using ::cbrtf;

using ::copysign;
using ::copysignf;

using ::erf;
using ::erff;
using ::erfc;
using ::erfcf;
using ::exp2;
using ::exp2f;
using ::expm1;
using ::expm1f;
using ::fdim;
using ::fdimf;
using ::fmaf;
using ::fma;
using ::fmax;
using ::fmaxf;
using ::fmin;
using ::fminf;
using ::hypot;
using ::hypotf;
using ::ilogb;
using ::ilogbf;
using ::lgamma;
using ::lgammaf;
using ::llrint;
using ::llrintf;
using ::llround;
using ::llroundf;
using ::log1p;
using ::log1pf;
using ::log2;
using ::log2f;
using ::logb;
using ::logbf;
using ::lrint;
using ::lrintf;
using ::lround;
using ::lroundf;

using ::nan;
using ::nanf;

using ::nearbyint;
using ::nearbyintf;
using ::nextafter;
using ::nextafterf;
using ::remainder;
using ::remainderf;
using ::remquo;
using ::remquof;
using ::rint;
using ::rintf;
using ::round;
using ::roundf;
using ::scalbln;
using ::scalblnf;
using ::scalbn;
using ::scalbnf;
using ::tgamma;
using ::tgammaf;
using ::trunc;
using ::truncf;

} // namespace std

// NB: Order matters for this macro; it is relied upon in
// promoteTypesLookup and the serialization format.
// Note, some types have ctype as void because we don't support them in codegen
#define AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(
)
_(uint8_t, Byte) /* 0 /
_(int8_t, Char) /
1 /
_(int16_t, Short) /
2 /
_(int, Int) /
3 /
_(int64_t, Long) /
4 /
_(at::Half, Half) /
5 /
_(float, Float) /
6 /
_(double, Double) /
7 /
_(std::complexat::Half, ComplexHalf) /
8 /
_(std::complex, ComplexFloat) /
9 /
_(std::complex, ComplexDouble) /
10 /
_(bool, Bool) /
11 /
_(void, QInt8) /
12 /
_(void, QUInt8) /
13 /
_(void, QInt32) /
14 /
_(at::BFloat16, BFloat16) /
15 */ \

#define AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_EXCEPT_QINT(_)
_(uint8_t, Byte)
_(int8_t, Char)
_(int16_t, Short)
_(int, Int)
_(int64_t, Long)
_(at::Half, Half)
_(float, Float)
_(double, Double)
_(std::complexat::Half, ComplexHalf)
_(std::complex, ComplexFloat)
_(std::complex, ComplexDouble)
_(bool, Bool)
_(at::BFloat16, BFloat16)

enum class ScalarType : int8_t {
#define DEFINE_ENUM(_1, n) n,
AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(DEFINE_ENUM)
#undef DEFINE_ENUM
Undefined,
NumOptions
};

template <typename T, int size>
struct Array {
T data[size];

device T operator[](int i) const {
return data[i];
}
device T& operator[](int i) {
return data[i];
}
Array() = default;
Array(const Array&) = default;
Array& operator=(const Array&) = default;
device Array(T x) {
for (int i = 0; i < size; i++) {
data[i] = x;
}
}
};

namespace std {

template class complex;

template complex<_Tp> operator*(const complex<_Tp>& __z, const complex<_Tp>& __w);
template complex<_Tp> operator/(const complex<_Tp>& __x, const complex<_Tp>& __y);

template
class complex
{
public:
typedef _Tp value_type;
private:
value_type _re;
value_type _im;
public:
constexpr
complex(const value_type& __re = value_type(), const value_type& __im = value_type())
: _re(__re), _im(__im) {}
template constexpr
complex(const complex<_Xp>& __c)
: _re(__c.real()), _im(__c.imag()) {}

  constexpr value_type real() const {return __re_;}
  constexpr value_type imag() const {return __im_;}

  void real(value_type __re) {__re_ = __re;}
  void imag(value_type __im) {__im_ = __im;}

  complex& operator= (const value_type& __re)
      {__re_ = __re; __im_ = value_type(); return *this;}
  complex& operator+=(const value_type& __re) {__re_ += __re; return *this;}
  complex& operator-=(const value_type& __re) {__re_ -= __re; return *this;}
  complex& operator*=(const value_type& __re) {__re_ *= __re; __im_ *= __re; return *this;}
  complex& operator/=(const value_type& __re) {__re_ /= __re; __im_ /= __re; return *this;}

  template<class _Xp> complex& operator= (const complex<_Xp>& __c)
      {
          __re_ = __c.real();
          __im_ = __c.imag();
          return *this;
      }
  template<class _Xp> complex& operator+=(const complex<_Xp>& __c)
      {
          __re_ += __c.real();
          __im_ += __c.imag();
          return *this;
      }
  template<class _Xp> complex& operator-=(const complex<_Xp>& __c)
      {
          __re_ -= __c.real();
          __im_ -= __c.imag();
          return *this;
      }
  template<class _Xp> complex& operator*=(const complex<_Xp>& __c)
      {
          *this = *this * complex(__c.real(), __c.imag());
          return *this;
      }
  template<class _Xp> complex& operator/=(const complex<_Xp>& __c)
      {
          *this = *this / complex(__c.real(), __c.imag());
          return *this;
      }

};

template<> class complex;

template<>
class complex
{
float _re;
float _im;
public:
typedef float value_type;

  constexpr complex(float __re = 0.0f, float __im = 0.0f)
      : __re_(__re), __im_(__im) {}

  explicit constexpr complex(const complex<double>& __c);

  constexpr float real() const {return __re_;}
  constexpr float imag() const {return __im_;}

  void real(value_type __re) {__re_ = __re;}
  void imag(value_type __im) {__im_ = __im;}

  complex& operator= (float __re)
      {__re_ = __re; __im_ = value_type(); return *this;}
  complex& operator+=(float __re) {__re_ += __re; return *this;}
  complex& operator-=(float __re) {__re_ -= __re; return *this;}
  complex& operator*=(float __re) {__re_ *= __re; __im_ *= __re; return *this;}
  complex& operator/=(float __re) {__re_ /= __re; __im_ /= __re; return *this;}

  template<class _Xp> complex& operator= (const complex<_Xp>& __c)
      {
          __re_ = __c.real();
          __im_ = __c.imag();
          return *this;
      }
  template<class _Xp> complex& operator+=(const complex<_Xp>& __c)
      {
          __re_ += __c.real();
          __im_ += __c.imag();
          return *this;
      }
  template<class _Xp> complex& operator-=(const complex<_Xp>& __c)
      {
          __re_ -= __c.real();
          __im_ -= __c.imag();
          return *this;
      }
  template<class _Xp> complex& operator*=(const complex<_Xp>& __c)
      {
          *this = *this * complex(__c.real(), __c.imag());
          return *this;
      }
  template<class _Xp> complex& operator/=(const complex<_Xp>& __c)
      {
          *this = *this / complex(__c.real(), __c.imag());
          return *this;
      }

};

template<>
class complex
{
double _re;
double _im;
public:
typedef double value_type;

  constexpr complex(double __re = 0.0, double __im = 0.0)
      : __re_(__re), __im_(__im) {}

  constexpr complex(const complex<float>& __c);

  constexpr double real() const {return __re_;}
  constexpr double imag() const {return __im_;}

  void real(value_type __re) {__re_ = __re;}
  void imag(value_type __im) {__im_ = __im;}

  complex& operator= (double __re)
      {__re_ = __re; __im_ = value_type(); return *this;}
  complex& operator+=(double __re) {__re_ += __re; return *this;}
  complex& operator-=(double __re) {__re_ -= __re; return *this;}
  complex& operator*=(double __re) {__re_ *= __re; __im_ *= __re; return *this;}
  complex& operator/=(double __re) {__re_ /= __re; __im_ /= __re; return *this;}

  template<class _Xp> complex& operator= (const complex<_Xp>& __c)
      {
          __re_ = __c.real();
          __im_ = __c.imag();
          return *this;
      }
  template<class _Xp> complex& operator+=(const complex<_Xp>& __c)
      {
          __re_ += __c.real();
          __im_ += __c.imag();
          return *this;
      }
  template<class _Xp> complex& operator-=(const complex<_Xp>& __c)
      {
          __re_ -= __c.real();
          __im_ -= __c.imag();
          return *this;
      }
  template<class _Xp> complex& operator*=(const complex<_Xp>& __c)
      {
          *this = *this * complex(__c.real(), __c.imag());
          return *this;
      }
  template<class _Xp> complex& operator/=(const complex<_Xp>& __c)
      {
          *this = *this / complex(__c.real(), __c.imag());
          return *this;
      }

};

inline
constexpr
complex::complex(const complex& __c)
: _re(__c.real()), _im(__c.imag()) {}

inline
constexpr
complex::complex(const complex& __c)
: _re(__c.real()), _im(__c.imag()) {}

// 26.3.6 operators:

template
inline
complex<_Tp>
operator+(const complex<_Tp>& __x, const complex<_Tp>& __y)
{
complex<_Tp> __t(__x);
__t += __y;
return __t;
}

template
inline
complex<_Tp>
operator+(const complex<_Tp>& __x, const _Tp& __y)
{
complex<_Tp> __t(__x);
__t += __y;
return __t;
}

template
inline
complex<_Tp>
operator+(const _Tp& __x, const complex<_Tp>& __y)
{
complex<_Tp> __t(__y);
__t += __x;
return __t;
}

template
inline
complex<_Tp>
operator-(const complex<_Tp>& __x, const complex<_Tp>& __y)
{
complex<_Tp> __t(__x);
__t -= __y;
return __t;
}

template
inline
complex<_Tp>
operator-(const complex<_Tp>& __x, const _Tp& __y)
{
complex<_Tp> __t(__x);
__t -= __y;
return __t;
}

template
inline
complex<_Tp>
operator-(const _Tp& __x, const complex<_Tp>& __y)
{
complex<_Tp> __t(-__y);
__t += __x;
return __t;
}

template
complex<_Tp>
operator*(const complex<_Tp>& __z, const complex<_Tp>& __w)
{
_Tp __a = __z.real();
_Tp __b = __z.imag();
_Tp __c = __w.real();
_Tp __d = __w.imag();
_Tp __ac = __a * __c;
_Tp __bd = __b * __d;
_Tp __ad = __a * __d;
_Tp __bc = __b * __c;
_Tp __x = __ac - __bd;
_Tp __y = __ad + __bc;
if (isnan(__x) && isnan(__y))
{
bool __recalc = false;
if (isinf(__a) || isinf(__b))
{
__a = copysign(isinf(__a) ? _Tp(1) : _Tp(0), __a);
__b = copysign(isinf(__b) ? _Tp(1) : _Tp(0), __b);
if (isnan(__c))
__c = copysign(_Tp(0), __c);
if (isnan(__d))
__d = copysign(_Tp(0), __d);
__recalc = true;
}
if (isinf(__c) || isinf(__d))
{
__c = copysign(isinf(__c) ? _Tp(1) : _Tp(0), __c);
__d = copysign(isinf(__d) ? _Tp(1) : _Tp(0), __d);
if (isnan(__a))
__a = copysign(_Tp(0), __a);
if (isnan(__b))
__b = copysign(_Tp(0), __b);
__recalc = true;
}
if (!__recalc && (isinf(__ac) || isinf(__bd) ||
isinf(__ad) || isinf(__bc)))
{
if (isnan(__a))
__a = copysign(_Tp(0), __a);
if (isnan(__b))
__b = copysign(_Tp(0), __b);
if (isnan(__c))
__c = copysign(_Tp(0), __c);
if (isnan(__d))
__d = copysign(_Tp(0), __d);
__recalc = true;
}
if (__recalc)
{
__x = _Tp(INFINITY) * (__a * __c - __b * __d);
__y = _Tp(INFINITY) * (__a * __d + __b * __c);
}
}
return complex<_Tp>(__x, __y);
}

template
inline
complex<_Tp>
operator*(const complex<_Tp>& __x, const _Tp& __y)
{
complex<_Tp> __t(__x);
__t *= __y;
return __t;
}

template
inline
complex<_Tp>
operator*(const _Tp& __x, const complex<_Tp>& __y)
{
complex<_Tp> __t(__y);
__t *= __x;
return __t;
}

template
complex<_Tp>
operator/(const complex<_Tp>& __z, const complex<_Tp>& __w)
{
int __ilogbw = 0;
_Tp __a = __z.real();
_Tp __b = __z.imag();
_Tp __c = __w.real();
_Tp __d = __w.imag();
_Tp __logbw = logb(fmax(fabs(__c), fabs(__d)));
if (isfinite(__logbw))
{
__ilogbw = static_cast(__logbw);
__c = scalbn(__c, -__ilogbw);
__d = scalbn(__d, -__ilogbw);
}
_Tp __denom = __c * __c + __d * __d;
_Tp __x = scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
_Tp __y = scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
if (isnan(__x) && isnan(__y))
{
if ((__denom == _Tp(0)) && (!isnan(__a) || !isnan(__b)))
{
__x = copysign(_Tp(INFINITY), __c) * __a;
__y = copysign(_Tp(INFINITY), __c) * __b;
}
else if ((isinf(__a) || isinf(__b)) && isfinite(__c) && isfinite(__d))
{
__a = copysign(isinf(__a) ? _Tp(1) : _Tp(0), __a);
__b = copysign(isinf(__b) ? _Tp(1) : _Tp(0), __b);
__x = _Tp(INFINITY) * (__a * __c + __b * __d);
__y = _Tp(INFINITY) * (__b * __c - __a * __d);
}
else if (isinf(__logbw) && __logbw > _Tp(0) && isfinite(__a) && isfinite(__b))
{
__c = copysign(isinf(__c) ? _Tp(1) : _Tp(0), __c);
__d = copysign(isinf(__d) ? _Tp(1) : _Tp(0), __d);
__x = _Tp(0) * (__a * __c + __b * __d);
__y = _Tp(0) * (__b * __c - __a * __d);
}
}
return complex<_Tp>(__x, __y);
}

template
inline
complex<_Tp>
operator/(const complex<_Tp>& __x, const _Tp& __y)
{
return complex<_Tp>(__x.real() / __y, __x.imag() / __y);
}

template
inline
complex<_Tp>
operator/(const _Tp& __x, const complex<_Tp>& __y)
{
complex<_Tp> __t(__x);
__t /= __y;
return __t;
}

template
inline
complex<_Tp>
operator+(const complex<_Tp>& __x)
{
return __x;
}

template
inline
complex<_Tp>
operator-(const complex<_Tp>& __x)
{
return complex<_Tp>(-__x.real(), -__x.imag());
}

template
inline constexpr
bool
operator==(const complex<_Tp>& __x, const complex<_Tp>& __y)
{
return __x.real() == __y.real() && __x.imag() == __y.imag();
}

template
inline constexpr
bool
operator==(const complex<_Tp>& __x, const _Tp& __y)
{
return __x.real() == __y && __x.imag() == 0;
}

template
inline constexpr
bool
operator==(const _Tp& __x, const complex<_Tp>& __y)
{
return __x == __y.real() && 0 == __y.imag();
}

template
inline constexpr
bool
operator!=(const complex<_Tp>& __x, const complex<_Tp>& __y)
{
return !(__x == __y);
}

template
inline constexpr
bool
operator!=(const complex<_Tp>& __x, const _Tp& __y)
{
return !(__x == __y);
}

template
inline constexpr
bool
operator!=(const _Tp& __x, const complex<_Tp>& __y)
{
return !(__x == __y);
}

template
inline constexpr
bool
operator&&(const complex<_Tp>& __x, const complex<_Tp>& __y)
{
return (__x.real() || __x.imag()) && (__y.real() || __y.imag());
}

// 26.3.7 values:

template <class _Tp, bool = is_integral<_Tp>::value,
bool = is_floating_point<_Tp>::value
>
struct __libcpp_complex_overload_traits {};

// Integral Types
template
struct __libcpp_complex_overload_traits<_Tp, true, false>
{
typedef double _ValueType;
typedef complex _ComplexType;
};

// Floating point types
template
struct __libcpp_complex_overload_traits<_Tp, false, true>
{
typedef _Tp _ValueType;
typedef complex<_Tp> _ComplexType;
};

// real

template
inline constexpr
_Tp
real(const complex<_Tp>& __c)
{
return __c.real();
}

template
inline constexpr
typename __libcpp_complex_overload_traits<_Tp>::_ValueType
real(_Tp __re)
{
return __re;
}

// imag

template
inline constexpr
_Tp
imag(const complex<_Tp>& __c)
{
return __c.imag();
}

template
inline constexpr
typename __libcpp_complex_overload_traits<_Tp>::_ValueType
imag(_Tp)
{
return 0;
}

// abs

template
inline
_Tp
abs(const complex<_Tp>& __c)
{
return hypot(__c.real(), __c.imag());
}

// arg

template
inline
_Tp
arg(const complex<_Tp>& __c)
{
return atan2(__c.imag(), __c.real());
}

template
inline
typename enable_if
<
is_integral<_Tp>::value || is_same<_Tp, double>::value,
double

::type
arg(_Tp __re)
{
return atan2(0., __re);
}

template
inline
typename enable_if<
is_same<_Tp, float>::value,
float

::type
arg(_Tp __re)
{
return atan2f(0.F, __re);
}

}

namespace std {

// norm

template
inline
_Tp
norm(const complex<_Tp>& __c)
{
if (isinf(__c.real()))
return abs(__c.real());
if (isinf(__c.imag()))
return abs(__c.imag());
return __c.real() * __c.real() + __c.imag() * __c.imag();
}

template
inline
typename __libcpp_complex_overload_traits<_Tp>::_ValueType
norm(_Tp __re)
{
typedef typename __libcpp_complex_overload_traits<_Tp>::_ValueType _ValueType;
return static_cast<_ValueType>(__re) * __re;
}

// conj

template
inline
complex<_Tp>
conj(const complex<_Tp>& __c)
{
return complex<_Tp>(__c.real(), -__c.imag());
}

template
inline
typename __libcpp_complex_overload_traits<_Tp>::_ComplexType
conj(_Tp __re)
{
typedef typename __libcpp_complex_overload_traits<_Tp>::_ComplexType _ComplexType;
return _ComplexType(__re);
}

// proj

template
inline
complex<_Tp>
proj(const complex<_Tp>& __c)
{
complex<_Tp> __r = __c;
if (isinf(__c.real()) || isinf(__c.imag()))
__r = complex<_Tp>(INFINITY, copysign(_Tp(0), __c.imag()));
return __r;
}

template
inline
typename enable_if
<
is_floating_point<_Tp>::value,
typename __libcpp_complex_overload_traits<_Tp>::_ComplexType

::type
proj(_Tp __re)
{
if (isinf(__re))
__re = abs(__re);
return complex<_Tp>(__re);
}

template
inline
typename enable_if
<
is_integral<_Tp>::value,
typename __libcpp_complex_overload_traits<_Tp>::_ComplexType

::type
proj(_Tp __re)
{
typedef typename __libcpp_complex_overload_traits<_Tp>::_ComplexType _ComplexType;
return _ComplexType(__re);
}

// polar

template
complex<_Tp>
polar(const _Tp& __rho, const _Tp& __theta = _Tp())
{
if (isnan(__rho) || signbit(__rho))
return complex<_Tp>(_Tp(NAN), _Tp(NAN));
if (isnan(__theta))
{
if (isinf(__rho))
return complex<_Tp>(__rho, __theta);
return complex<_Tp>(__theta, __theta);
}
if (isinf(__theta))
{
if (isinf(__rho))
return complex<_Tp>(__rho, _Tp(NAN));
return complex<_Tp>(_Tp(NAN), _Tp(NAN));
}
_Tp __x = __rho * cos(__theta);
if (isnan(__x))
__x = 0;
_Tp __y = __rho * sin(__theta);
if (isnan(__y))
__y = 0;
return complex<_Tp>(__x, __y);
}

// log

template
inline
complex<_Tp>
log(const complex<_Tp>& __x)
{
return complex<_Tp>(log(abs(__x)), arg(__x));
}

// log10

template
inline
complex<_Tp>
log10(const complex<_Tp>& __x)
{
return log(__x) / log(_Tp(10));
}

// log2

template
inline
complex<_Tp>
log2(const complex<_Tp>& __x)
{
return log(__x) / log(_Tp(2));
}

// sqrt

template
complex<_Tp>
sqrt(const complex<_Tp>& __x)
{
if (isinf(__x.imag()))
return complex<_Tp>(_Tp(INFINITY), __x.imag());
if (isinf(__x.real()))
{
if (__x.real() > _Tp(0))
return complex<_Tp>(__x.real(), isnan(__x.imag()) ? __x.imag() : copysign(_Tp(0), __x.imag()));
return complex<_Tp>(isnan(__x.imag()) ? __x.imag() : _Tp(0), copysign(__x.real(), __x.imag()));
}
return polar(sqrt(abs(__x)), arg(__x) / _Tp(2));
}

// exp

template
complex<_Tp>
exp(const complex<_Tp>& __x)
{
_Tp __i = __x.imag();
if (__i == 0) {
return complex<_Tp>(exp(__x.real()), copysign(_Tp(0), __x.imag()));
}
if (isinf(__x.real()))
{
if (__x.real() < _Tp(0))
{
if (!isfinite(__i))
__i = _Tp(1);
}
else if (__i == 0 || !isfinite(__i))
{
if (isinf(__i))
__i = _Tp(NAN);
return complex<_Tp>(__x.real(), __i);
}
}
_Tp __e = exp(__x.real());
return complex<_Tp>(__e * cos(__i), __e * sin(__i));
}

// pow

template
inline
complex<_Tp>
pow(const complex<_Tp>& __x, const complex<_Tp>& __y)
{
return exp(__y * log(__x));
}

template<class _Tp, class _Up>
inline
complex<typename __promote<_Tp, _Up>::type>
pow(const complex<_Tp>& __x, const complex<_Up>& __y)
{
typedef complex<typename __promote<_Tp, _Up>::type> result_type;
return _VSTD::pow(result_type(__x), result_type(__y));
}

template<class _Tp, class _Up>
inline
typename enable_if
<
is_arithmetic<_Up>::value,
complex<typename __promote<_Tp, _Up>::type>

::type
pow(const complex<_Tp>& __x, const _Up& __y)
{
typedef complex<typename __promote<_Tp, _Up>::type> result_type;
return _VSTD::pow(result_type(__x), result_type(__y));
}

template<class _Tp, class _Up>
inline
typename enable_if
<
is_arithmetic<_Tp>::value,
complex<typename __promote<_Tp, _Up>::type>

::type
pow(const _Tp& __x, const complex<_Up>& __y)
{
typedef complex<typename __promote<_Tp, _Up>::type> result_type;
return _VSTD::pow(result_type(__x), result_type(__y));
}

// __sqr, computes pow(x, 2)

template
inline
complex<_Tp>
__sqr(const complex<_Tp>& __x)
{
return complex<_Tp>((__x.real() - __x.imag()) * (__x.real() + __x.imag()),
_Tp(2) * __x.real() * __x.imag());
}

// asinh

template
complex<_Tp>
asinh(const complex<_Tp>& __x)
{
const _Tp __pi(atan2(+0., -0.));
if (isinf(__x.real()))
{
if (isnan(__x.imag()))
return __x;
if (isinf(__x.imag()))
return complex<_Tp>(__x.real(), copysign(__pi * _Tp(0.25), __x.imag()));
return complex<_Tp>(__x.real(), copysign(_Tp(0), __x.imag()));
}
if (isnan(__x.real()))
{
if (isinf(__x.imag()))
return complex<_Tp>(__x.imag(), __x.real());
if (__x.imag() == 0)
return __x;
return complex<_Tp>(__x.real(), __x.real());
}
if (isinf(__x.imag()))
return complex<_Tp>(copysign(__x.imag(), __x.real()), copysign(__pi/_Tp(2), __x.imag()));
complex<_Tp> __z = log(__x + sqrt(__sqr(__x) + _Tp(1)));
return complex<_Tp>(copysign(__z.real(), __x.real()), copysign(__z.imag(), __x.imag()));
}

// acosh

template
complex<_Tp>
acosh(const complex<_Tp>& __x)
{
const _Tp __pi(atan2(+0., -0.));
if (isinf(__x.real()))
{
if (isnan(__x.imag()))
return complex<_Tp>(abs(__x.real()), __x.imag());
if (isinf(__x.imag()))
{
if (__x.real() > 0)
return complex<_Tp>(__x.real(), copysign(__pi * _Tp(0.25), __x.imag()));
else
return complex<_Tp>(-__x.real(), copysign(__pi * _Tp(0.75), __x.imag()));
}
if (__x.real() < 0)
return complex<_Tp>(-__x.real(), copysign(__pi, __x.imag()));
return complex<_Tp>(__x.real(), copysign(_Tp(0), __x.imag()));
}
if (isnan(__x.real()))
{
if (isinf(__x.imag()))
return complex<_Tp>(abs(__x.imag()), __x.real());
return complex<_Tp>(__x.real(), __x.real());
}
if (isinf(__x.imag()))
return complex<_Tp>(abs(__x.imag()), copysign(__pi/_Tp(2), __x.imag()));
complex<_Tp> __z = log(__x + sqrt(__sqr(__x) - _Tp(1)));
return complex<_Tp>(copysign(__z.real(), _Tp(0)), copysign(__z.imag(), __x.imag()));
}

// atanh

template
complex<_Tp>
atanh(const complex<_Tp>& __x)
{
const _Tp __pi(atan2(+0., -0.));
if (isinf(__x.imag()))
{
return complex<_Tp>(copysign(_Tp(0), __x.real()), copysign(__pi/_Tp(2), __x.imag()));
}
if (isnan(__x.imag()))
{
if (isinf(__x.real()) || __x.real() == 0)
return complex<_Tp>(copysign(_Tp(0), __x.real()), __x.imag());
return complex<_Tp>(__x.imag(), __x.imag());
}
if (isnan(__x.real()))
{
return complex<_Tp>(__x.real(), __x.real());
}
if (isinf(__x.real()))
{
return complex<_Tp>(copysign(_Tp(0), __x.real()), copysign(__pi/_Tp(2), __x.imag()));
}
if (abs(__x.real()) == _Tp(1) && __x.imag() == _Tp(0))
{
return complex<_Tp>(copysign(_Tp(INFINITY), __x.real()), copysign(_Tp(0), __x.imag()));
}
complex<_Tp> __z = log((_Tp(1) + __x) / (_Tp(1) - __x)) / _Tp(2);
return complex<_Tp>(copysign(__z.real(), __x.real()), copysign(__z.imag(), __x.imag()));
}

// sinh

template
complex<_Tp>
sinh(const complex<_Tp>& __x)
{
if (isinf(__x.real()) && !isfinite(__x.imag()))
return complex<_Tp>(__x.real(), _Tp(NAN));
if (__x.real() == 0 && !isfinite(__x.imag()))
return complex<_Tp>(__x.real(), _Tp(NAN));
if (__x.imag() == 0 && !isfinite(__x.real()))
return __x;
return complex<_Tp>(sinh(__x.real()) * cos(__x.imag()), cosh(__x.real()) * sin(__x.imag()));
}

// cosh

template
complex<_Tp>
cosh(const complex<_Tp>& __x)
{
if (isinf(__x.real()) && !isfinite(__x.imag()))
return complex<_Tp>(abs(__x.real()), _Tp(NAN));
if (__x.real() == 0 && !isfinite(__x.imag()))
return complex<_Tp>(_Tp(NAN), __x.real());
if (__x.real() == 0 && __x.imag() == 0)
return complex<_Tp>(_Tp(1), __x.imag());
if (__x.imag() == 0 && !isfinite(__x.real()))
return complex<_Tp>(abs(__x.real()), __x.imag());
return complex<_Tp>(cosh(__x.real()) * cos(__x.imag()), sinh(__x.real()) * sin(__x.imag()));
}

// tanh

template
complex<_Tp>
tanh(const complex<_Tp>& __x)
{
if (isinf(__x.real()))
{
if (!isfinite(__x.imag()))
return complex<_Tp>(copysign(_Tp(1), __x.real()), _Tp(0));
return complex<_Tp>(copysign(_Tp(1), __x.real()), copysign(_Tp(0), sin(_Tp(2) * __x.imag())));
}
if (isnan(__x.real()) && __x.imag() == 0)
return __x;
_Tp __2r(_Tp(2) * __x.real());
_Tp __2i(_Tp(2) * __x.imag());
_Tp __d(cosh(__2r) + cos(__2i));
_Tp __2rsh(sinh(__2r));
if (isinf(__2rsh) && isinf(__d))
return complex<_Tp>(__2rsh > _Tp(0) ? _Tp(1) : _Tp(-1),
__2i > _Tp(0) ? _Tp(0) : _Tp(-0.));
return complex<_Tp>(__2rsh/__d, sin(__2i)/__d);
}

// asin

template
complex<_Tp>
asin(const complex<_Tp>& __x)
{
complex<_Tp> __z = asinh(complex<_Tp>(-__x.imag(), __x.real()));
return complex<_Tp>(__z.imag(), -__z.real());
}

// acos

template
complex<_Tp>
acos(const complex<_Tp>& __x)
{
const _Tp __pi(atan2(+0., -0.));
if (isinf(__x.real()))
{
if (isnan(__x.imag()))
return complex<_Tp>(__x.imag(), __x.real());
if (isinf(__x.imag()))
{
if (__x.real() < _Tp(0))
return complex<_Tp>(_Tp(0.75) * __pi, -__x.imag());
return complex<_Tp>(_Tp(0.25) * __pi, -__x.imag());
}
if (__x.real() < _Tp(0))
return complex<_Tp>(__pi, signbit(__x.imag()) ? -__x.real() : __x.real());
return complex<_Tp>(_Tp(0), signbit(__x.imag()) ? __x.real() : -__x.real());
}
if (isnan(__x.real()))
{
if (isinf(__x.imag()))
return complex<_Tp>(__x.real(), -__x.imag());
return complex<_Tp>(__x.real(), __x.real());
}
if (isinf(__x.imag()))
return complex<_Tp>(__pi/_Tp(2), -__x.imag());
if (__x.real() == 0 && (__x.imag() == 0 || isnan(__x.imag())))
return complex<_Tp>(__pi/_Tp(2), -__x.imag());
complex<_Tp> __z = log(__x + sqrt(__sqr(__x) - _Tp(1)));
if (signbit(__x.imag()))
return complex<_Tp>(abs(__z.imag()), abs(__z.real()));
return complex<_Tp>(abs(__z.imag()), -abs(__z.real()));
}

// atan

template
complex<_Tp>
atan(const complex<_Tp>& __x)
{
complex<_Tp> __z = atanh(complex<_Tp>(-__x.imag(), __x.real()));
return complex<_Tp>(__z.imag(), -__z.real());
}

// sin

template
complex<_Tp>
sin(const complex<_Tp>& __x)
{
complex<_Tp> __z = sinh(complex<_Tp>(-__x.imag(), __x.real()));
return complex<_Tp>(__z.imag(), -__z.real());
}

// cos

template
inline
complex<_Tp>
cos(const complex<_Tp>& __x)
{
return cosh(complex<_Tp>(-__x.imag(), __x.real()));
}

// tan

template
complex<_Tp>
tan(const complex<_Tp>& __x)
{
complex<_Tp> __z = tanh(complex<_Tp>(-__x.imag(), __x.real()));
return complex<_Tp>(__z.imag(), -__z.real());
}

// Literal suffix for complex number literals [complex.literals]
//
// Each operator builds a purely imaginary value: the real part is zero and
// the imaginary part is the literal converted to the target component type.
inline namespace literals
{
  inline namespace complex_literals
  {
    // `i` suffix on a floating-point literal -> complex<double>.
    constexpr complex<double> operator""i(long double __im)
    {
        return { 0.0, static_cast<double>(__im) };
    }

    // `i` suffix on an integer literal -> complex<double>.
    constexpr complex<double> operator""i(unsigned long long __im)
    {
        return { 0.0, static_cast<double>(__im) };
    }


    // `if` suffix on a floating-point literal -> complex<float>.
    constexpr complex<float> operator""if(long double __im)
    {
        return { 0.0f, static_cast<float>(__im) };
    }

    // `if` suffix on an integer literal -> complex<float>.
    constexpr complex<float> operator""if(unsigned long long __im)
    {
        return { 0.0f, static_cast<float>(__im) };
    }
  } // namespace complex_literals

} // namespace literals

} // namespace std

// Reads one element of type scalar_t from a byte buffer.
// base_ptr is reinterpreted as a scalar_t array and `offset` is an element
// index into that array (not a byte offset).
template <typename scalar_t>
__device__ inline scalar_t load(char* base_ptr, uint32_t offset) {
    return *(reinterpret_cast<scalar_t*>(base_ptr) + offset);
}

// Writes one element of type scalar_t into a byte buffer.
// base_ptr is reinterpreted as a scalar_t array and `offset` is an element
// index into that array (not a byte offset).
template <typename scalar_t>
__device__ inline void store(scalar_t value, char *base_ptr, uint32_t offset) {
    *(reinterpret_cast<scalar_t *>(base_ptr) + offset) = value;
}

// aligned vector generates vectorized load/store on CUDA
//
// Fixed-size pack of vec_size elements whose alignment equals the total size
// of the pack, so a whole pack can be copied as one aligned unit.
template <typename scalar_t, int vec_size>
struct alignas(vec_size * sizeof(scalar_t)) aligned_vector
{
    scalar_t val[vec_size];  // the packed elements
};

// Element-wise functor used by the kernel: returns std::abs(x) converted back
// to the argument type T.
template <typename T> T abs_kernel(T x) { return std::abs(x); }

// TODO: setup grid-stride loop

extern "C" global
void abs_kernel_vectorized4_kernel(
const int N,
Array<char*, 1+1> data,
std::complex scalar_val) //[1+1],
{
constexpr int vec_size = 4;
int remaining = N - block_work_size * blockIdx.x;
auto thread_idx = threadIdx.x;
int idx = blockIdx.x;
std::complex arg0[4];

  std::complex<float> out0[4];


  if (remaining < block_work_size) {
    #pragma unroll
    for (int j = 0; j < thread_work_size; j++){
      if (thread_idx >= remaining) {
        break;
      }
      int linear_idx = thread_idx + block_work_size * idx;
      arg0[j] = load<std::complex<float>>(data[1], linear_idx);

      thread_idx += num_threads;
    }
    #pragma unroll
    for (int j = 0; j < thread_work_size; j++) {
      if ((threadIdx.x  + j*num_threads) < remaining) {
        out0[j] = abs_kernel<std::complex<float>>(arg0[j] );
      }
    }
    thread_idx = threadIdx.x;
    #pragma unroll
    for (int j = 0; j < thread_work_size; j++) {
      if (thread_idx >= remaining) {
          break;
      }
      int linear_idx = thread_idx + block_work_size * idx;
      store<std::complex<float>>(out0[j], data[0], linear_idx);

      thread_idx += num_threads;
    }
  } else {
    static constexpr int loop_size = thread_work_size / vec_size;

//actual loading
using vec_t_input = aligned_vector<std::complex, vec_size>;
vec_t_input * vec0 = reinterpret_cast<vec_t_input *>(data[0+1]) + block_work_size / vec_size * idx;

    #pragma unroll
    for (int i = 0; i<loop_size; i++){
      vec_t_input v;
      v = vec0[thread_idx];
      #pragma unroll
      for (int j=0; j < vec_size; j++){
        arg0[vec_size * i + j] = v.val[j];
      }

      thread_idx += num_threads;
    }

    #pragma unroll
    for (int j = 0; j < thread_work_size; j++) {
      out0[j] = abs_kernel<std::complex<float>>(arg0[j] );
    }

    using vec_t_output = aligned_vector<std::complex<float>, vec_size>;
    vec_t_output* to_0 = reinterpret_cast<vec_t_output*>(data[0]) + block_work_size / vec_size * idx;

    int thread_idx = threadIdx.x;
    #pragma unroll
    for (int i = 0; i<loop_size; i++){
      vec_t_output v;
      #pragma unroll
      for (int j=0; j<vec_size; j++){
      v.val[j] = out0[vec_size * i + j];
      }
      to_0[thread_idx] = v;

      thread_idx += num_threads;
    }
  }

}
nvrtc: error: invalid value for --gpu-architecture (-arch)`

@Tobe2d
Copy link
Author

Tobe2d commented Dec 30, 2022

I still can't figure out how to get it to work. Any help?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant