Point72 · svatasoiu · Jun 20, 2024 · Jun 13, 2024 · Jun 18, 2024
@@ -1,3 +1,6 @@
+#ifndef _IN_CSP_CPPNODES_STATSIMPL_H
+#define _IN_CSP_CPPNODES_STATSIMPL_H
+
 #include <csp/engine/CppNode.h>
 #include <csp/engine/WindowBuffer.h>
 
@@ -1569,7 +1572,7 @@ class EMA
                 m_ema = x;
                 m_first = false;
             }
-            else if( unlikely( isnan( x ) ) && !m_ignore_na )
+            else if( unlikely( isnan( x ) ) && !m_ignore_na && likely( !m_first ) )
             {
                 m_offset++;
             }
@@ -1582,8 +1585,9 @@ class EMA
                 }
                 else
                 {
-                    m_ema = ( pow( m_ema * ( 1 - m_alpha ), m_offset ) + m_alpha * x ) /
+                    m_ema = ( m_ema * pow( ( 1 - m_alpha ), m_offset ) + m_alpha * x ) / 
                         ( pow( 1 - m_alpha, m_offset ) + m_alpha );
+                    m_offset = 1;
                 }
             }
         }
@@ -1599,7 +1603,7 @@ class EMA
 
         double compute() const
         {
-            return m_ema;
+            return unlikely( m_first ) ? std::numeric_limits<double>::quiet_NaN() : m_ema;
         }
 
     private:
@@ -1710,7 +1714,7 @@ class AlphaDebiasEMA
 
         void add( double x )
         {
-            if( m_first )
+            if( m_first && likely( !isnan( x ) ) )
             {
                 m_wsum = 1;
                 m_sqsum = 1;
@@ -1730,8 +1734,14 @@ class AlphaDebiasEMA
                     w0 = 1 - m_decay;
                 m_sqsum += pow( w0, 2 );
                 m_wsum += w0;
+                if( !m_adjust )
+                {
+                    double correction = decay_factor + w0;
+                    m_wsum /= correction;
+                    m_sqsum /= ( correction * correction );
+                }
             }
-            else
+            else if ( likely( !m_first ) )
             {
                 m_offset++;
                 m_nan_count++;
@@ -2358,3 +2368,5 @@ DECLARE_CPPNODE( _generic_cross_sectional )
 };
 
 }
+
+#endif // _IN_CSP_CPPNODES_STATSIMPL_H
@@ -464,32 +464,53 @@ def graph():
         np.testing.assert_almost_equal(excess_kurtosis, kurtosis - 3, decimal=7)
 
     def test_ema(self):
-        dvalues = np.random.uniform(low=-100, high=100, size=(1000,))
+        N = 1000
+        dvalues = np.random.uniform(low=-100, high=100, size=(N,))
+        dvalues[0] = np.nan  # this forces edge cases around first value being nan
+        for i in range(N):
+            p = np.random.rand()
+            if p < 0.2:
+                dvalues[i] = np.nan
+
         st = datetime(2020, 1, 1)
 
         @csp.graph
         def graph():
-            x = csp.curve(typ=float, data=[(st + timedelta(milliseconds=i + 1), dvalues[i]) for i in range(1000)])
+            x = csp.curve(typ=float, data=[(st + timedelta(milliseconds=i + 1), dvalues[i]) for i in range(N)])
             ema = csp.stats.ema(x, alpha=0.1, adjust=False)
             ema_var = csp.stats.ema_var(x, min_periods=3, span=20, adjust=True, bias=True)
             ema_std = csp.stats.ema_std(x, min_periods=3, span=20, adjust=True, bias=False)
+            ema_std2 = csp.stats.ema_std(x, min_periods=3, span=20, adjust=False, ignore_na=False, bias=False)
 
             csp.add_graph_output("ema", ema)
             csp.add_graph_output("ema_v", ema_var)
             csp.add_graph_output("ema_s", ema_std)
+            csp.add_graph_output("ema_s2", ema_std2)
 
         values = pd.Series(dvalues)
         pd_alpha = values.ewm(alpha=0.1, adjust=False).mean()
         pd_span = values.ewm(span=20, adjust=True)
         pd_var = pd_span.var(bias=True)
         pd_std = pd_span.std(bias=False)
 
+        pd_span2 = values.ewm(span=20, adjust=False, ignore_na=False)
+        pd_std2 = pd_span2.std(bias=False)
+
         results = csp.run(graph, starttime=st, endtime=st + timedelta(milliseconds=1000))
 
-        # floats, ensure accurate to 1e-6
-        np.testing.assert_almost_equal(np.array(pd_alpha), np.array(results["ema"])[:, 1], decimal=7)
-        np.testing.assert_almost_equal(np.array(pd_var)[2:], np.array(results["ema_v"])[:, 1], decimal=7)
-        np.testing.assert_almost_equal(np.array(pd_std)[2:], np.array(results["ema_s"])[:, 1], decimal=7)
+        # floats: match pandas within atol=1.5e-7 (plus default rtol=1e-7); NaN positions must agree
+        np.testing.assert_allclose(
+            np.array(pd_alpha), np.array(results["ema"])[:, 1].astype(np.float64), atol=1.5e-7, equal_nan=True
+        )
+        np.testing.assert_allclose(
+            np.array(pd_var)[2:], np.array(results["ema_v"])[:, 1].astype(np.float64), atol=1.5e-7, equal_nan=True
+        )
+        np.testing.assert_allclose(
+            np.array(pd_std)[2:], np.array(results["ema_s"])[:, 1].astype(np.float64), atol=1.5e-7, equal_nan=True
+        )
+        np.testing.assert_allclose(
+            np.array(pd_std2)[2:], np.array(results["ema_s2"])[:, 1].astype(np.float64), atol=1.5e-7, equal_nan=True
+        )
 
     def test_triggers(self):
         dvalues = [i + 1 for i in range(20)]