Merge pull request #104 from Point72/tkp/dropdupsfloat

Compare floats by epsilon in `_drop_dups_float`
Point72 · Feb 20, 2024 · 498d83a
2 parents dd9898f + 69fd5de
commit 498d83a
Show file tree

Hide file tree

Showing 4 changed files with 62 additions and 19 deletions.
diff --git a/cpp/csp/cppnodes/baselibimpl.cpp b/cpp/csp/cppnodes/baselibimpl.cpp
@@ -1,6 +1,6 @@
 #include <csp/engine/Dictionary.h>
 #include <csp/engine/CppNode.h>
-
+#include <numeric>
 
 namespace csp::cppnodes
 {
@@ -271,6 +271,41 @@ DECLARE_CPPNODE( filter )
 
 EXPORT_CPPNODE( filter )
 
+/*
+@csp.node
+def _drop_dups_float(x: ts[float], eps: float):
+    __outputs__(ts[float])
+*/
+DECLARE_CPPNODE( _drop_dups_float )
+{
+    INIT_CPPNODE( _drop_dups_float )
+    {}
+
+    TS_INPUT( double, x );
+
+    SCALAR_INPUT( double, eps );
+
+    TS_OUTPUT( double );
+
+    STATE_VAR( bool, s_first{true} );
+    STATE_VAR( double, s_prev{} );
+
+    INVOKE()
+    {
+        if( csp.ticked( x ) )
+        {
+            if( s_first || ( isnan( x ) != isnan( s_prev ) ) || ( !isnan( x ) && fabs( x - s_prev ) >= eps ))
+            {
+                s_first = false;
+                s_prev = x;
+                RETURN( x );
+            }
+        }
+    }
+};
+
+EXPORT_CPPNODE( _drop_dups_float )
+
 /*
 @csp.node
 def drop_nans(x: ts[float]):

diff --git a/cpp/csp/python/cspbaselibimpl.cpp b/cpp/csp/python/cspbaselibimpl.cpp
@@ -335,6 +335,7 @@ REGISTER_CPPNODE( csp::cppnodes, merge );
 REGISTER_CPPNODE( csp::cppnodes, split );
 REGISTER_CPPNODE( csp::cppnodes, cast_int_to_float );
 REGISTER_CPPNODE( csp::cppnodes, filter );
+REGISTER_CPPNODE( csp::cppnodes, _drop_dups_float );
 REGISTER_CPPNODE( csp::cppnodes, drop_nans );
 REGISTER_CPPNODE( csp::cppnodes, unroll );
 REGISTER_CPPNODE( csp::cppnodes, collect );

diff --git a/csp/baselib.py b/csp/baselib.py
@@ -457,26 +457,26 @@ def _drop_dups(x: ts["T"]) -> ts["T"]:
         return x
 
 
-# TODO cppimpl
-@node
-def _drop_dups_float(x: ts[float]) -> ts[float]:
+@node(cppimpl=_cspbaselibimpl._drop_dups_float)
+def _drop_dups_float(x: ts[float], eps: float) -> ts[float]:
     with csp.start():
-        s_prev = csp.impl.constants.UNSET
+        s_prev = None
 
     if csp.ticked(x):
-        if math.isnan(x):
-            if s_prev is csp.impl.constants.UNSET or not math.isnan(s_prev):
-                s_prev = x
-                return x
-        elif x != s_prev:
+        if s_prev is None or math.isnan(x) != math.isnan(s_prev) or (not math.isnan(x) and abs(x - s_prev) >= eps):
             s_prev = x
             return x
 
 
 @graph
-def drop_dups(x: ts["T"]) -> ts["T"]:
+def drop_dups(x: ts["T"], eps: float = None) -> ts["T"]:
+    """removes consecutive duplicates from the input series"""
     if x.tstype.typ is float:
-        return _drop_dups_float(x)
+        if eps is None:
+            eps = 1e-12
+        return _drop_dups_float(x, eps)
+    if eps is not None:
+        raise ValueError("eps should not be passed for non-float")
     return _drop_dups(x)
 
 

diff --git a/csp/tests/test_baselib.py b/csp/tests/test_baselib.py
@@ -895,28 +895,35 @@ def g():
 
     def test_drop_dups(self):
         @csp.graph
-        def g(d1: list, d2: list, d3: list):
+        def g(d1: list, d2: list, d3: list, d4: list, d5: list):
             d1 = csp.unroll(csp.const.using(T=[int])(d1))
             d2 = csp.unroll(csp.const.using(T=[tuple])(d2))
             d3 = csp.unroll(csp.const.using(T=[float])(d3))
+            d4 = csp.unroll(csp.const.using(T=[float])(d4))
+            d5 = csp.unroll(csp.const.using(T=[float])(d5))
 
             csp.add_graph_output("d1", csp.drop_dups(d1))
             csp.add_graph_output("d2", csp.drop_dups(d2))
             csp.add_graph_output("d3", csp.drop_dups(d3))
+            csp.add_graph_output("d4", csp.drop_dups(d4, eps=1e-1))
+            csp.add_graph_output("d5", csp.drop_dups(d5, eps=1e-7))
 
+        eps = {"d4": 1e-1, "d5": 1e-7}
         nan = float("nan")
         d1 = [1, 2, 3, 3, 4, 3, 5, 5]
         d2 = [(1, 2), (1, 2), (3, 4)]
-        d3 = [1.0, 2.0, 3.0, 3.0, nan, 4.0, nan, nan, nan, 5]
-        res = csp.run(g, d1, d2, d3, starttime=datetime(2022, 5, 13))
+        d3 = [1.0, 2.0, 3.0, 3.0, nan, 4.0, nan, nan, nan, 5, 0.3 - 0.2, 0.1]
+        d4 = [0.1, 0.19, 0.5, nan]
+        d5 = [0.3 - 0.2, 0.1, 0.09999999999999, nan, 0.2]
+        res = csp.run(g, d1, d2, d3, d4, d5, starttime=datetime(2022, 5, 13))
 
         for k, tseries in res.items():
             prev = None
             for v in tseries:
-                self.assertNotEqual(v, prev)
-                self.assertTrue(
-                    (not isinstance(v, float)) or (not isinstance(prev, float)) or (math.isnan(v) != math.isnan(prev))
-                )
+                if prev and isinstance(v[1], float) and isinstance(prev[1], float):
+                    self.assertTrue(math.isnan(v[1]) != math.isnan(prev[1]) or abs(v[1] - prev[1]) > eps.get(k, 1e-12))
+                else:
+                    self.assertNotEqual(v, prev)
                 prev = v
 
     def test_struct_fromts(self):