Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dataframe from list of data with multiple symbols #8334

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions Algorithm.Python/OptionChainFullDataRegressionAlgorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,19 @@ def initialize(self):

goog = self.add_equity("GOOG").symbol

option_chain = self.option_chain(goog)

# Demonstration using data frame:
# Get contracts expiring within 10 days, with an implied volatility greater than 0.5 and a delta less than 0.5
contracts = [
contract_data
for contract_data in self.option_chain(goog)
if contract_data.id.date - self.time <= timedelta(days=10) and contract_data.implied_volatility > 0.5 and contract_data.greeks.delta < 0.5
contracts_ids = [
contract_data["id"]
for index, contract_data in option_chain.data_frame.iterrows()
if contract_data["id"].date - self.time <= timedelta(days=10) and contract_data["impliedvolatility"] > 0.5 and contract_data["greeks"].delta < 0.5
]

# Get the contract with the latest expiration date
self._option_contract = sorted(contracts, key=lambda x: x.id.date, reverse=True)[0]
option_contract_id = sorted(contracts_ids, key=lambda id: id.date, reverse=True)[0]
self._option_contract = [x.symbol for x in option_chain if x.symbol.id == option_contract_id][0]

self.add_option_contract(self._option_contract)

Expand Down
8 changes: 8 additions & 0 deletions Common/Data/UniverseSelection/OptionUniverse.cs
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,14 @@ public PreCalculatedGreeks(string csvLine)
{
_csvLine = csvLine;
}

/// <summary>
/// Builds a compact, single-line summary of the greeks,
/// listing delta, gamma, vega, theta and rho in that order
/// </summary>
/// <returns>The greeks formatted as "D: .., G: .., V: .., T: .., R: .."</returns>
public override string ToString() => $"D: {Delta}, G: {Gamma}, V: {Vega}, T: {Theta}, R: {Rho}";
}
}
}
82 changes: 49 additions & 33 deletions Common/Python/PandasConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,22 +70,7 @@ public PyObject GetDataFrame(IEnumerable<Slice> data, Type dataType = null)
AddSliceDataTypeDataToDict(slice, requestedTick, requestedTradeBar, requestedQuoteBar, sliceDataDict, ref maxLevels, dataType);
}

using (Py.GIL())
{
if (sliceDataDict.Count == 0)
{
return _pandas.DataFrame();
}
using var dataFrames = sliceDataDict.Select(x => x.Value.ToPandasDataFrame(maxLevels)).ToPyListUnSafe();
using var sortDic = Py.kw("sort", true);
var result = _concat.Invoke(new[] { dataFrames }, sortDic);

foreach (var df in dataFrames)
{
df.Dispose();
}
return result;
}
return CreateDataFrame(sliceDataDict, maxLevels);
}

/// <summary>
Expand All @@ -97,27 +82,23 @@ public PyObject GetDataFrame(IEnumerable<Slice> data, Type dataType = null)
public PyObject GetDataFrame<T>(IEnumerable<T> data)
where T : IBaseData
{
PandasData sliceData = null;
var pandasDataBySymbol = new Dictionary<SecurityIdentifier, PandasData>();
foreach (var datum in data)
{
if (sliceData == null)
if (!pandasDataBySymbol.TryGetValue(datum.Symbol.ID, out var pandasData))
{
sliceData = new PandasData(datum);
pandasData = new PandasData(datum);
pandasDataBySymbol[datum.Symbol.ID] = pandasData;
}

sliceData.Add(datum);
pandasData.Add(datum);
}

using (Py.GIL())
{
// If sliceData is still null, data is an empty enumerable
// returns an empty pandas.DataFrame
if (sliceData == null)
{
return _pandas.DataFrame();
}
return sliceData.ToPandasDataFrame();
}
return CreateDataFrame(pandasDataBySymbol,
sort: false,
// Multiple data frames (one for each symbol) will be concatenated,
// so make sure rows with missing values only are not filtered out before concatenation
filterMissingValueColumns: pandasDataBySymbol.Count <= 1);
}

/// <summary>
Expand Down Expand Up @@ -187,9 +168,44 @@ public PyObject GetIndicatorDataFrame(PyObject data)
/// <returns></returns>
public override string ToString()
{
return _pandas == null
? Messages.PandasConverter.PandasModuleNotImported
: _pandas.Repr();
if (_pandas == null)
{
return Messages.PandasConverter.PandasModuleNotImported;
}

using (Py.GIL())
{
return _pandas.Repr();
}
}

/// <summary>
/// Creates a single data frame by concatenating the data frames generated from each entry of the given data
/// </summary>
/// <param name="dataBySymbol">The pandas data to convert, keyed by security identifier</param>
/// <param name="maxLevels">Number of multi index levels, forwarded to <see cref="PandasData.ToPandasDataFrame"/></param>
/// <param name="sort">Value passed as the "sort" keyword argument of the concatenation call</param>
/// <param name="filterMissingValueColumns">Forwarded to <see cref="PandasData.ToPandasDataFrame"/> for each entry</param>
/// <returns>The concatenated data frame, or an empty data frame if <paramref name="dataBySymbol"/> has no entries</returns>
private static PyObject CreateDataFrame(Dictionary<SecurityIdentifier, PandasData> dataBySymbol, int maxLevels = 2, bool sort = true,
    bool filterMissingValueColumns = true)
{
    using (Py.GIL())
    {
        if (dataBySymbol.Count == 0)
        {
            return _pandas.DataFrame();
        }

        using var dataFrames = dataBySymbol.Select(x => x.Value.ToPandasDataFrame(maxLevels, filterMissingValueColumns)).ToPyListUnSafe();
        try
        {
            using var sortDic = Py.kw("sort", sort);
            var result = _concat.Invoke(new[] { dataFrames }, sortDic);

            // Drop columns with only NaN or None values.
            // The bound method and its return value are temporary Python references,
            // so they are released as soon as the in-place call completes.
            using var kwargs = Py.kw("axis", 1, "inplace", true, "how", "all");
            using var dropna = result.GetAttr("dropna");
            using var dropnaResult = dropna.Invoke(Array.Empty<PyObject>(), kwargs);

            return result;
        }
        finally
        {
            // Release the individual data frames even if the concatenation or the column drop throws
            foreach (var df in dataFrames)
            {
                df.Dispose();
            }
        }
    }
}

/// <summary>
Expand Down
14 changes: 8 additions & 6 deletions Common/Python/PandasData.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
using System.Globalization;
using System.Linq;
using System.Reflection;
using QuantConnect.Util;

namespace QuantConnect.Python
{
Expand Down Expand Up @@ -158,7 +157,10 @@ public PandasData(object data)
}
else
{
var members = type.GetMembers().Where(x => x.MemberType == MemberTypes.Field || x.MemberType == MemberTypes.Property).ToList();
var members = type
.GetMembers(BindingFlags.Instance | BindingFlags.Public)
.Where(x => x.MemberType == MemberTypes.Field || x.MemberType == MemberTypes.Property)
.ToList();

var duplicateKeys = members.GroupBy(x => x.Name.ToLowerInvariant()).Where(x => x.Count() > 1).Select(x => x.Key);
foreach (var duplicateKey in duplicateKeys)
Expand All @@ -179,8 +181,7 @@ public PandasData(object data)
}
}

var customColumns = new HashSet<string>(columns);
customColumns.Add("value");
var customColumns = new HashSet<string>(columns) { "value" };
customColumns.UnionWith(keys);

columns = customColumns;
Expand Down Expand Up @@ -342,8 +343,9 @@ public void AddTick(Tick tick)
/// Get the pandas.DataFrame of the current <see cref="PandasData"/> state
/// </summary>
/// <param name="levels">Number of levels of the multi index</param>
/// <param name="filterMissingValueColumns">If false, columns that contain only "missing" values are still added to the data frame</param>
/// <returns>pandas.DataFrame object</returns>
public PyObject ToPandasDataFrame(int levels = 2)
public PyObject ToPandasDataFrame(int levels = 2, bool filterMissingValueColumns = true)
{
List<PyObject> list;
var symbol = _symbol.ID.ToString().ToPython();
Expand Down Expand Up @@ -383,7 +385,7 @@ public PyObject ToPandasDataFrame(int levels = 2)
using var pyDict = new PyDict();
foreach (var kvp in _series)
{
if (kvp.Value.ShouldFilter) continue;
if (filterMissingValueColumns && kvp.Value.ShouldFilter) continue;

if (!indexCache.TryGetValue(kvp.Value.Times, out var index))
{
Expand Down
35 changes: 35 additions & 0 deletions Tests/Python/PandasConverterTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,41 @@ public void HandlesEnumerableDataType()
}
}

[Test]
public void HandlesEnumerableWithMultipleSymbols()
{
    // Two bars for each of two different symbols, supplied as a flat enumerable
    var converter = new PandasConverter();
    var data = new List<BaseData>
    {
        new TradeBar(new DateTime(2020, 1, 2), Symbols.IBM, 101m, 102m, 100m, 101m, 10m),
        new TradeBar(new DateTime(2020, 1, 3), Symbols.IBM, 101m, 102m, 100m, 101m, 20m),
        new TradeBar(new DateTime(2020, 1, 2), Symbols.SPY_C_192_Feb19_2016, 51m, 52m, 50m, 51m, 100m),
        new TradeBar(new DateTime(2020, 1, 3), Symbols.SPY_C_192_Feb19_2016, 51m, 52m, 50m, 51m, 200m),
    };

    dynamic dataFrame = converter.GetDataFrame(data);

    using (Py.GIL())
    {
        Assert.Multiple(() =>
        {
            // The whole data frame is checked once: the check does not depend on the symbol being iterated
            Assert.IsFalse(dataFrame.empty.AsManagedObject(typeof(bool)), "Unexpected empty dataframe");

            foreach (var symbol in data.Select(x => x.Symbol).Distinct())
            {
                // Each symbol must have its own non-empty slice with one row per bar
                var subDataFrame = dataFrame.loc[symbol];
                Assert.IsFalse(subDataFrame.empty.AsManagedObject(typeof(bool)), $"Unexpected empty sub dataframe for {symbol}");

                var count = subDataFrame.__len__().AsManagedObject(typeof(int));
                Assert.AreEqual(2, count, $"Unexpected rows count for {symbol} sub dataframe");

                var dataCount = subDataFrame.values.__len__().AsManagedObject(typeof(int));
                Assert.AreEqual(2, dataCount, $"Unexpected values count for {symbol} sub dataframe");
            }
        });
    }
}

[Test]
public void HandlesEmptyEnumerable()
{
Expand Down
Loading