Skip to content

Commit

Permalink
Merge branch 'main' into jpivarski/make-to_list-safe-for-overriding-g…
Browse files Browse the repository at this point in the history
…etitem
  • Loading branch information
jpivarski authored Aug 31, 2022
2 parents af14c2a + 9901b44 commit 079951d
Show file tree
Hide file tree
Showing 15 changed files with 545 additions and 245 deletions.
9 changes: 9 additions & 0 deletions .all-contributorsrc
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,15 @@
"contributions": [
"code"
]
},
{
"login": "Saransh-cpp",
"name": "Saransh",
"avatar_url": "https://avatars.githubusercontent.com/u/74055102?v=4",
"profile": "https://saransh-cpp.github.io/",
"contributions": [
"code"
]
}
],
"contributorsPerLine": 7,
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ Thanks especially to the gracious help of Awkward Array contributors (including
<td align="center"><a href="https://github.com/Ahmad-AlSubaie"><img src="https://avatars.githubusercontent.com/u/32343365?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Ahmad-AlSubaie</b></sub></a><br /><a href="https://github.com/scikit-hep/awkward/commits?author=Ahmad-AlSubaie" title="Code">💻</a></td>
<td align="center"><a href="https://github.com/ManasviGoyal"><img src="https://avatars.githubusercontent.com/u/55101825?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Manasvi Goyal</b></sub></a><br /><a href="https://github.com/scikit-hep/awkward/commits?author=ManasviGoyal" title="Code">💻</a></td>
<td align="center"><a href="https://github.com/aryan26roy"><img src="https://avatars.githubusercontent.com/u/50577809?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Aryan Roy</b></sub></a><br /><a href="https://github.com/scikit-hep/awkward/commits?author=aryan26roy" title="Code">💻</a></td>
<td align="center"><a href="https://saransh-cpp.github.io/"><img src="https://avatars.githubusercontent.com/u/74055102?v=4?s=100" width="100px;" alt=""/><br /><sub><b>Saransh</b></sub></a><br /><a href="https://github.com/scikit-hep/awkward/commits?author=Saransh-cpp" title="Code">💻</a></td>
</tr>
</table>

Expand Down
314 changes: 189 additions & 125 deletions src/awkward/_v2/_connect/rdataframe/from_rdataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,21 @@
from awkward._v2.types.numpytype import primitive_to_dtype

# Maps a dtype name (looked up below via ``form_dtype(form).name``) to the
# C++ type spelling passed as a template argument to the LayoutBuilder
# classes. Each key appears exactly once; repeated keys in a dict literal are
# silently collapsed by Python and only hide typos.
cpp_type_of = {
    "bool": "bool",
    "int8": "int8_t",
    "uint8": "uint8_t",
    "int16": "int16_t",
    "uint16": "uint16_t",
    "int32": "int32_t",
    "uint32": "uint32_t",
    "int64": "int64_t",
    "uint64": "uint64_t",
    "float32": "float",
    "float64": "double",
    "complex64": "std::complex<float>",
    "complex128": "std::complex<double>",
    "datetime64": "std::time_t",
    # NOTE(review): std::difftime is a function in <ctime>, not a type --
    # confirm this entry is usable as a template argument.
    "timedelta64": "std::difftime",
}

np = ak.nplike.NumpyMetadata.instance()
Expand Down Expand Up @@ -43,140 +54,193 @@
assert done is True


def from_rdataframe(data_frame, column):
def _wrap_as_record_array(array):
    """Wrap ``array`` as a high-level record array with a single field.

    ``array`` may be a high-level ``ak._v2.highlevel.Array`` (its ``layout``
    is used) or anything else, which is passed through as the layout itself.
    The field name is ``column``, a free variable taken from the enclosing
    scope.
    """
    layout = array.layout if isinstance(array, ak._v2.highlevel.Array) else array
    return ak._v2._util.wrap(
        ak._v2.contents.RecordArray(
            fields=[column],
            contents=[layout],
        ),
        highlevel=True,
    )
def from_rdataframe(data_frame, columns):
def form_dtype(form):
    """Return ``primitive_to_dtype`` of the innermost primitive of ``form``.

    A ``NumpyForm`` with an empty ``inner_shape`` is converted directly; a
    ``ListOffsetForm`` is unwrapped recursively through its ``content``.
    Any other form is not handled and yields ``None``.
    """
    if isinstance(form, ak._v2.forms.NumpyForm) and form.inner_shape == ():
        return primitive_to_dtype(form.primitive)
    elif isinstance(form, ak._v2.forms.ListOffsetForm):
        return form_dtype(form.content)
    # Unrecognized form: same result as falling off the end of the function.
    return None

def empty_buffers(cpp_buffers_self, names_nbytes):
    """Allocate one uninitialized array per entry and register it with C++.

    For every ``item`` in ``names_nbytes`` an array is created with
    ``ak.nplike.numpy.empty(item.second)``, stored under ``item.first``, and
    a ``ubyte`` pointer to its data is handed to ``cpp_buffers_self.append``
    under the same name.

    Returns the dict mapping each ``item.first`` to its array; the dict keeps
    the arrays referenced after their raw pointers have been passed on.
    """
    buffers = {}
    for item in names_nbytes:
        # NOTE(review): no dtype is passed to ``empty``; if the default
        # element size is not one byte the allocation exceeds ``item.second``
        # bytes -- confirm this is intended.
        buffers[item.first] = ak.nplike.numpy.empty(item.second)
        cpp_buffers_self.append(
            item.first,
            buffers[item.first].ctypes.data_as(ctypes.POINTER(ctypes.c_ubyte)),
        )
    return buffers

# Cast input node to base RNode type
data_frame_rnode = cppyy.gbl.ROOT.RDF.AsRNode(data_frame)
def cpp_builder_type(depth, data_type):
    """Return the C++ LayoutBuilder type string for ``depth`` nesting levels.

    ``depth == 1`` gives a ``Numpy<data_type>`` builder; every additional
    level wraps the inner string in ``ListOffset<int64_t, ...>``.

    NOTE(review): the returned text carries one more ``>`` than ``<``
    (the depth-1 case ends in ``>>``). Presumably this matches what the
    template-argument consumer expects -- confirm before changing.
    ``depth`` values below 1 are not guarded and recurse without end.
    """
    if depth == 1:
        return f"awkward::LayoutBuilder::Numpy<{data_type}>>"
    else:
        return (
            "awkward::LayoutBuilder::ListOffset<int64_t, "
            + cpp_builder_type(depth - 1, data_type)
            + ">"
        )

column_type = data_frame_rnode.GetColumnType(column)
form_str = ROOT.awkward.type_to_form[column_type](0)
def cpp_fill_offsets_and_flatten(depth):
    """Return C++ source text for the nested fill loops of ``depth`` levels.

    At ``depth == 1`` the text is a range-for over ``vec1`` appending each
    element to ``builder1``. For deeper levels it is a range-for over
    ``vec{depth}`` that calls ``begin_list()`` on ``builder{depth}``, embeds
    the text for ``depth - 1``, and then calls ``end_list()``.

    ``depth`` values below 1 are not guarded and recurse without end.
    """
    if depth == 1:
        return "\nfor (auto it : vec1) {\n" + " builder1.append(it);\n" + "}\n"
    else:
        return (
            f"for (auto const& vec{depth - 1} : vec{depth}) "
            + "{\n"
            + f" auto& builder{depth - 1} = builder{depth}.begin_list();\n"
            + " "
            + cpp_fill_offsets_and_flatten(depth - 1)
            + "\n"
            + f" builder{depth}.end_list();\n"
            + "}\n"
        )

# 'Take' is a lazy action:
result_ptrs = data_frame_rnode.Take[column_type](column)

if form_str.startswith("{"):
form = ak._v2.forms.from_json(form_str)
list_depth = form.purelist_depth
if list_depth > 4:
raise ak._v2._util.error(
NotImplementedError(
"Retrieving arbitrary depth nested containers is not implemented yet."
)
def cpp_fill_function(depth):
    """Return the C++ source of a templated fill function for ``depth`` levels.

    For ``depth == 1`` the generated function is named ``fill_from`` and
    appends every element of ``result`` to ``builder``. For deeper levels it
    is named ``fill_offsets_and_flatten{depth}``; it loops over ``result``,
    opens a list on ``builder{depth}``, embeds the loops produced by
    ``cpp_fill_offsets_and_flatten(depth - 1)``, and closes the list.

    NOTE(review): the depth-1 function is not named
    ``fill_offsets_and_flatten1``, so a lookup by that name would not find
    it -- confirm the depth-1 branch is only used where ``fill_from`` is
    expected.
    """
    if depth == 1:
        return (
            "template<class BUILDER, typename PRIMITIVE>\n"
            + "void\n"
            + "fill_from(BUILDER& builder, ROOT::RDF::RResultPtr<std::vector<PRIMITIVE>>& result) {"
            + " for (auto it : result) {\n"
            + " builder.append(it);\n"
            + " }\n"
            + "}\n"
        )
    else:
        return (
            "template<class BUILDER, typename PRIMITIVE>\n"
            + "void\n"
            + f"fill_offsets_and_flatten{depth}(BUILDER& builder{depth}, ROOT::RDF::RResultPtr<std::vector<PRIMITIVE>>& result) "
            + "{\n"
            + f" for (auto const& vec{depth - 1} : result) "
            + "{\n"
            + f" auto& builder{depth - 1} = builder{depth}.begin_list();\n"
            + " "
            + cpp_fill_offsets_and_flatten(depth - 1)
            + "\n"
            + f" builder{depth}.end_list();\n"
            + "}\n"
            + "}\n"
        )

def supported(form):
    """Return True if ``form`` is a pure chain of list-offset forms.

    At ``purelist_depth == 1`` either a ``ListOffsetForm`` or a ``NumpyForm``
    is accepted; at greater depths ``form`` must be a ``ListOffsetForm``
    whose ``content`` is itself supported.
    """
    if form.purelist_depth == 1:
        # special case for a list of strings form
        return isinstance(
            form, (ak._v2.forms.ListOffsetForm, ak._v2.forms.NumpyForm)
        )
    else:
        return isinstance(form, ak._v2.forms.ListOffsetForm) and supported(
            form.content
        )
is_indexed = True if "awkward_index_" in data_frame.GetColumnNames() else False

if not supported(form):
raise ak._v2._util.error(NotImplementedError)
# Register Take action for each column
# 'Take' is a lazy action:
result_ptrs = {}
column_types = {}
contents_index = None
columns = (
columns + ("awkward_index_",)
if (is_indexed and "awkward_index_" not in columns)
else columns
)
for col in columns:
column_types[col] = data_frame.GetColumnType(col)
result_ptrs[col] = data_frame.Take[column_types[col]](col)

contents = {}
awkward_contents = {}
contents_index = {}
for col in columns:
col_type = column_types[col]
if ROOT.awkward.is_awkward_type[col_type](): # Retrieve Awkward arrays

# ROOT::RDF::RResultPtr<T>::begin Returns an iterator to the beginning of
# the contained object if this makes sense, throw a compilation error otherwise.
#
# Does not trigger event loop and execution of all actions booked in
# the associated RLoopManager.
lookup = result_ptrs[col].begin().lookup()
generator = lookup[col].generator
layout = generator.tolayout(lookup[col], 0, ())
awkward_contents[col] = layout

else: # Convert the C++ vectors to Awkward arrays
form_str = ROOT.awkward.type_to_form[col_type](0)
form = ak._v2.forms.from_json(form_str)

list_depth = form.purelist_depth
form_dtype_name = form_dtype(form).name
data_type = cpp_type_of[form_dtype_name]

# pull in the CppBuffers (after which we can import from it)
CppBuffers = cppyy.gbl.awkward.CppBuffers[col_type]
cpp_buffers_self = CppBuffers(result_ptrs[col])

if isinstance(form, ak._v2.forms.NumpyForm):

NumpyBuilder = cppyy.gbl.awkward.LayoutBuilder.Numpy[data_type]
builder = NumpyBuilder()
builder_type = type(builder).__cpp_name__

cpp_buffers_self.fill_from[builder_type, col_type](
builder, result_ptrs[col]
)

def form_dtype(form):
if form.purelist_depth == 1:
# special case for a list of strings form
return (
primitive_to_dtype(form.content.primitive)
if isinstance(form, ak._v2.forms.ListOffsetForm)
else primitive_to_dtype(form.primitive)
names_nbytes = cpp_buffers_self.names_nbytes[builder_type](builder)
buffers = empty_buffers(cpp_buffers_self, names_nbytes)
cpp_buffers_self.to_char_buffers[builder_type](builder)

elif isinstance(form, ak._v2.forms.ListOffsetForm):
if isinstance(form.content, ak._v2.forms.NumpyForm):
# NOTE: list_depth == 2 or 1 if its the list of strings
list_depth = 2

ListOffsetBuilder = cppyy.gbl.awkward.LayoutBuilder.ListOffset[
"int64_t",
cpp_builder_type(list_depth - 1, data_type),
]
builder = ListOffsetBuilder()
builder_type = type(builder).__cpp_name__

if not hasattr(
cppyy.gbl.awkward, f"fill_offsets_and_flatten{list_depth}"
):
done = cppyy.cppdef(
"namespace awkward {" + cpp_fill_function(list_depth) + "}"
)
assert done is True

fill_from_func = getattr(
cppyy.gbl.awkward, f"fill_offsets_and_flatten{list_depth}"
)
fill_from_func[builder_type, col_type](builder, result_ptrs[col])
else:
return form_dtype(form.content)

def empty_buffers(cpp_buffers_self, names_nbytes):
buffers = {}
for item in names_nbytes:
buffers[item.first] = ak.nplike.numpy.empty(item.second)
cpp_buffers_self.append(
item.first,
buffers[item.first].ctypes.data_as(ctypes.POINTER(ctypes.c_ubyte)),
raise ak._v2._util.error(
AssertionError(f"unrecognized Form: {type(form)}")
)
return buffers

data_type = cpp_type_of[form_dtype(form).name]

# pull in the CppBuffers (after which we can import from it)
CppBuffers = cppyy.gbl.awkward.CppBuffers[column_type]
cpp_buffers_self = CppBuffers(result_ptrs)

if isinstance(form, ak._v2.forms.NumpyForm):

NumpyBuilder = cppyy.gbl.awkward.LayoutBuilder.Numpy[data_type]
builder = NumpyBuilder()
builder_type = type(builder).__cpp_name__

cpp_buffers_self.fill_from[builder_type](builder)

elif isinstance(form, ak._v2.forms.ListOffsetForm) and isinstance(
form.content, ak._v2.forms.NumpyForm
):
# NOTE: list_depth == 2 or 1 if its the list of strings
ListOffsetBuilder = cppyy.gbl.awkward.LayoutBuilder.ListOffset[
"int64_t",
f"awkward::LayoutBuilder::Numpy<{data_type}",
]
builder = ListOffsetBuilder()
builder_type = type(builder).__cpp_name__

cpp_buffers_self.fill_offsets_and_flatten_2[builder_type](builder)
names_nbytes = cpp_buffers_self.names_nbytes[builder_type](builder)
buffers = empty_buffers(cpp_buffers_self, names_nbytes)
cpp_buffers_self.to_char_buffers[builder_type](builder)

elif list_depth == 3:
ListOffsetBuilder = cppyy.gbl.awkward.LayoutBuilder.ListOffset[
"int64_t",
f"awkward::LayoutBuilder::ListOffset<int64_t, awkward::LayoutBuilder::Numpy<{data_type}>",
]
builder = ListOffsetBuilder()
builder_type = type(builder).__cpp_name__

cpp_buffers_self.fill_offsets_and_flatten_3[builder_type](builder)
array = ak._v2.from_buffers(
form,
builder.length(),
buffers,
)

if col == "awkward_index_":
contents_index = ak._v2.index.Index64(
array.layout.to_numpy(allow_missing=True)
)
else:
contents[col] = array.layout

for col, content in awkward_contents.items():
# wrap Awkward array in IndexedArray only if needed
if contents_index is not None and len(contents_index) < len(content):
array = ak._v2._util.wrap(
ak._v2.contents.IndexedArray(contents_index, content),
highlevel=True,
)
contents[col] = array.layout
else:
ListOffsetBuilder = cppyy.gbl.awkward.LayoutBuilder.ListOffset[
"int64_t",
f"awkward::LayoutBuilder::ListOffset<int64_t, awkward::LayoutBuilder::ListOffset<int64_t, awkward::LayoutBuilder::Numpy<{data_type}>>",
]
builder = ListOffsetBuilder()
builder_type = type(builder).__cpp_name__

cpp_buffers_self.fill_offsets_and_flatten_4[builder_type](builder)

names_nbytes = cpp_buffers_self.names_nbytes[builder_type](builder)
buffers = empty_buffers(cpp_buffers_self, names_nbytes)
cpp_buffers_self.to_char_buffers[builder_type, data_type](builder)

array = ak._v2.from_buffers(
form,
builder.length(),
buffers,
)
return _wrap_as_record_array(array)

elif form_str == "awkward type":

# ROOT::RDF::RResultPtr<T>::begin Returns an iterator to the beginning of
# the contained object if this makes sense, throw a compilation error otherwise.
#
# Does not trigger event loop and execution of all actions booked in
# the associated RLoopManager.
lookup = result_ptrs.begin().lookup()
generator = lookup[column].generator
layout = generator.tolayout(lookup[column], 0, ())

return _wrap_as_record_array(layout)
else:
raise ak._v2._util.error(NotImplementedError)
contents[col] = content

return ak._v2._util.wrap(
ak._v2.contents.RecordArray(list(contents.values()), list(contents.keys())),
highlevel=True,
)
2 changes: 2 additions & 0 deletions src/awkward/_v2/_connect/rdataframe/to_rdataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,4 +314,6 @@ class {array_data_source} final
(self.data_ptrs_list),
)

rdf = rdf.Define("awkward_index_", "(int64_t)rdfentry_")

return rdf
Loading

0 comments on commit 079951d

Please sign in to comment.