Skip to content

Commit b3d0539

Browse files
committed
[python] Extend RDataFrame to Numpy array conversion of boolean types
* Use ROOT::RVec as the single data structure to recover all types of values via the Take operation. This helps avoid issues with vectors of boolean values, whether they are visible as the 'bool' or 'Bool_t' C++ types.
* Add a test that checks both the 'bool' and 'Bool_t' types. For the latter, the test has to write a TTree branch first, so that the column is read back with type 'Bool_t'.
1 parent 07f28fb commit b3d0539

File tree

2 files changed

+20
-10
lines changed

2 files changed

+20
-10
lines changed

bindings/pyroot/pythonizations/python/ROOT/_pythonization/_rdataframe.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -296,19 +296,17 @@ def RDataFrameAsNumpy(
296296
result_ptrs = {}
297297
for column in columns:
298298
column_type = df.GetColumnType(column)
299-
# bool columns should be taken as unsigned chars, because NumPy stores
300-
# bools in bytes - different from the std::vector<bool> returned by the
301-
# action, which might do some space optimization
302-
column_type = "unsigned char" if column_type == "bool" else column_type
303299

304300
# If the column type is a class, make sure cling knows about it
305301
tclass = ROOT.TClass.GetClass(column_type)
306302
if tclass and not tclass.GetClassInfo():
307303
raise RuntimeError(
308304
f'The column named "{column}" is of type "{column_type}", which is not known to the ROOT interpreter. Please load the corresponding header files or dictionaries.'
309305
)
310-
311-
result_ptrs[column] = df.Take[column_type](column)
306+
# We take the values via ROOT::RVec to avoid having to deal with std::vector<bool>
307+
# This uses one single data structure for all array types, which exposes the array interface
308+
# allowing zero-copy conversion to numpy array
309+
result_ptrs[column] = df.Take[f"{column_type}, ROOT::RVec<{column_type}>"](column)
312310

313311
result = AsNumpyResult(result_ptrs, columns)
314312

bindings/pyroot/pythonizations/test/rdataframe_asnumpy.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import numpy as np
88
import ROOT
99
from ROOT._pythonization._rdataframe import _clone_asnumpyresult
10-
10+
import os
1111

1212
def make_tree(*dtypes):
1313
"""
@@ -90,10 +90,22 @@ def test_branch_bool(self):
9090
Test bool data-type as a special case since we cannot adopt
9191
the std::vector<bool> with numpy arrays
9292
"""
93-
df = ROOT.RDataFrame(2).Define("x", "bool(rdfentry_)")
93+
treename = "test_branch_bool"
94+
filename = "test_branch_bool.root"
95+
# Snapshot a TTree so that column 'x' will be of type 'Bool_t'
96+
ROOT.RDataFrame(2).Define("x", "bool(rdfentry_)").Snapshot(treename, filename)
97+
# The column 'y' will instead have type 'bool'
98+
df = ROOT.RDataFrame(treename, filename).Define("y", "bool(rdfentry_)")
99+
self.assertEqual(df.GetColumnType("x"), "Bool_t")
100+
self.assertEqual(df.GetColumnType("y"), "bool")
94101
npy = df.AsNumpy()
95-
self.assertFalse(bool(npy["x"][0]))
96-
self.assertTrue(bool(npy["x"][1]))
102+
# Both numpy arrays should have dtype bool
103+
self.assertEqual(npy["x"].dtype, bool)
104+
self.assertEqual(npy["y"].dtype, bool)
105+
self.assertFalse(npy["x"][0])
106+
self.assertTrue(npy["x"][1])
107+
self.assertFalse(npy["y"][0])
108+
self.assertTrue(npy["y"][1])
97109

98110
def test_read_array(self):
99111
"""

0 commit comments

Comments
 (0)