Skip to content

Commit 002274a

Browse files
committed
[rfile] add ListKeys() method
1 parent cea6428 commit 002274a

File tree

3 files changed

+541
-45
lines changed

3 files changed

+541
-45
lines changed

io/io/inc/ROOT/RFile.hxx

Lines changed: 160 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,14 @@
1010

1111
#include <ROOT/RError.hxx>
1212

13+
#include <deque>
1314
#include <memory>
15+
#include <iostream>
1416
#include <string_view>
1517
#include <typeinfo>
1618

1719
class TFile;
20+
class TIterator;
1821
class TKey;
1922

2023
namespace ROOT {
@@ -29,6 +32,122 @@ ROOT::RLogChannel &RFileLog();
2932

3033
} // namespace Internal
3134

35+
/// Given a "path-like" string (like foo/bar/baz), returns a pair `{ dirName, baseName }`.
36+
/// `baseName` will be empty if the string ends with '/'.
37+
/// `dirName` will be empty if the string contains no '/'.
38+
/// `dirName`, if not empty, always ends with a '/'.
39+
/// NOTE: this function does no semantic checking or path expansion, nor does it interact with the
40+
/// filesystem in any way (so it won't follow symlinks or anything like that).
41+
/// Moreover, it doesn't trim the path in any way, so any leading or trailing whitespace will be preserved.
42+
/// This function does not perform any copy: the returned string_views have the same lifetime as `path`.
43+
std::pair<std::string_view, std::string_view> DecomposePath(std::string_view path);
44+
45+
class RFileKeyIterable;
46+
47+
/**
48+
\class ROOT::Experimental::RKeyInfo
49+
\ingroup RFile
50+
\brief Information about an RFile object's Key.
51+
52+
Every object inside a ROOT file has an associated "Key" which contains metadata on the object, such as its name, type
53+
etc.
54+
Querying this information can be done via RFile::ListKeys(). Reading an object's Key
55+
doesn't deserialize the full object, so it's a relatively lightweight operation.
56+
*/
57+
class RKeyInfo final {
58+
friend class ROOT::Experimental::RFileKeyIterable;
59+
60+
public:
61+
enum class ECategory : std::uint16_t {
62+
kInvalid,
63+
kObject,
64+
kDirectory
65+
};
66+
67+
private:
68+
std::string fPath;
69+
std::string fTitle;
70+
std::string fClassName;
71+
std::uint16_t fCycle = 0;
72+
ECategory fCategory = ECategory::kInvalid;
73+
74+
public:
75+
/// Returns the absolute path of this key, i.e. the directory part plus the object name.
76+
const std::string &GetPath() const { return fPath; }
77+
/// Returns the base name of this key, i.e. the name of the object without the directory part.
78+
std::string GetBaseName() const { return std::string(DecomposePath(fPath).second); }
79+
const std::string &GetTitle() const { return fTitle; }
80+
const std::string &GetClassName() const { return fClassName; }
81+
std::uint16_t GetCycle() const { return fCycle; }
82+
ECategory GetCategory() const { return fCategory; }
83+
};
84+
85+
/// The iterable returned by RFile::ListKeys()
86+
class RFileKeyIterable final {
87+
using Pattern_t = std::string;
88+
89+
TFile *fFile = nullptr;
90+
Pattern_t fPattern;
91+
std::uint32_t fFlags = 0;
92+
93+
public:
94+
class RIterator {
95+
friend class RFileKeyIterable;
96+
97+
struct RIterStackElem {
98+
// This is ugly, but TList returns an (owning) pointer to a polymorphic TIterator...and we need this class
99+
// to be copy-constructible.
100+
std::shared_ptr<TIterator> fIter;
101+
std::string fDirPath;
102+
103+
// Outlined to avoid including TIterator.h
104+
RIterStackElem(TIterator *it, const std::string &path = "");
105+
// Outlined to avoid including TIterator.h
106+
~RIterStackElem();
107+
108+
// fDirPath doesn't need to be compared because it's implied by fIter.
109+
bool operator==(const RIterStackElem &other) const { return fIter == other.fIter; }
110+
};
111+
112+
// Using a deque to have pointer stability
113+
std::deque<RIterStackElem> fIterStack;
114+
Pattern_t fPattern;
115+
const TKey *fCurKey = nullptr;
116+
std::uint16_t fRootDirNesting = 0;
117+
std::uint32_t fFlags = 0;
118+
119+
void Advance();
120+
121+
// NOTE: `iter` here is an owning pointer (or null)
122+
RIterator(TIterator *iter, Pattern_t pattern, std::uint32_t flags);
123+
124+
public:
125+
using iterator = RIterator;
126+
using iterator_category = std::input_iterator_tag;
127+
using difference_type = std::ptrdiff_t;
128+
using value_type = RKeyInfo;
129+
using pointer = const value_type *;
130+
using reference = const value_type &;
131+
132+
iterator &operator++()
133+
{
134+
Advance();
135+
return *this;
136+
}
137+
value_type operator*();
138+
bool operator!=(const iterator &rh) const { return !(*this == rh); }
139+
bool operator==(const iterator &rh) const { return fIterStack == rh.fIterStack; }
140+
};
141+
142+
RFileKeyIterable(TFile *file, std::string_view rootDir, std::uint32_t flags)
143+
: fFile(file), fPattern(std::string(rootDir)), fFlags(flags)
144+
{
145+
}
146+
147+
RIterator begin() const;
148+
RIterator end() const;
149+
};
150+
32151
/**
33152
\class ROOT::Experimental::RFile
34153
\ingroup RFile
@@ -68,7 +187,7 @@ Even though there is no equivalent of TDirectory in the RFile API, directories a
68187
(since they are a concept in the ROOT binary format). However they are for now only interacted with indirectly, via the
69188
use of filesystem-like string-based paths. If you Put an object in an RFile under the path "path/to/object", "object"
70189
will be stored under directory "to" which is in turn stored under directory "path". This hierarchy is encoded in the
71-
ROOT file itself and it can provide some optimization and/or conveniencies when querying objects.
190+
ROOT file itself and it can provide some optimization and/or conveniences when querying objects.
72191
73192
For the most part, it is convenient to think about RFile in terms of a key-value storage where string-based paths are
74193
used to refer to arbitrary objects. However, given the hierarchical nature of ROOT files, certain filesystem-like
@@ -126,6 +245,12 @@ class RFile final {
126245
TKey *GetTKey(std::string_view path) const;
127246

128247
public:
248+
/// Bit flags selecting what ListKeys() reports; combine them with bitwise OR.
enum EListKeyFlags {
   kListObjects = 0x1,   ///< Include keys of non-directory objects
   kListDirs = 0x2,      ///< Include keys of directory objects
   kListRecursive = 0x4, ///< Recurse into all subdirectories instead of listing immediate children only
};
253+
129254
// This is arbitrary, but it's useful to avoid pathological cases
130255
static constexpr int kMaxPathNesting = 1000;
131256

@@ -196,6 +321,40 @@ public:
196321

197322
/// Flushes the RFile if needed and closes it, disallowing any further reading or writing.
198323
void Close();
324+
325+
/// Returns an iterable over all keys of objects and/or directories written into this RFile starting at path
326+
/// `basePath` (defaulting to include the content of all subdirectories).
327+
/// By default, keys referring to directories are not returned: only those referring to leaf objects are.
328+
/// If `basePath` is the path of a leaf object, only `basePath` itself will be returned.
329+
/// `flags` is a bitmask specifying the listing mode.
330+
/// If `(flags & kListObject) != 0`, the listing will include keys of non-directory objects (default);
331+
/// If `(flags & kListDirs) != 0`, the listing will include keys of directory objects;
332+
/// If `(flags & kListRecursive) != 0`, the listing will recurse on all subdirectories of `basePath` (default),
333+
/// otherwise it will only list immediate children of `basePath`.
334+
///
335+
/// Example usage:
336+
/// ~~~{.cpp}
337+
/// for (RKeyInfo key : file->ListKeys()) {
338+
/// /* iterate over all objects in the RFile */
339+
/// cout << key.GetPath() << ";" << key.GetCycle() << " of type " << key.GetClassName() << "\n";
340+
/// }
341+
/// for (RKeyInfo key : file->ListKeys("", kListDirs|kListObjects|kListRecursive)) {
342+
/// /* iterate over all objects and directories in the RFile */
343+
/// }
344+
/// for (RKeyInfo key : file->ListKeys("a/b", kListObjects)) {
345+
/// /* iterate over all objects that are immediate children of directory "a/b" */
346+
/// }
347+
/// for (RKeyInfo key : file->ListKeys("foo", kListDirs|kListRecursive)) {
348+
/// /* iterate over all directories under directory "foo", recursively */
349+
/// }
350+
/// ~~~
351+
RFileKeyIterable ListKeys(std::string_view basePath = "", std::uint32_t flags = kListObjects | kListRecursive) const
352+
{
353+
return RFileKeyIterable(fFile.get(), basePath, flags);
354+
}
355+
356+
/// Prints the internal structure of this RFile to the given stream.
357+
void Print(std::ostream &out = std::cout) const;
199358
};
200359

201360
} // namespace Experimental

0 commit comments

Comments
 (0)