From 3d5696c37663a7ac86f80e4da8156733f6e232e9 Mon Sep 17 00:00:00 2001 From: silverweed Date: Wed, 8 Oct 2025 09:13:15 +0200 Subject: [PATCH 1/4] [rfile] Some improvements to doc and tests; use Info for extension msg --- io/io/inc/ROOT/RFile.hxx | 32 ++++++++++++++++---------------- io/io/src/RFile.cxx | 2 +- io/io/test/rfile.cxx | 15 ++++++++------- 3 files changed, 25 insertions(+), 24 deletions(-) diff --git a/io/io/inc/ROOT/RFile.hxx b/io/io/inc/ROOT/RFile.hxx index 821c92fba432f..da56ae5b35a20 100644 --- a/io/io/inc/ROOT/RFile.hxx +++ b/io/io/inc/ROOT/RFile.hxx @@ -37,16 +37,15 @@ ROOT::RLogChannel &RFileLog(); ## When and why should you use RFile RFile is a modern and minimalistic interface to ROOT files, both local and remote, that can be used instead of TFile -when the following conditions are met: -- you want a simple interface that makes it easy to do things right and hard to do things wrong; -- you only need basic Put/Get operations and don't need the more advanced TFile/TDirectory functionalities; -- you want more robustness and better error reporting for those operations; -- you want clearer ownership semantics expressed through the type system rather than having objects "automagically" - handled for you via implicit ownership of raw pointers. +when you only need basic Put/Get operations and don't need the more advanced TFile/TDirectory functionalities. +It provides: +- a simple interface that makes it easy to do things right and hard to do things wrong; +- more robustness and better error reporting for those operations; +- clearer ownership semantics expressed through the type system. -RFile doesn't try to cover the entirety of use cases covered by TFile/TDirectory/TDirectoryFile and is not -a 1:1 replacement for them. It is meant to simplify the most common use cases and make them easier to handle by -minimizing the amount of ROOT-specific quirks and conforming to more standard C++ practices. +RFile doesn't cover the entirety of use cases covered by TFile/TDirectory/TDirectoryFile and is not +a 1:1 replacement for them. It is meant to simplify the most common use cases by following newer standard C++ +practices. ## Ownership model @@ -65,13 +64,11 @@ file). ## Directories -Differently from TFile, the RFile class itself is not also a "directory". In fact, there is no RDirectory class at all. - -Directories are still an existing concept in RFile (since they are a concept in the ROOT binary format), -but they are usually interacted with indirectly, via the use of filesystem-like string-based paths. If you Put an object -in an RFile under the path "path/to/object", "object" will be stored under directory "to" which is in turn stored under -directory "path". This hierarchy is encoded in the ROOT file itself and it can provide some optimization and/or -conveniencies when querying objects. +Even though there is no equivalent of TDirectory in the RFile API, directories are still an existing concept in RFile +(since they are a concept in the ROOT binary format). However they are for now only interacted with indirectly, via the +use of filesystem-like string-based paths. If you Put an object in an RFile under the path "path/to/object", "object" +will be stored under directory "to" which is in turn stored under directory "path". This hierarchy is encoded in the +ROOT file itself and it can provide some optimization and/or conveniencies when querying objects. For the most part, it is convenient to think about RFile in terms of a key-value storage where string-based paths are used to refer to arbitrary objects. However, given the hierarchical nature of ROOT files, certain filesystem-like @@ -96,8 +93,11 @@ auto myObj = file->Get("h"); ~~~ */ class RFile final { + /// Flags used in PutInternal() enum PutFlags { + /// When encountering an object at the specified path, overwrite it with the new one instead of erroring out. kPutAllowOverwrite = 0x1, + /// When overwriting an object, preserve the existing one and create a new cycle, rather than removing it. kPutOverwriteKeepCycle = 0x2, }; diff --git a/io/io/src/RFile.cxx b/io/io/src/RFile.cxx index 05e6462992f55..8f9c7e6cf05df 100644 --- a/io/io/src/RFile.cxx +++ b/io/io/src/RFile.cxx @@ -35,7 +35,7 @@ static void CheckExtension(std::string_view path) } if (!ROOT::EndsWith(path, ".root")) { - R__LOG_WARNING(RFileLog()) << "ROOT::RFile only supports ROOT files. The preferred file extension is \".root\""; + R__LOG_INFO(RFileLog()) << "ROOT::RFile only supports ROOT files. The preferred file extension is \".root\""; } } diff --git a/io/io/test/rfile.cxx b/io/io/test/rfile.cxx index ed6fabbc8175b..7ed08e79bed24 100644 --- a/io/io/test/rfile.cxx +++ b/io/io/test/rfile.cxx @@ -8,7 +8,7 @@ #include #include #include -#include +#include using ROOT::Experimental::RFile; @@ -74,9 +74,6 @@ TEST(RFile, OpenInexistent) { FileRaii fileGuard("does_not_exist.root"); - // make sure that the file really does not exist, in case a previous test didn't clean it up. - gSystem->Unlink(fileGuard.GetPath().c_str()); - ROOT::TestSupport::CheckDiagsRAII diags; diags.optionalDiag(kSysError, "TFile::TFile", "", false); diags.optionalDiag(kError, "TFile::TFile", "", false); @@ -101,7 +98,10 @@ TEST(RFile, OpenInexistent) } // This succeeds because Update creates the file if it doesn't exist. - EXPECT_NO_THROW(RFile::Update("does_not_exist.root")); + FileRaii fileGuard2("created_by_update.root"); + // in case a previous run of the test failed to clean up, make sure the file doesn't exist: + gSystem->Unlink(fileGuard2.GetPath().c_str()); + EXPECT_NO_THROW(RFile::Update(fileGuard2.GetPath())); } TEST(RFile, OpenForWriting) @@ -148,8 +148,8 @@ TEST(RFile, CheckNoAutoRegistrationRead) auto file = RFile::Open(fileGuard.GetPath()); EXPECT_EQ(gDirectory, gROOT); auto hist = file->Get("hist"); - EXPECT_EQ(hist->GetDirectory(), nullptr); ASSERT_NE(hist, nullptr); + EXPECT_EQ(hist->GetDirectory(), nullptr); EXPECT_FLOAT_EQ(hist->GetEntries(), 1); } // no double free should happen when ROOT exits @@ -265,10 +265,11 @@ TEST(RFile, PutOverwrite) TEST(RFile, WrongExtension) { + ROOT::RLogScopedVerbosity logVerb(ROOT::ELogLevel::kInfo); { FileRaii fileGuard("test_rfile_wrong.root.1"); ROOT::TestSupport::CheckDiagsRAII diagsRaii; - diagsRaii.requiredDiag(kWarning, "ROOT.File", "preferred file extension is \".root\"", false); + diagsRaii.requiredDiag(kInfo, "ROOT.File", "preferred file extension is \".root\"", false); RFile::Recreate(fileGuard.GetPath()); } { From cea6428559b851d4abfcb6c304924dd7ea84fac7 Mon Sep 17 00:00:00 2001 From: silverweed Date: Wed, 8 Oct 2025 16:18:39 +0200 Subject: [PATCH 2/4] [rfile] Throw exception for .zip files --- io/io/src/RFile.cxx | 26 +++++++++----------------- io/io/test/rfile.cxx | 17 ++++++++++++----- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/io/io/src/RFile.cxx b/io/io/src/RFile.cxx index 8f9c7e6cf05df..21f7dd02b4e47 100644 --- a/io/io/src/RFile.cxx +++ b/io/io/src/RFile.cxx @@ -28,17 +28,6 @@ ROOT::RLogChannel &ROOT::Experimental::Internal::RFileLog() using ROOT::Experimental::RFile; using ROOT::Experimental::Internal::RFileLog; -static void CheckExtension(std::string_view path) -{ - if (ROOT::EndsWith(path, ".xml")) { - throw ROOT::RException(R__FAIL("ROOT::RFile doesn't support XML files.")); - } - - if (!ROOT::EndsWith(path, ".root")) { - R__LOG_INFO(RFileLog()) << "ROOT::RFile only supports ROOT files. The preferred file extension is \".root\""; - } -} - namespace { enum class ENameCycleError { kNoError, @@ -186,39 +175,42 @@ RFile::~RFile() = default; std::unique_ptr RFile::Open(std::string_view path) { - CheckExtension(path); - TDirectory::TContext ctx(nullptr); // XXX: probably not thread safe? auto tfile = std::unique_ptr(TFile::Open(std::string(path).c_str(), "READ_WITHOUT_GLOBALREGISTRATION")); if (!tfile || tfile->IsZombie()) throw ROOT::RException(R__FAIL("failed to open file " + std::string(path) + " for reading")); + if (tfile->IsRaw()) + throw ROOT::RException(R__FAIL("Opened file " + std::string(path) + " is not a ROOT file")); + auto rfile = std::unique_ptr(new RFile(std::move(tfile))); return rfile; } std::unique_ptr RFile::Update(std::string_view path) { - CheckExtension(path); - TDirectory::TContext ctx(nullptr); // XXX: probably not thread safe? auto tfile = std::unique_ptr(TFile::Open(std::string(path).c_str(), "UPDATE_WITHOUT_GLOBALREGISTRATION")); if (!tfile || tfile->IsZombie()) throw ROOT::RException(R__FAIL("failed to open file " + std::string(path) + " for updating")); + if (tfile->IsRaw()) + throw ROOT::RException(R__FAIL("Opened file " + std::string(path) + " is not a ROOT file")); + auto rfile = std::unique_ptr(new RFile(std::move(tfile))); return rfile; } std::unique_ptr RFile::Recreate(std::string_view path) { - CheckExtension(path); - TDirectory::TContext ctx(nullptr); // XXX: probably not thread safe? auto tfile = std::unique_ptr(TFile::Open(std::string(path).c_str(), "RECREATE_WITHOUT_GLOBALREGISTRATION")); if (!tfile || tfile->IsZombie()) throw ROOT::RException(R__FAIL("failed to open file " + std::string(path) + " for writing")); + if (tfile->IsRaw()) + throw ROOT::RException(R__FAIL("Opened file " + std::string(path) + " is not a ROOT file")); + auto rfile = std::unique_ptr(new RFile(std::move(tfile))); return rfile; } diff --git a/io/io/test/rfile.cxx b/io/io/test/rfile.cxx index 7ed08e79bed24..5fccc0fd2af71 100644 --- a/io/io/test/rfile.cxx +++ b/io/io/test/rfile.cxx @@ -266,16 +266,23 @@ TEST(RFile, PutOverwrite) TEST(RFile, WrongExtension) { ROOT::RLogScopedVerbosity logVerb(ROOT::ELogLevel::kInfo); + // Root files with unconventional extensions are supported. { FileRaii fileGuard("test_rfile_wrong.root.1"); - ROOT::TestSupport::CheckDiagsRAII diagsRaii; - diagsRaii.requiredDiag(kInfo, "ROOT.File", "preferred file extension is \".root\"", false); RFile::Recreate(fileGuard.GetPath()); } + + // XML files are not supported. + FileRaii fileGuardXml("test_rfile_wrong.xml"); + { + auto file = std::unique_ptr(TFile::Open(fileGuardXml.GetPath().c_str(), "RECREATE")); + TH1D h("h", "h", 10, 0, 1); + file->WriteObject(&h, "h"); + } { - FileRaii fileGuard("test_rfile_wrong.xml"); - ROOT::TestSupport::CheckDiagsRAII diagsRaii; - EXPECT_THROW(RFile::Recreate(fileGuard.GetPath()), ROOT::RException); + EXPECT_THROW(RFile::Open(fileGuardXml.GetPath()), ROOT::RException); + EXPECT_THROW(RFile::Update(fileGuardXml.GetPath()), ROOT::RException); + EXPECT_THROW(RFile::Recreate(fileGuardXml.GetPath()), ROOT::RException); } } From 002274ad5a53170f7909c6fc404d46c0fd373522 Mon Sep 17 00:00:00 2001 From: silverweed Date: Wed, 19 Mar 2025 15:17:23 +0100 Subject: [PATCH 3/4] [rfile] add ListKeys() method --- io/io/inc/ROOT/RFile.hxx | 161 +++++++++++++++++++++++++- io/io/src/RFile.cxx | 184 ++++++++++++++++++++++++++---- io/io/test/rfile.cxx | 241 +++++++++++++++++++++++++++++++++++---- 3 files changed, 541 insertions(+), 45 deletions(-) diff --git a/io/io/inc/ROOT/RFile.hxx b/io/io/inc/ROOT/RFile.hxx index da56ae5b35a20..9ee96278c703c 100644 --- a/io/io/inc/ROOT/RFile.hxx +++ b/io/io/inc/ROOT/RFile.hxx @@ -10,11 +10,14 @@ #include +#include #include +#include #include #include class TFile; +class TIterator; class TKey; namespace ROOT { @@ -29,6 +32,122 @@ ROOT::RLogChannel &RFileLog(); } // namespace Internal +/// Given a "path-like" string (like foo/bar/baz), returns a pair `{ dirName, baseName }`. +/// `baseName` will be empty if the string ends with '/'. +/// `dirName` will be empty if the string contains no '/'. +/// `dirName`, if not empty, always ends with a '/'. +/// NOTE: this function does no semantic checking or path expansion, nor does it interact with the +/// filesystem in any way (so it won't follow symlink or anything like that). +/// Moreover it doesn't trim the path in any way, so any leading or trailing whitespaces will be preserved. +/// This function does not perform any copy: the returned string_views have the same lifetime as `path`. +std::pair DecomposePath(std::string_view path); + +class RFileKeyIterable; + +/** +\class ROOT::Experimental::RKeyInfo +\ingroup RFile +\brief Information about an RFile object's Key. + +Every object inside a ROOT file has an associated "Key" which contains metadata on the object, such as its name, type +etc. +Querying this information can be done via RFile::ListKeys(). Reading an object's Key +doesn't deserialize the full object, so it's a relatively lightweight operation. +*/ +class RKeyInfo final { + friend class ROOT::Experimental::RFileKeyIterable; + +public: + enum class ECategory : std::uint16_t { + kInvalid, + kObject, + kDirectory + }; + +private: + std::string fPath; + std::string fTitle; + std::string fClassName; + std::uint16_t fCycle = 0; + ECategory fCategory = ECategory::kInvalid; + +public: + /// Returns the absolute path of this key, i.e. the directory part plus the object name. + const std::string &GetPath() const { return fPath; } + /// Returns the base name of this key, i.e. the name of the object without the directory part. + std::string GetBaseName() const { return std::string(DecomposePath(fPath).second); } + const std::string &GetTitle() const { return fTitle; } + const std::string &GetClassName() const { return fClassName; } + std::uint16_t GetCycle() const { return fCycle; } + ECategory GetCategory() const { return fCategory; } +}; + +/// The iterable returned by RFile::ListKeys() +class RFileKeyIterable final { + using Pattern_t = std::string; + + TFile *fFile = nullptr; + Pattern_t fPattern; + std::uint32_t fFlags = 0; + +public: + class RIterator { + friend class RFileKeyIterable; + + struct RIterStackElem { + // This is ugly, but TList returns an (owning) pointer to a polymorphic TIterator...and we need this class + // to be copy-constructible. + std::shared_ptr fIter; + std::string fDirPath; + + // Outlined to avoid including TIterator.h + RIterStackElem(TIterator *it, const std::string &path = ""); + // Outlined to avoid including TIterator.h + ~RIterStackElem(); + + // fDirPath doesn't need to be compared because it's implied by fIter. + bool operator==(const RIterStackElem &other) const { return fIter == other.fIter; } + }; + + // Using a deque to have pointer stability + std::deque fIterStack; + Pattern_t fPattern; + const TKey *fCurKey = nullptr; + std::uint16_t fRootDirNesting = 0; + std::uint32_t fFlags = 0; + + void Advance(); + + // NOTE: `iter` here is an owning pointer (or null) + RIterator(TIterator *iter, Pattern_t pattern, std::uint32_t flags); + + public: + using iterator = RIterator; + using iterator_category = std::input_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = RKeyInfo; + using pointer = const value_type *; + using reference = const value_type &; + + iterator &operator++() + { + Advance(); + return *this; + } + value_type operator*(); + bool operator!=(const iterator &rh) const { return !(*this == rh); } + bool operator==(const iterator &rh) const { return fIterStack == rh.fIterStack; } + }; + + RFileKeyIterable(TFile *file, std::string_view rootDir, std::uint32_t flags) + : fFile(file), fPattern(std::string(rootDir)), fFlags(flags) + { + } + + RIterator begin() const; + RIterator end() const; +}; + /** \class ROOT::Experimental::RFile \ingroup RFile @@ -68,7 +187,7 @@ Even though there is no equivalent of TDirectory in the RFile API, directories a (since they are a concept in the ROOT binary format). However they are for now only interacted with indirectly, via the use of filesystem-like string-based paths. If you Put an object in an RFile under the path "path/to/object", "object" will be stored under directory "to" which is in turn stored under directory "path". This hierarchy is encoded in the -ROOT file itself and it can provide some optimization and/or conveniencies when querying objects. +ROOT file itself and it can provide some optimization and/or conveniences when querying objects. For the most part, it is convenient to think about RFile in terms of a key-value storage where string-based paths are used to refer to arbitrary objects. However, given the hierarchical nature of ROOT files, certain filesystem-like @@ -126,6 +245,12 @@ class RFile final { TKey *GetTKey(std::string_view path) const; public: + enum EListKeyFlags { + kListObjects = 1 << 0, + kListDirs = 1 << 1, + kListRecursive = 1 << 2, + }; + // This is arbitrary, but it's useful to avoid pathological cases static constexpr int kMaxPathNesting = 1000; @@ -196,6 +321,40 @@ public: /// Flushes the RFile if needed and closes it, disallowing any further reading or writing. void Close(); + + /// Returns an iterable over all keys of objects and/or directories written into this RFile starting at path + /// `basePath` (defaulting to include the content of all subdirectories). + /// By default, keys referring to directories are not returned: only those referring to leaf objects are. + /// If `basePath` is the path of a leaf object, only `basePath` itself will be returned. + /// `flags` is a bitmask specifying the listing mode. + /// If `(flags & kListObject) != 0`, the listing will include keys of non-directory objects (default); + /// If `(flags & kListDirs) != 0`, the listing will include keys of directory objects; + /// If `(flags & kListRecursive) != 0`, the listing will recurse on all subdirectories of `basePath` (default), + /// otherwise it will only list immediate children of `basePath`. + /// + /// Example usage: + /// ~~~{.cpp} + /// for (RKeyInfo key : file->ListKeys()) { + /// /* iterate over all objects in the RFile */ + /// cout << key.GetPath() << ";" << key.GetCycle() << " of type " << key.GetClassName() << "\n"; + /// } + /// for (RKeyInfo key : file->ListKeys("", kListDirs|kListObjects|kListRecursive)) { + /// /* iterate over all objects and directories in the RFile */ + /// } + /// for (RKeyInfo key : file->ListKeys("a/b", kListObjects)) { + /// /* iterate over all objects that are immediate children of directory "a/b" */ + /// } + /// for (RKeyInfo key : file->ListKeys("foo", kListDirs|kListRecursive)) { + /// /* iterate over all directories under directory "foo", recursively */ + /// } + /// ~~~ + RFileKeyIterable ListKeys(std::string_view basePath = "", std::uint32_t flags = kListObjects | kListRecursive) const + { + return RFileKeyIterable(fFile.get(), basePath, flags); + } + + /// Prints the internal structure of this RFile to the given stream. + void Print(std::ostream &out = std::cout) const; }; } // namespace Experimental diff --git a/io/io/src/RFile.cxx b/io/io/src/RFile.cxx index 21f7dd02b4e47..3513f4e3d88cf 100644 --- a/io/io/src/RFile.cxx +++ b/io/io/src/RFile.cxx @@ -13,7 +13,9 @@ #include #include #include +#include #include +#include #include #include @@ -167,21 +169,32 @@ static std::string ValidateAndNormalizePath(std::string &path) return ""; } -///////////////////////////////////////////////////////////////////////////////////////////////// +static void EnsureFileOpenAndBinary(const TFile *tfile, std::string_view path) +{ + if (!tfile || tfile->IsZombie()) + throw ROOT::RException(R__FAIL("failed to open file " + std::string(path) + " for reading")); -RFile::RFile(std::unique_ptr file) : fFile(std::move(file)) {} + if (tfile->IsRaw() || !tfile->IsBinary() || tfile->IsArchive()) + throw ROOT::RException(R__FAIL("Opened file " + std::string(path) + " is not a ROOT binary file")); +} -RFile::~RFile() = default; +///////////////////////////////////////////////////////////////////////////////////////////////// +std::pair ROOT::Experimental::DecomposePath(std::string_view path) +{ + auto lastSlashIdx = path.rfind('/'); + if (lastSlashIdx == std::string_view::npos) + return {{}, path}; + + auto dirName = path.substr(0, lastSlashIdx + 1); + auto pathName = path.substr(lastSlashIdx + 1); + return {dirName, pathName}; +} std::unique_ptr RFile::Open(std::string_view path) { TDirectory::TContext ctx(nullptr); // XXX: probably not thread safe? auto tfile = std::unique_ptr(TFile::Open(std::string(path).c_str(), "READ_WITHOUT_GLOBALREGISTRATION")); - if (!tfile || tfile->IsZombie()) - throw ROOT::RException(R__FAIL("failed to open file " + std::string(path) + " for reading")); - - if (tfile->IsRaw()) - throw ROOT::RException(R__FAIL("Opened file " + std::string(path) + " is not a ROOT file")); + EnsureFileOpenAndBinary(tfile.get(), path); auto rfile = std::unique_ptr(new RFile(std::move(tfile))); return rfile; @@ -191,11 +204,7 @@ std::unique_ptr RFile::Update(std::string_view path) { TDirectory::TContext ctx(nullptr); // XXX: probably not thread safe? auto tfile = std::unique_ptr(TFile::Open(std::string(path).c_str(), "UPDATE_WITHOUT_GLOBALREGISTRATION")); - if (!tfile || tfile->IsZombie()) - throw ROOT::RException(R__FAIL("failed to open file " + std::string(path) + " for updating")); - - if (tfile->IsRaw()) - throw ROOT::RException(R__FAIL("Opened file " + std::string(path) + " is not a ROOT file")); + EnsureFileOpenAndBinary(tfile.get(), path); auto rfile = std::unique_ptr(new RFile(std::move(tfile))); return rfile; @@ -205,16 +214,16 @@ std::unique_ptr RFile::Recreate(std::string_view path) { TDirectory::TContext ctx(nullptr); // XXX: probably not thread safe? auto tfile = std::unique_ptr(TFile::Open(std::string(path).c_str(), "RECREATE_WITHOUT_GLOBALREGISTRATION")); - if (!tfile || tfile->IsZombie()) - throw ROOT::RException(R__FAIL("failed to open file " + std::string(path) + " for writing")); - - if (tfile->IsRaw()) - throw ROOT::RException(R__FAIL("Opened file " + std::string(path) + " is not a ROOT file")); + EnsureFileOpenAndBinary(tfile.get(), path); auto rfile = std::unique_ptr(new RFile(std::move(tfile))); return rfile; } +RFile::RFile(std::unique_ptr file) : fFile(std::move(file)) {} + +RFile::~RFile() = default; + TKey *RFile::GetTKey(std::string_view path) const { // In RFile, differently from TFile, when dealing with a path like "a/b/c", we always consider it to mean @@ -282,9 +291,9 @@ void *RFile::GetUntyped(std::string_view pathSV, const std::type_info &type) con if (auto autoAddFunc = cls->GetDirectoryAutoAdd(); autoAddFunc) { autoAddFunc(obj, nullptr); } - } else if (key && !GetROOT()->IsBatch()) { - R__LOG_WARNING(RFileLog()) << "Tried to get object '" << path << "' of type " << cls->GetName() - << " but that path contains an object of type " << key->GetClassName(); + } else if (key) { + R__LOG_INFO(RFileLog()) << "Tried to get object '" << path << "' of type " << cls->GetName() + << " but that path contains an object of type " << key->GetClassName(); } return obj; @@ -361,6 +370,139 @@ void RFile::PutUntyped(std::string_view pathSV, const std::type_info &type, cons } } +ROOT::Experimental::RFileKeyIterable::RIterator::RIterStackElem::RIterStackElem(TIterator *it, const std::string &path) + : fIter(it), fDirPath(path) +{ +} + +ROOT::Experimental::RFileKeyIterable::RIterator::RIterStackElem::~RIterStackElem() = default; + +ROOT::Experimental::RFileKeyIterable::RIterator::RIterator(TIterator *iter, Pattern_t pattern, std::uint32_t flags) + : fPattern(pattern), fFlags(flags) +{ + if (iter) { + fIterStack.emplace_back(iter); + + if (!pattern.empty()) { + fRootDirNesting = std::count(pattern.begin(), pattern.end(), '/'); + // `pattern` may or may not end with '/', but we consider it a directory regardless. + // In other words, like in virtually all filesystem operations, "dir" and "dir/" are equivalent. + fRootDirNesting += pattern.back() != '/'; + } + + // Advance the iterator to skip the first key, which is always the TFile key. + // This will also skip keys until we reach the first correct key we want to return. + Advance(); + } +} + +ROOT::Experimental::RFileKeyIterable::RIterator ROOT::Experimental::RFileKeyIterable::begin() const +{ + return {fFile->GetListOfKeys()->MakeIterator(), fPattern, fFlags}; +} + +ROOT::Experimental::RFileKeyIterable::RIterator ROOT::Experimental::RFileKeyIterable::end() const +{ + return {nullptr, fPattern, fFlags}; +} + +void ROOT::Experimental::RFileKeyIterable::RIterator::Advance() +{ + fCurKey = nullptr; + + const bool recursive = fFlags & RFile::kListRecursive; + const bool includeObj = fFlags & RFile::kListObjects; + const bool includeDirs = fFlags & RFile::kListDirs; + + // We only want to return keys that refer to user objects, not internal ones, therefore we skip + // all keys that have internal class names. + while (!fIterStack.empty()) { + auto &[iter, dirPath] = fIterStack.back(); + assert(iter); + TObject *keyObj = iter->Next(); + if (!keyObj) { + // reached end of the iteration + fIterStack.pop_back(); + continue; + } + + assert(keyObj->IsA() == TClass::GetClass()); + auto key = static_cast(keyObj); + + const std::string dirSep = (dirPath.empty() ? "" : "/"); + const bool isDir = TClass::GetClass(key->GetClassName())->InheritsFrom(TDirectory::Class()); + + if (isDir) { + TDirectory *dir = key->ReadObject(); + TIterator *innerIter = dir->GetListOfKeys()->MakeIterator(); + assert(innerIter); + fIterStack.emplace_back(innerIter, dirPath + dirSep + dir->GetName()); + if (!includeDirs) + continue; + } else if (!includeObj) { + continue; + } + + // Reconstruct the full path of the key + const std::string &fullPath = dirPath + dirSep + key->GetName(); + assert(!fIterStack.empty()); + const std::size_t nesting = fIterStack.size() - 1; + + // Skip key if it's not a child of root dir + if (!ROOT::StartsWith(fullPath, fPattern)) + continue; + + // Check that we are in the same directory as "rootDir". + // Note that for directories we list both the root dir and the immediate children (in non-recursive mode). + if (!recursive && nesting != fRootDirNesting && + (!isDir || nesting != static_cast(fRootDirNesting + 1))) + continue; + + // All checks passed: return this key. + assert(!fullPath.empty()); + fCurKey = key; + break; + } +} + +ROOT::Experimental::RKeyInfo ROOT::Experimental::RFileKeyIterable::RIterator::operator*() +{ + if (fIterStack.empty()) + throw ROOT::RException(R__FAIL("tried to dereference an invalid iterator")); + + const TKey *key = fCurKey; + if (!key) + throw ROOT::RException(R__FAIL("tried to dereference an invalid iterator")); + + const bool isDir = + strcmp(key->GetClassName(), "TDirectory") == 0 || strcmp(key->GetClassName(), "TDirectoryFile") == 0; + const auto &dirPath = fIterStack.back().fDirPath; + + RKeyInfo keyInfo; + keyInfo.fCategory = isDir ? RKeyInfo::ECategory::kDirectory : RKeyInfo::ECategory::kObject; + keyInfo.fPath = dirPath; + if (!isDir) + keyInfo.fPath += std::string(dirPath.empty() ? "" : "/") + key->GetName(); + keyInfo.fClassName = key->GetClassName(); + keyInfo.fCycle = key->GetCycle(); + keyInfo.fTitle = key->GetTitle(); + return keyInfo; +} + +void RFile::Print(std::ostream &out) const +{ + std::vector keys; + auto keysIter = ListKeys(); + for (const auto &key : keysIter) { + keys.emplace_back(key); + } + + std::sort(keys.begin(), keys.end(), [](const auto &a, const auto &b) { return a.GetPath() < b.GetPath(); }); + for (const auto &key : keys) { + out << key.GetClassName() << " " << key.GetPath() << ";" << key.GetCycle() << ": \"" << key.GetTitle() << "\"\n"; + } +} + size_t RFile::Flush() { return fFile->Write(); diff --git a/io/io/test/rfile.cxx b/io/io/test/rfile.cxx index 5fccc0fd2af71..6158491924b18 100644 --- a/io/io/test/rfile.cxx +++ b/io/io/test/rfile.cxx @@ -3,8 +3,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -43,6 +45,34 @@ class FileRaii { } // anonymous namespace +static std::string JoinKeyNames(const ROOT::Experimental::RFileKeyIterable &iterable) +{ + auto beg = iterable.begin(); + if (beg == iterable.end()) + return std::string(""); + return std::accumulate(std::next(beg), iterable.end(), (*beg).GetPath(), + [](const auto &a, const auto &b) { return a + ", " + b.GetPath(); }); +}; + +TEST(RFile, DecomposePath) +{ + using ROOT::Experimental::DecomposePath; + + auto Pair = [](std::string_view a, std::string_view b) { return std::make_pair(a, b); }; + + EXPECT_EQ(DecomposePath("/foo/bar/baz"), Pair("/foo/bar/", "baz")); + EXPECT_EQ(DecomposePath("/foo/bar/baz/"), Pair("/foo/bar/baz/", "")); + EXPECT_EQ(DecomposePath("foo/bar/baz"), Pair("foo/bar/", "baz")); + EXPECT_EQ(DecomposePath("foo"), Pair("", "foo")); + EXPECT_EQ(DecomposePath("/"), Pair("/", "")); + EXPECT_EQ(DecomposePath("////"), Pair("////", "")); + EXPECT_EQ(DecomposePath(""), Pair("", "")); + EXPECT_EQ(DecomposePath("asd/"), Pair("asd/", "")); + EXPECT_EQ(DecomposePath(" "), Pair("", " ")); + EXPECT_EQ(DecomposePath("/ "), Pair("/", " ")); + EXPECT_EQ(DecomposePath(" /"), Pair(" /", "")); +} + TEST(RFile, Open) { FileRaii fileGuard("test_rfile_read.root"); @@ -323,6 +353,43 @@ TEST(RFile, WriteReadInTFileDir) } } +TEST(RFile, IterateKeys) +{ + FileRaii fileGuard("test_rfile_iteratekeys.root"); + + { + auto file = RFile::Recreate(fileGuard.GetPath()); + TH1D a; + auto b = std::make_unique(); + std::string c = "0"; + file->Put("a", a); + file->Put("b", *b); + file->Put("c", c); + file->Put("/foo/bar/c", c); + } + + { + auto file = RFile::Open(fileGuard.GetPath()); + const auto expected = "a,b,c,foo/bar/c,"; + std::string s = ""; + for (const auto &key : file->ListKeys()) { + s += key.GetPath() + ","; + } + EXPECT_EQ(expected, s); + + // verify the expected iterator operations work + const auto expected2 = "b,c,foo/bar/c,"; + s = ""; + auto iterable = file->ListKeys(); + auto it = iterable.begin(); + std::advance(it, 1); + for (; it != iterable.end(); ++it) { + s += (*it).GetPath() + ","; + } + EXPECT_EQ(expected2, s); + } +} + TEST(RFile, SaneHierarchy) { // verify that we can't create weird hierarchies like: @@ -373,6 +440,102 @@ TEST(RFile, RefuseToCreateDirOverLeaf) } } +TEST(RFile, IterateKeysRecursive) +{ + FileRaii fileGuard("test_rfile_iteratekeys_recursive.root"); + + { + auto file = RFile::Recreate(fileGuard.GetPath()); + std::string s; + file->Put("a/c", s); + file->Put("a/b/d", s); + file->Put("e/f", s); + file->Put("e/c/g", s); + } + + { + auto file = RFile::Open(fileGuard.GetPath()); + EXPECT_EQ(JoinKeyNames(file->ListKeys()), "a/c, a/b/d, e/f, e/c/g"); + EXPECT_EQ(JoinKeyNames(file->ListKeys("a")), "a/c, a/b/d"); + EXPECT_EQ(JoinKeyNames(file->ListKeys("a/b")), "a/b/d"); + EXPECT_EQ(JoinKeyNames(file->ListKeys("a/b/c")), ""); + EXPECT_EQ(JoinKeyNames(file->ListKeys("e/c")), "e/c/g"); + EXPECT_EQ(JoinKeyNames(file->ListKeys("e/f")), "e/f"); + } +} + +TEST(RFile, IterateKeysNonRecursive) +{ + FileRaii fileGuard("test_rfile_iteratekeys_nonrecursive.root"); + + { + auto file = RFile::Recreate(fileGuard.GetPath()); + std::string s; + file->Put("h", s); + file->Put("a/c", s); + file->Put("a/b/d", s); + file->Put("e/f", s); + file->Put("e/c/g", s); + } + + { + auto file = RFile::Open(fileGuard.GetPath()); + EXPECT_EQ(JoinKeyNames(file->ListKeys("", RFile::kListObjects)), "h"); + EXPECT_EQ(JoinKeyNames(file->ListKeys("a", RFile::kListObjects)), "a/c"); + EXPECT_EQ(JoinKeyNames(file->ListKeys("a/b", RFile::kListObjects)), "a/b/d"); + EXPECT_EQ(JoinKeyNames(file->ListKeys("a/b/c", RFile::kListObjects)), ""); + EXPECT_EQ(JoinKeyNames(file->ListKeys("e", RFile::kListObjects)), "e/f"); + } +} + +TEST(RFile, IterateKeysOnlyDirs) +{ + FileRaii fileGuard("test_rfile_iteratekeys_onlydirs.root"); + + { + auto file = RFile::Recreate(fileGuard.GetPath()); + std::string s; + file->Put("h", s); + file->Put("a/c", s); + file->Put("a/b/d", s); + file->Put("e/f", s); + file->Put("e/c/g", s); + } + + { + auto file = RFile::Open(fileGuard.GetPath()); + EXPECT_EQ(JoinKeyNames(file->ListKeys("", RFile::kListDirs | RFile::kListRecursive)), "a, a/b, e, e/c"); + EXPECT_EQ(JoinKeyNames(file->ListKeys("a", RFile::kListDirs | RFile::kListRecursive)), "a, a/b"); + EXPECT_EQ(JoinKeyNames(file->ListKeys("a/b", RFile::kListDirs | RFile::kListRecursive)), "a/b"); + EXPECT_EQ(JoinKeyNames(file->ListKeys("a/b/c", RFile::kListDirs | RFile::kListRecursive)), ""); + EXPECT_EQ(JoinKeyNames(file->ListKeys("e", RFile::kListDirs | RFile::kListRecursive)), "e, e/c"); + } +} + +TEST(RFile, IterateKeysOnlyDirsNonRecursive) +{ + FileRaii fileGuard("test_rfile_iteratekeys_onlydirs_nonrec.root"); + + { + auto file = RFile::Recreate(fileGuard.GetPath()); + std::string s; + file->Put("h", s); + file->Put("a/c", s); + file->Put("a/b/d", s); + file->Put("e/f", s); + file->Put("e/c/g", s); + } + + { + auto file = RFile::Open(fileGuard.GetPath()); + EXPECT_EQ(JoinKeyNames(file->ListKeys("", RFile::kListDirs)), "a, e"); + EXPECT_EQ(JoinKeyNames(file->ListKeys("a", RFile::kListDirs)), "a, a/b"); + EXPECT_EQ(JoinKeyNames(file->ListKeys("a/b", RFile::kListDirs)), "a/b"); + EXPECT_EQ(JoinKeyNames(file->ListKeys("a/b/c", RFile::kListDirs)), ""); + EXPECT_EQ(JoinKeyNames(file->ListKeys("e", RFile::kListDirs)), "e, e/c"); + } +} + // TODO: this test could in principle also run without davix: need to figure out a way to detect if we have // remote access capabilities. #ifdef R__HAS_DAVIX @@ -431,35 +594,31 @@ TEST(RFile, Closing) } } +TEST(RFile, GetAfterOverwriteNoBackup) +{ + FileRaii fileGuard("test_rfile_getafternobackup.root"); + + auto file = RFile::Recreate(fileGuard.GetPath()); + std::string s = "foo"; + file->Put("s", s); + s = "bar"; + file->Overwrite("s", s, false); + auto ss = file->Get("s"); + EXPECT_EQ(*ss, s); + + std::vector keys; + for (const auto &key : file->ListKeys()) + keys.push_back(key); + + ASSERT_EQ(keys.size(), 1); +} + TEST(RFile, InvalidPaths) { FileRaii fileGuard("test_rfile_invalidpaths.root"); auto file = RFile::Recreate(fileGuard.GetPath()); - - static const char *const kKeyLong = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" - "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" - "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" - "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; std::string obj = "obj"; - EXPECT_NO_THROW(file->Put(kKeyLong, obj)); - - static const char *const kKeyFragmentLong = - "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" - "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/" - "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" - "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" - "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" - "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; - EXPECT_NO_THROW(file->Put(kKeyFragmentLong, obj)); - - static const char *const kKeyFragmentOk = - "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" - "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/" - "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" - "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" - "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/AAAAAAAAAAAAAAAAAAAAAAAAAAAA"; - EXPECT_NO_THROW(file->Put(kKeyFragmentOk, obj)); static const char *const kKeyWhitespaces = "my path with spaces/foo"; EXPECT_THROW(file->Put(kKeyWhitespaces, obj), ROOT::RException); @@ -483,6 +642,42 @@ TEST(RFile, InvalidPaths) EXPECT_NO_THROW(file->Put(kKeyBackslash, obj)); } +TEST(RFile, LongKeyName) +{ + FileRaii fileGuard("test_rfile_longkey.root"); + + auto file = RFile::Recreate(fileGuard.GetPath()); + + static const char kKeyLong[] = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; + static_assert(std::size(kKeyLong) > 256); + std::string obj = "obj"; + EXPECT_NO_THROW(file->Put(kKeyLong, obj)); + + auto keys = file->ListKeys(); + auto it = keys.begin(); + EXPECT_EQ((*it).GetPath(), kKeyLong); + + static const char *const kKeyFragmentLong = + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; + EXPECT_NO_THROW(file->Put(kKeyFragmentLong, obj)); + + static const char *const kKeyFragmentOk = + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/AAAAAAAAAAAAAAAAAAAAAAAAAAAA"; + EXPECT_NO_THROW(file->Put(kKeyFragmentOk, obj)); +} + TEST(RFile, NormalizedPaths) { FileRaii fileGuard("test_rfile_normalizedpaths.root"); From 75db7dda276fda84969504ebe1bde2be005f20ed Mon Sep 17 00:00:00 2001 From: silverweed Date: Wed, 22 Oct 2025 10:14:16 +0200 Subject: [PATCH 4/4] [rfile] Move DecomposePath into Detail --- io/io/inc/ROOT/RFile.hxx | 6 +++++- io/io/src/RFile.cxx | 2 +- io/io/test/rfile.cxx | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/io/io/inc/ROOT/RFile.hxx b/io/io/inc/ROOT/RFile.hxx index 9ee96278c703c..6188961e95f3c 100644 --- a/io/io/inc/ROOT/RFile.hxx +++ b/io/io/inc/ROOT/RFile.hxx @@ -32,6 +32,8 @@ ROOT::RLogChannel &RFileLog(); } // namespace Internal +namespace Detail { + /// Given a "path-like" string (like foo/bar/baz), returns a pair `{ dirName, baseName }`. /// `baseName` will be empty if the string ends with '/'. /// `dirName` will be empty if the string contains no '/'. @@ -42,6 +44,8 @@ ROOT::RLogChannel &RFileLog(); /// This function does not perform any copy: the returned string_views have the same lifetime as `path`. std::pair DecomposePath(std::string_view path); +} + class RFileKeyIterable; /** @@ -75,7 +79,7 @@ public: /// Returns the absolute path of this key, i.e. the directory part plus the object name. const std::string &GetPath() const { return fPath; } /// Returns the base name of this key, i.e. the name of the object without the directory part. - std::string GetBaseName() const { return std::string(DecomposePath(fPath).second); } + std::string GetBaseName() const { return std::string(Detail::DecomposePath(fPath).second); } const std::string &GetTitle() const { return fTitle; } const std::string &GetClassName() const { return fClassName; } std::uint16_t GetCycle() const { return fCycle; } diff --git a/io/io/src/RFile.cxx b/io/io/src/RFile.cxx index 3513f4e3d88cf..c5364fed3eb47 100644 --- a/io/io/src/RFile.cxx +++ b/io/io/src/RFile.cxx @@ -179,7 +179,7 @@ static void EnsureFileOpenAndBinary(const TFile *tfile, std::string_view path) } ///////////////////////////////////////////////////////////////////////////////////////////////// -std::pair ROOT::Experimental::DecomposePath(std::string_view path) +std::pair ROOT::Experimental::Detail::DecomposePath(std::string_view path) { auto lastSlashIdx = path.rfind('/'); if (lastSlashIdx == std::string_view::npos) diff --git a/io/io/test/rfile.cxx b/io/io/test/rfile.cxx index 6158491924b18..526172a4964cf 100644 --- a/io/io/test/rfile.cxx +++ b/io/io/test/rfile.cxx @@ -56,7 +56,7 @@ static std::string JoinKeyNames(const ROOT::Experimental::RFileKeyIterable &iter TEST(RFile, DecomposePath) { - using ROOT::Experimental::DecomposePath; + using ROOT::Experimental::Detail::DecomposePath; auto Pair = [](std::string_view a, std::string_view b) { return std::make_pair(a, b); };