From 46650a93fcb84171cea1e31cf5ea432807a496ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 26 Sep 2025 16:29:30 +0200 Subject: [PATCH 01/50] wip first commit --- CMakeLists.txt | 3 +- .../openPMD/toolkit/ExternalBlockStorage.hpp | 49 ++++++ .../toolkit/ExternalBlockStorage_internal.hpp | 19 +++ src/toolkit/ExternalBlockStorage.cpp | 139 ++++++++++++++++++ 4 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 include/openPMD/toolkit/ExternalBlockStorage.hpp create mode 100644 include/openPMD/toolkit/ExternalBlockStorage_internal.hpp create mode 100644 src/toolkit/ExternalBlockStorage.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index dba77d38be..954d48d8d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -434,7 +434,8 @@ set(CORE_SOURCE src/snapshots/IteratorTraits.cpp src/snapshots/RandomAccessIterator.cpp src/snapshots/Snapshots.cpp - src/snapshots/StatefulIterator.cpp) + src/snapshots/StatefulIterator.cpp + src/toolkit/ExternalBlockStorage.cpp) set(IO_SOURCE src/IO/AbstractIOHandler.cpp src/IO/AbstractIOHandlerImpl.cpp diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp new file mode 100644 index 0000000000..d540e8d7cc --- /dev/null +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include +#include + +namespace openPMD +{ +class ExternalBlockStorage; +} + +namespace openPMD::internal +{ +struct ExternalBlockStorageBackend +{ + virtual void + put(std::string const &identifier, void const *data, size_t len) = 0; + virtual ~ExternalBlockStorageBackend(); +}; + +struct StdioBuilder +{ + std::string m_directory; + std::optional m_openMode = std::nullopt; + + auto setDirectory(std::string directory) -> StdioBuilder &; + auto setOpenMode(std::string openMode) -> StdioBuilder &; + + operator ExternalBlockStorage(); +}; +} // namespace openPMD::internal + +namespace openPMD +{ +class 
ExternalBlockStorage +{ +private: + std::unique_ptr m_worker; + ExternalBlockStorage( + std::unique_ptr); + + friend struct internal::StdioBuilder; + +public: + static auto makeStdioSession(std::string directory) + -> internal::StdioBuilder; +}; +} // namespace openPMD diff --git a/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp b/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp new file mode 100644 index 0000000000..2adaaa05f6 --- /dev/null +++ b/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp @@ -0,0 +1,19 @@ +#pragma once + +#include "openPMD/toolkit/ExternalBlockStorage.hpp" + +namespace openPMD::internal +{ +struct ExternalBlockStorageStdio : ExternalBlockStorageBackend +{ +private: + std::string m_directory; + std::string m_openMode; + +public: + ExternalBlockStorageStdio(std::string directory, std::string openMode); + void + put(std::string const &identifier, void const *data, size_t len) override; + ~ExternalBlockStorageStdio() override; +}; +} // namespace openPMD::internal diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp new file mode 100644 index 0000000000..5b5a7dc2ac --- /dev/null +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -0,0 +1,139 @@ + +#include "openPMD/toolkit/ExternalBlockStorage.hpp" + +#include "openPMD/auxiliary/Filesystem.hpp" +#include "openPMD/toolkit/ExternalBlockStorage_internal.hpp" + +#include +#include +#include + +namespace +{ +auto concat_filepath(std::string const &s1, std::string const &s2) + -> std::string +{ + if (s1.empty()) + { + return s2; + } + if (s2.empty()) + { + return s1; + } + bool ends_with_slash = + *s1.crbegin() == openPMD::auxiliary::directory_separator; + bool starts_with_slash = + *s2.cbegin() == openPMD::auxiliary::directory_separator; + + if (ends_with_slash ^ starts_with_slash) + { + return s1 + s2; + } + else if (ends_with_slash && starts_with_slash) + { + return s1 + (s2.c_str() + 1); + } + else + { + return s1 + 
openPMD::auxiliary::directory_separator + s2; + } +} +} // namespace + +namespace openPMD::internal +{ +ExternalBlockStorageBackend::~ExternalBlockStorageBackend() = default; + +ExternalBlockStorageStdio::ExternalBlockStorageStdio( + std::string directory, std::string openMode) + : m_directory(std::move(directory)), m_openMode(std::move(openMode)) +{ + if (m_directory.empty()) + { + throw std::invalid_argument( + "ExternalBlockStorageStdio: directory cannot be empty"); + } + + // Ensure the directory exists and is writable + if (!auxiliary::create_directories(m_directory)) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to create or access " + "directory: " + + m_directory); + } +} + +ExternalBlockStorageStdio::~ExternalBlockStorageStdio() = default; + +void ExternalBlockStorageStdio::put( + std::string const &identifier, void const *data, size_t len) +{ + if (len == 0) + { + return; + } + + // Generate a unique filename using a simple counter approach (can be + // extended) + static size_t counter = 0; + std::string filename = m_directory + "/block_" + std::to_string(counter++); + std::string filepath = concat_filepath(m_directory, identifier); + + FILE *file = std::fopen(filepath.c_str(), "wb"); + if (!file) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to open file for writing: " + + filepath); + } + + size_t written = std::fwrite(data, 1, len, file); + if (written != len) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to write full data to file: " + + filepath); + } + + if (std::fclose(file) != 0) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to close file after writing: " + + filepath); + } +} +} // namespace openPMD::internal +namespace openPMD +{ +auto ExternalBlockStorage::makeStdioSession(std::string directory) + -> internal::StdioBuilder +{ + return internal::StdioBuilder{std::move(directory)}; +} + +auto internal::StdioBuilder::setDirectory(std::string directory) + 
-> StdioBuilder & +{ + m_directory = std::move(directory); + return *this; +} +auto internal::StdioBuilder::setOpenMode(std::string openMode) -> StdioBuilder & +{ + m_openMode = std::move(openMode); + return *this; +} + +internal::StdioBuilder::operator ExternalBlockStorage() +{ + return ExternalBlockStorage{ + std::make_unique( + std::move(m_directory), std::move(m_openMode).value_or("wb"))}; +} + +ExternalBlockStorage::ExternalBlockStorage( + std::unique_ptr worker) + : m_worker(std::move(worker)) +{} +} // namespace openPMD From 22a68811a8a91927dee04e12c606dc2117bc08ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 26 Sep 2025 18:48:45 +0200 Subject: [PATCH 02/50] Some cleaning --- .../openPMD/toolkit/ExternalBlockStorage.hpp | 6 ++- .../toolkit/ExternalBlockStorage_internal.hpp | 4 +- src/toolkit/ExternalBlockStorage.cpp | 49 ++++++++++--------- 3 files changed, 31 insertions(+), 28 deletions(-) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index d540e8d7cc..9b8cd1da44 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -14,8 +14,9 @@ namespace openPMD::internal { struct ExternalBlockStorageBackend { - virtual void - put(std::string const &identifier, void const *data, size_t len) = 0; + virtual auto + put(std::string const &identifier, void const *data, size_t len) + -> std::string = 0; virtual ~ExternalBlockStorageBackend(); }; @@ -28,6 +29,7 @@ struct StdioBuilder auto setOpenMode(std::string openMode) -> StdioBuilder &; operator ExternalBlockStorage(); + auto build() -> ExternalBlockStorage; }; } // namespace openPMD::internal diff --git a/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp b/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp index 2adaaa05f6..10a3e724be 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp +++ 
b/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp @@ -12,8 +12,8 @@ struct ExternalBlockStorageStdio : ExternalBlockStorageBackend public: ExternalBlockStorageStdio(std::string directory, std::string openMode); - void - put(std::string const &identifier, void const *data, size_t len) override; + auto put(std::string const &identifier, void const *data, size_t len) + -> std::string override; ~ExternalBlockStorageStdio() override; }; } // namespace openPMD::internal diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 5b5a7dc2ac..6344330ee3 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -67,20 +67,15 @@ ExternalBlockStorageStdio::ExternalBlockStorageStdio( ExternalBlockStorageStdio::~ExternalBlockStorageStdio() = default; -void ExternalBlockStorageStdio::put( - std::string const &identifier, void const *data, size_t len) +auto ExternalBlockStorageStdio::put( + std::string const &identifier, void const *data, size_t len) -> std::string { + std::string filepath = concat_filepath(m_directory, identifier); if (len == 0) { - return; + return filepath; } - // Generate a unique filename using a simple counter approach (can be - // extended) - static size_t counter = 0; - std::string filename = m_directory + "/block_" + std::to_string(counter++); - std::string filepath = concat_filepath(m_directory, identifier); - FILE *file = std::fopen(filepath.c_str(), "wb"); if (!file) { @@ -103,33 +98,39 @@ void ExternalBlockStorageStdio::put( "ExternalBlockStorageStdio: failed to close file after writing: " + filepath); } -} -} // namespace openPMD::internal -namespace openPMD -{ -auto ExternalBlockStorage::makeStdioSession(std::string directory) - -> internal::StdioBuilder -{ - return internal::StdioBuilder{std::move(directory)}; + + return filepath; } -auto internal::StdioBuilder::setDirectory(std::string directory) - -> StdioBuilder & +auto StdioBuilder::setDirectory(std::string 
directory) -> StdioBuilder & { m_directory = std::move(directory); return *this; } -auto internal::StdioBuilder::setOpenMode(std::string openMode) -> StdioBuilder & +auto StdioBuilder::setOpenMode(std::string openMode) -> StdioBuilder & { m_openMode = std::move(openMode); return *this; } -internal::StdioBuilder::operator ExternalBlockStorage() +StdioBuilder::operator ExternalBlockStorage() { - return ExternalBlockStorage{ - std::make_unique( - std::move(m_directory), std::move(m_openMode).value_or("wb"))}; + return ExternalBlockStorage{std::make_unique( + std::move(m_directory), std::move(m_openMode).value_or("wb"))}; +} + +auto StdioBuilder::build() -> ExternalBlockStorage +{ + return *this; +} +} // namespace openPMD::internal + +namespace openPMD +{ +auto ExternalBlockStorage::makeStdioSession(std::string directory) + -> internal::StdioBuilder +{ + return internal::StdioBuilder{std::move(directory)}; } ExternalBlockStorage::ExternalBlockStorage( From 8c4dbb3a29181d5d8bacb9d777cd9433c371db05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 26 Sep 2025 20:15:48 +0200 Subject: [PATCH 03/50] Hmm maybe usable --- .../openPMD/toolkit/ExternalBlockStorage.hpp | 18 +++ src/toolkit/ExternalBlockStorage.cpp | 143 +++++++++++++++++- 2 files changed, 156 insertions(+), 5 deletions(-) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 9b8cd1da44..318cfbdc87 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -1,5 +1,9 @@ #pragma once +#include "openPMD/Dataset.hpp" + +#include + #include #include #include @@ -45,7 +49,21 @@ class ExternalBlockStorage friend struct internal::StdioBuilder; public: + explicit ExternalBlockStorage(); + static auto makeStdioSession(std::string directory) -> internal::StdioBuilder; + + // returns created JSON key + template + auto store( + Extent globalExtent, + Offset blockOffset, + 
Extent blockExtent, + nlohmann::json &fullJsonDataset, + nlohmann::json::json_pointer const &path, + T const *data) -> std::string; + + static void sanitizeString(std::string &s); }; } // namespace openPMD diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 6344330ee3..27404259ff 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -1,12 +1,17 @@ #include "openPMD/toolkit/ExternalBlockStorage.hpp" +#include "openPMD/DatatypeMacros.hpp" #include "openPMD/auxiliary/Filesystem.hpp" #include "openPMD/toolkit/ExternalBlockStorage_internal.hpp" #include #include +#include +#include #include +#include +#include namespace { @@ -70,7 +75,10 @@ ExternalBlockStorageStdio::~ExternalBlockStorageStdio() = default; auto ExternalBlockStorageStdio::put( std::string const &identifier, void const *data, size_t len) -> std::string { - std::string filepath = concat_filepath(m_directory, identifier); + auto sanitized = identifier; + ExternalBlockStorage::sanitizeString(sanitized); + std::string filepath = concat_filepath(m_directory, sanitized); + if (len == 0) { return filepath; @@ -127,14 +135,139 @@ auto StdioBuilder::build() -> ExternalBlockStorage namespace openPMD { +ExternalBlockStorage::ExternalBlockStorage() = default; +ExternalBlockStorage::ExternalBlockStorage( + std::unique_ptr worker) + : m_worker(std::move(worker)) +{} + auto ExternalBlockStorage::makeStdioSession(std::string directory) -> internal::StdioBuilder { return internal::StdioBuilder{std::move(directory)}; } -ExternalBlockStorage::ExternalBlockStorage( - std::unique_ptr worker) - : m_worker(std::move(worker)) -{} +template +auto ExternalBlockStorage::store( + Extent globalExtent, + Offset blockOffset, + Extent blockExtent, + nlohmann::json &fullJsonDataset, + nlohmann::json::json_pointer const &path, + T const *data) -> std::string +{ + // JSON Identifier: running counter. 
+ // Do not use an array to avoid reindexing upon deletion. + + // Filesystem Identifier: JSON path + running counter. + + // For each externally handled data block, store: + // 1. Filesystem identifier + // 2. Offset, Extent + auto &dataset = fullJsonDataset[path]; + + // running_index denotes the last *used* block index in the dataset + using running_index_t = uint64_t; + running_index_t running_index = [&]() -> running_index_t { + if (auto it = dataset.find("_running_index"); it != dataset.end()) + { + auto res = it->get(); + ++res; + *it = res; + return res; + } + else + { + dataset["_running_index"] = 0; + return 0; + } + }(); + + constexpr size_t padding = 6; + std::string index_as_str = [running_index]() { + auto res = std::to_string(running_index); + auto size = res.size(); + if (size >= padding) + { + return res; + } + std::stringstream padded; + for (size_t i = 0; i < padding - size; ++i) + { + padded << '0'; + } + padded << res; + return padded.str(); + }(); + + if (dataset.contains(index_as_str)) + { + throw std::runtime_error( + "Inconsistent state: Index " + index_as_str + " already in use."); + } + + auto check_metadata = [&dataset](char const *key, auto const &value) { + using value_t = + std::remove_reference_t>; + if (auto it = dataset.find(key); it != dataset.end()) + { + auto const &stored_value = it->get(); + if (stored_value != value) + { + throw std::runtime_error( + "Inconsistent chunk storage in key " + std::string(key) + + "."); + } + } + else + { + dataset[key] = value; + } + }; + std::string type = typeid(T).name(); // TODO use sth more portable + check_metadata("_type", type); + check_metadata("_byte_width", sizeof(T)); + check_metadata("_extent", globalExtent); + + auto &block = dataset[index_as_str]; + block["offset"] = blockOffset; + block["extent"] = blockExtent; + std::stringstream filesystem_identifier; + filesystem_identifier << path.to_string() << "--" << index_as_str << ".dat"; + auto escaped_filesystem_identifier = 
m_worker->put( + filesystem_identifier.str(), + data, + std::accumulate( + blockExtent.begin(), + blockExtent.end(), + sizeof(T), + [](size_t left, size_t right) { return left * right; })); + block["external_ref"] = escaped_filesystem_identifier; + return index_as_str; +} + +void ExternalBlockStorage::sanitizeString(std::string &s) +{ + // Replace invalid characters with underscore + for (char &c : s) + { + if (c == '/' || c == '\\' || c == ':' || c == '*' || c == '?' || + c == '"' || c == '<' || c == '>' || c == '|' || c == '\n' || + c == '\r' || c == '\t' || c == '\0' || c == ' ') + { + c = '_'; + } + } +} + +#define OPENPMD_INSTANTIATE(type) \ + template auto ExternalBlockStorage::store( \ + Extent globalExtent, \ + Offset blockOffset, \ + Extent blockExtent, \ + nlohmann::json & fullJsonDataset, \ + nlohmann::json::json_pointer const &path, \ + type const *data) -> std::string; +OPENPMD_FOREACH_DATASET_DATATYPE(OPENPMD_INSTANTIATE) +#undef OPENPMD_INSTANTIATE } // namespace openPMD From 50d97fbe081a067b26156bfeab51bdbb15000ed0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 26 Sep 2025 20:39:19 +0200 Subject: [PATCH 04/50] Quickly use this in the JSON backend bin/openpmd-pipe --infile samples/git-sample/data%T.bp --outfile data.json --outconfig "$(echo -e 'json.dataset.mode = "template"\njson.attribute.mode = "short"')" --- include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 3 ++ src/IO/JSON/JSONIOHandlerImpl.cpp | 49 +++++++++++++++---- 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index 6df0c60ced..08edc3180b 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -29,6 +29,7 @@ #include "openPMD/auxiliary/JSON_internal.hpp" #include "openPMD/backend/Variant_internal.hpp" #include "openPMD/config.hpp" +#include "openPMD/toolkit/ExternalBlockStorage.hpp" #include #include 
@@ -177,6 +178,8 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl std::string originalExtension); #endif + ExternalBlockStorage externalBlockStorage; + void init(openPMD::json::TracingJSON config); ~JSONIOHandlerImpl() override; diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 59541c1e30..7fae983380 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -33,6 +33,7 @@ #include "openPMD/auxiliary/TypeTraits.hpp" #include "openPMD/backend/Attribute.hpp" #include "openPMD/backend/Writable.hpp" +#include "openPMD/toolkit/ExternalBlockStorage.hpp" #include #include @@ -440,6 +441,9 @@ void JSONIOHandlerImpl::init(openPMD::json::TracingJSON config) (void)_; warnUnusedJson(backendConfig.value()); } + + externalBlockStorage = + ExternalBlockStorage::makeStdioSession("./external_blocks/"); } JSONIOHandlerImpl::~JSONIOHandlerImpl() = default; @@ -1139,6 +1143,22 @@ void JSONIOHandlerImpl::deleteAttribute( j.erase(parameters.name); } +namespace +{ + struct StoreExternally + { + template + static void call( + ExternalBlockStorage &blockStorage, void const *ptr, Args &&...args) + { + blockStorage.store( + std::forward(args)..., static_cast(ptr)); + } + + static constexpr char const *errorMsg = "StoreExternally"; + }; +} // namespace + void JSONIOHandlerImpl::writeDataset( Writable *writable, Parameter ¶meters) { @@ -1148,21 +1168,32 @@ void JSONIOHandlerImpl::writeDataset( auto pos = setAndGetFilePosition(writable); auto file = refreshFileFromParent(writable); - auto &j = obtainJsonContents(writable); + auto filePosition = setAndGetFilePosition(writable, false); + auto &jsonRoot = *obtainJsonContents(file); + auto &j = jsonRoot[filePosition->id]; switch (verifyDataset(parameters, j)) { case DatasetMode::Dataset: break; case DatasetMode::Template: - if (!m_datasetMode.m_skipWarnings) - { - std::cerr - << "[JSON/TOML backend: Warning] Trying to write data to a " - "template dataset. Will skip." 
- << '\n'; - m_datasetMode.m_skipWarnings = true; - } + switchDatasetType( + parameters.dtype, + externalBlockStorage, + parameters.data.get(), + j.at("extent").get(), + parameters.offset, + parameters.extent, + jsonRoot, + filePosition->id); + // if (!m_datasetMode.m_skipWarnings) + // { + // std::cerr + // << "[JSON/TOML backend: Warning] Trying to write data to a " + // "template dataset. Will skip." + // << '\n'; + // m_datasetMode.m_skipWarnings = true; + // } return; } From a0b820a88f2c0151e96fe29bd92cfd6998fffe5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 29 Sep 2025 08:42:39 +0200 Subject: [PATCH 05/50] Better and generalized handling for datatypes --- include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 40 +++++++++++++++++++ .../openPMD/toolkit/ExternalBlockStorage.hpp | 13 +++++- src/IO/JSON/JSONIOHandlerImpl.cpp | 13 +++--- src/toolkit/ExternalBlockStorage.cpp | 15 ++++--- 4 files changed, 70 insertions(+), 11 deletions(-) diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index 08edc3180b..94d00dcf90 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -154,6 +154,46 @@ void from_json(const nlohmann::json &j, std::complex &p) } } // namespace std +namespace openPMD::internal +{ +auto jsonDatatypeToString(Datatype dt) -> std::string; + +struct JsonDatatypeHandling +{ + template + static auto encodeDatatype(nlohmann::json &j) -> bool + { + auto const &needed_datatype = + jsonDatatypeToString(determineDatatype()); + if (auto it = j.find("datatype"); it != j.end()) + { + return it.value().get() == needed_datatype; + } + else + { + j["datatype"] = needed_datatype; + return true; + } + } + + template + static auto decodeDatatype(nlohmann::json const &j, Args &&...args) -> bool + { + if (auto it = j.find("datatype"); it != j.end()) + { + switchDatasetType( + stringToDatatype(it.value().get()), + 
std::forward(args)...); + return true; + } + else + { + return false; + } + } +}; +} // namespace openPMD::internal + namespace openPMD { class JSONIOHandlerImpl : public AbstractIOHandlerImpl diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 318cfbdc87..5bbb87091e 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -39,6 +39,17 @@ struct StdioBuilder namespace openPMD { +// used nowhere, just shows the signatures +// TODO: replace this with a concept upon switching to C++20 +struct DatatypeHandling_Interface +{ + template + static auto encodeDatatype(nlohmann::json &) -> bool; + + template + static auto decodeDatatype(nlohmann::json const &j, Args &&...args) -> bool; +}; + class ExternalBlockStorage { private: @@ -55,7 +66,7 @@ class ExternalBlockStorage -> internal::StdioBuilder; // returns created JSON key - template + template auto store( Extent globalExtent, Offset blockOffset, diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 7fae983380..d38a81180c 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -163,7 +163,10 @@ namespace } } } +} // namespace +namespace internal +{ // Does the same as datatypeToString(), but this makes sure that we don't // accidentally change the JSON schema by modifying datatypeToString() std::string jsonDatatypeToString(Datatype dt) @@ -252,7 +255,7 @@ namespace } return "Unreachable!"; } -} // namespace +} // namespace internal auto JSONIOHandlerImpl::retrieveDatasetMode( openPMD::json::TracingJSON &config) const -> DatasetMode_s @@ -637,7 +640,7 @@ void JSONIOHandlerImpl::createDataset( } setAndGetFilePosition(writable, name); auto &dset = jsonVal[name]; - dset["datatype"] = jsonDatatypeToString(parameter.dtype); + dset["datatype"] = internal::jsonDatatypeToString(parameter.dtype); switch (localMode) { @@ -1151,7 +1154,7 
@@ namespace static void call( ExternalBlockStorage &blockStorage, void const *ptr, Args &&...args) { - blockStorage.store( + blockStorage.store( std::forward(args)..., static_cast(ptr)); } @@ -1235,7 +1238,7 @@ void JSONIOHandlerImpl::writeAttribute( { case AttributeMode::Long: (*jsonVal)[filePosition->id]["attributes"][name] = { - {"datatype", jsonDatatypeToString(parameter.dtype)}, + {"datatype", internal::jsonDatatypeToString(parameter.dtype)}, {"value", value}}; break; case AttributeMode::Short: @@ -2397,7 +2400,7 @@ nlohmann::json JSONIOHandlerImpl::platformSpecifics() Datatype::BOOL}; for (auto &datatype : datatypes) { - res[jsonDatatypeToString(datatype)] = toBytes(datatype); + res[internal::jsonDatatypeToString(datatype)] = toBytes(datatype); } return res; } diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 27404259ff..a33039df86 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -2,6 +2,7 @@ #include "openPMD/toolkit/ExternalBlockStorage.hpp" #include "openPMD/DatatypeMacros.hpp" +#include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp" #include "openPMD/auxiliary/Filesystem.hpp" #include "openPMD/toolkit/ExternalBlockStorage_internal.hpp" @@ -147,7 +148,7 @@ auto ExternalBlockStorage::makeStdioSession(std::string directory) return internal::StdioBuilder{std::move(directory)}; } -template +template auto ExternalBlockStorage::store( Extent globalExtent, Offset blockOffset, @@ -224,8 +225,10 @@ auto ExternalBlockStorage::store( dataset[key] = value; } }; - std::string type = typeid(T).name(); // TODO use sth more portable - check_metadata("_type", type); + if (!DatatypeHandling::template encodeDatatype(dataset)) + { + throw std::runtime_error("Inconsistent chunk storage in datatype."); + } check_metadata("_byte_width", sizeof(T)); check_metadata("_extent", globalExtent); @@ -260,14 +263,16 @@ void ExternalBlockStorage::sanitizeString(std::string &s) } } -#define 
OPENPMD_INSTANTIATE(type) \ - template auto ExternalBlockStorage::store( \ +#define OPENPMD_INSTANTIATE_DATATYPEHANDLING(datatypehandling, type) \ + template auto ExternalBlockStorage::store( \ Extent globalExtent, \ Offset blockOffset, \ Extent blockExtent, \ nlohmann::json & fullJsonDataset, \ nlohmann::json::json_pointer const &path, \ type const *data) -> std::string; +#define OPENPMD_INSTANTIATE(type) \ + OPENPMD_INSTANTIATE_DATATYPEHANDLING(internal::JsonDatatypeHandling, type) OPENPMD_FOREACH_DATASET_DATATYPE(OPENPMD_INSTANTIATE) #undef OPENPMD_INSTANTIATE } // namespace openPMD From a45e3b3b4be69661b5b5a4766de716f5a0893f96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 29 Sep 2025 09:38:54 +0200 Subject: [PATCH 06/50] structure for aws --- CMakeLists.txt | 4 + .../openPMD/toolkit/ExternalBlockStorage.hpp | 45 ++++++ .../toolkit/ExternalBlockStorage_internal.hpp | 15 ++ src/toolkit/ExternalBlockStorage.cpp | 128 +++++++++++++++++- 4 files changed, 190 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 954d48d8d8..50ea1be05c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -385,6 +385,8 @@ else() endif() unset(openPMD_REQUIRED_ADIOS2_COMPONENTS) +find_package(AWSSDK REQUIRED COMPONENTS s3) + # external library: pybind11 (optional) include(${openPMD_SOURCE_DIR}/cmake/dependencies/pybind11.cmake) @@ -563,6 +565,8 @@ if(openPMD_HAVE_ADIOS2) endif() endif() +target_link_libraries(openPMD PUBLIC ${AWSSDK_LIBRARIES}) + # Runtime parameter and API status checks ("asserts") if(openPMD_USE_VERIFY) target_compile_definitions(openPMD PRIVATE openPMD_USE_VERIFY=1) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 5bbb87091e..d69db5a16b 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -2,9 +2,11 @@ #include "openPMD/Dataset.hpp" +#include #include #include +#include #include 
#include #include @@ -35,6 +37,41 @@ struct StdioBuilder operator ExternalBlockStorage(); auto build() -> ExternalBlockStorage; }; + +struct AwsBuilder +{ + struct init_credentials_tag_t + {}; + static constexpr init_credentials_tag_t init_credentials_tag = {}; + + AwsBuilder( + std::string bucketName, std::string accessKeyId, std::string secretKey); + + enum class Scheme : uint8_t + { + HTTP, + HTTPS + }; + std::string m_bucketName; + std::string m_accessKeyId; + std::string m_secretKey; + std::optional m_sessionToken; + std::initializer_list m_credentials; + std::optional m_endpointOverride; + std::optional m_region; + std::optional m_scheme; + + auto setBucketName(std::string bucketName) -> AwsBuilder &; + auto setCredentials(std::string accessKeyId, std::string secretKey) + -> AwsBuilder &; + auto setSessionToken(std::string sessionToken) -> AwsBuilder &; + auto setEndpointOverride(std::string endpoint) -> AwsBuilder &; + auto setRegion(std::string regionName) -> AwsBuilder &; + auto setScheme(Scheme s) -> AwsBuilder &; + + operator ExternalBlockStorage(); + auto build() -> ExternalBlockStorage; +}; } // namespace openPMD::internal namespace openPMD @@ -58,12 +95,17 @@ class ExternalBlockStorage std::unique_ptr); friend struct internal::StdioBuilder; + friend struct internal::AwsBuilder; public: explicit ExternalBlockStorage(); static auto makeStdioSession(std::string directory) -> internal::StdioBuilder; + template + static auto makeAwsSession( + std::string bucketName, std::string accessKeyId, std::string secretKey) + -> internal::AwsBuilder; // returns created JSON key template @@ -77,4 +119,7 @@ class ExternalBlockStorage static void sanitizeString(std::string &s); }; + +// Implementations + } // namespace openPMD diff --git a/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp b/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp index 10a3e724be..2ad77e7965 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp +++ 
b/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp @@ -2,6 +2,8 @@ #include "openPMD/toolkit/ExternalBlockStorage.hpp" +#include + namespace openPMD::internal { struct ExternalBlockStorageStdio : ExternalBlockStorageBackend @@ -16,4 +18,17 @@ struct ExternalBlockStorageStdio : ExternalBlockStorageBackend -> std::string override; ~ExternalBlockStorageStdio() override; }; + +struct ExternalBlockStorageAws : ExternalBlockStorageBackend +{ +private: + Aws::S3::S3Client m_client; + std::string m_bucketName; + +public: + ExternalBlockStorageAws(Aws::S3::S3Client, std::string bucketName); + auto put(std::string const &identifier, void const *data, size_t len) + -> std::string override; + ~ExternalBlockStorageAws() override; +}; } // namespace openPMD::internal diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index a33039df86..7672b1016e 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -1,10 +1,15 @@ #include "openPMD/toolkit/ExternalBlockStorage.hpp" +#include "openPMD/toolkit/ExternalBlockStorage_internal.hpp" + #include "openPMD/DatatypeMacros.hpp" #include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp" #include "openPMD/auxiliary/Filesystem.hpp" -#include "openPMD/toolkit/ExternalBlockStorage_internal.hpp" + +#include +#include +#include #include #include @@ -12,7 +17,8 @@ #include #include #include -#include +#include +#include namespace { @@ -122,6 +128,58 @@ auto StdioBuilder::setOpenMode(std::string openMode) -> StdioBuilder & return *this; } +ExternalBlockStorageAws::ExternalBlockStorageAws( + Aws::S3::S3Client client, std::string bucketName) + : m_client{std::move(client)}, m_bucketName(std::move(bucketName)) +{} +ExternalBlockStorageAws::~ExternalBlockStorageAws() = default; + +AwsBuilder::AwsBuilder( + std::string bucketName, std::string accessKeyId, std::string secretKey) + : m_bucketName(std::move(bucketName)) + , m_accessKeyId(std::move(accessKeyId)) + , 
m_secretKey(std::move(secretKey)) +{} + +auto AwsBuilder::setBucketName(std::string bucketName) -> AwsBuilder & +{ + m_bucketName = std::move(bucketName); + return *this; +} + +auto internal::AwsBuilder::setCredentials( + std::string accessKeyId, std::string secretKey) -> AwsBuilder & +{ + m_accessKeyId = std::move(accessKeyId); + m_secretKey = std::move(secretKey); + return *this; +} + +auto AwsBuilder::setEndpointOverride(std::string endpoint) -> AwsBuilder & +{ + m_endpointOverride = std::move(endpoint); + return *this; +} + +auto AwsBuilder::setRegion(std::string regionName) -> AwsBuilder & +{ + m_region = std::move(regionName); + return *this; +} + +auto AwsBuilder::setScheme(Scheme s) -> AwsBuilder & +{ + m_scheme = s; + return *this; +} + +auto internal::AwsBuilder::setSessionToken(std::string sessionToken) + -> AwsBuilder & +{ + m_sessionToken = std::move(sessionToken); + return *this; +} + StdioBuilder::operator ExternalBlockStorage() { return ExternalBlockStorage{std::make_unique( @@ -132,6 +190,63 @@ auto StdioBuilder::build() -> ExternalBlockStorage { return *this; } + +AwsBuilder::operator ExternalBlockStorage() +{ + Aws::Client::ClientConfiguration config; + + if (m_endpointOverride.has_value()) + { + config.endpointOverride = *m_endpointOverride; + } + if (m_region.has_value()) + { + config.region = *m_region; + } + else + { + config.region = "us-east-1"; + } + if (m_scheme.has_value()) + { + switch (*m_scheme) + { + case Scheme::HTTP: + config.scheme = Aws::Http::Scheme::HTTP; + break; + case Scheme::HTTPS: + config.scheme = Aws::Http::Scheme::HTTPS; + break; + break; + } + } + + // default timeout + config.connectTimeoutMs = 5000; + config.requestTimeoutMs = 15000; + + auto aws_credentials = [&]() -> Aws::Auth::AWSCredentials { + if (m_sessionToken.has_value()) + { + return {m_accessKeyId, m_secretKey, *m_sessionToken}; + } + else + { + return {m_accessKeyId, m_secretKey}; + } + }(); + + // Create the S3 client + Aws::S3::S3Client s3_client( + 
aws_credentials, + config, + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, + false); + + // Create the AWS storage backend + return ExternalBlockStorage{std::make_unique( + std::move(s3_client), std::move(m_bucketName))}; +} } // namespace openPMD::internal namespace openPMD @@ -148,6 +263,15 @@ auto ExternalBlockStorage::makeStdioSession(std::string directory) return internal::StdioBuilder{std::move(directory)}; } +template +auto ExternalBlockStorage::makeAwsSession( + std::string bucketName, std::string accessKeyId, std::string secretKey) + -> internal::AwsBuilder +{ + return internal::AwsBuilder( + std::move(bucketName), std::move(accessKeyId), std::move(secretKey)); +} + template auto ExternalBlockStorage::store( Extent globalExtent, From c68ff322079094f714fbafac2698ed08b07aecd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 29 Sep 2025 10:59:58 +0200 Subject: [PATCH 07/50] first untested implementation for S3 --- .../openPMD/toolkit/ExternalBlockStorage.hpp | 6 -- src/toolkit/ExternalBlockStorage.cpp | 78 +++++++++++++++++-- 2 files changed, 73 insertions(+), 11 deletions(-) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index d69db5a16b..53cf0d36ea 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -2,7 +2,6 @@ #include "openPMD/Dataset.hpp" -#include #include #include @@ -40,10 +39,6 @@ struct StdioBuilder struct AwsBuilder { - struct init_credentials_tag_t - {}; - static constexpr init_credentials_tag_t init_credentials_tag = {}; - AwsBuilder( std::string bucketName, std::string accessKeyId, std::string secretKey); @@ -102,7 +97,6 @@ class ExternalBlockStorage static auto makeStdioSession(std::string directory) -> internal::StdioBuilder; - template static auto makeAwsSession( std::string bucketName, std::string accessKeyId, std::string secretKey) -> internal::AwsBuilder; diff --git 
a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 7672b1016e..3e59ea0260 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -10,15 +10,19 @@ #include #include #include +#include +#include +#include #include +#include #include #include #include #include +#include #include #include -#include namespace { @@ -82,7 +86,7 @@ ExternalBlockStorageStdio::~ExternalBlockStorageStdio() = default; auto ExternalBlockStorageStdio::put( std::string const &identifier, void const *data, size_t len) -> std::string { - auto sanitized = identifier; + auto sanitized = identifier + ".dat"; ExternalBlockStorage::sanitizeString(sanitized); std::string filepath = concat_filepath(m_directory, sanitized); @@ -131,9 +135,74 @@ auto StdioBuilder::setOpenMode(std::string openMode) -> StdioBuilder & ExternalBlockStorageAws::ExternalBlockStorageAws( Aws::S3::S3Client client, std::string bucketName) : m_client{std::move(client)}, m_bucketName(std::move(bucketName)) -{} +{ + Aws::S3::Model::CreateBucketRequest create_request; + create_request.SetBucket(m_bucketName); + auto create_outcome = m_client.CreateBucket(create_request); + if (!create_outcome.IsSuccess()) + { + std::cerr << "[ExternalBlockStorageAws::ExternalBlockStorageAws] " + "Warning: Failed to create bucket (may already exist): " + << create_outcome.GetError().GetMessage() << std::endl; + } + else + { + std::cout << "Bucket created: " << m_bucketName << std::endl; + } +} ExternalBlockStorageAws::~ExternalBlockStorageAws() = default; +namespace +{ + struct membuf : std::streambuf + { + membuf(char const *base, std::size_t size) + { + // hm hm + auto p = const_cast(base); + this->setg(p, p, p + size); // setup get area + } + }; + + struct imemstream : std::iostream + { + imemstream(char const *base, std::size_t size) + : std::iostream(&m_buf), m_buf(base, size) + {} + + private: + membuf m_buf; + }; +} // namespace + +auto ExternalBlockStorageAws::put( + 
std::string const &identifier, void const *data, size_t len) -> std::string +{ + auto sanitized = identifier; + ExternalBlockStorage::sanitizeString(sanitized); + + Aws::S3::Model::PutObjectRequest put_request; + put_request.SetBucket(m_bucketName); + put_request.SetKey(sanitized); + + auto input_data = Aws::MakeShared( + "PutObjectInputStream", reinterpret_cast(data), len); + std::static_pointer_cast(input_data); + + auto put_outcome = m_client.PutObject(put_request); + + if (put_outcome.IsSuccess()) + { + std::cout << "File uploaded successfully to S3!" << std::endl; + } + else + { + std::cerr << "Upload failed: " << put_outcome.GetError().GetMessage() + << std::endl; + } + return sanitized; +} + AwsBuilder::AwsBuilder( std::string bucketName, std::string accessKeyId, std::string secretKey) : m_bucketName(std::move(bucketName)) @@ -263,7 +332,6 @@ auto ExternalBlockStorage::makeStdioSession(std::string directory) return internal::StdioBuilder{std::move(directory)}; } -template auto ExternalBlockStorage::makeAwsSession( std::string bucketName, std::string accessKeyId, std::string secretKey) -> internal::AwsBuilder @@ -360,7 +428,7 @@ auto ExternalBlockStorage::store( block["offset"] = blockOffset; block["extent"] = blockExtent; std::stringstream filesystem_identifier; - filesystem_identifier << path.to_string() << "--" << index_as_str << ".dat"; + filesystem_identifier << path.to_string() << "--" << index_as_str; auto escaped_filesystem_identifier = m_worker->put( filesystem_identifier.str(), data, From 6ed78e542458f2f2db9d543c9f3a7f5c68461c19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 29 Sep 2025 14:45:54 +0200 Subject: [PATCH 08/50] Reordering --- ...ernalBlockStorage_internal.hpp => Aws.hpp} | 13 ------ include/openPMD/toolkit/AwsBuilder.hpp | 45 +++++++++++++++++++ .../openPMD/toolkit/ExternalBlockStorage.hpp | 45 +------------------ include/openPMD/toolkit/Stdio.hpp | 19 ++++++++ include/openPMD/toolkit/StdioBuilder.hpp | 24 
++++++++++ src/toolkit/ExternalBlockStorage.cpp | 4 +- 6 files changed, 92 insertions(+), 58 deletions(-) rename include/openPMD/toolkit/{ExternalBlockStorage_internal.hpp => Aws.hpp} (58%) create mode 100644 include/openPMD/toolkit/AwsBuilder.hpp create mode 100644 include/openPMD/toolkit/Stdio.hpp create mode 100644 include/openPMD/toolkit/StdioBuilder.hpp diff --git a/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp b/include/openPMD/toolkit/Aws.hpp similarity index 58% rename from include/openPMD/toolkit/ExternalBlockStorage_internal.hpp rename to include/openPMD/toolkit/Aws.hpp index 2ad77e7965..737629ec2b 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage_internal.hpp +++ b/include/openPMD/toolkit/Aws.hpp @@ -6,19 +6,6 @@ namespace openPMD::internal { -struct ExternalBlockStorageStdio : ExternalBlockStorageBackend -{ -private: - std::string m_directory; - std::string m_openMode; - -public: - ExternalBlockStorageStdio(std::string directory, std::string openMode); - auto put(std::string const &identifier, void const *data, size_t len) - -> std::string override; - ~ExternalBlockStorageStdio() override; -}; - struct ExternalBlockStorageAws : ExternalBlockStorageBackend { private: diff --git a/include/openPMD/toolkit/AwsBuilder.hpp b/include/openPMD/toolkit/AwsBuilder.hpp new file mode 100644 index 0000000000..3bb8cef491 --- /dev/null +++ b/include/openPMD/toolkit/AwsBuilder.hpp @@ -0,0 +1,45 @@ +#pragma once + +#include +#include +#include +#include + +namespace openPMD +{ +class ExternalBlockStorage; +} + +namespace openPMD::internal +{ +struct AwsBuilder +{ + AwsBuilder( + std::string bucketName, std::string accessKeyId, std::string secretKey); + + enum class Scheme : uint8_t + { + HTTP, + HTTPS + }; + std::string m_bucketName; + std::string m_accessKeyId; + std::string m_secretKey; + std::optional m_sessionToken; + std::initializer_list m_credentials; + std::optional m_endpointOverride; + std::optional m_region; + std::optional m_scheme; + + 
auto setBucketName(std::string bucketName) -> AwsBuilder &; + auto setCredentials(std::string accessKeyId, std::string secretKey) + -> AwsBuilder &; + auto setSessionToken(std::string sessionToken) -> AwsBuilder &; + auto setEndpointOverride(std::string endpoint) -> AwsBuilder &; + auto setRegion(std::string regionName) -> AwsBuilder &; + auto setScheme(Scheme s) -> AwsBuilder &; + + operator ::openPMD::ExternalBlockStorage(); + auto build() -> ::openPMD::ExternalBlockStorage; +}; +} // namespace openPMD::internal diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 53cf0d36ea..f795b34eb3 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -1,6 +1,8 @@ #pragma once #include "openPMD/Dataset.hpp" +#include "openPMD/toolkit/AwsBuilder.hpp" +#include "openPMD/toolkit/StdioBuilder.hpp" #include @@ -24,49 +26,6 @@ struct ExternalBlockStorageBackend -> std::string = 0; virtual ~ExternalBlockStorageBackend(); }; - -struct StdioBuilder -{ - std::string m_directory; - std::optional m_openMode = std::nullopt; - - auto setDirectory(std::string directory) -> StdioBuilder &; - auto setOpenMode(std::string openMode) -> StdioBuilder &; - - operator ExternalBlockStorage(); - auto build() -> ExternalBlockStorage; -}; - -struct AwsBuilder -{ - AwsBuilder( - std::string bucketName, std::string accessKeyId, std::string secretKey); - - enum class Scheme : uint8_t - { - HTTP, - HTTPS - }; - std::string m_bucketName; - std::string m_accessKeyId; - std::string m_secretKey; - std::optional m_sessionToken; - std::initializer_list m_credentials; - std::optional m_endpointOverride; - std::optional m_region; - std::optional m_scheme; - - auto setBucketName(std::string bucketName) -> AwsBuilder &; - auto setCredentials(std::string accessKeyId, std::string secretKey) - -> AwsBuilder &; - auto setSessionToken(std::string sessionToken) -> AwsBuilder &; - auto 
setEndpointOverride(std::string endpoint) -> AwsBuilder &; - auto setRegion(std::string regionName) -> AwsBuilder &; - auto setScheme(Scheme s) -> AwsBuilder &; - - operator ExternalBlockStorage(); - auto build() -> ExternalBlockStorage; -}; } // namespace openPMD::internal namespace openPMD diff --git a/include/openPMD/toolkit/Stdio.hpp b/include/openPMD/toolkit/Stdio.hpp new file mode 100644 index 0000000000..10a3e724be --- /dev/null +++ b/include/openPMD/toolkit/Stdio.hpp @@ -0,0 +1,19 @@ +#pragma once + +#include "openPMD/toolkit/ExternalBlockStorage.hpp" + +namespace openPMD::internal +{ +struct ExternalBlockStorageStdio : ExternalBlockStorageBackend +{ +private: + std::string m_directory; + std::string m_openMode; + +public: + ExternalBlockStorageStdio(std::string directory, std::string openMode); + auto put(std::string const &identifier, void const *data, size_t len) + -> std::string override; + ~ExternalBlockStorageStdio() override; +}; +} // namespace openPMD::internal diff --git a/include/openPMD/toolkit/StdioBuilder.hpp b/include/openPMD/toolkit/StdioBuilder.hpp new file mode 100644 index 0000000000..7d93048167 --- /dev/null +++ b/include/openPMD/toolkit/StdioBuilder.hpp @@ -0,0 +1,24 @@ +#pragma once + +#include +#include + +namespace openPMD +{ +class ExternalBlockStorage; +} + +namespace openPMD::internal +{ +struct StdioBuilder +{ + std::string m_directory; + std::optional m_openMode = std::nullopt; + + auto setDirectory(std::string directory) -> StdioBuilder &; + auto setOpenMode(std::string openMode) -> StdioBuilder &; + + operator ::openPMD::ExternalBlockStorage(); + auto build() -> ::openPMD::ExternalBlockStorage; +}; +} // namespace openPMD::internal diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 3e59ea0260..fc08eadc4a 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -1,11 +1,11 @@ #include "openPMD/toolkit/ExternalBlockStorage.hpp" -#include 
"openPMD/toolkit/ExternalBlockStorage_internal.hpp" - #include "openPMD/DatatypeMacros.hpp" #include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp" #include "openPMD/auxiliary/Filesystem.hpp" +#include "openPMD/toolkit/Aws.hpp" +#include "openPMD/toolkit/Stdio.hpp" #include #include From 3b19467e6f279b99fb5429efe8940355d3729bb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 29 Sep 2025 15:00:06 +0200 Subject: [PATCH 09/50] continue restructuring --- CMakeLists.txt | 6 +- src/toolkit/Aws.cpp | 80 +++++++ src/toolkit/AwsBuilder.cpp | 117 ++++++++++ src/toolkit/ExternalBlockStorage.cpp | 321 +-------------------------- src/toolkit/Stdio.cpp | 68 ++++++ src/toolkit/StdioBuilder.cpp | 31 +++ 6 files changed, 305 insertions(+), 318 deletions(-) create mode 100644 src/toolkit/Aws.cpp create mode 100644 src/toolkit/AwsBuilder.cpp create mode 100644 src/toolkit/Stdio.cpp create mode 100644 src/toolkit/StdioBuilder.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 50ea1be05c..b246c6bacb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -437,7 +437,11 @@ set(CORE_SOURCE src/snapshots/RandomAccessIterator.cpp src/snapshots/Snapshots.cpp src/snapshots/StatefulIterator.cpp - src/toolkit/ExternalBlockStorage.cpp) + src/toolkit/ExternalBlockStorage.cpp + src/toolkit/AwsBuilder.cpp + src/toolkit/Aws.cpp + src/toolkit/StdioBuilder.cpp + src/toolkit/Stdio.cpp) set(IO_SOURCE src/IO/AbstractIOHandler.cpp src/IO/AbstractIOHandlerImpl.cpp diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp new file mode 100644 index 0000000000..2f05ec9402 --- /dev/null +++ b/src/toolkit/Aws.cpp @@ -0,0 +1,80 @@ +#include "openPMD/toolkit/Aws.hpp" + +#include +#include + +#include + +namespace +{ +struct membuf : std::streambuf +{ + membuf(char const *base, std::size_t size) + { + auto p = const_cast(base); + this->setg(p, p, p + size); + } +}; + +struct imemstream : std::iostream +{ + imemstream(char const *base, std::size_t size) + : std::iostream(&m_buf), m_buf(base, 
size) + {} + +private: + membuf m_buf; +}; +} // namespace + +namespace openPMD::internal +{ +ExternalBlockStorageAws::ExternalBlockStorageAws( + Aws::S3::S3Client client, std::string bucketName) + : m_client{std::move(client)}, m_bucketName(std::move(bucketName)) +{ + Aws::S3::Model::CreateBucketRequest create_request; + create_request.SetBucket(m_bucketName); + auto create_outcome = m_client.CreateBucket(create_request); + if (!create_outcome.IsSuccess()) + { + std::cerr << "[ExternalBlockStorageAws::ExternalBlockStorageAws] " + "Warning: Failed to create bucket (may already exist): " + << create_outcome.GetError().GetMessage() << std::endl; + } + else + { + std::cout << "Bucket created: " << m_bucketName << std::endl; + } +} +ExternalBlockStorageAws::~ExternalBlockStorageAws() = default; + +auto ExternalBlockStorageAws::put( + std::string const &identifier, void const *data, size_t len) -> std::string +{ + auto sanitized = identifier; + ExternalBlockStorage::sanitizeString(sanitized); + + Aws::S3::Model::PutObjectRequest put_request; + put_request.SetBucket(m_bucketName); + put_request.SetKey(sanitized); + + auto input_data = Aws::MakeShared( + "PutObjectInputStream", reinterpret_cast(data), len); + std::static_pointer_cast(input_data); + + auto put_outcome = m_client.PutObject(put_request); + + if (put_outcome.IsSuccess()) + { + std::cout << "File uploaded successfully to S3!" 
<< std::endl; + } + else + { + std::cerr << "Upload failed: " << put_outcome.GetError().GetMessage() + << std::endl; + } + return sanitized; +} + +} // namespace openPMD::internal diff --git a/src/toolkit/AwsBuilder.cpp b/src/toolkit/AwsBuilder.cpp new file mode 100644 index 0000000000..13caa1f878 --- /dev/null +++ b/src/toolkit/AwsBuilder.cpp @@ -0,0 +1,117 @@ +#include "openPMD/toolkit/AwsBuilder.hpp" + +#include "openPMD/toolkit/Aws.hpp" +#include "openPMD/toolkit/ExternalBlockStorage.hpp" + +#include +#include +#include + +namespace openPMD::internal +{ +AwsBuilder::AwsBuilder( + std::string bucketName, std::string accessKeyId, std::string secretKey) + : m_bucketName(std::move(bucketName)) + , m_accessKeyId(std::move(accessKeyId)) + , m_secretKey(std::move(secretKey)) +{} + +auto AwsBuilder::setBucketName(std::string bucketName) -> AwsBuilder & +{ + m_bucketName = std::move(bucketName); + return *this; +} + +auto internal::AwsBuilder::setCredentials( + std::string accessKeyId, std::string secretKey) -> AwsBuilder & +{ + m_accessKeyId = std::move(accessKeyId); + m_secretKey = std::move(secretKey); + return *this; +} + +auto AwsBuilder::setEndpointOverride(std::string endpoint) -> AwsBuilder & +{ + m_endpointOverride = std::move(endpoint); + return *this; +} + +auto AwsBuilder::setRegion(std::string regionName) -> AwsBuilder & +{ + m_region = std::move(regionName); + return *this; +} + +auto AwsBuilder::setScheme(Scheme s) -> AwsBuilder & +{ + m_scheme = s; + return *this; +} + +auto internal::AwsBuilder::setSessionToken(std::string sessionToken) + -> AwsBuilder & +{ + m_sessionToken = std::move(sessionToken); + return *this; +} + +AwsBuilder::operator ExternalBlockStorage() +{ + Aws::Client::ClientConfiguration config; + + if (m_endpointOverride.has_value()) + { + config.endpointOverride = *m_endpointOverride; + } + if (m_region.has_value()) + { + config.region = *m_region; + } + else + { + config.region = "us-east-1"; + } + if (m_scheme.has_value()) + { + 
switch (*m_scheme) + { + case Scheme::HTTP: + config.scheme = Aws::Http::Scheme::HTTP; + break; + case Scheme::HTTPS: + config.scheme = Aws::Http::Scheme::HTTPS; + break; + break; + } + } + + config.connectTimeoutMs = 5000; + config.requestTimeoutMs = 15000; + + auto aws_credentials = [&]() -> Aws::Auth::AWSCredentials { + if (m_sessionToken.has_value()) + { + return {m_accessKeyId, m_secretKey, *m_sessionToken}; + } + else + { + return {m_accessKeyId, m_secretKey}; + } + }(); + + Aws::S3::S3Client s3_client( + aws_credentials, + config, + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, + false); + + return ExternalBlockStorage{std::make_unique( + std::move(s3_client), std::move(m_bucketName))}; +} + +auto AwsBuilder::build() -> ExternalBlockStorage +{ + return *this; +} + +} // namespace openPMD::internal diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index fc08eadc4a..fefaa00858 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -1,323 +1,19 @@ - #include "openPMD/toolkit/ExternalBlockStorage.hpp" #include "openPMD/DatatypeMacros.hpp" #include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp" -#include "openPMD/auxiliary/Filesystem.hpp" -#include "openPMD/toolkit/Aws.hpp" -#include "openPMD/toolkit/Stdio.hpp" -#include -#include -#include -#include -#include -#include +#include -#include -#include -#include #include #include #include -#include -#include -#include - -namespace -{ -auto concat_filepath(std::string const &s1, std::string const &s2) - -> std::string -{ - if (s1.empty()) - { - return s2; - } - if (s2.empty()) - { - return s1; - } - bool ends_with_slash = - *s1.crbegin() == openPMD::auxiliary::directory_separator; - bool starts_with_slash = - *s2.cbegin() == openPMD::auxiliary::directory_separator; - - if (ends_with_slash ^ starts_with_slash) - { - return s1 + s2; - } - else if (ends_with_slash && starts_with_slash) - { - return s1 + (s2.c_str() + 1); - } - else - { 
- return s1 + openPMD::auxiliary::directory_separator + s2; - } -} -} // namespace namespace openPMD::internal { ExternalBlockStorageBackend::~ExternalBlockStorageBackend() = default; - -ExternalBlockStorageStdio::ExternalBlockStorageStdio( - std::string directory, std::string openMode) - : m_directory(std::move(directory)), m_openMode(std::move(openMode)) -{ - if (m_directory.empty()) - { - throw std::invalid_argument( - "ExternalBlockStorageStdio: directory cannot be empty"); - } - - // Ensure the directory exists and is writable - if (!auxiliary::create_directories(m_directory)) - { - throw std::runtime_error( - "ExternalBlockStorageStdio: failed to create or access " - "directory: " + - m_directory); - } -} - -ExternalBlockStorageStdio::~ExternalBlockStorageStdio() = default; - -auto ExternalBlockStorageStdio::put( - std::string const &identifier, void const *data, size_t len) -> std::string -{ - auto sanitized = identifier + ".dat"; - ExternalBlockStorage::sanitizeString(sanitized); - std::string filepath = concat_filepath(m_directory, sanitized); - - if (len == 0) - { - return filepath; - } - - FILE *file = std::fopen(filepath.c_str(), "wb"); - if (!file) - { - throw std::runtime_error( - "ExternalBlockStorageStdio: failed to open file for writing: " + - filepath); - } - - size_t written = std::fwrite(data, 1, len, file); - if (written != len) - { - throw std::runtime_error( - "ExternalBlockStorageStdio: failed to write full data to file: " + - filepath); - } - - if (std::fclose(file) != 0) - { - throw std::runtime_error( - "ExternalBlockStorageStdio: failed to close file after writing: " + - filepath); - } - - return filepath; -} - -auto StdioBuilder::setDirectory(std::string directory) -> StdioBuilder & -{ - m_directory = std::move(directory); - return *this; -} -auto StdioBuilder::setOpenMode(std::string openMode) -> StdioBuilder & -{ - m_openMode = std::move(openMode); - return *this; } -ExternalBlockStorageAws::ExternalBlockStorageAws( - 
Aws::S3::S3Client client, std::string bucketName) - : m_client{std::move(client)}, m_bucketName(std::move(bucketName)) -{ - Aws::S3::Model::CreateBucketRequest create_request; - create_request.SetBucket(m_bucketName); - auto create_outcome = m_client.CreateBucket(create_request); - if (!create_outcome.IsSuccess()) - { - std::cerr << "[ExternalBlockStorageAws::ExternalBlockStorageAws] " - "Warning: Failed to create bucket (may already exist): " - << create_outcome.GetError().GetMessage() << std::endl; - } - else - { - std::cout << "Bucket created: " << m_bucketName << std::endl; - } -} -ExternalBlockStorageAws::~ExternalBlockStorageAws() = default; - -namespace -{ - struct membuf : std::streambuf - { - membuf(char const *base, std::size_t size) - { - // hm hm - auto p = const_cast(base); - this->setg(p, p, p + size); // setup get area - } - }; - - struct imemstream : std::iostream - { - imemstream(char const *base, std::size_t size) - : std::iostream(&m_buf), m_buf(base, size) - {} - - private: - membuf m_buf; - }; -} // namespace - -auto ExternalBlockStorageAws::put( - std::string const &identifier, void const *data, size_t len) -> std::string -{ - auto sanitized = identifier; - ExternalBlockStorage::sanitizeString(sanitized); - - Aws::S3::Model::PutObjectRequest put_request; - put_request.SetBucket(m_bucketName); - put_request.SetKey(sanitized); - - auto input_data = Aws::MakeShared( - "PutObjectInputStream", reinterpret_cast(data), len); - std::static_pointer_cast(input_data); - - auto put_outcome = m_client.PutObject(put_request); - - if (put_outcome.IsSuccess()) - { - std::cout << "File uploaded successfully to S3!" 
<< std::endl; - } - else - { - std::cerr << "Upload failed: " << put_outcome.GetError().GetMessage() - << std::endl; - } - return sanitized; -} - -AwsBuilder::AwsBuilder( - std::string bucketName, std::string accessKeyId, std::string secretKey) - : m_bucketName(std::move(bucketName)) - , m_accessKeyId(std::move(accessKeyId)) - , m_secretKey(std::move(secretKey)) -{} - -auto AwsBuilder::setBucketName(std::string bucketName) -> AwsBuilder & -{ - m_bucketName = std::move(bucketName); - return *this; -} - -auto internal::AwsBuilder::setCredentials( - std::string accessKeyId, std::string secretKey) -> AwsBuilder & -{ - m_accessKeyId = std::move(accessKeyId); - m_secretKey = std::move(secretKey); - return *this; -} - -auto AwsBuilder::setEndpointOverride(std::string endpoint) -> AwsBuilder & -{ - m_endpointOverride = std::move(endpoint); - return *this; -} - -auto AwsBuilder::setRegion(std::string regionName) -> AwsBuilder & -{ - m_region = std::move(regionName); - return *this; -} - -auto AwsBuilder::setScheme(Scheme s) -> AwsBuilder & -{ - m_scheme = s; - return *this; -} - -auto internal::AwsBuilder::setSessionToken(std::string sessionToken) - -> AwsBuilder & -{ - m_sessionToken = std::move(sessionToken); - return *this; -} - -StdioBuilder::operator ExternalBlockStorage() -{ - return ExternalBlockStorage{std::make_unique( - std::move(m_directory), std::move(m_openMode).value_or("wb"))}; -} - -auto StdioBuilder::build() -> ExternalBlockStorage -{ - return *this; -} - -AwsBuilder::operator ExternalBlockStorage() -{ - Aws::Client::ClientConfiguration config; - - if (m_endpointOverride.has_value()) - { - config.endpointOverride = *m_endpointOverride; - } - if (m_region.has_value()) - { - config.region = *m_region; - } - else - { - config.region = "us-east-1"; - } - if (m_scheme.has_value()) - { - switch (*m_scheme) - { - case Scheme::HTTP: - config.scheme = Aws::Http::Scheme::HTTP; - break; - case Scheme::HTTPS: - config.scheme = Aws::Http::Scheme::HTTPS; - break; - 
break; - } - } - - // default timeout - config.connectTimeoutMs = 5000; - config.requestTimeoutMs = 15000; - - auto aws_credentials = [&]() -> Aws::Auth::AWSCredentials { - if (m_sessionToken.has_value()) - { - return {m_accessKeyId, m_secretKey, *m_sessionToken}; - } - else - { - return {m_accessKeyId, m_secretKey}; - } - }(); - - // Create the S3 client - Aws::S3::S3Client s3_client( - aws_credentials, - config, - Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, - false); - - // Create the AWS storage backend - return ExternalBlockStorage{std::make_unique( - std::move(s3_client), std::move(m_bucketName))}; -} -} // namespace openPMD::internal - namespace openPMD { ExternalBlockStorage::ExternalBlockStorage() = default; @@ -349,17 +45,8 @@ auto ExternalBlockStorage::store( nlohmann::json::json_pointer const &path, T const *data) -> std::string { - // JSON Identifier: running counter. - // Do not use an array to avoid reindexing upon deletion. - - // Filesystem Identifier: JSON path + running counter. - - // For each externally handled data block, store: - // 1. Filesystem identifier - // 2. 
Offset, Extent auto &dataset = fullJsonDataset[path]; - // running_index denotes the last *used* block index in the dataset using running_index_t = uint64_t; running_index_t running_index = [&]() -> running_index_t { if (auto it = dataset.find("_running_index"); it != dataset.end()) @@ -421,8 +108,8 @@ auto ExternalBlockStorage::store( { throw std::runtime_error("Inconsistent chunk storage in datatype."); } - check_metadata("_byte_width", sizeof(T)); - check_metadata("_extent", globalExtent); + check_metadata("byte_width", sizeof(T)); + check_metadata("extent", globalExtent); auto &block = dataset[index_as_str]; block["offset"] = blockOffset; @@ -443,7 +130,6 @@ auto ExternalBlockStorage::store( void ExternalBlockStorage::sanitizeString(std::string &s) { - // Replace invalid characters with underscore for (char &c : s) { if (c == '/' || c == '\\' || c == ':' || c == '*' || c == '?' || @@ -467,4 +153,5 @@ void ExternalBlockStorage::sanitizeString(std::string &s) OPENPMD_INSTANTIATE_DATATYPEHANDLING(internal::JsonDatatypeHandling, type) OPENPMD_FOREACH_DATASET_DATATYPE(OPENPMD_INSTANTIATE) #undef OPENPMD_INSTANTIATE + } // namespace openPMD diff --git a/src/toolkit/Stdio.cpp b/src/toolkit/Stdio.cpp new file mode 100644 index 0000000000..dac83d35c9 --- /dev/null +++ b/src/toolkit/Stdio.cpp @@ -0,0 +1,68 @@ +#include "openPMD/toolkit/Stdio.hpp" + +#include "openPMD/auxiliary/Filesystem.hpp" + +#include +#include + +namespace openPMD::internal +{ +ExternalBlockStorageStdio::ExternalBlockStorageStdio( + std::string directory, std::string openMode) + : m_directory(std::move(directory)), m_openMode(std::move(openMode)) +{ + if (m_directory.empty()) + { + throw std::invalid_argument( + "ExternalBlockStorageStdio: directory cannot be empty"); + } + + if (!auxiliary::create_directories(m_directory)) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to create or access " + "directory: " + + m_directory); + } +} + 
+ExternalBlockStorageStdio::~ExternalBlockStorageStdio() = default; + +auto ExternalBlockStorageStdio::put( + std::string const &identifier, void const *data, size_t len) -> std::string +{ + auto sanitized = identifier + ".dat"; + ExternalBlockStorage::sanitizeString(sanitized); + std::string filepath = m_directory + "/" + sanitized; + + if (len == 0) + { + return filepath; + } + + FILE *file = std::fopen(filepath.c_str(), "wb"); + if (!file) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to open file for writing: " + + filepath); + } + + size_t written = std::fwrite(data, 1, len, file); + if (written != len) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to write full data to file: " + + filepath); + } + + if (std::fclose(file) != 0) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to close file after writing: " + + filepath); + } + + return filepath; +} +} // namespace openPMD::internal diff --git a/src/toolkit/StdioBuilder.cpp b/src/toolkit/StdioBuilder.cpp new file mode 100644 index 0000000000..8fa5f6bb6f --- /dev/null +++ b/src/toolkit/StdioBuilder.cpp @@ -0,0 +1,31 @@ +#include "openPMD/toolkit/StdioBuilder.hpp" + +#include "openPMD/toolkit/ExternalBlockStorage.hpp" +#include "openPMD/toolkit/Stdio.hpp" + +#include + +namespace openPMD::internal +{ +auto StdioBuilder::setDirectory(std::string directory) -> StdioBuilder & +{ + m_directory = std::move(directory); + return *this; +} +auto StdioBuilder::setOpenMode(std::string openMode) -> StdioBuilder & +{ + m_openMode = std::move(openMode); + return *this; +} + +StdioBuilder::operator ExternalBlockStorage() +{ + return ExternalBlockStorage{std::make_unique( + std::move(m_directory), std::move(m_openMode).value_or("wb"))}; +} + +auto StdioBuilder::build() -> ExternalBlockStorage +{ + return *this; +} +} // namespace openPMD::internal From a9ab88662c80833d41f9aabbac2e6db4b8be4c9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: 
Mon, 29 Sep 2025 15:25:49 +0200 Subject: [PATCH 10/50] Some first little MPI awareness --- .../openPMD/toolkit/ExternalBlockStorage.hpp | 1 + src/IO/JSON/JSONIOHandlerImpl.cpp | 157 ++++++++++-------- src/toolkit/ExternalBlockStorage.cpp | 9 +- 3 files changed, 100 insertions(+), 67 deletions(-) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index f795b34eb3..f1b5b83671 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -68,6 +68,7 @@ class ExternalBlockStorage Extent blockExtent, nlohmann::json &fullJsonDataset, nlohmann::json::json_pointer const &path, + std::optional infix, // e.g. for distinguishing MPI ranks T const *data) -> std::string; static void sanitizeString(std::string &s); diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index d38a81180c..c03f313813 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -1148,6 +1148,30 @@ void JSONIOHandlerImpl::deleteAttribute( namespace { + template + auto + write_rank_to_stream_with_sufficient_padding(Stream &s, int rank, int size) + -> Stream & + { + auto num_digits = [](unsigned n) -> unsigned { + constexpr auto max = std::numeric_limits::max(); + unsigned base_10 = 1; + unsigned res = 1; + while (base_10 < max) + { + base_10 *= 10; + if (n / base_10 == 0) + { + return res; + } + ++res; + } + return res; + }; + s << std::setw(num_digits(size - 1)) << std::setfill('0') << rank; + return s; + } + struct StoreExternally { template @@ -1179,7 +1203,22 @@ void JSONIOHandlerImpl::writeDataset( { case DatasetMode::Dataset: break; - case DatasetMode::Template: + case DatasetMode::Template: { + std::optional rankInfix; +#if openPMD_HAVE_MPI + if (m_communicator.has_value()) + { + auto &comm = *m_communicator; + // TODO maybe cache the result for this computation + int rank, size; + MPI_Comm_rank(comm, &rank); + 
MPI_Comm_size(comm, &size); + std::stringstream s; + s << "r"; + write_rank_to_stream_with_sufficient_padding(s, rank, size); + rankInfix = s.str(); + } +#endif switchDatasetType( parameters.dtype, externalBlockStorage, @@ -1188,7 +1227,8 @@ void JSONIOHandlerImpl::writeDataset( parameters.offset, parameters.extent, jsonRoot, - filePosition->id); + filePosition->id, + std::move(rankInfix)); // if (!m_datasetMode.m_skipWarnings) // { // std::cerr @@ -1199,6 +1239,7 @@ void JSONIOHandlerImpl::writeDataset( // } return; } + } switchType(parameters.dtype, j, parameters); @@ -2146,53 +2187,37 @@ auto JSONIOHandlerImpl::putJsonContents( }; #if openPMD_HAVE_MPI - auto num_digits = [](unsigned n) -> unsigned { - constexpr auto max = std::numeric_limits::max(); - unsigned base_10 = 1; - unsigned res = 1; - while (base_10 < max) + auto parallelImplementation = [this, &filename, &writeSingleFile]( + MPI_Comm comm) { + auto path = fullPath(*filename); + auto dirpath = path + ".parallel"; + if (!auxiliary::create_directories(dirpath)) { - base_10 *= 10; - if (n / base_10 == 0) - { - return res; - } - ++res; + throw std::runtime_error( + "Failed creating directory '" + dirpath + + "' for parallel JSON output"); } - return res; - }; - - auto parallelImplementation = - [this, &filename, &writeSingleFile, &num_digits](MPI_Comm comm) { - auto path = fullPath(*filename); - auto dirpath = path + ".parallel"; - if (!auxiliary::create_directories(dirpath)) - { - throw std::runtime_error( - "Failed creating directory '" + dirpath + - "' for parallel JSON output"); - } - int rank = 0, size = 0; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &size); - std::stringstream subfilePath; - // writeSingleFile will prepend the base dir - subfilePath << *filename << ".parallel/mpi_rank_" - << std::setw(num_digits(size - 1)) << std::setfill('0') - << rank << [&]() { - switch (m_fileFormat) - { - case FileFormat::Json: - return ".json"; - case FileFormat::Toml: - return ".toml"; - } - throw 
std::runtime_error("Unreachable!"); - }(); - writeSingleFile(subfilePath.str()); - if (rank == 0) - { - constexpr char const *readme_msg = R"( + int rank = 0, size = 0; + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &size); + std::stringstream subfilePath; + // writeSingleFile will prepend the base dir + subfilePath << *filename << ".parallel/mpi_rank_"; + write_rank_to_stream_with_sufficient_padding(subfilePath, rank, size) + << [&]() { + switch (m_fileFormat) + { + case FileFormat::Json: + return ".json"; + case FileFormat::Toml: + return ".toml"; + } + throw std::runtime_error("Unreachable!"); + }(); + writeSingleFile(subfilePath.str()); + if (rank == 0) + { + constexpr char const *readme_msg = R"( This folder has been created by a parallel instance of the JSON backend in openPMD. There is one JSON file for each parallel writer MPI rank. The parallel JSON backend performs no metadata or data aggregation at all. @@ -2202,26 +2227,26 @@ There is no support in the openPMD-api for reading this folder as a single dataset. For reading purposes, either pick a single .json file and read that, or merge the .json files somehow (no tooling provided for this (yet)). )"; - std::fstream readme_file; - readme_file.open( - dirpath + "/README.txt", - std::ios_base::out | std::ios_base::trunc); - readme_file << readme_msg + 1; - readme_file.close(); - if (!readme_file.good() && - !filename.fileState->printedReadmeWarningAlready) - { - std::cerr - << "[Warning] Something went wrong in trying to create " - "README file at '" - << dirpath - << "/README.txt'. Will ignore and continue. 
The README " - "message would have been:\n----------\n" - << readme_msg + 1 << "----------" << std::endl; - filename.fileState->printedReadmeWarningAlready = true; - } + std::fstream readme_file; + readme_file.open( + dirpath + "/README.txt", + std::ios_base::out | std::ios_base::trunc); + readme_file << readme_msg + 1; + readme_file.close(); + if (!readme_file.good() && + !filename.fileState->printedReadmeWarningAlready) + { + std::cerr + << "[Warning] Something went wrong in trying to create " + "README file at '" + << dirpath + << "/README.txt'. Will ignore and continue. The README " + "message would have been:\n----------\n" + << readme_msg + 1 << "----------" << std::endl; + filename.fileState->printedReadmeWarningAlready = true; } - }; + } + }; std::shared_ptr res; if (m_communicator.has_value()) diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index fefaa00858..2a0f4fc683 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -43,6 +43,7 @@ auto ExternalBlockStorage::store( Extent blockExtent, nlohmann::json &fullJsonDataset, nlohmann::json::json_pointer const &path, + std::optional infix, T const *data) -> std::string { auto &dataset = fullJsonDataset[path]; @@ -115,7 +116,12 @@ auto ExternalBlockStorage::store( block["offset"] = blockOffset; block["extent"] = blockExtent; std::stringstream filesystem_identifier; - filesystem_identifier << path.to_string() << "--" << index_as_str; + filesystem_identifier << path.to_string(); + if (infix.has_value()) + { + filesystem_identifier << "--" << *infix; + } + filesystem_identifier << "--" << index_as_str; auto escaped_filesystem_identifier = m_worker->put( filesystem_identifier.str(), data, @@ -148,6 +154,7 @@ void ExternalBlockStorage::sanitizeString(std::string &s) Extent blockExtent, \ nlohmann::json & fullJsonDataset, \ nlohmann::json::json_pointer const &path, \ + std::optional infix, \ type const *data) -> std::string; #define 
OPENPMD_INSTANTIATE(type) \ OPENPMD_INSTANTIATE_DATATYPEHANDLING(internal::JsonDatatypeHandling, type) From 74ac7eea585d46d69b2422bbaf1ac70b555e0ea7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 29 Sep 2025 15:59:45 +0200 Subject: [PATCH 11/50] WIP: Config for external block storage from JSON --- include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 132 ++++-- .../openPMD/toolkit/ExternalBlockStorage.hpp | 5 + src/IO/JSON/JSONIOHandlerImpl.cpp | 445 ++++++++++-------- 3 files changed, 341 insertions(+), 241 deletions(-) diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index 94d00dcf90..0bc7e1db81 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -33,6 +33,7 @@ #include #include +#include #if openPMD_HAVE_MPI #include #endif @@ -196,6 +197,15 @@ struct JsonDatatypeHandling namespace openPMD { +namespace dataset_mode_types +{ + struct Dataset_t + {}; + struct Template_t + {}; + using External_t = std::shared_ptr; +} // namespace dataset_mode_types + class JSONIOHandlerImpl : public AbstractIOHandlerImpl { using json = nlohmann::json; @@ -218,8 +228,6 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl std::string originalExtension); #endif - ExternalBlockStorage externalBlockStorage; - void init(openPMD::json::TracingJSON config); ~JSONIOHandlerImpl() override; @@ -286,42 +294,6 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl std::future flush(); -private: -#if openPMD_HAVE_MPI - std::optional m_communicator; -#endif - - using FILEHANDLE = std::fstream; - - // map each Writable to its associated file - // contains only the filename, without the OS path - std::unordered_map m_files; - - std::unordered_map> m_jsonVals; - - // files that have logically, but not physically been written to - std::unordered_set m_dirty; - - /* - * Is set by constructor. 
- */ - FileFormat m_fileFormat{}; - - /* - * Under which key do we find the backend configuration? - * -> "json" for the JSON backend, "toml" for the TOML backend. - */ - std::string backendConfigKey() const; - - /* - * First return value: The location of the JSON value (either "json" or - * "toml") Second return value: The value that was maybe found at this place - */ - std::pair> - getBackendConfig(openPMD::json::TracingJSON &) const; - - std::string m_originalExtension; - /* * Was the config value explicitly user-chosen, or are we still working with * defaults? @@ -336,17 +308,36 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl // Dataset IO mode // ///////////////////// - enum class DatasetMode + struct DatasetMode + : std::variant< + dataset_mode_types::Dataset_t, + dataset_mode_types::Template_t, + dataset_mode_types::External_t> { - Dataset, - Template + using Dataset_t = dataset_mode_types::Dataset_t; + using Template_t = dataset_mode_types::Template_t; + using External_t = dataset_mode_types::External_t; + constexpr static Dataset_t Dataset{}; + constexpr static Template_t Template{}; + + using variant_t = std::variant< + dataset_mode_types::Dataset_t, + dataset_mode_types::Template_t, + External_t>; + using variant_t ::operator=; + + // casts needed because of + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90943 + inline auto as_base() const -> variant_t const & + { + return *this; + } + inline auto as_base() -> variant_t & + { + return *this; + } }; - // IOMode m_mode{}; - // SpecificationVia m_IOModeSpecificationVia = - // SpecificationVia::DefaultValue; bool m_printedSkippedWriteWarningAlready - // = false; - struct DatasetMode_s { // Initialized in init() @@ -361,8 +352,6 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl m_mode, m_specificationVia, m_skipWarnings}; } }; - DatasetMode_s m_datasetMode; - DatasetMode_s retrieveDatasetMode(openPMD::json::TracingJSON &config) const; /////////////////////// // Attribute IO mode // @@ 
-381,8 +370,50 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl AttributeMode m_mode{}; SpecificationVia m_specificationVia = SpecificationVia::DefaultValue; }; - AttributeMode_s m_attributeMode; +private: +#if openPMD_HAVE_MPI + std::optional m_communicator; +#endif + + using FILEHANDLE = std::fstream; + + // map each Writable to its associated file + // contains only the filename, without the OS path + std::unordered_map m_files; + + std::unordered_map> m_jsonVals; + + // files that have logically, but not physically been written to + std::unordered_set m_dirty; + + /* + * Is set by constructor. + */ + FileFormat m_fileFormat{}; + + /* + * Under which key do we find the backend configuration? + * -> "json" for the JSON backend, "toml" for the TOML backend. + */ + std::string backendConfigKey() const; + + /* + * First return value: The location of the JSON value (either "json" or + * "toml") Second return value: The value that was maybe found at this place + */ + std::pair> + getBackendConfig(openPMD::json::TracingJSON &) const; + static std::pair> + getBackendConfig( + openPMD::json::TracingJSON &, std::string const &configLocation); + + std::string m_originalExtension; + + DatasetMode_s m_datasetMode; + DatasetMode_s retrieveDatasetMode(openPMD::json::TracingJSON &config) const; + + AttributeMode_s m_attributeMode; AttributeMode_s retrieveAttributeMode(openPMD::json::TracingJSON &config) const; @@ -432,7 +463,8 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl // essentially: m_i = \prod_{j=0}^{i-1} extent_j static Extent getMultiplicators(Extent const &extent); - static std::pair getExtent(nlohmann::json &j); + static std::pair + getExtent(nlohmann::json &j, DatasetMode const &baseMode); // remove single '/' in the beginning and end of a string static std::string removeSlashes(std::string); diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index f1b5b83671..7a26647970 100644 --- 
a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -71,6 +71,11 @@ class ExternalBlockStorage std::optional infix, // e.g. for distinguishing MPI ranks T const *data) -> std::string; + auto externalStorageLocation() const -> nlohmann::json + { + return "implement me"; + } + static void sanitizeString(std::string &s); }; diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index c03f313813..6d96eb9578 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -31,6 +31,7 @@ #include "openPMD/auxiliary/Memory.hpp" #include "openPMD/auxiliary/StringManip.hpp" #include "openPMD/auxiliary/TypeTraits.hpp" +#include "openPMD/auxiliary/Variant.hpp" #include "openPMD/backend/Attribute.hpp" #include "openPMD/backend/Writable.hpp" #include "openPMD/toolkit/ExternalBlockStorage.hpp" @@ -43,6 +44,7 @@ #include #include #include +#include namespace openPMD { @@ -257,15 +259,74 @@ namespace internal } } // namespace internal +namespace +{ + void parse_internal_mode( + nlohmann::json const &mode_j, + std::string const &configLocation, + JSONIOHandlerImpl::DatasetMode_s &res) + { + using DatasetMode = JSONIOHandlerImpl::DatasetMode; + using SpecificationVia = JSONIOHandlerImpl::SpecificationVia; + + DatasetMode &ioMode = res.m_mode; + SpecificationVia &specificationVia = res.m_specificationVia; + bool &skipWarnings = res.m_skipWarnings; + + auto modeOption = openPMD::json::asLowerCaseStringDynamic(mode_j); + if (!modeOption.has_value()) + { + throw error::BackendConfigSchema( + {configLocation, "mode"}, + "Invalid value of non-string type (accepted values are " + "'dataset' and 'template'."); + } + auto mode = modeOption.value(); + if (mode == "dataset") + { + ioMode = DatasetMode::Dataset; + specificationVia = SpecificationVia::Manually; + } + else if (mode == "template") + { + ioMode = DatasetMode::Template; + specificationVia = SpecificationVia::Manually; + } + 
else if (mode == "template_no_warn") + { + ioMode = DatasetMode::Template; + specificationVia = SpecificationVia::Manually; + skipWarnings = true; + } + else + { + throw error::BackendConfigSchema( + {configLocation, "dataset", "mode"}, + "Invalid value: '" + mode + + "' (accepted values are 'dataset' and 'template'."); + } + } + void parse_external_mode( + [[maybe_unused]] json::TracingJSON mode, + [[maybe_unused]] std::string const &configLocation, + JSONIOHandlerImpl::DatasetMode_s &res) + { + using SpecificationVia = JSONIOHandlerImpl::SpecificationVia; + + res.m_mode = std::make_shared( + ExternalBlockStorage::makeStdioSession("./external_blocks")); + res.m_specificationVia = SpecificationVia::Manually; + } +} // namespace + auto JSONIOHandlerImpl::retrieveDatasetMode( openPMD::json::TracingJSON &config) const -> DatasetMode_s { // start with / copy from current config auto res = m_datasetMode; - DatasetMode &ioMode = res.m_mode; - SpecificationVia &specificationVia = res.m_specificationVia; - bool &skipWarnings = res.m_skipWarnings; - if (auto [configLocation, maybeConfig] = getBackendConfig(config); + + if (auto [configLocation, maybeConfig] = + getBackendConfig(config, backendConfigKey()); maybeConfig.has_value()) { auto jsonConfig = maybeConfig.value(); @@ -274,38 +335,14 @@ auto JSONIOHandlerImpl::retrieveDatasetMode( auto datasetConfig = jsonConfig["dataset"]; if (datasetConfig.json().contains("mode")) { - auto modeOption = openPMD::json::asLowerCaseStringDynamic( - datasetConfig["mode"].json()); - if (!modeOption.has_value()) - { - throw error::BackendConfigSchema( - {configLocation, "mode"}, - "Invalid value of non-string type (accepted values are " - "'dataset' and 'template'."); - } - auto mode = modeOption.value(); - if (mode == "dataset") - { - ioMode = DatasetMode::Dataset; - specificationVia = SpecificationVia::Manually; - } - else if (mode == "template") - { - ioMode = DatasetMode::Template; - specificationVia = SpecificationVia::Manually; - } - 
else if (mode == "template_no_warn") + auto mode = datasetConfig["mode"]; + if (mode.json().is_object()) { - ioMode = DatasetMode::Template; - specificationVia = SpecificationVia::Manually; - skipWarnings = true; + parse_external_mode(std::move(mode), configLocation, res); } else { - throw error::BackendConfigSchema( - {configLocation, "dataset", "mode"}, - "Invalid value: '" + mode + - "' (accepted values are 'dataset' and 'template'."); + parse_internal_mode(mode.json(), configLocation, res); } } } @@ -377,7 +414,13 @@ std::string JSONIOHandlerImpl::backendConfigKey() const std::pair> JSONIOHandlerImpl::getBackendConfig(openPMD::json::TracingJSON &config) const { - std::string configLocation = backendConfigKey(); + return getBackendConfig(config, backendConfigKey()); +} + +std::pair> +JSONIOHandlerImpl::getBackendConfig( + openPMD::json::TracingJSON &config, std::string const &configLocation) +{ if (config.json().contains(configLocation)) { return std::make_pair( @@ -444,9 +487,6 @@ void JSONIOHandlerImpl::init(openPMD::json::TracingJSON config) (void)_; warnUnusedJson(backendConfig.value()); } - - externalBlockStorage = - ExternalBlockStorage::makeStdioSession("./external_blocks/"); } JSONIOHandlerImpl::~JSONIOHandlerImpl() = default; @@ -642,47 +682,51 @@ void JSONIOHandlerImpl::createDataset( auto &dset = jsonVal[name]; dset["datatype"] = internal::jsonDatatypeToString(parameter.dtype); - switch (localMode) - { - case DatasetMode::Dataset: { - auto extent = parameter.extent; - switch (parameter.dtype) - { - case Datatype::CFLOAT: - case Datatype::CDOUBLE: - case Datatype::CLONG_DOUBLE: { - extent.push_back(2); - break; - } - default: - break; - } - if (parameter.extent.size() != 1 || - parameter.extent[0] != Dataset::UNDEFINED_EXTENT) - { - // TOML does not support nulls, so initialize with zero - dset["data"] = initializeNDArray( - extent, - m_fileFormat == FileFormat::Json ? 
std::optional{} - : parameter.dtype); - } - break; - } - case DatasetMode::Template: - if (parameter.extent != Extent{0} && - parameter.extent[0] != Dataset::UNDEFINED_EXTENT) - { - dset["extent"] = parameter.extent; - } - else - { - // no-op - // If extent is empty or no datatype is defined, don't bother - // writing it. - // The datatype is written above anyway. - } - break; - } + std::visit( + auxiliary::overloaded{ + [&](DatasetMode::Dataset_t const &) { + auto extent = parameter.extent; + switch (parameter.dtype) + { + case Datatype::CFLOAT: + case Datatype::CDOUBLE: + case Datatype::CLONG_DOUBLE: { + extent.push_back(2); + break; + } + default: + break; + } + if (parameter.extent.size() != 1 || + parameter.extent[0] != Dataset::UNDEFINED_EXTENT) + { + // TOML does not support nulls, so initialize with zero + dset["data"] = initializeNDArray( + extent, + m_fileFormat == FileFormat::Json + ? std::optional{} + : parameter.dtype); + } + }, + [&](DatasetMode::Template_t const &) { + if (parameter.extent != Extent{0} && + parameter.extent[0] != Dataset::UNDEFINED_EXTENT) + { + dset["extent"] = parameter.extent; + } + else + { + // no-op + // If extent is empty or no datatype is defined, don't + // bother writing it. The datatype is written above + // anyway. 
+ } + }, + [&](DatasetMode::External_t const &) { + dset["extent"] = parameter.extent; + }}, + localMode.as_base()); + writable->written = true; m_dirty.emplace(file); } @@ -732,7 +776,8 @@ void JSONIOHandlerImpl::extendDataset( try { Extent datasetExtent; - std::tie(datasetExtent, localIOMode) = getExtent(j); + std::tie(datasetExtent, localIOMode) = + getExtent(j, m_datasetMode.m_mode); VERIFY_ALWAYS( datasetExtent.size() == parameters.extent.size(), "[JSON] Cannot change dimensionality of a dataset") @@ -750,38 +795,40 @@ void JSONIOHandlerImpl::extendDataset( "[JSON] The specified location contains no valid dataset"); } - switch (localIOMode) - { - case DatasetMode::Dataset: { - auto extent = parameters.extent; - auto datatype = stringToDatatype(j["datatype"].get()); - switch (datatype) - { - case Datatype::CFLOAT: - case Datatype::CDOUBLE: - case Datatype::CLONG_DOUBLE: { - extent.push_back(2); - break; - } - default: - // nothing to do - break; - } - // TOML does not support nulls, so initialize with zero - nlohmann::json newData = initializeNDArray( - extent, - m_fileFormat == FileFormat::Json ? std::optional{} - : datatype); - nlohmann::json &oldData = j["data"]; - mergeInto(newData, oldData); - j["data"] = newData; - } - break; - case DatasetMode::Template: { - j["extent"] = parameters.extent; - } - break; - } + std::visit( + auxiliary::overloaded{ + [&](DatasetMode::Dataset_t const &) { + auto extent = parameters.extent; + auto datatype = + stringToDatatype(j["datatype"].get()); + switch (datatype) + { + case Datatype::CFLOAT: + case Datatype::CDOUBLE: + case Datatype::CLONG_DOUBLE: { + extent.push_back(2); + break; + } + default: + // nothing to do + break; + } + // TOML does not support nulls, so initialize with zero + nlohmann::json newData = initializeNDArray( + extent, + m_fileFormat == FileFormat::Json ? 
std::optional{} + : datatype); + nlohmann::json &oldData = j["data"]; + mergeInto(newData, oldData); + j["data"] = newData; + }, + [&](DatasetMode::Template_t const &) { + j["extent"] = parameters.extent; + }, + [&](DatasetMode::External_t const &) { + j["extent"] = parameters.extent; + }}, + localIOMode.as_base()); writable->written = true; } @@ -977,7 +1024,7 @@ void JSONIOHandlerImpl::openDataset( *parameters.dtype = Datatype(stringToDatatype(datasetJson["datatype"].get())); - *parameters.extent = getExtent(datasetJson).first; + *parameters.extent = getExtent(datasetJson, m_datasetMode.m_mode).first; writable->written = true; } @@ -1199,49 +1246,49 @@ void JSONIOHandlerImpl::writeDataset( auto &jsonRoot = *obtainJsonContents(file); auto &j = jsonRoot[filePosition->id]; - switch (verifyDataset(parameters, j)) - { - case DatasetMode::Dataset: - break; - case DatasetMode::Template: { - std::optional rankInfix; + std::visit( + auxiliary::overloaded{ + [&](DatasetMode::Dataset_t const &) { + switchType(parameters.dtype, j, parameters); + }, + [&](DatasetMode::Template_t const &) { + if (!m_datasetMode.m_skipWarnings) + { + std::cerr << "[JSON/TOML backend: Warning] Trying to write " + "data to a " + "template dataset. Will skip." 
+ << '\n'; + m_datasetMode.m_skipWarnings = true; + } + }, + [&](DatasetMode::External_t const &external) { + std::optional rankInfix; #if openPMD_HAVE_MPI - if (m_communicator.has_value()) - { - auto &comm = *m_communicator; - // TODO maybe cache the result for this computation - int rank, size; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &size); - std::stringstream s; - s << "r"; - write_rank_to_stream_with_sufficient_padding(s, rank, size); - rankInfix = s.str(); - } + if (m_communicator.has_value()) + { + auto &comm = *m_communicator; + // TODO maybe cache the result for this computation + int rank, size; + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &size); + std::stringstream s; + s << "r"; + write_rank_to_stream_with_sufficient_padding(s, rank, size); + rankInfix = s.str(); + } #endif - switchDatasetType( - parameters.dtype, - externalBlockStorage, - parameters.data.get(), - j.at("extent").get(), - parameters.offset, - parameters.extent, - jsonRoot, - filePosition->id, - std::move(rankInfix)); - // if (!m_datasetMode.m_skipWarnings) - // { - // std::cerr - // << "[JSON/TOML backend: Warning] Trying to write data to a " - // "template dataset. Will skip." - // << '\n'; - // m_datasetMode.m_skipWarnings = true; - // } - return; - } - } - - switchType(parameters.dtype, j, parameters); + switchDatasetType( + parameters.dtype, + *external, + parameters.data.get(), + j.at("extent").get(), + parameters.offset, + parameters.extent, + jsonRoot, + filePosition->id, + std::move(rankInfix)); + }}, + verifyDataset(parameters, j).as_base()); writable->written = true; } @@ -1320,30 +1367,35 @@ void JSONIOHandlerImpl::readDataset( auto &j = obtainJsonContents(writable); DatasetMode localMode = verifyDataset(parameters, j); - switch (localMode) - { - case DatasetMode::Template: - std::cerr << "[Warning] Cannot read chunks in Template mode of JSON " - "backend. Will fill with zeroes instead." 
- << '\n'; - switchNonVectorType( - parameters.dtype, parameters.data.get(), parameters.extent); - return; - case DatasetMode::Dataset: - try - { - switchType(parameters.dtype, j["data"], parameters); - } - catch (json::basic_json::type_error &) - { - throw error::ReadError( - error::AffectedObject::Dataset, - error::Reason::UnexpectedContent, - "JSON", - "The given path does not contain a valid dataset."); - } - break; - } + std::visit( + auxiliary::overloaded{ + [&](DatasetMode::Dataset_t const &) { + try + { + switchType( + parameters.dtype, j["data"], parameters); + } + catch (json::basic_json::type_error &) + { + throw error::ReadError( + error::AffectedObject::Dataset, + error::Reason::UnexpectedContent, + "JSON", + "The given path does not contain a valid dataset."); + } + }, + [&](DatasetMode::Template_t const &) { + std::cerr + << "[Warning] Cannot read chunks in Template mode of JSON " + "backend. Will fill with zeroes instead." + << '\n'; + switchNonVectorType( + parameters.dtype, parameters.data.get(), parameters.extent); + }, + [&](DatasetMode::External_t const &) { + throw std::runtime_error("Unimplemented"); + }}, + localMode.as_base()); } namespace @@ -1865,7 +1917,8 @@ Extent JSONIOHandlerImpl::getMultiplicators(Extent const &extent) return res; } -auto JSONIOHandlerImpl::getExtent(nlohmann::json &j) +auto JSONIOHandlerImpl::getExtent( + nlohmann::json &j, DatasetMode const &baseMode) -> std::pair { Extent res; @@ -1894,7 +1947,10 @@ auto JSONIOHandlerImpl::getExtent(nlohmann::json &j) } else if (j.contains("extent")) { - ioMode = DatasetMode::Template; + ioMode = + std::holds_alternative(baseMode.as_base()) + ? 
baseMode + : DatasetMode{DatasetMode::Template}; res = j["extent"].get(); } else @@ -2137,18 +2193,25 @@ auto JSONIOHandlerImpl::putJsonContents( return it; } - switch (m_datasetMode.m_mode) - { - case DatasetMode::Dataset: - (*it->second)["platform_byte_widths"] = platformSpecifics(); - (*it->second)[JSONDefaults::openpmd_internal] - [JSONDefaults::DatasetMode] = "dataset"; - break; - case DatasetMode::Template: - (*it->second)[JSONDefaults::openpmd_internal] - [JSONDefaults::DatasetMode] = "template"; - break; - } + std::visit( + auxiliary::overloaded{ + [&](DatasetMode::Dataset_t const &) { + (*it->second)["platform_byte_widths"] = platformSpecifics(); + (*it->second)[JSONDefaults::openpmd_internal] + [JSONDefaults::DatasetMode] = "dataset"; + }, + [&](DatasetMode::Template_t const &) { + (*it->second)[JSONDefaults::openpmd_internal] + [JSONDefaults::DatasetMode] = "template"; + }, + [&](DatasetMode::External_t const &external) { + (*it->second)["platform_byte_widths"] = platformSpecifics(); + (*it->second)["external_storage"] = + external->externalStorageLocation(); + (*it->second)[JSONDefaults::openpmd_internal] + [JSONDefaults::DatasetMode] = "external"; + }}, + m_datasetMode.m_mode.as_base()); switch (m_attributeMode.m_mode) { @@ -2377,7 +2440,7 @@ auto JSONIOHandlerImpl::verifyDataset( try { Extent datasetExtent; - std::tie(datasetExtent, res) = getExtent(j); + std::tie(datasetExtent, res) = getExtent(j, m_datasetMode.m_mode); VERIFY_ALWAYS( datasetExtent.size() == parameters.extent.size(), "[JSON] Read/Write request does not fit the dataset's dimension"); From 408ebd626a9f58051eb764725a4444f38b955028 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 30 Sep 2025 15:34:00 +0200 Subject: [PATCH 12/50] Add configuration --- src/IO/JSON/JSONIOHandlerImpl.cpp | 116 ++++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 4 deletions(-) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 
6d96eb9578..c6f6970128 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -306,15 +306,123 @@ namespace "' (accepted values are 'dataset' and 'template'."); } } + + template + auto optionalOrElse(std::optional o, OrElse &&orElse) -> T + { + if (o.has_value()) + { + return *std::move(o); + } + else + { + return std::forward(orElse)(); + } + } + void parse_external_mode( - [[maybe_unused]] json::TracingJSON mode, - [[maybe_unused]] std::string const &configLocation, + json::TracingJSON mode, + std::string const &configLocation, JSONIOHandlerImpl::DatasetMode_s &res) { using SpecificationVia = JSONIOHandlerImpl::SpecificationVia; + using ExternalBlockStorage = openPMD::ExternalBlockStorage; + + auto get_mandatory = [&](char const *key, + bool lowercase) -> std::string { + if (!mode.json().contains(key)) + { + throw error::BackendConfigSchema( + {configLocation, "mode", key}, "Mandatory key."); + } + auto const &val = mode.json({key}); + return optionalOrElse( + lowercase ? openPMD::json::asLowerCaseStringDynamic(val) + : openPMD::json::asStringDynamic(val), + [&]() -> std::string { + throw error::BackendConfigSchema( + {configLocation, "mode", key}, + "Must be of string type."); + }); + }; + auto if_contains_optional = + [&](char const *key, bool lowercase, auto &&then) { + if (!mode.json().contains(key)) + { + return; + } + auto const &val = mode.json({key}); + static_cast(then)(optionalOrElse( + lowercase ? 
openPMD::json::asLowerCaseStringDynamic(val) + : openPMD::json::asStringDynamic(val), + [&]() -> std::string { + throw error::BackendConfigSchema( + {configLocation, "mode", key}, + "Must be of string type."); + })); + }; + auto modeString = get_mandatory("type", true); + + if (modeString == "stdio") + { + auto builder = ExternalBlockStorage::makeStdioSession( + get_mandatory("directory", false)); + + if_contains_optional("open_mode", false, [&](std::string openMode) { + builder.setOpenMode(std::move(openMode)); + }); + + res.m_mode = + std::make_shared(builder.build()); + } + else if (modeString == "aws") + { + openPMD::internal::AwsBuilder builder( + get_mandatory("bucket_name", false), + get_mandatory("access_key_id", false), + get_mandatory("secret_access_key", false)); + + if_contains_optional( + "session_token", false, [&](std::string sessionToken) { + builder.setSessionToken(std::move(sessionToken)); + }); + if_contains_optional( + "endpoint", false, [&](std::string endpointOverride) { + builder.setEndpointOverride(std::move(endpointOverride)); + }); + if_contains_optional("region", false, [&](std::string region) { + builder.setRegion(std::move(region)); + }); + if_contains_optional( + "scheme", true, [&](std::string const &scheme) { + if (scheme == "http") + { + builder.setScheme( + openPMD::internal::AwsBuilder::Scheme::HTTP); + } + else if (scheme == "https") + { + builder.setScheme( + openPMD::internal::AwsBuilder::Scheme::HTTPS); + } + else + { + throw error::BackendConfigSchema( + {configLocation, "mode", "scheme"}, + "Must be either 'http' or 'https'."); + } + }); + + res.m_mode = + std::make_shared(builder.build()); + } + else + { + throw error::BackendConfigSchema( + {configLocation, "mode", "type"}, + "Must be either 'stdio' or 'aws'."); + } - res.m_mode = std::make_shared( - ExternalBlockStorage::makeStdioSession("./external_blocks")); res.m_specificationVia = SpecificationVia::Manually; } } // namespace From 
22e5f8435d63f9fb03acf74ddcd302675fea4b91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 30 Sep 2025 18:31:16 +0200 Subject: [PATCH 13/50] Add option to init AWS API --- include/openPMD/Series.hpp | 3 + src/Series.cpp | 121 ++++++++++++++++++++++++------------- 2 files changed, 83 insertions(+), 41 deletions(-) diff --git a/include/openPMD/Series.hpp b/include/openPMD/Series.hpp index 603e540c2b..bc560fbb89 100644 --- a/include/openPMD/Series.hpp +++ b/include/openPMD/Series.hpp @@ -35,6 +35,7 @@ #include "openPMD/config.hpp" #include "openPMD/snapshots/Snapshots.hpp" #include "openPMD/version.hpp" +#include #if openPMD_HAVE_MPI #include @@ -239,6 +240,8 @@ namespace internal std::optional> m_deferred_initialization = std::nullopt; + std::optional m_manageAwsAPI = std::nullopt; + void close(); #if openPMD_HAVE_MPI diff --git a/src/Series.cpp b/src/Series.cpp index 6d13de73c3..28c7664c74 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -48,6 +48,8 @@ #include "openPMD/snapshots/StatefulIterator.hpp" #include "openPMD/version.hpp" +#include + #include #include #include @@ -1067,38 +1069,25 @@ void Series::init( } } -template -auto Series::initIOHandler( - std::string const &filepath, - std::string const &options, - Access at, - bool resolve_generic_extension, - MPI_Communicator &&...comm) - -> std::tuple, TracingJSON> +namespace { - auto &series = get(); - - json::TracingJSON optionsJson = json::parseOptions( - options, - std::forward(comm)..., - /* considerFiles = */ true); - auto input = parseInput(filepath); - if (resolve_generic_extension && input->format == Format::GENERIC && - !access::create(at)) + template + void do_resolve_generic_extension_read( + ParsedInput_t &input, std::string const &filepath, Access at) { auto isPartOfSeries = - input->iterationEncoding == IterationEncoding::fileBased + input.iterationEncoding == IterationEncoding::fileBased ? 
matcher( - input->filenamePrefix, - input->filenamePadding, - input->filenamePostfix, + input.filenamePrefix, + input.filenamePadding, + input.filenamePostfix, std::nullopt) - : matcher(input->name, -1, "", std::nullopt); + : matcher(input.name, -1, "", std::nullopt); std::optional extension; std::set additional_extensions; autoDetectPadding( isPartOfSeries, - input->path, + input.path, [&extension, &additional_extensions](std::string const &, Match const &match) { auto const &ext = match.extension.value(); @@ -1131,8 +1120,8 @@ auto Series::initIOHandler( std::nullopt, error.str()); } - input->filenameExtension = *extension; - input->format = determineFormat(*extension); + input.filenameExtension = *extension; + input.format = determineFormat(*extension); } else if (access::read(at)) { @@ -1144,30 +1133,68 @@ auto Series::initIOHandler( } } + template + void do_resolve_generic_extension_write(ParsedInput_t &input) + { + { + if (input.format == /* still */ Format::GENERIC) + { + throw error::WrongAPIUsage( + "Unable to automatically determine filename extension. 
" + "Please " + "specify in some way."); + } + else if (input.format == Format::ADIOS2_BP) + { + // Since ADIOS2 has multiple extensions depending on the engine, + // we need to pass this job on to the backend + input.filenameExtension = ".%E"; + } + else + { + input.filenameExtension = suffix(input.format); + } + } + } +} // namespace + +template +auto Series::initIOHandler( + std::string const &filepath, + std::string const &options, + Access at, + bool resolve_generic_extension, + MPI_Communicator &&...comm) + -> std::tuple, TracingJSON> +{ + auto &series = get(); + + json::TracingJSON optionsJson = json::parseOptions( + options, + std::forward(comm)..., + /* considerFiles = */ true); + auto input = parseInput(filepath); + + if (resolve_generic_extension && input->format == Format::GENERIC && + !access::create(at)) + { + do_resolve_generic_extension_read(*input, filepath, at); + } + // default options series.m_parseLazily = at == Access::READ_LINEAR; // now check for user-specified options parseJsonOptions(optionsJson, *input); + if (series.m_manageAwsAPI.has_value()) + { + Aws::InitAPI(*series.m_manageAwsAPI); + } + if (resolve_generic_extension && !input->filenameExtension.has_value()) { - if (input->format == /* still */ Format::GENERIC) - { - throw error::WrongAPIUsage( - "Unable to automatically determine filename extension. 
Please " - "specify in some way."); - } - else if (input->format == Format::ADIOS2_BP) - { - // Since ADIOS2 has multiple extensions depending on the engine, - // we need to pass this job on to the backend - input->filenameExtension = ".%E"; - } - else - { - input->filenameExtension = suffix(input->format); - } + do_resolve_generic_extension_write(*input); } return std::make_tuple(std::move(input), std::move(optionsJson)); } @@ -3175,6 +3202,14 @@ void Series::parseJsonOptions(TracingJSON &options, ParsedInput &input) { series.m_rankTable.m_rankTableSource = std::move(rankTableSource); } + { + bool doManageAwsAPI = false; + getJsonOption(options, "init_aws_api", doManageAwsAPI); + if (doManageAwsAPI) + { + series.m_manageAwsAPI = std::make_optional(); + } + } // backend key { std::map const backendDescriptors{ @@ -3261,6 +3296,10 @@ namespace internal // we must not throw in a destructor try { + if (m_manageAwsAPI.has_value()) + { + Aws::ShutdownAPI(*m_manageAwsAPI); + } close(); } catch (std::exception const &ex) From 50397c14dd0c89deaa14ddff6178dec13ac86431 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 1 Oct 2025 14:47:55 +0200 Subject: [PATCH 14/50] Add verifySSL parameter --- include/openPMD/toolkit/AwsBuilder.hpp | 2 ++ src/IO/JSON/JSONIOHandlerImpl.cpp | 16 ++++++++++++++++ src/toolkit/AwsBuilder.cpp | 11 +++++++++++ 3 files changed, 29 insertions(+) diff --git a/include/openPMD/toolkit/AwsBuilder.hpp b/include/openPMD/toolkit/AwsBuilder.hpp index 3bb8cef491..4a8ad691b9 100644 --- a/include/openPMD/toolkit/AwsBuilder.hpp +++ b/include/openPMD/toolkit/AwsBuilder.hpp @@ -30,6 +30,7 @@ struct AwsBuilder std::optional m_endpointOverride; std::optional m_region; std::optional m_scheme; + std::optional m_verifySSL; auto setBucketName(std::string bucketName) -> AwsBuilder &; auto setCredentials(std::string accessKeyId, std::string secretKey) @@ -38,6 +39,7 @@ struct AwsBuilder auto setEndpointOverride(std::string endpoint) -> AwsBuilder 
&; auto setRegion(std::string regionName) -> AwsBuilder &; auto setScheme(Scheme s) -> AwsBuilder &; + auto setVerifySSL(bool verify) -> AwsBuilder &; operator ::openPMD::ExternalBlockStorage(); auto build() -> ::openPMD::ExternalBlockStorage; diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index c6f6970128..f6187ca28e 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -361,6 +361,19 @@ namespace "Must be of string type."); })); }; + auto if_contains_optional_bool = [&](char const *key, auto &&then) { + if (!mode.json().contains(key)) + { + return; + } + auto const &val = mode.json({key}); + if (!val.is_boolean()) + { + throw error::BackendConfigSchema( + {configLocation, "mode", key}, "Must be of boolean type."); + } + static_cast(then)(val.get()); + }; auto modeString = get_mandatory("type", true); if (modeString == "stdio") @@ -393,6 +406,9 @@ namespace if_contains_optional("region", false, [&](std::string region) { builder.setRegion(std::move(region)); }); + if_contains_optional_bool("verify_ssl", [&](bool verifySSL) { + builder.setVerifySSL(verifySSL); + }); if_contains_optional( "scheme", true, [&](std::string const &scheme) { if (scheme == "http") diff --git a/src/toolkit/AwsBuilder.cpp b/src/toolkit/AwsBuilder.cpp index 13caa1f878..cb21fd399a 100644 --- a/src/toolkit/AwsBuilder.cpp +++ b/src/toolkit/AwsBuilder.cpp @@ -48,6 +48,12 @@ auto AwsBuilder::setScheme(Scheme s) -> AwsBuilder & return *this; } +auto AwsBuilder::setVerifySSL(bool verify) -> AwsBuilder & +{ + m_verifySSL = verify; + return *this; +} + auto internal::AwsBuilder::setSessionToken(std::string sessionToken) -> AwsBuilder & { @@ -88,6 +94,11 @@ AwsBuilder::operator ExternalBlockStorage() config.connectTimeoutMs = 5000; config.requestTimeoutMs = 15000; + if (m_verifySSL.has_value()) + { + config.verifySSL = *m_verifySSL; + } + auto aws_credentials = [&]() -> Aws::Auth::AWSCredentials { if (m_sessionToken.has_value()) { 
From f89a5db4cdef735e194e29035251f84e16f30004 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 1 Oct 2025 14:48:12 +0200 Subject: [PATCH 15/50] Add TODO comment --- src/IO/JSON/JSONIOHandlerImpl.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index f6187ca28e..7a498dab9e 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -391,6 +391,8 @@ namespace else if (modeString == "aws") { openPMD::internal::AwsBuilder builder( + // TODO: bucket_name: introduce expansion pattern for openPMD + // file name get_mandatory("bucket_name", false), get_mandatory("access_key_id", false), get_mandatory("secret_access_key", false)); From ee201a4c4693364a8b953da997764edd1ba268f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 1 Oct 2025 15:22:16 +0200 Subject: [PATCH 16/50] Add meta information object --- include/openPMD/toolkit/Aws.hpp | 8 +++- .../openPMD/toolkit/ExternalBlockStorage.hpp | 8 ++-- include/openPMD/toolkit/Stdio.hpp | 2 + src/toolkit/Aws.cpp | 21 ++++++++- src/toolkit/AwsBuilder.cpp | 4 +- src/toolkit/ExternalBlockStorage.cpp | 6 +++ src/toolkit/Stdio.cpp | 47 ++++++++++++++++++- 7 files changed, 86 insertions(+), 10 deletions(-) diff --git a/include/openPMD/toolkit/Aws.hpp b/include/openPMD/toolkit/Aws.hpp index 737629ec2b..5051fa2fbc 100644 --- a/include/openPMD/toolkit/Aws.hpp +++ b/include/openPMD/toolkit/Aws.hpp @@ -11,11 +11,17 @@ struct ExternalBlockStorageAws : ExternalBlockStorageBackend private: Aws::S3::S3Client m_client; std::string m_bucketName; + std::optional m_endpoint; public: - ExternalBlockStorageAws(Aws::S3::S3Client, std::string bucketName); + ExternalBlockStorageAws( + Aws::S3::S3Client, + std::string bucketName, + std::optional endpoint); auto put(std::string const &identifier, void const *data, size_t len) -> std::string override; + [[nodiscard]] auto externalStorageLocation() 
const + -> nlohmann::json override; ~ExternalBlockStorageAws() override; }; } // namespace openPMD::internal diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 7a26647970..6634321809 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -24,6 +24,9 @@ struct ExternalBlockStorageBackend virtual auto put(std::string const &identifier, void const *data, size_t len) -> std::string = 0; + [[nodiscard]] virtual auto externalStorageLocation() const + -> nlohmann::json = 0; + virtual ~ExternalBlockStorageBackend(); }; } // namespace openPMD::internal @@ -71,10 +74,7 @@ class ExternalBlockStorage std::optional infix, // e.g. for distinguishing MPI ranks T const *data) -> std::string; - auto externalStorageLocation() const -> nlohmann::json - { - return "implement me"; - } + [[nodiscard]] auto externalStorageLocation() const -> nlohmann::json; static void sanitizeString(std::string &s); }; diff --git a/include/openPMD/toolkit/Stdio.hpp b/include/openPMD/toolkit/Stdio.hpp index 10a3e724be..1fb8713b67 100644 --- a/include/openPMD/toolkit/Stdio.hpp +++ b/include/openPMD/toolkit/Stdio.hpp @@ -14,6 +14,8 @@ struct ExternalBlockStorageStdio : ExternalBlockStorageBackend ExternalBlockStorageStdio(std::string directory, std::string openMode); auto put(std::string const &identifier, void const *data, size_t len) -> std::string override; + [[nodiscard]] auto externalStorageLocation() const + -> nlohmann::json override; ~ExternalBlockStorageStdio() override; }; } // namespace openPMD::internal diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp index 2f05ec9402..50aff10007 100644 --- a/src/toolkit/Aws.cpp +++ b/src/toolkit/Aws.cpp @@ -30,8 +30,12 @@ struct imemstream : std::iostream namespace openPMD::internal { ExternalBlockStorageAws::ExternalBlockStorageAws( - Aws::S3::S3Client client, std::string bucketName) - : m_client{std::move(client)}, 
m_bucketName(std::move(bucketName)) + Aws::S3::S3Client client, + std::string bucketName, + std::optional endpoint) + : m_client{std::move(client)} + , m_bucketName(std::move(bucketName)) + , m_endpoint(std::move(endpoint)) { Aws::S3::Model::CreateBucketRequest create_request; create_request.SetBucket(m_bucketName); @@ -77,4 +81,17 @@ auto ExternalBlockStorageAws::put( return sanitized; } +[[nodiscard]] auto ExternalBlockStorageAws::externalStorageLocation() const + -> nlohmann::json +{ + nlohmann::json j; + j["provider"] = "s3"; + if (m_endpoint.has_value()) + { + j["endpoint"] = *m_endpoint; + } + j["bucket"] = m_bucketName; + return j; +} + } // namespace openPMD::internal diff --git a/src/toolkit/AwsBuilder.cpp b/src/toolkit/AwsBuilder.cpp index cb21fd399a..cc3cdc87ef 100644 --- a/src/toolkit/AwsBuilder.cpp +++ b/src/toolkit/AwsBuilder.cpp @@ -117,7 +117,9 @@ AwsBuilder::operator ExternalBlockStorage() false); return ExternalBlockStorage{std::make_unique( - std::move(s3_client), std::move(m_bucketName))}; + std::move(s3_client), + std::move(m_bucketName), + std::move(m_endpointOverride))}; } auto AwsBuilder::build() -> ExternalBlockStorage diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 2a0f4fc683..2d29023c9c 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -134,6 +134,12 @@ auto ExternalBlockStorage::store( return index_as_str; } +[[nodiscard]] auto ExternalBlockStorage::externalStorageLocation() const + -> nlohmann::json +{ + return m_worker->externalStorageLocation(); +} + void ExternalBlockStorage::sanitizeString(std::string &s) { for (char &c : s) diff --git a/src/toolkit/Stdio.cpp b/src/toolkit/Stdio.cpp index dac83d35c9..c3fecf6f2a 100644 --- a/src/toolkit/Stdio.cpp +++ b/src/toolkit/Stdio.cpp @@ -5,6 +5,39 @@ #include #include +namespace +{ +auto concat_filepath(std::string const &s1, std::string const &s2) + -> std::string +{ + if (s1.empty()) + { + return s2; 
+ } + if (s2.empty()) + { + return s1; + } + bool ends_with_slash = + *s1.crbegin() == openPMD::auxiliary::directory_separator; + bool starts_with_slash = + *s2.cbegin() == openPMD::auxiliary::directory_separator; + + if (ends_with_slash ^ starts_with_slash) + { + return s1 + s2; + } + else if (ends_with_slash && starts_with_slash) + { + return s1 + (s2.c_str() + 1); + } + else + { + return s1 + openPMD::auxiliary::directory_separator + s2; + } +} +} // namespace + namespace openPMD::internal { ExternalBlockStorageStdio::ExternalBlockStorageStdio( @@ -33,7 +66,7 @@ auto ExternalBlockStorageStdio::put( { auto sanitized = identifier + ".dat"; ExternalBlockStorage::sanitizeString(sanitized); - std::string filepath = m_directory + "/" + sanitized; + std::string filepath = concat_filepath(m_directory, sanitized); if (len == 0) { @@ -63,6 +96,16 @@ auto ExternalBlockStorageStdio::put( filepath); } - return filepath; + return sanitized; +} + +[[nodiscard]] auto ExternalBlockStorageStdio::externalStorageLocation() const + -> nlohmann::json +{ + nlohmann::json j; + j["provider"] = "stdio"; + j["directory"] = m_directory; + j["open_mode"] = m_openMode; + return j; } } // namespace openPMD::internal From 5956c154f8a7712cad00721ba09a24b382ffec12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 5 Dec 2025 17:02:04 +0100 Subject: [PATCH 17/50] Prepare reloading ext block storage from old file --- .../openPMD/toolkit/ExternalBlockStorage.hpp | 7 +++ src/IO/JSON/JSONIOHandlerImpl.cpp | 43 ++++++++++++++----- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 6634321809..6f164dfab0 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -37,9 +37,16 @@ namespace openPMD // TODO: replace this with a concept upon switching to C++20 struct DatatypeHandling_Interface { + /* + * 
Returns false if the same JSON location was previously encoded as + * another datatype. + */ template static auto encodeDatatype(nlohmann::json &) -> bool; + /* + * Returns false if no encoded datatype could be found + */ template static auto decodeDatatype(nlohmann::json const &j, Args &&...args) -> bool; }; diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 7a498dab9e..ad277d6d6a 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -322,20 +322,38 @@ namespace void parse_external_mode( json::TracingJSON mode, + // In read mode, the metadata section stored under 'external_storage' + // These are default values, overridable with the first argument + std::optional previousCfg, std::string const &configLocation, JSONIOHandlerImpl::DatasetMode_s &res) { using SpecificationVia = JSONIOHandlerImpl::SpecificationVia; using ExternalBlockStorage = openPMD::ExternalBlockStorage; - auto get_mandatory = [&](char const *key, - bool lowercase) -> std::string { - if (!mode.json().contains(key)) + auto get_key = + [&](char const *key) -> std::optional { + if (mode.json().contains(key)) { - throw error::BackendConfigSchema( - {configLocation, "mode", key}, "Mandatory key."); + return {&mode.json({key})}; + } + else if (previousCfg.has_value() && (*previousCfg)->contains(key)) + { + return {&(**previousCfg).at(key)}; + } + else + { + return std::nullopt; } - auto const &val = mode.json({key}); + }; + + auto get_mandatory = [&](char const *key, + bool lowercase) -> std::string { + auto const &val = *optionalOrElse( + get_key("mode"), [&]() -> nlohmann::json const * { + throw error::BackendConfigSchema( + {configLocation, "mode", key}, "Mandatory key."); + }); return optionalOrElse( lowercase ? 
openPMD::json::asLowerCaseStringDynamic(val) : openPMD::json::asStringDynamic(val), @@ -347,11 +365,12 @@ namespace }; auto if_contains_optional = [&](char const *key, bool lowercase, auto &&then) { - if (!mode.json().contains(key)) + auto const maybeVal = get_key(key); + if (!maybeVal.has_value()) { return; } - auto const &val = mode.json({key}); + auto const &val = **maybeVal; static_cast(then)(optionalOrElse( lowercase ? openPMD::json::asLowerCaseStringDynamic(val) : openPMD::json::asStringDynamic(val), @@ -362,11 +381,12 @@ namespace })); }; auto if_contains_optional_bool = [&](char const *key, auto &&then) { - if (!mode.json().contains(key)) + auto const maybeVal = get_key(key); + if (!maybeVal.has_value()) { return; } - auto const &val = mode.json({key}); + auto const &val = **maybeVal; if (!val.is_boolean()) { throw error::BackendConfigSchema( @@ -464,7 +484,8 @@ auto JSONIOHandlerImpl::retrieveDatasetMode( auto mode = datasetConfig["mode"]; if (mode.json().is_object()) { - parse_external_mode(std::move(mode), configLocation, res); + parse_external_mode( + std::move(mode), std::nullopt, configLocation, res); } else { From abe149f0c9cacbfdc0ead56d9b7a6a68710bd3cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 5 Dec 2025 17:45:52 +0100 Subject: [PATCH 18/50] Reload config when reading from a JSON file --- include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 9 +- src/IO/JSON/JSONIOHandlerImpl.cpp | 89 +++++++++++++++++-- 2 files changed, 90 insertions(+), 8 deletions(-) diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index 0bc7e1db81..e9fcaf5289 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -410,8 +410,15 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl std::string m_originalExtension; + /* + * In read mode, we can only open the external block storage backend upon + * opening the JSON file, because it 
contains meta information relevant + * for configuring the backend. + */ + std::optional + m_deferredExternalBlockstorageConfig; DatasetMode_s m_datasetMode; - DatasetMode_s retrieveDatasetMode(openPMD::json::TracingJSON &config) const; + DatasetMode_s retrieveDatasetMode(openPMD::json::TracingJSON &config); AttributeMode_s m_attributeMode; AttributeMode_s diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index ad277d6d6a..64ecf2e790 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -24,6 +24,7 @@ #include "openPMD/Error.hpp" #include "openPMD/IO/AbstractIOHandler.hpp" #include "openPMD/IO/AbstractIOHandlerImpl.hpp" +#include "openPMD/IO/Access.hpp" #include "openPMD/ThrowError.hpp" #include "openPMD/auxiliary/Filesystem.hpp" #include "openPMD/auxiliary/JSONMatcher.hpp" @@ -349,8 +350,8 @@ namespace auto get_mandatory = [&](char const *key, bool lowercase) -> std::string { - auto const &val = *optionalOrElse( - get_key("mode"), [&]() -> nlohmann::json const * { + auto const &val = + *optionalOrElse(get_key(key), [&]() -> nlohmann::json const * { throw error::BackendConfigSchema( {configLocation, "mode", key}, "Mandatory key."); }); @@ -394,7 +395,7 @@ namespace } static_cast(then)(val.get()); }; - auto modeString = get_mandatory("type", true); + auto modeString = get_mandatory("provider", true); if (modeString == "stdio") { @@ -465,8 +466,8 @@ namespace } } // namespace -auto JSONIOHandlerImpl::retrieveDatasetMode( - openPMD::json::TracingJSON &config) const -> DatasetMode_s +auto JSONIOHandlerImpl::retrieveDatasetMode(openPMD::json::TracingJSON &config) + -> DatasetMode_s { // start with / copy from current config auto res = m_datasetMode; @@ -484,8 +485,20 @@ auto JSONIOHandlerImpl::retrieveDatasetMode( auto mode = datasetConfig["mode"]; if (mode.json().is_object()) { - parse_external_mode( - std::move(mode), std::nullopt, configLocation, res); + if 
(access::writeOnly(m_handler->m_backendAccess)) + { + parse_external_mode( + std::move(mode), std::nullopt, configLocation, res); + } + else + { + // sic! initialize the deferred json config as a new + // tracing object + m_deferredExternalBlockstorageConfig = + std::make_optional( + config.json(), config.originallySpecifiedAs); + config.declareFullyRead(); + } } else { @@ -661,6 +674,14 @@ void JSONIOHandlerImpl::createFile( access::write(m_handler->m_backendAccess), "[JSON] Creating a file in read-only mode is not possible."); + if (m_deferredExternalBlockstorageConfig.has_value()) + { + throw error::Internal( + "Creation of external block storage backend was deferred until " + "opening the first file, but a file is created before any was " + "opened."); + } + /* * Need to resolve this later than init() since the openPMD version might be * specified after the creation of the IOHandler. @@ -1104,6 +1125,28 @@ void JSONIOHandlerImpl::openFile( auto file = std::get<0>(getPossiblyExisting(name)); + if (m_deferredExternalBlockstorageConfig.has_value()) + { + auto const &contents = obtainJsonContents(file); + auto previousConfig = [&]() -> std::optional { + if (contents->contains("external_storage")) + { + return std::make_optional( + &contents->at("external_storage")); + } + else + { + return std::nullopt; + } + }(); + parse_external_mode( + std::move(*m_deferredExternalBlockstorageConfig), + previousConfig, + backendConfigKey(), + m_datasetMode); + m_attributeMode.m_specificationVia = SpecificationVia::Manually; + } + associateWithFile(writable, file); writable->written = true; @@ -2242,6 +2285,9 @@ JSONIOHandlerImpl::obtainJsonContents(File const &file) auto res = serialImplementation(); #endif + bool initialize_external_block_storage = + m_deferredExternalBlockstorageConfig.has_value(); + if (res->contains(JSONDefaults::openpmd_internal)) { auto const &openpmd_internal = res->at(JSONDefaults::openpmd_internal); @@ -2272,6 +2318,10 @@ 
JSONIOHandlerImpl::obtainJsonContents(File const &file) { m_datasetMode.m_mode = DatasetMode::Template; } + else if (modeOption.value() == "external") + { + initialize_external_block_storage = true; + } else { std::cerr << "[JSON/TOML backend] Warning: Invalid value '" @@ -2315,6 +2365,31 @@ JSONIOHandlerImpl::obtainJsonContents(File const &file) } } } + + if (initialize_external_block_storage) + { + auto previousConfig = [&]() -> std::optional { + if (res->contains("external_storage")) + { + return std::make_optional( + &res->at("external_storage")); + } + else + { + return std::nullopt; + } + }(); + parse_external_mode( + m_deferredExternalBlockstorageConfig.has_value() + ? std::move(*m_deferredExternalBlockstorageConfig) + : openPMD::json::TracingJSON(), + previousConfig, + backendConfigKey(), + m_datasetMode); + m_attributeMode.m_specificationVia = SpecificationVia::Manually; + m_deferredExternalBlockstorageConfig.reset(); + } + m_jsonVals.emplace(file, res); return res; } From 25053c5fe18b9c79ccefe896c5b70f917627a644 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 11:39:53 +0100 Subject: [PATCH 19/50] WIP: Read from EBS --- .../openPMD/toolkit/ExternalBlockStorage.hpp | 15 +++++++ src/IO/JSON/JSONIOHandlerImpl.cpp | 31 ++++++++++--- src/toolkit/ExternalBlockStorage.cpp | 44 ++++++++++++++++++- 3 files changed, 83 insertions(+), 7 deletions(-) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 6f164dfab0..9a69ed8dff 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -81,6 +81,21 @@ class ExternalBlockStorage std::optional infix, // e.g. 
for distinguishing MPI ranks T const *data) -> std::string; + template + void read( + std::string const &identifier, + nlohmann::json &fullJsonDataset, + nlohmann::json::json_pointer const &path, + T *data); + + template + void read( + Offset blockOffset, + Extent blockExtent, + nlohmann::json &fullJsonDataset, + nlohmann::json::json_pointer const &path, + T *data); + [[nodiscard]] auto externalStorageLocation() const -> nlohmann::json; static void sanitizeString(std::string &s); diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 64ecf2e790..d4139b9b38 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -1547,14 +1547,28 @@ namespace static constexpr char const *errorMsg = "[JSON Backend] Fill with zeroes."; }; + + struct RetrieveExternally + { + template + static void + call(ExternalBlockStorage &blockStorage, void *ptr, Args &&...args) + { + blockStorage.read( + std::forward(args)..., static_cast(ptr)); + } + + static constexpr char const *errorMsg = "RetrieveExternally"; + }; } // namespace void JSONIOHandlerImpl::readDataset( Writable *writable, Parameter ¶meters) { - refreshFileFromParent(writable); - setAndGetFilePosition(writable); - auto &j = obtainJsonContents(writable); + auto file = refreshFileFromParent(writable); + auto filePosition = setAndGetFilePosition(writable); + auto &jsonRoot = *obtainJsonContents(file); + auto &j = jsonRoot[filePosition->id]; DatasetMode localMode = verifyDataset(parameters, j); std::visit( @@ -1582,8 +1596,15 @@ void JSONIOHandlerImpl::readDataset( switchNonVectorType( parameters.dtype, parameters.data.get(), parameters.extent); }, - [&](DatasetMode::External_t const &) { - throw std::runtime_error("Unimplemented"); + [&](DatasetMode::External_t &external) { + switchDatasetType( + parameters.dtype, + *external, + parameters.data.get(), + parameters.offset, + parameters.extent, + jsonRoot, + filePosition->id); }}, localMode.as_base()); } diff --git 
a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 2d29023c9c..0d2813eccc 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -112,7 +112,7 @@ auto ExternalBlockStorage::store( check_metadata("byte_width", sizeof(T)); check_metadata("extent", globalExtent); - auto &block = dataset[index_as_str]; + auto &block = dataset["external_blocks"][index_as_str]; block["offset"] = blockOffset; block["extent"] = blockExtent; std::stringstream filesystem_identifier; @@ -134,6 +134,35 @@ auto ExternalBlockStorage::store( return index_as_str; } +namespace +{ + template + void read_impl( + ExternalBlockStorageBackend *backend, + nlohmann::json const &external_block, + T *data) + {} +} // namespace + +template +void ExternalBlockStorage::read( + std::string const &identifier, + nlohmann::json &fullJsonDataset, + nlohmann::json::json_pointer const &path, + T *data) +{} + +template +void ExternalBlockStorage::read( + Offset blockOffset, + Extent blockExtent, + nlohmann::json &fullJsonDataset, + nlohmann::json::json_pointer const &path, + T *data) +{ + auto &dataset = fullJsonDataset[path]; +} + [[nodiscard]] auto ExternalBlockStorage::externalStorageLocation() const -> nlohmann::json { @@ -161,7 +190,18 @@ void ExternalBlockStorage::sanitizeString(std::string &s) nlohmann::json & fullJsonDataset, \ nlohmann::json::json_pointer const &path, \ std::optional infix, \ - type const *data) -> std::string; + type const *data) -> std::string; \ + template void ExternalBlockStorage::read( \ + std::string const &identifier, \ + nlohmann::json &fullJsonDataset, \ + nlohmann::json::json_pointer const &path, \ + type *data); \ + template void ExternalBlockStorage::read( \ + Offset blockOffset, \ + Extent blockExtent, \ + nlohmann::json & fullJsonDataset, \ + nlohmann::json::json_pointer const &path, \ + type *data); #define OPENPMD_INSTANTIATE(type) \ OPENPMD_INSTANTIATE_DATATYPEHANDLING(internal::JsonDatatypeHandling, 
type) OPENPMD_FOREACH_DATASET_DATATYPE(OPENPMD_INSTANTIATE) From 056982c43f7be56618b870fe146629db4e9c09b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 12:12:25 +0100 Subject: [PATCH 20/50] Base implementation for get() --- include/openPMD/toolkit/Aws.hpp | 1 + .../openPMD/toolkit/ExternalBlockStorage.hpp | 2 + include/openPMD/toolkit/Stdio.hpp | 1 + src/toolkit/Aws.cpp | 34 +++++++++++++++++ src/toolkit/ExternalBlockStorage.cpp | 2 +- src/toolkit/Stdio.cpp | 37 ++++++++++++++++++- 6 files changed, 75 insertions(+), 2 deletions(-) diff --git a/include/openPMD/toolkit/Aws.hpp b/include/openPMD/toolkit/Aws.hpp index 5051fa2fbc..5c66eee9c0 100644 --- a/include/openPMD/toolkit/Aws.hpp +++ b/include/openPMD/toolkit/Aws.hpp @@ -20,6 +20,7 @@ struct ExternalBlockStorageAws : ExternalBlockStorageBackend std::optional endpoint); auto put(std::string const &identifier, void const *data, size_t len) -> std::string override; + void get(std::string const &external_ref, void *data, size_t len) override; [[nodiscard]] auto externalStorageLocation() const -> nlohmann::json override; ~ExternalBlockStorageAws() override; diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 9a69ed8dff..cb5031cac2 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -24,6 +24,8 @@ struct ExternalBlockStorageBackend virtual auto put(std::string const &identifier, void const *data, size_t len) -> std::string = 0; + virtual void + get(std::string const &external_ref, void *data, size_t len) = 0; [[nodiscard]] virtual auto externalStorageLocation() const -> nlohmann::json = 0; diff --git a/include/openPMD/toolkit/Stdio.hpp b/include/openPMD/toolkit/Stdio.hpp index 1fb8713b67..9428151d2e 100644 --- a/include/openPMD/toolkit/Stdio.hpp +++ b/include/openPMD/toolkit/Stdio.hpp @@ -14,6 +14,7 @@ struct ExternalBlockStorageStdio : 
ExternalBlockStorageBackend ExternalBlockStorageStdio(std::string directory, std::string openMode); auto put(std::string const &identifier, void const *data, size_t len) -> std::string override; + void get(std::string const &external_ref, void *data, size_t len) override; [[nodiscard]] auto externalStorageLocation() const -> nlohmann::json override; ~ExternalBlockStorageStdio() override; diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp index 50aff10007..f536e1fdf7 100644 --- a/src/toolkit/Aws.cpp +++ b/src/toolkit/Aws.cpp @@ -1,9 +1,11 @@ #include "openPMD/toolkit/Aws.hpp" #include +#include #include #include +#include namespace { @@ -81,6 +83,38 @@ auto ExternalBlockStorageAws::put( return sanitized; } +void ExternalBlockStorageAws::get( + std::string const &external_ref, void *data, size_t len) +{ + if (len == 0) + { + return; + } + + Aws::S3::Model::GetObjectRequest get_request; + get_request.SetBucket(m_bucketName); + get_request.SetKey(external_ref); + + auto get_outcome = m_client.GetObject(get_request); + if (!get_outcome.IsSuccess()) + { + throw std::runtime_error( + std::string("ExternalBlockStorageAws::get failed: ") + + get_outcome.GetError().GetMessage()); + } + + auto &body = get_outcome.GetResult().GetBody(); + body.read( + reinterpret_cast(data), static_cast(len)); + std::streamsize read_bytes = body.gcount(); + if (read_bytes != static_cast(len)) + { + throw std::runtime_error( + "ExternalBlockStorageAws: failed to read expected number of bytes " + "from S3 object"); + } +} + [[nodiscard]] auto ExternalBlockStorageAws::externalStorageLocation() const -> nlohmann::json { diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 0d2813eccc..d59cc9a4f6 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -138,7 +138,7 @@ namespace { template void read_impl( - ExternalBlockStorageBackend *backend, + internal::ExternalBlockStorageBackend *backend, nlohmann::json const 
&external_block, T *data) {} diff --git a/src/toolkit/Stdio.cpp b/src/toolkit/Stdio.cpp index c3fecf6f2a..ddf7da7178 100644 --- a/src/toolkit/Stdio.cpp +++ b/src/toolkit/Stdio.cpp @@ -73,7 +73,7 @@ auto ExternalBlockStorageStdio::put( return filepath; } - FILE *file = std::fopen(filepath.c_str(), "wb"); + FILE *file = std::fopen(filepath.c_str(), m_openMode.c_str()); if (!file) { throw std::runtime_error( @@ -99,6 +99,41 @@ auto ExternalBlockStorageStdio::put( return sanitized; } +void ExternalBlockStorageStdio::get( + std::string const &external_ref, void *data, size_t len) +{ + if (len == 0) + { + return; + } + + std::string filepath = concat_filepath(m_directory, external_ref); + + FILE *file = std::fopen(filepath.c_str(), "rb"); + if (!file) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to open file for reading: " + + filepath); + } + + size_t read = std::fread(data, 1, len, file); + if (read != len) + { + std::fclose(file); + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to read full data from file: " + + filepath); + } + + if (std::fclose(file) != 0) + { + throw std::runtime_error( + "ExternalBlockStorageStdio: failed to close file after reading: " + + filepath); + } +} + [[nodiscard]] auto ExternalBlockStorageStdio::externalStorageLocation() const -> nlohmann::json { From a8506431c3341861415895dd652a8040dc2d8d2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 13:43:31 +0100 Subject: [PATCH 21/50] Untested read impl --- include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 15 ++++ .../openPMD/toolkit/ExternalBlockStorage.hpp | 7 ++ src/toolkit/Aws.cpp | 3 +- src/toolkit/ExternalBlockStorage.cpp | 78 +++++++++++++++---- 4 files changed, 87 insertions(+), 16 deletions(-) diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index e9fcaf5289..5ba544db29 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ 
b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -177,6 +177,21 @@ struct JsonDatatypeHandling } } + template + static auto checkDatatype(nlohmann::json const &j) -> bool + { + auto const &needed_datatype = + jsonDatatypeToString(determineDatatype()); + if (auto it = j.find("datatype"); it != j.end()) + { + return it.value().get() == needed_datatype; + } + else + { + return false; + } + } + template static auto decodeDatatype(nlohmann::json const &j, Args &&...args) -> bool { diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index cb5031cac2..082b767d70 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -46,6 +46,13 @@ struct DatatypeHandling_Interface template static auto encodeDatatype(nlohmann::json &) -> bool; + /* + * Returns false if the encoded datatype does not match T_required + * or if no datatype has been encoded. + */ + template + static auto checkDatatype(nlohmann::json const &j) -> bool; + /* * Returns false if no encoded datatype could be found */ diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp index f536e1fdf7..e8c12ab16b 100644 --- a/src/toolkit/Aws.cpp +++ b/src/toolkit/Aws.cpp @@ -67,7 +67,8 @@ auto ExternalBlockStorageAws::put( auto input_data = Aws::MakeShared( "PutObjectInputStream", reinterpret_cast(data), len); - std::static_pointer_cast(input_data); + put_request.SetBody(input_data); + put_request.SetContentLength(static_cast(len)); auto put_outcome = m_client.PutObject(put_request); diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index d59cc9a4f6..05d02f6a95 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -2,6 +2,7 @@ #include "openPMD/DatatypeMacros.hpp" #include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp" +#include "openPMD/auxiliary/StringManip.hpp" #include @@ -16,6 +17,30 @@ 
ExternalBlockStorageBackend::~ExternalBlockStorageBackend() = default; namespace openPMD { + +namespace +{ + auto flat_extent(Extent const &e) -> size_t + { + return std::accumulate( + e.begin(), e.end(), 1, [](size_t left, size_t right) { + return left * right; + }); + } + + template + void read_impl( + internal::ExternalBlockStorageBackend *backend, + nlohmann::json const &external_block, + T *data, + size_t len) + { + auto const &external_ref = + external_block.at("external_ref").get(); + backend->get(external_ref, data, sizeof(T) * len); + } +} // namespace + ExternalBlockStorage::ExternalBlockStorage() = default; ExternalBlockStorage::ExternalBlockStorage( std::unique_ptr worker) @@ -125,25 +150,11 @@ auto ExternalBlockStorage::store( auto escaped_filesystem_identifier = m_worker->put( filesystem_identifier.str(), data, - std::accumulate( - blockExtent.begin(), - blockExtent.end(), - sizeof(T), - [](size_t left, size_t right) { return left * right; })); + sizeof(T) * flat_extent(blockExtent)); block["external_ref"] = escaped_filesystem_identifier; return index_as_str; } -namespace -{ - template - void read_impl( - internal::ExternalBlockStorageBackend *backend, - nlohmann::json const &external_block, - T *data) - {} -} // namespace - template void ExternalBlockStorage::read( std::string const &identifier, @@ -161,6 +172,43 @@ void ExternalBlockStorage::read( T *data) { auto &dataset = fullJsonDataset[path]; + if (!DatatypeHandling::template checkDatatype(dataset)) + { + throw std::runtime_error("Inconsistent chunk storage in datatype."); + } + auto external_blocks = dataset["external_blocks"]; + bool found_a_precise_match = false; + for (auto it = external_blocks.begin(); it != external_blocks.end(); ++it) + { + auto const &block = it.value(); + try + { + auto const &o = block.at("offset").get(); + auto const &e = block.at("extent").get(); + // Look only for exact matches for now + if (o != blockOffset || e != blockExtent) + { + continue; + } + 
found_a_precise_match = true; + read_impl(m_worker.get(), block, data, flat_extent(blockExtent)); + break; + } + catch (nlohmann::json::exception const &e) + { + std::cerr << "[ExternalBlockStorage::read] Could not parse block '" + << it.key() << "'. Original error was:\n" + << e.what(); + } + } + if (!found_a_precise_match) + { + throw std::runtime_error( + "[ExternalBlockStorage::read] Unable to find a precise match for " + "offset " + + auxiliary::vec_as_string(blockOffset) + " and extent " + + auxiliary::vec_as_string(blockExtent)); + } } [[nodiscard]] auto ExternalBlockStorage::externalStorageLocation() const From 4bbd101cc0b0bb8f90c6c7fd9b0061a6d2f6403d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 14:21:00 +0100 Subject: [PATCH 22/50] Basically working reading needed also support for availableChunks --- .../openPMD/toolkit/ExternalBlockStorage.hpp | 2 +- src/IO/JSON/JSONIOHandlerImpl.cpp | 41 +++++++++++++++++-- src/toolkit/ExternalBlockStorage.cpp | 6 +-- 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 082b767d70..cda4e3bd46 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -101,7 +101,7 @@ class ExternalBlockStorage void read( Offset blockOffset, Extent blockExtent, - nlohmann::json &fullJsonDataset, + nlohmann::json const &fullJsonDataset, nlohmann::json::json_pointer const &path, T *data); diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index d4139b9b38..de9023b729 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -1104,9 +1104,44 @@ void JSONIOHandlerImpl::availableChunks( { refreshFileFromParent(writable); auto filePosition = setAndGetFilePosition(writable); - auto &j = obtainJsonContents(writable)["data"]; - *parameters.chunks = chunksInJSON(j); - 
chunk_assignment::mergeChunks(*parameters.chunks); + auto &j = obtainJsonContents(writable); + + auto [extent, datasetmode] = getExtent(j, m_datasetMode.m_mode); + + std::visit( + auxiliary::overloaded{ + [&](DatasetMode::Dataset_t const &) { + *parameters.chunks = chunksInJSON(j.at("data")); + chunk_assignment::mergeChunks(*parameters.chunks); + }, + [&](DatasetMode::Template_t const &) { + /* no-op, no chunks to be loaded */ + }, + [&](DatasetMode::External_t &) { + auto external_blocks = j.at("external_blocks"); + auto &res = *parameters.chunks; + res.reserve(external_blocks.size()); + for (auto it = external_blocks.begin(); + it != external_blocks.end(); + ++it) + { + auto const &block = it.value(); + try + { + auto const &o = block.at("offset").get(); + auto const &e = block.at("extent").get(); + res.emplace_back(o, e); + } + catch (nlohmann::json::exception const &e) + { + std::cerr << "[JSONIOHandlerImpl::availableChunks] " + "Could not parse block '" + << it.key() << "'. Original error was:\n" + << e.what(); + } + } + }}, + datasetmode.as_base()); } void JSONIOHandlerImpl::openFile( diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 05d02f6a95..8e44f9efc6 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -167,7 +167,7 @@ template void ExternalBlockStorage::read( Offset blockOffset, Extent blockExtent, - nlohmann::json &fullJsonDataset, + nlohmann::json const &fullJsonDataset, nlohmann::json::json_pointer const &path, T *data) { @@ -176,7 +176,7 @@ void ExternalBlockStorage::read( { throw std::runtime_error("Inconsistent chunk storage in datatype."); } - auto external_blocks = dataset["external_blocks"]; + auto external_blocks = dataset.at("external_blocks"); bool found_a_precise_match = false; for (auto it = external_blocks.begin(); it != external_blocks.end(); ++it) { @@ -247,7 +247,7 @@ void ExternalBlockStorage::sanitizeString(std::string &s) template void 
ExternalBlockStorage::read( \ Offset blockOffset, \ Extent blockExtent, \ - nlohmann::json & fullJsonDataset, \ + nlohmann::json const &fullJsonDataset, \ nlohmann::json::json_pointer const &path, \ type *data); #define OPENPMD_INSTANTIATE(type) \ From 0e112ead34ca26ed7343f4623435aeb1cad723d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 14:29:59 +0100 Subject: [PATCH 23/50] cleanup --- .../openPMD/toolkit/ExternalBlockStorage.hpp | 12 +++---- src/toolkit/ExternalBlockStorage.cpp | 32 +++++++++---------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index cda4e3bd46..25b776e620 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -82,9 +82,9 @@ class ExternalBlockStorage // returns created JSON key template auto store( - Extent globalExtent, - Offset blockOffset, - Extent blockExtent, + Extent const &globalExtent, + Offset const &blockOffset, + Extent const &blockExtent, nlohmann::json &fullJsonDataset, nlohmann::json::json_pointer const &path, std::optional infix, // e.g. 
for distinguishing MPI ranks @@ -93,14 +93,14 @@ class ExternalBlockStorage template void read( std::string const &identifier, - nlohmann::json &fullJsonDataset, + nlohmann::json const &fullJsonDataset, nlohmann::json::json_pointer const &path, T *data); template void read( - Offset blockOffset, - Extent blockExtent, + Offset const &blockOffset, + Extent const &blockExtent, nlohmann::json const &fullJsonDataset, nlohmann::json::json_pointer const &path, T *data); diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 8e44f9efc6..cee4a34fd1 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -63,9 +63,9 @@ auto ExternalBlockStorage::makeAwsSession( template auto ExternalBlockStorage::store( - Extent globalExtent, - Offset blockOffset, - Extent blockExtent, + Extent const &globalExtent, + Offset const &blockOffset, + Extent const &blockExtent, nlohmann::json &fullJsonDataset, nlohmann::json::json_pointer const &path, std::optional infix, @@ -157,16 +157,16 @@ auto ExternalBlockStorage::store( template void ExternalBlockStorage::read( - std::string const &identifier, - nlohmann::json &fullJsonDataset, - nlohmann::json::json_pointer const &path, - T *data) + [[maybe_unused]] std::string const &identifier, + [[maybe_unused]] nlohmann::json const &fullJsonDataset, + [[maybe_unused]] nlohmann::json::json_pointer const &path, + [[maybe_unused]] T *data) {} template void ExternalBlockStorage::read( - Offset blockOffset, - Extent blockExtent, + Offset const &blockOffset, + Extent const &blockExtent, nlohmann::json const &fullJsonDataset, nlohmann::json::json_pointer const &path, T *data) @@ -232,21 +232,21 @@ void ExternalBlockStorage::sanitizeString(std::string &s) #define OPENPMD_INSTANTIATE_DATATYPEHANDLING(datatypehandling, type) \ template auto ExternalBlockStorage::store( \ - Extent globalExtent, \ - Offset blockOffset, \ - Extent blockExtent, \ - nlohmann::json & fullJsonDataset, \ + 
Extent const &globalExtent, \ + Offset const &blockOffset, \ + Extent const &blockExtent, \ + nlohmann::json &fullJsonDataset, \ nlohmann::json::json_pointer const &path, \ std::optional infix, \ type const *data) -> std::string; \ template void ExternalBlockStorage::read( \ std::string const &identifier, \ - nlohmann::json &fullJsonDataset, \ + nlohmann::json const &fullJsonDataset, \ nlohmann::json::json_pointer const &path, \ type *data); \ template void ExternalBlockStorage::read( \ - Offset blockOffset, \ - Extent blockExtent, \ + Offset const &blockOffset, \ + Extent const &blockExtent, \ nlohmann::json const &fullJsonDataset, \ nlohmann::json::json_pointer const &path, \ type *data); From e23b35e27c4e4be64537e82c0631727ded310416 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 15:00:50 +0100 Subject: [PATCH 24/50] Naming fixes --- src/IO/JSON/JSONIOHandlerImpl.cpp | 18 ++++++++++-------- src/toolkit/Aws.cpp | 2 +- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index de9023b729..7e8ec09a13 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -353,14 +353,15 @@ namespace auto const &val = *optionalOrElse(get_key(key), [&]() -> nlohmann::json const * { throw error::BackendConfigSchema( - {configLocation, "mode", key}, "Mandatory key."); + {configLocation, "dataset", "mode", key}, + "Mandatory key."); }); return optionalOrElse( lowercase ? 
openPMD::json::asLowerCaseStringDynamic(val) : openPMD::json::asStringDynamic(val), [&]() -> std::string { throw error::BackendConfigSchema( - {configLocation, "mode", key}, + {configLocation, "dataset", "mode", key}, "Must be of string type."); }); }; @@ -377,7 +378,7 @@ namespace : openPMD::json::asStringDynamic(val), [&]() -> std::string { throw error::BackendConfigSchema( - {configLocation, "mode", key}, + {configLocation, "dataset", "mode", key}, "Must be of string type."); })); }; @@ -391,7 +392,8 @@ namespace if (!val.is_boolean()) { throw error::BackendConfigSchema( - {configLocation, "mode", key}, "Must be of boolean type."); + {configLocation, "dataset", "mode", key}, + "Must be of boolean type."); } static_cast(then)(val.get()); }; @@ -414,7 +416,7 @@ namespace openPMD::internal::AwsBuilder builder( // TODO: bucket_name: introduce expansion pattern for openPMD // file name - get_mandatory("bucket_name", false), + get_mandatory("bucket", false), get_mandatory("access_key_id", false), get_mandatory("secret_access_key", false)); @@ -447,7 +449,7 @@ namespace else { throw error::BackendConfigSchema( - {configLocation, "mode", "scheme"}, + {configLocation, "dataset", "mode", "scheme"}, "Must be either 'http' or 'https'."); } }); @@ -458,7 +460,7 @@ namespace else { throw error::BackendConfigSchema( - {configLocation, "mode", "type"}, + {configLocation, "dataset", "mode", "provider"}, "Must be either 'stdio' or 'aws'."); } @@ -496,7 +498,7 @@ auto JSONIOHandlerImpl::retrieveDatasetMode(openPMD::json::TracingJSON &config) // tracing object m_deferredExternalBlockstorageConfig = std::make_optional( - config.json(), config.originallySpecifiedAs); + mode.json(), mode.originallySpecifiedAs); config.declareFullyRead(); } } diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp index e8c12ab16b..5a7c17ead4 100644 --- a/src/toolkit/Aws.cpp +++ b/src/toolkit/Aws.cpp @@ -120,7 +120,7 @@ void ExternalBlockStorageAws::get( -> nlohmann::json { nlohmann::json j; - 
j["provider"] = "s3"; + j["provider"] = "aws"; if (m_endpoint.has_value()) { j["endpoint"] = *m_endpoint; From 3c4c992283035026e454b44e739b0d3ee365ace6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 15:48:25 +0100 Subject: [PATCH 25/50] wahhh? --- test/SerialIOTest.cpp | 60 ++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 18 deletions(-) diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 7fd13822f3..d6ba03b85b 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -5093,9 +5093,8 @@ this = "should not warn" // BP3 engine writes files, BP4 writes directories REQUIRE(openPMD::auxiliary::file_exists("../samples/jsonConfiguredBP3.bp")); - REQUIRE( - openPMD::auxiliary::directory_exists( - "../samples/jsonConfiguredBP4.bp")); + REQUIRE(openPMD::auxiliary::directory_exists( + "../samples/jsonConfiguredBP4.bp")); std::string readConfigBP3 = R"END( { @@ -5272,6 +5271,27 @@ TEST_CASE("bp4_steps", "[serial][adios2]") void serial_iterator(std::string const &file) { + auto const write_config = R"( +init_aws_api = true +rank_table = "posix_hostname" + +[json.attribute] +mode = "short" + +[json.dataset.mode] +provider = "aws" +access_key_id = "test" +secret_access_key = "test" +endpoint = "http://localhost:4566" +bucket = "simdata" + )"; + auto const read_config = R"( +init_aws_api = true + +[json.dataset.mode] +access_key_id = "test" +secret_access_key = "test" + )"; constexpr Extent::value_type extent = 1000; { Series writeSeries( @@ -5279,7 +5299,7 @@ void serial_iterator(std::string const &file) Access::CREATE_LINEAR #ifndef _WIN32 , - R"({"rank_table": "posix_hostname"})" + write_config #endif ); auto iterations = writeSeries.snapshots(); @@ -5294,7 +5314,7 @@ void serial_iterator(std::string const &file) } } - Series readSeries(file, Access::READ_ONLY); + Series readSeries(file, Access::READ_ONLY, read_config); size_t last_iteration_index = 0; size_t numberOfIterations = 0; @@ 
-5330,19 +5350,23 @@ void serial_iterator(std::string const &file) TEST_CASE("serial_iterator", "[serial][adios2]") { - for (auto const &t : testedFileExtensions()) - { -#ifdef _WIN32 - serial_iterator("../samples/serial_iterator_filebased_%T." + t); - serial_iterator("../samples/serial_iterator_groupbased." + t); -#else - // Add some regex characters into the file names to see that we can deal - // with that. Don't do that on Windows because Windows does not like - // those characters within file paths. - serial_iterator("../samples/serial_iterator_filebased_+?_%T." + t); - serial_iterator("../samples/serial_iterator_groupbased_+?." + t); -#endif - } + serial_iterator("../samples/serial_iterator.json"); + // for (auto const &t : testedFileExtensions()) + // { + // #ifdef _WIN32 + // serial_iterator("../samples/serial_iterator_filebased_%T." + t); + // serial_iterator("../samples/serial_iterator_groupbased." + t); + // #else + // // Add some regex characters into the file names to see that we + // can deal + // // with that. Don't do that on Windows because Windows does not + // like + // // those characters within file paths. + // serial_iterator("../samples/serial_iterator_filebased_+?_%T." + + // t); serial_iterator("../samples/serial_iterator_groupbased_+?." 
+ + // t); + // #endif + // } } void variableBasedSingleIteration(std::string const &file) From 9990819c456fc000356a77738269a80010e939f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 16:01:15 +0100 Subject: [PATCH 26/50] Fix double initialization of EBS --- src/IO/JSON/JSONIOHandlerImpl.cpp | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 7e8ec09a13..e3abda85f1 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -1162,27 +1162,15 @@ void JSONIOHandlerImpl::openFile( auto file = std::get<0>(getPossiblyExisting(name)); - if (m_deferredExternalBlockstorageConfig.has_value()) - { - auto const &contents = obtainJsonContents(file); - auto previousConfig = [&]() -> std::optional { - if (contents->contains("external_storage")) - { - return std::make_optional( - &contents->at("external_storage")); - } - else - { - return std::nullopt; - } - }(); - parse_external_mode( - std::move(*m_deferredExternalBlockstorageConfig), - previousConfig, - backendConfigKey(), - m_datasetMode); - m_attributeMode.m_specificationVia = SpecificationVia::Manually; - } + // Need to access data in order to resolve external block storage + // configuration. EBS for read modes is configured at two places: + // + // 1. In the JSON config (stored at m_deferredExternalBlockstorageConfig) + // 2. In the previous JSON file that we are now opening + // + // Since the configuration may exclusively take place in either of the two + // options, files need to be opened now in any case. + obtainJsonContents(file); associateWithFile(writable, file); From 296470f6297291bc927718f3560362e28c89d6f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 16:13:12 +0100 Subject: [PATCH 27/50] Revert "wahhh?" This reverts commit ed239cf061d54e7243bf528a14a778c9f4afec40. 
--- test/SerialIOTest.cpp | 60 +++++++++++++------------------------------ 1 file changed, 18 insertions(+), 42 deletions(-) diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index d6ba03b85b..7fd13822f3 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -5093,8 +5093,9 @@ this = "should not warn" // BP3 engine writes files, BP4 writes directories REQUIRE(openPMD::auxiliary::file_exists("../samples/jsonConfiguredBP3.bp")); - REQUIRE(openPMD::auxiliary::directory_exists( - "../samples/jsonConfiguredBP4.bp")); + REQUIRE( + openPMD::auxiliary::directory_exists( + "../samples/jsonConfiguredBP4.bp")); std::string readConfigBP3 = R"END( { @@ -5271,27 +5272,6 @@ TEST_CASE("bp4_steps", "[serial][adios2]") void serial_iterator(std::string const &file) { - auto const write_config = R"( -init_aws_api = true -rank_table = "posix_hostname" - -[json.attribute] -mode = "short" - -[json.dataset.mode] -provider = "aws" -access_key_id = "test" -secret_access_key = "test" -endpoint = "http://localhost:4566" -bucket = "simdata" - )"; - auto const read_config = R"( -init_aws_api = true - -[json.dataset.mode] -access_key_id = "test" -secret_access_key = "test" - )"; constexpr Extent::value_type extent = 1000; { Series writeSeries( @@ -5299,7 +5279,7 @@ secret_access_key = "test" Access::CREATE_LINEAR #ifndef _WIN32 , - write_config + R"({"rank_table": "posix_hostname"})" #endif ); auto iterations = writeSeries.snapshots(); @@ -5314,7 +5294,7 @@ secret_access_key = "test" } } - Series readSeries(file, Access::READ_ONLY, read_config); + Series readSeries(file, Access::READ_ONLY); size_t last_iteration_index = 0; size_t numberOfIterations = 0; @@ -5350,23 +5330,19 @@ secret_access_key = "test" TEST_CASE("serial_iterator", "[serial][adios2]") { - serial_iterator("../samples/serial_iterator.json"); - // for (auto const &t : testedFileExtensions()) - // { - // #ifdef _WIN32 - // serial_iterator("../samples/serial_iterator_filebased_%T." 
+ t); - // serial_iterator("../samples/serial_iterator_groupbased." + t); - // #else - // // Add some regex characters into the file names to see that we - // can deal - // // with that. Don't do that on Windows because Windows does not - // like - // // those characters within file paths. - // serial_iterator("../samples/serial_iterator_filebased_+?_%T." + - // t); serial_iterator("../samples/serial_iterator_groupbased_+?." + - // t); - // #endif - // } + for (auto const &t : testedFileExtensions()) + { +#ifdef _WIN32 + serial_iterator("../samples/serial_iterator_filebased_%T." + t); + serial_iterator("../samples/serial_iterator_groupbased." + t); +#else + // Add some regex characters into the file names to see that we can deal + // with that. Don't do that on Windows because Windows does not like + // those characters within file paths. + serial_iterator("../samples/serial_iterator_filebased_+?_%T." + t); + serial_iterator("../samples/serial_iterator_groupbased_+?." + t); +#endif + } } void variableBasedSingleIteration(std::string const &file) From 4b8bc2bf521d02d3e9785c106bfc5afd388e17fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 16:29:58 +0100 Subject: [PATCH 28/50] Actually use slashes in S3 allow --- src/toolkit/Aws.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp index 5a7c17ead4..c9a7b71fad 100644 --- a/src/toolkit/Aws.cpp +++ b/src/toolkit/Aws.cpp @@ -58,8 +58,9 @@ ExternalBlockStorageAws::~ExternalBlockStorageAws() = default; auto ExternalBlockStorageAws::put( std::string const &identifier, void const *data, size_t len) -> std::string { - auto sanitized = identifier; - ExternalBlockStorage::sanitizeString(sanitized); + auto sanitized = !identifier.empty() && identifier.at(0) == '/' + ? 
identifier.substr(1) + : identifier; Aws::S3::Model::PutObjectRequest put_request; put_request.SetBucket(m_bucketName); From d198f9817ab69741030678d30bbb8b67d6c93a44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 19:26:50 +0100 Subject: [PATCH 29/50] Warn on unused restart config --- src/IO/JSON/JSONIOHandlerImpl.cpp | 33 ++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index e3abda85f1..b83ce91397 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -144,11 +144,30 @@ namespace return *accum_ptr; } - void warnUnusedJson(openPMD::json::TracingJSON const &jsonConfig) + auto prepend_to_json(nlohmann::json j) -> nlohmann::json + { + return j; + } + + template + auto prepend_to_json(nlohmann::json j, Arg &&arg, Args &&...args) + -> nlohmann::json + { + return nlohmann::json{ + {std::forward(arg), + prepend_to_json(std::move(j), std::forward(args)...)}}; + } + + template + void warnUnusedJson( + openPMD::json::TracingJSON const &jsonConfig, + Args &&...extra_json_hierarchy) { auto shadow = jsonConfig.invertShadow(); if (shadow.size() > 0) { + shadow = prepend_to_json( + std::move(shadow), std::forward(extra_json_hierarchy)...); switch (jsonConfig.originallySpecifiedAs) { case openPMD::json::SupportedLanguages::JSON: @@ -2425,14 +2444,14 @@ JSONIOHandlerImpl::obtainJsonContents(File const &file) return std::nullopt; } }(); + auto manual_config = m_deferredExternalBlockstorageConfig.has_value() + ? std::move(*m_deferredExternalBlockstorageConfig) + : openPMD::json::TracingJSON(); parse_external_mode( - m_deferredExternalBlockstorageConfig.has_value() - ? 
std::move(*m_deferredExternalBlockstorageConfig) - : openPMD::json::TracingJSON(), - previousConfig, - backendConfigKey(), - m_datasetMode); + manual_config, previousConfig, backendConfigKey(), m_datasetMode); + warnUnusedJson(manual_config, "dataset", "mode"); m_attributeMode.m_specificationVia = SpecificationVia::Manually; + m_deferredExternalBlockstorageConfig.reset(); } From 33b7e6bcdb0dcd4aca26290d1241de5dfcc92a38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 8 Dec 2025 19:27:20 +0100 Subject: [PATCH 30/50] Reapply "wahhh?" This reverts commit 72b41a3855edc5a6eb1206714116d112fb6198d8. --- test/SerialIOTest.cpp | 60 ++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 18 deletions(-) diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 7fd13822f3..d6ba03b85b 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -5093,9 +5093,8 @@ this = "should not warn" // BP3 engine writes files, BP4 writes directories REQUIRE(openPMD::auxiliary::file_exists("../samples/jsonConfiguredBP3.bp")); - REQUIRE( - openPMD::auxiliary::directory_exists( - "../samples/jsonConfiguredBP4.bp")); + REQUIRE(openPMD::auxiliary::directory_exists( + "../samples/jsonConfiguredBP4.bp")); std::string readConfigBP3 = R"END( { @@ -5272,6 +5271,27 @@ TEST_CASE("bp4_steps", "[serial][adios2]") void serial_iterator(std::string const &file) { + auto const write_config = R"( +init_aws_api = true +rank_table = "posix_hostname" + +[json.attribute] +mode = "short" + +[json.dataset.mode] +provider = "aws" +access_key_id = "test" +secret_access_key = "test" +endpoint = "http://localhost:4566" +bucket = "simdata" + )"; + auto const read_config = R"( +init_aws_api = true + +[json.dataset.mode] +access_key_id = "test" +secret_access_key = "test" + )"; constexpr Extent::value_type extent = 1000; { Series writeSeries( @@ -5279,7 +5299,7 @@ void serial_iterator(std::string const &file) Access::CREATE_LINEAR #ifndef _WIN32 , - 
R"({"rank_table": "posix_hostname"})" + write_config #endif ); auto iterations = writeSeries.snapshots(); @@ -5294,7 +5314,7 @@ void serial_iterator(std::string const &file) } } - Series readSeries(file, Access::READ_ONLY); + Series readSeries(file, Access::READ_ONLY, read_config); size_t last_iteration_index = 0; size_t numberOfIterations = 0; @@ -5330,19 +5350,23 @@ void serial_iterator(std::string const &file) TEST_CASE("serial_iterator", "[serial][adios2]") { - for (auto const &t : testedFileExtensions()) - { -#ifdef _WIN32 - serial_iterator("../samples/serial_iterator_filebased_%T." + t); - serial_iterator("../samples/serial_iterator_groupbased." + t); -#else - // Add some regex characters into the file names to see that we can deal - // with that. Don't do that on Windows because Windows does not like - // those characters within file paths. - serial_iterator("../samples/serial_iterator_filebased_+?_%T." + t); - serial_iterator("../samples/serial_iterator_groupbased_+?." + t); -#endif - } + serial_iterator("../samples/serial_iterator.json"); + // for (auto const &t : testedFileExtensions()) + // { + // #ifdef _WIN32 + // serial_iterator("../samples/serial_iterator_filebased_%T." + t); + // serial_iterator("../samples/serial_iterator_groupbased." + t); + // #else + // // Add some regex characters into the file names to see that we + // can deal + // // with that. Don't do that on Windows because Windows does not + // like + // // those characters within file paths. + // serial_iterator("../samples/serial_iterator_filebased_+?_%T." + + // t); serial_iterator("../samples/serial_iterator_groupbased_+?." 
+ + // t); + // #endif + // } } void variableBasedSingleIteration(std::string const &file) From 61b63fd5949b90448efdbe6b5a8afdb931cccbd3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 8 Dec 2025 18:35:27 +0000 Subject: [PATCH 31/50] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- test/SerialIOTest.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index d6ba03b85b..24fd529060 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -5093,8 +5093,9 @@ this = "should not warn" // BP3 engine writes files, BP4 writes directories REQUIRE(openPMD::auxiliary::file_exists("../samples/jsonConfiguredBP3.bp")); - REQUIRE(openPMD::auxiliary::directory_exists( - "../samples/jsonConfiguredBP4.bp")); + REQUIRE( + openPMD::auxiliary::directory_exists( + "../samples/jsonConfiguredBP4.bp")); std::string readConfigBP3 = R"END( { From 638f10548f76d02347b5d5c1c1bb7e68e387a458 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 9 Dec 2025 11:21:30 +0100 Subject: [PATCH 32/50] Use rank identifier also in JSON strings necessary precondition for MPI merging --- src/toolkit/ExternalBlockStorage.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index cee4a34fd1..4a4d37fa96 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -90,7 +90,7 @@ auto ExternalBlockStorage::store( }(); constexpr size_t padding = 6; - std::string index_as_str = [running_index]() { + std::string index_as_str = [running_index, &infix]() { auto res = std::to_string(running_index); auto size = res.size(); if (size >= padding) @@ -98,6 +98,10 @@ auto ExternalBlockStorage::store( return res; } std::stringstream padded; + if (infix.has_value()) + { + 
padded << *infix << "--"; + } for (size_t i = 0; i < padding - size; ++i) { padded << '0'; @@ -142,10 +146,6 @@ auto ExternalBlockStorage::store( block["extent"] = blockExtent; std::stringstream filesystem_identifier; filesystem_identifier << path.to_string(); - if (infix.has_value()) - { - filesystem_identifier << "--" << *infix; - } filesystem_identifier << "--" << index_as_str; auto escaped_filesystem_identifier = m_worker->put( filesystem_identifier.str(), From 4ed50b8d9fcf15de07cd3735f702ee540b5c2a49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 9 Dec 2025 13:30:37 +0100 Subject: [PATCH 33/50] Extract implementation of convert-toml-json to header --- include/openPMD/cli/convert-toml-json.hpp | 119 ++++++++++++++++++++++ src/cli/convert-toml-json.cpp | 69 ++----------- 2 files changed, 128 insertions(+), 60 deletions(-) create mode 100644 include/openPMD/cli/convert-toml-json.hpp diff --git a/include/openPMD/cli/convert-toml-json.hpp b/include/openPMD/cli/convert-toml-json.hpp new file mode 100644 index 0000000000..0b59a2ca22 --- /dev/null +++ b/include/openPMD/cli/convert-toml-json.hpp @@ -0,0 +1,119 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace from_format_to_format +{ +namespace json = openPMD::json; +struct ID +{ + template + static auto call(nlohmann::json const &&val) + // template <> + // auto call(nlohmann::json const &val) -> + // nlohmann::json const& + { + if constexpr (originallySpecifiedAs == json::SupportedLanguages::JSON) + { + return val; + } + else + { + return json::jsonToToml(val); + } + } +}; + +struct switch_ +{ + template + struct other_type; + template + static auto call(nlohmann::json const &&val) + { + return ID::call::value>( + std::move(val)); + } +}; +template <> +struct switch_::other_type +{ + static constexpr json::SupportedLanguages value = + json::SupportedLanguages::TOML; +}; +template <> +struct switch_::other_type +{ + static constexpr 
json::SupportedLanguages value = + json::SupportedLanguages::JSON; +}; +} // namespace from_format_to_format + +template +class convert_json_toml +{ + static void with_parsed_cmdline_args(std::string jsonOrToml) + { + namespace json = openPMD::json; + auto [config, originallySpecifiedAs] = json::parseOptions( + jsonOrToml, + /* considerFiles = */ true, + /* convertLowercase = */ false); + { + // NOLINTNEXTLINE(bugprone-unused-local-non-trivial-variable) + [[maybe_unused]] auto _ = std::move(jsonOrToml); + } + switch (originallySpecifiedAs) + { + using SL = json::SupportedLanguages; + case SL::JSON: { + auto asToml = json::jsonToToml(config); + std::cout << json::format_toml(asToml); + } + break; + case SL::TOML: + std::cout << config << '\n'; + break; + } + } + +public: + static void run_application( + int argc, char const **argv, void (*print_help_message)(char const *)) + { + std::string jsonOrToml; + switch (argc) + { + case 0: + case 1: + // Just read the whole stream into memory + // Not very elegant, but we'll hold the entire JSON/TOML dataset + // in memory at some point anyway, so it doesn't really matter + { + std::stringbuf readEverything; + std::cin >> &readEverything; + jsonOrToml = readEverything.str(); + } + break; + case 2: + if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0) + { + print_help_message(argv[1]); + exit(0); + } + jsonOrToml = argv[1]; + break; + default: + throw std::runtime_error( + std::string("Usage: ") + argv[0] + + " [file location or inline JSON/TOML]"); + } + with_parsed_cmdline_args(std::move(jsonOrToml)); + } +}; diff --git a/src/cli/convert-toml-json.cpp b/src/cli/convert-toml-json.cpp index d930fa156b..1e4e684930 100644 --- a/src/cli/convert-toml-json.cpp +++ b/src/cli/convert-toml-json.cpp @@ -18,58 +18,11 @@ * and the GNU Lesser General Public License along with openPMD-api. * If not, see . 
*/ -#include -#include -#include +#include "openPMD/cli/convert-toml-json.hpp" -#include -#include -#include - -namespace json = openPMD::json; - -void parsed_main(std::string jsonOrToml) -{ - auto [config, originallySpecifiedAs] = json::parseOptions( - jsonOrToml, /* considerFiles = */ true, /* convertLowercase = */ false); - { - // NOLINTNEXTLINE(bugprone-unused-local-non-trivial-variable) - [[maybe_unused]] auto _ = std::move(jsonOrToml); - } - switch (originallySpecifiedAs) - { - using SL = json::SupportedLanguages; - case SL::JSON: { - auto asToml = json::jsonToToml(config); - std::cout << json::format_toml(asToml); - } - break; - case SL::TOML: - std::cout << config << '\n'; - break; - } -} - -int main(int argc, char const **argv) +void print_help_message(char const *program_name) { - std::string jsonOrToml; - switch (argc) - { - case 0: - case 1: - // Just read the whole stream into memory - // Not very elegant, but we'll hold the entire JSON/TOML dataset - // in memory at some point anyway, so it doesn't really matter - { - std::stringbuf readEverything; - std::cin >> &readEverything; - jsonOrToml = readEverything.str(); - } - break; - case 2: - if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0) - { - std::cout << "Usage: " << std::string(argv[0]) << R"( [json_or_toml] + std::cout << "Usage: " << std::string(program_name) << R"( [json_or_toml] 'json_or_toml' can be a JSON or TOML dataset specified inline or a reference to a file prepended by an '@'. Inline datasets will be interpreted as JSON if they start with an '{', as TOML @@ -80,14 +33,10 @@ Inline dataset specifications can be replaced by input read from stdin. If the input is JSON, then it will be converted to TOML and written to stdout, equivalently from TOML to JSON. 
)"; - exit(0); - } - jsonOrToml = argv[1]; - break; - default: - throw std::runtime_error( - std::string("Usage: ") + argv[0] + - " [file location or inline JSON/TOML]"); - } - parsed_main(std::move(jsonOrToml)); +} + +int main(int argc, char const **argv) +{ + convert_json_toml::run_application( + argc, argv, print_help_message); } From 1703370190655ca531639cd86c6c3464436cccd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 9 Dec 2025 13:50:20 +0100 Subject: [PATCH 34/50] Implement merging --- include/openPMD/cli/convert-toml-json.hpp | 47 +++++++++++++++-------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/include/openPMD/cli/convert-toml-json.hpp b/include/openPMD/cli/convert-toml-json.hpp index 0b59a2ca22..5ffcabe127 100644 --- a/include/openPMD/cli/convert-toml-json.hpp +++ b/include/openPMD/cli/convert-toml-json.hpp @@ -6,6 +6,7 @@ #include #include +#include #include namespace from_format_to_format @@ -58,17 +59,11 @@ struct switch_::other_type template class convert_json_toml { - static void with_parsed_cmdline_args(std::string jsonOrToml) + static void + with_parsed_cmdline_args(openPMD::json::ParsedConfig parsed_config) { namespace json = openPMD::json; - auto [config, originallySpecifiedAs] = json::parseOptions( - jsonOrToml, - /* considerFiles = */ true, - /* convertLowercase = */ false); - { - // NOLINTNEXTLINE(bugprone-unused-local-non-trivial-variable) - [[maybe_unused]] auto _ = std::move(jsonOrToml); - } + auto [config, originallySpecifiedAs] = std::move(parsed_config); switch (originallySpecifiedAs) { using SL = json::SupportedLanguages; @@ -83,6 +78,30 @@ class convert_json_toml } } + static auto merge(char const **begin, char const **end) + -> openPMD::json::ParsedConfig + { + namespace json = openPMD::json; + if (begin == end) + { + throw std::runtime_error( + "merge: need at least one JSON/TOML file."); + } + auto config = json::parseOptions( + *begin, + /* considerFiles = */ true, + /* 
convertLowercase = */ false); + for (++begin; begin != end; ++begin) + { + auto [next, _] = json::parseOptions( + *begin, + /* considerFiles = */ true, + /* convertLowercase = */ false); + json::merge_internal(config.config, next, /* do_prune = */ false); + } + return config; + } + public: static void run_application( int argc, char const **argv, void (*print_help_message)(char const *)) @@ -101,19 +120,15 @@ class convert_json_toml jsonOrToml = readEverything.str(); } break; - case 2: + default: if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0) { print_help_message(argv[1]); exit(0); } - jsonOrToml = argv[1]; + auto parsed_config = merge(argv + 1, argv + argc); + with_parsed_cmdline_args(std::move(parsed_config)); break; - default: - throw std::runtime_error( - std::string("Usage: ") + argv[0] + - " [file location or inline JSON/TOML]"); } - with_parsed_cmdline_args(std::move(jsonOrToml)); } }; From 36ec4d7b52cb84d957013d17a526215af599bcdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 9 Dec 2025 14:12:30 +0100 Subject: [PATCH 35/50] Add merge-json --- CMakeLists.txt | 1 + include/openPMD/cli/convert-toml-json.hpp | 23 ++++++++++++++++++----- src/cli/convert-toml-json.cpp | 2 +- src/cli/merge-json.cpp | 22 ++++++++++++++++++++++ 4 files changed, 42 insertions(+), 6 deletions(-) create mode 100644 src/cli/merge-json.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b246c6bacb..fecf7611ed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -713,6 +713,7 @@ set(openPMD_TEST_NAMES set(openPMD_CLI_TOOL_NAMES ls convert-toml-json + merge-json ) set(openPMD_PYTHON_CLI_TOOL_NAMES pipe diff --git a/include/openPMD/cli/convert-toml-json.hpp b/include/openPMD/cli/convert-toml-json.hpp index 5ffcabe127..e632f8107c 100644 --- a/include/openPMD/cli/convert-toml-json.hpp +++ b/include/openPMD/cli/convert-toml-json.hpp @@ -59,6 +59,15 @@ struct switch_::other_type template class convert_json_toml { + static void print(toml::value 
&val) + { + namespace json = openPMD::json; + std::cout << json::format_toml(val); + } + static void print(nlohmann::json const &val) + { + std::cout << val << '\n'; + } static void with_parsed_cmdline_args(openPMD::json::ParsedConfig parsed_config) { @@ -68,13 +77,17 @@ class convert_json_toml { using SL = json::SupportedLanguages; case SL::JSON: { - auto asToml = json::jsonToToml(config); - std::cout << json::format_toml(asToml); + auto for_print = + FromFormatToFormat::template call(std::move(config)); + print(for_print); + } + break; + case SL::TOML: { + auto for_print = + FromFormatToFormat::template call(std::move(config)); + print(for_print); } break; - case SL::TOML: - std::cout << config << '\n'; - break; } } diff --git a/src/cli/convert-toml-json.cpp b/src/cli/convert-toml-json.cpp index 1e4e684930..ebd3aedb55 100644 --- a/src/cli/convert-toml-json.cpp +++ b/src/cli/convert-toml-json.cpp @@ -37,6 +37,6 @@ equivalently from TOML to JSON. int main(int argc, char const **argv) { - convert_json_toml::run_application( + convert_json_toml::run_application( argc, argv, print_help_message); } diff --git a/src/cli/merge-json.cpp b/src/cli/merge-json.cpp new file mode 100644 index 0000000000..c9cb2def1f --- /dev/null +++ b/src/cli/merge-json.cpp @@ -0,0 +1,22 @@ +#include "openPMD/cli/convert-toml-json.hpp" + +void print_help_message(char const *program_name) +{ + std::cout << "Usage: " << std::string(program_name) << R"( [json_or_toml]+ +'json_or_toml' can be a JSON or TOML dataset specified inline or a reference +to a file prepended by an '@'. +Inline datasets will be interpreted as JSON if they start with an '{', as TOML +otherwise. Datasets from a file will be interpreted as JSON or TOML depending +on the file ending '.json' or '.toml' respectively. +Inline dataset specifications can be replaced by input read from stdin. + +If the JSON/TOML files are mixed, then the output type (JSON or TOML) will be +determined by the type of the first file. 
+)"; +} + +int main(int argc, char const **argv) +{ + convert_json_toml::run_application( + argc, argv, print_help_message); +} From 1865b10f87ee38eb0da7bf917008325bc4b66f00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 9 Dec 2025 15:45:43 +0100 Subject: [PATCH 36/50] Create merge script in parallel json output --- src/IO/JSON/JSONIOHandlerImpl.cpp | 70 ++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index b83ce91397..1f1f7fb692 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -37,7 +37,11 @@ #include "openPMD/backend/Writable.hpp" #include "openPMD/toolkit/ExternalBlockStorage.hpp" +#if openPMD_USE_FILESYSTEM_HEADER +#include +#endif #include +#include #include #include @@ -2581,7 +2585,7 @@ merge the .json files somehow (no tooling provided for this (yet)). readme_file.open( dirpath + "/README.txt", std::ios_base::out | std::ios_base::trunc); - readme_file << readme_msg + 1; + readme_file << &readme_msg[1]; readme_file.close(); if (!readme_file.good() && !filename.fileState->printedReadmeWarningAlready) @@ -2595,6 +2599,70 @@ merge the .json files somehow (no tooling provided for this (yet)). << readme_msg + 1 << "----------" << std::endl; filename.fileState->printedReadmeWarningAlready = true; } + + constexpr char const *merge_script = R"END( +#!/usr/bin/env bash + +set -euo pipefail + +parallel_dir="$(dirname "$BASH_SOURCE")" +parallel_dir="$(cd "$parallel_dir" && pwd)" +serial_dir="${parallel_dir%.json.parallel}" +if [[ "$serial_dir" = "$parallel_dir" ]]; then + serial_dir="$parallel_dir/merged.json" +else + serial_dir="$serial_dir.json" +fi +echo "Will merge files to '$serial_dir'." >&2 +if [[ -e "$serial_dir" ]]; then + echo "Target dir already exists, aborting." >&2 + exit 1 +fi +if ! 
which openpmd-merge-json 2>/dev/null; then + echo "Did not find 'openpmd-merge-json' on PATH, aborting." >&2 + exit 1 +fi +for file in "$parallel_dir"/mpi_rank_*.json; do + echo "@$file" +done | + xargs openpmd-merge-json >"$serial_dir" +# TODO: xargs will only work up to a certain number of files)END"; + std::string const merge_script_path = dirpath + "/merge.sh"; + std::fstream merge_file; + merge_file.open( + merge_script_path, std::ios_base::out | std::ios_base::trunc); + merge_file << &merge_script[1]; + merge_file.close(); + + if (!merge_file.good() && + !filename.fileState->printedReadmeWarningAlready) + { + std::cerr + << "[Warning] Something went wrong in trying to create " + "merge script at '" + << merge_script_path << "'. Will ignore and continue." + << std::endl; + filename.fileState->printedReadmeWarningAlready = true; + } + +#if openPMD_USE_FILESYSTEM_HEADER + try + { + std::filesystem::permissions( + merge_script_path, + std::filesystem::perms::owner_exec | + std::filesystem::perms::owner_exec | + std::filesystem::perms::owner_exec, + std::filesystem::perm_options::add); + } + catch (std::filesystem::filesystem_error const &e) + { + std::cerr << "Failed setting executable permissions on '" + << merge_script_path + << "', will ignore. 
Original error was:\n" + << e.what() << std::endl; + } +#endif } }; From 50c3d9b02d2191c70297476e3751403427efd645 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 11 Dec 2025 11:56:07 +0100 Subject: [PATCH 37/50] Read files from stdin Avoids argument length limitations with cmd line arguments --- include/openPMD/cli/convert-toml-json.hpp | 47 ++++++++++++++++++++--- src/IO/JSON/JSONIOHandlerImpl.cpp | 10 ++--- src/cli/convert-toml-json.cpp | 5 ++- src/cli/merge-json.cpp | 5 ++- 4 files changed, 52 insertions(+), 15 deletions(-) diff --git a/include/openPMD/cli/convert-toml-json.hpp b/include/openPMD/cli/convert-toml-json.hpp index e632f8107c..051fa01bfb 100644 --- a/include/openPMD/cli/convert-toml-json.hpp +++ b/include/openPMD/cli/convert-toml-json.hpp @@ -1,7 +1,9 @@ #pragma once +#include #include #include +#include #include #include @@ -91,8 +93,22 @@ class convert_json_toml } } - static auto merge(char const **begin, char const **end) - -> openPMD::json::ParsedConfig + struct ByLine : std::string + { + friend auto operator>>(std::istream &i, ByLine &l) -> std::istream & + { + decltype(auto) res = std::getline(i, l); + if (res) + { + l.insert(0, 1, '@'); + } + return res; + } + }; + using ByLineIterator = std::istream_iterator; + + template + static auto merge(It begin, It end) -> openPMD::json::ParsedConfig { namespace json = openPMD::json; if (begin == end) @@ -116,21 +132,40 @@ class convert_json_toml } public: + enum class UseStdinAs : std::uint8_t + { + InlineJson, + ListOfJson + }; + static void run_application( - int argc, char const **argv, void (*print_help_message)(char const *)) + int argc, + char const **argv, + UseStdinAs stdinconfig, + void (*print_help_message)(char const *)) { std::string jsonOrToml; switch (argc) { case 0: case 1: - // Just read the whole stream into memory - // Not very elegant, but we'll hold the entire JSON/TOML dataset - // in memory at some point anyway, so it doesn't really matter + switch 
(stdinconfig) { + case UseStdinAs::InlineJson: { + // Just read the whole stream into memory + // Not very elegant, but we'll hold the entire JSON/TOML dataset + // in memory at some point anyway, so it doesn't really matter std::stringbuf readEverything; std::cin >> &readEverything; jsonOrToml = readEverything.str(); + break; + } + case UseStdinAs::ListOfJson: { + auto parsed_config = + merge(ByLineIterator(std::cin), ByLineIterator{}); + with_parsed_cmdline_args(std::move(parsed_config)); + break; + } } break; default: diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 1f1f7fb692..c308827133 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -2615,18 +2615,18 @@ else fi echo "Will merge files to '$serial_dir'." >&2 if [[ -e "$serial_dir" ]]; then - echo "Target dir already exists, aborting." >&2 + echo "Target file already exists, aborting." >&2 exit 1 fi -if ! which openpmd-merge-json 2>/dev/null; then +if ! which openpmd-merge-json >/dev/null 2>&1; then echo "Did not find 'openpmd-merge-json' on PATH, aborting." >&2 exit 1 fi for file in "$parallel_dir"/mpi_rank_*.json; do - echo "@$file" + echo "$file" done | - xargs openpmd-merge-json >"$serial_dir" -# TODO: xargs will only work up to a certain number of files)END"; + openpmd-merge-json >"$serial_dir" +)END"; std::string const merge_script_path = dirpath + "/merge.sh"; std::fstream merge_file; merge_file.open( diff --git a/src/cli/convert-toml-json.cpp b/src/cli/convert-toml-json.cpp index ebd3aedb55..c4ad79db4c 100644 --- a/src/cli/convert-toml-json.cpp +++ b/src/cli/convert-toml-json.cpp @@ -37,6 +37,7 @@ equivalently from TOML to JSON. 
int main(int argc, char const **argv) { - convert_json_toml::run_application( - argc, argv, print_help_message); + using convert = convert_json_toml; + convert::run_application( + argc, argv, convert::UseStdinAs::InlineJson, print_help_message); } diff --git a/src/cli/merge-json.cpp b/src/cli/merge-json.cpp index c9cb2def1f..567125e769 100644 --- a/src/cli/merge-json.cpp +++ b/src/cli/merge-json.cpp @@ -17,6 +17,7 @@ determined by the type of the first file. int main(int argc, char const **argv) { - convert_json_toml::run_application( - argc, argv, print_help_message); + using convert = convert_json_toml; + convert::run_application( + argc, argv, convert::UseStdinAs::ListOfJson, print_help_message); } From 27febb0d0d323801c213ce45e50d658038511af7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 11 Dec 2025 12:24:58 +0100 Subject: [PATCH 38/50] Update documentation --- src/cli/merge-json.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/cli/merge-json.cpp b/src/cli/merge-json.cpp index 567125e769..79312a2388 100644 --- a/src/cli/merge-json.cpp +++ b/src/cli/merge-json.cpp @@ -2,13 +2,17 @@ void print_help_message(char const *program_name) { - std::cout << "Usage: " << std::string(program_name) << R"( [json_or_toml]+ + std::cout << "Merge multiple JSON/TOML files into one.\nUsage: " + << std::string(program_name) << R"( [json_or_toml]+ 'json_or_toml' can be a JSON or TOML dataset specified inline or a reference to a file prepended by an '@'. Inline datasets will be interpreted as JSON if they start with an '{', as TOML otherwise. Datasets from a file will be interpreted as JSON or TOML depending on the file ending '.json' or '.toml' respectively. -Inline dataset specifications can be replaced by input read from stdin. + +In order to support large numbers of files to be merged, the paths to those +files can also be specified line-by-line per stdin, replacing the limitations +of command line arguments. 
If the JSON/TOML files are mixed, then the output type (JSON or TOML) will be determined by the type of the first file. From 9c6e82523f37147202ec842eec3ce3fe91f264c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 5 Jan 2026 17:07:24 +0100 Subject: [PATCH 39/50] WIP Async writing --- include/openPMD/toolkit/Aws.hpp | 24 ++++- .../openPMD/toolkit/ExternalBlockStorage.hpp | 4 + src/IO/JSON/JSONIOHandlerImpl.cpp | 7 ++ src/toolkit/Aws.cpp | 89 +++++++++++++++++-- src/toolkit/AwsBuilder.cpp | 4 +- src/toolkit/ExternalBlockStorage.cpp | 16 +++- 6 files changed, 133 insertions(+), 11 deletions(-) diff --git a/include/openPMD/toolkit/Aws.hpp b/include/openPMD/toolkit/Aws.hpp index 5c66eee9c0..d7baa8b002 100644 --- a/include/openPMD/toolkit/Aws.hpp +++ b/include/openPMD/toolkit/Aws.hpp @@ -4,25 +4,47 @@ #include +#include + namespace openPMD::internal { +struct AwsAsyncHandler +{ + std::mutex mutex; + std::condition_variable event; + std::size_t request_counter = 0; + // Upon C++20, we can use a std::atomic for this and ditch the + // condition_variable + mutex approach + std::size_t completion_counter = 0; + + void wait(); + void add_task(); + void add_and_notify_result(); + + ~AwsAsyncHandler(); +}; + struct ExternalBlockStorageAws : ExternalBlockStorageBackend { private: Aws::S3::S3Client m_client; std::string m_bucketName; std::optional m_endpoint; + std::optional m_async; public: ExternalBlockStorageAws( Aws::S3::S3Client, std::string bucketName, - std::optional endpoint); + std::optional endpoint, + bool async); auto put(std::string const &identifier, void const *data, size_t len) -> std::string override; void get(std::string const &external_ref, void *data, size_t len) override; [[nodiscard]] auto externalStorageLocation() const -> nlohmann::json override; + void sync() override; + ~ExternalBlockStorageAws() override; }; } // namespace openPMD::internal diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp 
b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 25b776e620..f057a2301d 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -29,6 +29,8 @@ struct ExternalBlockStorageBackend [[nodiscard]] virtual auto externalStorageLocation() const -> nlohmann::json = 0; + virtual void sync(); + virtual ~ExternalBlockStorageBackend(); }; } // namespace openPMD::internal @@ -105,6 +107,8 @@ class ExternalBlockStorage nlohmann::json::json_pointer const &path, T *data); + void sync(); + [[nodiscard]] auto externalStorageLocation() const -> nlohmann::json; static void sanitizeString(std::string &s); diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index c308827133..bf5810c77f 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -689,6 +689,13 @@ std::future JSONIOHandlerImpl::flush() putJsonContents(file, false); } m_dirty.clear(); + std::visit( + auxiliary::overloaded{ + [](DatasetMode::External_t &externalStorage) { + externalStorage->sync(); + }, + [](auto &&) {}}, + this->m_datasetMode.m_mode.as_base()); return std::future(); } diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp index c9a7b71fad..268b491126 100644 --- a/src/toolkit/Aws.cpp +++ b/src/toolkit/Aws.cpp @@ -1,10 +1,12 @@ #include "openPMD/toolkit/Aws.hpp" +#include #include #include #include #include +#include #include namespace @@ -31,13 +33,44 @@ struct imemstream : std::iostream namespace openPMD::internal { +void AwsAsyncHandler::wait() +{ + std::cerr << "Waiting for remaining tasks. 
Have " << completion_counter + << " of " << request_counter << std::endl; + size_t target = this->request_counter; + std::unique_lock lk(this->mutex); + this->event.wait( + lk, [this, target]() { return this->completion_counter >= target; }); + std::cerr << "Finished waiting for remaining tasks" << std::endl; +} + +void AwsAsyncHandler::add_task() +{ + this->request_counter++; +} + +void AwsAsyncHandler::add_and_notify_result() +{ + std::unique_lock lk(this->mutex); + this->completion_counter++; + lk.unlock(); + this->event.notify_all(); +} + +AwsAsyncHandler::~AwsAsyncHandler() +{ + this->wait(); +} + ExternalBlockStorageAws::ExternalBlockStorageAws( Aws::S3::S3Client client, std::string bucketName, - std::optional endpoint) + std::optional endpoint, + bool async) : m_client{std::move(client)} , m_bucketName(std::move(bucketName)) , m_endpoint(std::move(endpoint)) + , m_async(async ? std::make_optional() : std::nullopt) { Aws::S3::Model::CreateBucketRequest create_request; create_request.SetBucket(m_bucketName); @@ -71,16 +104,49 @@ auto ExternalBlockStorageAws::put( put_request.SetBody(input_data); put_request.SetContentLength(static_cast(len)); - auto put_outcome = m_client.PutObject(put_request); - - if (put_outcome.IsSuccess()) + if (!m_async.has_value()) { - std::cout << "File uploaded successfully to S3!" << std::endl; + auto put_outcome = m_client.PutObject(put_request); + + if (put_outcome.IsSuccess()) + { + std::cout << "File synchronously uploaded successfully to S3!" 
+ << std::endl; + } + else + { + std::cerr << "Synchronous upload failed: " + << put_outcome.GetError().GetMessage() << std::endl; + } } else { - std::cerr << "Upload failed: " << put_outcome.GetError().GetMessage() - << std::endl; + auto &async_handler = *m_async; + auto responseReceivedHandler = + [&async_handler]( + const Aws::S3::S3Client *, + const Aws::S3::Model::PutObjectRequest &, + const Aws::S3::Model::PutObjectOutcome &put_outcome, + const std::shared_ptr + &) { + if (put_outcome.IsSuccess()) + { + std::cout + << "File asynchronously uploaded successfully to S3!" + << std::endl; + } + else + { + std::cerr << "Asynchronous upload failed: " + << put_outcome.GetError().GetMessage() + << std::endl; + } + async_handler.add_and_notify_result(); + }; + async_handler.add_task(); + m_client.PutObjectAsync(put_request, responseReceivedHandler); + // todo replace this + async_handler.wait(); } return sanitized; } @@ -117,6 +183,15 @@ void ExternalBlockStorageAws::get( } } +void ExternalBlockStorageAws::sync() +{ + if (!this->m_async.has_value()) + { + return; + } + this->m_async->wait(); +} + [[nodiscard]] auto ExternalBlockStorageAws::externalStorageLocation() const -> nlohmann::json { diff --git a/src/toolkit/AwsBuilder.cpp b/src/toolkit/AwsBuilder.cpp index cc3cdc87ef..5f5960b6c1 100644 --- a/src/toolkit/AwsBuilder.cpp +++ b/src/toolkit/AwsBuilder.cpp @@ -119,7 +119,9 @@ AwsBuilder::operator ExternalBlockStorage() return ExternalBlockStorage{std::make_unique( std::move(s3_client), std::move(m_bucketName), - std::move(m_endpointOverride))}; + std::move(m_endpointOverride), + // TODO: Add config option for this + /* async = */ true)}; } auto AwsBuilder::build() -> ExternalBlockStorage diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 4a4d37fa96..6c6e47bd8f 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -12,9 +12,14 @@ namespace openPMD::internal { 
-ExternalBlockStorageBackend::~ExternalBlockStorageBackend() = default; +void ExternalBlockStorageBackend::sync() +{ + // default for non-async backends: no-op } +ExternalBlockStorageBackend::~ExternalBlockStorageBackend() = default; +} // namespace openPMD::internal + namespace openPMD { @@ -161,7 +166,9 @@ void ExternalBlockStorage::read( [[maybe_unused]] nlohmann::json const &fullJsonDataset, [[maybe_unused]] nlohmann::json::json_pointer const &path, [[maybe_unused]] T *data) -{} +{ + throw std::runtime_error("Unimplemented!"); +} template void ExternalBlockStorage::read( @@ -211,6 +218,11 @@ void ExternalBlockStorage::read( } } +void ExternalBlockStorage::sync() +{ + this->m_worker->sync(); +} + [[nodiscard]] auto ExternalBlockStorage::externalStorageLocation() const -> nlohmann::json { From 66d25e0a5f70111b10c1b9555ddf0d81e5c6d30c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 6 Jan 2026 13:56:42 +0100 Subject: [PATCH 40/50] Use WriteBuffer type for smart pointers --- include/openPMD/toolkit/Aws.hpp | 3 ++- include/openPMD/toolkit/ExternalBlockStorage.hpp | 5 +++-- include/openPMD/toolkit/Stdio.hpp | 3 ++- src/IO/JSON/JSONIOHandlerImpl.cpp | 9 ++++----- src/toolkit/Aws.cpp | 7 +++++-- src/toolkit/ExternalBlockStorage.cpp | 7 ++++--- src/toolkit/Stdio.cpp | 5 +++-- 7 files changed, 23 insertions(+), 16 deletions(-) diff --git a/include/openPMD/toolkit/Aws.hpp b/include/openPMD/toolkit/Aws.hpp index d7baa8b002..0dc024ab81 100644 --- a/include/openPMD/toolkit/Aws.hpp +++ b/include/openPMD/toolkit/Aws.hpp @@ -38,7 +38,8 @@ struct ExternalBlockStorageAws : ExternalBlockStorageBackend std::string bucketName, std::optional endpoint, bool async); - auto put(std::string const &identifier, void const *data, size_t len) + auto + put(std::string const &identifier, auxiliary::WriteBuffer data, size_t len) -> std::string override; void get(std::string const &external_ref, void *data, size_t len) override; [[nodiscard]] auto 
externalStorageLocation() const diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index f057a2301d..b3b54f2703 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -1,6 +1,7 @@ #pragma once #include "openPMD/Dataset.hpp" +#include "openPMD/auxiliary/Memory.hpp" #include "openPMD/toolkit/AwsBuilder.hpp" #include "openPMD/toolkit/StdioBuilder.hpp" @@ -22,7 +23,7 @@ namespace openPMD::internal struct ExternalBlockStorageBackend { virtual auto - put(std::string const &identifier, void const *data, size_t len) + put(std::string const &identifier, auxiliary::WriteBuffer data, size_t len) -> std::string = 0; virtual void get(std::string const &external_ref, void *data, size_t len) = 0; @@ -90,7 +91,7 @@ class ExternalBlockStorage nlohmann::json &fullJsonDataset, nlohmann::json::json_pointer const &path, std::optional infix, // e.g. for distinguishing MPI ranks - T const *data) -> std::string; + auxiliary::WriteBuffer data) -> std::string; template void read( diff --git a/include/openPMD/toolkit/Stdio.hpp b/include/openPMD/toolkit/Stdio.hpp index 9428151d2e..0bf7f4a11c 100644 --- a/include/openPMD/toolkit/Stdio.hpp +++ b/include/openPMD/toolkit/Stdio.hpp @@ -12,7 +12,8 @@ struct ExternalBlockStorageStdio : ExternalBlockStorageBackend public: ExternalBlockStorageStdio(std::string directory, std::string openMode); - auto put(std::string const &identifier, void const *data, size_t len) + auto + put(std::string const &identifier, auxiliary::WriteBuffer data, size_t len) -> std::string override; void get(std::string const &external_ref, void *data, size_t len) override; [[nodiscard]] auto externalStorageLocation() const diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index bf5810c77f..19dbeb63b4 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -1467,11 +1467,10 @@ namespace 
struct StoreExternally { template - static void call( - ExternalBlockStorage &blockStorage, void const *ptr, Args &&...args) + static void call(ExternalBlockStorage &blockStorage, Args &&...args) { blockStorage.store( - std::forward(args)..., static_cast(ptr)); + std::forward(args)...); } static constexpr char const *errorMsg = "StoreExternally"; @@ -1525,13 +1524,13 @@ void JSONIOHandlerImpl::writeDataset( switchDatasetType( parameters.dtype, *external, - parameters.data.get(), j.at("extent").get(), parameters.offset, parameters.extent, jsonRoot, filePosition->id, - std::move(rankInfix)); + std::move(rankInfix), + std::move(parameters.data)); }}, verifyDataset(parameters, j).as_base()); diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp index 268b491126..335d2bc0d4 100644 --- a/src/toolkit/Aws.cpp +++ b/src/toolkit/Aws.cpp @@ -89,7 +89,8 @@ ExternalBlockStorageAws::ExternalBlockStorageAws( ExternalBlockStorageAws::~ExternalBlockStorageAws() = default; auto ExternalBlockStorageAws::put( - std::string const &identifier, void const *data, size_t len) -> std::string + std::string const &identifier, auxiliary::WriteBuffer data, size_t len) + -> std::string { auto sanitized = !identifier.empty() && identifier.at(0) == '/' ? 
identifier.substr(1) @@ -100,7 +101,9 @@ auto ExternalBlockStorageAws::put( put_request.SetKey(sanitized); auto input_data = Aws::MakeShared( - "PutObjectInputStream", reinterpret_cast(data), len); + "PutObjectInputStream", + reinterpret_cast(data.get()), + len); put_request.SetBody(input_data); put_request.SetContentLength(static_cast(len)); diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 6c6e47bd8f..d3fdb1b39c 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -2,6 +2,7 @@ #include "openPMD/DatatypeMacros.hpp" #include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp" +#include "openPMD/auxiliary/Memory.hpp" #include "openPMD/auxiliary/StringManip.hpp" #include @@ -74,7 +75,7 @@ auto ExternalBlockStorage::store( nlohmann::json &fullJsonDataset, nlohmann::json::json_pointer const &path, std::optional infix, - T const *data) -> std::string + auxiliary::WriteBuffer data) -> std::string { auto &dataset = fullJsonDataset[path]; @@ -154,7 +155,7 @@ auto ExternalBlockStorage::store( filesystem_identifier << "--" << index_as_str; auto escaped_filesystem_identifier = m_worker->put( filesystem_identifier.str(), - data, + std::move(data), sizeof(T) * flat_extent(blockExtent)); block["external_ref"] = escaped_filesystem_identifier; return index_as_str; @@ -250,7 +251,7 @@ void ExternalBlockStorage::sanitizeString(std::string &s) nlohmann::json &fullJsonDataset, \ nlohmann::json::json_pointer const &path, \ std::optional infix, \ - type const *data) -> std::string; \ + auxiliary::WriteBuffer) -> std::string; \ template void ExternalBlockStorage::read( \ std::string const &identifier, \ nlohmann::json const &fullJsonDataset, \ diff --git a/src/toolkit/Stdio.cpp b/src/toolkit/Stdio.cpp index ddf7da7178..a99ce6e0ad 100644 --- a/src/toolkit/Stdio.cpp +++ b/src/toolkit/Stdio.cpp @@ -62,7 +62,8 @@ ExternalBlockStorageStdio::ExternalBlockStorageStdio( 
ExternalBlockStorageStdio::~ExternalBlockStorageStdio() = default; auto ExternalBlockStorageStdio::put( - std::string const &identifier, void const *data, size_t len) -> std::string + std::string const &identifier, auxiliary::WriteBuffer data, size_t len) + -> std::string { auto sanitized = identifier + ".dat"; ExternalBlockStorage::sanitizeString(sanitized); @@ -81,7 +82,7 @@ auto ExternalBlockStorageStdio::put( filepath); } - size_t written = std::fwrite(data, 1, len, file); + size_t written = std::fwrite(data.get(), 1, len, file); if (written != len) { throw std::runtime_error( From 7fa32c03c4ba7cbcbb00069ee786c84d3385ee0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 6 Jan 2026 14:19:57 +0100 Subject: [PATCH 41/50] Async writing works --- include/openPMD/toolkit/Aws.hpp | 15 +++++++++++-- src/toolkit/Aws.cpp | 39 +++++++++++++++++++++++---------- 2 files changed, 41 insertions(+), 13 deletions(-) diff --git a/include/openPMD/toolkit/Aws.hpp b/include/openPMD/toolkit/Aws.hpp index 0dc024ab81..e0c3d1da4c 100644 --- a/include/openPMD/toolkit/Aws.hpp +++ b/include/openPMD/toolkit/Aws.hpp @@ -8,7 +8,7 @@ namespace openPMD::internal { -struct AwsAsyncHandler +struct AwsAsyncCounter { std::mutex mutex; std::condition_variable event; @@ -21,7 +21,18 @@ struct AwsAsyncHandler void add_task(); void add_and_notify_result(); - ~AwsAsyncHandler(); + ~AwsAsyncCounter(); +}; + +struct AwsAsyncHandler +{ + // We can defer std::unique_ptr operations longer than std::shared_ptr + // operations, since no one else has the memory, so use two counters. TODO: + // Add some form of restriction on how long the std::unique_ptr queue may + // become. Currently it can theoretically be spammed ad libitum. Either + // restrict the queue to a configurable length, or add a syncEverything() + // call. 
+ AwsAsyncCounter shared_ptr_operations, unique_ptr_operations; }; struct ExternalBlockStorageAws : ExternalBlockStorageBackend diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp index 335d2bc0d4..5b950443a0 100644 --- a/src/toolkit/Aws.cpp +++ b/src/toolkit/Aws.cpp @@ -1,4 +1,7 @@ #include "openPMD/toolkit/Aws.hpp" +#include "openPMD/auxiliary/Memory.hpp" +#include "openPMD/auxiliary/Memory_internal.hpp" +#include "openPMD/auxiliary/Variant.hpp" #include #include @@ -33,7 +36,7 @@ struct imemstream : std::iostream namespace openPMD::internal { -void AwsAsyncHandler::wait() +void AwsAsyncCounter::wait() { std::cerr << "Waiting for remaining tasks. Have " << completion_counter << " of " << request_counter << std::endl; @@ -44,12 +47,12 @@ void AwsAsyncHandler::wait() std::cerr << "Finished waiting for remaining tasks" << std::endl; } -void AwsAsyncHandler::add_task() +void AwsAsyncCounter::add_task() { this->request_counter++; } -void AwsAsyncHandler::add_and_notify_result() +void AwsAsyncCounter::add_and_notify_result() { std::unique_lock lk(this->mutex); this->completion_counter++; @@ -57,7 +60,7 @@ void AwsAsyncHandler::add_and_notify_result() this->event.notify_all(); } -AwsAsyncHandler::~AwsAsyncHandler() +AwsAsyncCounter::~AwsAsyncCounter() { this->wait(); } @@ -124,14 +127,30 @@ auto ExternalBlockStorageAws::put( } else { - auto &async_handler = *m_async; + auto &async_counter = *std::visit( + auxiliary::overloaded{ + [this](auxiliary::WriteBuffer::CopyableUniquePtr const &) { + return &this->m_async->unique_ptr_operations; + }, + [this](auxiliary::WriteBuffer::SharedPtr const &) { + return &this->m_async->shared_ptr_operations; + }}, + data.as_variant()); auto responseReceivedHandler = - [&async_handler]( + [&async_counter, + /* + * Need to keep buffers alive until they have been asynchronously + * read. Use the closure captures for this. Wrap the WriteBuffer + * inside a shared_ptr to make the std::function copyable. 
+ */ + keepalive = + std::make_shared(std::move(data))]( const Aws::S3::S3Client *, const Aws::S3::Model::PutObjectRequest &, const Aws::S3::Model::PutObjectOutcome &put_outcome, const std::shared_ptr &) { + (void)keepalive; if (put_outcome.IsSuccess()) { std::cout @@ -144,12 +163,10 @@ auto ExternalBlockStorageAws::put( << put_outcome.GetError().GetMessage() << std::endl; } - async_handler.add_and_notify_result(); + async_counter.add_and_notify_result(); }; - async_handler.add_task(); + async_counter.add_task(); m_client.PutObjectAsync(put_request, responseReceivedHandler); - // todo replace this - async_handler.wait(); } return sanitized; } @@ -192,7 +209,7 @@ void ExternalBlockStorageAws::sync() { return; } - this->m_async->wait(); + this->m_async->shared_ptr_operations.wait(); } [[nodiscard]] auto ExternalBlockStorageAws::externalStorageLocation() const From 9f5026074c184b1a999d0507afa2e70d8c10b380 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 6 Jan 2026 14:24:12 +0100 Subject: [PATCH 42/50] Be less verbose --- src/toolkit/Aws.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp index 5b950443a0..f3ea2828a8 100644 --- a/src/toolkit/Aws.cpp +++ b/src/toolkit/Aws.cpp @@ -38,13 +38,13 @@ namespace openPMD::internal { void AwsAsyncCounter::wait() { - std::cerr << "Waiting for remaining tasks. Have " << completion_counter - << " of " << request_counter << std::endl; + // std::cerr << "Waiting for remaining tasks. 
Have " << completion_counter + // << " of " << request_counter << std::endl; size_t target = this->request_counter; std::unique_lock lk(this->mutex); this->event.wait( lk, [this, target]() { return this->completion_counter >= target; }); - std::cerr << "Finished waiting for remaining tasks" << std::endl; + // std::cerr << "Finished waiting for remaining tasks" << std::endl; } void AwsAsyncCounter::add_task() @@ -116,8 +116,8 @@ auto ExternalBlockStorageAws::put( if (put_outcome.IsSuccess()) { - std::cout << "File synchronously uploaded successfully to S3!" - << std::endl; + // std::cout << "File synchronously uploaded successfully to S3!" + // << std::endl; } else { @@ -153,9 +153,9 @@ auto ExternalBlockStorageAws::put( (void)keepalive; if (put_outcome.IsSuccess()) { - std::cout - << "File asynchronously uploaded successfully to S3!" - << std::endl; + // std::cout + // << "File asynchronously uploaded successfully to S3!" + // << std::endl; } else { From a1adc36c269e830d283418465eaf2b18550953ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 6 Jan 2026 14:55:29 +0100 Subject: [PATCH 43/50] Fix late operations in Aws --- src/Series.cpp | 5 ++++- src/toolkit/Aws.cpp | 19 +++++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/Series.cpp b/src/Series.cpp index 28c7664c74..bed08eeef6 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -3296,11 +3296,14 @@ namespace internal // we must not throw in a destructor try { + // The order of operations is important: + // close() might need to wait for a number of remaining Aws + // operations to finish, so the AwsAPI needs to stay open for that. 
+ close(); if (m_manageAwsAPI.has_value()) { Aws::ShutdownAPI(*m_manageAwsAPI); } - close(); } catch (std::exception const &ex) { diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp index f3ea2828a8..c6b4211b93 100644 --- a/src/toolkit/Aws.cpp +++ b/src/toolkit/Aws.cpp @@ -38,13 +38,10 @@ namespace openPMD::internal { void AwsAsyncCounter::wait() { - // std::cerr << "Waiting for remaining tasks. Have " << completion_counter - // << " of " << request_counter << std::endl; size_t target = this->request_counter; std::unique_lock lk(this->mutex); this->event.wait( lk, [this, target]() { return this->completion_counter >= target; }); - // std::cerr << "Finished waiting for remaining tasks" << std::endl; } void AwsAsyncCounter::add_task() @@ -62,7 +59,10 @@ void AwsAsyncCounter::add_and_notify_result() AwsAsyncCounter::~AwsAsyncCounter() { + std::cerr << "Waiting for remaining tasks. Have " << completion_counter + << " of " << request_counter << std::endl; this->wait(); + std::cerr << "Finished waiting for remaining tasks" << std::endl; } ExternalBlockStorageAws::ExternalBlockStorageAws( @@ -89,7 +89,11 @@ ExternalBlockStorageAws::ExternalBlockStorageAws( std::cout << "Bucket created: " << m_bucketName << std::endl; } } -ExternalBlockStorageAws::~ExternalBlockStorageAws() = default; +ExternalBlockStorageAws::~ExternalBlockStorageAws() +{ + // We need to wait for late operations before doing anything else. 
+ m_async.reset(); +} auto ExternalBlockStorageAws::put( std::string const &identifier, auxiliary::WriteBuffer data, size_t len) @@ -130,14 +134,17 @@ auto ExternalBlockStorageAws::put( auto &async_counter = *std::visit( auxiliary::overloaded{ [this](auxiliary::WriteBuffer::CopyableUniquePtr const &) { + std::cout << "Using unique pointer" << std::endl; return &this->m_async->unique_ptr_operations; }, [this](auxiliary::WriteBuffer::SharedPtr const &) { + std::cout << "Using shared pointer" << std::endl; return &this->m_async->shared_ptr_operations; }}, data.as_variant()); auto responseReceivedHandler = [&async_counter, + sanitized, /* * Need to keep buffers alive until they have been asynchronously * read. Use the closure captures for this. Wrap the WriteBuffer @@ -159,8 +166,8 @@ auto ExternalBlockStorageAws::put( } else { - std::cerr << "Asynchronous upload failed: " - << put_outcome.GetError().GetMessage() + std::cerr << "Asynchronous upload failed for '" << sanitized + << "': " << put_outcome.GetError().GetMessage() << std::endl; } async_counter.add_and_notify_result(); From f16bb4a4feb203147e1b527e355bf5916a191397 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 6 Jan 2026 17:16:52 +0100 Subject: [PATCH 44/50] Async reading --- include/openPMD/toolkit/Aws.hpp | 5 +- .../openPMD/toolkit/ExternalBlockStorage.hpp | 9 ++- include/openPMD/toolkit/Stdio.hpp | 5 +- src/IO/JSON/JSONIOHandlerImpl.cpp | 9 +-- src/toolkit/Aws.cpp | 79 +++++++++++++++---- src/toolkit/ExternalBlockStorage.cpp | 18 +++-- src/toolkit/Stdio.cpp | 4 +- 7 files changed, 93 insertions(+), 36 deletions(-) diff --git a/include/openPMD/toolkit/Aws.hpp b/include/openPMD/toolkit/Aws.hpp index e0c3d1da4c..31bc86df18 100644 --- a/include/openPMD/toolkit/Aws.hpp +++ b/include/openPMD/toolkit/Aws.hpp @@ -52,7 +52,10 @@ struct ExternalBlockStorageAws : ExternalBlockStorageBackend auto put(std::string const &identifier, auxiliary::WriteBuffer data, size_t len) -> std::string 
override; - void get(std::string const &external_ref, void *data, size_t len) override; + void + get(std::string const &external_ref, + std::shared_ptr data, + size_t len) override; [[nodiscard]] auto externalStorageLocation() const -> nlohmann::json override; void sync() override; diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index b3b54f2703..e27744d610 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -8,7 +8,6 @@ #include #include -#include #include #include #include @@ -26,7 +25,9 @@ struct ExternalBlockStorageBackend put(std::string const &identifier, auxiliary::WriteBuffer data, size_t len) -> std::string = 0; virtual void - get(std::string const &external_ref, void *data, size_t len) = 0; + get(std::string const &external_ref, + std::shared_ptr data, + size_t len) = 0; [[nodiscard]] virtual auto externalStorageLocation() const -> nlohmann::json = 0; @@ -98,7 +99,7 @@ class ExternalBlockStorage std::string const &identifier, nlohmann::json const &fullJsonDataset, nlohmann::json::json_pointer const &path, - T *data); + std::shared_ptr &data); template void read( @@ -106,7 +107,7 @@ class ExternalBlockStorage Extent const &blockExtent, nlohmann::json const &fullJsonDataset, nlohmann::json::json_pointer const &path, - T *data); + std::shared_ptr &data); void sync(); diff --git a/include/openPMD/toolkit/Stdio.hpp b/include/openPMD/toolkit/Stdio.hpp index 0bf7f4a11c..0d73684041 100644 --- a/include/openPMD/toolkit/Stdio.hpp +++ b/include/openPMD/toolkit/Stdio.hpp @@ -15,7 +15,10 @@ struct ExternalBlockStorageStdio : ExternalBlockStorageBackend auto put(std::string const &identifier, auxiliary::WriteBuffer data, size_t len) -> std::string override; - void get(std::string const &external_ref, void *data, size_t len) override; + void + get(std::string const &external_ref, + std::shared_ptr data, + size_t len) override; 
[[nodiscard]] auto externalStorageLocation() const -> nlohmann::json override; ~ExternalBlockStorageStdio() override; diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 19dbeb63b4..c5b02c654a 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -1605,11 +1605,10 @@ namespace struct RetrieveExternally { template - static void - call(ExternalBlockStorage &blockStorage, void *ptr, Args &&...args) + static void call(ExternalBlockStorage &blockStorage, Args &&...args) { blockStorage.read( - std::forward(args)..., static_cast(ptr)); + std::forward(args)...); } static constexpr char const *errorMsg = "RetrieveExternally"; @@ -1654,11 +1653,11 @@ void JSONIOHandlerImpl::readDataset( switchDatasetType( parameters.dtype, *external, - parameters.data.get(), parameters.offset, parameters.extent, jsonRoot, - filePosition->id); + filePosition->id, + parameters.data); }}, localMode.as_base()); } diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp index c6b4211b93..fae1cfc635 100644 --- a/src/toolkit/Aws.cpp +++ b/src/toolkit/Aws.cpp @@ -173,13 +173,14 @@ auto ExternalBlockStorageAws::put( async_counter.add_and_notify_result(); }; async_counter.add_task(); - m_client.PutObjectAsync(put_request, responseReceivedHandler); + m_client.PutObjectAsync( + put_request, std::move(responseReceivedHandler)); } return sanitized; } void ExternalBlockStorageAws::get( - std::string const &external_ref, void *data, size_t len) + std::string const &external_ref, std::shared_ptr data, size_t len) { if (len == 0) { @@ -190,23 +191,69 @@ void ExternalBlockStorageAws::get( get_request.SetBucket(m_bucketName); get_request.SetKey(external_ref); - auto get_outcome = m_client.GetObject(get_request); - if (!get_outcome.IsSuccess()) + auto processGetOutcome = [len]( + Aws::S3::Model::GetObjectOutcome const + &get_outcome, + void *data_lambda) { + auto &body = get_outcome.GetResult().GetBody(); + body.read( + 
reinterpret_cast(data_lambda), + static_cast(len)); + std::streamsize read_bytes = body.gcount(); + if (read_bytes != static_cast(len)) + { + throw std::runtime_error( + "ExternalBlockStorageAws: failed to read expected number of " + "bytes " + "from S3 object"); + } + }; + + if (!m_async.has_value()) { - throw std::runtime_error( - std::string("ExternalBlockStorageAws::get failed: ") + - get_outcome.GetError().GetMessage()); - } + auto get_outcome = m_client.GetObject(get_request); + if (!get_outcome.IsSuccess()) + { + throw std::runtime_error( + std::string("ExternalBlockStorageAws::get failed: ") + + get_outcome.GetError().GetMessage()); + } - auto &body = get_outcome.GetResult().GetBody(); - body.read( - reinterpret_cast(data), static_cast(len)); - std::streamsize read_bytes = body.gcount(); - if (read_bytes != static_cast(len)) + processGetOutcome(get_outcome, data.get()); + } + else { - throw std::runtime_error( - "ExternalBlockStorageAws: failed to read expected number of bytes " - "from S3 object"); + auto &async_counter = this->m_async->shared_ptr_operations; + auto responseReceivedHandler = + [&async_counter, + external_ref, + processGetOutcome_lambda = std::move(processGetOutcome), + data_lambda = std::move(data)]( + const Aws::S3::S3Client *, + const Aws::S3::Model::GetObjectRequest &, + const Aws::S3::Model::GetObjectOutcome &get_outcome, + const std::shared_ptr + &) { + if (get_outcome.IsSuccess()) + { + // std::cout << "File asynchronously downloaded successfully + // " + // "from S3!" 
+ // << std::endl; + } + else + { + std::cerr << "Asynchronous download failed for '" + << external_ref + << "': " << get_outcome.GetError().GetMessage() + << std::endl; + } + processGetOutcome_lambda(get_outcome, data_lambda.get()); + async_counter.add_and_notify_result(); + }; + async_counter.add_task(); + m_client.GetObjectAsync( + get_request, std::move(responseReceivedHandler)); } } diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index d3fdb1b39c..5dc601dea3 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -5,6 +5,7 @@ #include "openPMD/auxiliary/Memory.hpp" #include "openPMD/auxiliary/StringManip.hpp" +#include #include #include @@ -38,12 +39,15 @@ namespace void read_impl( internal::ExternalBlockStorageBackend *backend, nlohmann::json const &external_block, - T *data, + std::shared_ptr &data, size_t len) { auto const &external_ref = external_block.at("external_ref").get(); - backend->get(external_ref, data, sizeof(T) * len); + backend->get( + external_ref, + std::static_pointer_cast(data), + sizeof(T) * len); } } // namespace @@ -166,7 +170,7 @@ void ExternalBlockStorage::read( [[maybe_unused]] std::string const &identifier, [[maybe_unused]] nlohmann::json const &fullJsonDataset, [[maybe_unused]] nlohmann::json::json_pointer const &path, - [[maybe_unused]] T *data) + [[maybe_unused]] std::shared_ptr &data) { throw std::runtime_error("Unimplemented!"); } @@ -177,7 +181,7 @@ void ExternalBlockStorage::read( Extent const &blockExtent, nlohmann::json const &fullJsonDataset, nlohmann::json::json_pointer const &path, - T *data) + std::shared_ptr &data) { auto &dataset = fullJsonDataset[path]; if (!DatatypeHandling::template checkDatatype(dataset)) @@ -199,7 +203,7 @@ void ExternalBlockStorage::read( continue; } found_a_precise_match = true; - read_impl(m_worker.get(), block, data, flat_extent(blockExtent)); + read_impl(m_worker.get(), block, data, flat_extent(blockExtent)); 
break; } catch (nlohmann::json::exception const &e) @@ -256,13 +260,13 @@ void ExternalBlockStorage::sanitizeString(std::string &s) std::string const &identifier, \ nlohmann::json const &fullJsonDataset, \ nlohmann::json::json_pointer const &path, \ - type *data); \ + std::shared_ptr &data); \ template void ExternalBlockStorage::read( \ Offset const &blockOffset, \ Extent const &blockExtent, \ nlohmann::json const &fullJsonDataset, \ nlohmann::json::json_pointer const &path, \ - type *data); + std::shared_ptr &data); #define OPENPMD_INSTANTIATE(type) \ OPENPMD_INSTANTIATE_DATATYPEHANDLING(internal::JsonDatatypeHandling, type) OPENPMD_FOREACH_DATASET_DATATYPE(OPENPMD_INSTANTIATE) diff --git a/src/toolkit/Stdio.cpp b/src/toolkit/Stdio.cpp index a99ce6e0ad..64e495ad45 100644 --- a/src/toolkit/Stdio.cpp +++ b/src/toolkit/Stdio.cpp @@ -101,7 +101,7 @@ auto ExternalBlockStorageStdio::put( } void ExternalBlockStorageStdio::get( - std::string const &external_ref, void *data, size_t len) + std::string const &external_ref, std::shared_ptr data, size_t len) { if (len == 0) { @@ -118,7 +118,7 @@ void ExternalBlockStorageStdio::get( filepath); } - size_t read = std::fread(data, 1, len, file); + size_t read = std::fread(data.get(), 1, len, file); if (read != len) { std::fclose(file); From 62f3c15b8f47709c65858565dcee01d8bd5b21d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 7 Jan 2026 13:37:25 +0100 Subject: [PATCH 45/50] Make asyncIO configurable --- include/openPMD/toolkit/AwsBuilder.hpp | 2 ++ src/IO/JSON/JSONIOHandlerImpl.cpp | 3 +++ src/toolkit/AwsBuilder.cpp | 9 +++++++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/include/openPMD/toolkit/AwsBuilder.hpp b/include/openPMD/toolkit/AwsBuilder.hpp index 4a8ad691b9..3a90e8188c 100644 --- a/include/openPMD/toolkit/AwsBuilder.hpp +++ b/include/openPMD/toolkit/AwsBuilder.hpp @@ -31,6 +31,7 @@ struct AwsBuilder std::optional m_region; std::optional m_scheme; std::optional m_verifySSL; 
+ std::optional m_useAsyncIO; auto setBucketName(std::string bucketName) -> AwsBuilder &; auto setCredentials(std::string accessKeyId, std::string secretKey) @@ -40,6 +41,7 @@ struct AwsBuilder auto setRegion(std::string regionName) -> AwsBuilder &; auto setScheme(Scheme s) -> AwsBuilder &; auto setVerifySSL(bool verify) -> AwsBuilder &; + auto setAsyncIO(bool useAsyncIO) -> AwsBuilder &; operator ::openPMD::ExternalBlockStorage(); auto build() -> ::openPMD::ExternalBlockStorage; diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index c5b02c654a..5e676ce531 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -457,6 +457,9 @@ namespace if_contains_optional_bool("verify_ssl", [&](bool verifySSL) { builder.setVerifySSL(verifySSL); }); + if_contains_optional_bool("async_io", [&](bool useAsyncIO) { + builder.setAsyncIO(useAsyncIO); + }); if_contains_optional( "scheme", true, [&](std::string const &scheme) { if (scheme == "http") diff --git a/src/toolkit/AwsBuilder.cpp b/src/toolkit/AwsBuilder.cpp index 5f5960b6c1..c3804c788b 100644 --- a/src/toolkit/AwsBuilder.cpp +++ b/src/toolkit/AwsBuilder.cpp @@ -54,6 +54,12 @@ auto AwsBuilder::setVerifySSL(bool verify) -> AwsBuilder & return *this; } +auto AwsBuilder::setAsyncIO(bool useAsyncIO) -> AwsBuilder & +{ + m_useAsyncIO = useAsyncIO; + return *this; +} + auto internal::AwsBuilder::setSessionToken(std::string sessionToken) -> AwsBuilder & { @@ -120,8 +126,7 @@ AwsBuilder::operator ExternalBlockStorage() std::move(s3_client), std::move(m_bucketName), std::move(m_endpointOverride), - // TODO: Add config option for this - /* async = */ true)}; + m_useAsyncIO.value_or(true))}; } auto AwsBuilder::build() -> ExternalBlockStorage From 00b0716d060c99c6c50acee1f64bebbc43113383 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 7 Jan 2026 13:59:07 +0100 Subject: [PATCH 46/50] Split syncMandatoryOperations and syncAllOperations --- 
include/openPMD/toolkit/Aws.hpp | 3 ++- include/openPMD/toolkit/ExternalBlockStorage.hpp | 6 ++++-- src/IO/JSON/JSONIOHandlerImpl.cpp | 2 +- src/toolkit/Aws.cpp | 12 +++++++++++- src/toolkit/ExternalBlockStorage.cpp | 16 +++++++++++++--- 5 files changed, 31 insertions(+), 8 deletions(-) diff --git a/include/openPMD/toolkit/Aws.hpp b/include/openPMD/toolkit/Aws.hpp index 31bc86df18..a06fc674e5 100644 --- a/include/openPMD/toolkit/Aws.hpp +++ b/include/openPMD/toolkit/Aws.hpp @@ -58,7 +58,8 @@ struct ExternalBlockStorageAws : ExternalBlockStorageBackend size_t len) override; [[nodiscard]] auto externalStorageLocation() const -> nlohmann::json override; - void sync() override; + void syncMandatoryOperations() override; + void syncAllOperations() override; ~ExternalBlockStorageAws() override; }; diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index e27744d610..08455eb3c3 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -31,7 +31,8 @@ struct ExternalBlockStorageBackend [[nodiscard]] virtual auto externalStorageLocation() const -> nlohmann::json = 0; - virtual void sync(); + virtual void syncMandatoryOperations(); + virtual void syncAllOperations(); virtual ~ExternalBlockStorageBackend(); }; @@ -109,7 +110,8 @@ class ExternalBlockStorage nlohmann::json::json_pointer const &path, std::shared_ptr &data); - void sync(); + void syncMandatoryOperations(); + void syncAllOperations(); [[nodiscard]] auto externalStorageLocation() const -> nlohmann::json; diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 5e676ce531..0eddf33e3b 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -695,7 +695,7 @@ std::future JSONIOHandlerImpl::flush() std::visit( auxiliary::overloaded{ [](DatasetMode::External_t &externalStorage) { - externalStorage->sync(); + 
externalStorage->syncMandatoryOperations(); }, [](auto &&) {}}, this->m_datasetMode.m_mode.as_base()); diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp index fae1cfc635..78db741314 100644 --- a/src/toolkit/Aws.cpp +++ b/src/toolkit/Aws.cpp @@ -257,7 +257,7 @@ void ExternalBlockStorageAws::get( } } -void ExternalBlockStorageAws::sync() +void ExternalBlockStorageAws::syncMandatoryOperations() { if (!this->m_async.has_value()) { @@ -266,6 +266,16 @@ void ExternalBlockStorageAws::sync() this->m_async->shared_ptr_operations.wait(); } +void ExternalBlockStorageAws::syncAllOperations() +{ + if (!this->m_async.has_value()) + { + return; + } + this->m_async->shared_ptr_operations.wait(); + this->m_async->unique_ptr_operations.wait(); +} + [[nodiscard]] auto ExternalBlockStorageAws::externalStorageLocation() const -> nlohmann::json { diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 5dc601dea3..77220cfd24 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -14,7 +14,12 @@ namespace openPMD::internal { -void ExternalBlockStorageBackend::sync() +void ExternalBlockStorageBackend::syncMandatoryOperations() +{ + // default for non-async backends: no-op +} + +void ExternalBlockStorageBackend::syncAllOperations() { // default for non-async backends: no-op } @@ -223,9 +228,14 @@ void ExternalBlockStorage::read( } } -void ExternalBlockStorage::sync() +void ExternalBlockStorage::syncMandatoryOperations() +{ + this->m_worker->syncMandatoryOperations(); +} + +void ExternalBlockStorage::syncAllOperations() { - this->m_worker->sync(); + this->m_worker->syncAllOperations(); } [[nodiscard]] auto ExternalBlockStorage::externalStorageLocation() const From 36451812ee0af621c70343634b679d08a3694faa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 7 Jan 2026 15:14:00 +0100 Subject: [PATCH 47/50] Sync unique_ptrs before next step/file --- 
include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 18 +++++++- src/IO/JSON/JSONIOHandlerImpl.cpp | 43 +++++++++++++------ 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index 5ba544db29..e2ed747e3a 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -307,6 +307,8 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl void touch(Writable *, Parameter const &) override; + void advance(Writable *, Parameter &) override; + std::future flush(); /* @@ -366,6 +368,19 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl return std::tuple{ m_mode, m_specificationVia, m_skipWarnings}; } + + template + auto mapExternalStorage(F &&functor) + { + std::visit( + auxiliary::overloaded{ + [&functor](DatasetMode::External_t &externalStorage) { + return static_cast(functor)( + externalStorage); + }, + [](auto &&) {}}, + m_mode.as_base()); + } }; /////////////////////// @@ -433,7 +448,8 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl std::optional m_deferredExternalBlockstorageConfig; DatasetMode_s m_datasetMode; - DatasetMode_s retrieveDatasetMode(openPMD::json::TracingJSON &config); + DatasetMode_s + retrieveDatasetMode(openPMD::json::TracingJSON &config, bool do_init); AttributeMode_s m_attributeMode; AttributeMode_s diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 0eddf33e3b..3ca987aa38 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -494,8 +494,8 @@ namespace } } // namespace -auto JSONIOHandlerImpl::retrieveDatasetMode(openPMD::json::TracingJSON &config) - -> DatasetMode_s +auto JSONIOHandlerImpl::retrieveDatasetMode( + openPMD::json::TracingJSON &config, bool do_init) -> DatasetMode_s { // start with / copy from current config auto res = m_datasetMode; @@ -513,7 +513,8 @@ auto 
JSONIOHandlerImpl::retrieveDatasetMode(openPMD::json::TracingJSON &config) auto mode = datasetConfig["mode"]; if (mode.json().is_object()) { - if (access::writeOnly(m_handler->m_backendAccess)) + if (!do_init || + access::writeOnly(m_handler->m_backendAccess)) { parse_external_mode( std::move(mode), std::nullopt, configLocation, res); @@ -666,7 +667,7 @@ void JSONIOHandlerImpl::init(openPMD::json::TracingJSON config) } // now modify according to config - m_datasetMode = retrieveDatasetMode(config); + m_datasetMode = retrieveDatasetMode(config, /* do_init = */ true); m_attributeMode = retrieveAttributeMode(config); if (auto [_, backendConfig] = getBackendConfig(config); @@ -692,13 +693,9 @@ std::future JSONIOHandlerImpl::flush() putJsonContents(file, false); } m_dirty.clear(); - std::visit( - auxiliary::overloaded{ - [](DatasetMode::External_t &externalStorage) { - externalStorage->syncMandatoryOperations(); - }, - [](auto &&) {}}, - this->m_datasetMode.m_mode.as_base()); + this->m_datasetMode.mapExternalStorage([](auto &externalStorage) { + externalStorage->syncMandatoryOperations(); + }); return std::future(); } @@ -738,6 +735,9 @@ void JSONIOHandlerImpl::createFile( if (!writable->written) { + m_datasetMode.mapExternalStorage([](auto &externalStorage) { + externalStorage->syncAllOperations(); + }); std::string name = parameters.name + m_originalExtension; auto res_pair = getPossiblyExisting(name); @@ -781,6 +781,10 @@ void JSONIOHandlerImpl::createFile( writable->written = true; writable->abstractFilePosition = std::make_shared(); } + else + { + throw error::Internal("This should not happen."); + } } void JSONIOHandlerImpl::checkFile( @@ -853,7 +857,8 @@ void JSONIOHandlerImpl::createDataset( parameter.options, /* considerFiles = */ false); // Retrieves mode from dataset-specific configuration, falls back to global // value if not defined - auto [localMode, _, skipWarnings] = retrieveDatasetMode(config); + auto [localMode, _, skipWarnings] = + 
retrieveDatasetMode(config, /* do_init = */ false); (void)_; // No use in introducing logic to skip warnings only for one particular // dataset. If warnings are skipped, then they are skipped consistently. @@ -2049,6 +2054,20 @@ void JSONIOHandlerImpl::touch( } } +void JSONIOHandlerImpl::advance( + Writable *w, Parameter &param) +{ + AbstractIOHandlerImpl::advance(w, param); + + if (access::linear(m_handler->m_backendAccess) && + access::writeOnly(m_handler->m_backendAccess)) + { + m_datasetMode.mapExternalStorage([](auto &externalStorage) { + externalStorage->syncAllOperations(); + }); + } +} + auto JSONIOHandlerImpl::getFilehandle(File const &fileName, Access access) -> std::tuple, std::istream *, std::ostream *> { From d33fe043ea45ea1e9ca3d80f74bc1511683169b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 7 Jan 2026 16:53:27 +0100 Subject: [PATCH 48/50] Conditional compilation --- CMakeLists.txt | 24 +++++++++++++++++++----- include/openPMD/Series.hpp | 5 +++++ include/openPMD/config.hpp.in | 4 ++++ include/openPMD/toolkit/Aws.hpp | 5 +++++ include/openPMD/toolkit/AwsBuilder.hpp | 2 ++ src/Series.cpp | 9 +++++++++ src/toolkit/Aws.cpp | 7 ++++++- src/toolkit/AwsBuilder.cpp | 10 ++++++++++ 8 files changed, 60 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fecf7611ed..682da5d52e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -137,6 +137,7 @@ openpmd_option(MPI "Parallel, Multi-Node I/O for clusters" AUTO) openpmd_option(HDF5 "HDF5 backend (.h5 files)" AUTO) openpmd_option(ADIOS2 "ADIOS2 backend (.bp files)" AUTO) openpmd_option(PYTHON "Enable Python bindings" AUTO) +openpmd_option(AWS "Enable AWS/S3 storage" AUTO) option(openPMD_INSTALL "Add installation targets" ON) option(openPMD_INSTALL_RPATH "Add RPATHs to installed binaries" ON) @@ -385,12 +386,23 @@ else() endif() unset(openPMD_REQUIRED_ADIOS2_COMPONENTS) -find_package(AWSSDK REQUIRED COMPONENTS s3) +if(openPMD_USE_AWS STREQUAL AUTO) +
find_package(AWSSDK REQUIRED COMPONENTS s3) + if(AWSSDK_FOUND) + set(openPMD_HAVE_AWS TRUE) + else() + set(openPMD_HAVE_AWS FALSE) + endif() +elseif(openPMD_USE_AWS) + find_package(AWSSDK REQUIRED COMPONENTS s3) + set(openPMD_HAVE_AWS TRUE) +else() + set(openPMD_HAVE_AWS FALSE) +endif() -# external library: pybind11 (optional) +#external library : pybind11(optional) include(${openPMD_SOURCE_DIR}/cmake/dependencies/pybind11.cmake) - # Targets ##################################################################### # set(CORE_SOURCE @@ -569,9 +581,11 @@ if(openPMD_HAVE_ADIOS2) endif() endif() -target_link_libraries(openPMD PUBLIC ${AWSSDK_LIBRARIES}) +if(openPMD_HAVE_AWS) + target_link_libraries(openPMD PUBLIC ${AWSSDK_LIBRARIES}) +endif() -# Runtime parameter and API status checks ("asserts") +#Runtime parameter and API status checks("asserts") if(openPMD_USE_VERIFY) target_compile_definitions(openPMD PRIVATE openPMD_USE_VERIFY=1) else() diff --git a/include/openPMD/Series.hpp b/include/openPMD/Series.hpp index bc560fbb89..43f24822f3 100644 --- a/include/openPMD/Series.hpp +++ b/include/openPMD/Series.hpp @@ -35,7 +35,10 @@ #include "openPMD/config.hpp" #include "openPMD/snapshots/Snapshots.hpp" #include "openPMD/version.hpp" + +#if openPMD_HAVE_AWS #include +#endif #if openPMD_HAVE_MPI #include @@ -240,7 +243,9 @@ namespace internal std::optional> m_deferred_initialization = std::nullopt; +#if openPMD_HAVE_AWS std::optional m_manageAwsAPI = std::nullopt; +#endif void close(); diff --git a/include/openPMD/config.hpp.in b/include/openPMD/config.hpp.in index 8df5dae9de..042e0fdf57 100644 --- a/include/openPMD/config.hpp.in +++ b/include/openPMD/config.hpp.in @@ -45,3 +45,7 @@ #ifndef openPMD_HAVE_CUDA_EXAMPLES #cmakedefine01 openPMD_HAVE_CUDA_EXAMPLES #endif + +#ifndef openPMD_HAVE_AWS +#cmakedefine01 openPMD_HAVE_AWS +#endif diff --git a/include/openPMD/toolkit/Aws.hpp b/include/openPMD/toolkit/Aws.hpp index a06fc674e5..453dad5e45 100644 --- 
a/include/openPMD/toolkit/Aws.hpp +++ b/include/openPMD/toolkit/Aws.hpp @@ -1,5 +1,9 @@ #pragma once +#include "openPMD/config.hpp" + +#if openPMD_HAVE_AWS + #include "openPMD/toolkit/ExternalBlockStorage.hpp" #include @@ -64,3 +68,4 @@ struct ExternalBlockStorageAws : ExternalBlockStorageBackend ~ExternalBlockStorageAws() override; }; } // namespace openPMD::internal +#endif diff --git a/include/openPMD/toolkit/AwsBuilder.hpp b/include/openPMD/toolkit/AwsBuilder.hpp index 3a90e8188c..86af268905 100644 --- a/include/openPMD/toolkit/AwsBuilder.hpp +++ b/include/openPMD/toolkit/AwsBuilder.hpp @@ -1,5 +1,7 @@ #pragma once +#include "openPMD/config.hpp" + #include #include #include diff --git a/src/Series.cpp b/src/Series.cpp index bed08eeef6..2efcec63b5 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -42,13 +42,16 @@ #include "openPMD/backend/Attributable.hpp" #include "openPMD/backend/Attribute.hpp" #include "openPMD/backend/Variant_internal.hpp" +#include "openPMD/config.hpp" #include "openPMD/snapshots/ContainerImpls.hpp" #include "openPMD/snapshots/ContainerTraits.hpp" #include "openPMD/snapshots/Snapshots.hpp" #include "openPMD/snapshots/StatefulIterator.hpp" #include "openPMD/version.hpp" +#if openPMD_HAVE_AWS #include +#endif #include #include @@ -1187,10 +1190,12 @@ auto Series::initIOHandler( // now check for user-specified options parseJsonOptions(optionsJson, *input); +#if openPMD_HAVE_AWS if (series.m_manageAwsAPI.has_value()) { Aws::InitAPI(*series.m_manageAwsAPI); } +#endif if (resolve_generic_extension && !input->filenameExtension.has_value()) { @@ -3202,6 +3207,7 @@ void Series::parseJsonOptions(TracingJSON &options, ParsedInput &input) { series.m_rankTable.m_rankTableSource = std::move(rankTableSource); } +#if openPMD_HAVE_AWS { bool doManageAwsAPI = false; getJsonOption(options, "init_aws_api", doManageAwsAPI); @@ -3210,6 +3216,7 @@ void Series::parseJsonOptions(TracingJSON &options, ParsedInput &input) series.m_manageAwsAPI = 
std::make_optional(); } } +#endif // backend key { std::map const backendDescriptors{ @@ -3300,10 +3307,12 @@ namespace internal // close() might need to wait for a number of remaining Aws // operations to finish, so the AwsAPI needs to stay open for that. close(); +#if openPMD_HAVE_AWS if (m_manageAwsAPI.has_value()) { Aws::ShutdownAPI(*m_manageAwsAPI); } +#endif } catch (std::exception const &ex) { diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp index 78db741314..9cf3372087 100644 --- a/src/toolkit/Aws.cpp +++ b/src/toolkit/Aws.cpp @@ -1,7 +1,11 @@ -#include "openPMD/toolkit/Aws.hpp" +#include "openPMD/config.hpp" + +#if openPMD_HAVE_AWS + #include "openPMD/auxiliary/Memory.hpp" #include "openPMD/auxiliary/Memory_internal.hpp" #include "openPMD/auxiliary/Variant.hpp" +#include "openPMD/toolkit/Aws.hpp" #include #include @@ -290,3 +294,4 @@ void ExternalBlockStorageAws::syncAllOperations() } } // namespace openPMD::internal +#endif diff --git a/src/toolkit/AwsBuilder.cpp b/src/toolkit/AwsBuilder.cpp index c3804c788b..64fa8778f7 100644 --- a/src/toolkit/AwsBuilder.cpp +++ b/src/toolkit/AwsBuilder.cpp @@ -1,11 +1,15 @@ +#include "openPMD/config.hpp" + #include "openPMD/toolkit/AwsBuilder.hpp" #include "openPMD/toolkit/Aws.hpp" #include "openPMD/toolkit/ExternalBlockStorage.hpp" +#if openPMD_HAVE_AWS #include #include #include +#endif namespace openPMD::internal { @@ -69,6 +73,7 @@ auto internal::AwsBuilder::setSessionToken(std::string sessionToken) AwsBuilder::operator ExternalBlockStorage() { +#if openPMD_HAVE_AWS Aws::Client::ClientConfiguration config; if (m_endpointOverride.has_value()) @@ -127,6 +132,11 @@ AwsBuilder::operator ExternalBlockStorage() std::move(m_bucketName), std::move(m_endpointOverride), m_useAsyncIO.value_or(true))}; +#else + throw std::runtime_error( + "Method not available: openPMD-api has been built without support for " + "AWS."); +#endif } auto AwsBuilder::build() -> ExternalBlockStorage From 
a95a8cee8f0e240913f80c98e1bcfb2bda2130e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 7 Jan 2026 17:44:53 +0100 Subject: [PATCH 49/50] License headers --- include/openPMD/cli/convert-toml-json.hpp | 20 +++++++++++++++++++ include/openPMD/config.hpp.in | 2 +- include/openPMD/toolkit/Aws.hpp | 20 +++++++++++++++++++ include/openPMD/toolkit/AwsBuilder.hpp | 20 +++++++++++++++++++ .../openPMD/toolkit/ExternalBlockStorage.hpp | 20 +++++++++++++++++++ include/openPMD/toolkit/Stdio.hpp | 20 +++++++++++++++++++ include/openPMD/toolkit/StdioBuilder.hpp | 20 +++++++++++++++++++ src/cli/merge-json.cpp | 20 +++++++++++++++++++ src/toolkit/Aws.cpp | 20 +++++++++++++++++++ src/toolkit/AwsBuilder.cpp | 20 +++++++++++++++++++ src/toolkit/ExternalBlockStorage.cpp | 20 +++++++++++++++++++ src/toolkit/Stdio.cpp | 20 +++++++++++++++++++ src/toolkit/StdioBuilder.cpp | 20 +++++++++++++++++++ 13 files changed, 241 insertions(+), 1 deletion(-) diff --git a/include/openPMD/cli/convert-toml-json.hpp b/include/openPMD/cli/convert-toml-json.hpp index 051fa01bfb..48e4eceb25 100644 --- a/include/openPMD/cli/convert-toml-json.hpp +++ b/include/openPMD/cli/convert-toml-json.hpp @@ -1,3 +1,23 @@ +/* Copyright 2026 Franz Poeschel + * + * This file is part of openPMD-api. + * + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * openPMD-api is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details.
+ * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with openPMD-api. + * If not, see <http://www.gnu.org/licenses/>. + */ #pragma once #include diff --git a/include/openPMD/config.hpp.in b/include/openPMD/config.hpp.in index 042e0fdf57..9335e93836 100644 --- a/include/openPMD/config.hpp.in +++ b/include/openPMD/config.hpp.in @@ -1,4 +1,4 @@ -/* Copyright 2019-2021 Axel Huebl +/* Copyright 2019-2026 Axel Huebl, Franz Poeschel, Junmin Gu * * This file is part of openPMD-api. * diff --git a/include/openPMD/toolkit/Aws.hpp b/include/openPMD/toolkit/Aws.hpp index 453dad5e45..e62666d81f 100644 --- a/include/openPMD/toolkit/Aws.hpp +++ b/include/openPMD/toolkit/Aws.hpp @@ -1,3 +1,23 @@ +/* Copyright 2026 Franz Poeschel + * + * This file is part of openPMD-api. + * + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * openPMD-api is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with openPMD-api. + * If not, see <http://www.gnu.org/licenses/>. + */ #pragma once #include "openPMD/config.hpp" diff --git a/include/openPMD/toolkit/AwsBuilder.hpp b/include/openPMD/toolkit/AwsBuilder.hpp index 86af268905..fda657fff9 100644 --- a/include/openPMD/toolkit/AwsBuilder.hpp +++ b/include/openPMD/toolkit/AwsBuilder.hpp @@ -1,3 +1,23 @@ +/* Copyright 2026 Franz Poeschel + * + * This file is part of openPMD-api.
+ * + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * openPMD-api is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with openPMD-api. + * If not, see <http://www.gnu.org/licenses/>. + */ #pragma once #include "openPMD/config.hpp" diff --git a/include/openPMD/toolkit/ExternalBlockStorage.hpp b/include/openPMD/toolkit/ExternalBlockStorage.hpp index 08455eb3c3..8d7e734e1a 100644 --- a/include/openPMD/toolkit/ExternalBlockStorage.hpp +++ b/include/openPMD/toolkit/ExternalBlockStorage.hpp @@ -1,3 +1,23 @@ +/* Copyright 2026 Franz Poeschel + * + * This file is part of openPMD-api. + * + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * openPMD-api is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with openPMD-api. + * If not, see <http://www.gnu.org/licenses/>.
+ */ #pragma once #include "openPMD/Dataset.hpp" diff --git a/include/openPMD/toolkit/Stdio.hpp b/include/openPMD/toolkit/Stdio.hpp index 0d73684041..7d07a708f1 100644 --- a/include/openPMD/toolkit/Stdio.hpp +++ b/include/openPMD/toolkit/Stdio.hpp @@ -1,3 +1,23 @@ +/* Copyright 2026 Franz Poeschel + * + * This file is part of openPMD-api. + * + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * openPMD-api is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with openPMD-api. + * If not, see <http://www.gnu.org/licenses/>. + */ #pragma once #include "openPMD/toolkit/ExternalBlockStorage.hpp" diff --git a/include/openPMD/toolkit/StdioBuilder.hpp b/include/openPMD/toolkit/StdioBuilder.hpp index 7d93048167..74d2ca3d6b 100644 --- a/include/openPMD/toolkit/StdioBuilder.hpp +++ b/include/openPMD/toolkit/StdioBuilder.hpp @@ -1,3 +1,23 @@ +/* Copyright 2026 Franz Poeschel + * + * This file is part of openPMD-api. + * + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version.
+ * + * openPMD-api is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with openPMD-api. + * If not, see <http://www.gnu.org/licenses/>. + */ #pragma once #include diff --git a/src/cli/merge-json.cpp b/src/cli/merge-json.cpp index 79312a2388..de762a0113 100644 --- a/src/cli/merge-json.cpp +++ b/src/cli/merge-json.cpp @@ -1,3 +1,23 @@ +/* Copyright 2026 Franz Poeschel + * + * This file is part of openPMD-api. + * + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * openPMD-api is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with openPMD-api. + * If not, see <http://www.gnu.org/licenses/>. + */ #include "openPMD/cli/convert-toml-json.hpp" void print_help_message(char const *program_name) diff --git a/src/toolkit/Aws.cpp b/src/toolkit/Aws.cpp index 9cf3372087..dc0afc0325 100644 --- a/src/toolkit/Aws.cpp +++ b/src/toolkit/Aws.cpp @@ -1,3 +1,23 @@ +/* Copyright 2026 Franz Poeschel + * + * This file is part of openPMD-api.
+ * + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * openPMD-api is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with openPMD-api. + * If not, see <http://www.gnu.org/licenses/>. + */ #include "openPMD/config.hpp" #if openPMD_HAVE_AWS diff --git a/src/toolkit/AwsBuilder.cpp b/src/toolkit/AwsBuilder.cpp index 64fa8778f7..aeff7f57d9 100644 --- a/src/toolkit/AwsBuilder.cpp +++ b/src/toolkit/AwsBuilder.cpp @@ -1,3 +1,23 @@ +/* Copyright 2026 Franz Poeschel + * + * This file is part of openPMD-api. + * + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * openPMD-api is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with openPMD-api. + * If not, see <http://www.gnu.org/licenses/>.
+ */ #include "openPMD/config.hpp" #include "openPMD/toolkit/AwsBuilder.hpp" diff --git a/src/toolkit/ExternalBlockStorage.cpp b/src/toolkit/ExternalBlockStorage.cpp index 77220cfd24..c1fae7a35b 100644 --- a/src/toolkit/ExternalBlockStorage.cpp +++ b/src/toolkit/ExternalBlockStorage.cpp @@ -1,3 +1,23 @@ +/* Copyright 2026 Franz Poeschel + * + * This file is part of openPMD-api. + * + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * openPMD-api is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with openPMD-api. + * If not, see <http://www.gnu.org/licenses/>. + */ #include "openPMD/toolkit/ExternalBlockStorage.hpp" #include "openPMD/DatatypeMacros.hpp" diff --git a/src/toolkit/Stdio.cpp b/src/toolkit/Stdio.cpp index 64e495ad45..4acf2748be 100644 --- a/src/toolkit/Stdio.cpp +++ b/src/toolkit/Stdio.cpp @@ -1,3 +1,23 @@ +/* Copyright 2026 Franz Poeschel + * + * This file is part of openPMD-api. + * + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version.
+ * + * openPMD-api is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with openPMD-api. + * If not, see <http://www.gnu.org/licenses/>. + */ #include "openPMD/toolkit/Stdio.hpp" #include "openPMD/auxiliary/Filesystem.hpp" diff --git a/src/toolkit/StdioBuilder.cpp b/src/toolkit/StdioBuilder.cpp index 8fa5f6bb6f..eca2e239ad 100644 --- a/src/toolkit/StdioBuilder.cpp +++ b/src/toolkit/StdioBuilder.cpp @@ -1,3 +1,23 @@ +/* Copyright 2026 Franz Poeschel + * + * This file is part of openPMD-api. + * + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * openPMD-api is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with openPMD-api. + * If not, see <http://www.gnu.org/licenses/>.
+ */ #include "openPMD/toolkit/StdioBuilder.hpp" #include "openPMD/toolkit/ExternalBlockStorage.hpp" From 364eeea301339ab0cf81e96ed2ac76e367c1b8d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 7 Jan 2026 17:55:05 +0100 Subject: [PATCH 50/50] Fix openPMD_USE_AWS=AUTO --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 682da5d52e..5f9aa76c28 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -387,7 +387,7 @@ endif() unset(openPMD_REQUIRED_ADIOS2_COMPONENTS) if(openPMD_USE_AWS STREQUAL AUTO) - find_package(AWSSDK REQUIRED COMPONENTS s3) + find_package(AWSSDK COMPONENTS s3) if(AWSSDK_FOUND) set(openPMD_HAVE_AWS TRUE) else()