From 1baa7cecc2ca99b3c7e806a0dc55eed509a93c10 Mon Sep 17 00:00:00 2001 From: Xinli Shang Date: Sun, 28 Dec 2025 12:13:22 -0800 Subject: [PATCH 1/3] fix: make Avro reader buffer size configurable Add ReaderProperties::kAvroBufferSize property to allow users to configure the Avro input stream buffer size instead of using a hardcoded 1MB value. Changes: - Add kAvroBufferSize property to ReaderProperties (default: 1MB) - Update AvroReader to use the configurable buffer size - Remove TODO comment about making this configurable --- src/iceberg/avro/avro_reader.cc | 7 +++---- src/iceberg/file_reader.h | 3 +++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/iceberg/avro/avro_reader.cc b/src/iceberg/avro/avro_reader.cc index fa7337f0a..8fb56c2cc 100644 --- a/src/iceberg/avro/avro_reader.cc +++ b/src/iceberg/avro/avro_reader.cc @@ -89,10 +89,9 @@ class AvroReader::Impl { read_schema_ = options.projection; // Open the input stream and adapt to the avro interface. - // TODO(gangwu): make this configurable - constexpr int64_t kDefaultBufferSize = 1024 * 1024; - ICEBERG_ASSIGN_OR_RAISE(auto input_stream, - CreateInputStream(options, kDefaultBufferSize)); + ICEBERG_ASSIGN_OR_RAISE( + auto input_stream, + CreateInputStream(options, options.properties->Get(ReaderProperties::kAvroBufferSize))); ::avro::ValidSchema file_schema; diff --git a/src/iceberg/file_reader.h b/src/iceberg/file_reader.h index a5af0a41e..a54d2ee78 100644 --- a/src/iceberg/file_reader.h +++ b/src/iceberg/file_reader.h @@ -81,6 +81,9 @@ class ReaderProperties : public ConfigBase { /// Default: true (skip GenericDatum for better performance). inline static Entry<bool> kAvroSkipDatum{"read.avro.skip-datum", true}; + /// \brief The buffer size in bytes used by the Avro input stream. Default: 1MB. + inline static Entry<int64_t> kAvroBufferSize{"read.avro.buffer-size", 1024 * 1024}; + /// \brief Create a default ReaderProperties instance. 
static std::unique_ptr<ReaderProperties> default_properties(); From b63b9de5f31d838b9c93773574fadc3ffbc929cc Mon Sep 17 00:00:00 2001 From: Xinli Shang Date: Sun, 28 Dec 2025 14:02:09 -0800 Subject: [PATCH 2/3] style: apply clang-format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- src/iceberg/avro/avro_reader.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/iceberg/avro/avro_reader.cc b/src/iceberg/avro/avro_reader.cc index 8fb56c2cc..106e38655 100644 --- a/src/iceberg/avro/avro_reader.cc +++ b/src/iceberg/avro/avro_reader.cc @@ -91,7 +91,8 @@ class AvroReader::Impl { // Open the input stream and adapt to the avro interface. ICEBERG_ASSIGN_OR_RAISE( auto input_stream, - CreateInputStream(options, options.properties->Get(ReaderProperties::kAvroBufferSize))); + CreateInputStream(options, + options.properties->Get(ReaderProperties::kAvroBufferSize))); ::avro::ValidSchema file_schema; From 064d0653ea387ccc29efbb51b2b8f0012ca3872b Mon Sep 17 00:00:00 2001 From: Xinli Shang Date: Sun, 28 Dec 2025 15:10:57 -0800 Subject: [PATCH 3/3] test: add buffer size configuration test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Validate that the Avro buffer size can be configured via: - Set() method with custom value - FromMap() with configuration map - Unset() to return to default 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- src/iceberg/test/avro_test.cc | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/iceberg/test/avro_test.cc b/src/iceberg/test/avro_test.cc index c1bb8bc96..215462b5d 100644 --- a/src/iceberg/test/avro_test.cc +++ b/src/iceberg/test/avro_test.cc @@ -503,4 +503,27 @@ INSTANTIATE_TEST_SUITE_P(DirectDecoderModes, AvroReaderParameterizedTest, return info.param ? 
"DirectDecoder" : "GenericDatum"; }); +TEST_F(AvroReaderTest, BufferSizeConfiguration) { + // Test default buffer size + auto properties1 = ReaderProperties::default_properties(); + ASSERT_EQ(properties1->Get(ReaderProperties::kAvroBufferSize), 1024 * 1024); + + // Test setting custom buffer size + auto properties2 = ReaderProperties::default_properties(); + constexpr int64_t kCustomBufferSize = 2 * 1024 * 1024; // 2MB + properties2->Set(ReaderProperties::kAvroBufferSize, kCustomBufferSize); + ASSERT_EQ(properties2->Get(ReaderProperties::kAvroBufferSize), kCustomBufferSize); + + // Test setting via FromMap + std::unordered_map<std::string, std::string> config_map = { + {"read.avro.buffer-size", "4194304"} // 4MB + }; + auto properties3 = ReaderProperties::FromMap(config_map); + ASSERT_EQ(properties3->Get(ReaderProperties::kAvroBufferSize), 4194304); + + // Test that unset returns to default + properties2->Unset(ReaderProperties::kAvroBufferSize); + ASSERT_EQ(properties2->Get(ReaderProperties::kAvroBufferSize), 1024 * 1024); +} + } // namespace iceberg::avro