diff --git a/src/iceberg/avro/avro_reader.cc b/src/iceberg/avro/avro_reader.cc index fa7337f0a..106e38655 100644 --- a/src/iceberg/avro/avro_reader.cc +++ b/src/iceberg/avro/avro_reader.cc @@ -89,10 +89,10 @@ class AvroReader::Impl { read_schema_ = options.projection; // Open the input stream and adapt to the avro interface. - // TODO(gangwu): make this configurable - constexpr int64_t kDefaultBufferSize = 1024 * 1024; - ICEBERG_ASSIGN_OR_RAISE(auto input_stream, - CreateInputStream(options, kDefaultBufferSize)); + ICEBERG_ASSIGN_OR_RAISE( + auto input_stream, + CreateInputStream(options, + options.properties->Get(ReaderProperties::kAvroBufferSize))); ::avro::ValidSchema file_schema; diff --git a/src/iceberg/file_reader.h b/src/iceberg/file_reader.h index a5af0a41e..a54d2ee78 100644 --- a/src/iceberg/file_reader.h +++ b/src/iceberg/file_reader.h @@ -81,6 +81,9 @@ class ReaderProperties : public ConfigBase { /// Default: true (skip GenericDatum for better performance). inline static Entry kAvroSkipDatum{"read.avro.skip-datum", true}; + /// \brief The buffer size, in bytes, used by the Avro input stream. Default: 1024 * 1024 (1 MiB). + inline static Entry kAvroBufferSize{"read.avro.buffer-size", 1024 * 1024}; + /// \brief Create a default ReaderProperties instance. static std::unique_ptr default_properties(); diff --git a/src/iceberg/test/avro_test.cc b/src/iceberg/test/avro_test.cc index c1bb8bc96..215462b5d 100644 --- a/src/iceberg/test/avro_test.cc +++ b/src/iceberg/test/avro_test.cc @@ -503,4 +503,27 @@ INSTANTIATE_TEST_SUITE_P(DirectDecoderModes, AvroReaderParameterizedTest, return info.param ? 
"DirectDecoder" : "GenericDatum"; }); +TEST_F(AvroReaderTest, BufferSizeConfiguration) { + // Default properties report the default buffer size of 1024 * 1024 bytes + auto properties1 = ReaderProperties::default_properties(); + ASSERT_EQ(properties1->Get(ReaderProperties::kAvroBufferSize), 1024 * 1024); + + // An explicitly Set() value is what Get() returns afterwards + auto properties2 = ReaderProperties::default_properties(); + constexpr int64_t kCustomBufferSize = 2 * 1024 * 1024; // 2MB + properties2->Set(ReaderProperties::kAvroBufferSize, kCustomBufferSize); + ASSERT_EQ(properties2->Get(ReaderProperties::kAvroBufferSize), kCustomBufferSize); + + // A string value supplied through FromMap is read back as the integer setting + std::unordered_map config_map = { + {"read.avro.buffer-size", "4194304"} // 4MB + }; + auto properties3 = ReaderProperties::FromMap(config_map); + ASSERT_EQ(properties3->Get(ReaderProperties::kAvroBufferSize), 4194304); + + // After Unset(), Get() reports the default value again + properties2->Unset(ReaderProperties::kAvroBufferSize); + ASSERT_EQ(properties2->Get(ReaderProperties::kAvroBufferSize), 1024 * 1024); +} + } // namespace iceberg::avro