Preserve schema-level key/value metadata when renaming columns (GH-48024).

RecordBatch::RenameColumns, Table::RenameColumns and Schema::WithNames now pass
the existing schema metadata to the schema they build. Regression tests are
added for each of them in C++, and for Table / RecordBatch in Python.

diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc
index 1162b4c3bb0..6494e11600f 100644
--- a/cpp/src/arrow/record_batch.cc
+++ b/cpp/src/arrow/record_batch.cc
@@ -414,8 +414,8 @@ Result<std::shared_ptr<RecordBatch>> RecordBatch::RenameColumns(
     fields[i] = schema()->field(i)->WithName(names[i]);
   }
 
-  return RecordBatch::Make(::arrow::schema(std::move(fields)), num_rows(),
-                           std::move(columns), GetSyncEvent());
+  return RecordBatch::Make(::arrow::schema(std::move(fields), schema()->metadata()),
+                           num_rows(), std::move(columns), GetSyncEvent());
 }
 
 Result<std::shared_ptr<RecordBatch>> RecordBatch::SelectColumns(
diff --git a/cpp/src/arrow/record_batch_test.cc b/cpp/src/arrow/record_batch_test.cc
index 4516b808a84..53ac5074608 100644
--- a/cpp/src/arrow/record_batch_test.cc
+++ b/cpp/src/arrow/record_batch_test.cc
@@ -521,6 +521,32 @@ TEST_F(TestRecordBatch, RenameColumns) {
   ASSERT_RAISES(Invalid, batch->RenameColumns({"hello", "world"}));
 }
 
+TEST_F(TestRecordBatch, RenameColumnsPreservesMetadata) {
+  const int length = 10;
+
+  auto field1 = field("f1", int32());
+  auto field2 = field("f2", uint8());
+  auto field3 = field("f3", int16());
+
+  auto metadata = key_value_metadata({"foo", "bar"}, {"fizz", "buzz"});
+  auto schema1 = ::arrow::schema({field1, field2, field3})->WithMetadata(metadata);
+
+  random::RandomArrayGenerator gen(42);
+
+  auto array1 = gen.ArrayOf(int32(), length);
+  auto array2 = gen.ArrayOf(uint8(), length);
+  auto array3 = gen.ArrayOf(int16(), length);
+
+  auto batch = RecordBatch::Make(schema1, length, {array1, array2, array3});
+
+  ASSERT_OK_AND_ASSIGN(auto renamed, batch->RenameColumns({"zero", "one", "two"}));
+  EXPECT_THAT(renamed->ColumnNames(), testing::ElementsAre("zero", "one", "two"));
+
+  // Verify metadata is preserved
+  ASSERT_NE(nullptr, renamed->schema()->metadata());
+  ASSERT_TRUE(renamed->schema()->metadata()->Equals(*metadata));
+}
+
 TEST_F(TestRecordBatch, SelectColumns) {
   const int length = 10;
 
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index 68a8a1951f1..4b725786141 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -366,7 +366,8 @@ Result<std::shared_ptr<Table>> Table::RenameColumns(
     columns[i] = column(i);
     fields[i] = field(i)->WithName(names[i]);
   }
-  return Table::Make(::arrow::schema(std::move(fields)), std::move(columns), num_rows());
+  return Table::Make(::arrow::schema(std::move(fields), schema()->metadata()),
+                     std::move(columns), num_rows());
 }
 
 Result<std::shared_ptr<Table>> Table::SelectColumns(
diff --git a/cpp/src/arrow/table_test.cc b/cpp/src/arrow/table_test.cc
index 692671910b8..4b78878bd15 100644
--- a/cpp/src/arrow/table_test.cc
+++ b/cpp/src/arrow/table_test.cc
@@ -801,6 +801,20 @@ TEST_F(TestTable, RenameColumns) {
   ASSERT_RAISES(Invalid, table->RenameColumns({"hello", "world"}));
 }
 
+TEST_F(TestTable, RenameColumnsPreservesMetadata) {
+  MakeExample1(10);
+  auto metadata = key_value_metadata({"foo", "bar"}, {"fizz", "buzz"});
+  auto schema_with_metadata = schema_->WithMetadata(metadata);
+  auto table = Table::Make(schema_with_metadata, columns_);
+
+  ASSERT_OK_AND_ASSIGN(auto renamed, table->RenameColumns({"zero", "one", "two"}));
+  EXPECT_THAT(renamed->ColumnNames(), testing::ElementsAre("zero", "one", "two"));
+
+  // Verify metadata is preserved
+  ASSERT_NE(nullptr, renamed->schema()->metadata());
+  ASSERT_TRUE(renamed->schema()->metadata()->Equals(*metadata));
+}
+
 TEST_F(TestTable, SelectColumns) {
   MakeExample1(10);
   auto table = Table::Make(schema_, columns_);
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index cba4a0ecd3a..27d4b16df4c 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -2459,7 +2459,7 @@ Result<std::shared_ptr<Schema>> Schema::WithNames(
   for (const auto& field : impl_->fields_) {
     new_fields.push_back(field->WithName(*names_itr++));
   }
-  return schema(std::move(new_fields));
+  return schema(std::move(new_fields), metadata());
 }
 
 std::shared_ptr<Schema> Schema::WithMetadata(
diff --git a/cpp/src/arrow/type_test.cc b/cpp/src/arrow/type_test.cc
index e9b1d30e6e7..c4cb5807c3f 100644
--- a/cpp/src/arrow/type_test.cc
+++ b/cpp/src/arrow/type_test.cc
@@ -713,6 +713,26 @@ TEST_F(TestSchema, TestRemoveMetadata) {
   ASSERT_TRUE(new_schema->metadata() == nullptr);
 }
 
+TEST_F(TestSchema, TestWithNamesPreservesMetadata) {
+  auto f0 = field("f0", int32());
+  auto f1 = field("f1", uint8(), false);
+  auto f2 = field("f2", utf8());
+  auto metadata = key_value_metadata({{"foo", "bar"}, {"fizz", "buzz"}});
+  auto schema_with_metadata = std::make_shared<Schema>(
+      std::vector<std::shared_ptr<Field>>{f0, f1, f2}, metadata);
+
+  ASSERT_OK_AND_ASSIGN(auto renamed, schema_with_metadata->WithNames({"a", "b", "c"}));
+
+  // Verify names are updated
+  ASSERT_EQ("a", renamed->field(0)->name());
+  ASSERT_EQ("b", renamed->field(1)->name());
+  ASSERT_EQ("c", renamed->field(2)->name());
+
+  // Verify metadata is preserved
+  ASSERT_NE(nullptr, renamed->metadata());
+  ASSERT_TRUE(renamed->metadata()->Equals(*metadata));
+}
+
 void AssertSchemaBuilderYield(const SchemaBuilder& builder,
                               const std::shared_ptr<Schema>& expected) {
   ASSERT_OK_AND_ASSIGN(auto schema, builder.Finish());
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index b65fb7d952c..fbacc93c935 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -1801,6 +1801,31 @@ def test_table_rename_columns(cls):
         table.rename_columns('not a list')
 
 
+@pytest.mark.parametrize(
+    ('cls'),
+    [
+        (pa.Table),
+        (pa.RecordBatch)
+    ]
+)
+def test_rename_columns_preserves_metadata(cls):
+    # GH-48024: rename_columns() should preserve schema metadata
+    schema = pa.schema(
+        [pa.field("n_legs", pa.int64()), pa.field("animals", pa.string())],
+        metadata={"source": "zoo"},
+    )
+    if cls == pa.Table:
+        obj = cls.from_arrays([[2, 4], ["Flamingo", "Horse"]], schema=schema)
+    else:
+        obj = cls.from_arrays([pa.array([2, 4]), pa.array(["Flamingo", "Horse"])],
+                              schema=schema)
+    assert obj.schema.metadata == {b"source": b"zoo"}
+
+    new_obj = obj.rename_columns(["n", "name"])
+    assert new_obj.column_names == ["n", "name"]
+    assert new_obj.schema.metadata == {b"source": b"zoo"}
+
+
 @pytest.mark.parametrize(
     ('cls'),
     [