diff --git a/cpp/src/arrow/array/array_union_test.cc b/cpp/src/arrow/array/array_union_test.cc index 77ba2477791..a8f4ed1a9e2 100644 --- a/cpp/src/arrow/array/array_union_test.cc +++ b/cpp/src/arrow/array/array_union_test.cc @@ -22,8 +22,6 @@ #include "arrow/array.h" #include "arrow/array/builder_nested.h" #include "arrow/array/builder_union.h" -// TODO ipc shouldn't be included here -#include "arrow/ipc/test_common.h" #include "arrow/testing/builder.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/util.h" @@ -37,7 +35,7 @@ using internal::checked_pointer_cast; TEST(TestUnionArray, TestSliceEquals) { std::shared_ptr batch; - ASSERT_OK(ipc::test::MakeUnion(&batch)); + ASSERT_OK(MakeUnion(&batch)); auto CheckUnion = [](std::shared_ptr array) { const int64_t size = array->length(); diff --git a/cpp/src/arrow/integration/json_integration_test.cc b/cpp/src/arrow/integration/json_integration_test.cc index 0e84ea6124d..8321dcb3592 100644 --- a/cpp/src/arrow/integration/json_integration_test.cc +++ b/cpp/src/arrow/integration/json_integration_test.cc @@ -927,7 +927,7 @@ TEST(TestJsonArrayWriter, NestedTypes) { TEST(TestJsonArrayWriter, Unions) { std::shared_ptr batch; - ASSERT_OK(MakeUnion(&batch)); + ASSERT_OK(ipc::test::MakeUnion(&batch)); for (int i = 0; i < batch->num_columns(); ++i) { TestArrayRoundTrip(*batch->column(i)); @@ -1155,7 +1155,7 @@ const std::vector kBatchCases = { &MakeDeeplyNestedListView, &MakeStringTypesRecordBatchWithNulls, &MakeStruct, - &MakeUnion, + &ipc::test::MakeUnion, &MakeDictionary, &MakeNestedDictionary, &MakeMap, diff --git a/cpp/src/arrow/ipc/test_common.cc b/cpp/src/arrow/ipc/test_common.cc index dea40f18e81..08e04e2b171 100644 --- a/cpp/src/arrow/ipc/test_common.cc +++ b/cpp/src/arrow/ipc/test_common.cc @@ -670,55 +670,8 @@ Status MakeRunEndEncoded(std::shared_ptr* out) { } Status MakeUnion(std::shared_ptr* out) { - // Define schema - std::vector> union_fields( - {field("u0", int32()), field("u1", uint8())}); - - 
std::vector type_codes = {5, 10}; - auto sparse_type = sparse_union(union_fields, type_codes); - auto dense_type = dense_union(union_fields, type_codes); - - auto f0 = field("sparse", sparse_type); - auto f1 = field("dense", dense_type); - - auto schema = ::arrow::schema({f0, f1}); - - // Create data - std::vector> sparse_children(2); - std::vector> dense_children(2); - - const int64_t length = 7; - - std::shared_ptr type_ids_buffer; - std::vector type_ids = {5, 10, 5, 5, 10, 10, 5}; - RETURN_NOT_OK(CopyBufferFromVector(type_ids, default_memory_pool(), &type_ids_buffer)); - - std::vector u0_values = {0, 1, 2, 3, 4, 5, 6}; - ArrayFromVector(u0_values, &sparse_children[0]); - - std::vector u1_values = {10, 11, 12, 13, 14, 15, 16}; - ArrayFromVector(u1_values, &sparse_children[1]); - - // dense children - u0_values = {0, 2, 3, 7}; - ArrayFromVector(u0_values, &dense_children[0]); - - u1_values = {11, 14, 15}; - ArrayFromVector(u1_values, &dense_children[1]); - - std::shared_ptr offsets_buffer; - std::vector offsets = {0, 0, 1, 2, 1, 2, 3}; - RETURN_NOT_OK(CopyBufferFromVector(offsets, default_memory_pool(), &offsets_buffer)); - - auto sparse = std::make_shared(sparse_type, length, sparse_children, - type_ids_buffer); - auto dense = std::make_shared(dense_type, length, dense_children, - type_ids_buffer, offsets_buffer); - - // construct batch - std::vector> arrays = {sparse, dense}; - *out = RecordBatch::Make(schema, length, arrays); - return Status::OK(); + // Delegate to the shared implementation declared in arrow/testing/util.h + return ::arrow::MakeUnion(out); } Status MakeDictionary(std::shared_ptr* out) { diff --git a/cpp/src/arrow/testing/util.cc b/cpp/src/arrow/testing/util.cc index b0c8deae36c..71830373132 100644 --- a/cpp/src/arrow/testing/util.cc +++ b/cpp/src/arrow/testing/util.cc @@ -39,8 +39,11 @@ # include // IWYU pragma: keep #endif +#include "arrow/array.h" +#include "arrow/array/builder_union.h" #include "arrow/config.h" #include "arrow/table.h" +#include
"arrow/testing/builder.h" #include "arrow/testing/random.h" #include "arrow/type.h" #include "arrow/util/cpu_info.h" @@ -242,4 +245,55 @@ std::vector GetSupportedHardwareFlags( return hardware_flags; } +Status MakeUnion(std::shared_ptr* out) { + // Define schema + std::vector> union_fields( + {field("u0", int32()), field("u1", uint8())}); + + std::vector type_codes = {5, 10}; + auto sparse_type = sparse_union(union_fields, type_codes); + auto dense_type = dense_union(union_fields, type_codes); + + auto f0 = field("sparse", sparse_type); + auto f1 = field("dense", dense_type); + auto schema = ::arrow::schema({f0, f1}); + + // Create data + std::vector> sparse_children(2); + std::vector> dense_children(2); + + const int64_t length = 7; + + std::shared_ptr type_ids_buffer; + std::vector type_ids = {5, 10, 5, 5, 10, 10, 5}; + RETURN_NOT_OK(CopyBufferFromVector(type_ids, default_memory_pool(), &type_ids_buffer)); + + std::vector u0_values = {0, 1, 2, 3, 4, 5, 6}; + ArrayFromVector(u0_values, &sparse_children[0]); + + std::vector u1_values = {10, 11, 12, 13, 14, 15, 16}; + ArrayFromVector(u1_values, &sparse_children[1]); + + // dense children + u0_values = {0, 2, 3, 7}; + ArrayFromVector(u0_values, &dense_children[0]); + + u1_values = {11, 14, 15}; + ArrayFromVector(u1_values, &dense_children[1]); + + std::shared_ptr offsets_buffer; + std::vector offsets = {0, 0, 1, 2, 1, 2, 3}; + RETURN_NOT_OK(CopyBufferFromVector(offsets, default_memory_pool(), &offsets_buffer)); + + auto sparse = std::make_shared(sparse_type, length, sparse_children, + type_ids_buffer); + auto dense = std::make_shared(dense_type, length, dense_children, + type_ids_buffer, offsets_buffer); + + // construct batch + std::vector> arrays = {sparse, dense}; + *out = RecordBatch::Make(schema, length, arrays); + return Status::OK(); +} + } // namespace arrow diff --git a/cpp/src/arrow/testing/util.h b/cpp/src/arrow/testing/util.h index c2d6ca4d156..aea7051e8c3 100644 --- a/cpp/src/arrow/testing/util.h +++ 
b/cpp/src/arrow/testing/util.h @@ -142,4 +142,10 @@ ARROW_TESTING_EXPORT std::vector GetSupportedHardwareFlags( const std::vector& candidate_flags); +// Creates a RecordBatch with two columns, "sparse" and "dense", holding a sparse and a +// dense union array built from the same child fields: "u0" (int32) and "u1" (uint8), +// with type codes 5 and 10 respectively. Both arrays have length 7. +ARROW_TESTING_EXPORT +Status MakeUnion(std::shared_ptr* out); + } // namespace arrow