Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions cpp/src/arrow/array/array_union_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@
#include "arrow/array.h"
#include "arrow/array/builder_nested.h"
#include "arrow/array/builder_union.h"
// TODO ipc shouldn't be included here
#include "arrow/ipc/test_common.h"
#include "arrow/testing/builder.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/util.h"
Expand All @@ -37,7 +35,7 @@ using internal::checked_pointer_cast;

TEST(TestUnionArray, TestSliceEquals) {
std::shared_ptr<RecordBatch> batch;
ASSERT_OK(ipc::test::MakeUnion(&batch));
ASSERT_OK(MakeUnion(&batch));

auto CheckUnion = [](std::shared_ptr<Array> array) {
const int64_t size = array->length();
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/integration/json_integration_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -927,7 +927,7 @@ TEST(TestJsonArrayWriter, NestedTypes) {

TEST(TestJsonArrayWriter, Unions) {
std::shared_ptr<RecordBatch> batch;
ASSERT_OK(MakeUnion(&batch));
ASSERT_OK(ipc::test::MakeUnion(&batch));

for (int i = 0; i < batch->num_columns(); ++i) {
TestArrayRoundTrip(*batch->column(i));
Expand Down Expand Up @@ -1155,7 +1155,7 @@ const std::vector<ipc::test::MakeRecordBatch*> kBatchCases = {
&MakeDeeplyNestedListView,
&MakeStringTypesRecordBatchWithNulls,
&MakeStruct,
&MakeUnion,
&ipc::test::MakeUnion,
&MakeDictionary,
&MakeNestedDictionary,
&MakeMap,
Expand Down
51 changes: 2 additions & 49 deletions cpp/src/arrow/ipc/test_common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -670,55 +670,8 @@ Status MakeRunEndEncoded(std::shared_ptr<RecordBatch>* out) {
}

// Builds the union test batch used by the IPC tests.
//
// This is a thin forwarding wrapper kept so that existing callers of
// ipc::test::MakeUnion continue to work; the batch itself is produced by the
// shared helper ::arrow::MakeUnion declared in arrow/testing/util.h.
//
// \param[out] out receives the batch produced by ::arrow::MakeUnion
// \return whatever Status ::arrow::MakeUnion returns
Status MakeUnion(std::shared_ptr<RecordBatch>* out) {
  // Fully qualify the callee: an unqualified MakeUnion here would resolve to
  // this very function and recurse forever.
  return ::arrow::MakeUnion(out);
}

Status MakeDictionary(std::shared_ptr<RecordBatch>* out) {
Expand Down
54 changes: 54 additions & 0 deletions cpp/src/arrow/testing/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,11 @@
# include <unistd.h> // IWYU pragma: keep
#endif

#include "arrow/array.h"
#include "arrow/array/builder_union.h"
#include "arrow/config.h"
#include "arrow/table.h"
#include "arrow/testing/builder.h"
#include "arrow/testing/random.h"
#include "arrow/type.h"
#include "arrow/util/cpu_info.h"
Expand Down Expand Up @@ -242,4 +245,55 @@ std::vector<int64_t> GetSupportedHardwareFlags(
return hardware_flags;
}

Status MakeUnion(std::shared_ptr<RecordBatch>* out) {
  // Every column of the resulting batch has this many rows.
  const int64_t num_rows = 7;

  // --- Types -------------------------------------------------------------
  // A sparse and a dense union are declared over the same two child fields
  // and the same (non-contiguous) type codes.
  std::vector<int8_t> codes = {5, 10};
  std::vector<std::shared_ptr<Field>> child_fields(
      {field("u0", int32()), field("u1", uint8())});

  auto sparse_ty = sparse_union(child_fields, codes);
  auto dense_ty = dense_union(child_fields, codes);

  auto sparse_field = field("sparse", sparse_ty);
  auto dense_field = field("dense", dense_ty);
  auto batch_schema = ::arrow::schema({sparse_field, dense_field});

  // --- Type ids ----------------------------------------------------------
  // One buffer of per-row type ids, shared by both union arrays below.
  std::vector<uint8_t> row_type_ids = {5, 10, 5, 5, 10, 10, 5};
  std::shared_ptr<Buffer> type_ids_buf;
  RETURN_NOT_OK(
      CopyBufferFromVector(row_type_ids, default_memory_pool(), &type_ids_buf));

  // --- Sparse children: each child holds num_rows values -------------------
  std::vector<std::shared_ptr<Array>> sparse_kids(2);
  std::vector<int32_t> sparse_u0 = {0, 1, 2, 3, 4, 5, 6};
  std::vector<uint8_t> sparse_u1 = {10, 11, 12, 13, 14, 15, 16};
  ArrayFromVector<Int32Type, int32_t>(sparse_u0, &sparse_kids[0]);
  ArrayFromVector<UInt8Type, uint8_t>(sparse_u1, &sparse_kids[1]);

  // --- Dense children: shorter arrays, addressed through the offsets -------
  std::vector<std::shared_ptr<Array>> dense_kids(2);
  std::vector<int32_t> dense_u0 = {0, 2, 3, 7};
  std::vector<uint8_t> dense_u1 = {11, 14, 15};
  ArrayFromVector<Int32Type, int32_t>(dense_u0, &dense_kids[0]);
  ArrayFromVector<UInt8Type, uint8_t>(dense_u1, &dense_kids[1]);

  // Offsets buffer handed to the dense union only.
  std::vector<int32_t> dense_offsets = {0, 0, 1, 2, 1, 2, 3};
  std::shared_ptr<Buffer> offsets_buf;
  RETURN_NOT_OK(
      CopyBufferFromVector(dense_offsets, default_memory_pool(), &offsets_buf));

  // --- Assemble ------------------------------------------------------------
  auto sparse_array = std::make_shared<SparseUnionArray>(sparse_ty, num_rows,
                                                         sparse_kids, type_ids_buf);
  auto dense_array = std::make_shared<DenseUnionArray>(dense_ty, num_rows, dense_kids,
                                                       type_ids_buf, offsets_buf);

  std::vector<std::shared_ptr<Array>> columns = {sparse_array, dense_array};
  *out = RecordBatch::Make(batch_schema, num_rows, columns);
  return Status::OK();
}

} // namespace arrow
6 changes: 6 additions & 0 deletions cpp/src/arrow/testing/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,4 +142,10 @@ ARROW_TESTING_EXPORT
std::vector<int64_t> GetSupportedHardwareFlags(
const std::vector<int64_t>& candidate_flags);

// Creates a RecordBatch with two columns, "sparse" and "dense", holding a sparse
// and a dense union array built over the same union definition.
//
// The union has two child fields, "u0" (int32) and "u1" (uint8), with type codes
// 5 and 10 respectively. Both arrays have length 7 and share one type-ids buffer;
// the dense array additionally carries an offsets buffer.
//
// \param[out] out set to the newly created batch on success
// \return Status::OK() on success, or the error from copying the type-ids or
//         offsets buffer if that copy fails
ARROW_TESTING_EXPORT
Status MakeUnion(std::shared_ptr<RecordBatch>* out);

} // namespace arrow
Loading