diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp
index df6fab4f305..105897b49cb 100644
--- a/c_glib/arrow-glib/compute.cpp
+++ b/c_glib/arrow-glib/compute.cpp
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include <arrow-glib/internal-hash-table.hpp>
 #include
 #include
 #include
@@ -296,6 +297,9 @@ G_BEGIN_DECLS
  * #GArrowPairwiseOptions is a class to customize the pairwise
  * functions such as `pairwise_diff` and `pairwise_diff_checked`.
  *
+ * #GArrowMakeStructOptions is a class to customize the `make_struct`
+ * function.
+ *
  * There are many functions to compute data on an array.
  */
 
@@ -8349,6 +8353,236 @@ garrow_pairwise_options_new(void)
   return GARROW_PAIRWISE_OPTIONS(g_object_new(GARROW_TYPE_PAIRWISE_OPTIONS, NULL));
 }
 
+enum {
+  PROP_MAKE_STRUCT_OPTIONS_FIELD_NAMES = 1,
+  PROP_MAKE_STRUCT_OPTIONS_FIELD_NULLABILITY,
+  PROP_MAKE_STRUCT_OPTIONS_FIELD_METADATA,
+};
+
+G_DEFINE_TYPE(GArrowMakeStructOptions,
+              garrow_make_struct_options,
+              GARROW_TYPE_FUNCTION_OPTIONS)
+
+/* GObject property setter: copies the boxed property value into the
+ * wrapped arrow::compute::MakeStructOptions. */
+static void
+garrow_make_struct_options_set_property(GObject *object,
+                                        guint prop_id,
+                                        const GValue *value,
+                                        GParamSpec *pspec)
+{
+  auto options = static_cast<arrow::compute::MakeStructOptions *>(
+    garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(object)));
+
+  switch (prop_id) {
+  case PROP_MAKE_STRUCT_OPTIONS_FIELD_NAMES:
+    {
+      auto strv = static_cast<gchar **>(g_value_get_boxed(value));
+      options->field_names.clear();
+      if (strv) {
+        for (gchar **p = strv; *p; ++p) {
+          options->field_names.emplace_back(*p);
+        }
+      }
+      // Keep nullability and metadata vectors in sync with names.
+      const auto new_size = options->field_names.size();
+      if (options->field_nullability.size() != new_size) {
+        options->field_nullability.assign(new_size, true);
+      }
+      if (options->field_metadata.size() != new_size) {
+        options->field_metadata.assign(new_size, nullptr);
+      }
+    }
+    break;
+  case PROP_MAKE_STRUCT_OPTIONS_FIELD_NULLABILITY:
+    {
+      auto array = static_cast<GArray *>(g_value_get_boxed(value));
+      options->field_nullability.clear();
+      if (array) {
+        for (guint i = 0; i < array->len; ++i) {
+          auto nullability = g_array_index(array, gboolean, i);
+          options->field_nullability.push_back(nullability != FALSE);
+        }
+      }
+    }
+    break;
+  case PROP_MAKE_STRUCT_OPTIONS_FIELD_METADATA:
+    {
+      auto array = static_cast<GPtrArray *>(g_value_get_boxed(value));
+      options->field_metadata.clear();
+      if (array) {
+        for (guint i = 0; i < array->len; ++i) {
+          auto metadata = static_cast<GHashTable *>(g_ptr_array_index(array, i));
+          if (metadata) {
+            options->field_metadata.push_back(
+              garrow_internal_hash_table_to_metadata(metadata));
+          } else {
+            options->field_metadata.push_back(nullptr);
+          }
+        }
+      }
+    }
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+/* GDestroyNotify for the #GPtrArray built by the "field-metadata"
+ * getter. Elements may be NULL (a field without metadata) and
+ * g_hash_table_unref() must not be called with NULL. */
+static void
+garrow_make_struct_options_metadata_free(gpointer data)
+{
+  if (data) {
+    g_hash_table_unref(static_cast<GHashTable *>(data));
+  }
+}
+
+/* GObject property getter: builds a newly allocated boxed value from
+ * the wrapped arrow::compute::MakeStructOptions and hands ownership of
+ * it to @value. */
+static void
+garrow_make_struct_options_get_property(GObject *object,
+                                        guint prop_id,
+                                        GValue *value,
+                                        GParamSpec *pspec)
+{
+  auto options = static_cast<arrow::compute::MakeStructOptions *>(
+    garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(object)));
+
+  switch (prop_id) {
+  case PROP_MAKE_STRUCT_OPTIONS_FIELD_NAMES:
+    {
+      const auto &names = options->field_names;
+      auto strv = static_cast<gchar **>(g_new0(gchar *, names.size() + 1));
+      for (gsize i = 0; i < names.size(); ++i) {
+        strv[i] = g_strdup(names[i].c_str());
+      }
+      g_value_take_boxed(value, strv);
+    }
+    break;
+  case PROP_MAKE_STRUCT_OPTIONS_FIELD_NULLABILITY:
+    {
+      const auto &nullability = options->field_nullability;
+      auto array = g_array_sized_new(FALSE, FALSE, sizeof(gboolean), nullability.size());
+      for (gsize i = 0; i < nullability.size(); ++i) {
+        gboolean val = nullability[i] ? TRUE : FALSE;
+        g_array_append_val(array, val);
+      }
+      g_value_take_boxed(value, array);
+    }
+    break;
+  case PROP_MAKE_STRUCT_OPTIONS_FIELD_METADATA:
+    {
+      const auto &metadata = options->field_metadata;
+      // The array owns the hash tables so that freeing the boxed value
+      // also releases them.
+      auto array = g_ptr_array_new_full(metadata.size(),
+                                        garrow_make_struct_options_metadata_free);
+      for (gsize i = 0; i < metadata.size(); ++i) {
+        GHashTable *hash_table = nullptr;
+        if (metadata[i]) {
+          hash_table = garrow_internal_hash_table_from_metadata(metadata[i]);
+        }
+        g_ptr_array_add(array, hash_table);
+      }
+      g_value_take_boxed(value, array);
+    }
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+/* Instance initializer: installs a default-constructed
+ * arrow::compute::MakeStructOptions as the wrapped options. */
+static void
+garrow_make_struct_options_init(GArrowMakeStructOptions *object)
+{
+  auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object);
+  priv->options = static_cast<arrow::compute::FunctionOptions *>(
+    new arrow::compute::MakeStructOptions());
+}
+
+/* Class initializer: hooks up the property accessors and installs the
+ * three boxed properties. */
+static void
+garrow_make_struct_options_class_init(GArrowMakeStructOptionsClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->set_property = garrow_make_struct_options_set_property;
+  gobject_class->get_property = garrow_make_struct_options_get_property;
+
+  GParamSpec *spec;
+  /**
+   * GArrowMakeStructOptions:field-names:
+   *
+   * Names for wrapped columns.
+   *
+   * Since: 23.0.0
+   */
+  spec = g_param_spec_boxed("field-names",
+                            "Field names",
+                            "Names for wrapped columns",
+                            G_TYPE_STRV,
+                            static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class,
+                                  PROP_MAKE_STRUCT_OPTIONS_FIELD_NAMES,
+                                  spec);
+
+  /**
+   * GArrowMakeStructOptions:field-nullability:
+   *
+   * Nullability for each field. This is a #GArray of #gboolean values.
+   *
+   * Since: 23.0.0
+   */
+  spec = g_param_spec_boxed("field-nullability",
+                            "Field nullability",
+                            "Nullability for each field",
+                            G_TYPE_ARRAY,
+                            static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class,
+                                  PROP_MAKE_STRUCT_OPTIONS_FIELD_NULLABILITY,
+                                  spec);
+
+  /**
+   * GArrowMakeStructOptions:field-metadata:
+   *
+   * Metadata for each field. This is a #GPtrArray of #GHashTable pointers
+   * (each hash table has utf8 keys and utf8 values), or %NULL for fields
+   * without metadata.
+   *
+   * Since: 23.0.0
+   */
+  spec = g_param_spec_boxed("field-metadata",
+                            "Field metadata",
+                            "Metadata for each field",
+                            G_TYPE_PTR_ARRAY,
+                            static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class,
+                                  PROP_MAKE_STRUCT_OPTIONS_FIELD_METADATA,
+                                  spec);
+}
+
+/**
+ * garrow_make_struct_options_new:
+ *
+ * Returns: A newly created #GArrowMakeStructOptions.
+ *
+ * Since: 23.0.0
+ */
+GArrowMakeStructOptions *
+garrow_make_struct_options_new(void)
+{
+  auto options = g_object_new(GARROW_TYPE_MAKE_STRUCT_OPTIONS, NULL);
+  return GARROW_MAKE_STRUCT_OPTIONS(options);
+}
+
 G_END_DECLS
 
 arrow::Result
@@ -8553,6 +8787,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt
       static_cast<const arrow::compute::PairwiseOptions *>(arrow_options);
     auto options = garrow_pairwise_options_new_raw(arrow_pairwise_options);
     return GARROW_FUNCTION_OPTIONS(options);
+  } else if (arrow_type_name == "MakeStructOptions") {
+    const auto arrow_make_struct_options =
+      static_cast<const arrow::compute::MakeStructOptions *>(arrow_options);
+    auto options = garrow_make_struct_options_new_raw(arrow_make_struct_options);
+    return GARROW_FUNCTION_OPTIONS(options);
   } else {
     auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL);
     return GARROW_FUNCTION_OPTIONS(options);
@@ -9377,3 +9616,21 @@ garrow_pairwise_options_get_raw(GArrowPairwiseOptions *options)
   return static_cast<arrow::compute::PairwiseOptions *>(
     garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
 }
+
+GArrowMakeStructOptions *
+garrow_make_struct_options_new_raw(const arrow::compute::MakeStructOptions *arrow_options)
+{
+  auto options =
+    GARROW_MAKE_STRUCT_OPTIONS(g_object_new(GARROW_TYPE_MAKE_STRUCT_OPTIONS, nullptr));
+  auto arrow_new_options = static_cast<arrow::compute::MakeStructOptions *>(
+    garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
+  *arrow_new_options = *arrow_options;
+  return options;
+}
+
+arrow::compute::MakeStructOptions *
+garrow_make_struct_options_get_raw(GArrowMakeStructOptions *options)
+{
+  return static_cast<arrow::compute::MakeStructOptions *>(
+    garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
+}
diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h
index becdaf3a672..7da4623b52b 100644
--- a/c_glib/arrow-glib/compute.h
+++ b/c_glib/arrow-glib/compute.h
@@ -1475,4 +1475,20 @@ GARROW_AVAILABLE_IN_23_0
 GArrowPairwiseOptions *
 garrow_pairwise_options_new(void);
 
+#define GARROW_TYPE_MAKE_STRUCT_OPTIONS (garrow_make_struct_options_get_type())
+GARROW_AVAILABLE_IN_23_0
+G_DECLARE_DERIVABLE_TYPE(GArrowMakeStructOptions,
+                         garrow_make_struct_options,
+                         GARROW,
+                         MAKE_STRUCT_OPTIONS,
+                         GArrowFunctionOptions)
+struct _GArrowMakeStructOptionsClass
+{
+  GArrowFunctionOptionsClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_23_0
+GArrowMakeStructOptions *
+garrow_make_struct_options_new(void);
+
 G_END_DECLS
diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp
index 1a9da1c00c5..c07ee5701a3 100644
--- a/c_glib/arrow-glib/compute.hpp
+++ b/c_glib/arrow-glib/compute.hpp
@@ -256,3 +256,9 @@ GArrowPairwiseOptions *
 garrow_pairwise_options_new_raw(const arrow::compute::PairwiseOptions *arrow_options);
 arrow::compute::PairwiseOptions *
 garrow_pairwise_options_get_raw(GArrowPairwiseOptions *options);
+
+GArrowMakeStructOptions *
+garrow_make_struct_options_new_raw(
+  const arrow::compute::MakeStructOptions *arrow_options);
+arrow::compute::MakeStructOptions *
+garrow_make_struct_options_get_raw(GArrowMakeStructOptions *options);
diff --git a/c_glib/arrow-glib/internal-hash-table.hpp b/c_glib/arrow-glib/internal-hash-table.hpp
index 2e0a72561a7..9ec6d00d3e1 100644
--- a/c_glib/arrow-glib/internal-hash-table.hpp
+++ b/c_glib/arrow-glib/internal-hash-table.hpp
@@ -40,7 +40,7 @@ garrow_internal_hash_table_to_metadata(GHashTable *metadata)
 
 static inline GHashTable *
 garrow_internal_hash_table_from_metadata(
-  const std::shared_ptr<arrow::KeyValueMetadata> &arrow_metadata)
+  const std::shared_ptr<const arrow::KeyValueMetadata> &arrow_metadata)
 {
   auto metadata = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, g_free);
   const auto &keys = arrow_metadata->keys();
diff --git a/c_glib/test/test-make-struct-options.rb b/c_glib/test/test-make-struct-options.rb
new file mode 100644
index 00000000000..2e6175ee4b6
--- /dev/null
+++ b/c_glib/test/test-make-struct-options.rb
@@ -0,0 +1,55 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestMakeStructOptions < Test::Unit::TestCase
+  include Helper::Buildable
+
+  def setup
+    @options = Arrow::MakeStructOptions.new
+  end
+
+  def test_field_names_property
+    assert_equal([], @options.field_names)
+    @options.field_names = ["a", "b", "c"]
+    assert_equal(["a", "b", "c"], @options.field_names)
+  end
+
+  def test_make_struct_function
+    a = build_int8_array([1, 2, 3])
+    b = build_boolean_array([true, false, nil])
+    args = [
+      Arrow::ArrayDatum.new(a),
+      Arrow::ArrayDatum.new(b),
+    ]
+    @options.field_names = ["a", "b"]
+    make_struct_function = Arrow::Function.find("make_struct")
+    result = make_struct_function.execute(args, @options).value
+
+    expected = build_struct_array(
+      [
+        Arrow::Field.new("a", Arrow::Int8DataType.new),
+        Arrow::Field.new("b", Arrow::BooleanDataType.new),
+      ],
+      [
+        {"a" => 1, "b" => true},
+        {"a" => 2, "b" => false},
+        {"a" => 3, "b" => nil},
+      ]
+    )
+    assert_equal(expected, result)
+  end
+end
diff --git a/ruby/red-arrow/ext/arrow/arrow.cpp b/ruby/red-arrow/ext/arrow/arrow.cpp
index 404ec8996f2..9fc0e9344c7 100644
--- a/ruby/red-arrow/ext/arrow/arrow.cpp
+++ b/ruby/red-arrow/ext/arrow/arrow.cpp
@@ -20,6 +20,8 @@
 #include "red-arrow.hpp"
 #include "memory-view.hpp"
 
+#include
+#include
 #include
 
 namespace red_arrow {
@@ -63,6 +65,223 @@ namespace red_arrow {
       rbgobj_gc_mark_instance(node->data);
     }
   }
+
+  // GDestroyNotify for GPtrArray elements that are either a GHashTable*
+  // or NULL; g_hash_table_unref() must not be called with NULL.
+  static void
+  ghash_table_unref_if_set(gpointer data)
+  {
+    if (data) {
+      g_hash_table_unref(static_cast<GHashTable*>(data));
+    }
+  }
+
+  // Validates a Ruby metadata Hash and flattens it into a Ruby Array of
+  // Strings laid out as [key0, value0, key1, value1, ...]. Returns nil for
+  // nil.
+  //
+  // Raises TypeError for a non-Hash argument, and whatever
+  // StringValueCStr() raises for a key or value it rejects. Only Ruby
+  // objects are created here, so raising cannot leak GLib allocations.
+  static VALUE
+  ruby_hash_to_string_pairs(VALUE rb_hash)
+  {
+    if (NIL_P(rb_hash)) {
+      return Qnil;
+    }
+
+    if (rb_type(rb_hash) != T_HASH) {
+      rb_raise(rb_eTypeError, "expected Hash or nil, got %s", rb_obj_classname(rb_hash));
+    }
+
+    VALUE rb_keys = rb_funcall(rb_hash, rb_intern("keys"), 0);
+    const long n_entries = RARRAY_LEN(rb_keys);
+    VALUE rb_pairs = rb_ary_new_capa(n_entries * 2);
+    for (long i = 0; i < n_entries; ++i) {
+      VALUE rb_key = rb_ary_entry(rb_keys, i);
+      VALUE rb_value = rb_hash_aref(rb_hash, rb_key);
+      // Called for validation/coercion only; the bytes are copied later.
+      StringValueCStr(rb_key);
+      StringValueCStr(rb_value);
+      rb_ary_push(rb_pairs, rb_key);
+      rb_ary_push(rb_pairs, rb_value);
+    }
+    return rb_pairs;
+  }
+
+  // Builds a newly allocated GHashTable that owns copies of the keys and
+  // values in rb_pairs (as produced by ruby_hash_to_string_pairs()), or
+  // returns nullptr for nil. Uses only accessors that do not raise.
+  static GHashTable*
+  string_pairs_to_ghash_table(VALUE rb_pairs)
+  {
+    if (NIL_P(rb_pairs)) {
+      return nullptr;
+    }
+
+    auto hash_table = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, g_free);
+    const long n_strings = RARRAY_LEN(rb_pairs);
+    for (long i = 0; i + 1 < n_strings; i += 2) {
+      VALUE rb_key = rb_ary_entry(rb_pairs, i);
+      VALUE rb_value = rb_ary_entry(rb_pairs, i + 1);
+      g_hash_table_insert(hash_table,
+                          g_strndup(RSTRING_PTR(rb_key), RSTRING_LEN(rb_key)),
+                          g_strndup(RSTRING_PTR(rb_value), RSTRING_LEN(rb_value)));
+    }
+    return hash_table;
+  }
+
+  // GHFunc that copies one GHashTable entry into the Ruby Hash passed as
+  // user_data, as UTF-8 Strings.
+  static void
+  ghash_table_foreach_callback(gpointer key, gpointer value, gpointer user_data)
+  {
+    VALUE rb_hash = reinterpret_cast<VALUE>(user_data);
+    gchar* gkey = static_cast<gchar*>(key);
+    gchar* gvalue = static_cast<gchar*>(value);
+    VALUE rb_key = rb_utf8_str_new(gkey, strlen(gkey));
+    VALUE rb_value = rb_utf8_str_new(gvalue, strlen(gvalue));
+    rb_hash_aset(rb_hash, rb_key, rb_value);
+  }
+
+  // Converts a GHashTable of C strings to a Ruby Hash; nullptr becomes nil.
+  static VALUE
+  ghash_table_to_ruby_hash(GHashTable* hash_table)
+  {
+    if (!hash_table) {
+      return Qnil;
+    }
+
+    VALUE rb_hash = rb_hash_new();
+    g_hash_table_foreach(hash_table,
+                         ghash_table_foreach_callback,
+                         reinterpret_cast<gpointer>(rb_hash));
+    return rb_hash;
+  }
+
+  // Arrow::MakeStructOptions#field_nullability=: accepts an Array (each
+  // element is reduced to true/false by truthiness) or nil. Returns self.
+  VALUE
+  make_struct_options_set_field_nullability(VALUE rb_options, VALUE rb_array)
+  {
+    auto options = GARROW_MAKE_STRUCT_OPTIONS(RVAL2GOBJ(rb_options));
+
+    GArray* array = nullptr;
+    if (!NIL_P(rb_array)) {
+      Check_Type(rb_array, T_ARRAY);
+      long len = RARRAY_LEN(rb_array);
+      array = g_array_sized_new(FALSE, FALSE, sizeof(gboolean), len);
+
+      for (long i = 0; i < len; ++i) {
+        VALUE val = rb_ary_entry(rb_array, i);
+        gboolean bool_val = RTEST(val) ? TRUE : FALSE;
+        g_array_append_val(array, bool_val);
+      }
+    }
+
+    GValue gvalue = G_VALUE_INIT;
+    g_value_init(&gvalue, G_TYPE_ARRAY);
+    g_value_take_boxed(&gvalue, array);
+    g_object_set_property(G_OBJECT(options), "field-nullability", &gvalue);
+    g_value_unset(&gvalue);
+
+    return rb_options;
+  }
+
+  // Arrow::MakeStructOptions#field_nullability: returns an Array of
+  // true/false read from the "field-nullability" property.
+  VALUE
+  make_struct_options_get_field_nullability(VALUE rb_options)
+  {
+    auto options = GARROW_MAKE_STRUCT_OPTIONS(RVAL2GOBJ(rb_options));
+
+    GValue gvalue = G_VALUE_INIT;
+    g_value_init(&gvalue, G_TYPE_ARRAY);
+    g_object_get_property(G_OBJECT(options), "field-nullability", &gvalue);
+
+    GArray* array = static_cast<GArray*>(g_value_get_boxed(&gvalue));
+    VALUE rb_array = rb_ary_new();
+
+    if (array) {
+      for (guint i = 0; i < array->len; ++i) {
+        gboolean val = g_array_index(array, gboolean, i);
+        rb_ary_push(rb_array, val ? Qtrue : Qfalse);
+      }
+    }
+
+    g_value_unset(&gvalue);
+    return rb_array;
+  }
+
+  // Arrow::MakeStructOptions#field_metadata: returns an Array whose
+  // elements are Hashes (String => String) or nil.
+  VALUE
+  make_struct_options_get_field_metadata(VALUE rb_options)
+  {
+    auto options = GARROW_MAKE_STRUCT_OPTIONS(RVAL2GOBJ(rb_options));
+
+    GValue gvalue = G_VALUE_INIT;
+    g_value_init(&gvalue, G_TYPE_PTR_ARRAY);
+    g_object_get_property(G_OBJECT(options), "field-metadata", &gvalue);
+
+    GPtrArray* array = static_cast<GPtrArray*>(g_value_get_boxed(&gvalue));
+    VALUE rb_array = rb_ary_new();
+
+    if (array) {
+      for (guint i = 0; i < array->len; ++i) {
+        GHashTable* hash_table = static_cast<GHashTable*>(g_ptr_array_index(array, i));
+        VALUE rb_hash = ghash_table_to_ruby_hash(hash_table);
+        rb_ary_push(rb_array, rb_hash);
+      }
+    }
+
+    g_value_unset(&gvalue);
+    return rb_array;
+  }
+
+  // Arrow::MakeStructOptions#field_metadata=: accepts an Array of Hashes
+  // (or nil elements), or nil. Returns self.
+  //
+  // NOTE(review): the hash tables are released right after the property
+  // is set; this assumes the "field-metadata" setter copies their entries
+  // rather than retaining the GHashTable pointers -- confirm.
+  VALUE
+  make_struct_options_set_field_metadata(VALUE rb_options, VALUE rb_array)
+  {
+    auto options = GARROW_MAKE_STRUCT_OPTIONS(RVAL2GOBJ(rb_options));
+
+    // Pass 1 (may raise): validate every element using Ruby objects only.
+    VALUE rb_pairs_list = Qnil;
+    if (!NIL_P(rb_array)) {
+      Check_Type(rb_array, T_ARRAY);
+      rb_pairs_list = rb_ary_new();
+      for (long i = 0; i < RARRAY_LEN(rb_array); ++i) {
+        VALUE rb_pairs = ruby_hash_to_string_pairs(rb_ary_entry(rb_array, i));
+        rb_ary_push(rb_pairs_list, rb_pairs);
+      }
+    }
+
+    // Pass 2 (does not raise): build the GLib values. The array owns its
+    // hash tables, so g_value_unset() below releases them as well.
+    GPtrArray* array = nullptr;
+    if (!NIL_P(rb_pairs_list)) {
+      const long len = RARRAY_LEN(rb_pairs_list);
+      array = g_ptr_array_new_full(len, ghash_table_unref_if_set);
+      for (long i = 0; i < len; ++i) {
+        VALUE rb_pairs = rb_ary_entry(rb_pairs_list, i);
+        g_ptr_array_add(array, string_pairs_to_ghash_table(rb_pairs));
+      }
+    }
+    RB_GC_GUARD(rb_pairs_list);
+
+    GValue gvalue = G_VALUE_INIT;
+    g_value_init(&gvalue, G_TYPE_PTR_ARRAY);
+    g_value_take_boxed(&gvalue, array);
+    g_object_set_property(G_OBJECT(options), "field-metadata", &gvalue);
+    g_value_unset(&gvalue);
+
+    return rb_options;
+  }
 }
 
 extern "C" void Init_arrow() {
@@ -94,6 +313,20 @@ extern "C" void Init_arrow() {
                    reinterpret_cast(red_arrow::table_each_raw_record),
                    0);
 
+  auto cArrowMakeStructOptions = rb_const_get_at(mArrow, rb_intern("MakeStructOptions"));
+  rb_define_method(cArrowMakeStructOptions, "field_nullability",
+                   reinterpret_cast(red_arrow::make_struct_options_get_field_nullability),
+                   0);
+  rb_define_method(cArrowMakeStructOptions, "field_nullability=",
+                   reinterpret_cast(red_arrow::make_struct_options_set_field_nullability),
+                   1);
+  rb_define_method(cArrowMakeStructOptions, "field_metadata",
+                   reinterpret_cast(red_arrow::make_struct_options_get_field_metadata),
+                   0);
+  rb_define_method(cArrowMakeStructOptions, "field_metadata=",
+                   reinterpret_cast(red_arrow::make_struct_options_set_field_metadata),
+                   1);
+
   red_arrow::cDate = rb_const_get(rb_cObject, rb_intern("Date"));
 
   red_arrow::cArrowTime = rb_const_get_at(mArrow, rb_intern("Time"));
diff --git a/ruby/red-arrow/ext/arrow/red-arrow.hpp b/ruby/red-arrow/ext/arrow/red-arrow.hpp
index ffc24f9844b..b11021a5823 100644
--- a/ruby/red-arrow/ext/arrow/red-arrow.hpp
+++ b/ruby/red-arrow/ext/arrow/red-arrow.hpp
@@ -62,6 +62,11 @@ namespace red_arrow {
   VALUE record_batch_each_raw_record(VALUE obj);
   VALUE table_each_raw_record(VALUE obj);
 
+  VALUE make_struct_options_get_field_nullability(VALUE obj);
+  VALUE make_struct_options_set_field_nullability(VALUE obj, VALUE field_nullability);
+  VALUE make_struct_options_get_field_metadata(VALUE obj);
+  VALUE make_struct_options_set_field_metadata(VALUE obj, VALUE field_metadata);
+
   inline VALUE time_unit_to_scale(const arrow::TimeUnit::type unit) {
     switch (unit) {
     case arrow::TimeUnit::SECOND:
diff --git a/ruby/red-arrow/test/test-make-struct-options.rb b/ruby/red-arrow/test/test-make-struct-options.rb
new file mode 100644
index 00000000000..76e9d384213
--- /dev/null
+++ b/ruby/red-arrow/test/test-make-struct-options.rb
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestMakeStructOptions < Test::Unit::TestCase
+  def setup
+    @options = Arrow::MakeStructOptions.new
+  end
+
+  # Starts empty, round-trips an Array of names and can be reset.
+  def test_field_names_property
+    names = ["a", "b", "c"]
+    assert_equal([], @options.field_names)
+    @options.field_names = names
+    assert_equal(names, @options.field_names)
+    @options.field_names = []
+    assert_equal([], @options.field_names)
+  end
+
+  # Starts empty, round-trips an Array of booleans and can be reset.
+  def test_field_nullability_property
+    nullability = [true, false, true]
+    assert_equal([], @options.field_nullability)
+    @options.field_nullability = nullability
+    assert_equal(nullability, @options.field_nullability)
+    @options.field_nullability = []
+    assert_equal([], @options.field_nullability)
+  end
+
+  # Starts empty, round-trips an Array of Hashes and can be reset.
+  def test_field_metadata_property
+    metadata = [{"a" => "b"}, {"c" => "d"}]
+    assert_equal([], @options.field_metadata)
+    @options.field_metadata = metadata
+    assert_equal(metadata, @options.field_metadata)
+    @options.field_metadata = []
+    assert_equal([], @options.field_metadata)
+  end
+
+  # Names, nullability and metadata all reach the resulting struct type.
+  def test_make_struct_function
+    metadata = [{"a" => "b"}, {"c" => "d"}]
+    @options.field_names = ["a", "b"]
+    @options.field_nullability = [false, true]
+    @options.field_metadata = metadata
+    args = [
+      Arrow::Int8Array.new([1, 2, 3]),
+      Arrow::BooleanArray.new([true, false, nil]),
+    ]
+    result = Arrow::Function.find("make_struct").execute(args, @options).value
+    fields = [
+      Arrow::Field.new("a", Arrow::Int8DataType.new, false),
+      Arrow::Field.new("b", Arrow::BooleanDataType.new, true),
+    ]
+    rows = [
+      {"a" => 1, "b" => true},
+      {"a" => 2, "b" => false},
+      {"a" => 3, "b" => nil},
+    ]
+    expected = Arrow::StructArray.new(Arrow::StructDataType.new(fields), rows)
+    assert_equal(expected, result)
+    assert_equal(metadata, result.value_data_type.fields.collect(&:metadata))
+  end
+end