Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
236 changes: 236 additions & 0 deletions c_glib/arrow-glib/compute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <arrow-glib/error.hpp>
#include <arrow-glib/executor.hpp>
#include <arrow-glib/expression.hpp>
#include <arrow-glib/internal-hash-table.hpp>
#include <arrow-glib/reader.hpp>
#include <arrow-glib/record-batch.hpp>
#include <arrow-glib/scalar.hpp>
Expand Down Expand Up @@ -296,6 +297,9 @@ G_BEGIN_DECLS
* #GArrowPairwiseOptions is a class to customize the pairwise
* functions such as `pairwise_diff` and `pairwise_diff_checked`.
*
* #GArrowMakeStructOptions is a class to customize the `make_struct`
* function.
*
* There are many functions to compute data on an array.
*/

Expand Down Expand Up @@ -8349,6 +8353,215 @@ garrow_pairwise_options_new(void)
return GARROW_PAIRWISE_OPTIONS(g_object_new(GARROW_TYPE_PAIRWISE_OPTIONS, NULL));
}

enum {
PROP_MAKE_STRUCT_OPTIONS_FIELD_NAMES = 1,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we keep this? I think that we should not provide this because mixing field_names and add_field() isn't expected. For example, add_field(); field_names = [...] removes the information provided by the first add_field().

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree it can be confusing, but I think this is more convenient if you don't care about nullability or metadata.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. How about providing a convenience API for this in Ruby, something like the following?

module Arrow
  class MakeStructOptions
    def field_names=(names)
      raise ArgumentError, "mixing #add_field and #field_names= is prohibited" unless n_fields.zero?
      names.each do |name|
        add_field(name, true, nil)
      end
    end
  end
end

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if you call #field_names= twice?

PROP_MAKE_STRUCT_OPTIONS_FIELD_NULLABILITY,
PROP_MAKE_STRUCT_OPTIONS_FIELD_METADATA,
};

// Defines the GArrowMakeStructOptions GObject type with
// GARROW_TYPE_FUNCTION_OPTIONS as its parent type. The macro refers to the
// garrow_make_struct_options_init() and
// garrow_make_struct_options_class_init() functions defined below.
G_DEFINE_TYPE(GArrowMakeStructOptions,
garrow_make_struct_options,
GARROW_TYPE_FUNCTION_OPTIONS)

/* GObject property setter.
 *
 * Copies the boxed value carried by @value into the matching vector of the
 * arrow::compute::MakeStructOptions wrapped by @object. A NULL boxed value
 * leaves the corresponding vector empty. Unknown property IDs are reported
 * through G_OBJECT_WARN_INVALID_PROPERTY_ID(). */
static void
garrow_make_struct_options_set_property(GObject *object,
                                        guint prop_id,
                                        const GValue *value,
                                        GParamSpec *pspec)
{
  auto arrow_options = static_cast<arrow::compute::MakeStructOptions *>(
    garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(object)));

  switch (prop_id) {
  case PROP_MAKE_STRUCT_OPTIONS_FIELD_NAMES:
    {
      auto &names = arrow_options->field_names;
      auto raw_names = static_cast<gchar **>(g_value_get_boxed(value));
      names.clear();
      // The string vector is NULL-terminated; a NULL vector means "no names".
      for (gsize i = 0; raw_names && raw_names[i]; ++i) {
        names.emplace_back(raw_names[i]);
      }

      // When the number of names changes, the per-field vectors are reset to
      // their defaults (nullable, no metadata) so that all three vectors
      // have the same length.
      const auto n_fields = names.size();
      auto &nullability = arrow_options->field_nullability;
      if (nullability.size() != n_fields) {
        nullability.assign(n_fields, true);
      }
      auto &metadata = arrow_options->field_metadata;
      if (metadata.size() != n_fields) {
        metadata.assign(n_fields, nullptr);
      }
    }
    break;
  case PROP_MAKE_STRUCT_OPTIONS_FIELD_NULLABILITY:
    {
      auto &nullability = arrow_options->field_nullability;
      nullability.clear();
      auto flags = static_cast<GArray *>(g_value_get_boxed(value));
      const guint n_flags = flags ? flags->len : 0;
      for (guint i = 0; i < n_flags; ++i) {
        // Elements are read as gboolean; any non-FALSE value means nullable.
        nullability.push_back(g_array_index(flags, gboolean, i) != FALSE);
      }
    }
    break;
  case PROP_MAKE_STRUCT_OPTIONS_FIELD_METADATA:
    {
      auto &metadata = arrow_options->field_metadata;
      metadata.clear();
      auto tables = static_cast<GPtrArray *>(g_value_get_boxed(value));
      const guint n_tables = tables ? tables->len : 0;
      for (guint i = 0; i < n_tables; ++i) {
        auto table = static_cast<GHashTable *>(g_ptr_array_index(tables, i));
        if (!table) {
          // A NULL element stands for a field without metadata.
          metadata.push_back(nullptr);
          continue;
        }
        metadata.push_back(garrow_internal_hash_table_to_metadata(table));
      }
    }
    break;
  default:
    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
    break;
  }
}

static void
garrow_make_struct_options_get_property(GObject *object,
guint prop_id,
GValue *value,
GParamSpec *pspec)
{
auto options = static_cast<arrow::compute::MakeStructOptions *>(
garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(object)));

switch (prop_id) {
case PROP_MAKE_STRUCT_OPTIONS_FIELD_NAMES:
{
const auto &names = options->field_names;
auto strv = static_cast<gchar **>(g_new0(gchar *, names.size() + 1));
for (gsize i = 0; i < names.size(); ++i) {
strv[i] = g_strdup(names[i].c_str());
}
g_value_take_boxed(value, strv);
}
break;
case PROP_MAKE_STRUCT_OPTIONS_FIELD_NULLABILITY:
{
const auto &nullability = options->field_nullability;
auto array = g_array_sized_new(FALSE, FALSE, sizeof(gboolean), nullability.size());
for (gsize i = 0; i < nullability.size(); ++i) {
gboolean val = nullability[i] ? TRUE : FALSE;
g_array_append_val(array, val);
}
g_value_take_boxed(value, array);
}
break;
case PROP_MAKE_STRUCT_OPTIONS_FIELD_METADATA:
{
const auto &metadata = options->field_metadata;
auto array = g_ptr_array_sized_new(metadata.size());
for (gsize i = 0; i < metadata.size(); ++i) {
GHashTable *hash_table = nullptr;
if (metadata[i]) {
hash_table = garrow_internal_hash_table_from_metadata(metadata[i]);
}
g_ptr_array_add(array, hash_table);
}
g_value_take_boxed(value, array);
}
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
break;
}
}

/* Instance initializer: stores a default-constructed
 * arrow::compute::MakeStructOptions in the options slot of the
 * GArrowFunctionOptions private data. */
static void
garrow_make_struct_options_init(GArrowMakeStructOptions *object)
{
  arrow::compute::FunctionOptions *arrow_options =
    new arrow::compute::MakeStructOptions();
  auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object);
  priv->options = arrow_options;
}

/* Class initializer: hooks up the property accessors and installs the
 * "field-names", "field-nullability" and "field-metadata" properties.
 *
 * All three properties are boxed and take no default value, so no
 * default-constructed arrow::compute::MakeStructOptions is needed here. */
static void
garrow_make_struct_options_class_init(GArrowMakeStructOptionsClass *klass)
{
  auto gobject_class = G_OBJECT_CLASS(klass);

  gobject_class->set_property = garrow_make_struct_options_set_property;
  gobject_class->get_property = garrow_make_struct_options_get_property;

  GParamSpec *spec;
  /**
   * GArrowMakeStructOptions:field-names:
   *
   * Names for wrapped columns.
   *
   * Since: 23.0.0
   */
  spec = g_param_spec_boxed("field-names",
                            "Field names",
                            "Names for wrapped columns",
                            G_TYPE_STRV,
                            static_cast<GParamFlags>(G_PARAM_READWRITE));
  g_object_class_install_property(gobject_class,
                                  PROP_MAKE_STRUCT_OPTIONS_FIELD_NAMES,
                                  spec);

  /**
   * GArrowMakeStructOptions:field-nullability:
   *
   * Nullability for each field. This is a #GArray of #gboolean values.
   *
   * Since: 23.0.0
   */
  spec = g_param_spec_boxed("field-nullability",
                            "Field nullability",
                            "Nullability for each field",
                            G_TYPE_ARRAY,
                            static_cast<GParamFlags>(G_PARAM_READWRITE));
  g_object_class_install_property(gobject_class,
                                  PROP_MAKE_STRUCT_OPTIONS_FIELD_NULLABILITY,
                                  spec);

  /**
   * GArrowMakeStructOptions:field-metadata:
   *
   * Metadata for each field. This is a #GPtrArray of #GHashTable pointers
   * (each hash table has utf8 keys and utf8 values), or %NULL for fields
   * without metadata.
   *
   * Since: 23.0.0
   */
  spec = g_param_spec_boxed("field-metadata",
                            "Field metadata",
                            "Metadata for each field",
                            G_TYPE_PTR_ARRAY,
                            static_cast<GParamFlags>(G_PARAM_READWRITE));
  g_object_class_install_property(gobject_class,
                                  PROP_MAKE_STRUCT_OPTIONS_FIELD_METADATA,
                                  spec);
}

/**
 * garrow_make_struct_options_new:
 *
 * Returns: A newly created #GArrowMakeStructOptions.
 *
 * Since: 23.0.0
 */
GArrowMakeStructOptions *
garrow_make_struct_options_new(void)
{
  return GARROW_MAKE_STRUCT_OPTIONS(
    g_object_new(GARROW_TYPE_MAKE_STRUCT_OPTIONS, NULL));
}

G_END_DECLS

arrow::Result<arrow::FieldRef>
Expand Down Expand Up @@ -8553,6 +8766,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt
static_cast<const arrow::compute::PairwiseOptions *>(arrow_options);
auto options = garrow_pairwise_options_new_raw(arrow_pairwise_options);
return GARROW_FUNCTION_OPTIONS(options);
} else if (arrow_type_name == "MakeStructOptions") {
const auto arrow_make_struct_options =
static_cast<const arrow::compute::MakeStructOptions *>(arrow_options);
auto options = garrow_make_struct_options_new_raw(arrow_make_struct_options);
return GARROW_FUNCTION_OPTIONS(options);
} else {
auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL);
return GARROW_FUNCTION_OPTIONS(options);
Expand Down Expand Up @@ -9377,3 +9595,21 @@ garrow_pairwise_options_get_raw(GArrowPairwiseOptions *options)
return static_cast<arrow::compute::PairwiseOptions *>(
garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
}

// Creates a new GArrowMakeStructOptions and copy-assigns *arrow_options
// into the arrow::compute::MakeStructOptions it wraps. arrow_options is
// dereferenced unconditionally, so it must not be null.
GArrowMakeStructOptions *
garrow_make_struct_options_new_raw(const arrow::compute::MakeStructOptions *arrow_options)
{
  auto object = g_object_new(GARROW_TYPE_MAKE_STRUCT_OPTIONS, nullptr);
  auto options = GARROW_MAKE_STRUCT_OPTIONS(object);
  auto &arrow_copy = *static_cast<arrow::compute::MakeStructOptions *>(
    garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
  arrow_copy = *arrow_options;
  return options;
}

// Returns the pointer obtained from garrow_function_options_get_raw() for
// options, downcast to arrow::compute::MakeStructOptions.
arrow::compute::MakeStructOptions *
garrow_make_struct_options_get_raw(GArrowMakeStructOptions *options)
{
  auto arrow_options =
    garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options));
  return static_cast<arrow::compute::MakeStructOptions *>(arrow_options);
}
16 changes: 16 additions & 0 deletions c_glib/arrow-glib/compute.h
Original file line number Diff line number Diff line change
Expand Up @@ -1475,4 +1475,20 @@ GARROW_AVAILABLE_IN_23_0
GArrowPairwiseOptions *
garrow_pairwise_options_new(void);

/* GType accessor macro and derivable-type declaration for
 * GArrowMakeStructOptions, whose parent type is GArrowFunctionOptions.
 * The class structure adds no members beyond the parent class. */
#define GARROW_TYPE_MAKE_STRUCT_OPTIONS (garrow_make_struct_options_get_type())
GARROW_AVAILABLE_IN_23_0
G_DECLARE_DERIVABLE_TYPE(GArrowMakeStructOptions,
garrow_make_struct_options,
GARROW,
MAKE_STRUCT_OPTIONS,
GArrowFunctionOptions)
struct _GArrowMakeStructOptionsClass
{
GArrowFunctionOptionsClass parent_class;
};

/* Constructor for GArrowMakeStructOptions; takes no arguments. */
GARROW_AVAILABLE_IN_23_0
GArrowMakeStructOptions *
garrow_make_struct_options_new(void);

G_END_DECLS
6 changes: 6 additions & 0 deletions c_glib/arrow-glib/compute.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,3 +256,9 @@ GArrowPairwiseOptions *
garrow_pairwise_options_new_raw(const arrow::compute::PairwiseOptions *arrow_options);
arrow::compute::PairwiseOptions *
garrow_pairwise_options_get_raw(GArrowPairwiseOptions *options);

// Creates a GArrowMakeStructOptions whose wrapped
// arrow::compute::MakeStructOptions is a copy of *arrow_options.
GArrowMakeStructOptions *
garrow_make_struct_options_new_raw(
const arrow::compute::MakeStructOptions *arrow_options);
// Returns the arrow::compute::MakeStructOptions wrapped by options.
arrow::compute::MakeStructOptions *
garrow_make_struct_options_get_raw(GArrowMakeStructOptions *options);
2 changes: 1 addition & 1 deletion c_glib/arrow-glib/internal-hash-table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ garrow_internal_hash_table_to_metadata(GHashTable *metadata)

static inline GHashTable *
garrow_internal_hash_table_from_metadata(
const std::shared_ptr<arrow::KeyValueMetadata> &arrow_metadata)
const std::shared_ptr<const arrow::KeyValueMetadata> &arrow_metadata)
{
auto metadata = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, g_free);
const auto &keys = arrow_metadata->keys();
Expand Down
55 changes: 55 additions & 0 deletions c_glib/test/test-make-struct-options.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::MakeStructOptions: the field_names property round trip
# and running the "make_struct" compute function with those options.
class TestMakeStructOptions < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    @options = Arrow::MakeStructOptions.new
  end

  # field_names starts out empty and returns what was assigned.
  def test_field_names_property
    assert_equal([], @options.field_names)
    names = ["a", "b", "c"]
    @options.field_names = names
    assert_equal(names, @options.field_names)
  end

  # make_struct combines the input arrays into one struct array whose
  # fields are named after field_names.
  def test_make_struct_function
    @options.field_names = ["a", "b"]
    int8_values = build_int8_array([1, 2, 3])
    boolean_values = build_boolean_array([true, false, nil])
    datums = [int8_values, boolean_values].map do |array|
      Arrow::ArrayDatum.new(array)
    end
    function = Arrow::Function.find("make_struct")
    actual = function.execute(datums, @options).value

    fields = [
      Arrow::Field.new("a", Arrow::Int8DataType.new),
      Arrow::Field.new("b", Arrow::BooleanDataType.new),
    ]
    records = [
      {"a" => 1, "b" => true},
      {"a" => 2, "b" => false},
      {"a" => 3, "b" => nil},
    ]
    assert_equal(build_struct_array(fields, records), actual)
  end
end
Loading
Loading