From fd8f27a2f7a1e35949f31943ecbf6da48a486e2c Mon Sep 17 00:00:00 2001 From: Tadeja Kadunc Date: Wed, 26 Nov 2025 14:30:03 +0100 Subject: [PATCH 01/12] Initial commit --- docs/source/python/api/compute.rst | 4 +- python/pyarrow/_compute.pyx | 56 ++++++++++++++++++++++++++++ python/pyarrow/compute.py | 2 + python/pyarrow/includes/libarrow.pxd | 12 ++++++ python/pyarrow/tests/test_compute.py | 40 ++++++++++++++++++++ 5 files changed, 113 insertions(+), 1 deletion(-) diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index b74d674ac61..569b7cf0d62 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -532,8 +532,8 @@ Selections drop_null filter inverse_permutation - take scatter + take Sorts and Partitions -------------------- @@ -606,6 +606,7 @@ Compute Options ExtractRegexSpanOptions FilterOptions IndexOptions + InversePermutationOptions JoinOptions ListFlattenOptions ListSliceOptions @@ -635,6 +636,7 @@ Compute Options SkewOptions SliceOptions SortOptions + ScatterOptions SplitOptions SplitPatternOptions StrftimeOptions diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 59fd775b5ac..6e46255f2db 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -1444,6 +1444,62 @@ class RunEndEncodeOptions(_RunEndEncodeOptions): self._set_options(run_end_type) +cdef class _InversePermutationOptions(FunctionOptions): + def _set_options(self, max_index, output_type): + if output_type is None: + self.wrapped.reset(new CInversePermutationOptions(max_index)) + else: + output_ty = ensure_type(output_type) + self.wrapped.reset( + new CInversePermutationOptions(max_index, + pyarrow_unwrap_data_type(output_ty))) + + +class InversePermutationOptions(_InversePermutationOptions): + """ + Options for `inverse_permutation` function. + + Parameters + ---------- + max_index : int64, default -1 + The max value in the input indices to allow. + The length of the function’s output will be this value plus 1. + If negative, this value will be set to the length of the input indices + minus 1 and the length of the function’s output will be the length + of the input indices. + output_type : DataType, default None + The type of the output inverse permutation. + If None, the output will be of the same type as the input indices, otherwise + must be signed integer type. An invalid error will be reported if this type + is not able to store the length of the input indices. + """ + + def __init__(self, max_index=-1, output_type=None): + self._set_options(max_index, output_type) + + +cdef class _ScatterOptions(FunctionOptions): + def _set_options(self, max_index): + self.wrapped.reset(new CScatterOptions(max_index)) + + +class ScatterOptions(_ScatterOptions): + """ + Options for `scatter` function. + + Parameters + ---------- + max_index : int64, default -1 + The max value in the input indices to allow. + The length of the function’s output will be this value plus 1. + If negative, this value will be set to the length of the input indices minus 1 + and the length of the function’s output will be the length of the input indices. + """ + + def __init__(self, max_index=-1): + self._set_options(max_index) + + cdef class _TakeOptions(FunctionOptions): def _set_options(self, boundscheck): self.wrapped.reset(new CTakeOptions(boundscheck)) diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py index fe0afdb0a87..8177948aaeb 100644 --- a/python/pyarrow/compute.py +++ b/python/pyarrow/compute.py @@ -43,6 +43,7 @@ ExtractRegexSpanOptions, FilterOptions, IndexOptions, + InversePermutationOptions, JoinOptions, ListSliceOptions, ListFlattenOptions, @@ -66,6 +67,7 @@ RoundTemporalOptions, RoundToMultipleOptions, ScalarAggregateOptions, + ScatterOptions, SelectKOptions, SetLookupOptions, SkewOptions, diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index c03bf20026e..658d7bd104a 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -2588,6 +2588,18 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: CTakeOptions(c_bool boundscheck) c_bool boundscheck + cdef cppclass CInversePermutationOptions \ + "arrow::compute::InversePermutationOptions"(CFunctionOptions): + CInversePermutationOptions(int64_t max_index) + CInversePermutationOptions(int64_t max_index, shared_ptr[CDataType] output_type) + int64_t max_index + shared_ptr[CDataType] output_type + + cdef cppclass CScatterOptions \ + "arrow::compute::ScatterOptions"(CFunctionOptions): + CScatterOptions(int64_t max_index) + int64_t max_index + cdef cppclass CStrptimeOptions \ "arrow::compute::StrptimeOptions"(CFunctionOptions): CStrptimeOptions(c_string format, TimeUnit unit, c_bool raise_error) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index ca0df36cff2..2b9bb828ea5 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -202,6 +202,7 @@ def test_option_class_equality(request): pc.WeekOptions(week_starts_monday=True, count_from_zero=False, first_week_is_fully_in_year=False), pc.ZeroFillOptions(4, "0"), + pc.InversePermutationOptions(-1, output_type=pa.int32()), ] # Timezone database might not be installed on Windows or Emscripten if request.config.pyarrow.is_enabled["timezone_data"]: @@ -1590,6 +1591,45 @@ def test_filter_null_type(): assert len(table.filter(mask).column(0)) == 5 +def test_inverse_permutation(): + arr0 = pa.array([], type=pa.int32()) + arr = pa.chunked_array([ + arr0, [9, 7, 5, 3, 1], [0], [2, 4, 6], [8], arr0, + ]) + result = pc.inverse_permutation(arr) + print(result) + expected = pa.chunked_array([[5, 4, 6, 3, 7, 2, 8, 1, 9, 0]], type=pa.int32()) + assert result.equals(expected) + + # `inverse_permutation` kernel currently does not accept options + options = pc.InversePermutationOptions(max_index=4, output_type=pa.int64()) + print(options) + with pytest.raises(TypeError, match="an unexpected keyword argument \'options\'"): + pc.inverse_permutation(arr, options=options) + + # `inverse_permutation` kernel currently won't accept max_index + with pytest.raises(TypeError, match="an unexpected keyword argument \'max_index\'"): + pc.inverse_permutation(arr, max_index=4) + + +def test_scatter(): + values = pa.array([True, False, True, True, False, False, True, True, True, False]) + indices = pa.array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) + expected = pa.array([False, True, True, True, False, + False, True, True, False, True]) + result = pc.scatter(values, indices) + assert result.equals(expected) + + # `scatter` kernel currently does not accept options + options = pc.ScatterOptions(max_index=4) + with pytest.raises(TypeError, match="unexpected keyword argument \'options\'"): + pc.scatter(values, indices, options=options) + + # `scatter` kernel currently won't accept max_index + with pytest.raises(TypeError, match="unexpected keyword argument \'max_index\'"): + pc.scatter(values, indices, max_index=4) + + @pytest.mark.parametrize("typ", ["array", "chunked_array"]) def test_compare_array(typ): if typ == "array": From 47e3e8cf8004843764b6f9fe9386ea5a1d5bbc5f Mon Sep 17 00:00:00 2001 From: Tadeja Kadunc Date: Thu, 27 Nov 2025 22:37:00 +0100 Subject: [PATCH 02/12] Adding option class in FunctionDocs of vector_swizzle.cc --- .../arrow/compute/kernels/vector_swizzle.cc | 5 ++-- python/pyarrow/tests/test_compute.py | 29 +++++++------------ 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_swizzle.cc b/cpp/src/arrow/compute/kernels/vector_swizzle.cc index aa82f55c2b8..954663197ca 100644 --- a/cpp/src/arrow/compute/kernels/vector_swizzle.cc +++ b/cpp/src/arrow/compute/kernels/vector_swizzle.cc @@ -32,7 +32,8 @@ namespace { const FunctionDoc inverse_permutation_doc( "Return the inverse permutation of the given indices", - "For the `i`-th `index` in `indices`, the `index`-th output is `i`", {"indices"}); + "For the `i`-th `index` in `indices`, the `index`-th output is `i`", {"indices"}, + "InversePermutationOptions"); const InversePermutationOptions* GetDefaultInversePermutationOptions() { static const auto kDefaultInversePermutationOptions = @@ -332,7 +333,7 @@ void RegisterVectorInversePermutation(FunctionRegistry* registry) { const FunctionDoc scatter_doc( "Scatter the values into specified positions according to the indices", "Place the `i`-th value at the position specified by the `i`-th index", - {"values", "indices"}); + {"values", "indices"}, "ScatterOptions"); const ScatterOptions* GetDefaultScatterOptions() { static const auto kDefaultScatterOptions = ScatterOptions::Defaults(); diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 2b9bb828ea5..72b3a98e942 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -40,7 +40,7 @@ import pyarrow as pa import pyarrow.compute as pc -from pyarrow.lib import ArrowNotImplementedError +from pyarrow.lib import ArrowNotImplementedError, ArrowIndexError try: import pyarrow.substrait as pas @@ -202,7 +202,7 @@ def test_option_class_equality(request): pc.WeekOptions(week_starts_monday=True, count_from_zero=False, first_week_is_fully_in_year=False), pc.ZeroFillOptions(4, "0"), - pc.InversePermutationOptions(-1, output_type=pa.int32()), + pc.InversePermutationOptions(output_type=pa.int32()), ] # Timezone database might not be installed on Windows or Emscripten if request.config.pyarrow.is_enabled["timezone_data"]: @@ -1596,19 +1596,14 @@ def test_inverse_permutation(): arr = pa.chunked_array([ arr0, [9, 7, 5, 3, 1], [0], [2, 4, 6], [8], arr0, ]) - result = pc.inverse_permutation(arr) - print(result) expected = pa.chunked_array([[5, 4, 6, 3, 7, 2, 8, 1, 9, 0]], type=pa.int32()) - assert result.equals(expected) + assert pc.inverse_permutation(arr).equals(expected) - # `inverse_permutation` kernel currently does not accept options - options = pc.InversePermutationOptions(max_index=4, output_type=pa.int64()) - print(options) - with pytest.raises(TypeError, match="an unexpected keyword argument \'options\'"): - pc.inverse_permutation(arr, options=options) + options = pc.InversePermutationOptions(max_index=9, output_type=pa.int32()) + assert pc.inverse_permutation(arr, options=options).equals(expected) + assert pc.inverse_permutation(arr, max_index=-1).equals(expected) - # `inverse_permutation` kernel currently won't accept max_index - with pytest.raises(TypeError, match="an unexpected keyword argument \'max_index\'"): + with pytest.raises(ArrowIndexError, match="Index out of bounds: 9"): pc.inverse_permutation(arr, max_index=4) @@ -1620,13 +1615,11 @@ def test_scatter(): result = pc.scatter(values, indices) assert result.equals(expected) - # `scatter` kernel currently does not accept options - options = pc.ScatterOptions(max_index=4) - with pytest.raises(TypeError, match="unexpected keyword argument \'options\'"): - pc.scatter(values, indices, options=options) + options = pc.ScatterOptions(max_index=-1) + assert pc.scatter(values, indices, options=options).equals(expected) + assert pc.scatter(values, indices, max_index=9).equals(expected) - # `scatter` kernel currently won't accept max_index - with pytest.raises(TypeError, match="unexpected keyword argument \'max_index\'"): + with pytest.raises(ArrowIndexError, match="Index out of bounds: 9"): pc.scatter(values, indices, max_index=4) From 208fd24af28d21644b44151a0b35c2c3edd2b545 Mon Sep 17 00:00:00 2001 From: Tadeja Kadunc Date: Thu, 4 Dec 2025 13:55:23 +0100 Subject: [PATCH 03/12] Including suggested changes to function_internal.h --- cpp/src/arrow/compute/function_internal.h | 5 ++++- python/pyarrow/tests/test_compute.py | 1 - 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/compute/function_internal.h b/cpp/src/arrow/compute/function_internal.h index 9d8928466ba..726a5419953 100644 --- a/cpp/src/arrow/compute/function_internal.h +++ b/cpp/src/arrow/compute/function_internal.h @@ -347,7 +347,7 @@ static inline Result> GenericToScalar( static inline Result> GenericToScalar( const std::shared_ptr& value) { if (!value) { - return Status::Invalid("shared_ptr is nullptr"); + return std::make_shared(); } return MakeNullScalar(value); } @@ -448,6 +448,9 @@ static inline enable_if_same_result GenericFromScalar( template static inline enable_if_same_result> GenericFromScalar( const std::shared_ptr& value) { + if (value->type->id() == Type::NA) { + return std::shared_ptr(); + } return value->type; } diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 72b3a98e942..91eb8105855 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -202,7 +202,6 @@ def test_option_class_equality(request): pc.WeekOptions(week_starts_monday=True, count_from_zero=False, first_week_is_fully_in_year=False), pc.ZeroFillOptions(4, "0"), - pc.InversePermutationOptions(output_type=pa.int32()), ] # Timezone database might not be installed on Windows or Emscripten if request.config.pyarrow.is_enabled["timezone_data"]: From 89896d528ea55ff4b26c63f31fd31ca2c0cb5714 Mon Sep 17 00:00:00 2001 From: Tadeja Kadunc Date: Thu, 4 Dec 2025 14:09:47 +0100 Subject: [PATCH 04/12] Lint --- cpp/src/arrow/compute/function_internal.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/compute/function_internal.h b/cpp/src/arrow/compute/function_internal.h index 726a5419953..fad4651a4ad 100644 --- a/cpp/src/arrow/compute/function_internal.h +++ b/cpp/src/arrow/compute/function_internal.h @@ -448,9 +448,9 @@ static inline enable_if_same_result GenericFromScalar( template static inline enable_if_same_result> GenericFromScalar( const std::shared_ptr& value) { - if (value->type->id() == Type::NA) { - return std::shared_ptr(); - } + if (value->type->id() == Type::NA) { + return std::shared_ptr(); + } return value->type; } From 4cd1e67ab1ff1043db86009328f459914fbf2f99 Mon Sep 17 00:00:00 2001 From: Tadeja Kadunc Date: Thu, 18 Dec 2025 15:42:53 +0100 Subject: [PATCH 05/12] Alternative approach with output_type as optional in cpp. --- cpp/src/arrow/compute/api_vector.cc | 2 +- cpp/src/arrow/compute/api_vector.h | 8 ++++---- cpp/src/arrow/compute/function_internal.h | 13 +++++++++---- cpp/src/arrow/compute/kernels/vector_swizzle.cc | 11 +++-------- python/pyarrow/_compute.pyx | 2 +- python/pyarrow/includes/libarrow.pxd | 2 +- 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc index 538cdccaf2b..1bf4de93520 100644 --- a/cpp/src/arrow/compute/api_vector.cc +++ b/cpp/src/arrow/compute/api_vector.cc @@ -257,7 +257,7 @@ ListFlattenOptions::ListFlattenOptions(bool recursive) constexpr char ListFlattenOptions::kTypeName[]; InversePermutationOptions::InversePermutationOptions( - int64_t max_index, std::shared_ptr output_type) + int64_t max_index, std::optional> output_type) : FunctionOptions(internal::kInversePermutationOptionsType), max_index(max_index), output_type(std::move(output_type)) {} diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index b1676219b16..2fb3f937f81 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -299,7 +299,7 @@ class ARROW_EXPORT ListFlattenOptions : public FunctionOptions { class ARROW_EXPORT InversePermutationOptions : public FunctionOptions { public: explicit InversePermutationOptions(int64_t max_index = -1, - std::shared_ptr output_type = NULLPTR); + std::optional> output_type = std::nullopt); static constexpr const char kTypeName[] = "InversePermutationOptions"; static InversePermutationOptions Defaults() { return InversePermutationOptions(); } @@ -308,11 +308,11 @@ class ARROW_EXPORT InversePermutationOptions : public FunctionOptions { /// of the input indices minus 1 and the length of the function's output will be the /// length of the input indices. int64_t max_index = -1; - /// \brief The type of the output inverse permutation. If null, the output will be of - /// the same type as the input indices, otherwise must be signed integer type. An + /// \brief Optional type of the output inverse permutation. Default of `nullopt` will + /// use the same type as the input indices, otherwise must be signed integer type. An /// invalid error will be reported if this type is not able to store the length of the /// input indices. - std::shared_ptr output_type = NULLPTR; + std::optional> output_type; }; /// \brief Options for scatter function diff --git a/cpp/src/arrow/compute/function_internal.h b/cpp/src/arrow/compute/function_internal.h index fad4651a4ad..433cf39ad9a 100644 --- a/cpp/src/arrow/compute/function_internal.h +++ b/cpp/src/arrow/compute/function_internal.h @@ -347,11 +347,19 @@ static inline Result> GenericToScalar( static inline Result> GenericToScalar( const std::shared_ptr& value) { if (!value) { - return std::make_shared(); + return Status::Invalid("shared_ptr is nullptr"); } return MakeNullScalar(value); } +static inline Result> GenericToScalar( + const std::optional>& value) { + if (!value.has_value()) { + return std::make_shared(); + } + return GenericToScalar(value.value()); +} + static inline Result> GenericToScalar(const TypeHolder& value) { return GenericToScalar(value.GetSharedPtr()); } @@ -448,9 +456,6 @@ static inline enable_if_same_result GenericFromScalar( template static inline enable_if_same_result> GenericFromScalar( const std::shared_ptr& value) { - if (value->type->id() == Type::NA) { - return std::shared_ptr(); - } return value->type; } diff --git a/cpp/src/arrow/compute/kernels/vector_swizzle.cc b/cpp/src/arrow/compute/kernels/vector_swizzle.cc index 954663197ca..25ffe54f384 100644 --- a/cpp/src/arrow/compute/kernels/vector_swizzle.cc +++ b/cpp/src/arrow/compute/kernels/vector_swizzle.cc @@ -51,10 +51,8 @@ Result ResolveInversePermutationOutputType( DCHECK_EQ(input_types.size(), 1); DCHECK_NE(input_types[0], nullptr); - std::shared_ptr output_type = InversePermutationState::Get(ctx).output_type; - if (!output_type) { - output_type = input_types[0].owned_type; - } +std::shared_ptr output_type = + InversePermutationState::Get(ctx).output_type.value_or(input_types[0].owned_type); if (!is_signed_integer(output_type->id())) { return Status::TypeError( "Output type of inverse_permutation must be signed integer, got " + @@ -78,10 +76,7 @@ struct InversePermutationImpl { // Apply default options semantics. int64_t output_length = options.max_index < 0 ? input_length : options.max_index + 1; - std::shared_ptr output_type = options.output_type; - if (!output_type) { - output_type = input_type; - } + std::shared_ptr output_type = options.output_type.value_or(input_type); ThisType impl(ctx, indices, input_length, output_length); RETURN_NOT_OK(VisitTypeInline(*output_type, &impl)); diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 6e46255f2db..30f7f8d2fa4 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -1467,7 +1467,7 @@ class InversePermutationOptions(_InversePermutationOptions): If negative, this value will be set to the length of the input indices minus 1 and the length of the function’s output will be the length of the input indices. - output_type : DataType, default None + output_type : Optional[DataType], default None The type of the output inverse permutation. If None, the output will be of the same type as the input indices, otherwise must be signed integer type. An invalid error will be reported if this type diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 658d7bd104a..977211215cc 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -2593,7 +2593,7 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: CInversePermutationOptions(int64_t max_index) CInversePermutationOptions(int64_t max_index, shared_ptr[CDataType] output_type) int64_t max_index - shared_ptr[CDataType] output_type + optional[shared_ptr[CDataType]] output_type cdef cppclass CScatterOptions \ "arrow::compute::ScatterOptions"(CFunctionOptions): From 375f61b493bba14873cf7734943efb1f97bbd176 Mon Sep 17 00:00:00 2001 From: Tadeja Kadunc Date: Thu, 18 Dec 2025 16:36:42 +0100 Subject: [PATCH 06/12] Update test. --- cpp/src/arrow/compute/kernels/vector_swizzle_test.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/vector_swizzle_test.cc b/cpp/src/arrow/compute/kernels/vector_swizzle_test.cc index 0879955ec49..22b78a016d9 100644 --- a/cpp/src/arrow/compute/kernels/vector_swizzle_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_swizzle_test.cc @@ -162,7 +162,8 @@ TEST(InversePermutation, DefaultOptions) { ARROW_SCOPED_TRACE("Default options values"); InversePermutationOptions options; ASSERT_EQ(options.max_index, -1); - ASSERT_EQ(options.output_type, nullptr); + ASSERT_EQ(options.output_type, std::nullopt); + ASSERT_FALSE(options.output_type.has_value()); } { ARROW_SCOPED_TRACE("Default options semantics"); From 3f095825e1041fb29b156828ceb10bb4e76736ba Mon Sep 17 00:00:00 2001 From: Tadeja Kadunc Date: Thu, 18 Dec 2025 17:22:09 +0100 Subject: [PATCH 07/12] Lint format. --- cpp/src/arrow/compute/api_vector.h | 5 +++-- cpp/src/arrow/compute/function_internal.h | 2 +- cpp/src/arrow/compute/kernels/vector_swizzle.cc | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index 2fb3f937f81..ce614cb60ad 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -298,8 +298,9 @@ class ARROW_EXPORT ListFlattenOptions : public FunctionOptions { /// \brief Options for inverse_permutation function class ARROW_EXPORT InversePermutationOptions : public FunctionOptions { public: - explicit InversePermutationOptions(int64_t max_index = -1, - std::optional> output_type = std::nullopt); + explicit InversePermutationOptions( + int64_t max_index = -1, + std::optional> output_type = std::nullopt); static constexpr const char kTypeName[] = "InversePermutationOptions"; static InversePermutationOptions Defaults() { return InversePermutationOptions(); } diff --git a/cpp/src/arrow/compute/function_internal.h b/cpp/src/arrow/compute/function_internal.h index 433cf39ad9a..a98c68b3e59 100644 --- a/cpp/src/arrow/compute/function_internal.h +++ b/cpp/src/arrow/compute/function_internal.h @@ -347,7 +347,7 @@ static inline Result> GenericToScalar( static inline Result> GenericToScalar( const std::shared_ptr& value) { if (!value) { - return Status::Invalid("shared_ptr is nullptr"); + return Status::Invalid("shared_ptr is nullptr"); } return MakeNullScalar(value); } diff --git a/cpp/src/arrow/compute/kernels/vector_swizzle.cc b/cpp/src/arrow/compute/kernels/vector_swizzle.cc index 25ffe54f384..cf9f5379a69 100644 --- a/cpp/src/arrow/compute/kernels/vector_swizzle.cc +++ b/cpp/src/arrow/compute/kernels/vector_swizzle.cc @@ -51,8 +51,8 @@ Result ResolveInversePermutationOutputType( DCHECK_EQ(input_types.size(), 1); DCHECK_NE(input_types[0], nullptr); -std::shared_ptr output_type = - InversePermutationState::Get(ctx).output_type.value_or(input_types[0].owned_type); + std::shared_ptr output_type = + InversePermutationState::Get(ctx).output_type.value_or(input_types[0].owned_type); if (!is_signed_integer(output_type->id())) { return Status::TypeError( "Output type of inverse_permutation must be signed integer, got " + From 0b10a90573af53964126c01799b99e62afbb4c55 Mon Sep 17 00:00:00 2001 From: Tadeja Kadunc Date: Thu, 18 Dec 2025 20:38:01 +0100 Subject: [PATCH 08/12] Improve bindings --- python/pyarrow/_compute.pyx | 16 +++++++--------- python/pyarrow/includes/libarrow.pxd | 3 +-- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 30f7f8d2fa4..45aad4d3c77 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -1445,14 +1445,12 @@ class RunEndEncodeOptions(_RunEndEncodeOptions): cdef class _InversePermutationOptions(FunctionOptions): - def _set_options(self, max_index, output_type): - if output_type is None: - self.wrapped.reset(new CInversePermutationOptions(max_index)) - else: - output_ty = ensure_type(output_type) - self.wrapped.reset( - new CInversePermutationOptions(max_index, - pyarrow_unwrap_data_type(output_ty))) + def _set_options(self, max_index=-1, output_type=None): + cdef optional[shared_ptr[CDataType]] c_output_type = nullopt + if output_type is not None: + c_output_type = pyarrow_unwrap_data_type(ensure_type(output_type)) + self.wrapped.reset( + new CInversePermutationOptions(max_index, c_output_type)) class InversePermutationOptions(_InversePermutationOptions): @@ -1467,7 +1465,7 @@ class InversePermutationOptions(_InversePermutationOptions): If negative, this value will be set to the length of the input indices minus 1 and the length of the function’s output will be the length of the input indices. - output_type : Optional[DataType], default None + output_type : DataType, default None The type of the output inverse permutation. If None, the output will be of the same type as the input indices, otherwise must be signed integer type. An invalid error will be reported if this type diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 977211215cc..e96a7d84696 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -2590,8 +2590,7 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: cdef cppclass CInversePermutationOptions \ "arrow::compute::InversePermutationOptions"(CFunctionOptions): - CInversePermutationOptions(int64_t max_index) - CInversePermutationOptions(int64_t max_index, shared_ptr[CDataType] output_type) + CInversePermutationOptions(int64_t max_index, optional[shared_ptr[CDataType]] output_type) int64_t max_index optional[shared_ptr[CDataType]] output_type From 640f40ff2e2c8a457010f3623407e721e8d8b64e Mon Sep 17 00:00:00 2001 From: Tadeja Kadunc Date: Thu, 18 Dec 2025 22:49:36 +0100 Subject: [PATCH 09/12] Update docstring --- cpp/src/arrow/compute/api_vector.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index ce614cb60ad..159a787641e 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -309,8 +309,8 @@ class ARROW_EXPORT InversePermutationOptions : public FunctionOptions { /// of the input indices minus 1 and the length of the function's output will be the /// length of the input indices. int64_t max_index = -1; - /// \brief Optional type of the output inverse permutation. Default of `nullopt` will - /// use the same type as the input indices, otherwise must be signed integer type. An + /// \brief The data type for the output array of inverse permutation. Defaults to the + /// type of the input indices when `nullopt`. Must be a signed integer type. An /// invalid error will be reported if this type is not able to store the length of the /// input indices. std::optional> output_type; From e2ba6aa245a4a91f0235652d3afc7ce40046312d Mon Sep 17 00:00:00 2001 From: Tadeja Kadunc Date: Fri, 19 Dec 2025 15:18:30 +0100 Subject: [PATCH 10/12] Update function_internal.h --- cpp/src/arrow/compute/function_internal.h | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/cpp/src/arrow/compute/function_internal.h b/cpp/src/arrow/compute/function_internal.h index a98c68b3e59..7bea4043a5f 100644 --- a/cpp/src/arrow/compute/function_internal.h +++ b/cpp/src/arrow/compute/function_internal.h @@ -352,14 +352,6 @@ static inline Result> GenericToScalar( return MakeNullScalar(value); } -static inline Result> GenericToScalar( - const std::optional>& value) { - if (!value.has_value()) { - return std::make_shared(); - } - return GenericToScalar(value.value()); -} - static inline Result> GenericToScalar(const TypeHolder& value) { return GenericToScalar(value.GetSharedPtr()); } @@ -390,9 +382,10 @@ static inline Result> GenericToScalar(std::nullopt_t) { } template -static inline auto GenericToScalar(const std::optional& value) - -> Result { - return value.has_value() ? MakeScalar(value.value()) : std::make_shared(); +static inline Result> GenericToScalar( + const std::optional& value) { + return value.has_value() ? GenericToScalar(value.value()) + : std::make_shared(); } template From 93ab9e3ed4a5d01e1e1121a51fc4c21293cbf0ab Mon Sep 17 00:00:00 2001 From: tadeja Date: Fri, 19 Dec 2025 21:34:51 +0100 Subject: [PATCH 11/12] Update python docstring --- python/pyarrow/_compute.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 45aad4d3c77..c80e4f9316a 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -1466,9 +1466,9 @@ class InversePermutationOptions(_InversePermutationOptions): minus 1 and the length of the function’s output will be the length of the input indices. output_type : DataType, default None - The type of the output inverse permutation. + The data type for the output array of inverse permutation. If None, the output will be of the same type as the input indices, otherwise - must be signed integer type. An invalid error will be reported if this type + must be a signed integer type. An invalid error will be reported if this type is not able to store the length of the input indices. """ From 45ba7649dee6a76f1a4e23f7296fbd671b1452dd Mon Sep 17 00:00:00 2001 From: Rossi Sun Date: Mon, 29 Dec 2025 13:23:27 +0800 Subject: [PATCH 12/12] Adjust order --- docs/source/python/api/compute.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index 569b7cf0d62..f58856c5bdb 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -631,12 +631,12 @@ Compute Options RoundToMultipleOptions RunEndEncodeOptions ScalarAggregateOptions + ScatterOptions SelectKOptions SetLookupOptions SkewOptions SliceOptions SortOptions - ScatterOptions SplitOptions SplitPatternOptions StrftimeOptions