diff --git a/r/R/dplyr-funcs-string.R b/r/R/dplyr-funcs-string.R
index 36b88fc2f15..158bae2db87 100644
--- a/r/R/dplyr-funcs-string.R
+++ b/r/R/dplyr-funcs-string.R
@@ -481,18 +481,23 @@ register_bindings_string_other <- function() {
       if (allowNA) {
         arrow_not_supported("allowNA = TRUE")
       }
+      # keepNA = NA (the default) keeps NA for every type except "width"
       if (is.na(keepNA)) {
         keepNA <- !identical(type, "width")
       }
-      if (!keepNA) {
-        # TODO: I think there is a fill_null kernel we could use, set null to 2
-        arrow_not_supported("keepNA = TRUE")
-      }
       if (identical(type, "bytes")) {
-        Expression$create("binary_length", x)
+        result <- Expression$create("binary_length", x)
       } else {
-        Expression$create("utf8_length", x)
+        result <- Expression$create("utf8_length", x)
       }
+
+      if (!keepNA) {
+        # base::nchar() returns 2 for NA when keepNA = FALSE, whereas the
+        # length expression yields null there, so fill nulls with 2
+        result <- Expression$create("coalesce", result, Expression$scalar(2L))
+      }
+
+      result
     },
-    notes = "`allowNA = TRUE` and `keepNA = TRUE` not supported"
+    notes = "`allowNA = TRUE` not supported"
   )
diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R
index 26b091b9e04..58da3ea2335 100644
--- a/r/tests/testthat/test-dplyr-funcs-string.R
+++ b/r/tests/testthat/test-dplyr-funcs-string.R
@@ -1442,6 +1442,26 @@ test_that("nchar with namespacing", {
   )
 })
 
+test_that("nchar with keepNA = FALSE", {
+  df <- tibble(x = c("foo", NA_character_, "bar"))
+  compare_dplyr_binding(
+    .input |>
+      mutate(n = nchar(x, keepNA = FALSE)) |>
+      collect(),
+    df
+  )
+})
+
+test_that("nchar with keepNA = TRUE", {
+  df <- tibble(x = c("foo", NA_character_, "bar"))
+  compare_dplyr_binding(
+    .input |>
+      mutate(n = nchar(x, keepNA = TRUE)) |>
+      collect(),
+    df
+  )
+})
+
 test_that("str_trim()", {
   compare_dplyr_binding(
     .input |>