diff --git a/r/tests/testthat/test-dplyr-join.R b/r/tests/testthat/test-dplyr-join.R
index 51ca528a644..ce7ed2e63fc 100644
--- a/r/tests/testthat/test-dplyr-join.R
+++ b/r/tests/testthat/test-dplyr-join.R
@@ -188,8 +188,95 @@ test_that("Error handling for unsupported expressions in join_by", {
   )
 })
 
-# TODO: test duplicate col names
-# TODO: casting: int and float columns?
+test_that("joins with duplicate column names", {
+  # When column names are duplicated (not in by), suffixes are added
+  left_dup <- tibble::tibble(
+    key = 1:5,
+    shared = 1:5,
+    shared_float = c(1.1, 2.2, 3.3, 4.4, 5.5),
+    left_unique = letters[1:5]
+  )
+  right_dup <- tibble::tibble(
+    key = 1:5,
+    shared = 6:10,
+    shared_float = c(6.1, 7.2, 8.3, 9.4, 10.5),
+    right_unique = LETTERS[1:5]
+  )
+
+  # Test with default suffixes (.x and .y)
+  compare_dplyr_binding(
+    .input |>
+      left_join(right_dup, by = "key") |>
+      collect(),
+    left_dup
+  )
+
+  compare_dplyr_binding(
+    .input |>
+      inner_join(right_dup, by = "key") |>
+      collect(),
+    left_dup
+  )
+
+  # Test with custom suffixes
+  compare_dplyr_binding(
+    .input |>
+      left_join(right_dup, by = "key", suffix = c("_left", "_right")) |>
+      collect(),
+    left_dup
+  )
+
+  compare_dplyr_binding(
+    .input |>
+      inner_join(right_dup, by = "key", suffix = c("_left", "_right")) |>
+      collect(),
+    left_dup
+  )
+
+  # Test that column names are correctly suffixed
+  # Verify exact column names match expected pattern using the same fixture
+  result <- arrow_table(left_dup) |>
+    inner_join(
+      arrow_table(right_dup),
+      by = "key",
+      suffix = c("_left", "_right")
+    ) |>
+    collect()
+  res_col_names <- names(result)
+  # Column order: join key first, then left table columns (with suffixes),
+  # then right table columns (with suffixes)
+  expected_col_names <- c(
+    "key",
+    "shared_left",
+    "shared_float_left",
+    "left_unique",
+    "shared_right",
+    "shared_float_right",
+    "right_unique"
+  )
+  expect_equal(res_col_names, expected_col_names)
+})
+
+test_that("joins with incompatible types for join keys", {
+  # Test that joining on columns with incompatible types (int vs float) fails
+  # Arrow requires join keys to have compatible types - type casting is not
+  # automatically performed for join keys
+  left_int <- Table$create(
+    x = c(1L, 2L),
+    shared = c(10L, 20L)
+  )
+  right_float <- Table$create(
+    x = c(1.0, 2.0),
+    shared = c(10.1, 20.2)
+  )
+
+  expect_error(
+    left_int |>
+      left_join(right_float, by = "x") |>
+      collect(),
+    "Incompatible data types for corresponding join field keys"
+  )
+})
 
 test_that("right_join", {
   compare_dplyr_binding(
@@ -317,26 +404,6 @@ test_that("arrow dplyr query correctly filters then joins", {
   )
 })
 
-test_that("suffix", {
-  left_suf <- Table$create(
-    key = c(1, 2),
-    left_unique = c(2.1, 3.1),
-    shared = c(10.1, 10.3)
-  )
-
-  right_suf <- Table$create(
-    key = c(1, 2, 3, 10, 20),
-    right_unique = c(1.1, 1.2, 3.1, 4.1, 4.3),
-    shared = c(20.1, 30, 40, 50, 60)
-  )
-
-  join_op <- inner_join(left_suf, right_suf, by = "key", suffix = c("_left", "_right"))
-  output <- collect(join_op)
-  res_col_names <- names(output)
-  expected_col_names <- c("key", "left_unique", "shared_left", "right_unique", "shared_right")
-  expect_equal(expected_col_names, res_col_names)
-})
-
 test_that("suffix and implicit schema", {
   left_suf <- Table$create(
     key = c(1, 2),