From ec41e4220100eb04679c64606e0b5e6836ccd291 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 18 Mar 2025 14:44:52 -0400 Subject: [PATCH 1/2] GH-36411: [C++][Python] Use meson-python for PyArrow build system --- .github/workflows/dev.yml | 2 +- .github/workflows/python.yml | 26 +- ci/conda_env_cpp.txt | 2 +- ci/conda_env_python.txt | 2 +- ci/conda_env_sphinx.txt | 1 + ci/scripts/python_build.bat | 114 +- ci/scripts/python_build.sh | 133 ++- ci/scripts/python_sdist_build.sh | 6 +- cpp/cmake_modules/FindSnappyAlt.cmake | 4 - cpp/cmake_modules/Findutf8proc.cmake | 3 - dev/release/01-prepare-test.rb | 7 - dev/release/02-source-test.rb | 7 +- dev/release/post-10-bump-versions-test.rb | 7 - dev/release/utils-prepare.sh | 5 - python/CMakeLists.txt | 1007 ----------------- python/LICENSE.txt | 1 + python/MANIFEST.in | 15 - python/NOTICE.txt | 1 + .../examples/minimal_build/Dockerfile.ubuntu | 1 + python/meson.build | 91 ++ python/meson.options | 114 ++ python/pyarrow/__init__.py | 8 + python/pyarrow/meson.build | 475 ++++++++ python/pyarrow/src/arrow/python/meson.build | 38 + python/pyarrow/tests/util.py | 1 + python/pyproject.toml | 4 +- python/requirements-build.txt | 2 +- python/requirements-test.txt | 1 + .../generate_dist.py} | 20 +- python/setup.py | 437 ------- 30 files changed, 986 insertions(+), 1549 deletions(-) delete mode 100644 python/CMakeLists.txt create mode 120000 python/LICENSE.txt delete mode 100644 python/MANIFEST.in create mode 120000 python/NOTICE.txt create mode 100644 python/meson.build create mode 100644 python/meson.options create mode 100644 python/pyarrow/meson.build create mode 100644 python/pyarrow/src/arrow/python/meson.build rename python/{pyarrow/src/arrow/python/CMakeLists.txt => scripts/generate_dist.py} (64%) delete mode 100755 python/setup.py diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index b763cfbbbca..c728ff23958 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -103,7 +103,7 @@ jobs: shell: bash run: | gem install test-unit openssl - pip install "cython>=3.1" setuptools pytest requests setuptools-scm + pip install "cython>=3.1" meson-python pytest requests setuptools-scm - name: Run Release Test shell: bash run: | diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index e5d367958dd..86cf276d658 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -118,7 +118,7 @@ jobs: with: python-version: 3.12 - name: Setup Archery - run: pip install -e dev/archery[docker] + run: pip install -e dev/archery[docker] meson-python - name: Execute Docker Build env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} @@ -175,7 +175,7 @@ jobs: ARROW_BUILD_TESTS: OFF PYARROW_TEST_LARGE_MEMORY: ON # Current oldest supported version according to https://endoflife.date/macos - MACOSX_DEPLOYMENT_TARGET: 12.0 + MACOSX_DEPLOYMENT_TARGET: "12.0" steps: - name: Checkout Arrow uses: actions/checkout@v6 @@ -288,11 +288,31 @@ jobs: env: # We can invalidate the current cache by updating this. 
CACHE_VERSION: "2025-09-16.1" + - name: Install conda + shell: cmd + run: | + curl -L https://github.com/conda-forge/miniforge/releases/download/25.11.0-0/Miniforge3-25.11.0-0-Windows-x86_64.exe -o miniforge.exe + start /wait "" .\miniforge.exe /S /D=C:\miniforge + del .\miniforge.exe + C:\miniforge\Scripts\conda.exe init cmd.exe + echo C:\miniforge\Scripts;C:\miniforge\Library\bin;C:\miniforge\condabin>> %GITHUB_PATH% + - name: Create conda environment + shell: cmd + run: | + conda env create -n pyarrow-dev -f .\ci\conda_env_cpp.txt -y + conda env update -n pyarrow-dev -f .\ci\conda_env_python.txt - name: Build Arrow C++ and PyArrow shell: cmd + env: + PYTHON_CMD: "python" run: | - call "ci\scripts\python_build.bat" %cd% "${{ steps.path-info.outputs.usr-windows-dir }}" + call activate pyarrow-dev || exit /B 1 + conda list + call "ci\scripts\python_build.bat" %cd% "%CONDA_PREFIX%" - name: Test PyArrow shell: cmd + env: + PYTHON_CMD: "python" run: | + call activate pyarrow-dev call "ci\scripts\python_test.bat" %cd% diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index 18d58f7bb2d..5f264fe5159 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -35,7 +35,7 @@ libboost-devel libgrpc libprotobuf libutf8proc -lz4-c +lz4-c>=1.10.0 make meson ninja diff --git a/ci/conda_env_python.txt b/ci/conda_env_python.txt index eddba95a11f..2e734abe1e4 100644 --- a/ci/conda_env_python.txt +++ b/ci/conda_env_python.txt @@ -24,9 +24,9 @@ cython>=3.1 cloudpickle fsspec hypothesis +meson-python numpy>=1.16.6 pytest pytest-faulthandler s3fs>=2023.10.0 -setuptools>=77 setuptools_scm>=8 diff --git a/ci/conda_env_sphinx.txt b/ci/conda_env_sphinx.txt index 565d147bf77..24908af3dd1 100644 --- a/ci/conda_env_sphinx.txt +++ b/ci/conda_env_sphinx.txt @@ -21,6 +21,7 @@ cython>3.1.1 doxygen ipython linkify-it-py +meson-python # We can't install linuxdoc by conda. We install linuxdoc by pip in # ci/dockerfiles/conda-python-pandas.dockerfile. 
# linuxdoc diff --git a/ci/scripts/python_build.bat b/ci/scripts/python_build.bat index 417cc0d5dd0..78392b9eea1 100644 --- a/ci/scripts/python_build.bat +++ b/ci/scripts/python_build.bat @@ -110,30 +110,110 @@ ccache -sv echo "=== Building Python ===" set PYARROW_BUILD_TYPE=%CMAKE_BUILD_TYPE% -set PYARROW_BUILD_VERBOSE=1 -set PYARROW_BUNDLE_ARROW_CPP=ON -set PYARROW_CMAKE_GENERATOR=%CMAKE_GENERATOR% -set PYARROW_WITH_ACERO=%ARROW_ACERO% -set PYARROW_WITH_DATASET=%ARROW_DATASET% -set PYARROW_WITH_FLIGHT=%ARROW_FLIGHT% -set PYARROW_WITH_GANDIVA=%ARROW_GANDIVA% -set PYARROW_WITH_GCS=%ARROW_GCS% -set PYARROW_WITH_HDFS=%ARROW_HDFS% -set PYARROW_WITH_ORC=%ARROW_ORC% -set PYARROW_WITH_PARQUET=%ARROW_PARQUET% -set PYARROW_WITH_PARQUET_ENCRYPTION=%PARQUET_REQUIRE_ENCRYPTION% -set PYARROW_WITH_SUBSTRAIT=%ARROW_SUBSTRAIT% -set PYARROW_WITH_S3=%ARROW_S3% -set ARROW_HOME=%CMAKE_INSTALL_PREFIX% -set CMAKE_PREFIX_PATH=%CMAKE_INSTALL_PREFIX% +if %ARROW_ACERO% == ON ( +    set PYARROW_WITH_ACERO=enabled +) else if %ARROW_ACERO% == OFF ( +    set PYARROW_WITH_ACERO=disabled +) else ( +    set PYARROW_WITH_ACERO=auto +) +if %ARROW_DATASET% == ON ( +    set PYARROW_WITH_DATASET=enabled +) else if %ARROW_DATASET% == OFF ( +    set PYARROW_WITH_DATASET=disabled +) else ( +    set PYARROW_WITH_DATASET=auto +) +if %ARROW_FLIGHT% == ON ( +    set PYARROW_WITH_FLIGHT=enabled +) else if %ARROW_FLIGHT% == OFF ( +    set PYARROW_WITH_FLIGHT=disabled +) else ( +    set PYARROW_WITH_FLIGHT=auto +) +if %ARROW_GANDIVA% == ON ( +    set PYARROW_WITH_GANDIVA=enabled +) else if %ARROW_GANDIVA% == OFF ( +    set PYARROW_WITH_GANDIVA=disabled +) else ( +    set PYARROW_WITH_GANDIVA=auto +) +if %ARROW_GCS% == ON ( +    set PYARROW_WITH_GCS=enabled +) else if %ARROW_GCS% == OFF ( +    set PYARROW_WITH_GCS=disabled +) else ( +    set PYARROW_WITH_GCS=auto +) +if %ARROW_HDFS% == ON ( +    set PYARROW_WITH_HDFS=enabled +) else if %ARROW_HDFS% == OFF ( +    set PYARROW_WITH_HDFS=disabled +) else ( +    set PYARROW_WITH_HDFS=auto +) +if %ARROW_ORC% == ON ( +    set PYARROW_WITH_ORC=enabled +) else if %ARROW_ORC% == OFF ( +    set PYARROW_WITH_ORC=disabled +) else ( +    set PYARROW_WITH_ORC=auto +) +if %ARROW_PARQUET% == ON ( +    set PYARROW_WITH_PARQUET=enabled +) else if %ARROW_PARQUET% == OFF ( +    set PYARROW_WITH_PARQUET=disabled +) else ( +    set PYARROW_WITH_PARQUET=auto +) +if %PARQUET_REQUIRE_ENCRYPTION% == ON ( +    set PYARROW_WITH_PARQUET_ENCRYPTION=enabled +) else if %PARQUET_REQUIRE_ENCRYPTION% == OFF ( +    set PYARROW_WITH_PARQUET_ENCRYPTION=disabled +) else ( +    set PYARROW_WITH_PARQUET_ENCRYPTION=auto +) +if %ARROW_SUBSTRAIT% == ON ( +    set PYARROW_WITH_SUBSTRAIT=enabled +) else if %ARROW_SUBSTRAIT% == OFF ( +    set PYARROW_WITH_SUBSTRAIT=disabled +) else ( +    set PYARROW_WITH_SUBSTRAIT=auto +) +if %ARROW_S3% == ON ( +    set PYARROW_WITH_S3=enabled +) else if %ARROW_S3% == OFF ( +    set PYARROW_WITH_S3=disabled +) else ( +    set PYARROW_WITH_S3=auto +) +if %CMAKE_BUILD_TYPE% == Release ( +    set MESON_BUILD_TYPE=release +) else ( +    set MESON_BUILD_TYPE=debug +) pushd %SOURCE_DIR%\python @REM Install Python build dependencies %PYTHON_CMD% -m pip install --upgrade pip || exit /B 1 %PYTHON_CMD% -m pip install -r requirements-build.txt || exit /B 1 +%PYTHON_CMD% -m pip install build || exit /B 1 @REM Build PyArrow -%PYTHON_CMD% -m pip install --no-deps --no-build-isolation -vv . || exit /B 1 +%PYTHON_CMD% -m build --wheel --no-isolation . 
^ + -Csetup-args="-Dbuildtype=%MESON_BUILD_TYPE%" ^ + -Csetup-args="-Dacero=%PYARROW_WITH_ACERO%" ^ + -Csetup-args="-Ddataset=%PYARROW_WITH_DATASET%" ^ + -Csetup-args="-Dflight=%PYARROW_WITH_FLIGHT%" ^ + -Csetup-args="-Dgandiva=%PYARROW_WITH_GANDIVA%" ^ + -Csetup-args="-Dgcs=%PYARROW_WITH_GCS%" ^ + -Csetup-args="-Dhdfs=%PYARROW_WITH_HDFS%" ^ + -Csetup-args="-Dorc=%PYARROW_WITH_ORC%" ^ + -Csetup-args="-Dparquet=%PYARROW_WITH_PARQUET%" ^ + -Csetup-args="-Dparquet_require_encryption=%PYARROW_WITH_PARQUET_ENCRYPTION%" ^ + -Csetup-args="-Dsubstrait=%PYARROW_WITH_SUBSTRAIT%" ^ + -Csetup-args="-Ds3=%PYARROW_WITH_S3%" || exit /B 1 +%PYTHON_CMD% -m pip install --no-index --find-links .\dist\ pyarrow popd diff --git a/ci/scripts/python_build.sh b/ci/scripts/python_build.sh index e0c64521cdd..631919ef5e4 100755 --- a/ci/scripts/python_build.sh +++ b/ci/scripts/python_build.sh @@ -39,44 +39,76 @@ if [ -n "${ARROW_PYTHON_VENV:-}" ]; then . "${ARROW_PYTHON_VENV}/bin/activate" fi -case "$(uname)" in - Linux) - n_jobs=$(nproc) - ;; - Darwin) - n_jobs=$(sysctl -n hw.ncpu) - ;; - MINGW*) - n_jobs=${NUMBER_OF_PROCESSORS:-1} - ;; - *) - n_jobs=${NPROC:-1} - ;; -esac - if [ -n "${CONDA_PREFIX}" ]; then echo -e "===\n=== Conda environment for build\n===" conda list fi -export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} -export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug} - -export PYARROW_WITH_ACERO=${ARROW_ACERO:-OFF} -export PYARROW_WITH_AZURE=${ARROW_AZURE:-OFF} -export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF} -export PYARROW_WITH_DATASET=${ARROW_DATASET:-ON} -export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT:-OFF} -export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA:-OFF} -export PYARROW_WITH_GCS=${ARROW_GCS:-OFF} -export PYARROW_WITH_HDFS=${ARROW_HDFS:-ON} -export PYARROW_WITH_ORC=${ARROW_ORC:-OFF} -export PYARROW_WITH_PARQUET=${ARROW_PARQUET:-OFF} -export PYARROW_WITH_PARQUET_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION:-ON} -export PYARROW_WITH_S3=${ARROW_S3:-OFF} -export PYARROW_WITH_SUBSTRAIT=${ARROW_SUBSTRAIT:-OFF} - -export PYARROW_PARALLEL=${n_jobs} +PYARROW_WITH_ACERO=$(case "$ARROW_ACERO" in + ON) echo "enabled" ;; + OFF) echo "disabled" ;; + *) echo "auto" ;; + esac) +PYARROW_WITH_AZURE=$(case "$ARROW_AZURE" in + ON) echo "enabled" ;; + OFF) echo "disabled" ;; + *) echo "auto" ;; + esac) +PYARROW_WITH_CUDA=$(case "$ARROW_CUDA" in + ON) echo "enabled" ;; + OFF) echo "disabled" ;; + *) echo "auto" ;; + esac) +PYARROW_WITH_DATASET=$(case "$ARROW_DATASET" in + ON) echo "enabled" ;; + OFF) echo "disabled" ;; + *) echo "enabled" ;; + esac) +PYARROW_WITH_FLIGHT=$(case "$ARROW_FLIGHT" in + ON) echo "enabled" ;; + OFF) echo "disabled" ;; + *) echo "auto" ;; + esac) +PYARROW_WITH_GANDIVA=$(case "$ARROW_GANDIVA" in + ON) echo "enabled" ;; + OFF) echo "disabled" ;; + *) echo "auto" ;; + esac) +PYARROW_WITH_GCS=$(case "$ARROW_GCS" in + ON) echo "enabled" ;; + OFF) echo "disabled" ;; + *) echo "auto" ;; + esac) +PYARROW_WITH_HDFS=$(case "$ARROW_HDFS" in + ON) echo "enabled" ;; + OFF) echo "disabled" ;; + *) echo "enabled" ;; + esac) +PYARROW_WITH_ORC=$(case "$ARROW_ORC" in + ON) echo "enabled" ;; + OFF) echo "disabled" ;; + *) echo "auto" ;; + esac) +PYARROW_WITH_PARQUET=$(case "$ARROW_PARQUET" in + ON) echo "enabled" ;; + OFF) echo "disabled" ;; + *) echo "auto" ;; + esac) +PYARROW_WITH_PARQUET_ENCRYPTION=$(case "$PARQUET_REQUIRE_ENCRYPTION" in + ON) echo "enabled" ;; + OFF) echo "disabled" ;; + *) echo "enabled" ;; + esac) +PYARROW_WITH_S3=$(case "$ARROW_S3" in + ON) echo "enabled" ;; + OFF) echo "disabled" ;; + *) echo 
"auto" ;; + esac) +PYARROW_WITH_SUBSTRAIT=$(case "$ARROW_SUBSTRAIT" in + ON) echo "enabled" ;; + OFF) echo "disabled" ;; + *) echo "auto" ;; + esac) : "${CMAKE_PREFIX_PATH:=${ARROW_HOME}}" export CMAKE_PREFIX_PATH @@ -93,7 +125,40 @@ pushd "${python_build_dir}" # on Debian/Ubuntu (ARROW-15243). # - Cannot use build isolation as we want to use specific dependency versions # (e.g. Numpy, Pandas) on some CI jobs. -${PYTHON:-python} -m pip install --no-deps --no-build-isolation -vv . + +# The conda compilers package adds flags for debugging and optimization +# that are unnecessary in a CI context +OLD_CFLAGS=$CFLAGS +OLD_CPPFLAGS=$CPPFLAGS +OLD_CXXFLAGS=$CXXFLAGS +export CFLAGS= +export CPPFLAGS= +export CXXFLAGS= + +BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug} +BUILD_TYPE=${BUILD_TYPE,,} # Meson requires lowercase values + +${PYTHON:-python} -m pip install --no-deps --no-build-isolation -vv . \ + -Csetup-args="-Dbuildtype=${BUILD_TYPE}" \ + -Csetup-args="-Dacero=${PYARROW_WITH_ACERO}" \ + -Csetup-args="-Dazure=${PYARROW_WITH_AZURE}" \ + -Csetup-args="-Dcuda=${PYARROW_WITH_CUDA}" \ + -Csetup-args="-Ddataset=${PYARROW_WITH_DATASET}" \ + -Csetup-args="-Dflight=${PYARROW_WITH_FLIGHT}" \ + -Csetup-args="-Dgandiva=${PYARROW_WITH_GANDIVA}" \ + -Csetup-args="-Dgcs=${PYARROW_WITH_GCS}" \ + -Csetup-args="-Dhdfs=${PYARROW_WITH_HDFS}" \ + -Csetup-args="-Dorc=${PYARROW_WITH_ORC}" \ + -Csetup-args="-Dparquet=${PYARROW_WITH_PARQUET}" \ + -Csetup-args="-Dparquet_require_encryption=${PYARROW_WITH_PARQUET_ENCRYPTION}" \ + -Csetup-args="-Ds3=${PYARROW_WITH_S3}" \ + -Csetup-args="-Dsubstrait=${PYARROW_WITH_SUBSTRAIT}" \ + -Ccompile-args="-v" \ + -Csetup-args="--pkg-config-path=${ARROW_HOME}/lib/pkgconfig" + +export CFLAGS=$OLD_CFLAGS +export CPPFLAGS=$OLD_CPPFLAGS +export CXXFLAGS=$OLD_CXXFLAGS popd if [ "${BUILD_DOCS_PYTHON}" == "ON" ]; then diff --git a/ci/scripts/python_sdist_build.sh b/ci/scripts/python_sdist_build.sh index dfb99518431..7bea9c3dc10 100755 --- a/ci/scripts/python_sdist_build.sh +++ b/ci/scripts/python_sdist_build.sh @@ -23,5 +23,9 @@ source_dir=${1}/python pushd "${source_dir}" export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION:-} -${PYTHON:-python} setup.py sdist +# Meson dist must be run from a VCS, so initiate a dummy repo +git init . +git add --all . +git commit -m "dummy commit for meson dist" +${PYTHON:-python} -m build --sdist . 
popd diff --git a/cpp/cmake_modules/FindSnappyAlt.cmake b/cpp/cmake_modules/FindSnappyAlt.cmake index d0a06f0997a..175c2a0f4d1 100644 --- a/cpp/cmake_modules/FindSnappyAlt.cmake +++ b/cpp/cmake_modules/FindSnappyAlt.cmake @@ -75,10 +75,6 @@ if(ARROW_SNAPPY_USE_SHARED) "${CMAKE_SHARED_LIBRARY_PREFIX}snappy${CMAKE_SHARED_LIBRARY_SUFFIX}") else() set(SNAPPY_STATIC_LIB_NAME_BASE "snappy") - if(MSVC) - set(SNAPPY_STATIC_LIB_NAME_BASE - "${SNAPPY_STATIC_LIB_NAME_BASE}${SNAPPY_MSVC_STATIC_LIB_SUFFIX}") - endif() set(SNAPPY_LIB_NAMES "${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_STATIC_LIB_NAME_BASE}${CMAKE_STATIC_LIBRARY_SUFFIX}" ) diff --git a/cpp/cmake_modules/Findutf8proc.cmake b/cpp/cmake_modules/Findutf8proc.cmake index 75d459d0ec7..bb5e2580ab8 100644 --- a/cpp/cmake_modules/Findutf8proc.cmake +++ b/cpp/cmake_modules/Findutf8proc.cmake @@ -73,9 +73,6 @@ if(ARROW_UTF8PROC_USE_SHARED) list(APPEND utf8proc_LIB_NAMES "${CMAKE_SHARED_LIBRARY_PREFIX}utf8proc${CMAKE_SHARED_LIBRARY_SUFFIX}") else() - if(MSVC AND NOT DEFINED utf8proc_MSVC_STATIC_LIB_SUFFIX) - set(utf8proc_MSVC_STATIC_LIB_SUFFIX "_static") - endif() set(utf8proc_STATIC_LIB_SUFFIX "${utf8proc_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}") set(utf8proc_LIB_NAMES diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb index 22afe618002..fb1082573d7 100644 --- a/dev/release/01-prepare-test.rb +++ b/dev/release/01-prepare-test.rb @@ -228,13 +228,6 @@ def test_version_pre_tag "+set(MLARROW_VERSION \"#{@release_version}\")"], ], }, - { - path: "python/CMakeLists.txt", - hunks: [ - ["-set(PYARROW_VERSION \"#{@snapshot_version}\")", - "+set(PYARROW_VERSION \"#{@release_version}\")"], - ], - }, { path: "python/pyproject.toml", hunks: [ diff --git a/dev/release/02-source-test.rb b/dev/release/02-source-test.rb index fe2c7b77591..e292cd1933e 100644 --- a/dev/release/02-source-test.rb +++ b/dev/release/02-source-test.rb @@ -60,7 +60,12 @@ def test_symbolic_links def test_python_version source Dir.chdir("#{@tag_name_no_rc}/python") do - sh("python3", "setup.py", "sdist") + # Meson dist must be run from a VCS, so initiate a dummy repo + sh("git", "init", ".") + sh("git", "add", "--all", ".") + sh("git", "commit", "-m", "dummy commit for meson dist") + sh("python3", "-m", "pip", "install", "build") + sh("python3", "-m", "build", "--sdist", ".", "-Csetup-args=-Dsdist=true") if on_release_branch? 
pyarrow_source_archive = "dist/pyarrow-#{@release_version}.tar.gz" else diff --git a/dev/release/post-10-bump-versions-test.rb b/dev/release/post-10-bump-versions-test.rb index aad4d7c699c..4fbe30d6a7c 100644 --- a/dev/release/post-10-bump-versions-test.rb +++ b/dev/release/post-10-bump-versions-test.rb @@ -250,13 +250,6 @@ def test_version_post_tag "+set(MLARROW_VERSION \"#{@next_snapshot_version}\")"], ], }, - { - path: "python/CMakeLists.txt", - hunks: [ - ["-set(PYARROW_VERSION \"#{@snapshot_version}\")", - "+set(PYARROW_VERSION \"#{@next_snapshot_version}\")"], - ], - }, { path: "python/pyproject.toml", hunks: [ diff --git a/dev/release/utils-prepare.sh b/dev/release/utils-prepare.sh index e67439467cc..64192c93ed3 100644 --- a/dev/release/utils-prepare.sh +++ b/dev/release/utils-prepare.sh @@ -117,11 +117,6 @@ update_versions() { pyproject.toml rm -f pyproject.toml.bak git add pyproject.toml - sed -i.bak -E -e \ - "s/^set\(PYARROW_VERSION \".+\"\)/set(PYARROW_VERSION \"${version}\")/" \ - CMakeLists.txt - rm -f CMakeLists.txt.bak - git add CMakeLists.txt popd pushd "${ARROW_DIR}/r" diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt deleted file mode 100644 index b1c8e324942..00000000000 --- a/python/CMakeLists.txt +++ /dev/null @@ -1,1007 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -# Includes code assembled from BSD/MIT/Apache-licensed code from some 3rd-party -# projects, including Kudu, Impala, and libdynd. See python/LICENSE.txt - -cmake_minimum_required(VERSION 3.25) -project(pyarrow) - -# This is needed for 3.13 free-threading. CMake used to add Python -# include directories with `-isystem`, which led to some Python-internal -# includes to resolve to normal 3.13 includes (cause -isystem includes -# are searched after system directories), instead of 3.13-freethreading, -# which in turn meant that Py_GIL_DISABLED was not set. 
-set(CMAKE_NO_SYSTEM_FROM_IMPORTED ON) - -set(PYARROW_VERSION "23.0.0-SNAPSHOT") -string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" PYARROW_BASE_VERSION "${PYARROW_VERSION}") - -# Generate SO version and full SO version -project(pyarrow VERSION "${PYARROW_BASE_VERSION}") -set(PYARROW_VERSION_MAJOR "${pyarrow_VERSION_MAJOR}") -set(PYARROW_VERSION_MINOR "${pyarrow_VERSION_MINOR}") -set(PYARROW_VERSION_PATCH "${pyarrow_VERSION_PATCH}") -# pyarrow 1.x.y => SO version is "10x", full SO version is "10x.y.0" -# Example: for 18.0.0 --> PYARROW_SO_VERSION=1800, PYARROW_FULL_SO_VERSION=1800.0.0 -math(EXPR PYARROW_SO_VERSION "${PYARROW_VERSION_MAJOR} * 100 + ${PYARROW_VERSION_MINOR}") -set(PYARROW_FULL_SO_VERSION "${PYARROW_SO_VERSION}.${PYARROW_VERSION_PATCH}.0") - -# Running from a Python sdist tarball -set(LOCAL_CMAKE_MODULES "${CMAKE_SOURCE_DIR}/cmake_modules") -if(EXISTS "${LOCAL_CMAKE_MODULES}") - set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${LOCAL_CMAKE_MODULES}) -endif() - -# Running from a git source tree -set(CPP_CMAKE_MODULES "${CMAKE_SOURCE_DIR}/../cpp/cmake_modules") -if(EXISTS "${CPP_CMAKE_MODULES}") - set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CPP_CMAKE_MODULES}) -endif() - -if(PYARROW_CPP_HOME) - list(INSERT CMAKE_PREFIX_PATH 0 "${PYARROW_CPP_HOME}") -endif() - -include(CMakeParseArguments) - -# MACOSX_RPATH is enabled by default. -# https://www.cmake.org/cmake/help/latest/policy/CMP0042.html -cmake_policy(SET CMP0042 NEW) - -# Only interpret if() arguments as variables or keywords when unquoted. -# https://www.cmake.org/cmake/help/latest/policy/CMP0054.html -cmake_policy(SET CMP0054 NEW) - -# RPATH settings on macOS do not affect install_name. -# https://cmake.org/cmake/help/latest/policy/CMP0068.html -if(POLICY CMP0068) - cmake_policy(SET CMP0068 NEW) -endif() - -# find_package() uses _ROOT variables. -# https://cmake.org/cmake/help/latest/policy/CMP0074.html -if(POLICY CMP0074) - cmake_policy(SET CMP0074 NEW) -endif() - -# RPATH entries are properly escaped in the intermediary CMake install script. -# https://cmake.org/cmake/help/latest/policy/CMP0095.html -if(POLICY CMP0095) - cmake_policy(SET CMP0095 NEW) -endif() - -# Use the first Python installation on PATH, not the newest one -set(Python3_FIND_STRATEGY "LOCATION") -# On Windows, use registry last, not first -set(Python3_FIND_REGISTRY "LAST") -# On macOS, use framework last, not first -set(Python3_FIND_FRAMEWORK "LAST") - -# Allow "make install" to not depend on all targets. -# -# Must be declared in the top-level CMakeLists.txt. -set(CMAKE_SKIP_INSTALL_ALL_DEPENDENCY true) - -set(CMAKE_MACOSX_RPATH 1) -if(DEFINED ENV{MACOSX_DEPLOYMENT_TARGET}) - set(CMAKE_OSX_DEPLOYMENT_TARGET $ENV{MACOSX_DEPLOYMENT_TARGET}) -else() - set(CMAKE_OSX_DEPLOYMENT_TARGET 12.0) -endif() - -# Generate a Clang compile_commands.json "compilation database" file for use -# with various development tools, such as Vim's YouCompleteMe plugin. -# See http://clang.llvm.org/docs/JSONCompilationDatabase.html -if("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1") - set(CMAKE_EXPORT_COMPILE_COMMANDS 1) -endif() - -if(UNIX) - set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) - # In the event that we are bundling the shared libraries (e.g. 
in a - # manylinux1 wheel), we need to set the RPATH of the extensions to the - # root of the pyarrow/ package so that libarrow is able to be - # loaded properly - if(APPLE) - set(CMAKE_INSTALL_NAME_DIR "@rpath") - set(CMAKE_INSTALL_RPATH "@loader_path/") - else() - set(CMAKE_INSTALL_RPATH "\$ORIGIN") - endif() -endif() - -find_program(CCACHE_FOUND ccache) -if(CCACHE_FOUND - AND NOT CMAKE_C_COMPILER_LAUNCHER - AND NOT CMAKE_CXX_COMPILER_LAUNCHER) - message(STATUS "Using ccache: ${CCACHE_FOUND}") - set(CMAKE_C_COMPILER_LAUNCHER ${CCACHE_FOUND}) - set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_FOUND}) - # ARROW-3985: let ccache preserve C++ comments, because some of them may be - # meaningful to the compiler - set(ENV{CCACHE_COMMENTS} "1") -endif() - -# -# Compiler flags -# - -include(BuildUtils) - -# Cython generated code emits way to many warnings at CHECKIN and EVERYTHING -set(BUILD_WARNING_LEVEL "PRODUCTION") - -# This must be synchronized with the definition in -# cpp/cmake_modules/DefineOptions.cmake. -if(NOT DEFINED ARROW_SIMD_LEVEL) - set(ARROW_SIMD_LEVEL - "DEFAULT" - CACHE STRING "Compile time SIMD optimization level") -endif() -if(NOT DEFINED ARROW_RUNTIME_SIMD_LEVEL) - set(ARROW_RUNTIME_SIMD_LEVEL - "MAX" - CACHE STRING "Max runtime SIMD optimization level") -endif() -include(SetupCxxFlags) - -if($ENV{PYODIDE}) - # These variables are needed for building PyArrow on Emscripten. - # If they aren't set, CMake cross compiling fails for Python - # modules (at least under Pyodide it does). - set(Python3_INCLUDE_DIR $ENV{PYTHONINCLUDE}) - set(Python3_LIBRARY $ENV{CPYTHONLIB}) - set(Python3_EXECUTABLE) - execute_process(COMMAND ${Python3_EXECUTABLE} -c - "import numpy; print(numpy.__version__)" - OUTPUT_VARIABLE PYODIDE_NUMPY_VERSION - OUTPUT_STRIP_TRAILING_WHITESPACE) - string(REGEX MATCH "^([0-9]+)" PYODIDE_NUMPY_MAJOR_VERSION ${PYODIDE_NUMPY_VERSION}) - if(PYODIDE_NUMPY_MAJOR_VERSION GREATER_EQUAL 2) - set(Python3_NumPy_INCLUDE_DIR $ENV{NUMPY_LIB}/_core/include) - else() - set(Python3_NumPy_INCLUDE_DIR $ENV{NUMPY_LIB}/core/include) - endif() - set(ENV{_PYTHON_SYSCONFIGDATA_NAME} $ENV{SYSCONFIG_NAME}) - # we set the c and cxx compiler manually to bypass pywasmcross - # which is pyodide's way of messing with C++ build parameters. - set(CMAKE_C_COMPILER emcc) - set(CMAKE_CXX_COMPILER em++) -endif() - -# Add common flags -set(CMAKE_CXX_FLAGS "${CXX_COMMON_FLAGS} ${CMAKE_CXX_FLAGS}") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PYARROW_CXXFLAGS}") - -if(MSVC) - # MSVC version of -Wno-return-type-c-linkage - string(APPEND CMAKE_CXX_FLAGS " /wd4190") - - # Cython generates some bitshift expressions that MSVC does not like in - # __Pyx_PyFloat_DivideObjC - string(APPEND CMAKE_CXX_FLAGS " /wd4293") - - # Converting to/from C++ bool is pretty wonky in Cython. The C4800 warning - # seem harmless, and probably not worth the effort of working around it - string(APPEND CMAKE_CXX_FLAGS " /wd4800") - - # See https://github.com/cython/cython/issues/4445. - # - # Cython 3 emits "(void)__Pyx_PyObject_CallMethod0;" to suppress a - # "unused function" warning but the code emits another "function - # call missing argument list" warning. 
- string(APPEND CMAKE_CXX_FLAGS " /wd4551") -else() - # Enable perf and other tools to work properly - string(APPEND CMAKE_CXX_FLAGS " -fno-omit-frame-pointer") - - # Suppress Cython warnings - string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-variable -Wno-maybe-uninitialized") - - if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL - "Clang") - # Cython warnings in clang - string(APPEND CMAKE_CXX_FLAGS " -Wno-parentheses-equality") - string(APPEND CMAKE_CXX_FLAGS " -Wno-constant-logical-operand") - string(APPEND CMAKE_CXX_FLAGS " -Wno-missing-declarations") - string(APPEND CMAKE_CXX_FLAGS " -Wno-sometimes-uninitialized") - - # We have public Cython APIs which return C++ types, which are in an extern - # "C" blog (no symbol mangling) and clang doesn't like this - string(APPEND CMAKE_CXX_FLAGS " -Wno-return-type-c-linkage") - endif() -endif() - -# For any C code, use the same flags. -set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS}") - -# Add C++-only flags, like -std=c++20 -set(CMAKE_CXX_FLAGS "${CXX_ONLY_FLAGS} ${CMAKE_CXX_FLAGS}") - -message(STATUS "CMAKE_C_FLAGS: ${CMAKE_C_FLAGS}") -message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") - -if(MSVC) - # MSVC makes its own output directories based on the build configuration - set(BUILD_SUBDIR_NAME "") -else() - # Set compile output directory - string(TOLOWER ${CMAKE_BUILD_TYPE} BUILD_SUBDIR_NAME) -endif() - -# If build in-source, create the latest symlink. If build out-of-source, which is -# preferred, simply output the binaries in the build folder -if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_BINARY_DIR}) - set(BUILD_OUTPUT_ROOT_DIRECTORY - "${CMAKE_CURRENT_BINARY_DIR}/build/${BUILD_SUBDIR_NAME}") - # Link build/latest to the current build directory, to avoid developers - # accidentally running the latest debug build when in fact they're building - # release builds. 
- file(MAKE_DIRECTORY ${BUILD_OUTPUT_ROOT_DIRECTORY}) - if(NOT APPLE) - set(MORE_ARGS "-T") - endif() - execute_process(COMMAND ln ${MORE_ARGS} -sf ${BUILD_OUTPUT_ROOT_DIRECTORY} - ${CMAKE_CURRENT_BINARY_DIR}/build/latest) -else() - set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${BUILD_SUBDIR_NAME}") -endif() - -message(STATUS "Generator: ${CMAKE_GENERATOR}") -message(STATUS "Build output directory: ${BUILD_OUTPUT_ROOT_DIRECTORY}") - -# where to put generated archives (.a files) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}") -set(ARCHIVE_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}") - -# where to put generated libraries (.so files) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}") -set(LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}") - -# where to put generated binaries -set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}") - -# Python and Numpy libraries -find_package(Python3Alt REQUIRED) -message(STATUS "Found NumPy version: ${Python3_NumPy_VERSION}") -message(STATUS "NumPy include dir: ${NUMPY_INCLUDE_DIRS}") - -include(UseCython) -message(STATUS "Found Cython version: ${CYTHON_VERSION}") - -# Arrow C++ and set default PyArrow build options -include(GNUInstallDirs) -find_package(Arrow REQUIRED) - -macro(define_option name description arrow_option) - set("PYARROW_${name}" - "AUTO" - CACHE STRING ${description}) - - if("${PYARROW_${name}}" STREQUAL "AUTO") - # by default, first check if env variable exists, otherwise use Arrow C++ config - set(env_variable "PYARROW_WITH_${name}") - if(DEFINED ENV{${env_variable}}) - if($ENV{${env_variable}}) - set("PYARROW_BUILD_${name}" ON) - else() - set("PYARROW_BUILD_${name}" OFF) - endif() - else() - if(${arrow_option}) - set("PYARROW_BUILD_${name}" ON) - else() - set("PYARROW_BUILD_${name}" OFF) - endif() - endif() - else() - if("${PYARROW_${name}}") - set("PYARROW_BUILD_${name}" ON) - else() - set("PYARROW_BUILD_${name}" OFF) - endif() - endif() -endmacro() - -define_option(ACERO "Build the PyArrow Acero integration" ARROW_ACERO) -define_option(CUDA "Build the PyArrow CUDA support" ARROW_CUDA) -define_option(DATASET "Build the PyArrow Dataset integration" ARROW_DATASET) -define_option(FLIGHT "Build the PyArrow Flight integration" ARROW_FLIGHT) -define_option(GANDIVA "Build the PyArrow Gandiva integration" ARROW_GANDIVA) -define_option(ORC "Build the PyArrow ORC integration" ARROW_ORC) -define_option(PARQUET "Build the PyArrow Parquet integration" ARROW_PARQUET) -define_option(PARQUET_ENCRYPTION "Build the PyArrow Parquet encryption integration" - PARQUET_REQUIRE_ENCRYPTION) -define_option(SUBSTRAIT "Build the PyArrow Substrait integration" ARROW_SUBSTRAIT) -define_option(AZURE "Build the PyArrow Azure integration" ARROW_AZURE) -define_option(GCS "Build the PyArrow GCS integration" ARROW_GCS) -define_option(S3 "Build the PyArrow S3 integration" ARROW_S3) -define_option(HDFS "Build the PyArrow HDFS integration" ARROW_HDFS) -option(PYARROW_BUNDLE_ARROW_CPP "Bundle the Arrow C++ libraries" OFF) -option(PYARROW_BUNDLE_CYTHON_CPP "Bundle the C++ files generated by Cython" OFF) -option(PYARROW_GENERATE_COVERAGE "Build with Cython code coverage enabled" OFF) -set(PYARROW_CXXFLAGS - "" - CACHE STRING "Compiler flags to append when compiling PyArrow C++") - -# enforce module dependencies -if(PYARROW_BUILD_SUBSTRAIT) - set(PYARROW_BUILD_DATASET ON) -endif() -if(PYARROW_BUILD_DATASET) - set(PYARROW_BUILD_ACERO ON) -endif() - -# PyArrow C++ -set(PYARROW_CPP_ROOT_DIR pyarrow/src) 
-set(PYARROW_CPP_SOURCE_DIR ${PYARROW_CPP_ROOT_DIR}/arrow/python) - -# Write out compile-time configuration constants -string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_PYBUILD_TYPE) -configure_file("${PYARROW_CPP_SOURCE_DIR}/config_internal.h.cmake" - "${PYARROW_CPP_SOURCE_DIR}/config_internal.h" ESCAPE_QUOTES) - -set(PYARROW_CPP_SRCS - ${PYARROW_CPP_SOURCE_DIR}/arrow_to_pandas.cc - ${PYARROW_CPP_SOURCE_DIR}/benchmark.cc - ${PYARROW_CPP_SOURCE_DIR}/common.cc - ${PYARROW_CPP_SOURCE_DIR}/config.cc - ${PYARROW_CPP_SOURCE_DIR}/datetime.cc - ${PYARROW_CPP_SOURCE_DIR}/decimal.cc - ${PYARROW_CPP_SOURCE_DIR}/extension_type.cc - ${PYARROW_CPP_SOURCE_DIR}/gdb.cc - ${PYARROW_CPP_SOURCE_DIR}/helpers.cc - ${PYARROW_CPP_SOURCE_DIR}/inference.cc - ${PYARROW_CPP_SOURCE_DIR}/io.cc - ${PYARROW_CPP_SOURCE_DIR}/ipc.cc - ${PYARROW_CPP_SOURCE_DIR}/numpy_convert.cc - ${PYARROW_CPP_SOURCE_DIR}/numpy_init.cc - ${PYARROW_CPP_SOURCE_DIR}/numpy_to_arrow.cc - ${PYARROW_CPP_SOURCE_DIR}/python_test.cc - ${PYARROW_CPP_SOURCE_DIR}/python_to_arrow.cc - ${PYARROW_CPP_SOURCE_DIR}/pyarrow.cc - ${PYARROW_CPP_SOURCE_DIR}/udf.cc - ${PYARROW_CPP_SOURCE_DIR}/util.cc) -set_source_files_properties(${PYARROW_CPP_SOURCE_DIR}/numpy_init.cc - PROPERTIES SKIP_UNITY_BUILD_INCLUSION ON) - -set(PYARROW_CPP_LINK_LIBS "") - -# -# Arrow vs PyArrow C++ options -# - -# Check all the options from Arrow and PyArrow C++ to be in line -# -# Order is important for "NOT ARROW_BUILD_SHARED". We must use -# depending modules -> depended modules order. For example, -# ArrowSubstrait depends on ArrowDataset. So PYARROW_CPP_LINK_LIBS -# must use -# "ArrowSubstrait::arrow_substrait_static;ArrowDataset::arrow_dataset_static" -# order. - -if(PYARROW_BUILD_SUBSTRAIT) - message(STATUS "Building PyArrow with Substrait") - if(NOT ARROW_SUBSTRAIT) - message(FATAL_ERROR "You must build Arrow C++ with ARROW_SUBSTRAIT=ON") - endif() - find_package(ArrowSubstrait REQUIRED) - if(ARROW_BUILD_SHARED) - list(APPEND PYARROW_CPP_LINK_LIBS ArrowSubstrait::arrow_substrait_shared) - else() - list(APPEND PYARROW_CPP_LINK_LIBS ArrowSubstrait::arrow_substrait_static) - endif() -endif() - -if(PYARROW_BUILD_DATASET) - message(STATUS "Building PyArrow with Dataset") - if(NOT ARROW_DATASET) - message(FATAL_ERROR "You must build Arrow C++ with ARROW_DATASET=ON") - endif() - find_package(ArrowDataset REQUIRED) - if(ARROW_BUILD_SHARED) - list(APPEND PYARROW_CPP_LINK_LIBS ArrowDataset::arrow_dataset_shared) - else() - list(APPEND PYARROW_CPP_LINK_LIBS ArrowDataset::arrow_dataset_static) - endif() -endif() - -if(PYARROW_BUILD_ACERO) - message(STATUS "Building PyArrow with Acero") - if(NOT ARROW_ACERO) - message(FATAL_ERROR "You must build Arrow C++ with ARROW_ACERO=ON") - endif() - find_package(ArrowAcero REQUIRED) - if(ARROW_BUILD_SHARED) - list(APPEND PYARROW_CPP_LINK_LIBS ArrowAcero::arrow_acero_shared) - else() - list(APPEND PYARROW_CPP_LINK_LIBS ArrowAcero::arrow_acero_static) - endif() -endif() - -# Currently PyArrow cannot be built without ARROW_COMPUTE -if(NOT ARROW_COMPUTE) - message(FATAL_ERROR "You must build Arrow C++ with ARROW_COMPUTE=ON") -else() - message(STATUS "Building PyArrow with Compute") - find_package(ArrowCompute REQUIRED) - if(ARROW_BUILD_SHARED) - list(APPEND PYARROW_CPP_LINK_LIBS ArrowCompute::arrow_compute_shared) - else() - list(APPEND PYARROW_CPP_LINK_LIBS ArrowCompute::arrow_compute_static) - endif() -endif() - -if(PYARROW_BUILD_PARQUET) - message(STATUS "Building PyArrow with Parquet") - if(NOT ARROW_PARQUET) - message(FATAL_ERROR "You must build Arrow C++ 
with ARROW_PARQUET=ON") - endif() - find_package(Parquet REQUIRED) -else() - if(PYARROW_BUILD_PARQUET_ENCRYPTION) - message(WARNING "Building PyArrow with Parquet Encryption is requested, but Parquet itself is not enabled. Ignoring the Parquet Encryption setting." - ) - set(PYARROW_BUILD_PARQUET_ENCRYPTION OFF) - endif() -endif() - -# Check for only Arrow C++ options -if(ARROW_CSV) - list(APPEND PYARROW_CPP_SRCS ${PYARROW_CPP_SOURCE_DIR}/csv.cc) -else() - message(FATAL_ERROR "You must build Arrow C++ with ARROW_CSV=ON") -endif() - -if(ARROW_FILESYSTEM) - list(APPEND PYARROW_CPP_SRCS ${PYARROW_CPP_SOURCE_DIR}/filesystem.cc) -endif() - -if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - set_property(SOURCE ${PYARROW_CPP_SOURCE_DIR}/pyarrow.cc - APPEND_STRING - PROPERTY COMPILE_FLAGS " -Wno-cast-qual ") -endif() - -if(NOT PYARROW_CPP_LINK_LIBS) - if(ARROW_BUILD_SHARED) - list(APPEND PYARROW_CPP_LINK_LIBS Arrow::arrow_shared) - else() - list(APPEND PYARROW_CPP_LINK_LIBS Arrow::arrow_static) - endif() -endif() - -add_library(arrow_python SHARED ${PYARROW_CPP_SRCS}) -target_include_directories(arrow_python PUBLIC ${PYARROW_CPP_ROOT_DIR} - ${CMAKE_CURRENT_BINARY_DIR}/pyarrow/src) - -# on static builds we need to be careful not to link PYARROW_CPP_LINK_LIBS -# into everything depending on arrow_python, or else we get duplicate -# libraries. Whereas conversely on shared builds, we need everything -# to depend on everything, as python loads modules separately -if(ARROW_BUILD_SHARED) - target_link_libraries(arrow_python PUBLIC ${PYARROW_CPP_LINK_LIBS}) -else() - target_link_libraries(arrow_python PRIVATE ${PYARROW_CPP_LINK_LIBS}) -endif() -target_link_libraries(arrow_python PUBLIC Python3::NumPy) -target_compile_definitions(arrow_python PRIVATE ARROW_PYTHON_EXPORTING) -set_target_properties(arrow_python PROPERTIES VERSION "${PYARROW_FULL_SO_VERSION}" - SOVERSION "${PYARROW_SO_VERSION}") -install(TARGETS arrow_python - ARCHIVE DESTINATION . - LIBRARY DESTINATION . - RUNTIME DESTINATION .) - -set(PYARROW_CPP_ENCRYPTION_SRCS ${PYARROW_CPP_SOURCE_DIR}/parquet_encryption.cc) -if(NOT PYARROW_BUILD_PARQUET_ENCRYPTION) - message(STATUS "Parquet Encryption is NOT Enabled") -else() - if(PARQUET_REQUIRE_ENCRYPTION) - add_library(arrow_python_parquet_encryption SHARED ${PYARROW_CPP_ENCRYPTION_SRCS}) - target_link_libraries(arrow_python_parquet_encryption PUBLIC arrow_python - ${PARQUET_LINK_LIBS}) - target_compile_definitions(arrow_python_parquet_encryption - PRIVATE ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORTING) - set_target_properties(arrow_python_parquet_encryption - PROPERTIES VERSION "${PYARROW_FULL_SO_VERSION}" - SOVERSION "${PYARROW_SO_VERSION}") - install(TARGETS arrow_python_parquet_encryption - ARCHIVE DESTINATION . - LIBRARY DESTINATION . - RUNTIME DESTINATION .) 
- message(STATUS "Parquet Encryption Enabled") - else() - message(FATAL_ERROR "You must build Arrow C++ with PARQUET_REQUIRE_ENCRYPTION=ON") - endif() -endif() - -set(PYARROW_CPP_FLIGHT_SRCS ${PYARROW_CPP_SOURCE_DIR}/flight.cc) -if(PYARROW_BUILD_FLIGHT) - message(STATUS "Building PyArrow with Flight") - if(NOT ARROW_FLIGHT) - message(FATAL_ERROR "You must build Arrow C++ with ARROW_FLIGHT=ON") - endif() - # Must link to shared libarrow_flight: we don't want to link more than one - # copy of gRPC into the eventual Cython shared object, otherwise gRPC calls - # fail with weird errors due to multiple copies of global static state (The - # other solution is to link gRPC shared everywhere instead of statically only - # in Flight) - if(NOT ARROW_BUILD_SHARED) - message(FATAL_ERROR "You must build Arrow C++ with ARROW_BUILD_SHARED=ON") - endif() - find_package(ArrowFlight REQUIRED) - - add_library(arrow_python_flight SHARED ${PYARROW_CPP_FLIGHT_SRCS}) - target_link_libraries(arrow_python_flight PUBLIC arrow_python - ArrowFlight::arrow_flight_shared) - target_compile_definitions(arrow_python_flight PRIVATE ARROW_PYFLIGHT_EXPORTING) - set_target_properties(arrow_python_flight - PROPERTIES VERSION "${PYARROW_FULL_SO_VERSION}" - SOVERSION "${PYARROW_SO_VERSION}") - install(TARGETS arrow_python_flight - ARCHIVE DESTINATION . - LIBRARY DESTINATION . - RUNTIME DESTINATION .) -endif() - -if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - # Clang, be quiet. Python C API has lots of macros - set_property(SOURCE ${PYARROW_CPP_SRCS} ${PYARROW_CPP_FLIGHT_SRCS} - APPEND_STRING - PROPERTY COMPILE_FLAGS -Wno-parentheses-equality) -endif() - -install(DIRECTORY ${PYARROW_CPP_SOURCE_DIR}/ - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/arrow/python - FILES_MATCHING - PATTERN "*internal.h" EXCLUDE - PATTERN "*.h") - -function(bundle_arrow_lib library_path) - set(options) - set(one_value_args SO_VERSION) - set(multi_value_args) - cmake_parse_arguments(ARG - "${options}" - "${one_value_args}" - "${multi_value_args}" - ${ARGN}) - if(ARG_UNPARSED_ARGUMENTS) - message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}") - endif() - - get_filename_component(LIBRARY_PATH_REAL ${library_path} REALPATH) - get_filename_component(LIBRARY_NAME ${library_path} NAME_WE) - - # Only copy the shared library with ABI version on Linux and macOS - - if(MSVC) - install(FILES ${LIBRARY_PATH_REAL} - DESTINATION "." - RENAME ${LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) - elseif(APPLE) - install(FILES ${LIBRARY_PATH_REAL} - DESTINATION "." - RENAME ${LIBRARY_NAME}.${ARG_SO_VERSION}${CMAKE_SHARED_LIBRARY_SUFFIX}) - else() - install(FILES ${LIBRARY_PATH_REAL} - DESTINATION "." - RENAME ${LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}.${ARG_SO_VERSION}) - endif() -endfunction() - -function(bundle_arrow_import_lib library_path) - get_filename_component(LIBRARY_NAME ${library_path} NAME_WE) - install(FILES ${library_path} - DESTINATION "." 
- RENAME ${LIBRARY_NAME}.lib) -endfunction() - -function(bundle_arrow_dependency library_name) - if(MSVC) - if(DEFINED ENV{CONDA_PREFIX}) - file(TO_CMAKE_PATH "$ENV{CONDA_PREFIX}\\Library" SHARED_LIB_HOME) - endif() - else() - if(DEFINED ENV{CONDA_PREFIX}) - file(TO_CMAKE_PATH "$ENV{CONDA_PREFIX}" SHARED_LIB_HOME) - endif() - endif() - if(DEFINED ENV{${library_name}_HOME}) - file(TO_CMAKE_PATH "$ENV{${library_name}_HOME}" SHARED_LIB_HOME) - endif() - arrow_build_shared_library_name(shared_lib_name "${library_name}") - unset(SHARED_LIB_PATH CACHE) - if(MSVC) - set(CMAKE_SHARED_LIBRARY_SUFFIXES_ORIGINAL ${CMAKE_FIND_LIBRARY_SUFFIXES}) - # .dll isn't found by find_library with MSVC because .dll isn't included in - # CMAKE_FIND_LIBRARY_SUFFIXES. - list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES "${CMAKE_SHARED_LIBRARY_SUFFIX}") - endif() - if(SHARED_LIB_HOME) - find_library(SHARED_LIB_PATH - NAMES "${shared_lib_name}" - PATHS "${SHARED_LIB_HOME}" - PATH_SUFFIXES ${ARROW_SEARCH_LIB_PATH_SUFFIXES} - NO_DEFAULT_PATH) - else() - find_library(SHARED_LIB_PATH - NAMES "${shared_lib_name}" - PATH_SUFFIXES ${ARROW_SEARCH_LIB_PATH_SUFFIXES}) - endif() - if(MSVC) - set(CMAKE_SHARED_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES_ORIGINAL}) - endif() - if(SHARED_LIB_PATH) - get_filename_component(SHARED_LIB_REALPATH ${SHARED_LIB_PATH} REALPATH) - get_filename_component(SHARED_LIB_NAME ${SHARED_LIB_PATH} NAME) - message(STATUS "Bundle dependency ${library_name}: ${SHARED_LIB_REALPATH} as ${SHARED_LIB_NAME}" - ) - install(FILES ${SHARED_LIB_REALPATH} - DESTINATION "." - RENAME ${SHARED_LIB_NAME}) - else() - message(FATAL_ERROR "Unable to bundle dependency: ${library_name}") - endif() -endfunction() - -# Always bundle includes -get_filename_component(ARROW_INCLUDE_ARROW_DIR_REAL ${ARROW_INCLUDE_DIR}/arrow REALPATH) -install(DIRECTORY ${ARROW_INCLUDE_ARROW_DIR_REAL} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) - -if(PYARROW_BUNDLE_ARROW_CPP) - # Arrow and Compute - bundle_arrow_lib(${ARROW_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) - bundle_arrow_lib(${ARROW_COMPUTE_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) - - if(MSVC) - bundle_arrow_import_lib(${ARROW_IMPORT_LIB}) - bundle_arrow_import_lib(${ARROW_COMPUTE_IMPORT_LIB}) - endif() -endif() - -# -# Cython modules -# - -set(CYTHON_EXTENSIONS - lib - _compute - _csv - _feather - _fs - _json - _pyarrow_cpp_tests) -set_source_files_properties(pyarrow/lib.pyx PROPERTIES CYTHON_API TRUE) - -set(LINK_LIBS arrow_python) - -if(PYARROW_BUILD_AZURE) - message(STATUS "Building PyArrow with Azure") - if(NOT ARROW_AZURE) - message(FATAL_ERROR "You must build Arrow C++ with ARROW_AZURE=ON") - endif() - list(APPEND CYTHON_EXTENSIONS _azurefs) -endif() - -if(PYARROW_BUILD_GCS) - message(STATUS "Building PyArrow with GCS") - if(NOT ARROW_GCS) - message(FATAL_ERROR "You must build Arrow C++ with ARROW_GCS=ON") - endif() - list(APPEND CYTHON_EXTENSIONS _gcsfs) -endif() - -if(PYARROW_BUILD_S3) - message(STATUS "Building PyArrow with S3") - if(NOT ARROW_S3) - message(FATAL_ERROR "You must build Arrow C++ with ARROW_S3=ON") - endif() - list(APPEND CYTHON_EXTENSIONS _s3fs) -endif() - -if(PYARROW_BUILD_HDFS) - message(STATUS "Building PyArrow with HDFS") - if(NOT ARROW_HDFS) - message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON") - endif() - list(APPEND CYTHON_EXTENSIONS _hdfs) -endif() - -if(PYARROW_BUILD_CUDA) - message(STATUS "Building PyArrow with CUDA") - if(NOT ARROW_CUDA) - message(FATAL_ERROR "You must build Arrow C++ with ARROW_CUDA=ON") - endif() - find_package(ArrowCUDA 
REQUIRED) - - if(PYARROW_BUNDLE_ARROW_CPP) - bundle_arrow_lib(${ARROW_CUDA_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) - if(MSVC) - bundle_arrow_import_lib(${ARROW_CUDA_IMPORT_LIB}) - endif() - endif() - set(CUDA_LINK_LIBS ArrowCUDA::arrow_cuda_shared) - list(APPEND CYTHON_EXTENSIONS _cuda) - set_source_files_properties(pyarrow/_cuda.pyx PROPERTIES CYTHON_API TRUE) -endif() - -# Acero -if(PYARROW_BUILD_ACERO) - if(ARROW_BUILD_SHARED) - if(PYARROW_BUNDLE_ARROW_CPP) - bundle_arrow_lib(${ARROW_ACERO_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) - if(MSVC) - bundle_arrow_import_lib(${ARROW_ACERO_IMPORT_LIB}) - endif() - endif() - - set(ACERO_LINK_LIBS ArrowAcero::arrow_acero_shared) - else() - # Acero is statically linked into libarrow_python already - set(ACERO_LINK_LIBS) - endif() - list(APPEND CYTHON_EXTENSIONS _acero) -endif() - -# Dataset -if(PYARROW_BUILD_DATASET) - if(ARROW_BUILD_SHARED) - if(PYARROW_BUNDLE_ARROW_CPP) - bundle_arrow_lib(${ARROW_DATASET_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) - if(MSVC) - bundle_arrow_import_lib(${ARROW_DATASET_IMPORT_LIB}) - endif() - endif() - - set(DATASET_LINK_LIBS ArrowDataset::arrow_dataset_shared) - else() - # dataset is statically linked into libarrow_python already - set(DATASET_LINK_LIBS) - endif() - list(APPEND CYTHON_EXTENSIONS _dataset) -endif() - -# Parquet -if(PYARROW_BUILD_PARQUET) - if(PYARROW_BUNDLE_ARROW_CPP) - get_filename_component(PARQUET_INCLUDE_PARQUET_DIR_REAL - ${PARQUET_INCLUDE_DIR}/parquet REALPATH) - install(DIRECTORY ${PARQUET_INCLUDE_PARQUET_DIR_REAL} - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) - endif() - - if(ARROW_BUILD_SHARED) - if(PYARROW_BUNDLE_ARROW_CPP) - bundle_arrow_lib(${PARQUET_SHARED_LIB} SO_VERSION ${PARQUET_SO_VERSION}) - if(MSVC) - bundle_arrow_import_lib(${PARQUET_IMPORT_LIB}) - endif() - endif() - set(PARQUET_LINK_LIBS Parquet::parquet_shared) - else() - # don't link the static lib as it is - # already in arrow_python - set(PARQUET_LINK_LIBS) - endif() - list(APPEND CYTHON_EXTENSIONS _parquet) - if(PYARROW_BUILD_PARQUET_ENCRYPTION) - list(APPEND CYTHON_EXTENSIONS _parquet_encryption) - endif() - if(PYARROW_BUILD_DATASET) - list(APPEND CYTHON_EXTENSIONS _dataset_parquet) - if(PYARROW_BUILD_PARQUET_ENCRYPTION) - list(APPEND CYTHON_EXTENSIONS _dataset_parquet_encryption) - endif() - endif() -endif() - -# ORC -if(PYARROW_BUILD_ORC) - message(STATUS "Building PyArrow with ORC") - if(NOT ARROW_ORC) - message(FATAL_ERROR "You must build Arrow C++ with ARROW_ORC=ON") - endif() - list(APPEND CYTHON_EXTENSIONS _orc) - if(PYARROW_BUILD_DATASET) - list(APPEND CYTHON_EXTENSIONS _dataset_orc) - endif() -endif() - -# Flight -if(PYARROW_BUILD_FLIGHT) - if(PYARROW_BUNDLE_ARROW_CPP) - bundle_arrow_lib(${ARROW_FLIGHT_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) - if(MSVC) - bundle_arrow_import_lib(${ARROW_FLIGHT_IMPORT_LIB}) - # XXX Hardcoded library names because CMake is too stupid to give us - # the shared library paths. 
- # https://gitlab.kitware.com/cmake/cmake/issues/16210 - # bundle_arrow_dependency(libcrypto-1_1-x64) - # bundle_arrow_dependency(libssl-1_1-x64) - endif() - endif() - - set(FLIGHT_LINK_LIBS arrow_python_flight) - list(APPEND CYTHON_EXTENSIONS _flight) -else() - set(FLIGHT_LINK_LIBS "") -endif() - -# Substrait -if(PYARROW_BUILD_SUBSTRAIT) - message(STATUS "Building PyArrow with Substrait") - - if(ARROW_BUILD_SHARED) - if(PYARROW_BUNDLE_ARROW_CPP) - bundle_arrow_lib(${ARROW_SUBSTRAIT_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) - if(MSVC) - bundle_arrow_import_lib(${ARROW_SUBSTRAIT_IMPORT_LIB}) - endif() - endif() - set(SUBSTRAIT_LINK_LIBS ArrowSubstrait::arrow_substrait_shared) - else() - # Arrow Substrait is statically linked into libarrow_python already - set(SUBSTRAIT_LINK_LIBS) - endif() - - list(APPEND CYTHON_EXTENSIONS _substrait) -endif() - -# Gandiva -if(PYARROW_BUILD_GANDIVA) - message(STATUS "Building PyArrow with Gandiva") - if(NOT ARROW_GANDIVA) - message(FATAL_ERROR "You must build Arrow C++ with ARROW_GANDIVA=ON") - endif() - find_package(Gandiva REQUIRED) - - if(PYARROW_BUNDLE_ARROW_CPP) - get_filename_component(GANDIVA_INCLUDE_GANDIVA_DIR_REAL - ${GANDIVA_INCLUDE_DIR}/gandiva REALPATH) - install(DIRECTORY ${GANDIVA_INCLUDE_GANDIVA_DIR_REAL} - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) - - bundle_arrow_lib(${GANDIVA_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) - - if(MSVC) - bundle_arrow_import_lib(${GANDIVA_IMPORT_LIB}) - endif() - endif() - - set(GANDIVA_LINK_LIBS Gandiva::gandiva_shared) - list(APPEND CYTHON_EXTENSIONS gandiva) -endif() - -# -# Setup and build Cython modules -# - -if(PYARROW_GENERATE_COVERAGE) - set(CYTHON_FLAGS "${CYTHON_FLAGS}" "-Xlinetrace=True") -endif() - -# Error on any warnings not already explicitly ignored. -set(CYTHON_FLAGS "${CYTHON_FLAGS}" "--warning-errors") -# GH-40236: make generated C++ code easier to compile by disabling an -# undocumented Cython feature. -set(CYTHON_FLAGS "${CYTHON_FLAGS}" "--no-c-in-traceback") - -if(CYTHON_VERSION VERSION_GREATER_EQUAL "3.1.0a0") - list(APPEND CYTHON_FLAGS "-Xfreethreading_compatible=True") -endif() - -foreach(module ${CYTHON_EXTENSIONS}) - string(REPLACE "." ";" directories ${module}) - list(GET directories -1 module_name) - list(REMOVE_AT directories -1) - - string(REPLACE "." "/" module_root "${module}") - set(module_SRC pyarrow/${module_root}.pyx) - set_source_files_properties(${module_SRC} PROPERTIES CYTHON_IS_CXX TRUE) - - cython_add_module(${module_name} ${module_name}_pyx ${module_name}_output ${module_SRC}) - - if(directories) - string(REPLACE ";" "/" module_output_directory ${directories}) - set_target_properties(${module_name} PROPERTIES LIBRARY_OUTPUT_DIRECTORY - ${module_output_directory}) - endif() - - # XXX(wesm): ARROW-2326 this logic is only needed when we have Cython - # modules in interior directories. 
Since all of our C extensions and - # bundled libraries are in the same place, we can skip this part - - # list(LENGTH directories i) - # while(${i} GREATER 0) - # set(module_install_rpath "${module_install_rpath}/..") - # math(EXPR i "${i} - 1" ) - # endwhile(${i} GREATER 0) - - if(PYARROW_GENERATE_COVERAGE) - set_target_properties(${module_name} PROPERTIES COMPILE_DEFINITIONS - "CYTHON_TRACE=1;CYTHON_TRACE_NOGIL=1") - endif() - - target_link_libraries(${module_name} PRIVATE ${LINK_LIBS}) - - install(TARGETS ${module_name} LIBRARY DESTINATION ".") - foreach(output ${${module_name}_output}) - if(output MATCHES "\\.${CYTHON_CXX_EXTENSION}$") - if(NOT PYARROW_BUNDLE_CYTHON_CPP) - continue() - endif() - endif() - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${output} DESTINATION ".") - endforeach() -endforeach() - -set(ARROW_PYTHON_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/pyarrow/src/arrow/python") -file(MAKE_DIRECTORY ${ARROW_PYTHON_BINARY_DIR}) -add_custom_command(OUTPUT "${ARROW_PYTHON_BINARY_DIR}/lib_api.h" - "${ARROW_PYTHON_BINARY_DIR}/lib.h" - COMMAND ${CMAKE_COMMAND} -E copy - "${CMAKE_CURRENT_BINARY_DIR}/lib_api.h" - "${CMAKE_CURRENT_BINARY_DIR}/lib.h" - "${ARROW_PYTHON_BINARY_DIR}/" - DEPENDS lib_pyx) -add_custom_target(cython_api_headers DEPENDS "${ARROW_PYTHON_BINARY_DIR}/lib_api.h" - "${ARROW_PYTHON_BINARY_DIR}/lib.h") -add_dependencies(arrow_python cython_api_headers) -install(FILES "${ARROW_PYTHON_BINARY_DIR}/lib_api.h" "${ARROW_PYTHON_BINARY_DIR}/lib.h" - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/arrow/python) - -# Additional link libraries - -if(PYARROW_BUILD_CUDA) - target_link_libraries(_cuda PRIVATE ${CUDA_LINK_LIBS}) -endif() - -if(PYARROW_BUILD_FLIGHT) - target_link_libraries(_flight PRIVATE ${FLIGHT_LINK_LIBS}) -endif() - -if(PYARROW_BUILD_SUBSTRAIT) - target_link_libraries(_substrait PRIVATE ${SUBSTRAIT_LINK_LIBS}) -endif() - -if(PYARROW_BUILD_ACERO) - target_link_libraries(_acero PRIVATE ${ACERO_LINK_LIBS}) -endif() - -if(PYARROW_BUILD_DATASET) - target_link_libraries(_dataset PRIVATE ${DATASET_LINK_LIBS}) - if(PYARROW_BUILD_ORC) - target_link_libraries(_dataset_orc PRIVATE ${DATASET_LINK_LIBS}) - endif() - if(PYARROW_BUILD_PARQUET) - target_link_libraries(_dataset_parquet PRIVATE ${DATASET_LINK_LIBS}) - endif() -endif() - -if(PYARROW_BUILD_GANDIVA) - target_link_libraries(gandiva PRIVATE ${GANDIVA_LINK_LIBS}) -endif() - -if(PYARROW_BUILD_PARQUET) - target_link_libraries(_parquet PRIVATE ${PARQUET_LINK_LIBS}) - if(PYARROW_BUILD_PARQUET_ENCRYPTION) - target_link_libraries(_parquet_encryption PRIVATE arrow_python_parquet_encryption) - endif() -endif() diff --git a/python/LICENSE.txt b/python/LICENSE.txt new file mode 120000 index 00000000000..4ab43736a83 --- /dev/null +++ b/python/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/python/MANIFEST.in b/python/MANIFEST.in deleted file mode 100644 index ed7012e4b70..00000000000 --- a/python/MANIFEST.in +++ /dev/null @@ -1,15 +0,0 @@ -include README.md -include ../LICENSE.txt -include ../NOTICE.txt - -global-include CMakeLists.txt -graft pyarrow -graft cmake_modules - -global-exclude *.so -global-exclude *.pyc -global-exclude *~ -global-exclude \#* -global-exclude .git* -global-exclude .DS_Store -prune .asv diff --git a/python/NOTICE.txt b/python/NOTICE.txt new file mode 120000 index 00000000000..eb9f24e040b --- /dev/null +++ b/python/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/python/examples/minimal_build/Dockerfile.ubuntu 
b/python/examples/minimal_build/Dockerfile.ubuntu index 5c6b1131684..508471cb53c 100644 --- a/python/examples/minimal_build/Dockerfile.ubuntu +++ b/python/examples/minimal_build/Dockerfile.ubuntu @@ -29,6 +29,7 @@ RUN apt-get update -y -q && \ cmake \ git \ ninja-build \ + pkg-config \ python3-dev \ python3-pip \ python3-venv \ diff --git a/python/meson.build b/python/meson.build new file mode 100644 index 00000000000..c65a8378683 --- /dev/null +++ b/python/meson.build @@ -0,0 +1,91 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +project( + 'pyarrow', + 'cython', + 'cpp', + version: run_command( + 'python', + '-m', + 'setuptools_scm', + '--force-write-version-files', + check: true, + ).stdout().strip(), + license: 'Apache-2.0', + license_files: ['LICENSE.txt'], + meson_version: '>=1.4.0', + default_options: ['buildtype=release', 'cpp_std=c++20'], +) + +py = import('python').find_installation(pure: false) + +install_data('NOTICE.txt', install_dir: py.get_install_dir() / 'pyarrow') +# Meson-python suggests using a dist script to resolve symlinks to physical +# files when creating the sdist. See +# https://github.com/mesonbuild/meson-python/discussions/823#discussioncomment-15078932 +meson.add_dist_script( + py, + meson.project_source_root() / 'scripts/generate_dist.py', +) + +if get_option('default_library') == 'static' + cmake_suffix = 'static' +else + cmake_suffix = 'shared' +endif + +# https://github.com/mesonbuild/meson-python/issues/647 +if get_option('sdist') + arrow_dep = [] +else + arrow_dep = dependency( + 'arrow', + 'Arrow', + modules: [f'Arrow::arrow_@cmake_suffix@'], + ) +endif + +# When NumPy 2.0 becomes the minimum we can remove the +# custom location check +numpy_dep = dependency('numpy', required: false) +if not numpy_dep.found() + incdir_numpy = run_command( + py, + [ + '-c', + '''import os +import numpy as np +try: + # Check if include directory is inside the pyarrow dir + # e.g. a venv created inside the pyarrow dir + # If so, convert it to a relative path + incdir = os.path.relpath(np.get_include()) +except Exception: + incdir = np.get_include() +print(incdir) +''', + ], + check: true, + ).stdout().strip() + + numpy_dep = declare_dependency(include_directories: incdir_numpy) +endif + +cc = meson.get_compiler('cpp') + +subdir('pyarrow') diff --git a/python/meson.options b/python/meson.options new file mode 100644 index 00000000000..6dc389c0ac9 --- /dev/null +++ b/python/meson.options @@ -0,0 +1,114 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +option( + 'acero', + type: 'feature', + description: 'build the Acero Engine extension', + value: 'auto', +) + +option( + 'azure', + type: 'feature', + description: 'build the Azure Blob Storage extension', + value: 'auto', +) + +option( + 'cuda', + type: 'feature', + description: 'build the Cuda extension', + value: 'auto', +) + +option( + 'dataset', + type: 'feature', + description: 'build the Dataset extension', + value: 'auto', +) + +option( + 'flight', + type: 'feature', + description: 'build the Flight extension', + value: 'auto', +) + +option( + 'gandiva', + type: 'feature', + description: 'build the Gandiva extension', + value: 'auto', +) + +option( + 'gcs', + type: 'feature', + description: 'build the Google Cloud Storage (GCS) extension', + value: 'auto', +) + +option( + 'hdfs', + type: 'feature', + description: 'build the HDFS extension', + value: 'auto', +) + +option( + 'orc', + type: 'feature', + description: 'build the ORC extension', + value: 'auto', +) + +option( + 'parquet', + type: 'feature', + description: 'build the Parquet extension', + value: 'auto', +) + +option( + 'parquet_require_encryption', + type: 'feature', + description: 'build the Parquet encryption extension', + value: 'auto', +) + +option( + 'sdist', + type: 'boolean', + description: 'Build a Python source distribution', + value: false, +) + +option( + 's3', + type: 'feature', + description: 'build the Amazon S3 extension', + value: 'auto', +) + +option( + 'substrait', + type: 'feature', + description: 'build the Substrait extension', + value: 'auto', +) diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index da2fe966475..785f411d74a 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -431,6 +431,14 @@ def append_library_dir(library_dir): if _os.path.exists(pyarrow_libs_dir): append_library_dir(pyarrow_libs_dir) + # Source installations on Windows into a conda environment need to + # expose the appropriate conda directories that Arrow C++ installs + # to by default + if _os.environ.get('CONDA_PREFIX'): + prefix = _os.environ['CONDA_PREFIX'] + append_library_dir(_os.path.join(prefix, 'bin')) + append_library_dir(_os.path.join(prefix, 'Lib')) + # ARROW-4074: Allow for ARROW_HOME to be set to some other directory if _os.environ.get('ARROW_HOME'): append_library_dir(_os.path.join(_os.environ['ARROW_HOME'], 'lib')) diff --git a/python/pyarrow/meson.build b/python/pyarrow/meson.build new file mode 100644 index 00000000000..9b0a81e1e63 --- /dev/null +++ b/python/pyarrow/meson.build @@ -0,0 +1,475 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +cython_args = ['--include-dir', meson.current_source_dir()] +if get_option('buildtype') in ['debug', 'debugoptimized'] + cython_args += ['--gdb'] +endif + +pyarrow_srcs = files( + 'src/arrow/python/arrow_to_pandas.cc', + 'src/arrow/python/benchmark.cc', + 'src/arrow/python/common.cc', + 'src/arrow/python/config.cc', + 'src/arrow/python/csv.cc', + 'src/arrow/python/datetime.cc', + 'src/arrow/python/decimal.cc', + 'src/arrow/python/extension_type.cc', + 'src/arrow/python/filesystem.cc', + 'src/arrow/python/gdb.cc', + 'src/arrow/python/helpers.cc', + 'src/arrow/python/inference.cc', + 'src/arrow/python/io.cc', + 'src/arrow/python/ipc.cc', + 'src/arrow/python/numpy_convert.cc', + 'src/arrow/python/numpy_init.cc', + 'src/arrow/python/numpy_to_arrow.cc', + 'src/arrow/python/pyarrow.cc', + 'src/arrow/python/python_test.cc', + 'src/arrow/python/python_to_arrow.cc', + 'src/arrow/python/udf.cc', + 'src/arrow/python/util.cc', +) + +subdir('src/arrow/python') + +if get_option('sdist') + arrow_compute_dep = [] +else + arrow_compute_dep = dependency( + 'arrow-compute', + 'ArrowCompute', + modules: [f'ArrowCompute::arrow_compute_@cmake_suffix@'], + ) +endif + +libpyarrow_deps = [arrow_dep, arrow_compute_dep] + +cython_modules = { + 'lib': {}, + '_compute': {'dependencies': arrow_compute_dep}, + '_csv': {}, + '_feather': {}, + '_fs': {}, + '_json': {}, + '_pyarrow_cpp_tests': {}, +} + +needs_substrait = get_option('substrait').enabled() +needs_dataset = get_option('dataset').enabled() or needs_substrait +needs_acero = get_option('acero').enabled() or needs_dataset +needs_flight = get_option('flight').enabled() + +if needs_acero + if get_option('sdist') + arrow_acero_dep = [] + else + arrow_acero_dep = dependency( + 'arrow-acero', + 'ArrowAcero', + modules: [f'ArrowAcero::arrow_acero_@cmake_suffix@'], + ) + endif + libpyarrow_deps += [arrow_acero_dep] + cython_modules += {'_acero': {'dependencies': arrow_acero_dep}} +endif + +if get_option('azure').enabled() + cython_modules += {'_azurefs': {}} +endif + +if get_option('cuda').enabled() + if get_option('sdist') + arrow_cuda_dep = [] + else + arrow_cuda_dep = dependency( + 'arrow-cuda', + 'ArrowCUDA', + modules: [f'ArrowCUDA::arrow_cuda_@cmake_suffix@'], + ) + endif + libpyarrow_deps += [arrow_cuda_dep] + cython_modules += {'_cuda': {'dependencies': arrow_cuda_dep}} +endif + +if needs_dataset + if get_option('sdist') + arrow_dataset_dep = [] + else + arrow_dataset_dep = dependency( + 'arrow-dataset', + 'ArrowDataset', + modules: [f'ArrowDataset::arrow_dataset_@cmake_suffix@'], + ) + endif + libpyarrow_deps += [arrow_dataset_dep] + cython_modules += {'_dataset': {'dependencies': arrow_dataset_dep}} +endif + +if needs_flight + if get_option('sdist') + arrow_flight_dep = [] + else + arrow_flight_dep = dependency( + 'arrow-flight', + 'ArrowFlight', + modules: [f'ArrowFlight::arrow_flight_@cmake_suffix@'], + ) + endif + libpyarrow_deps += [arrow_flight_dep] +endif + +if get_option('gandiva').enabled() + if get_option('sdist') + gandiva_dep = [] + else + gandiva_dep = dependency( + 'gandiva', + 'Gandiva', + modules: [f'Gandiva::gandiva_@cmake_suffix@'], 
+ ) + endif + libpyarrow_deps += [gandiva_dep] + cython_modules += {'gandiva': {'dependencies': gandiva_dep}} +endif + +if get_option('gcs').enabled() + cython_modules += {'_gcsfs': {}} +endif + +if get_option('hdfs').enabled() + cython_modules += {'_hdfs': {}} +endif + +if get_option('orc').enabled() + cython_modules += {'_orc': {}} + + if needs_dataset + cython_modules += {'_dataset_orc': {'dependencies': arrow_dataset_dep}} + endif +endif + +if get_option('s3').enabled() + cython_modules += {'_s3fs': {}} +endif + +if needs_substrait + if get_option('sdist') + arrow_substrait_dep = [] + else + arrow_substrait_dep = dependency( + 'arrow-substrait', + 'ArrowSubstrait', + modules: [f'ArrowSubstrait::arrow_substrait_@cmake_suffix@'], + ) + endif + libpyarrow_deps += [arrow_substrait_dep] + cython_modules += {'_substrait': {'dependencies': arrow_substrait_dep}} +endif + +needs_parquet = get_option('parquet').enabled() +needs_parquet_encryption = get_option('parquet_require_encryption').enabled() +if needs_parquet_encryption and not needs_parquet + warning( + ''' + Building PyArrow with Parquet Encryption is requested, but Parquet + itself is not enabled. Ignoring the Parquet Encryption setting., + ''', + ) + needs_parquet_encryption = false +endif + +if needs_parquet + if get_option('sdist') + parquet_dep = [] + else + parquet_dep = dependency( + 'parquet', + 'Parquet', + modules: [f'Parquet::parquet_@cmake_suffix@'], + ) + endif + libpyarrow_deps += [parquet_dep] + cython_modules += {'_parquet': {'dependencies': parquet_dep}} + + if needs_dataset + cython_modules += { + '_dataset_parquet': { + 'dependencies': [parquet_dep, arrow_dataset_dep], + }, + } + endif +endif + +gnu_symbol_visibility = host_machine.system() == 'darwin' ? 'default' : 'inlineshidden' + +if get_option('default_library') == 'static' + pyarrow_private_args = ['-DARROW_PYTHON_STATIC'] + pyarrow_public_args = ['-DARROW_PYTHON_STATIC'] +else + pyarrow_private_args = ['-DARROW_PYTHON_EXPORTING'] + pyarrow_public_args = [] +endif + +pyarrow_lib = library( + 'arrow_python', + sources: pyarrow_srcs, + include_directories: ['src'], + dependencies: libpyarrow_deps + [ + numpy_dep, + cython_generated_dep, + py.dependency(), + ], + cpp_args: pyarrow_private_args, + install: true, + install_dir: py.get_install_dir() / 'pyarrow', + gnu_symbol_visibility: gnu_symbol_visibility, + override_options: ['b_lundef=false'], +) +pyarrow_lib_dep = declare_dependency( + link_with: [pyarrow_lib], + compile_args: pyarrow_public_args, +) + +if needs_flight + if get_option('default_library') == 'static' + pyarrow_flight_private_args = ['-DARROW_PYFLIGHT_STATIC'] + pyarrow_flight_public_args = ['-DARROW_PYFLIGHT_STATIC'] + else + pyarrow_flight_private_args = ['-DARROW_PYFLIGHT_EXPORTING'] + pyarrow_flight_public_args = [] + endif + + pyarrow_flight_lib = library( + 'arrow_flight_lib', + sources: ['src/arrow/python/flight.cc'], + dependencies: [arrow_flight_dep, pyarrow_lib_dep, py.dependency()], + include_directories: ['src'], + cpp_args: pyarrow_flight_private_args, + install: true, + install_dir: py.get_install_dir() / 'pyarrow', + gnu_symbol_visibility: gnu_symbol_visibility, + override_options: ['b_lundef=false'], + ) + + pyarrow_flight_dep = declare_dependency( + link_with: [pyarrow_flight_lib], + dependencies: [arrow_flight_dep], + compile_args: pyarrow_flight_public_args, + ) + cython_modules += {'_flight': {'dependencies': [pyarrow_flight_dep]}} +endif + +if needs_parquet_encryption + if get_option('default_library') == 'static' + 
pyarrow_pq_enc_private_args = [ + '-DARROW_PYTHON_PARQUET_ENCRYPTION_STATIC', + ] + pyarrow_pq_enc_public_args = [ + '-DARROW_PYTHON_PARQUET_ENCRYPTION_STATIC', + ] + else + pyarrow_pq_enc_private_args = [ + '-DARROW_PYTHON_PARQUET_ENCRYPTION_EXPORTING', + ] + pyarrow_pq_enc_public_args = [] + endif + + pyarrow_encryption_lib = library( + 'arrow_python_parquet_encryption', + sources: ['src/arrow/python/parquet_encryption.cc'], + include_directories: ['src'], + dependencies: [parquet_dep, pyarrow_lib_dep, py.dependency()], + cpp_args: pyarrow_pq_enc_private_args, + install: true, + install_dir: py.get_install_dir() / 'pyarrow', + gnu_symbol_visibility: gnu_symbol_visibility, + override_options: ['b_lundef=false'], + ) + + pyarrow_parquet_encryption_dep = declare_dependency( + link_with: [pyarrow_encryption_lib], + dependencies: [parquet_dep], + compile_args: pyarrow_pq_enc_public_args, + ) + + cython_modules += { + '_parquet_encryption': { + 'dependencies': [pyarrow_parquet_encryption_dep], + }, + } + + if needs_dataset + cython_modules += { + '_dataset_parquet_encryption': { + 'dependencies': [ + arrow_dataset_dep, + pyarrow_parquet_encryption_dep, + ], + }, + } + endif +endif + +foreach key, val : cython_modules + cython_mod_name = '@0@.pyx'.format(key) + py.extension_module( + key, + sources: [cython_mod_name], + include_directories: ['src'], + cython_args: cython_args, + dependencies: [arrow_dep, pyarrow_lib_dep, numpy_dep] + val.get( + 'dependencies', + [], + ), + override_options: ['cython_language=cpp'], + install: true, + subdir: 'pyarrow', + install_rpath: '$ORIGIN', + gnu_symbol_visibility: gnu_symbol_visibility, + ) + + install_data(cython_mod_name, install_dir: py.get_install_dir() / 'pyarrow') +endforeach + +cython_headers = [ + '_acero.pxd', + 'array.pxi', + 'benchmark.pxi', + 'builder.pxi', + 'compat.pxi', + '_compute.pxd', + 'config.pxi', + '_csv.pxd', + '_cuda.pxd', + '_dataset_parquet.pxd', + '_dataset.pxd', + 'device.pxi', + '_dlpack.pxi', + 'error.pxi', + '_fs.pxd', + '__init__.pxd', + 'io.pxi', + 'ipc.pxi', + '_json.pxd', + 'lib.pxd', + 'memory.pxi', + '_orc.pxd', + 'pandas-shim.pxi', + '_parquet_encryption.pxd', + '_parquet.pxd', + 'public-api.pxi', + '_pyarrow_cpp_tests.pxd', + 'scalar.pxi', + 'table.pxi', + 'tensor.pxi', + 'types.pxi', +] + +install_data(cython_headers, install_dir: py.get_install_dir() / 'pyarrow') +install_subdir('includes', install_dir: py.get_install_dir() / 'pyarrow') +install_subdir( + 'src/arrow/python', + install_dir: py.get_install_dir() / 'pyarrow/src/arrow', +) + +pysources = [ + 'acero.py', + 'benchmark.py', + 'cffi.py', + '_compute_docstrings.py', + 'compute.py', + 'conftest.py', + 'csv.py', + 'cuda.py', + 'dataset.py', + 'feather.py', + 'flight.py', + 'fs.py', + '_generated_version.py', + '__init__.py', + 'ipc.py', + 'json.py', + 'jvm.py', + 'orc.py', + 'pandas_compat.py', + 'substrait.py', + 'types.py', + 'util.py', +] +py.install_sources(pysources, subdir: 'pyarrow') + +py.install_sources( + files( + 'src/arrow/python/api.h', + 'src/arrow/python/arrow_to_pandas.h', + 'src/arrow/python/async.h', + 'src/arrow/python/benchmark.h', + 'src/arrow/python/common.h', + 'src/arrow/python/config.h', + 'src/arrow/python/csv.h', + 'src/arrow/python/datetime.h', + 'src/arrow/python/decimal.h', + 'src/arrow/python/extension_type.h', + 'src/arrow/python/filesystem.h', + 'src/arrow/python/flight.h', + 'src/arrow/python/gdb.h', + 'src/arrow/python/helpers.h', + 'src/arrow/python/inference.h', + 'src/arrow/python/io.h', + 'src/arrow/python/ipc.h', + 
'src/arrow/python/iterators.h', + 'src/arrow/python/numpy_convert.h', + 'src/arrow/python/numpy_init.h', + 'src/arrow/python/numpy_interop.h', + 'src/arrow/python/numpy_to_arrow.h', + 'src/arrow/python/parquet_encryption.h', + 'src/arrow/python/platform.h', + 'src/arrow/python/pyarrow.h', + 'src/arrow/python/pyarrow_lib.h', + 'src/arrow/python/python_test.h', + 'src/arrow/python/python_to_arrow.h', + 'src/arrow/python/type_traits.h', + 'src/arrow/python/udf.h', + 'src/arrow/python/util.h', + 'src/arrow/python/visibility.h', + ), + subdir: 'pyarrow/include/arrow/python', +) + +if not get_option('sdist') + arrow_header_dir = arrow_dep.get_variable( + pkgconfig: 'includedir', + cmake: 'PACKAGE_INCLUDE_DIRS', + ) / 'arrow' + + install_subdir( + arrow_header_dir, + install_dir: py.get_install_dir() / 'pyarrow' / 'include', + ) +endif + +py.install_sources( + files('src/arrow/python/vendored/pythoncapi_compat.h'), + subdir: 'pyarrow/include/arrow/python/vendored', +) + +subdirs = ['interchange', 'parquet', 'tests', 'vendored'] + +foreach subdir : subdirs + install_subdir(subdir, install_dir: py.get_install_dir() / 'pyarrow') +endforeach diff --git a/python/pyarrow/src/arrow/python/meson.build b/python/pyarrow/src/arrow/python/meson.build new file mode 100644 index 00000000000..b3de2a1ec40 --- /dev/null +++ b/python/pyarrow/src/arrow/python/meson.build @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +cython_generated_headers = custom_target( + 'lib-pyx-headers', + input: ['../../../lib.pyx'], + output: ['lib.h', 'lib_api.h'], + command: ['cython', '--cplus', '@INPUT@', '-o', '@OUTPUT0@'], + install: true, + install_dir: py.get_install_dir() / 'pyarrow', +) + +cython_generated_dep = declare_dependency(sources: cython_generated_headers) + +conf_data = configuration_data() + +conf_data.set('UPPERCASE_PYBUILD_TYPE', get_option('buildtype').to_upper()) +configure_file( + input: 'config_internal.h.cmake', + output: 'config_internal.h', + configuration: conf_data, + format: 'cmake@', +) + diff --git a/python/pyarrow/tests/util.py b/python/pyarrow/tests/util.py index 7e3dd4324e9..7cbd09e71c5 100644 --- a/python/pyarrow/tests/util.py +++ b/python/pyarrow/tests/util.py @@ -178,6 +178,7 @@ def get_modified_env_with_pythonpath(): else: new_pythonpath = module_path env['PYTHONPATH'] = new_pythonpath + env['MACOSX_DEPLOYMENT_TARGET'] = "14.0" return env diff --git a/python/pyproject.toml b/python/pyproject.toml index 0a730fd4f78..a4a64ce4c38 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -19,12 +19,12 @@ requires = [ "cython >= 3.1", "numpy>=1.25", + "meson-python", # configuring setuptools_scm in pyproject.toml requires # versions released after 2022 "setuptools_scm[toml]>=8", - "setuptools>=77", ] -build-backend = "setuptools.build_meta" +build-backend = "mesonpy" [project] name = "pyarrow" diff --git a/python/requirements-build.txt b/python/requirements-build.txt index 9e03e04aded..0d67096a981 100644 --- a/python/requirements-build.txt +++ b/python/requirements-build.txt @@ -1,4 +1,4 @@ cython>=3.1 numpy>=1.25 +meson-python setuptools_scm>=8 -setuptools>=77 diff --git a/python/requirements-test.txt b/python/requirements-test.txt index 4339aeb9c16..f01f7f98ab4 100644 --- a/python/requirements-test.txt +++ b/python/requirements-test.txt @@ -5,3 +5,4 @@ pandas pytest pytz pyuwsgi; sys.platform != 'win32' and python_version < '3.13' +setuptools>=64 diff --git a/python/pyarrow/src/arrow/python/CMakeLists.txt b/python/scripts/generate_dist.py similarity index 64% rename from python/pyarrow/src/arrow/python/CMakeLists.txt rename to python/scripts/generate_dist.py index 67508982eab..c610564a857 100644 --- a/python/pyarrow/src/arrow/python/CMakeLists.txt +++ b/python/scripts/generate_dist.py @@ -15,5 +15,21 @@ # specific language governing permissions and limitations # under the License. -arrow_install_all_headers("arrow/python") -add_subdirectory(vendored) +import os +import pathlib +import shutil + + +def main(): + src_dir = pathlib.Path(os.environ["MESON_SOURCE_ROOT"]) + parent_dir = src_dir.parent.resolve() + dest_dir = pathlib.Path(os.environ["MESON_DIST_ROOT"]).resolve() + + license_file = parent_dir / 'LICENSE.txt' + shutil.copy(license_file, dest_dir) + notice_file = parent_dir / 'NOTICE.txt' + shutil.copy(notice_file, dest_dir) + + +if __name__ == "__main__": + main() diff --git a/python/setup.py b/python/setup.py deleted file mode 100755 index a27bd3baefd..00000000000 --- a/python/setup.py +++ /dev/null @@ -1,437 +0,0 @@ -#!/usr/bin/env python - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import contextlib -import os -import os.path -from os.path import join as pjoin -import re -import shlex -import shutil -import sys -import warnings - -if sys.version_info >= (3, 10): - import sysconfig -else: - # Get correct EXT_SUFFIX on Windows (https://bugs.python.org/issue39825) - from distutils import sysconfig - -from setuptools import setup, Extension, Distribution -from setuptools.command.sdist import sdist - -from Cython.Distutils import build_ext as _build_ext -import Cython - -# Check if we're running 64-bit Python -is_64_bit = sys.maxsize > 2**32 - -# We can't use sys.platform in a cross-compiling situation -# as here it may be set to the host not target platform -is_emscripten = ( - sysconfig.get_config_var("SOABI") - and sysconfig.get_config_var("SOABI").find("emscripten") != -1 -) - - -if Cython.__version__ < '3.1': - raise Exception( - 'Please update your Cython version. Supported Cython >= 3.1') - -setup_dir = os.path.abspath(os.path.dirname(__file__)) - -ext_suffix = sysconfig.get_config_var('EXT_SUFFIX') - - -@contextlib.contextmanager -def changed_dir(dirname): - oldcwd = os.getcwd() - os.chdir(dirname) - try: - yield - finally: - os.chdir(oldcwd) - - -def strtobool(val): - """Convert a string representation of truth to true (1) or false (0). - - True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values - are 'n', 'no', 'f', 'false', 'off', and '0'. Raises ValueError if - 'val' is anything else. - """ - # Copied from distutils - val = val.lower() - if val in ('y', 'yes', 't', 'true', 'on', '1'): - return 1 - elif val in ('n', 'no', 'f', 'false', 'off', '0'): - return 0 - else: - raise ValueError("invalid truth value %r" % (val,)) - - -MSG_DEPR_SETUP_BUILD_FLAGS = """ - !! - - *********************************************************************** - The '{}' flag is being passed to setup.py, but this is - deprecated. - - If a certain component is available in Arrow C++, it will automatically - be enabled for the PyArrow build as well. If you want to force the - build of a certain component, you can still use the - PYARROW_WITH_$COMPONENT environment variable. - *********************************************************************** - - !! 
-""" - - -class build_ext(_build_ext): - _found_names = () - - def build_extensions(self): - import numpy - numpy_incl = numpy.get_include() - - self.extensions = [ext for ext in self.extensions - if ext.name != '__dummy__'] - - for ext in self.extensions: - if (hasattr(ext, 'include_dirs') and - numpy_incl not in ext.include_dirs): - ext.include_dirs.append(numpy_incl) - _build_ext.build_extensions(self) - - def run(self): - self._run_cmake() - _build_ext.run(self) - - # adapted from cmake_build_ext in dynd-python - # github.com/libdynd/dynd-python - - description = "Build the C-extensions for arrow" - user_options = ([('cmake-generator=', None, 'CMake generator'), - ('extra-cmake-args=', None, 'extra arguments for CMake'), - ('build-type=', None, - 'build type (debug or release), default release'), - ('boost-namespace=', None, - 'namespace of boost (default: boost)'), - ('with-cuda', None, 'build the Cuda extension'), - ('with-flight', None, 'build the Flight extension'), - ('with-substrait', None, 'build the Substrait extension'), - ('with-acero', None, 'build the Acero Engine extension'), - ('with-dataset', None, 'build the Dataset extension'), - ('with-parquet', None, 'build the Parquet extension'), - ('with-parquet-encryption', None, - 'build the Parquet encryption extension'), - ('with-azure', None, - 'build the Azure Blob Storage extension'), - ('with-gcs', None, - 'build the Google Cloud Storage (GCS) extension'), - ('with-s3', None, 'build the Amazon S3 extension'), - ('with-static-parquet', None, 'link parquet statically'), - ('with-static-boost', None, 'link boost statically'), - ('with-orc', None, 'build the ORC extension'), - ('with-gandiva', None, 'build the Gandiva extension'), - ('generate-coverage', None, - 'enable Cython code coverage'), - ('bundle-boost', None, - 'bundle the (shared) Boost libraries'), - ('bundle-cython-cpp', None, - 'bundle generated Cython C++ code ' - '(used for code coverage)'), - ('bundle-arrow-cpp', None, - 'bundle the Arrow C++ libraries'), - ('bundle-arrow-cpp-headers', None, - 'bundle the Arrow C++ headers')] + - _build_ext.user_options) - - def initialize_options(self): - _build_ext.initialize_options(self) - self.cmake_generator = os.environ.get('PYARROW_CMAKE_GENERATOR') - if not self.cmake_generator and sys.platform == 'win32': - self.cmake_generator = 'Visual Studio 15 2017 Win64' - self.extra_cmake_args = os.environ.get('PYARROW_CMAKE_OPTIONS', '') - self.build_type = os.environ.get('PYARROW_BUILD_TYPE', - 'release').lower() - - self.cmake_cxxflags = os.environ.get('PYARROW_CXXFLAGS', '') - - if sys.platform == 'win32': - # Cannot do debug builds in Windows unless Python itself is a debug - # build - if not hasattr(sys, 'gettotalrefcount'): - self.build_type = 'release' - - self.with_azure = None - self.with_gcs = None - self.with_s3 = None - self.with_hdfs = None - self.with_cuda = None - self.with_substrait = None - self.with_flight = None - self.with_acero = None - self.with_dataset = None - self.with_parquet = None - self.with_parquet_encryption = None - self.with_orc = None - self.with_gandiva = None - - self.generate_coverage = strtobool( - os.environ.get('PYARROW_GENERATE_COVERAGE', '0')) - self.bundle_arrow_cpp = strtobool( - os.environ.get('PYARROW_BUNDLE_ARROW_CPP', '0')) - self.bundle_cython_cpp = strtobool( - os.environ.get('PYARROW_BUNDLE_CYTHON_CPP', '0')) - - CYTHON_MODULE_NAMES = [ - 'lib', - '_fs', - '_csv', - '_json', - '_compute', - '_cuda', - '_flight', - '_dataset', - '_dataset_orc', - '_dataset_parquet', - '_acero', 
- '_feather', - '_parquet', - '_parquet_encryption', - '_pyarrow_cpp_tests', - '_orc', - '_azurefs', - '_gcsfs', - '_s3fs', - '_substrait', - '_hdfs', - 'gandiva'] - - def _run_cmake(self): - # check if build_type is correctly passed / set - if self.build_type.lower() not in ('release', 'debug', - 'relwithdebinfo'): - raise ValueError("--build-type (or PYARROW_BUILD_TYPE) needs to " - "be 'release', 'debug' or 'relwithdebinfo'") - - # The directory containing this setup.py - source = os.path.dirname(os.path.abspath(__file__)) - - # The staging directory for the module being built - build_cmd = self.get_finalized_command('build') - saved_cwd = os.getcwd() - build_temp = pjoin(saved_cwd, build_cmd.build_temp) - build_lib = pjoin(saved_cwd, build_cmd.build_lib) - - if not os.path.isdir(build_temp): - self.mkpath(build_temp) - - if self.inplace: - # a bit hacky - build_lib = saved_cwd - - install_prefix = pjoin(build_lib, "pyarrow") - - # Change to the build directory - with changed_dir(build_temp): - # Detect if we built elsewhere - if os.path.isfile('CMakeCache.txt'): - cachefile = open('CMakeCache.txt', 'r') - cachedir = re.search('CMAKE_CACHEFILE_DIR:INTERNAL=(.*)', - cachefile.read()).group(1) - cachefile.close() - if (cachedir != build_temp): - build_base = pjoin(saved_cwd, build_cmd.build_base) - print(f"-- Skipping build. Temp build {build_temp} does " - f"not match cached dir {cachedir}") - print("---- For a clean build you might want to delete " - f"{build_base}.") - return - - cmake_options = [ - f'-DCMAKE_INSTALL_PREFIX={install_prefix}', - f'-DPYTHON_EXECUTABLE={sys.executable}', - f'-DPython3_EXECUTABLE={sys.executable}', - f'-DPYARROW_CXXFLAGS={self.cmake_cxxflags}', - ] - - def append_cmake_bool(value, varname): - cmake_options.append(f'-D{varname}={"on" if value else "off"}') - - def append_cmake_component(flag, varname): - # only pass this to cmake if the user pass the --with-component - # flag to setup.py build_ext - if flag is not None: - flag_name = ( - "--with-" - + varname.removeprefix("PYARROW_").lower().replace("_", "-")) - warnings.warn( - MSG_DEPR_SETUP_BUILD_FLAGS.format(flag_name), - UserWarning, stacklevel=2 - ) - append_cmake_bool(flag, varname) - - if self.cmake_generator: - cmake_options += ['-G', self.cmake_generator] - - append_cmake_component(self.with_cuda, 'PYARROW_CUDA') - append_cmake_component(self.with_substrait, 'PYARROW_SUBSTRAIT') - append_cmake_component(self.with_flight, 'PYARROW_FLIGHT') - append_cmake_component(self.with_gandiva, 'PYARROW_GANDIVA') - append_cmake_component(self.with_acero, 'PYARROW_ACERO') - append_cmake_component(self.with_dataset, 'PYARROW_DATASET') - append_cmake_component(self.with_orc, 'PYARROW_ORC') - append_cmake_component(self.with_parquet, 'PYARROW_PARQUET') - append_cmake_component(self.with_parquet_encryption, - 'PYARROW_PARQUET_ENCRYPTION') - append_cmake_component(self.with_azure, 'PYARROW_AZURE') - append_cmake_component(self.with_gcs, 'PYARROW_GCS') - append_cmake_component(self.with_s3, 'PYARROW_S3') - append_cmake_component(self.with_hdfs, 'PYARROW_HDFS') - - append_cmake_bool(self.bundle_arrow_cpp, - 'PYARROW_BUNDLE_ARROW_CPP') - append_cmake_bool(self.bundle_cython_cpp, - 'PYARROW_BUNDLE_CYTHON_CPP') - append_cmake_bool(self.generate_coverage, - 'PYARROW_GENERATE_COVERAGE') - - cmake_options.append( - f'-DCMAKE_BUILD_TYPE={self.build_type.lower()}') - - extra_cmake_args = shlex.split(self.extra_cmake_args) - - build_tool_args = [] - if sys.platform == 'win32': - if not is_64_bit: - raise RuntimeError('Not 
supported on 32-bit Windows') - else: - build_tool_args.append('--') - if os.environ.get('PYARROW_BUILD_VERBOSE', '0') == '1': - cmake_options.append('-DCMAKE_VERBOSE_MAKEFILE=ON') - parallel = os.environ.get('PYARROW_PARALLEL') - if parallel: - build_tool_args.append(f'-j{parallel}') - - # Generate the build files - if is_emscripten: - print("-- Running emcmake cmake for PyArrow on Emscripten") - self.spawn(['emcmake', 'cmake'] + extra_cmake_args + - cmake_options + [source]) - else: - print("-- Running cmake for PyArrow") - self.spawn(['cmake'] + extra_cmake_args + cmake_options + [source]) - - print("-- Finished cmake for PyArrow") - - print("-- Running cmake --build for PyArrow") - self.spawn(['cmake', '--build', '.', '--config', self.build_type] + - build_tool_args) - print("-- Finished cmake --build for PyArrow") - - print("-- Running cmake --build --target install for PyArrow") - self.spawn(['cmake', '--build', '.', '--config', self.build_type] + - ['--target', 'install'] + build_tool_args) - print("-- Finished cmake --build --target install for PyArrow") - - self._found_names = [] - for name in self.CYTHON_MODULE_NAMES: - built_path = pjoin(install_prefix, name + ext_suffix) - if os.path.exists(built_path): - self._found_names.append(name) - - def _get_build_dir(self): - # Get the package directory from build_py - build_py = self.get_finalized_command('build_py') - return build_py.get_package_dir('pyarrow') - - def _get_cmake_ext_path(self, name): - # This is the name of the arrow C-extension - filename = name + ext_suffix - return pjoin(self._get_build_dir(), filename) - - def get_ext_generated_cpp_source(self, name): - if sys.platform == 'win32': - head, tail = os.path.split(name) - return pjoin(head, tail + ".cpp") - else: - return pjoin(name + ".cpp") - - def get_ext_built_api_header(self, name): - if sys.platform == 'win32': - head, tail = os.path.split(name) - return pjoin(head, tail + "_api.h") - else: - return pjoin(name + "_api.h") - - def get_names(self): - return self._found_names - - def get_outputs(self): - # Just the C extensions - # regular_exts = _build_ext.get_outputs(self) - return [self._get_cmake_ext_path(name) - for name in self.get_names()] - - -class BinaryDistribution(Distribution): - def has_ext_modules(foo): - return True - - -class CopyLicenseSdist(sdist): - """Custom sdist command that copies license files from parent directory.""" - - def make_release_tree(self, base_dir, files): - # Call parent to do the normal work - super().make_release_tree(base_dir, files) - - # Define source (parent dir) and destination (sdist root) for license files - license_files = [ - ("LICENSE.txt", "../LICENSE.txt"), - ("NOTICE.txt", "../NOTICE.txt"), - ] - - for dest_name, src_path in license_files: - src_full = os.path.join(os.path.dirname(__file__), src_path) - dest_full = os.path.join(base_dir, dest_name) - - # Remove any existing file/symlink at destination - if os.path.exists(dest_full) or os.path.islink(dest_full): - os.unlink(dest_full) - - if not os.path.exists(src_full): - msg = f"Required license file not found: {src_full}" - raise FileNotFoundError(msg) - - shutil.copy2(src_full, dest_full) - print(f"Copied {src_path} to {dest_name} in sdist") - - -setup( - distclass=BinaryDistribution, - # Dummy extension to trigger build_ext - ext_modules=[Extension('__dummy__', sources=[])], - cmdclass={ - 'build_ext': build_ext, - 'sdist': CopyLicenseSdist, - }, -) From 844d284cc703d87dbff88762fa4e90b80ee9b753 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 20 Dec 2025 
15:26:28 -0500 Subject: [PATCH 2/2] Hack for grpc example --- cpp/examples/arrow/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cpp/examples/arrow/CMakeLists.txt b/cpp/examples/arrow/CMakeLists.txt index 82c075c51df..4fb81e04336 100644 --- a/cpp/examples/arrow/CMakeLists.txt +++ b/cpp/examples/arrow/CMakeLists.txt @@ -127,6 +127,12 @@ if(ARROW_FLIGHT) "${CMAKE_CURRENT_BINARY_DIR}/helloworld.pb.cc" "${CMAKE_CURRENT_BINARY_DIR}/helloworld.grpc.pb.cc") + # HACK: Workaround absl::Mutex ABI incompatibility by making sure the + # non-debug version of Abseil is included + # (https://github.com/conda-forge/abseil-cpp-feedstock/issues/104, + # https://github.com/abseil/abseil-cpp/issues/1624) + target_compile_options(flight-grpc-example PRIVATE "-DNDEBUG") + if(ARROW_FLIGHT_SQL) if(ARROW_BUILD_SHARED AND ARROW_GRPC_USE_SHARED) set(FLIGHT_SQL_EXAMPLES_LINK_LIBS arrow_flight_sql_shared)
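
Note on building against the meson-python backend introduced above: the optional PyArrow components now correspond to the feature options declared in python/meson.options, and meson-python forwards pip config-settings through to meson setup. The lines below are an illustrative sketch only, assuming a recent pip, meson-python's documented setup-args passthrough, the build requirements from python/requirements-build.txt already installed, and an Arrow C++ installation that the dependency() lookups in python/meson.build can discover via pkg-config or CMake; the exact invocation may differ per environment.

    # Run from the python/ directory; force-enable Parquet instead of
    # relying on the 'auto' feature default from meson.options.
    python -m pip install --no-build-isolation \
        --config-settings=setup-args="-Dparquet=enabled" \
        .
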